### Comments are denoted by "#". Everything after a "#" on a line is ignored by the computer.
### Also, "Cat" is NOT equivalent to "cat": command names are always case-sensitive.
### If you run into trouble and need to stop something, use "control" + "c" to kill it. Try:

info cat ### the documented key to leave "info" is "q"; "control" + "c" is the general way to interrupt a running command

### to better understand a command run the following:
man cat      ### "cat" can be replaced with ANY unix command, e.g. "man sed" OR
cat --help   ### will often work on unix servers as well (it is a GNU-style option, so not every system accepts it).

cat taxons.txt      ## Reads the file to standard output (i.e. your screen)
cat taxons.txt > test.txt     ### Reads taxons.txt and writes it to a new file called "test.txt". ">" OVERWRITES: repeating the command rewrites the file
cat taxons.txt >> test.txt    ## ">>" APPENDS to test.txt instead of overwriting it. VERY helpful for joining lots of files into 1. See next example.
## "*" is a wildcard that matches anything, so this joins ALL files ending in ".fasta" (e.g. "mouse_18s.fasta")
## into a single file ending in ".fas". The "./" prefix keeps a file whose name starts with "-" from being
## mistaken for a command-line option.
cat ./*.fasta >> test.fas
cat taxons.* >> test.fas     ### Reads every file named taxons.<something>, regardless of extension, and appends it to test.fas

### basic find and replace ### reproduces ALL the examples in the book (chapter 2) without using a text editor. Useful on the command line.

## The first "galma" on EACH line becomes "."; the result is appended to "test.txt" in your current folder.
sed 's/galma/./' ./taxons.txt >> ./test.txt
## Same idea for "us": only the first "us" on each line is replaced.
sed 's/us/./' ./taxons.txt >> ./test.txt
## Notice the difference: the trailing "g" means "globally", i.e. EVERY "us" on a line, not just the first one. This gets you the example in the book.
sed 's/us/./g' ./taxons.txt >> ./test.txt

 
### Adding "Position: " in front of the number requires a bit more work.
## Step 1: turn every lowercase letter into a tab ("\t" in the replacement is a GNU sed extension).
sed 's/[a-z]/\t/g' ./ord.txt

## Step 2: delete the lowercase letters, put "Position: " in front of the first remaining field, and sort
## by that number in ascending order. The number is the SECOND field of awk's output, so the sort key has to
## be "-k2,2n". A bare "sort -n" would look at the non-numeric text "Position:", treat every line as 0 and
## fall back to plain text order (10 before 2).
sed 's/[a-z]//g' ./ord.txt | awk '{ print "Position: ", $1}' | sort -k2,2n >> test.txt
## Same as the previous line, but sorted in descending numerical order.
sed 's/[a-z]//g' ./ord.txt | awk '{ print "Position: ", $1}' | sort -k2,2nr >> test.txt

### Using "\w" (any word character) in the find and replace. WILL NOT WORK WITHOUT GNU sed.
### The attempts below are kept commented out as a record of what was tried. In sed's default (basic)
### regex syntax a bare "(" or "+" is an ordinary character: capture groups are written "\(...\)" and
### GNU sed spells "one or more" as "\+" (or run "sed -E" to use the unescaped forms).

# sed s/(\w)\w\w/\1/g ./ord.txt 
# sed 's/(\w)\w+/\1./g' ./ord.txt >> test.remove
# sed 's/(\w)\w+(\w+)/\1. \2/g' ./taxons.txt >> test.remove
# sed 's/\(\w\)\w+\(\w+\)/\1. \2/g' ./taxons.txt >> test.remove

## Replaces every capital letter, together with the lowercase letters that follow it, with the literal
## text "[A-Z]." -- a character class is NOT expanded in the replacement half of the command.
sed 's/[A-Z][a-z]*/[A-Z]\./g' ./taxons.txt >> test.remove

### Then it gets REALLY hairy to get it to work...
### "\(...\)" captures part of the match; "\1", "\2", ... paste the captured text back into the replacement.
### All example outputs below are for the input line "Agalma elegans".

## Keep the first non-space character, drop the rest of that word, add ".": gives "A. elegans"
sed 's/\([^[:space:]]\)[^[:space:]]*/\1\./' ./taxons.txt >> test.remove
## Same result; the kept character must be a letter or digit: "A. elegans"
sed 's/\([[:alnum:]]\)[^[:space:]]*/\1\./' ./taxons.txt >> test.remove
## Same result, but built from four groups (\1 first letter, \2 rest of word, \3 the space, \4 second word): "A. elegans"
sed 's/\([[:alnum:]]\)\([[:alnum:]]*\)\([[:space:]]\)\([[:alnum:]]*\)/\1\. \4/' ./taxons.txt >> test.remove
## Same result here, but because of the "g" EVERY capitalized word on the line is abbreviated: "A. elegans"
sed 's/\([A-Z]\)\([a-z]*\)/\1\./g' ./taxons.txt >> test.remove
## Same result; the third group is always empty because the second one already took all the lowercase letters.
sed 's/\([A-Z]\)\([a-z]*\)\([a-z]*\)/\1\./g' ./taxons.txt >> test.remove
## Using \2 instead of \1 keeps the REST of the first word instead: gives "galma. elegans"
sed 's/\([^[:space:]]\)\([^[:space:]]*\)/\2\./' ./taxons.txt >> test.remove

## A group can be reused: this gives "A. elegans A"
sed 's/\([[:alnum:]]\)\([[:alnum:]]*\)\([[:space:]]\)\([[:alnum:]]*\)/\1\. \4 \1/' ./taxons.txt >> test.remove
## Gives "A. elegans A_elegans"
sed 's/\([[:alnum:]]\)\([[:alnum:]]*\)\([[:space:]]\)\([[:alnum:]]*\)/\1\. \4 \1\_\4/' ./taxons.txt >> test.remove
## Gives "Agalma elegans A_elegans"
sed 's/\([[:alnum:]]\)\([[:alnum:]]*\)\([[:space:]]\)\([[:alnum:]]*\)/\1\2 \4 \1\_\4/' ./taxons.txt >> test.remove


## Keep the first character of every word; drop the rest of the word AND the spaces after it. Prints: S.I.W.
printf '%s\n' "Someone Is Watching" | sed 's/\([^[:space:]]\)[^[:space:]]*[[:space:]]*/\1\./g'
## No "g", and a quoted space is added to the replacement, so only the first word changes. Prints: S." "Is Watching
printf '%s\n' "Someone Is Watching" | sed 's/\([^[:space:]]\)[^[:space:]]*[[:space:]]*/\1\.\" \"/'
## Only words starting with a capital are shortened, but the space after each one is swallowed too. Prints: S.is W.
printf '%s\n' "Someone is Watching" | sed 's/\([[:upper:]]\)[^[:space:]]*[[:space:]]*/\1\./g'
## Leaving the spaces out of the match keeps them in the output. Prints: S. is W.
printf '%s\n' "Someone is Watching" | sed 's/\([[:upper:]]\)[^[:space:]]*/\1\./g'
## Without the "g" only the first capitalized word is shortened. Prints: S. is Watching    Notice the effect of the "g"!
printf '%s\n' "Someone is Watching" | sed 's/\([[:upper:]]\)[^[:space:]]*/\1\./'

### Working with protein fasta sequences. See pg. 28. The book's "FPexamples.fta" was renamed "protein.fasta".
### The easiest way to deal with this is in stages. Every step uses ">>", so re-running a step APPENDS to its output file.

## Intended to strip the version number (".1", ".2", ...) off the accession number.
## NOTE(review): the pattern is a ".", ONE digit, then "\=". "\=" is not a standard sed operator; GNU sed
## presumably reads it as a literal "=" -- confirm this matches the real headers.
sed 's/\.[0-9]\=//g' ./protein.fasta >> test.remove

## Stage 1: on each line, replace everything from the first "." through the LAST "[" (the match is greedy and
## never crosses a double quote) with "_".
sed 's/\.[^"]*\[/_/' ./protein.fasta >> test2.remove
## Stage 2: working on the stage 1 output, delete everything from the first space through the LAST "]".
sed 's/ [^"]*\]//' ./test2.remove >> test3.remove

## Both stages joined with a pipe: the output of the first sed feeds the second one, so no intermediate file is needed.
sed 's/\.[^"]*\[/_/' ./protein.fasta | sed 's/ [^"]*\]//' >> test.remove

### Lots of commands you may find useful    ###
### Each sed expression is quoted as a whole so the shell can never mistake "[...]" for a filename pattern.

sed 's/[0-9]//g' ./gps.txt >> ./test.txt    ## Changes ALL digits to "", which is nothing.
sed 's/[A-Z]//g' ./gps.txt >> ./test.txt    ## Changes all CAPITAL letters to "", which is nothing.
sed 's/[a-z]//g' ./taxons.txt >> ./test.txt    ## Changes all lowercase letters to "", which is nothing.
## Changes ALL digits, lower and UPPER case letters to "". Ranges inside "[...]" are simply written one after
## another; a comma between them would be matched (and deleted) as a literal comma.
sed 's/[0-9A-Za-z]//g' ./gps.txt >> ./test.txt
sed "s/'//g" ./gps.txt >> ./test.txt      ## Removes every "'" found in the original file (double quotes outside so the "'" can appear inside).
## "^" right after "[" negates the set: deletes EVERY character that is not a capital letter (spaces, digits and
## punctuation included). The trailing "g" is required; without it only the first such character per line goes.
sed 's/[^A-Z]//g' ./taxons.txt >> test.txt
## Changes ALL periods to commas. An unescaped "." is a wildcard that matches any one character; "\." is the
## escaped form that means just the literal "." character.
sed 's/\./,/g' ./gps.txt >> ./test.txt

### Other useful commands  ###

diff ./file1 ./file2 >> ./file3 # Finds the differences between your two files and appends them to a third file. Be sure they are sorted. See "man sort" for details.
## Copies the first line of one file onto the end of another. VERY useful for making headers.
## The target name must be a single word (or be quoted): with a space in it, the shell would redirect to the
## first word and hand the second word to "head" as another input file.
head -n 1 ./filename >> ./new_file
wc -l ./filename  ### Tells the number of lines present in your file. Extraordinarily helpful for diagnosing the effects of a line of code on your file.
grep -c ">" ./filename  ### Counts the lines containing ">", i.e. the number of fasta sequences in your file.
## Removes the first line of a file in place, i.e. no temporary file is needed. Set FILE_NAME first.
## "-s" puts ex in batch mode (no prompts or informational messages); the quotes keep a name with spaces intact.
printf ":1d\n:wq\n" | ex -s "./${FILE_NAME}"

### Awk commands ###
## Takes the first ten tab-delimited columns of all_counts.tab and appends them to ./test.
## FS (input separator) and OFS (output separator) are both set to a tab so the result stays tab-delimited;
## with the default OFS the commas in "print" would join the columns with single spaces instead.
awk 'BEGIN { FS = OFS = "\t" } { print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10 }' ./all_counts.tab >> ./test