Monday, August 18, 2014

###################
#Lane 2 Ar_jam MOE#
###################
gunzip pair*
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L002_R1* >> Arjam_MOE_L2_R1.fq
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L002_R2* >> Arjam_MOE_L2_R2.fq
fastq_quality_filter -i Arjam_MOE_L2_R1.fq -o Arjam_MOE_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15317036 reads.
Output: 14847480 reads.
discarded 469556 (3%) low-quality reads.
fastq_quality_filter -i Arjam_MOE_L2_R2.fq -o Arjam_MOE_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15317036 reads.
Output: 14608448 reads.
discarded 708588 (4%) low-quality reads.
~/Scripts/both.py Arjam_MOE_R1_L2_fastxtrimmed.fastq Arjam_MOE_R2_L2_fastxtrimmed.fastq
#run trinity
Trinity.pl --seqType fq --left Arjam_MOE_R1_L2_fastxtrimmed.fastq.both --right Arjam_MOE_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

###################
#Lane 1 Ar_jam VNO#
###################
gunzip pair*
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L001_R1* >> Arjam_VNO_L1_R1.fq
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L001_R2* >> Arjam_VNO_L1_R2.fq
mv Arjam_VNO_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_011_Arjam_VNO/
fastq_quality_filter -i Arjam_VNO_L1_R1.fq -o Arjam_VNO_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13582991 reads.
Output: 13087556 reads.
discarded 495435 (3%) low-quality reads.
fastq_quality_filter -i Arjam_VNO_L1_R2.fq -o Arjam_VNO_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13582991 reads.
Output: 12665813 reads.
discarded 917178 (6%) low-quality reads.

~/Scripts/both.py Arjam_VNO_R1_L1_fastxtrimmed.fastq Arjam_VNO_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Arjam_VNO_R1_L1_fastxtrimmed.fastq.both --right Arjam_VNO_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1

###################
#Lane 2 Ar_jam VNO#
###################
gunzip pair*
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L002_R1* >> Arjam_VNO_L2_R1.fq
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L002_R2* >> Arjam_VNO_L2_R2.fq
mv Arjam_VNO_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_011_Arjam_VNO/
fastq_quality_filter -i Arjam_VNO_L2_R1.fq -o Arjam_VNO_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13746292 reads.
Output: 13242643 reads.
discarded 503649 (3%) low-quality reads.
fastq_quality_filter -i Arjam_VNO_L2_R2.fq -o Arjam_VNO_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13746292 reads.
Output: 12800383 reads.
discarded 945909 (6%) low-quality reads.

~/Scripts/both.py Arjam_VNO_R1_L2_fastxtrimmed.fastq Arjam_VNO_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Arjam_VNO_R1_L2_fastxtrimmed.fastq.both --right Arjam_VNO_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

###################
#Lane 1 Mo_red MOE#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L001_R1* >> Mored_MOE_L1_R1.fq
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L001_R2* >> Mored_MOE_L1_R2.fq
mv Mored_MOE_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_MOE/

fastq_quality_filter -i Mored_MOE_L1_R1.fq -o Mored_MOE_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14546140 reads.
Output: 14205942 reads.
discarded 340198 (2%) low-quality reads.

fastq_quality_filter -i Mored_MOE_L1_R2.fq -o Mored_MOE_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14546140 reads.
Output: 13998979 reads.
discarded 547161 (3%) low-quality reads.

~/Scripts/both.py Mored_MOE_R1_L1_fastxtrimmed.fastq Mored_MOE_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_MOE_R1_L1_fastxtrimmed.fastq.both --right Mored_MOE_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1
###################
#Lane 2 Mo_red MOE#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L002_R1* >> Mored_MOE_L2_R1.fq
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L002_R2* >> Mored_MOE_L2_R2.fq
mv Mored_MOE_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_MOE/

fastq_quality_filter -i Mored_MOE_L2_R1.fq -o Mored_MOE_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v


Quality cut-off: 20
Minimum percentage: 80
Input: 14719223 reads.
Output: 14370971 reads.
discarded 348252 (2%) low-quality reads.

fastq_quality_filter -i Mored_MOE_L2_R2.fq -o Mored_MOE_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14719223 reads.
Output: 14152391 reads.
discarded 566832 (3%) low-quality reads.

~/Scripts/both.py Mored_MOE_R1_L2_fastxtrimmed.fastq Mored_MOE_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_MOE_R1_L2_fastxtrimmed.fastq.both --right Mored_MOE_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

###################
#Lane 1 Mo_red VNO#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L001_R1* >> Mored_VNO_L1_R1.fq
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L001_R2* >> Mored_VNO_L1_R2.fq
mv Mored_VNO_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_VNO/

fastq_quality_filter -i Mored_VNO_L1_R1.fq -o Mored_VNO_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14001874 reads.
Output: 13728472 reads.
discarded 273402 (1%) low-quality reads
fastq_quality_filter -i Mored_VNO_L1_R2.fq -o Mored_VNO_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14001874 reads.
Output: 13444029 reads.
discarded 557845 (3%) low-quality reads.

~/Scripts/both.py Mored_VNO_R1_L1_fastxtrimmed.fastq Mored_VNO_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_VNO_R1_L1_fastxtrimmed.fastq.both --right Mored_VNO_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1
###################
#Lane 2 Mo_red VNO#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L002_R1* >> Mored_VNO_L2_R1.fq
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L002_R2* >> Mored_VNO_L2_R2.fq
mv Mored_VNO_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_VNO/

fastq_quality_filter -i Mored_VNO_L2_R1.fq -o Mored_VNO_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14176309 reads.
Output: 13892340 reads.
discarded 283969 (2%) low-quality reads.
fastq_quality_filter -i Mored_VNO_L2_R2.fq -o Mored_VNO_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 
-v
Quality cut-off: 20
Minimum percentage: 80
Input: 14176309 reads.
Output: 13594457 reads.
discarded 581852 (4%) low-quality reads.


~/Scripts/both.py Mored_VNO_R1_L2_fastxtrimmed.fastq Mored_VNO_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_VNO_R1_L2_fastxtrimmed.fastq.both --right Mored_VNO_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2
###################
#Lane 1 Mo_bla MOE#
###################
gunzip pair*
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L001_R1* >> Mobla_MOE_L1_R1.fq
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L001_R2* >> Mobla_MOE_L1_R2.fq
mv Mobla_MOE_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_091_Mobl_MOE/

fastq_quality_filter -i Mobla_MOE_L1_R1.fq -o Mobl_MOE_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16080335 reads.
Output: 15736163 reads.

discarded 344172 (2%) low-quality reads.
fastq_quality_filter -i Mobla_MOE_L1_R2.fq -o Mobl_MOE_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16080335 reads.
Output: 15462533 reads.

discarded 617802 (3%) low-quality reads.
~/Scripts/both.py Mobl_MOE_R1_L1_fastxtrimmed.fastq Mobl_MOE_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mobl_MOE_R1_L1_fastxtrimmed.fastq.both --right Mobl_MOE_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1
###################
#Lane 2 Mo_bla MOE#
###################
gunzip pair*
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L002_R1* >> Mobla_MOE_L2_R1.fq
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L002_R2* >> Mobla_MOE_L2_R2.fq
mv Mobla_MOE_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_091_Mobl_MOE/

fastq_quality_filter -i Mobla_MOE_L2_R1.fq -o Mobl_MOE_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16263150 reads.
Output: 15909002 reads.

discarded 354148 (2%) low-quality reads.
fastq_quality_filter -i Mobla_MOE_L2_R2.fq -o Mobl_MOE_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16263150 reads.
Output: 15621798 reads.

discarded 641352 (3%) low-quality reads.
~/Scripts/both.py Mobl_MOE_R1_L2_fastxtrimmed.fastq Mobl_MOE_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mobl_MOE_R1_L2_fastxtrimmed.fastq.both --right Mobl_MOE_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

Friday, August 15, 2014

Looks like the 24 hours of redoing the libc++ is working out.

CMD finished (161 seconds)
Thursday, August 14, 2014: 21:56:02     CMD: touch /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_004_Arjam_MOE/output_4/chrysalis/file_partitioning.ok
CMD finished (0 seconds)
---------------------------------------------------
----------- Chrysalis: QuantifyGraph --------------
-- (Integrate mapped reads into de Bruijn graph) --
---------------------------------------------------

Thursday, August 14, 2014: 21:56:03     CMD: /usr/local/Cellar/trinity/r20131110/trinity-plugins/parafly/bin/ParaFly -c /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_004_Arjam_MOE/output_4/chrysalis/quantifyGraph_commands -CPU 4 -failed_cmds failed_quantify_graph_commands.82191.txt -v -shuffle
Number of  Commands: 57201

succeeded(48273)   84.3919% completed.

...several hours later...

Butterfly finished too!
72,120 transcripts were assembled. Are there any ORs? Gah, I hope so... :-|

Thursday, August 14, 2014

brew install https://raw.github.com/Homebrew/homebrew-science/master/fastx_toolkit.rb
brew link --overwrite fastx_toolkit

Linking /usr/local/Cellar/fastx_toolkit/0.0.14... 
Error: Could not symlink include/gtextutils/gtextutils/container_join.h
/usr/local/include/gtextutils/gtextutils is not writable.

brew install https://raw.github.com/Homebrew/homebrew-science/master/fastx_toolkit.rb
######################################################################## 100.0%
Warning: fastx_toolkit-0.0.14 already installed, it's just not linked

sudo chown -R laurelyohe /usr/local/include
brew link --overwrite fastx_toolkit

Alright! I think it is installed. I have avoided using fastx-toolkit because it is such a pain in the butt to install, but alas, the uncertain has become certain. Why am I doing this? Because I am getting these alarming warning messages while Trinity is running. It seems to carry on regardless but it doesn't seem right.

...
warning: ignoring read XXXX since it cannot decipher if /1 or /2 of a pair.
Error: note that there were 4329123 reads that could not be deciphered as being /1 or /2 of a PE fragment. Hopefully, these were SE reads that should have been ignored. Otherwise, please research this further.

Okay, awesome. Seems like someone else ran into this problem:

After looking at what my input files were before and after trimming, it is clear that the fastq files have lost track of being from the right or left pair. Uggh! I knew that girl's Trinity help was too good to be true.

head Arjam_MOE_R1.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292 1:N:0:ACAGTG
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
+
CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJIHHFDDCDDDDDDDDDDDCBBDBBBDDDDDEDDD@DDDDDDDDDDDDDACCDDDDDDDDDDDDDDEEED
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386 1:N:0:ACAGTG
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
+
CCCFFFFFDHHHHJGGHGIGIJJJHIFHIGIIJJJIIIIGHGGHI?FGHIGIIJ8@=5BEHHH9BEFEDCDDBDDDDDDCCD>@CCDDDDBDBBDDCCBC
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396 1:N:0:ACAGTG
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA

head Arjam_MOE_R2.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292 2:N:0:ACAGTG
CAACGGCGCGCTGCAGGCCCGGCTCGCCGCGCTGCACAAGGCTTTCAAGAAGGAGGCTCTGCGCGCAGGCAAGCGCGAGAAGTCGCTGGTGGCGCAGCTG
+
CCCFFFFFHHHHHJJJJJJIIJIEIBEHBBBBBDDD<CDDDDDDDCCDDDDDD?ABBBDDDDDDDBBDD@DDDDDDD<BBDDDDDDDBB<CBABD@BDDC
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386 2:N:0:ACAGTG
CCTGTTTCTCGCCTGGTGAGAGTCGCAGACACTTCAGGAGACCACGTCCACCACCACCACCATTGCCTCCTCCTTCAGAACCTACTACTTTTGGCCCAGA
+
BBBFFFFFHHGHHIJJGHHHHIGHIIIIJJJJJJJJJJIJIJFJJGHIIHJGHJJEHHHFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDEDDDDDDBDD
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396 2:N:0:ACAGTG
CAGGCGGGTTCACGTTTGGCACCGCAAAGACGGCGACAACCACCCCTGCCACCGGCTTTTCTTTCTCCTCATCCAGCGCTGGTGGGTTTAGTTTCGGGAC

Okay see how there is a 1 and a 2 in the title line?

head Arjam_MOE_R1_trim.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
+
CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJIHHFDDCDDDDDDDDDDDCBBDBBBDDDDDEDDD@DDDDDDDDDDDDDACCDDDDDDDDDDDDDDEEED
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
+
CCCFFFFFDHHHHJGGHGIGIJJJHIFHIGIIJJJIIIIGHGGHI?FGHIGIIJ8@=5BEHHH9BEFEDCDDBDDDDDDCCD>@CCDDDDBDBBDDCCBC
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA

head Arjam_MOE_R2_trim.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292
CAACGGCGCGCTGCAGGCCCGGCTCGCCGCGCTGCACAAGGCTTTCAAGAAGGAGGCTCTGCGCGCAGGCAAGCGCGAGAAGTCGCTGGTGGCGCAGCTG
+
CCCFFFFFHHHHHJJJJJJIIJIEIBEHBBBBBDDD<CDDDDDDDCCDDDDDD?ABBBDDDDDDDBBDD@DDDDDDD<BBDDDDDDDBB<CBABD@BDDC
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386
CCTGTTTCTCGCCTGGTGAGAGTCGCAGACACTTCAGGAGACCACGTCCACCACCACCACCATTGCCTCCTCCTTCAGAACCTACTACTTTTGGCCCAGA
+
BBBFFFFFHHGHHIJJGHHHHIGHIIIIJJJJJJJJJJIJIJFJJGHIIHJGHJJEHHHFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDEDDDDDDBDD
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396
CAGGCGGGTTCACGTTTGGCACCGCAAAGACGGCGACAACCACCCCTGCCACCGGCTTTTCTTTCTCCTCATCCAGCGCTGGTGGGTTTAGTTTCGGGAC

See how it goes away? Bollocks! This happens after running python ~/Scripts/q-trim.py.

Let's see if this strips it.
fastq_quality_filter -i Arjam_MOE_R1.fq -o Arjam_MOE_R1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15149790 reads.
Output: 14691119 reads.

head Arjam_MOE_R1_fastxtrimmed.fastq 
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292 1:N:0:ACAGTG
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
+
CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJIHHFDDCDDDDDDDDDDDCBBDBBBDDDDDEDDD@DDDDDDDDDDDDDACCDDDDDDDDDDDDDDEEED
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386 1:N:0:ACAGTG
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
+
CCCFFFFFDHHHHJGGHGIGIJJJHIFHIGIIJJJIIIIGHGGHI?FGHIGIIJ8@=5BEHHH9BEFEDCDDBDDDDDDCCD>@CCDDDDBDBBDDCCBC
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396 1:N:0:ACAGTG
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA

Cool! Looks like it kept the extended header on.

fastq_quality_filter -i Arjam_MOE_R2.fq -o Arjam_MOE_R2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15149790 reads.
Output: 14464395 reads.
discarded 685395 (4%) low-quality reads.

For once something worked. Oh yeah, just as a side note if you have illumina reads you must put -Q 33 parameter. This of course, is an undocumented fix discovered by users. Three+ years after this discovery, -Q remains to be documented in the fastx-toolkit. 

I still am going to do the pair matching before running trinity again.

~/Scripts/both.py Arjam_MOE_R1_fastxtrimmed.fastq Arjam_MOE_R2_fastxtrimmed.fastq 

Trinity.pl --seqType fq --left Arjam_MOE_R1_fastxtrimmed.fastq.both --right Arjam_MOE_R2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_4

----------------------------------------------------------------

Yay! I fixed the /1 /2 problem. You can see that the fasta files trinity creates there is now the /1 and /2. Now we will see if we can get past chrysalis and if the other error with the c++libs is fixed.

head left.fa
>HWI-ST885:197:HA2RPADXX:1:1101:1464:2292/1
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
>HWI-ST885:197:HA2RPADXX:1:1101:1438:2386/1
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
>HWI-ST885:197:HA2RPADXX:1:1101:1310:2396/1
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA
>HWI-ST885:197:HA2RPADXX:1:1101:1346:2468/1
ATGGAGCCCAGGCCTCCAGTGCAGAGTGAGTGCTTCCTTCCATGGTCCCCATGCCATCGTGTGACAAGTTCTGTGACCTGATTTCCAGCACTGTCATCCA
>HWI-ST885:197:HA2RPADXX:1:1101:1467:2481/1
GGCCCAGGGAGTCCTCCACCACCCCCTCCCCTTTCCTGGCCTGCTCTCTAATTCTCTAGAAACCTTCCTGTGTATCCTGCCTACTTAAACCCTGCATCCC
head right.fa
>HWI-ST885:197:HA2RPADXX:1:1101:1464:2292/2
CAACGGCGCGCTGCAGGCCCGGCTCGCCGCGCTGCACAAGGCTTTCAAGAAGGAGGCTCTGCGCGCAGGCAAGCGCGAGAAGTCGCTGGTGGCGCAGCTG
>HWI-ST885:197:HA2RPADXX:1:1101:1438:2386/2
CCTGTTTCTCGCCTGGTGAGAGTCGCAGACACTTCAGGAGACCACGTCCACCACCACCACCATTGCCTCCTCCTTCAGAACCTACTACTTTTGGCCCAGA
>HWI-ST885:197:HA2RPADXX:1:1101:1310:2396/2
CAGGCGGGTTCACGTTTGGCACCGCAAAGACGGCGACAACCACCCCTGCCACCGGCTTTTCTTTCTCCTCATCCAGCGCTGGTGGGTTTAGTTTCGGGAC
>HWI-ST885:197:HA2RPADXX:1:1101:1346:2468/2
GGGGGTGGAAATTTTGAGACAAATGTTGATTCTTGGTGAGTTGATGAGTCTTTTCTAGACACATAGAGAAGGTGCTGAAGATTGAGAGAAAACGCCTTCC
>HWI-ST885:197:HA2RPADXX:1:1101:1467:2481/2

GTCAAACTCGTTGTAGCAGATTCTACTTGGGATGCAGGGTTTAAGTAGGCAGGATACACAGGAAGGTTTCTAGAGAATTAGAGAGCAGGCCAGGAAAGGG

To be continued.

There doesn't seem to be a lot of resources troubleshooting this error for Trinity.  Because we are running it on a Mac OSX and Trinity is compiled only for Linux, there is no real support from the Trinity side of things. It may be possible that we are missing libc++abi. This is not a minor undertaking and takes about 24 hours to download, configure, build, and compile. Despite this terrifying error message:

Mac users, remember to be careful when replacing the system's libc++. Your system will not be able to boot without a funcioning libc++.

I will carry onwards.

svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
cd llvm/tools
svn co http://llvm.org/svn/llvm-project/cfe/trunk clang
cd ../..
cd llvm/tools/clang/tools
svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra
cd ../../../..
cd llvm/projects
svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt
cd ../..
mkdir build
cd build
../llvm/configure
make #this took several hours; make sure to screen
#check to see if it worked
clang --help


#now install the other libraries
brew update
brew install cmake #you will need this later
cd
cd llvm/projects
svn co http://llvm.org/svn/llvm-project/libcxxabi/trunk libcxxabi
svn co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx
cd .. #now you are in the llvm directory
mkdir build && cd build
cmake ..
make cxx
make #this also takes several hours

Okay done. Let's hope I didn't break break the computer and let's see if this actually did anything.

Tuesday, August 12, 2014

Fail.

succeeded(0), failed(58736)   99.9949% completed.    libc++abi.dylib: terminating with uncaught exception of type std::length_error: vector::_M_fill_insert
succeeded(0), failed(58737)   99.9966% completed.    libc++abi.dylib: terminating with uncaught exception of type std::length_error: vector::_M_fill_insert
succeeded(0), failed(58738)   99.9983% completed.    libc++abi.dylib: terminating with uncaught exception of type std::length_error: vector::_M_fill_insert
succeeded(0), failed(58739)   100% completed.

We are sorry, commands in file: [failed_quantify_graph_commands.6513.txt] failed.  :-(


Error, cmd: /usr/local/Cellar/trinity/r20131110/trinity-plugins/parafly/bin/ParaFly -c /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_004_Arjam_MOE/trinity_output//chrysalis/quantifyGraph_commands -CPU 4 -failed_cmds failed_quantify_graph_commands.6513.txt -v -shuffle  died with ret 256 at /usr/local/bin/Trinity.pl line 1793.

This is to be continued. At least they put a frowny face in the error message. 
Here is a blog that gives excellent Trinity instructions:
https://cartwrightlab.wikispaces.com/RNAseq+Assembly+in+Trinity

#for paired end reads, combine each side of pairs into one file
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L001_R1* >> Arjam_MOE_R1.fq
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L001_R2* >> Arjam_MOE_R2.fq

#run a trimming script
python ~/Scripts/q-trim.py Arjam_MOE_R1.fq Arjam_MOE_R1_trim.fq

#woops I run into this error:
importerror no module named screed

#it looks like I need the "screed" module for this; looks like I also need to update "setup tools"
#first install pip
sudo python get-pip.py
pip install ipython #not sure what this did
#still missing the screed module
sudo pip install setuptools --upgrade
sudo pip install git+https://github.com/ged-lab/screed.git

#okay seems to be working now
#trimmed with the default setting in which a minimum Qscore = 5
#minimum length = 30; lets see how this works
python ~/Scripts/q-trim.py Arjam_MOE_R1.fq Arjam_MOE_R1_trim.fq
python ~/Scripts/q-trim.py Arjam_MOE_R2.fq Arjam_MOE_R2_trim.fq

#now i need to assemble paired reads--extracting only the pairs using both.py
python ~/Scripts/both.py Arjam_MOE_R1_trim.fq Arjam_MOE_R2_trim.fq
#this creates two files ...R1_trim.fq.both and R2_trim.fq.both
#this script took about one hour to run

#finally time to run trinity!
Trinity.pl --seqType fq --left Arjam_MOE_R1_trim.fq.both --right Arjam_MOE_R2_trim.fq.both --CPU 4 --JM 20G --output trinity_output/

Everything is up and running now. Stay tuned.

Monday, August 4, 2014

Okay we have FINALLY updated our operating system.

Apparently Mavericks already has commandline tools installed so it is not necessary to install again. However, there are some issues if you install Homebrew and the gcc compiler not being linked. So starting from scratch, here is what do do.

#install homebrew

ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
#check to see your comp is ready to brew
brew doctor
#i had several errors/warnings (errors indicated in orange)
/usr/bin comes before /usr/local/bin

export PATH='/usr/local/bin:$PATH' >> ~/.bash_profile

/usr/local/lib/gcc is not writable.

sudo chown -R laurelyohe /usr/local

brew install homebrew/science/bowtie
brew install apple-gcc42
brew install samtools
brew install trinity

Okay, everything seems good to go. Now I am unzipping the transcriptome output from trimmomatic.