Thursday, August 21, 2014


#prior lambda = 6
#for the geometric mean (body size)
x<-data$GM
names(x)<-rownames(data)
prior <- make.prior(tree, model = "OU", param=list("dalpha"=list(),"dsig2"=list(),"dtheta"=list(),
                                         "dk"=list(lambda=6,kmax=length(tree$tip.label)*2-2),
                                          "dloc"=list(min=0,max=1),
                                         "dsb"=list(bmax=Inf,prob=tree$edge.length)))


fit.1<-bayou.mcmc(tree, x, model = "OU",prior= prior, ngen = 500000)
fit.2<-bayou.mcmc(tree, x,model = "OU", prior= prior, ngen = 500000)
chain.1 <- load.bayou(fit.1, save.Rdata=FALSE, cleanup=TRUE)
chain.2 <- load.bayou(fit.2, save.Rdata=FALSE, cleanup=TRUE)

combine<-combine.chains(chain.1, chain.2, burnin.prop = 0.2)

plotSimmap.mcmc(tree,combine, burnin = .1, fsize = 0.5,circle.lwd = 0.5 )

> effectiveSize(combine$lnL)
    var1
47696.89
> effectiveSize(combine$alpha)
    var1
17668.65
> effectiveSize(combine$k)
    var1
200.3831

http://andrewgelman.com/2010/10/29/could_someone_p/

#GM
#lambda=15
prior <- make.prior(tree, dists=list(dalpha="dlnorm", dsig2="dlnorm",dsb="dsb", dk="cdpois",
                                     dtheta="dnorm"), param=list(dalpha=list(meanlog=-5, sdlog=2),
                                     dsig2=list(meanlog=-1, sdlog=5), dk=list(lambda=15, kmax=200),
                                     dsb=list(bmax=Inf,prob=tree$edge.length),
                                     dtheta=list(mean=mean(x), sd=2)))



fit.1<-bayou.mcmc(tree, x, model = "OU",prior= prior, ngen = 500000)
fit.2<-bayou.mcmc(tree, x,model = "OU", prior= prior, ngen = 500000)
chain.1 <- load.bayou(fit.1, save.Rdata=FALSE, cleanup=TRUE)
chain.2 <- load.bayou(fit.2, save.Rdata=FALSE, cleanup=TRUE)

combine<-combine.chains(chain.1, chain.2, burnin.prop = 0.2)

> effectiveSize(combine$lnL)
    var1
54733.55
> effectiveSize(combine$alpha)
    var1
21377.18
> effectiveSize(combine$k)
    var1
460.0945
> median(combine$alpha)
[1] 0.005785393
> median(combine$k)
[1] 1
> median(combine$lnL)
[1] -226.6656
> median(combine$sig2)
[1] 0.1995975
> median(combine$ntheta)
[1] 2
> median(combine$theta)

plotSimmap.mcmc(tree,combine, burnin = 0, fsize = 0.5,circle.lwd = 0.5 )


#pPC1, lambda = 6
x<-data$PC1
names(x)<-rownames(data)
prior <- make.prior(tree, model = "OU", param=list("dalpha"=list(),"dsig2"=list(),"dtheta"=list(),
                                         "dk"=list(lambda=6,kmax=length(tree$tip.label)*2-2),
                                          "dloc"=list(min=0,max=1),
                                         "dsb"=list(bmax=Inf,prob=tree$edge.length)))
> effectiveSize(combine$lnL)
    var1
77556.68
> effectiveSize(combine$alpha)
    var1
750.1128
> effectiveSize(combine$k)
    var1
1098.867
> median(combine$alpha)
[1] 0.01054099
> median(combine$k)
[1] 5
> median(combine$lnL)
[1] -232.5772
> median(combine$sig2)
[1] 0.2309304
> median(combine$ntheta)
[1] 6

#pPC1
#lambda=15
prior <- make.prior(tree, dists=list(dalpha="dlnorm", dsig2="dlnorm",dsb="dsb", dk="cdpois",
                                     dtheta="dnorm"), param=list(dalpha=list(meanlog=-5, sdlog=2),
                                     dsig2=list(meanlog=-1, sdlog=5), dk=list(lambda=15, kmax=200),
                                     dsb=list(bmax=Inf,prob=tree$edge.length),
                                     dtheta=list(mean=mean(x), sd=2)))

fit.1<-bayou.mcmc(tree, x, model = "OU",prior= prior, ngen = 500000, plot.freq = NULL)
fit.2<-bayou.mcmc(tree, x,model = "OU", prior= prior, ngen = 500000, plot.freq = NULL)
chain.1 <- load.bayou(fit.1, save.Rdata=FALSE, cleanup=TRUE)
chain.2 <- load.bayou(fit.2, save.Rdata=FALSE, cleanup=TRUE)

combine<-combine.chains(chain.1, chain.2, burnin.prop = 0.2)

> effectiveSize(combine$lnL)
    var1
46687.59
> effectiveSize(combine$alpha)
    var1
1882.031
> effectiveSize(combine$k)
    var1
1916.088
> median(combine$alpha)
[1] 0.01097906
> median(combine$k)
[1] 6
> median(combine$lnL)
[1] -682.1394
> median(combine$sig2)
[1] 187.9879
> median(combine$ntheta)
[1] 7

Tuesday, August 19, 2014

#installing Fasta_reader (needed to run script for stats on Trinity assembly)
#first need to install npm
brew install node
vi server.js
#paste the following
var http = require('http'); http.createServer(function (req, res) { res.writeHead(200, {'Content-Type': 'text/plain'}); res.end('Hello World\n'); }).listen(8124, "127.0.0.1"); console.log('Server running at http://127.0.0.1:8124/');
#ESC wq!

#to check if it works
node server.js

npm install fastareader

WOOPS! nevermind! All of this stuff is already installed with Trinity. To run TrinityStats.pl, it cannot be run alone. You must navigate to the directory in which it is found.
###################
#   Ar_jam MOE    #
###################
#combine data from Lanes 1 & 2
cat Arjam_MOE* >> DR_004_Arjam_MOE_assembled.fasta
sudo perl /Volumes/Spare/transcriptomes/Trinity/util/TrinityStats.pl /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/DR_004_Arjam_MOE_assembled.fasta
################################
## Counts of transcripts, etc.
################################
Total trinity transcripts: 140189
Total trinity components: 63956
Percent GC: 55.16

########################################
Stats based on ALL transcript contigs:
########################################

Contig N10: 4088
Contig N20: 3085
Contig N30: 2434
Contig N40: 1927
Contig N50: 1502

Median contig length: 426
Average contig: 826.50
Total assembled bases: 115865619


#####################################################
## Stats based on ONLY LONGEST ISOFORM per COMPONENT:
#####################################################

Contig N10: 3860
Contig N20: 2862
Contig N30: 2212
Contig N40: 1726
Contig N50: 1310

Median contig length: 440
Average contig: 793.55
Total assembled bases: 50752060 
###################
#   Ar_jam VNO    #
###################
#combine data from Lanes 1 & 2
cat Arjam_VNO* >> DR_011_Arjam_VNO_assembled.fasta
sudo perl /Volumes/Spare/transcriptomes/Trinity/util/TrinityStats.pl /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/DR_011_Arjam_VNO_assembled.fasta
################################
## Counts of transcripts, etc.
################################
Total trinity transcripts: 138142
Total trinity components: 61477
Percent GC: 55.51

########################################
Stats based on ALL transcript contigs:
########################################

Contig N10: 4735
Contig N20: 3550
Contig N30: 2822
Contig N40: 2240
Contig N50: 1741

Median contig length: 455
Average contig: 920.11
Total assembled bases: 127106357


#####################################################
## Stats based on ONLY LONGEST ISOFORM per COMPONENT:
#####################################################

Contig N10: 3855
Contig N20: 2871
Contig N30: 2270
Contig N40: 1814
Contig N50: 1436

Median contig length: 467
Average contig: 848.36

Total assembled bases: 52154403
###################
#   Mo_red MOE    #
###################
#combine data from Lanes 1 & 2
cat Mored_MOE* >> DR_013_Mored_MOE_assembled.fasta
sudo perl /Volumes/Spare/transcriptomes/Trinity/util/TrinityStats.pl /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/DR_013_Mored_MOE_assembled.fasta
################################
## Counts of transcripts, etc.
################################
Total trinity transcripts: 245573
Total trinity components: 100799
Percent GC: 56.14

########################################
Stats based on ALL transcript contigs:
########################################

Contig N10: 5311
Contig N20: 4036
Contig N30: 3304
Contig N40: 2703
Contig N50: 2118

Median contig length: 406
Average contig: 960.45
Total assembled bases: 235861464


#####################################################
## Stats based on ONLY LONGEST ISOFORM per COMPONENT:
#####################################################

Contig N10: 4140
Contig N20: 2880
Contig N30: 2105
Contig N40: 1512
Contig N50: 1053

Median contig length: 377
Average contig: 685.78

Total assembled bases: 69126334

###################
#   Mo_red VNO    #
###################
#combine data from Lanes 1 & 2
cat Mored_VNO* >> DR_013_Mored_VNO_assembled.fasta
sudo perl $TRINITY_HOME/util/TrinityStats.pl DR_013_Mored_VNO_assembled.fasta
################################
## Counts of transcripts, etc.
################################
Total trinity transcripts: 163233
Total trinity components: 70319
Percent GC: 55.74

########################################
Stats based on ALL transcript contigs:
########################################

Contig N10: 4648
Contig N20: 3450
Contig N30: 2778
Contig N40: 2186
Contig N50: 1709

Median contig length: 430
Average contig: 883.37
Total assembled bases: 144195048


#####################################################
## Stats based on ONLY LONGEST ISOFORM per COMPONENT:
#####################################################

Contig N10: 3865
Contig N20: 2811
Contig N30: 2110
Contig N40: 1605
Contig N50: 1187

Median contig length: 427
Average contig: 754.88

Total assembled bases: 53082440


###################
#   Mo_bla MOE    #
###################
cat Mobl_MOE* >> DR_091_Mobl_MOE_assembled.fasta
sudo perl $TRINITY_HOME/util/TrinityStats.pl DR_091_Mobl_MOE_assembled.fasta
################################
## Counts of transcripts, etc.
################################
Total trinity transcripts: 249551
Total trinity components: 115871
Percent GC: 54.01

########################################
Stats based on ALL transcript contigs:
########################################

Contig N10: 4786
Contig N20: 3549
Contig N30: 2741
Contig N40: 2123
Contig N50: 1598

Median contig length: 335
Average contig: 763.67
Total assembled bases: 190573978


#####################################################
## Stats based on ONLY LONGEST ISOFORM per COMPONENT:
#####################################################

Contig N10: 3761
Contig N20: 2670
Contig N30: 1971
Contig N40: 1426
Contig N50: 989

Median contig length: 344
Average contig: 636.73

Total assembled bases: 73778325

Monday, August 18, 2014

###################
#Lane 2 Ar_jam MOE#
###################
gunzip pair*
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L002_R1* >> Arjam_MOE_L2_R1.fq
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L002_R2* >> Arjam_MOE_L2_R2.fq
fastq_quality_filter -i Arjam_MOE_L2_R1.fq -o Arjam_MOE_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15317036 reads.
Output: 14847480 reads.
discarded 469556 (3%) low-quality reads.
fastq_quality_filter -i Arjam_MOE_L2_R2.fq -o Arjam_MOE_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15317036 reads.
Output: 14608448 reads.
discarded 708588 (4%) low-quality reads.
~/Scripts/both.py Arjam_MOE_R1_L2_fastxtrimmed.fastq Arjam_MOE_R2_L2_fastxtrimmed.fastq
#run trinity
Trinity.pl --seqType fq --left Arjam_MOE_R1_L2_fastxtrimmed.fastq.both --right Arjam_MOE_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

###################
#Lane 1 Ar_jam VNO#
###################
gunzip pair*
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L001_R1* >> Arjam_VNO_L1_R1.fq
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L001_R2* >> Arjam_VNO_L1_R2.fq
mv Arjam_VNO_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_011_Arjam_VNO/
fastq_quality_filter -i Arjam_VNO_L1_R1.fq -o Arjam_VNO_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13582991 reads.
Output: 13087556 reads.
discarded 495435 (3%) low-quality reads.
fastq_quality_filter -i Arjam_VNO_L1_R2.fq -o Arjam_VNO_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13582991 reads.
Output: 12665813 reads.
discarded 917178 (6%) low-quality reads.

~/Scripts/both.py Arjam_VNO_R1_L1_fastxtrimmed.fastq Arjam_VNO_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Arjam_VNO_R1_L1_fastxtrimmed.fastq.both --right Arjam_VNO_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1

###################
#Lane 2 Ar_jam VNO#
###################
gunzip pair*
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L002_R1* >> Arjam_VNO_L2_R1.fq
cat pair.DR011_Artibeus_Jamaicensis_VNO_TGACCA_L002_R2* >> Arjam_VNO_L2_R2.fq
mv Arjam_VNO_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_011_Arjam_VNO/
fastq_quality_filter -i Arjam_VNO_L2_R1.fq -o Arjam_VNO_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13746292 reads.
Output: 13242643 reads.
discarded 503649 (3%) low-quality reads.
fastq_quality_filter -i Arjam_VNO_L2_R2.fq -o Arjam_VNO_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 13746292 reads.
Output: 12800383 reads.
discarded 945909 (6%) low-quality reads.

~/Scripts/both.py Arjam_VNO_R1_L2_fastxtrimmed.fastq Arjam_VNO_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Arjam_VNO_R1_L2_fastxtrimmed.fastq.both --right Arjam_VNO_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

###################
#Lane 1 Mo_red MOE#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L001_R1* >> Mored_MOE_L1_R1.fq
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L001_R2* >> Mored_MOE_L1_R2.fq
mv Mored_MOE_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_MOE/

fastq_quality_filter -i Mored_MOE_L1_R1.fq -o Mored_MOE_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14546140 reads.
Output: 14205942 reads.
discarded 340198 (2%) low-quality reads.

fastq_quality_filter -i Mored_MOE_L1_R2.fq -o Mored_MOE_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14546140 reads.
Output: 13998979 reads.
discarded 547161 (3%) low-quality reads.

~/Scripts/both.py Mored_MOE_R1_L1_fastxtrimmed.fastq Mored_MOE_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_MOE_R1_L1_fastxtrimmed.fastq.both --right Mored_MOE_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1
###################
#Lane 2 Mo_red MOE#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L002_R1* >> Mored_MOE_L2_R1.fq
cat pair.DR013_Monophyllus_Redmani_MOE_CAGATC_L002_R2* >> Mored_MOE_L2_R2.fq
mv Mored_MOE_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_MOE/

fastq_quality_filter -i Mored_MOE_L2_R1.fq -o Mored_MOE_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v


Quality cut-off: 20
Minimum percentage: 80
Input: 14719223 reads.
Output: 14370971 reads.
discarded 348252 (2%) low-quality reads.

fastq_quality_filter -i Mored_MOE_L2_R2.fq -o Mored_MOE_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14719223 reads.
Output: 14152391 reads.
discarded 566832 (3%) low-quality reads.

~/Scripts/both.py Mored_MOE_R1_L2_fastxtrimmed.fastq Mored_MOE_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_MOE_R1_L2_fastxtrimmed.fastq.both --right Mored_MOE_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

###################
#Lane 1 Mo_red VNO#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L001_R1* >> Mored_VNO_L1_R1.fq
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L001_R2* >> Mored_VNO_L1_R2.fq
mv Mored_VNO_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_VNO/

fastq_quality_filter -i Mored_VNO_L1_R1.fq -o Mored_VNO_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14001874 reads.
Output: 13728472 reads.
discarded 273402 (1%) low-quality reads
fastq_quality_filter -i Mored_VNO_L1_R2.fq -o Mored_VNO_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14001874 reads.
Output: 13444029 reads.
discarded 557845 (3%) low-quality reads.

~/Scripts/both.py Mored_VNO_R1_L1_fastxtrimmed.fastq Mored_VNO_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_VNO_R1_L1_fastxtrimmed.fastq.both --right Mored_VNO_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1
###################
#Lane 2 Mo_red VNO#
###################
gunzip pair*
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L002_R1* >> Mored_VNO_L2_R1.fq
cat pair.DR013_Monophyllus_Redmani_VNO_AGTCAA_L002_R2* >> Mored_VNO_L2_R2.fq
mv Mored_VNO_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_013_Mored_VNO/

fastq_quality_filter -i Mored_VNO_L2_R1.fq -o Mored_VNO_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 14176309 reads.
Output: 13892340 reads.
discarded 283969 (2%) low-quality reads.
fastq_quality_filter -i Mored_VNO_L2_R2.fq -o Mored_VNO_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 
-v
Quality cut-off: 20
Minimum percentage: 80
Input: 14176309 reads.
Output: 13594457 reads.
discarded 581852 (4%) low-quality reads.


~/Scripts/both.py Mored_VNO_R1_L2_fastxtrimmed.fastq Mored_VNO_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mored_VNO_R1_L2_fastxtrimmed.fastq.both --right Mored_VNO_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2
###################
#Lane 1 Mo_bla MOE#
###################
gunzip pair*
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L001_R1* >> Mobla_MOE_L1_R1.fq
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L001_R2* >> Mobla_MOE_L1_R2.fq
mv Mobla_MOE_L1_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_091_Mobl_MOE/

fastq_quality_filter -i Mobla_MOE_L1_R1.fq -o Mobl_MOE_R1_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16080335 reads.
Output: 15736163 reads.

discarded 344172 (2%) low-quality reads.
fastq_quality_filter -i Mobla_MOE_L1_R2.fq -o Mobl_MOE_R2_L1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16080335 reads.
Output: 15462533 reads.

discarded 617802 (3%) low-quality reads.
~/Scripts/both.py Mobl_MOE_R1_L1_fastxtrimmed.fastq Mobl_MOE_R2_L1_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mobl_MOE_R1_L1_fastxtrimmed.fastq.both --right Mobl_MOE_R2_L1_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane1
###################
#Lane 2 Mo_bla MOE#
###################
gunzip pair*
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L002_R1* >> Mobla_MOE_L2_R1.fq
cat pair.DR091_Mormoops_Blainvelli_MOE_CGATGT_L002_R2* >> Mobla_MOE_L2_R2.fq
mv Mobla_MOE_L2_R* /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_091_Mobl_MOE/

fastq_quality_filter -i Mobla_MOE_L2_R1.fq -o Mobl_MOE_R1_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16263150 reads.
Output: 15909002 reads.

discarded 354148 (2%) low-quality reads.
fastq_quality_filter -i Mobla_MOE_L2_R2.fq -o Mobl_MOE_R2_L2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 16263150 reads.
Output: 15621798 reads.

discarded 641352 (3%) low-quality reads.
~/Scripts/both.py Mobl_MOE_R1_L2_fastxtrimmed.fastq Mobl_MOE_R2_L2_fastxtrimmed.fastq

Trinity.pl --seqType fq --left Mobl_MOE_R1_L2_fastxtrimmed.fastq.both --right Mobl_MOE_R2_L2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_Lane2

Friday, August 15, 2014

Looks like the 24 hours of redoing the libc++ is working out.

CMD finished (161 seconds)
Thursday, August 14, 2014: 21:56:02     CMD: touch /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_004_Arjam_MOE/output_4/chrysalis/file_partitioning.ok
CMD finished (0 seconds)
---------------------------------------------------
----------- Chrysalis: QuantifyGraph --------------
-- (Integrate mapped reads into de Bruijn graph) --
---------------------------------------------------

Thursday, August 14, 2014: 21:56:03     CMD: /usr/local/Cellar/trinity/r20131110/trinity-plugins/parafly/bin/ParaFly -c /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_004_Arjam_MOE/output_4/chrysalis/quantifyGraph_commands -CPU 4 -failed_cmds failed_quantify_graph_commands.82191.txt -v -shuffle
Number of  Commands: 57201

succeeded(48273)   84.3919% completed.

...several hours later...

Butterfly finished too!
72,120 transcripts were assembled. Are there any ORs? Gah, I hope so... :-|

Thursday, August 14, 2014

brew install https://raw.github.com/Homebrew/homebrew-science/master/fastx_toolkit.rb
brew link --overwrite fastx_toolkit

Linking /usr/local/Cellar/fastx_toolkit/0.0.14... 
Error: Could not symlink include/gtextutils/gtextutils/container_join.h
/usr/local/include/gtextutils/gtextutils is not writable.

brew install https://raw.github.com/Homebrew/homebrew-science/master/fastx_toolkit.rb
######################################################################## 100.0%
Warning: fastx_toolkit-0.0.14 already installed, it's just not linked

sudo chown -R laurelyohe /usr/local/include
brew link --overwrite fastx_toolkit

Alright! I think it is installed. I have avoided using fastx-toolkit because it is such a pain in the butt to install, but alas, the uncertain has become certain. Why am I doing this? Because I am getting these alarming warning messages while Trinity is running. It seems to carry on regardless but it doesn't seem right.

...
warning: ignoring read XXXX since it cannot decipher if /1 or /2 of a pair.
Error: note that there were 4329123 reads that could not be deciphered as being /1 or /2 of a PE fragment. Hopefully, these were SE reads that should have been ignored. Otherwise, please research this further.

Okay, awesome. Seems like someone else ran into this problem:

After looking at what my input files were before and after trimming, it is clear that the fastq files have lost track of being from the right or left pair. Uggh! I knew that girl's Trinity help was too good to be true.

head Arjam_MOE_R1.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292 1:N:0:ACAGTG
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
+
CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJIHHFDDCDDDDDDDDDDDCBBDBBBDDDDDEDDD@DDDDDDDDDDDDDACCDDDDDDDDDDDDDDEEED
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386 1:N:0:ACAGTG
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
+
CCCFFFFFDHHHHJGGHGIGIJJJHIFHIGIIJJJIIIIGHGGHI?FGHIGIIJ8@=5BEHHH9BEFEDCDDBDDDDDDCCD>@CCDDDDBDBBDDCCBC
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396 1:N:0:ACAGTG
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA

head Arjam_MOE_R2.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292 2:N:0:ACAGTG
CAACGGCGCGCTGCAGGCCCGGCTCGCCGCGCTGCACAAGGCTTTCAAGAAGGAGGCTCTGCGCGCAGGCAAGCGCGAGAAGTCGCTGGTGGCGCAGCTG
+
CCCFFFFFHHHHHJJJJJJIIJIEIBEHBBBBBDDD<CDDDDDDDCCDDDDDD?ABBBDDDDDDDBBDD@DDDDDDD<BBDDDDDDDBB<CBABD@BDDC
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386 2:N:0:ACAGTG
CCTGTTTCTCGCCTGGTGAGAGTCGCAGACACTTCAGGAGACCACGTCCACCACCACCACCATTGCCTCCTCCTTCAGAACCTACTACTTTTGGCCCAGA
+
BBBFFFFFHHGHHIJJGHHHHIGHIIIIJJJJJJJJJJIJIJFJJGHIIHJGHJJEHHHFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDEDDDDDDBDD
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396 2:N:0:ACAGTG
CAGGCGGGTTCACGTTTGGCACCGCAAAGACGGCGACAACCACCCCTGCCACCGGCTTTTCTTTCTCCTCATCCAGCGCTGGTGGGTTTAGTTTCGGGAC

Okay see how there is a 1 and a 2 in the title line?

head Arjam_MOE_R1_trim.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
+
CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJIHHFDDCDDDDDDDDDDDCBBDBBBDDDDDEDDD@DDDDDDDDDDDDDACCDDDDDDDDDDDDDDEEED
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
+
CCCFFFFFDHHHHJGGHGIGIJJJHIFHIGIIJJJIIIIGHGGHI?FGHIGIIJ8@=5BEHHH9BEFEDCDDBDDDDDDCCD>@CCDDDDBDBBDDCCBC
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA

head Arjam_MOE_R2_trim.fq
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292
CAACGGCGCGCTGCAGGCCCGGCTCGCCGCGCTGCACAAGGCTTTCAAGAAGGAGGCTCTGCGCGCAGGCAAGCGCGAGAAGTCGCTGGTGGCGCAGCTG
+
CCCFFFFFHHHHHJJJJJJIIJIEIBEHBBBBBDDD<CDDDDDDDCCDDDDDD?ABBBDDDDDDDBBDD@DDDDDDD<BBDDDDDDDBB<CBABD@BDDC
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386
CCTGTTTCTCGCCTGGTGAGAGTCGCAGACACTTCAGGAGACCACGTCCACCACCACCACCATTGCCTCCTCCTTCAGAACCTACTACTTTTGGCCCAGA
+
BBBFFFFFHHGHHIJJGHHHHIGHIIIIJJJJJJJJJJIJIJFJJGHIIHJGHJJEHHHFFFFEEEEEEDDDDDDDDDDDDDDDDDDDDDEDDDDDDBDD
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396
CAGGCGGGTTCACGTTTGGCACCGCAAAGACGGCGACAACCACCCCTGCCACCGGCTTTTCTTTCTCCTCATCCAGCGCTGGTGGGTTTAGTTTCGGGAC

See how it goes away? Bollocks! This happens after running python ~/Scripts/q-trim.py.

Let's see if this strips it.
fastq_quality_filter -i Arjam_MOE_R1.fq -o Arjam_MOE_R1_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15149790 reads.
Output: 14691119 reads.

head Arjam_MOE_R1_fastxtrimmed.fastq 
@HWI-ST885:197:HA2RPADXX:1:1101:1464:2292 1:N:0:ACAGTG
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
+
CCCFFFFFHHHHHJJJIJJJJJJJJJJJJJJIHHFDDCDDDDDDDDDDDCBBDBBBDDDDDEDDD@DDDDDDDDDDDDDACCDDDDDDDDDDDDDDEEED
@HWI-ST885:197:HA2RPADXX:1:1101:1438:2386 1:N:0:ACAGTG
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
+
CCCFFFFFDHHHHJGGHGIGIJJJHIFHIGIIJJJIIIIGHGGHI?FGHIGIIJ8@=5BEHHH9BEFEDCDDBDDDDDDCCD>@CCDDDDBDBBDDCCBC
@HWI-ST885:197:HA2RPADXX:1:1101:1310:2396 1:N:0:ACAGTG
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA

Cool! Looks like it kept the extended header on.

fastq_quality_filter -i Arjam_MOE_R2.fq -o Arjam_MOE_R2_fastxtrimmed.fastq -q 20 -p 80 -Q 33 -v
Quality cut-off: 20
Minimum percentage: 80
Input: 15149790 reads.
Output: 14464395 reads.
discarded 685395 (4%) low-quality reads.

For once something worked. Oh yeah, just as a side note if you have illumina reads you must put -Q 33 parameter. This of course, is an undocumented fix discovered by users. Three+ years after this discovery, -Q remains to be documented in the fastx-toolkit. 

I still am going to do the pair matching before running trinity again.

~/Scripts/both.py Arjam_MOE_R1_fastxtrimmed.fastq Arjam_MOE_R2_fastxtrimmed.fastq 

Trinity.pl --seqType fq --left Arjam_MOE_R1_fastxtrimmed.fastq.both --right Arjam_MOE_R2_fastxtrimmed.fastq.both --CPU 4 --JM 20G --output output_4

----------------------------------------------------------------

Yay! I fixed the /1 /2 problem. You can see that the fasta files trinity creates there is now the /1 and /2. Now we will see if we can get past chrysalis and if the other error with the c++libs is fixed.

head left.fa
>HWI-ST885:197:HA2RPADXX:1:1101:1464:2292/1
GCCCTTGCCGCCAGCTGGCGCGGCCACGCAGCGGCTGAGCGAGTCTAGGCGCGCGCTGTACTCGGTGAACTTCTTTTGGTAGCGCAGAGCAGTCATTTGC
>HWI-ST885:197:HA2RPADXX:1:1101:1438:2386/1
GGGTGGAGCAAATTCTGGGCCAAAAGTAGTAGGTTCTGAAGGAGGAGGCAATGGTGGTGGTGGTGGACGTGGTCTCCTGAAGTGTCTGCGACTCTCACCA
>HWI-ST885:197:HA2RPADXX:1:1101:1310:2396/1
CTTGGGTCGTGAGTGAGAACAGGCTGGTAGACGGGGCGCTCGCCGAAGGCTGGGATGAAGTCCCGAAACTAAACCCACCAGCGCTGGATGAGGAGAAAGA
>HWI-ST885:197:HA2RPADXX:1:1101:1346:2468/1
ATGGAGCCCAGGCCTCCAGTGCAGAGTGAGTGCTTCCTTCCATGGTCCCCATGCCATCGTGTGACAAGTTCTGTGACCTGATTTCCAGCACTGTCATCCA
>HWI-ST885:197:HA2RPADXX:1:1101:1467:2481/1
GGCCCAGGGAGTCCTCCACCACCCCCTCCCCTTTCCTGGCCTGCTCTCTAATTCTCTAGAAACCTTCCTGTGTATCCTGCCTACTTAAACCCTGCATCCC
head right.fa
>HWI-ST885:197:HA2RPADXX:1:1101:1464:2292/2
CAACGGCGCGCTGCAGGCCCGGCTCGCCGCGCTGCACAAGGCTTTCAAGAAGGAGGCTCTGCGCGCAGGCAAGCGCGAGAAGTCGCTGGTGGCGCAGCTG
>HWI-ST885:197:HA2RPADXX:1:1101:1438:2386/2
CCTGTTTCTCGCCTGGTGAGAGTCGCAGACACTTCAGGAGACCACGTCCACCACCACCACCATTGCCTCCTCCTTCAGAACCTACTACTTTTGGCCCAGA
>HWI-ST885:197:HA2RPADXX:1:1101:1310:2396/2
CAGGCGGGTTCACGTTTGGCACCGCAAAGACGGCGACAACCACCCCTGCCACCGGCTTTTCTTTCTCCTCATCCAGCGCTGGTGGGTTTAGTTTCGGGAC
>HWI-ST885:197:HA2RPADXX:1:1101:1346:2468/2
GGGGGTGGAAATTTTGAGACAAATGTTGATTCTTGGTGAGTTGATGAGTCTTTTCTAGACACATAGAGAAGGTGCTGAAGATTGAGAGAAAACGCCTTCC
>HWI-ST885:197:HA2RPADXX:1:1101:1467:2481/2

GTCAAACTCGTTGTAGCAGATTCTACTTGGGATGCAGGGTTTAAGTAGGCAGGATACACAGGAAGGTTTCTAGAGAATTAGAGAGCAGGCCAGGAAAGGG

To be continued.

There doesn't seem to be a lot of resources troubleshooting this error for Trinity.  Because we are running it on a Mac OSX and Trinity is compiled only for Linux, there is no real support from the Trinity side of things. It may be possible that we are missing libc++abi. This is not a minor undertaking and takes about 24 hours to download, configure, build, and compile. Despite this terrifying error message:

Mac users, remember to be careful when replacing the system's libc++. Your system will not be able to boot without a funcioning libc++.

I will carry onwards.

svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
cd llvm/tools
svn co http://llvm.org/svn/llvm-project/cfe/trunk clang
cd ../..
cd llvm/tools/clang/tools
svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra
cd ../../../..
cd llvm/projects
svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt
cd ../..
mkdir build
cd build
../llvm/configure
make #this took several hours; make sure to screen
#check to see if it worked
clang --help


#now install the other libraries
brew update
brew install cmake #you will need this later
cd
cd llvm/projects
svn co http://llvm.org/svn/llvm-project/libcxxabi/trunk libcxxabi
svn co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx
cd .. #now you are in the llvm directory
mkdir build && cd build
cmake ..
make cxx
make #this also takes several hours

Okay done. Let's hope I didn't break break the computer and let's see if this actually did anything.

Tuesday, August 12, 2014

Fail.

succeeded(0), failed(58736)   99.9949% completed.    libc++abi.dylib: terminating with uncaught exception of type std::length_error: vector::_M_fill_insert
succeeded(0), failed(58737)   99.9966% completed.    libc++abi.dylib: terminating with uncaught exception of type std::length_error: vector::_M_fill_insert
succeeded(0), failed(58738)   99.9983% completed.    libc++abi.dylib: terminating with uncaught exception of type std::length_error: vector::_M_fill_insert
succeeded(0), failed(58739)   100% completed.

We are sorry, commands in file: [failed_quantify_graph_commands.6513.txt] failed.  :-(


Error, cmd: /usr/local/Cellar/trinity/r20131110/trinity-plugins/parafly/bin/ParaFly -c /Volumes/Spare/transcriptomes/MOE_VNO_transcriptomes/Trinity_assembly/to_assemble/DR_004_Arjam_MOE/trinity_output//chrysalis/quantifyGraph_commands -CPU 4 -failed_cmds failed_quantify_graph_commands.6513.txt -v -shuffle  died with ret 256 at /usr/local/bin/Trinity.pl line 1793.

This is to be continued. At least they put a frowny face in the error message. 
Here is a blog that gives excellent Trinity instructions:
https://cartwrightlab.wikispaces.com/RNAseq+Assembly+in+Trinity

#for paired end reads, combine each side of pairs into one file
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L001_R1* >> Arjam_MOE_R1.fq
cat pair.DR004_Artibeus_Jamaicensis_MOE_ACAGTG_L001_R2* >> Arjam_MOE_R2.fq

#run a trimming script
python ~/Scripts/q-trim.py Arjam_MOE_R1.fq Arjam_MOE_R1_trim.fq

#woops I run into this error:
importerror no module named screed

#it looks like I need the "screed" module for this; looks like I also need to update "setup tools"
#first install pip
sudo python get-pip.py
pip install ipython #not sure what this did
#still missing the screed module
sudo pip install setuptools --upgrade
sudo pip install git+https://github.com/ged-lab/screed.git

#okay seems to be working now
#trimmed with the default setting in which a minimum Qscore = 5
#minimum length = 30; lets see how this works
python ~/Scripts/q-trim.py Arjam_MOE_R1.fq Arjam_MOE_R1_trim.fq
python ~/Scripts/q-trim.py Arjam_MOE_R2.fq Arjam_MOE_R2_trim.fq

#now i need to assemble paired reads--extracting only the pairs using both.py
python ~/Scripts/both.py Arjam_MOE_R1_trim.fq Arjam_MOE_R2_trim.fq
#this creates two files ...R1_trim.fq.both and R2_trim.fq.both
#this script took about one hour to run

#finally time to run trinity!
Trinity.pl --seqType fq --left Arjam_MOE_R1_trim.fq.both --right Arjam_MOE_R2_trim.fq.both --CPU 4 --JM 20G --output trinity_output/

Everything is up and running now. Stay tuned.

Monday, August 4, 2014

Okay we have FINALLY updated our operating system.

Apparently Mavericks already has commandline tools installed so it is not necessary to install again. However, there are some issues if you install Homebrew and the gcc compiler not being linked. So starting from scratch, here is what do do.

#install homebrew

ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
#check to see your comp is ready to brew
brew doctor
#i had several errors/warnings (errors indicated in orange)
/usr/bin comes before /usr/local/bin

export PATH='/usr/local/bin:$PATH' >> ~/.bash_profile

/usr/local/lib/gcc is not writable.

sudo chown -R laurelyohe /usr/local

brew install homebrew/science/bowtie
brew install apple-gcc42
brew install samtools
brew install trinity

Okay, everything seems good to go. Now I am unzipping the transcriptome output from trimmomatic.