#remove newlines: join the wrapped sequence lines of each record into one line
# Result in joined.fasta: every ">" header on its own line, followed by its
# whole sequence on a single line. Nothing is written before the first record
# or after the last one, and blank input lines are dropped.
awk '
  # Header: finish the sequence line in progress (if any), then emit the header.
  /^>/ { if (open_line) printf "\n"; print; open_line = 0; next }
  # Sequence text: append without a newline so wrapped lines are joined.
  length($0) { printf "%s", $0; open_line = 1 }
  # Terminate the last sequence line.
  END { if (open_line) printf "\n" }
' DR_091_Mobl_MOE_assembled.fasta > joined.fasta
If I try running blastall with my transcriptome data as the database, I get an error (`[blastall 2.2.22] ERROR: SeqPortNew: lcl`).
The sequence names are all too similar to one another, so I have to rename them; I am simply going to number them sequentially.
# Give every record a short, unique header: each ">" line is replaced by ">N",
# where N counts the headers from 1; sequence lines pass through unchanged.
awk '
  /^>/ { seq_no += 1; print ">" seq_no; next }
  { print $0 }
' < DR_091_Mobl_MOE_assembled.fasta > renamed_DR_091_Mobl_MOE_assembled.fasta
#add the BLAST 2.2.22 bin directory to the end of the search path and export it,
#so the programs installed there can be run by name
PATH="${PATH}:/Users/laurelyohe/blast-2.2.22/bin/"
export PATH
#format the database
[blastall 2.2.22] ERROR: SeqPortNew: lcl
# Renumber the FASTA headers: each line starting with ">" becomes ">1", ">2", ...
# in order of appearance; every other line is copied as it is.
awk '{
  if (substr($0, 1, 1) == ">") {
    $0 = ">" (++header_count)
  }
  print
}' < DR_091_Mobl_MOE_assembled.fasta > renamed_DR_091_Mobl_MOE_assembled.fasta
# Put the BLAST 2.2.22 bin directory on the search path, but only when it is
# not already listed, so repeating this step does not add a duplicate entry.
case ":${PATH}:" in
  *":/Users/laurelyohe/blast-2.2.22/bin/:"*) ;;
  *) export PATH="${PATH}:/Users/laurelyohe/blast-2.2.22/bin/" ;;
esac
#the transcriptome is bigger than the indices, so the transcriptome is the file
#that gets formatted as the BLAST database
#   -p F  the input is nucleotide sequence, not protein
#   -o T  parse the sequence IDs and build an index for them
formatdb \
  -i renamed_DR_091_Mobl_MOE_assembled.fasta \
  -o T \
  -p F
#run blast
# tblastx search of OR_VR_indices.fasta (query, -i) against the DR_091
# transcriptome database (-d). The database name has to be the file that was
# given to formatdb, and the report name (-o) has to be the file that the
# parsing step reads, so both use the DR_091_Mobl sample name.
#   -e 1e-06  expectation-value cutoff
#   -v 5      number of one-line descriptions to report
#   -b 5      number of alignments to report
#   -a 2      number of processors to use
blastall -p tblastx \
  -d renamed_DR_091_Mobl_MOE_assembled.fasta \
  -i OR_VR_indices.fasta \
  -e 1e-06 -v 5 -b 5 -a 2 \
  -o ./DR_091_Mobl_MOE_assembled_v_OR_VR_indices.tblastx
#now we need to parse the BLAST output
# parse_bls.pl is given the tblastx report through --i; whatever it prints to
# standard output is saved under the report name with ".parsed" appended.
perl ~/Scripts/parse_bls.pl \
  --i DR_091_Mobl_MOE_assembled_v_OR_VR_indices.tblastx \
  > DR_091_Mobl_MOE_assembled_v_OR_VR_indices.tblastx.parsed
#next combine the parsed file with the sequences
# extract_hsps_new.pl is handed the renumbered transcriptome (-i), the parsed
# BLAST report (-t) and the label DR_091_Mormoops_blainvelli (-s).
# NOTE(review): what -s controls (output prefix? species tag?) is not visible
# here; confirm against the script before changing the label.
~/Scripts/extract_hsps_new.pl \
  -i renamed_DR_091_Mobl_MOE_assembled.fasta \
  -t DR_091_Mobl_MOE_assembled_v_OR_VR_indices.tblastx.parsed \
  -s DR_091_Mormoops_blainvelli
No comments:
Post a Comment