Skip to content

Commit

Permalink
Merge branch 'master' of github.com:jlanga/smsk_trinotate
Browse files (browse the repository at this point in the history)
  • Loading branch information
jlanga committed Feb 13, 2018
2 parents 1029177 + bd773a3 commit e043c62
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 58 deletions.
4 changes: 2 additions & 2 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ rule all:
# transdecoder + "transdecoder.pep.fai"
# trinotate + "blastx.tsv",
# trinotate + "blastp.tsv",
# trinotate + "hmmscan.tsv"
# trinotate + "init.txt"
# trinotate + "hmmscan.tsv",
# trinotate + "init.txt",
trinotate + "trinotate.tsv"
2 changes: 1 addition & 1 deletion src/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ trinotate:
evalue: 1e-5
pfam_cutoff: DNC
rnammer:
rnammer_path: /usr/bin/software/rnammer_v1.2/rnammer # Modify this
rnammer_path: ./src/rnammer-1.2/rnammer # Modify this
org_type: euk # arc|bac|euk
118 changes: 64 additions & 54 deletions src/snakefiles/trinotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,20 +204,21 @@
# input:
# pep = transdecoder + "transdecoder.pep"
# output:
# tsv = trinotate + "signalp.tsv"
# tsv = touch(trinotate + "signalp.tsv")
# log:
# trinotate + "signalp.log"
# benchmark:
# trinotate + "signalp.json"
# shell:
# "signalp "
# "./src/signalp-4.1/signalp "
# "-f short "
# "-n {output.tsv} "
# "{input.pep} "
# "2> {log}"
#
#
#
# "2> {log} 1>&2 "




# rule trinotate_tmhmm:
# """
# Predict transmembrane regions
Expand All @@ -231,35 +232,42 @@
# benchmark:
# trinotate + "tmhmm.json"
# shell:
# "tmhmm "
# "./src/tmhmm-2.0c/bin/tmhmm "
# "--short "
# "< transdecoder.pep "
# "> tmhmm.out "
# "2> {log}"
#
#
#
# "< {input.pep} "
# "> {output.tsv} "
# "2> {log} && "
# "rm -rf TMHMM_*"



# rule trinotate_rnammer:
# """
# Identify rRNAs
# """
# input:
# assembly = raw + "assembly.fasta",
# output:
# tsv = trinotate + "rnammer.tsv"
# gff = trinotate + "rnammer.gff"
# "assembly.fasta.rnammer.gff"
# params:
# rnammer_path = config["trinotate"]["rnammer"]["rnammer_path"],
# org_type = config["trinotate"]["rnammer"]["org_type"]
# org_type = config["trinotate"]["rnammer"]["org_type"],
# gff_tmp = "assembly.fasta.rnammer.gff"
# log:
# trinotate + "rnammer.log"
# benchmark:
# trinotate + "rnammer.json"
# shell:
# "RnammerTranscriptome.pl "
# "./src/Trinotate-3.0.2/util/rnammer_support/RnammerTranscriptome.pl "
# "--transcriptome {input.assembly} "
# "--path_to_rnammer {params.rnammer_path} "
# "--path_to_rnammer ./src/rnammer-1.2/rnammer "
# "--org_type {params.org_type} "
# "2> {log}"
# "2> {log} 1>&2 ; "
# "mv {params.gff_tmp} {output.gff} ; "
# "rm tmp.superscaff.rnammer.gff "
# "transcriptSuperScaffold.bed "
# "transcriptSuperScaffold.fasta"



Expand All @@ -278,14 +286,39 @@
"EMBL_dat_to_Trinotate_sqlite_resourceDB.pl "
"--sqlite {output.sqlite} "
"--create "
"2> {log}"
"2> {log} 1>&2"



rule trinotate_fill:
rule trinotate_init:
"""
Initialize db with genes, transcripts and proteins
"""
input:
sqlite = trinotate + "trinotate.sqlite",
is_created = trinotate + "create.txt",
g2t = raw + "gene_to_trans_map.tsv",
assembly = raw + "assembly.fasta",
proteome = transdecoder + "transdecoder.pep"
output:
touch(trinotate + "init.txt")
log:
trinotate + "init.log"
benchmark:
trinotate + "init.json"
shell:
"Trinotate {input.sqlite} init "
"--gene_trans_map {input.g2t} "
"--transcript_fasta {input.assembly} "
"--transdecoder_pep {input.proteome} "
"2> {log} 1>&2"



rule trinotate_fill:
input:
sqlite = trinotate + "trinotate.sqlite",
is_init = trinotate + "init.txt",
eggnog = db + "NOG.annotations.tsv.bulk_load",
go = db + "go-basic.obo.tab",
uniprot_index = db + "trinotate.UniprotIndex",
Expand All @@ -294,9 +327,9 @@
output:
is_filled = touch(trinotate + "fill.txt")
log:
db + "load_trinotate_db.log"
db + "fill.log"
benchmark:
db + "load_trinotate_db.json"
db + "fill.json"
shell:
"EMBL_dat_to_Trinotate_sqlite_resourceDB.pl "
"--sqlite {input.sqlite} "
Expand All @@ -305,62 +338,39 @@
"--uniprot_index {input.uniprot_index} "
"--taxonomy_index {input.taxonomy_index} "
"--pfam {input.pfam} "
"> {log} 2>&1"

"2> {log} 1>&2"


rule trinotate_init:
"""
Initialize db with genes, transcripts and proteins
"""
input:
sqlite = trinotate + "trinotate.sqlite",
is_copied = trinotate + "fill.txt",
g2t = raw + "gene_to_trans_map.tsv",
assembly = raw + "assembly.fasta",
proteome = transdecoder + "transdecoder.pep"
output:
touch(trinotate + "init.txt")
log:
trinotate + "init.log"
benchmark:
trinotate + "init.josn"
shell:
"Trinotate {input.sqlite} init "
"--gene_trans_map {input.g2t} "
"--transcript_fasta {input.assembly} "
"--transdecoder_pep {input.proteome} "
"2> {log}"


rule trinotate_load:
input:
sqlite = trinotate + "trinotate.sqlite",
is_initialized = trinotate + "init.txt",
is_filled = trinotate + "fill.txt",
blastx = trinotate + "blastx.tsv",
blastp = trinotate + "blastp.tsv",
pfam = trinotate + "hmmscan.tsv",
# signalp = trinotate + "signalp.tsv",
# tmhmm = trinotate + "tmhmm.tsv",
# rnammer = trinotate + "rnammer.tsv",
# rnammer = trinotate + "rnammer.gff",
output:
touch(trinotate + "load.txt")
log:
trinotate + "load.log"
benchmark:
trinotate + "load.log"
trinotate + "load.json"
shell:
"Trinotate {input.sqlite} "
"LOAD_swissprot_blastp {input.blastp} 2> {log} 1>&2; "
"Trinotate {input.sqlite} "
"LOAD_swissprot_blastx {input.blastx} 2>> {log} 1>&2; "
"Trinotate {input.sqlite} "
"LOAD_pfam {input.pfam} 2>> {log} 1>&2; "
# "Trinotate {input.sqlite} "
# "LOAD_signalp {input.signalp} 2>> {log} 1>&2; "
# "Trinotate {input.sqlite} "
# "LOAD_tmhmm {input.tmhmm} 2>> {log} 1>&2; "
# "Trinotate {input.sqlite} "
# "LOAD_signalp {input.signalp} 2>> {log} 1>&2; "
"Trinotate {input.sqlite} "
"LOAD_swissprot_blastx {input.blastx} 2>> {log} 1>&2; "
# "Trinotate {input.sqlite} "
# "LOAD_rnammer {input.rnammer} 2>> {log} 1>&2; "


Expand All @@ -382,6 +392,6 @@
shell:
"Trinotate {input.sqlite} report "
"-E {params.evalue} "
"--pfam_cutoff {params.pfam_cutoff}"
"--pfam_cutoff {params.pfam_cutoff} "
"> {output} "
"2> {log}"
87 changes: 86 additions & 1 deletion transcript.fa
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,89 @@ ATATATCAAAGAGTATTTTATTTACTGGGCTTTAGTGCTGAATCTACTGATGTGGGCTAG
CGTCACTGACTTGACCATATGAATACAATCTGGTGCAAATATGTGAAATATTGTAGAGAA
TAATGAAGTGCTTTGATGTGTAAAAATGTTTGAGATACATTTATACTGTGTGTGGCATAT
AAATTAATACAAGAAAACAAAAAAAAA
>TRINITY_DN66968_c0_g1_i1 len=202 path=[180:0-201] [-1, 180, -2]
>TRINITY_DN99999_c0_g1_i1
TACGTGGTTTCCTTCATGCATTTGCCCGCTTACCTGGATGATCAATTAATATTAGATAAA
TTAGAAGGCTGGGGAGTATTTCCCATAACAAAAATTAAAAGAAGGGTATATCCGGGCACA
GAAATAGAAGATGGAACTCGATATCTCAAAGTGAGATTCCCCAGAGAAGTGATATCTCTC
CCTTACAGCACAAGACTGGAAACGGCAGAAGGTCAGCAATATTTTAGGGTGATGCACAGC
CACCAGGTTAAGACTTGTAGGCTGTGCATGAGCCCTGACCATATGGTCAAAGATTGCCCA
GACTTTAAATGCCATAAATGTGAGGAAAGGGGACATTTTGCCAGAGACTGCGATGCTATT
AAGTGCCCGGACTGCCAGAACTATTTAAGTAAGTGTGAATGTTGGATGGAGGAAGAGGAG
GAGGAGGATGAGATCCAGGTGAGTGGGCAAATGCATGAAGGAAACAGTGAAAAGGAAAGT
AATGAAGAGGAACAAACAACGACACAAACAATACAAACTACAACAGAGGAAATAACATTG
AAGGAGGAGAATGCAAAAAAAGAAAAGGAAAAAGAAACAGAAACAGACAGTCAACAAATA
GAGCATGAAGAGGAGGTAGCATGGACACCAATGGACATAACTTCTAGCTTCAAAAACGTT
TTGGATGTAATTGAAAAAGAGGATCTTAAAGAACAAAGCAAGGGAACGGACAGAGAGACT
GGAAAACATGAGGAGGAAGTAGAAAAAGACAAACTTGAGAAAAGACAAACAAGACGATCG
GCAAAAGTTTTATCAAAATTAGAAACTGCAAGAAAAAAGGGTTTGAAAAAAGGACAACTG
AAAAGCCACAACAGATATGATTCTTTGAGAGGTTTGGGAGAAGAAGAGGACTAAGATGAC
GGTTTTTATTTCCTTTTTCCTTCTTTTAATGGTTTTAAATTGCGTGTCTTTTAATGCAAG
AGGTTTAATGGACTTAAGAAAATTTAATAATGTAAAGGAAAAATGTAAAAGAGAAGAAAT
AATTATTTTACAAGAAACAAATTGGAAAAATGAGGTGATGGACATATATAAAAAAGAATG
GGATGGGGACTTTTATTATAGTAATGGAGATACGAAAGCTGGGAGAGGAGTAGCAATTTT
AATAAAGAAGAATGCACTGCAAATGAGTAAAGTAATATATAAAGACAAACAAGGAAAATG
TATGATCTTAGAAATAAAATATGAAGGGAAAGATATTATTTTAGTTAATGTGCATGCACC
AAATGAAGAGAGTGAAAAGAAAAGTTTTTTTAACATATTAAGAAGGTTTTTAAAAAACTA
TAAACAAATAATAGTATGTGGGGATTTTAACACTGTTTTTAGCAGACAGGACATAACAGA
AGGTATGGTTTTTAAATCAGACACGGGAAGGAAAGAACTAAAATCACTAATAGAGGAAAA
AGGAATGATAGACATTTGGAGAGAAAGAAATGGGAAGAAAAAGGAGTTTTCTAGAAGACA
AATAGTAGGGAATTTTGTAAACCAATCAAGAATAGACTATGTGTTATGCACAAGAAATAT
AGAAATTTATATAGAAAAAATAAGGTACGATGAAACTGTTTTAAGTGACCATAAATTTGT
ATTTTTTAATTTTAATACAGATGAAATACAAAGAGGCCCAGGGGTATGGACATTAAATAG
TGATATTTTAAATAACGAAGACTATGTTAAAAAAATAACAGAAATAATTGAAAAGGAAAA
AGTAAACCAGATTTATAATGAAGACAAAAGACTATGGTGGGAAAATGTCAAATTTCTGAT
TAAAAAATCAACATTAACATTCTGTAGGATAATACAAAAGAATAAAAGACACAAAGAAAA
AACAATCAAAGAAAACTTAGAAAAAGAACTAGGAAAAAATGAAAAAGACATTCAAAAAAT
AAAAGAAATGGAGGGAAAACTGAAAGAAATAGAAGAAAAGAAATATGAAGGAGCTAGACT
AAGAAGCAAAGCAAAATATGTAGCAGAAGGAGAAAAATGCACAAAGTTCTTCTTTGATTT
AGAAAGACAAAAAGGAAAAGCAGAAACAATAAAGATAATACAAGGAGCAAAAGGAGAAAG
CATAGAAGGAAATGAAGAAATTTTAAAAGAAATCAAAAAATACTATGAGGAGTTATTTAA
AACACAGGGAGTTGATGAAGTACAAATGGCAAAATTATTAAAACAGATAAAAACAAAAGT
AGATGAAGAGGATAAAAAAGAATGCGACCAAGAAATAGGGGAAGAAGAAATAAGAAAAGC
AATAGAAAGCTTGAACAAAAAGAAAAGTCCAGGAATAGATGGTTTAAATAGTGAATTTTA
TGTATGTTTTAAAGAAATTTTAATTCCAATTTTAATTGACGTTTTTAAGGAAATACTGCA
AAAAGAAGAACTAAATGAAAGAATGGGAATGGGATTAATGAAATTAATACACAAAAAAGG
AGAAAAGACATTGTTAAAAAATTACAGACCAATCACAATGTTAAACACAGATTTGAAGAT
TTTAACAAAAATTTTAGCAAATAGATTAAAAGAAGTGATGCCAAAAATAATTAAAACAAC
ACAGGCCTATGCGATAAAAGGAAGAGACATAGCAGACATAACAATGAGCATAAAAGACAT
CATAGAATATATGAAGGAGAAAAAAGAGGAAGGATATATAATAAGTCTGGATTTTGAAAA
AGCTTTTGATAGAGTTGAACACCAGTTTTTATTCAAAGTACTCAAAAAGTTTGGTTTTGG
AGAAGTTTTTAGAAAATGGATAAAGATTTTGTATAAGGGTATTTTAACAAAAGTTAAATG
TAATGGCTTTTTAACAGAATGTTTTAAAATAACAAGATCGATAAGGCAAGGATGTCCTCT
GTCAGCACTTTTATATTCACTTGTGGCAGAACCCCTGGGCTTAGCTATCAACGAAGAAGA
AAAAATAAAAGGAATTGAGATTGAAGAAAATAAAGTAAATAAAAAAATGTTTCAATATGC
AGATGACACTACATTAATAGTAAAAGGAAAAGAGAGTGTGAAAGAAGCCATGAAAATAGT
ACAACAATTTTGTAAAGGATCGGGGAGTAAAGTAAATGAAGACAAAACGGTTTATATGAA
GTTTGGAAAGGAAACAGATTTAGCAGAATGCACCAATTTCAAAGAAGTAGAAGAAATCAA
GATTTTAGGGGTTTTATTGGGGAAAGATGCCAGAAAAGCGAGAGATAAGATGTGGGAAGG
TTTTTTAACAGATATAGAAAGGAGGTTAAATTACTGGAAACTAAGAACACTAACATTAAA
AGGAAAAGTTTTGATTTTAAATGTTTTAATGGAGTCTAAATTGTGGCATGTTTTATATGT
TTTAGAAATGCCAATGTGGATAGAAAAGAGGTTGAAAAAATGTTTTACTGATTTTTTATG
GGGAGGTAAGCCACCAAGGATTGCTTTTAATACAGTCGTAGGGGAAATAGACAAGGGTGG
TCTGGGTTTAATAGATGTAGAACAAAGAAAAAATAGTTTAAGAGTGAAAAGAATAAAGAA
GTATCTAGAAAAAGAAAACAAAGCAGAGTGGAAAAAAACAATGAAATATTTTTTAAACAA
ATGTGGTAATTTTAACATGGGAGATGGGATTTTATGGATGAAAACAAAAGCTTGGATGAC
AGAAAACTTACCTGAATTTTATAGAGAAATTTTAAGTGCGTGGGGGAATTTTTTAAAACA
AGTGGAATATAGTCCACATGGAAGGGAAAACATTTTAAACCAACCTCTTTTCTTGAACAA
TAATATTTTAAGTCAAGGGAAGGTTTTATACTATAAGAAATGGATAGAAGTTGGGATTTT
AAAAGTGCGGGACATTTTATATGAATTTAAAGAAGGCTTTTTAACTGAACAATATGTTAT
AGACACAATGGAGGAGGCGAAAGAGGAATACAACAGAAAGGAAATTGAGAAAAATCTTGA
CATAATTAAACAGGCGATTCCAAAAGAATGGATAAAAAGCATAGAAAATTTTGAAAAAGA
AAAAGAAACAAAAGAAGTGTATGTGAAAACAGGTGAAAAAATATGCAATTTTAATGAATG
TACTGTGAAAAATATTTATTGTTTTTTTAGAGAGAATGTTTTTAAAGAACCAACAGCAAA
CAAATACTGGATAGAGAAATACAAAAATGTAAAAGCAAATGAAATATGGGGAAACATGAA
AGGAAGGTATGTAGAAACAAAAGTCGAATGTCTAGAATTTTTAATAAGGCACAAAGCAAT
CTTTTCTGATGTCATTTTAAACAAGATAGGGATGGAACAAAGTGGAATGTGTAAAGTATG
TCAAAAAGAAGAAGAGGGTTTTTTACACATGTTTTTATATTGTCAAGAATTGGAAGGTTT
TTTAAAAGATTGTAAAGTTTTAATTAAAGGACTACTTGGAGACTGGGATGAAAATGAAAC
AGAATGGAACAGAGTAGTGATGTTGGGATGGAATAAGAACAATGAAAATAAGAAAATAGT
AAACCTATGTATAATGCTGATGAAAAATGCAATGTGGGAAAGGAGAATTGTGGCAAAAAA
AGAGAAAATGGTGATGGATGTATGGGGAATTTTTAAGAGGAAAATGGAGAGATATGTGGA
AAAACTGTATATATATCATAAAAATGAGAACATTTTGAGTGAGCTGCACAAAATTCTGAC
GGACAAAGCATGTCAAGTTTTTAAAGAAATGAACTGGAAACTACCACATTTTTAATGTTT
TTGAATATGAAATGATTTTTTAATGTCTGTTTTATTATGATTTACTGTAAAAGATTGTAA
TTGAAATTGGTTCAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAAATGCCCGATCTCA
TCTGAACTCGGAAGTAAAGCAGGGTCGGGCCTGGTAAGTACTTGGATGGGAGACCGCCTG
GGAATACCAGGTGCTGTAAGCTTTTCGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAGAAATGCCCGATCTCATCTGAATTCGGAAGTAAAGCAGGGTCGGGCCTGGTTAGT
ACT

0 comments on commit e043c62

Please sign in to comment.