forked from EBI-Metagenomics/pipeline-v5
-
Notifications
You must be signed in to change notification settings - Fork 8
/
config.yml
99 lines (80 loc) · 2.78 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Steps to go for (one boolean switch per workflow step)
qc_and_merge_step: true
taxonomic_inventory: true
cgc_step: true
reads_functional_annotation: true
assemble: true

# Global
threads: 40
# Rule of thumb: keep this at floor(threads / 8), where `threads` is the
# parameter just above (here floor(40 / 8) = 5).
interproscan_threads: 5

# fastp parameters
# Options with no value are written as an explicit null rather than a bare
# `key:` so the "unset" state is unambiguous to readers and linters.
detect_adapter_for_pe: false
overrepresentation_analysis: false
min_length_required: 108
force_polyg_tail_trimming: null
base_correction: false
qualified_phred_quality: null
unqualified_percent_limit: null
disable_trim_poly_g: null
overlap_len_require: null
cut_right: false
correction: false

# Assembly
memory: 0.9
# NOTE(review): kebab-case, unlike the snake_case keys around it; the name is
# kept unchanged because renaming it would change the input the workflow reads.
min-contig-len: 500

# Combined Gene Caller // the size is in MB
cgc_chunk_size: 200

# # Taxonomic inference using Diamond and the contigs
# diamond_maxTargetSeqs: 1

# Functional annotation
protein_chunk_size_IPS: 1000000  # 20000000
protein_chunk_size_eggnog: 4000000
protein_chunk_size_hmm: 4000000
# -----------------
# Run wf partially
# -----------------
# The following variables should be considered only in case
# the user has already run some of the first steps and wants to
# run only the subsequent parts of the workflow.
# For example, you have run the quality control and the RNA prediction steps
# and you would like to go just for the assembly step.
# Currently, because of CWL limitations (see https://github.com/common-workflow-language/cwl-v1.3/issues/3),
# you need to provide values for some of the following variables even if they are not going to be used.
# To that end, we provide pseudo-files under the /test_input folder that you may use.
# ATTENTION!
# Give the full path of your files, NOT a relative one!
# Required by every step: the merged, pre-processed reads (*.merged.fasta).
# NOTE(review): this example path is relative although the instructions in
# this file ask for full paths — confirm which is intended.
processed_reads:
  class: File
  format: "edam:format_1929"
  path: results/ERR599171.merged.fasta
# Required by the taxonomy inventory step.
# The value shown here points at one of the bundled pseudo-files.
input_for_motus:
  class: File
  path: workflows/pseudo_files/pseudo.merged.unfiltered.fasta
# Required by the functional annotation steps.
# A file produced by an earlier metaGOflow run carries a suffix like
# .cmsearch.all.tblout.deoverlapped
maskfile:
  class: File
  path: results/ERR599171.merged.cmsearch.all.tblout.deoverlapped
# Required by the functional annotation step.
# Number of sequences contained in the predicted_faa_from_previous_run file.
# You can obtain it with:
# grep -c ">" <*.merged_CDS.faa>
count_faa_from_previous_run: 18934897

# Required by the functional annotation step.
predicted_faa_from_previous_run:
  class: File
  format: "edam:format_1929"
  path: results/ERR599171.merged_CDS.faa
# Mandatory for running the assembly step
# A list of two File entries; each `path` must be nested under its own
# `- class: File` item, otherwise it is read as a separate top-level key.
# The values shown here point at the bundled pseudo-files
# (presumably the two files of a read pair, judging by the _1/_2 names — confirm).
processed_read_files:
  - class: File
    path: workflows/pseudo_files/pseudo_1_clean.fastq.trimmed.fasta
  - class: File
    path: workflows/pseudo_files/pseudo_2_clean.fastq.trimmed.fasta