config.yml

# Steps to go for
qc_and_merge_step: true
taxonomic_inventory: true
cgc_step: true
reads_functional_annotation: true
assemble: true

# Global
threads: 40

# As a rule of thumb keep that as floor(threads/8) where threads the previous parameter
interproscan_threads: 5

# fastp parameters
detect_adapter_for_pe: false
overrepresentation_analysis: false
min_length_required: 108
force_polyg_tail_trimming: 
base_correction: false
qualified_phred_quality: 
unqualified_percent_limit: 
disable_trim_poly_g:
overlap_len_require: 
cut_right: false
correction: false

# Assembly
memory: 0.9
min-contig-len: 500

# Combined Gene Caller // the size is in MB
cgc_chunk_size: 200

# # Taxonomic inference using Diamond and the contigs
# diamond_maxTargetSeqs: 1

# Functional annotation
protein_chunk_size_IPS: 1000000 # 20000000
protein_chunk_size_eggnog: 4000000
protein_chunk_size_hmm: 4000000

# -----------------
# Run wf partially
# -----------------

# The following variables should be considered only in case 
# the user has already ran some of the first steps and wants to 
# run the following parts of the workflow. 
# For example, you have ran the quality contron and the rna prediction steps
# and you would like to go just for the assembly step. 

# Currently, because of CWL-limitations (see https://github.com/common-workflow-language/cwl-v1.3/issues/3)
# you need to provide values to some of the following variables even if it is not to be used.
# To that end, we provide pseudo-files under the /test_input folder you may use 

# ATTENTION! 
# Give full path of your files, NOT relative !

# Mandatory for running any step; merged pre-processed reads (*.merged.fasta)
processed_reads: {
  class: File, 
  format: "edam:format_1929",
  path:  results/ERR599171.merged.fasta
}

# Mandatory for running the taxonomy inventory step
input_for_motus: {
  class: File, 
  path:  workflows/pseudo_files/pseudo.merged.unfiltered.fasta
}


# Mandatory for running the functional annotation steps
# If produced previously from metaGOflow, will have a suffix like: .cmsearch.all.tblout.deoverlapped 
maskfile: {
  class: File, 
  path:  results/ERR599171.merged.cmsearch.all.tblout.deoverlapped
}

# Mandatory for the functional annotation step 
# Give the number of the sequences included in the predicted_faa_from_previous_run file 
# You may get this by running:
# grep -c ">" <*..merged_CDS.faa>
count_faa_from_previous_run: 18934897

# Mandatory for the functional annotation step
predicted_faa_from_previous_run: {
  class: File, 
  format: "edam:format_1929",
  path:  results/ERR599171.merged_CDS.faa
}

# Mandatory for running the assembly step 
processed_read_files: 
  - class: File
    path:  workflows/pseudo_files/pseudo_1_clean.fastq.trimmed.fasta
  - class: File
    path:  workflows/pseudo_files/pseudo_2_clean.fastq.trimmed.fasta