From bbf492680d9cb1807719818ae2c126506b755715 Mon Sep 17 00:00:00 2001 From: Francesco Tabaro Date: Wed, 15 Nov 2023 12:16:15 +0100 Subject: [PATCH] fix: config directory --- config/README.md | 86 ++++++++++++++++++++++++++++++++++++++++++++++ config/config.yaml | 12 +++---- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/config/README.md b/config/README.md index e69de29..54cf6e4 100644 --- a/config/README.md +++ b/config/README.md @@ -0,0 +1,86 @@ +# Configuration instructions + +See below for an example config file with an explanation of each option. + +```yaml +# config/config.yaml + +# A list of datasets +sequencing_libraries: + - name: GSE13073 + sample_sheet: sample-sheet.csv + trimmomatic: >- + "ILLUMINACLIP:TruSeq3-PE.fa:1:0:15:2 + SLIDINGWINDOW:20:22 + MAXINFO:20:0.6 + LEADING:22 + TRAILING:20 + MINLEN:75" + star: >- + "--seedSearchStartLmax 30 + --outFilterMismatchNoverReadLmax 0.04 + --winAnchorMultimapNmax 40" + bamCoverage: "--binSize 50 --normalizeUsing None" + +# - name: ... +# sample_sheet: ... +# trimmomatic: ... +# star: ... +# bamCoverage: ... + +# +globals: + # path to reads folder + # NB: ./GSE13073 is expected to exist + reads_folder: . + + # path to results folder + results_folder: results/ + + # path to qc + qc_folder: results/qc + + # path to log + log_folder: results/log + + # path to references + references_folder: results/references + + # temp folder + tmp_folder: /tmp + + # path to analysis + analysis_folder: results/analysis + +# genome information +genome: + # genome label + label: mm10 + + # annotation type + # can be ensembl, mgi, gencode + annotation_type: ensembl + + # URL or path to genome sequence + fasta_url: + + # URL or path to genome annotation file + gtf_url: + + # URL to gtRNAdb zip file + gtrnadb_url: + +# Differential expression analysis parameters +deseq2: + # working directory + working_directory: ../../.. 
+ + # DESeq2 test name, can be Wald or LRT + test: Wald + + # name of the column from sample sheet with experimental variable + variable: genotype + + # base level from variable column + reference_level: wt +``` diff --git a/config/config.yaml b/config/config.yaml index 1964971..c2c6f9c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -29,21 +29,21 @@ genome: annotation_type: other # URL for genome fasta file - fasta_url: https://s3.embl.de/boulard/GRCh38.p13.chr22.fa?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=LJ7ZKXIZLLMABJBTY2T0%2F20230713%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230713T121255Z&X-Amz-Expires=604800&X-Amz-Security-Token=eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJMSjdaS1hJWkxMTUFCSkJUWTJUMCIsImV4cCI6MTY4OTI1MTczMSwibGRhcFVzZXIiOiJjbj1GcmFuY2VzY28gVGFiYXJvLGNuPVVzZXJzLGRjPWVtYmwsZGM9b3JnIiwibGRhcFVzZXJuYW1lIjoidGFiYXJvIn0.eOb38Xy0mIPYpbPWETE73njY-knhF-JsgbdxmwcxeWAY9x7RkqhbBqi8V0VGGPQ1tHiQ4O1O2moNi5jT3DmA6Q&X-Amz-SignedHeaders=host&versionId=null&X-Amz-Signature=e7cf5df4cf63c169dcd87290c860b0f027757de363101590c77cdf0151fc58fe + fasta_url: https://s3.embl.de/boulard/GRCh38.p13.chr22.fa # URL for genome GTF file - gtf_url: https://s3.embl.de/boulard/gencode.v43.chr22.gtf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=LJ7ZKXIZLLMABJBTY2T0%2F20230713%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230713T121233Z&X-Amz-Expires=604800&X-Amz-Security-Token=eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJMSjdaS1hJWkxMTUFCSkJUWTJUMCIsImV4cCI6MTY4OTI1MTczMSwibGRhcFVzZXIiOiJjbj1GcmFuY2VzY28gVGFiYXJvLGNuPVVzZXJzLGRjPWVtYmwsZGM9b3JnIiwibGRhcFVzZXJuYW1lIjoidGFiYXJvIn0.eOb38Xy0mIPYpbPWETE73njY-knhF-JsgbdxmwcxeWAY9x7RkqhbBqi8V0VGGPQ1tHiQ4O1O2moNi5jT3DmA6Q&X-Amz-SignedHeaders=host&versionId=null&X-Amz-Signature=c02e1f7dc3d1af80b3d4d215c67d6b4f194e49dbb17c7afb8193d6d5fdb9e9f0 + gtf_url: https://s3.embl.de/boulard/gencode.v43.chr22.gtf # Get a RepeatMasker GTF file from UCSC genome browser rmsk_path: 
/home/francesco/Projects/snakemake-rnaseq/tests/test-datasets-rnaseq/references/rmsk.gtf.gz - # URL to Gene Ontology association file - gaf_url: http://current.geneontology.org/annotations/mgi.gaf.gz - # URL to GtRNAdb files gtrnadb_url: http://gtrnadb.ucsc.edu/genomes/eukaryota/Hsapi38/hg38-tRNAs.tar.gz #gtrnadb_bed: ../../../data/references/tRNA/mm10-tRNAs.bed deseq2: working_directory: ../../.. - notebook_path: ../../../src/Rmd/deseq2_viz.rmd + #notebook_path: ../../../src/Rmd/deseq2_viz.rmd + test: Wald + reference_level: wt + variable: genotype \ No newline at end of file