Merge pull request #51 from sanger-tol/second-round-fixes

Second round fixes
sanger-tol · Jan 19, 2023 · e53f58f · e53f58f
2 parents a9e4458 + 9ce8d92
commit e53f58f
Show file tree

Hide file tree

Showing 83 changed files with 824 additions and 864 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -21,30 +21,18 @@ jobs:
     strategy:
       matrix:
         # Nextflow versions
-        include:
-          # Test pipeline minimum Nextflow version
-          - NXF_VER: "21.10.3"
-            NXF_EDGE: ""
-          # Test latest edge release of Nextflow
-          - NXF_VER: ""
-            NXF_EDGE: "1"
+        NXF_VER:
+          - "22.04.0"
+          - "latest-everything"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        env:
-          NXF_VER: ${{ matrix.NXF_VER }}
-          # Uncomment only if the edge release is more recent than the latest stable release
-          # See https://github.com/nextflow-io/nextflow/issues/2467
-          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
+        with:
+          version: "${{ matrix.NXF_VER }}"
 
-      - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
+      - name: Run the full test profile of the pipeline
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile full_s3_test,docker --outdir ./results_full
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -50,7 +50,7 @@ jobs:
 
       - uses: actions/setup-python@v3
         with:
-          python-version: "3.6"
+          python-version: "3.7"
           architecture: "x64"
 
       - name: Install dependencies

diff --git a/.nf-core.yml b/.nf-core.yml
@@ -1 +1,18 @@
 repository_type: pipeline
+lint:
+  files_unchanged:
+    - LICENSE
+    - .gitattributes
+    - .github/CONTRIBUTING.md
+    - .github/ISSUE_TEMPLATE/bug_report.yml
+    - .github/workflows/linting.yml
+    - assets/email_template.txt
+    - assets/sendmail_template.txt
+    - lib/NfcoreTemplate.groovy
+    - lib/NfcoreSchema.groovy
+    - .github/PULL_REQUEST_TEMPLATE.md
+    - .github/workflows/linting_comment.yml
+    - .prettierignore
+  nextflow_config:
+    - manifest.name
+    - manifest.homePage
diff --git a/README.md b/README.md
@@ -1,10 +1,12 @@
 # ![nf-core/treeval](docs/images/nf-core-treeval_logo_light.png#gh-light-mode-only) ![nf-core/treeval](docs/images/nf-core-treeval_logo_dark.png#gh-dark-mode-only)
 
-[![GitHub Actions CI Status](https://github.com/nf-core/treeval/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/treeval/actions?query=workflow%3A%22nf-core+CI%22)
-[![GitHub Actions Linting Status](https://github.com/nf-core/treeval/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/treeval/actions?query=workflow%3A%22nf-core+linting%22)
+[![GitHub Actions CI Status](https://github.com/sanger-tol/treeval/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/treeval/actions?query=workflow%3A%22nf-core+CI%22)
+
+<!-- [![GitHub Actions Linting Status](https://github.com/sanger-tol/treeval/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/treeval/actions?query=workflow%3A%22nf-core+linting%22) -->
+
 [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX)
 
-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/)
@@ -62,7 +64,7 @@ The version 1 pipeline will be made up of the following steps:
 
 ## Quick Start
 
-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`)
+1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.04.0`)
 
 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
 

diff --git a/assets/full_s3_treeval_test.yaml b/assets/full_s3_treeval_test.yaml
@@ -0,0 +1,26 @@
+assembly:
+  sizeClass: "" # S if {genome => 4Gb} else L
+  level: scaffold
+  sample_id: nxOscDoli1
+  classT: nematode
+  asmVersion: PB.a1
+  dbVersion: "1"
+  gevalType: DTOL
+reference_file: https://tolit.cog.sanger.ac.uk/test-data/Gae_host/assembly/DTOL_nxOscDoli1_1_FULL.fa
+assem_reads:
+  pacbio: path
+  hic: path
+  supplementary: path
+alignment:
+  data_dir: https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/
+  common_name: "" # For future implementation (adding bee, wasp, ant etc)
+  geneset: "s3_Gae_Host.Gae"
+  #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
+self_comp:
+  motif_len: 0
+  mummer_chunk: 10
+synteny:
+  synteny_genome_path: "" # No Syntenic Data for Nematode
+outdir: "NEEDS TESTING"
+intron:
+  size: "50k"
diff --git a/assets/nematode/csv_data/s3_Gae_Host.Gae-data.csv b/assets/nematode/csv_data/s3_Gae_Host.Gae-data.csv
@@ -0,0 +1,5 @@
+org,type,data_file
+Gae_host.Gae,cdna,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host5000cdna.MOD.fa
+Gae_host.Gae,cds,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host12003cds.MOD.fa
+Gae_host.Gae,pep,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host12005pep.MOD.fa
+Gae_host.Gae,rna,https://tolit.cog.sanger.ac.uk/test-data/Gae_host/genomic_data/gene_alignment/Gae_host18005rna.MOD.fa
diff --git a/assets/s3_treeval_test.yaml b/assets/s3_treeval_test.yaml
@@ -0,0 +1,26 @@
+assembly:
+  sizeClass: "" # S if {genome => 4Gb} else L
+  level: scaffold
+  sample_id: nxOscDoli1
+  classT: nematode
+  asmVersion: PB.a1
+  dbVersion: "1"
+  gevalType: DTOL
+reference_file: https://tolit.cog.sanger.ac.uk/test-data/Gae_host/assembly/DTOL_nxOscDoli1_1_FULL.fa
+assem_reads:
+  pacbio: path
+  hic: path
+  supplementary: path
+alignment:
+  data_dir: /lustre/scratch123/tol/teams/grit/dp24/treeval2/treeval/assets/
+  common_name: "" # For future implementation (adding bee, wasp, ant etc)
+  geneset: "s3_Gae_Host.Gae"
+  #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
+self_comp:
+  motif_len: 0
+  mummer_chunk: 10
+synteny:
+  synteny_genome_path: "" # No Syntenic Data for Nematode
+outdir: "NEEDS TESTING"
+intron:
+  size: "50k"
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
diff --git a/assets/treeval_test.yaml b/assets/treeval_test.yaml
@@ -10,7 +10,7 @@ reference_file: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL
 assem_reads:
   pacbio: path
   hic: path
-  supplementary: path|na
+  supplementary: path
 alignment:
   data_dir: /nfs/team135/dp24/treeval_testdata/gene_alignment_data/
   common_name: "" # For future implementation (adding bee, wasp, ant etc)
@@ -22,4 +22,5 @@ self_comp:
 synteny:
   synteny_genome_path: "/nfs/team135/dp24/treeval_testdata/synteny_data"
 outdir: "NEEDS TESTING"
-intron: "50k"
+intron:
+  size: "50k"
diff --git a/conf/digest.config b/conf/digest.config
@@ -1,7 +1,6 @@
 params {
     outdir = "output/"
     publish_dir_mode = "copy"
-    enable_conda = false
     singularity_pull_docker_container = false
 }
 
@@ -14,8 +13,6 @@ process {
 if ("$PROFILE" == "singularity") {
     singularity.enabled = true
     singularity.autoMounts = true
-} else if ("$PROFILE" == "conda") {
-    params.enable_conda = true
 } else {
     docker.enabled = true
     docker.userEmulation = true

diff --git a/conf/full_s3_test.config b/conf/full_s3_test.config
@@ -0,0 +1,24 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/treeval -profile full_s3_test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 's3_test'
+    config_profile_description = 'Minimal Test Data for GitHub Actions test'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input  = "${projectDir}/assets/full_s3_treeval_test.yaml"
+}
diff --git a/conf/modules.config b/conf/modules.config
@@ -26,6 +26,10 @@ process {
         ]
     }
 
+    withName: BEDTOOLS_SORT {
+        ext.prefix  = { "${meta.id}.sorted" }
+    }
+
     withName: MINIPROT_ALIGN {
         ext.args    = "-u --gff -j 1"
     }
@@ -39,11 +43,6 @@ process {
         ext.prefix      = { "${meta.id}" }
     }
 
-    withName: '.*:.*:GENE_ALIGNMENT:UCSC_BEDTOBIGBED' {
-        ext.args    = { " -type=bed6+2 -extraIndex=name,geneSymbol" }
-        ext.prefix  = { "${meta.id}-${meta.type}"}
-    }
-
     withName: '.*:.*:SELFCOMP:UCSC_BEDTOBIGBED' {
         ext.args    = { " -type=bed3+6 -extraIndex=name,qStart,qEnd" }
         ext.prefix  = { "${meta.id}" }
@@ -54,12 +53,12 @@ process {
         ext.prefix  = { "${meta.id}_synteny_${reference.getName().tokenize('.')[0]}" }
     }
 
-    withName: '.*:.*:.*:NUC_ALIGNMENTS:MINIMAP2_ALIGN' {
-        ext.args    = "-ax splice"
+    withName: '.*:.*:NUC_ALIGNMENTS:MINIMAP2_ALIGN' {
+        ext.args    = {"-ax splice ${meta.intron_size ? "-G ${meta.intron_size}" : ""}"}
         ext.prefix  = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
     }
 
     withName : MUMMER {
         ext.args = "-n -b -c -L -l 400"
     }
-}
+}
diff --git a/conf/s3_test.config b/conf/s3_test.config
@@ -0,0 +1,24 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/treeval -profile s3_test,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 's3_test'
+    config_profile_description = 'Minimal Test Data for GitHub Actions test'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input  = "${projectDir}/assets/s3_treeval_test.yaml"
+}
diff --git a/conf/selfcomp.config b/conf/selfcomp.config
@@ -1,7 +1,6 @@
 params {
     outdir = "output/"
     publish_dir_mode = "copy"
-    enable_conda = false
     singularity_pull_docker_container = false
 }
 
@@ -14,8 +13,6 @@ process {
 if ("$PROFILE" == "singularity") {
     singularity.enabled = true
     singularity.autoMounts = true
-} else if ("$PROFILE" == "conda") {
-    params.enable_conda = true
 } else {
     docker.enabled = true
     docker.userEmulation = true

diff --git a/conf/test.config b/conf/test.config
@@ -1,28 +1,23 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Nextflow config file for running minimal tests
+    Nextflow config file for running full-size tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Defines input files and everything required to run a fast and simple pipeline test.
+    Defines input files and everything required to run a full size pipeline test.
 
     Use as follows:
-        nextflow run nf-core/treeval -profile test,<docker/singularity> --outdir <OUTDIR>
+        nextflow run nf-core/treeval -profile test_full,<docker/singularity> --outdir <OUTDIR>
 
 ----------------------------------------------------------------------------------------
 */
 
 params {
-    config_profile_name        = 'Test profile'
-    config_profile_description = 'Minimal test dataset to check pipeline function'
+    config_profile_name        = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'
 
-    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
-
-    // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
+    // Input data for full size test
+    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
 
     // Genome references
     genome = 'R64-1-1'

diff --git a/conf/test_genealignment.config b/conf/test_genealignment.config
@@ -14,6 +14,6 @@ params {
     config_profile_name         = 'test_genealignment'
     config_profile_description  = 'Minimal data set for gene alignments to input fasta'
 
-    input                       = './assets/treeval_test.yaml'
+    input                       = "${projectDir}/assets/treeval_test.yaml"
     outdir			            = './testing/'
 }
diff --git a/conf/test_selfcomp.config b/conf/test_selfcomp.config
@@ -12,6 +12,6 @@ params {
     config_profile_name        = 'test_selfcomp'
     config_profile_description = 'Minimal test dataset to check selfcomp pipeline function'
 
-    input                      = './assets/treeval_test.yaml'
+    input                      = "${projectDir}/assets/treeval_test.yaml"
     outdir			           = './testing/'
 }
diff --git a/conf/test_synteny.config b/conf/test_synteny.config
@@ -14,6 +14,6 @@ params {
     config_profile_name        = 'test_synteny'
     config_profile_description = 'Minimal test dataset to check syntenypipeline function'
 
-    input                      = "./assets/treeval_test.yaml"
+    input                      = "${projectDir}/assets/treeval_test.yaml"
     outdir			            = './testing/'
 }
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
@@ -64,11 +64,6 @@ class WorkflowMain {
         // Check that a -profile or Nextflow config has been provided to run the pipeline
         NfcoreTemplate.checkConfigProvided(workflow, log)
 
-        // Check that conda channels are set-up correctly
-        if (params.enable_conda) {
-            Utils.checkCondaChannels(log)
-        }
-
         // Check AWS batch settings
         NfcoreTemplate.awsBatch(workflow, params)