Merge pull request #24 from nf-core/dev
Dev -> Master for v1.2 release
drpatelh authored Jul 28, 2021
2 parents 67b1faa + 62ae163 commit 8c00805
Showing 44 changed files with 117 additions and 89 deletions.
3 changes: 3 additions & 0 deletions .editorconfig
100755 → 100644
@@ -11,6 +11,9 @@ indent_style = space
[*.{yml,yaml}]
indent_size = 2

[*.json]
insert_final_newline = unset

# These files are edited and tested upstream in nf-core/modules
[/modules/nf-core/**]
charset = unset
Empty file modified .gitattributes
100755 → 100644
Empty file.
Empty file modified .github/.dockstore.yml
100755 → 100644
Empty file.
6 changes: 3 additions & 3 deletions .github/CONTRIBUTING.md
100755 → 100644
@@ -19,7 +19,7 @@ If you'd like to write some code for nf-core/fetchngs, the standard workflow is
* If there isn't one already, please create one so that others know you're working on this
2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/fetchngs repository](https://github.com/nf-core/fetchngs) to your GitHub account
3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
4. Use `nf-core schema build .` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged

If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).
@@ -69,7 +69,7 @@ If you wish to contribute a new step, please use the following coding standards:
2. Write the process block (see below).
3. Define the output channel if needed (see below).
4. Add any new flags/options to `nextflow.config` with a default (see below).
5. Add any new flags/options to `nextflow_schema.json` with help text (with `nf-core schema build .`).
5. Add any new flags/options to `nextflow_schema.json` with help text (with `nf-core schema build`).
6. Add any new flags/options to the help message (for integer/text parameters, print to help the corresponding `nextflow.config` parameter).
7. Add sanity checks for all relevant parameters.
8. Add any new software to the `scrape_software_versions.py` script in `bin/` and the version command to the `scrape_software_versions` process in `main.nf`.
@@ -81,7 +81,7 @@ If you wish to contribute a new step, please use the following coding standards:

Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.

Once there, use `nf-core schema build .` to add to `nextflow_schema.json`.
Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
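
As a minimal sketch of the step described above (the parameter name `my_new_option` is purely illustrative):

```nextflow
// nextflow.config -- give every new flag a sensible default under the params scope
params {
    my_new_option = false  // overridden on the command line with --my_new_option
}
```

Running `nf-core schema build` afterwards picks up the new parameter so that a description and help text can be added to `nextflow_schema.json`.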

### Default processes resource requirements

Empty file modified .github/ISSUE_TEMPLATE/bug_report.md
100755 → 100644
Empty file.
Empty file modified .github/ISSUE_TEMPLATE/config.yml
100755 → 100644
Empty file.
Empty file modified .github/ISSUE_TEMPLATE/feature_request.md
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
100755 → 100644
@@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/fetc
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/fetchngs/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, also make a PR on the nf-core/fetchngs _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
- [ ] Make sure your code lints (`nf-core lint .`).
- [ ] Make sure your code lints (`nf-core lint`).
- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`).
- [ ] Usage Documentation in `docs/usage.md` is updated.
- [ ] Output Documentation in `docs/output.md` is updated.
40 changes: 15 additions & 25 deletions .github/workflows/awsfulltest.yml
100755 → 100644
@@ -1,39 +1,29 @@
name: nf-core AWS full size tests
# This workflow is triggered on published releases.
# It can be additionally triggered manually with GitHub actions workflow dispatch.
# It can be additionally triggered manually with GitHub actions workflow dispatch button.
# It runs the -profile 'test_full' on AWS batch

on:
release:
types: [published]
workflow_dispatch:

env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }}
AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }}
AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }}
AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}

jobs:
run-awstest:
run-tower:
name: Run AWS full tests
if: github.repository == 'nf-core/fetchngs'
runs-on: ubuntu-latest
steps:
- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
- name: Launch workflow via tower
uses: nf-core/tower-action@master
with:
auto-update-conda: true
python-version: 3.7
- name: Install awscli
run: conda install -c conda-forge awscli
- name: Start AWS batch job
run: |
aws batch submit-job \
--region eu-west-1 \
--job-name nf-core-fetchngs \
--job-queue $AWS_JOB_QUEUE \
--job-definition $AWS_JOB_DEFINITION \
--container-overrides '{"command": ["nf-core/fetchngs", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://'"${AWS_S3_BUCKET}"'/fetchngs/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/fetchngs/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}'
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
bearer_token: ${{ secrets.TOWER_BEARER_TOKEN }}
compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
pipeline: ${{ github.repository }}
revision: ${{ github.sha }}
workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/fetchngs/work-${{ github.sha }}
parameters: |
{
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/fetchngs/results-${{ github.sha }}"
}
profiles: '[ "test_full", "aws_tower" ]'
44 changes: 17 additions & 27 deletions .github/workflows/awstest.yml
100755 → 100644
@@ -1,37 +1,27 @@
name: nf-core AWS test
# This workflow is triggered on push to the master branch.
# It can be additionally triggered manually with GitHub actions workflow dispatch.
# It runs the -profile 'test' on AWS batch.
# This workflow can be triggered manually with the GitHub actions workflow dispatch button.
# It runs the -profile 'test' on AWS batch

on:
workflow_dispatch:

env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }}
AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }}
AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }}
AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}

jobs:
run-awstest:
run-tower:
name: Run AWS tests
if: github.repository == 'nf-core/fetchngs'
runs-on: ubuntu-latest
steps:
- name: Setup Miniconda
uses: conda-incubator/setup-miniconda@v2
- name: Launch workflow via tower
uses: nf-core/tower-action@master

with:
auto-update-conda: true
python-version: 3.7
- name: Install awscli
run: conda install -c conda-forge awscli
- name: Start AWS batch job
run: |
aws batch submit-job \
--region eu-west-1 \
--job-name nf-core-fetchngs \
--job-queue $AWS_JOB_QUEUE \
--job-definition $AWS_JOB_DEFINITION \
--container-overrides '{"command": ["nf-core/fetchngs", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/fetchngs/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/fetchngs/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}'
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
bearer_token: ${{ secrets.TOWER_BEARER_TOKEN }}
compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
pipeline: ${{ github.repository }}
revision: ${{ github.sha }}
workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/fetchngs/work-${{ github.sha }}
parameters: |
{
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/fetchngs/results-${{ github.sha }}"
}
profiles: '[ "test", "aws_tower" ]'
Empty file modified .github/workflows/branch.yml
100755 → 100644
Empty file.
Empty file modified .github/workflows/ci.yml
100755 → 100644
Empty file.
4 changes: 2 additions & 2 deletions .github/workflows/linting.yml
100755 → 100644
@@ -53,7 +53,7 @@ jobs:

- uses: actions/setup-node@v1
with:
node-version: "10"
node-version: '10'

- name: Install editorconfig-checker
run: npm install -g editorconfig-checker
@@ -127,7 +127,7 @@ jobs:
GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }}
run: nf-core -l lint_log.txt lint ${GITHUB_WORKSPACE} --markdown lint_results.md
run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md

- name: Save PR number
if: ${{ always() }}
Empty file modified .github/workflows/linting_comment.yml
100755 → 100644
Empty file.
Empty file modified .gitignore
100755 → 100644
Empty file.
Empty file modified .markdownlint.yml
100755 → 100644
Empty file.
13 changes: 0 additions & 13 deletions .nf-core-lint.yaml

This file was deleted.

5 changes: 5 additions & 0 deletions .nf-core.yml
@@ -0,0 +1,5 @@
lint:
files_unchanged:
- .github/CONTRIBUTING.md
- assets/sendmail_template.txt
- lib/NfcoreTemplate.groovy
7 changes: 7 additions & 0 deletions CHANGELOG.md
100755 → 100644
@@ -3,6 +3,13 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [[1.2](https://github.com/nf-core/fetchngs/releases/tag/1.2)] - 2021-07-28

### Enhancements & fixes

* Updated pipeline template to [nf-core/tools 2.1](https://github.com/nf-core/tools/releases/tag/2.1)
* [[#26](https://github.com/nf-core/fetchngs/pull/26)] - Update broken EBI API URL

## [[1.1](https://github.com/nf-core/fetchngs/releases/tag/1.1)] - 2021-06-22

### Enhancements & fixes
Empty file modified CITATIONS.md
100755 → 100644
Empty file.
Empty file modified CODE_OF_CONDUCT.md
100755 → 100644
Empty file.
Empty file modified LICENSE
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion README.md
100755 → 100644
@@ -35,7 +35,7 @@ The columns in the auto-created samplesheet can be tailored to be accepted out-o

## Quick Start

1. Install [`Nextflow`](https://nf-co.re/usage/installation) (`>=21.04.0`)
1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.04.0`)

2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_

Empty file modified assets/email_template.html
100755 → 100644
Empty file.
Empty file modified assets/email_template.txt
100755 → 100644
Empty file.
Empty file modified assets/schema_input.json
100755 → 100644
Empty file.
Empty file modified assets/sendmail_template.txt
100755 → 100644
Empty file.
4 changes: 2 additions & 2 deletions bin/sra_ids_to_runinfo.py
@@ -79,7 +79,7 @@ def id_to_srx(db_id):
def id_to_erx(db_id):
ids = []
fields = ['run_accession', 'experiment_accession']
url = 'http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession={}&result=read_run&fields={}'.format(db_id,','.join(fields))
url = 'https://www.ebi.ac.uk/ena/portal/api/filereport?accession={}&result=read_run&fields={}'.format(db_id,','.join(fields))
for row in csv.DictReader(fetch_url(url), delimiter='\t'):
ids.append(row['experiment_accession'])
return ids
@@ -128,7 +128,7 @@ def fetch_sra_runinfo(file_in, file_out, ena_metadata_fields=ENA_METADATA_FIELDS

## Resolve/expand to get run identifier from ENA and write to file
for id in ids:
url = 'http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession={}&result=read_run&fields={}'.format(id,','.join(ena_metadata_fields))
url = 'https://www.ebi.ac.uk/ena/portal/api/filereport?accession={}&result=read_run&fields={}'.format(id,','.join(ena_metadata_fields))
csv_dict = csv.DictReader(fetch_url(url), delimiter='\t')
for row in csv_dict:
run_id = row['run_accession']
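
The change above swaps the broken EBI warehouse URL for the ENA Portal API `filereport` endpoint. As a self-contained sketch of how that endpoint is queried (the accession and field list are illustrative; the pipeline's own script wraps the request in its `fetch_url` helper):

```python
"""Minimal sketch: query the ENA Portal API 'filereport' endpoint."""

import csv
import io
import urllib.parse
import urllib.request

# Illustrative accession and metadata fields; the pipeline derives these from its input IDs.
accession = 'SRX9626017'
fields = ['run_accession', 'experiment_accession']

url = 'https://www.ebi.ac.uk/ena/portal/api/filereport?' + urllib.parse.urlencode({
    'accession': accession,
    'result': 'read_run',
    'fields': ','.join(fields),
})

with urllib.request.urlopen(url) as response:
    text = response.read().decode('utf-8')

# The endpoint returns tab-separated values with a header row.
for row in csv.DictReader(io.StringIO(text), delimiter='\t'):
    print(row['run_accession'], row['experiment_accession'])
```
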
Empty file modified conf/base.config
100755 → 100644
Empty file.
Empty file modified conf/modules.config
100755 → 100644
Empty file.
Empty file modified conf/test.config
100755 → 100644
Empty file.
Empty file modified conf/test_full.config
100755 → 100644
Empty file.
Empty file modified docs/README.md
100755 → 100644
Empty file.
Empty file modified docs/output.md
100755 → 100644
Empty file.
46 changes: 43 additions & 3 deletions docs/usage.md
100755 → 100644
@@ -32,7 +32,7 @@ If you have a GEO accession (found in the data availability section of published
* Click `SRA Run Selector` at the bottom of the GEO accession page
* Select the desired samples in the `SRA Run Selector` and then download the `Accession List`

This downloads a text file called `SRR_Acc_List.txt` which can be directly provided to the pipeline e.g. `--public_data_ids SRR_Acc_List.txt`.
This downloads a text file called `SRR_Acc_List.txt` that can be directly provided to the pipeline e.g. `--input SRR_Acc_List.txt`.
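
As an illustrative sketch of a run that consumes such an accession list (the `docker` profile is arbitrary; any of the supported container engines can be substituted):

```console
nextflow run nf-core/fetchngs --input SRR_Acc_List.txt -profile docker
```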

## Running the pipeline

@@ -57,7 +57,7 @@ results # Finished results (configurable, see below)

When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

```bash
```console
nextflow pull nf-core/fetchngs
```

@@ -199,6 +199,46 @@ params {
}
```

### Updating containers

The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`.

1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19)
2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags)
3. Create the custom config accordingly:

* For Docker:

```nextflow
process {
withName: PANGOLIN {
container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
}
}
```

* For Singularity:

```nextflow
process {
withName: PANGOLIN {
container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
}
}
```

* For Conda:

```nextflow
process {
withName: PANGOLIN {
conda = 'bioconda::pangolin=3.0.5'
}
}
```

> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
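
A run that picks up such a custom config might then look something like this (a sketch; both file names are illustrative):

```console
nextflow run nf-core/fetchngs --input ids.txt -profile docker -c custom.config
```
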
### nf-core/configs

In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile.
@@ -221,6 +261,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo
In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`):

```bash
```console
NXF_OPTS='-Xms1g -Xmx4g'
```
22 changes: 13 additions & 9 deletions lib/NfcoreTemplate.groovy
@@ -24,17 +24,21 @@ class NfcoreTemplate {
public static void hostName(workflow, params, log) {
Map colors = logColours(params.monochrome_logs)
if (params.hostnames) {
def hostname = "hostname".execute().text.trim()
params.hostnames.each { prof, hnames ->
hnames.each { hname ->
if (hostname.contains(hname) && !workflow.profile.contains(prof)) {
log.info "=${colors.yellow}====================================================${colors.reset}=\n" +
"${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" +
" but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" +
" ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" +
"=${colors.yellow}====================================================${colors.reset}="
try {
def hostname = "hostname".execute().text.trim()
params.hostnames.each { prof, hnames ->
hnames.each { hname ->
if (hostname.contains(hname) && !workflow.profile.contains(prof)) {
log.info "=${colors.yellow}====================================================${colors.reset}=\n" +
"${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" +
" but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" +
" ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" +
"=${colors.yellow}====================================================${colors.reset}="
}
}
}
} catch (Exception e) {
log.warn "[$workflow.manifest.name] Could not determine 'hostname' - skipping check. Reason: ${e.message}."
}
}
}
Empty file modified lib/nfcore_external_java_deps.jar
100755 → 100644
Empty file.
Empty file modified main.nf
100755 → 100644
Empty file.
Empty file modified modules/local/functions.nf
100755 → 100644
Empty file.
Empty file modified modules/local/get_software_versions.nf
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion nextflow.config
100755 → 100644
@@ -147,7 +147,7 @@ manifest {
description = 'Pipeline to fetch metadata and raw FastQ files from public databases'
mainScript = 'main.nf'
nextflowVersion = '!>=21.04.0'
version = '1.1'
version = '1.2'
}

// Function to ensure that resource requirements don't go beyond
2 changes: 1 addition & 1 deletion nextflow_schema.json
100755 → 100644
@@ -18,7 +18,7 @@
"type": "string",
"format": "file-path",
"mimetype": "text/plain",
"pattern": "\\.txt$",
"pattern": "^\\S+\\.txt$",
"schema": "assets/schema_input.json",
"fa_icon": "fas fa-file-excel",
"description": "File containing SRA/ENA/GEO identifiers one per line to download their associated metadata and FastQ files."
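
The tightened `pattern` above additionally requires the value to be a single whitespace-free token ending in `.txt`. A quick illustrative check of the old and new regular expressions:

```python
import re

# Old vs. new 'input' pattern from nextflow_schema.json
old_pattern = re.compile(r'\.txt$')
new_pattern = re.compile(r'^\S+\.txt$')

for path in ['ids.txt', 'my ids.txt']:
    print(path, bool(old_pattern.search(path)), bool(new_pattern.search(path)))
# ids.txt    -> accepted by both patterns
# my ids.txt -> accepted by the old pattern, rejected by the new one
```
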
Empty file modified subworkflows/local/input_check.nf
100755 → 100644
Empty file.
4 changes: 3 additions & 1 deletion workflows/fetchngs.nf
100755 → 100644
@@ -147,7 +147,9 @@
*/

workflow.onComplete {
NfcoreTemplate.email(workflow, params, summary_params, projectDir, log)
if (params.email || params.email_on_fail) {
NfcoreTemplate.email(workflow, params, summary_params, projectDir, log)
}
NfcoreTemplate.summary(workflow, params, log)
WorkflowFetchngs.curateSamplesheetWarn(log)
}
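
With this change the completion e-mail is only assembled when an address has actually been supplied, e.g. (illustrative invocation and address):

```console
nextflow run nf-core/fetchngs --input ids.txt --email you@example.com -profile docker
```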
