diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe..6af5b50 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -22,7 +22,11 @@ }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "nf-core.nf-core-extensionpack" + ] } } } diff --git a/.editorconfig b/.editorconfig index b6b3190..9b99008 100644 --- a/.editorconfig +++ b/.editorconfig @@ -22,3 +22,11 @@ indent_size = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py}] +indent_style = unset diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml index 191fabd..b9d3387 100644 --- a/.github/.dockstore.yml +++ b/.github/.dockstore.yml @@ -1,6 +1,6 @@ # Dockstore config version, not pipeline version version: 1.2 workflows: - - subclass: nfl - primaryDescriptorPath: /nextflow.config - publish: True + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 2c80c00..9a480ec 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -53,9 +53,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -- On your own fork, make a new branch `patch` based on `upstream/master`. -- Fix the bug, and bump version (X.Y.Z+1). -- A PR should be made on `master` from patch to directly address this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly address this particular bug. ## Getting help @@ -96,8 +96,8 @@ The process resources can be passed on to the tool dynamically within the proces Please use the following naming schemes, to make it easy to understand what is going where. 
-- initial process channel: `ch_output_from_<process>` -- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` ### Nextflow version bumping @@ -113,11 +113,11 @@ This repo includes a devcontainer configuration which will create a GitHub Codes To get started: -- Open the repo in [Codespaces](https://github.com/nf-core/omicsgenetraitassociation/codespaces) -- Tools installed - - nf-core - - Nextflow +- Open the repo in [Codespaces](https://github.com/nf-core/omicsgenetraitassociation/codespaces) +- Tools installed + - nf-core + - Nextflow Devcontainer specs: -- [DevContainer config](.devcontainer/devcontainer.json) +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 6c6f783..4714694 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,49 +2,49 @@ name: Bug report description: Report something that is broken or incorrect labels: bug body: - - type: markdown - attributes: - value: | - Before you post this issue, please check the documentation: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: - - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) - - [nf-core/omicsgenetraitassociation pipeline documentation](https://nf-co.re/omicsgenetraitassociation/usage) + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/omicsgenetraitassociation pipeline documentation](https://nf-co.re/omicsgenetraitassociation/usage) - - type: textarea - id: description - attributes: - label: Description of the bug - description: A clear and concise description of what the bug is. - validations: - required: true + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true - - type: textarea - id: command_used - attributes: - label: Command used and terminal output - description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. - render: console - placeholder: | - $ nextflow run ... + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... - Some output where something broke + Some output where something broke - - type: textarea - id: files - attributes: - label: Relevant files - description: | - Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. - Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. - - type: textarea - id: system - attributes: - label: System information - description: | - * Nextflow version _(eg. 
23.04.0)_ - * Hardware _(eg. HPC, Desktop, Cloud)_ - * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ - * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - * Version of nf-core/omicsgenetraitassociation _(eg. 1.1, 1.5, 1.8.2)_ + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/omicsgenetraitassociation _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 80016e8..f8b33e4 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,7 +1,7 @@ contact_links: - - name: Join nf-core - url: https://nf-co.re/join - about: Please join the nf-core community here - - name: "Slack #omicsgenetraitassociation channel" - url: https://nfcore.slack.com/channels/omicsgenetraitassociation - about: Discussion about the nf-core/omicsgenetraitassociation pipeline + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #omicsgenetraitassociation channel" + url: https://nfcore.slack.com/channels/omicsgenetraitassociation + about: Discussion about the nf-core/omicsgenetraitassociation pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 86c7556..b2a12a1 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -2,10 +2,10 @@ name: Feature request description: Suggest an idea for the nf-core/omicsgenetraitassociation pipeline labels: enhancement body: - - type: textarea - id: description - attributes: - label: Description of feature - description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. - validations: - required: true + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3c7c096..15b4759 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -13,14 +13,14 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/omic ## PR checklist -- [ ] This comment contains a description of changes (with reason). -- [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/omicsgenetraitassociation/tree/master/.github/CONTRIBUTING.md) -- [ ] If necessary, also make a PR on the nf-core/omicsgenetraitassociation _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. -- [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). -- [ ] Check for unexpected warnings in debug mode (`nextflow run . 
-profile debug,test,docker --outdir <OUTDIR>`). -- [ ] Usage Documentation in `docs/usage.md` is updated. -- [ ] Output Documentation in `docs/output.md` is updated. -- [ ] `CHANGELOG.md` is updated. -- [ ] `README.md` is updated (including new tool citations and authors/contributors). +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/omicsgenetraitassociation/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/omicsgenetraitassociation _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index ef1b956..af85af0 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -4,36 +4,36 @@ name: nf-core AWS full size tests # It runs the -profile 'test_full' on AWS batch on: - release: - types: [published] - workflow_dispatch: + release: + types: [published] + workflow_dispatch: jobs: - run-tower: - name: Run AWS full tests - if: github.repository == 'nf-core/omicsgenetraitassociation' - runs-on: ubuntu-latest - steps: - - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/omicsgenetraitassociation/work-${{ github.sha }} - parameters: | - { - "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/omicsgenetraitassociation/results-${{ github.sha }}" - } - profiles: test_full + run-tower: + name: Run AWS full tests + if: github.repository == 'nf-core/omicsgenetraitassociation' + runs-on: ubuntu-latest + steps: + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v2 + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/omicsgenetraitassociation/work-${{ github.sha }} + parameters: | + { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/omicsgenetraitassociation/results-${{ github.sha }}" + } + profiles: 
test_full - - uses: actions/upload-artifact@v3 - with: - name: Tower debug log file - path: | - tower_action_*.log - tower_action_*.json + - uses: actions/upload-artifact@v4 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 7f344b7..a6d8950 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -3,31 +3,31 @@ name: nf-core AWS test # It runs the -profile 'test' on AWS batch on: - workflow_dispatch: + workflow_dispatch: jobs: - run-tower: - name: Run AWS tests - if: github.repository == 'nf-core/omicsgenetraitassociation' - runs-on: ubuntu-latest - steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/omicsgenetraitassociation/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/omicsgenetraitassociation/results-test-${{ github.sha }}" - } - profiles: test + run-tower: + name: Run AWS tests + if: github.repository == 'nf-core/omicsgenetraitassociation' + runs-on: ubuntu-latest + steps: + # Launch workflow using Tower CLI tool action + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/omicsgenetraitassociation/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/omicsgenetraitassociation/results-test-${{ github.sha }}" + } + profiles: test - - uses: actions/upload-artifact@v3 - with: - name: Tower debug log file - path: | - tower_action_*.log - tower_action_*.json + - uses: actions/upload-artifact@v4 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index d90f69d..f9bef5d 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -2,43 +2,43 @@ name: nf-core branch protection # This workflow is triggered on PRs to master branch on the repository # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: - pull_request_target: - branches: [master] + pull_request_target: + branches: [master] jobs: - test: - runs-on: ubuntu-latest - steps: - # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - - name: Check PRs - if: github.repository == 'nf-core/omicsgenetraitassociation' - run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/omicsgenetraitassociation ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - - # If the above check failed, post a comment on the PR explaining the failure - # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## This PR is against the `master` branch :x: - - * Do not close this PR - * Click _Edit_ and change the `base` to `dev` - * This CI test 
will remain failed until you push a new commit - - --- - - Hi @${{ github.event.pull_request.user.login }}, - - It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. - The `master` branch on nf-core repositories should always contain code from the latest release. - Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. - - You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. - Note that even after this, the test will continue to show as failing until you push a new commit. - - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'nf-core/omicsgenetraitassociation' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/omicsgenetraitassociation ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 23ea2ad..1b48caa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,43 +1,42 @@ name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: - push: - branches: - - dev - pull_request: - release: - types: [published] + push: + branches: + - dev + pull_request: + release: + types: [published] env: - NXF_ANSI_LOG: false + NXF_ANSI_LOG: false concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true jobs: - test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/omicsgenetraitassociation') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" - steps: - - name: Check out pipeline code - uses: actions/checkout@v4 + test: + name: Run pipeline with test data + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/omicsgenetraitassociation') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + steps: + - name: Check out pipeline code + uses: actions/checkout@v4 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + - name: Run pipeline with test data + # For example: adding multiple test runs with different parameters + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90e..e5f0e6b 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -1,24 +1,24 @@ name: "Close user-tagged issues and PRs" on: - schedule: - - cron: "0 0 * * 0" # Once a week + schedule: + - cron: "0 0 * * 0" # Once a week jobs: - clean-up: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v7 - with: - stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." - stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." - close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
- days-before-stale: 30 - days-before-close: 20 - days-before-pr-close: -1 - any-of-labels: "awaiting-changes,awaiting-feedback" - exempt-issue-labels: "WIP" - exempt-pr-labels: "WIP" - repo-token: "${{ secrets.GITHUB_TOKEN }}" + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..2547eb6 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,67 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${GITHUB_REF#refs/heads/}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 9b25d06..5a89221 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -1,55 +1,89 @@ name: Fix linting from a comment on: - issue_comment: - 
types: [created] jobs: - deploy: - # Only run if comment is on a PR with the main repo, and if it contains the magic keywords - if: > - contains(github.event.comment.html_url, '/pull/') && - contains(github.event.comment.body, '@nf-core-bot fix linting') && - github.repository == 'nf-core/omicsgenetraitassociation' - runs-on: ubuntu-latest - steps: - # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v4 - with: - token: ${{ secrets.nf_core_bot_auth_token }} - - # Action runs on the issue comment, so we don't get the PR by default - # Use the gh cli to check out the PR - - name: Checkout Pull Request - run: gh pr checkout ${{ github.event.issue.number }} - env: - GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - - uses: actions/setup-node@v4 - - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php - - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi - - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} - - - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' - run: | - git config user.email "core@nf-co.re" - git config user.name "nf-core-bot" - git config push.default upstream - git add . - git status - git commit -m "[automated] Fix linting with Prettier" - git push + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/omicsgenetraitassociation' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . 
+ git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/omicsgenetraitassociation/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 905c58e..f792588 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -3,106 +3,67 @@ name: nf-core linting # It runs the `nf-core lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. on: - push: - branches: - - dev - pull_request: - release: - types: [published] + push: + branches: + - dev + pull_request: + release: + types: [published] jobs: - EditorConfig: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-node@v4 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-node@v4 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: - - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! - - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false - - nf-core: - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v4 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - - - uses: actions/setup-python@v4 - with: - python-version: "3.11" - architecture: "x64" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install nf-core - - - name: Run nf-core lint - env: - GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - - name: Save PR number - if: ${{ always() }} - run: echo ${{ github.event.pull_request.number }} > PR_number.txt - - - name: Upload linting log file artifact - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: linting-logs - path: | - lint_log.txt - lint_results.md - PR_number.txt + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 + cache: "pip" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30..ff69c2b 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -3,26 +3,26 @@ name: nf-core linting comment # It posts an automated comment to the PR, even if the PR is coming from a fork on: - workflow_run: - workflows: ["nf-core linting"] + workflow_run: + workflows: ["nf-core linting"] jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Download lint results - uses: dawidd6/action-download-artifact@v2 - with: - workflow: linting.yml - workflow_conclusion: completed + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + with: + workflow: linting.yml + workflow_conclusion: completed - - name: Get PR number - id: pr_number - run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + - 
name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - number: ${{ steps.pr_number.outputs.pr_number }} - path: linting-logs/lint_results.md + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 6ad3392..26a9bbb 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -1,68 +1,68 @@ name: release-announcements # Automatic release toot and tweet announcements on: - release: - types: [published] - workflow_dispatch: + release: + types: [published] + workflow_dispatch: jobs: - toot: - runs-on: ubuntu-latest - steps: - - uses: rzr/fediverse-action@master - with: - access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} - host: "mstdn.science" # custom host if not "mastodon.social" (default) - # GitHub event payload - # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release - message: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - Please see the changelog: ${{ github.event.release.html_url }} + Please see the changelog: ${{ github.event.release.html_url }} - send-tweet: - runs-on: ubuntu-latest + send-tweet: + runs-on: ubuntu-latest - steps: - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install dependencies - run: pip install tweepy==4.14.0 - - name: Send tweet - shell: python - run: | - import os - import tweepy + steps: + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy - client = tweepy.Client( - access_token=os.getenv("TWITTER_ACCESS_TOKEN"), - access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), - consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), - consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), - ) - tweet = os.getenv("TWEET") - client.create_tweet(text=tweet) - env: - TWEET: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
- Please see the changelog: ${{ github.event.release.html_url }} - TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} - TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} - TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} - TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} - bsky-post: - runs-on: ubuntu-latest - steps: - - uses: zentered/bluesky-post-action@v0.0.2 - with: - post: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - Please see the changelog: ${{ github.event.release.html_url }} - env: - BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} - BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} - # + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index acf7269..01179e3 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,21 +1,21 @@ image: nfcore/gitpod:latest tasks: - - name: Update Nextflow and setup pre-commit - command: | - pre-commit install --install-hooks - nextflow self-update - - name: unset JAVA_TOOL_OPTIONS - command: | - unset JAVA_TOOL_OPTIONS + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: - extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # 
Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb..46518b1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" - hooks: - - id: prettier + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CITATIONS.md b/CITATIONS.md index 774eb51..20047c2 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,32 +10,28 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [corrmeta](https://doi.org/10.1142/9789814447973_0023) - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. - -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Province MA. Meta-analyses of correlated genomic scans. Genet Epidemiol. 2005;29:274,#137. ## Software packaging/containerisation tools -- [Anaconda](https://anaconda.com) +- [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. -- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. -- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. 
-- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) - > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. -- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c089ec7..ff18271 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -4,21 +4,21 @@ In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: -- Age -- Ability -- Body size -- Caste -- Familial status -- Gender identity and expression -- Geographical location -- Level of experience -- Nationality and national origins -- Native language -- Neurodiversity -- Race or ethnicity -- Religion -- Sexual identity and orientation -- Socioeconomic status +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. @@ -54,38 +54,38 @@ Members of the core team or the Safety Team who violate the CoC will be required Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): -- Communicating with an official project email address. -- Communicating with community members within the nf-core Slack channel. -- Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. -- Representing nf-core on social media. This includes both official and personal accounts. +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). 
+- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): -- Ask for consent before sharing another community member’s personal information (including photographs) on social media. -- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) -- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) -- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) -- Focus on what is best for the team and the community. (When in doubt, ask) -- Accept feedback, yet be unafraid to question, deliberate, and learn. -- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) -- Take breaks when you feel like you need them. -- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. 
(We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: -- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. -- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. -- Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling @@ -105,14 +105,14 @@ All reports will be handled with the utmost discretion and confidentiality. You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: -- Your contact information. -- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. -- The behaviour that was in violation and the circumstances surrounding the incident. -- The approximate time of the behaviour (if different than the time the report was made). -- Other people involved in the incident, if applicable. -- If you believe the incident is ongoing. -- If there is a publicly available record (e.g. mailing list record, a screenshot). -- Any additional information. +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). 
+- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. After you file a report, one or more members of our Safety Team will contact you to follow up on your report. @@ -144,39 +144,39 @@ We will not name harassment victims, beyond discussions between the safety offic Actions taken by the nf-core’s Safety Team may include, but are not limited to: -- Asking anyone to stop a behaviour. -- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. -- Removing access to the gather.town and Slack, either temporarily or permanently. -- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. -- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. -- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. -- No action. +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements -- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) -- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) -- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) -- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) ## Changelog ### v1.4 - February 8th, 2022 -- Included a new member of the Safety Team. Corrected a typographical error in the text. +- Included a new member of the Safety Team. Corrected a typographical error in the text. ### v1.3 - December 10th, 2021 -- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. +- Added a statement that the CoC applies to nf-core gather.town workspaces. 
Corrected typographical errors in the text. ### v1.2 - November 12th, 2021 -- Removed information specific to reporting CoC violations at the Hackathon in October 2021. +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. ### v1.1 - October 14th, 2021 -- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. ### v1.0 - March 15th, 2021 -- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index b0c7139..244229e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) +Copyright (c) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 7a85f64..efbfe8d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ -# ![nf-core/omicsgenetraitassociation](docs/images/nf-core-omicsgenetraitassociation_logo_light.png#gh-light-mode-only) ![nf-core/omicsgenetraitassociation](docs/images/nf-core-omicsgenetraitassociation_logo_dark.png#gh-dark-mode-only) - +

+<h1>
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-omicsgenetraitassociation_logo_dark.png">
+    <img alt="nf-core/omicsgenetraitassociation" src="docs/images/nf-core-omicsgenetraitassociation_logo_light.png">
+  </picture>
+</h1>

[![GitHub Actions CI Status](https://github.com/nf-core/omicsgenetraitassociation/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/omicsgenetraitassociation/actions?query=workflow%3A%22nf-core+CI%22)
[![GitHub Actions Linting Status](https://github.com/nf-core/omicsgenetraitassociation/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/omicsgenetraitassociation/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/omicsgenetraitassociation/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)

@@ -13,51 +17,40 @@

 ## Introduction

-**nf-core/omicsgenetraitassociation** is a bioinformatics pipeline that ...
-
-
+**nf-core/omicsgenetraitassociation** is a bioinformatics pipeline that can be used to perform meta-analysis of trait associations, accounting for correlations across omics studies that arise from hidden non-independencies between study elements, such as overlapping or related samples. It takes a samplesheet with input omic association data, performs gene-level aggregation and correlated meta-analysis, and produces a report on downstream module enrichment and gene ontology enrichment analyses.

-
-
+![nf-core/omicsgenetraitassociation metro map](docs/images/nf-core-omicgenetraitassociation_metro_map.png)

-1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
-2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
+1. Gene-level aggregation of GWAS summary statistics [`PASCAL`](https://github.com/BergmannLab/PascalX)
+2. Gene-trait association [`MMAP`](https://mmap.github.io/)
+3. Correlated meta-analysis [`corrmeta`](https://github.com/wsjung/corrmeta)
+4. Module enrichment analysis [`MEA`](https://github.com/BergmannLab/PascalX)
+5. Gene ontology (GO) enrichment analysis [`GO`](https://cran.r-project.org/web/packages/WebGestaltR/index.html)

 ## Usage

 > [!NOTE]
 > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.

-
+Each row represents a single correlated meta-analysis run. `pascal` is the GWAS summary statistics file to be aggregated to the gene level. `twas` is the gene-trait association phenotype file (please refer to [usage.md](docs/usage.md) for details). `additional_sources` lists paths to additional omic association p-values.

 Now, you can run the pipeline using:

-
-
 ```bash
 nextflow run nf-core/omicsgenetraitassociation \
-   -profile  \
-   --input samplesheet.csv \
-   --outdir
+   -profile  \
+   --input samplesheet.csv \
+   --outdir
 ```

 > [!WARNING]

@@ -74,11 +67,16 @@

 For more details about the output files and reports, please refer to the

 ## Credits

-nf-core/omicsgenetraitassociation was originally written by .
+nf-core/omicsgenetraitassociation was originally written by Woo Jung ([@wsjung](https://github.com/wsjung)).
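For orientation, the five samplesheet columns described above (`sample`, `trait`, `pascal`, `twas`, `additional_sources`) are easy to sanity-check before launching a run. The sketch below is an illustration only; the pipeline performs its own validation in its `SAMPLESHEET_CHECK` step, and the local file path here is hypothetical.

```python
import pandas as pd

REQUIRED = ["sample", "trait", "pascal", "twas", "additional_sources"]

def check_samplesheet(path: str) -> pd.DataFrame:
    """Basic shape check for the 5-column samplesheet described above."""
    df = pd.read_csv(path)
    missing = set(REQUIRED) - set(df.columns)
    if missing:
        raise ValueError(f"samplesheet is missing columns: {sorted(missing)}")
    # sample, trait, pascal and twas are required; additional_sources may be blank
    for col in REQUIRED[:4]:
        if df[col].isna().any():
            raise ValueError(f"column '{col}' must be filled in for every row")
    return df

print(check_samplesheet("samplesheet.csv"))  # hypothetical local copy
```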
-We thank the following people for their extensive assistance in the development of this pipeline: +Many thanks to others who have written parts of the pipeline or helped out along the way too, including (but not limited to): - +- [Chase Mateusiak](https://github.com/cmatKhan) +- [Sandeep Acharya](https://github.com/sandeepacharya464) +- [Edward Kang](https://github.com/edwardkang0925) +- Lisa Liao +- Michael Brent +- Michael Province ## Contributions and Support @@ -91,8 +89,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `# - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: diff --git a/assets/email_template.html b/assets/email_template.html index 10c58b8..9e88b32 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/omicsgenetraitassociation v${version}
+nf-core/omicsgenetraitassociation ${version}

Run Name: $runName

<% if (!success){

diff --git a/assets/email_template.txt b/assets/email_template.txt
index 1fcb820..d72bb0f 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -4,7 +4,7 @@
   |\\ | |__  __ /  ` /  \\ |__) |__         }  {
   | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
                                           `._,._,'
-  nf-core/omicsgenetraitassociation v${version}
+  nf-core/omicsgenetraitassociation ${version}
----------------------------------------------------
 Run Name: $runName

diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index 22f8da5..dddc0a0 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -6,24 +6,24 @@ plot_type: "html"
 ## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
 ## You inject any metadata in the Nextflow '${workflow}' object
 data: |
-  <h4>Methods</h4>
-  <p>Data was processed using nf-core/omicsgenetraitassociation v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
-  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
-  <pre><code>${workflow.commandLine}</code></pre>
-  <p>${tool_citations}</p>
-  <h4>References</h4>
-  <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
-    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
-    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
-    ${tool_bibliography}
-  </ul>
-  <div>
-    <h5>Notes:</h5>
-    <ul>
-      ${nodoi_text}
-      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
-      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
-    </ul>
-  </div>
+    <h4>Methods</h4>
+    <p>Data was processed using nf-core/omicsgenetraitassociation v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
+    <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+    <pre><code>${workflow.commandLine}</code></pre>
+    <p>${tool_citations}</p>
+    <h4>References</h4>
+    <ul>
+      <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+      <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+      <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+      <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+      ${tool_bibliography}
+    </ul>
+    <div>
+      <h5>Notes:</h5>
+      <ul>
+        ${nodoi_text}
+        <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+        <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+      </ul>
+    </div>
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 516fbb5..b22135d 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,13 +1,13 @@
 report_comment: >
-  This report has been generated by the nf-core/omicsgenetraitassociation
-  analysis pipeline. For information about how to interpret these results, please see the
-  documentation.
+  This report has been generated by the nf-core/omicsgenetraitassociation
+  analysis pipeline. For information about how to interpret these results, please see the
+  documentation.
 report_section_order:
-  "nf-core-omicsgenetraitassociation-methods-description":
-    order: -1000
-  software_versions:
-    order: -1001
-  "nf-core-omicsgenetraitassociation-summary":
-    order: -1002
+  "nf-core-omicsgenetraitassociation-methods-description":
+    order: -1000
+  software_versions:
+    order: -1001
+  "nf-core-omicsgenetraitassociation-summary":
+    order: -1002
 export_plots: true
diff --git a/assets/nf-core-omicsgenetraitassociation_logo_light.png b/assets/nf-core-omicsgenetraitassociation_logo_light.png
index 0291fbd..edf746e 100644
Binary files a/assets/nf-core-omicsgenetraitassociation_logo_light.png and b/assets/nf-core-omicsgenetraitassociation_logo_light.png differ
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index 5f653ab..bb0a747 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,3 +1,2 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+sample,trait,pascal,twas,additional_sources
+1,fhshdl,s3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gwasA.csv.gz,s3://brentlab-nextflow-testdata/omicsgenetraitassociation/minimal/mmap/demo_phenotype.csv,
diff --git a/conf/cma_mea.config b/conf/cma_mea.config
index 671eebe..4563791 100644
--- a/conf/cma_mea.config
+++ b/conf/cma_mea.config
@@ -37,9 +37,9 @@ process {

 singularity {
-    enabled = true
-    autoMounts = true
-    cacheDir = "singularity_images"
+    enabled = true
+    autoMounts = true
+    cacheDir = "singularity_images"
 }

 conda.enabled = true
diff --git a/conf/modules.config b/conf/modules.config
index 401139f..95ea2b7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -19,79 +19,79 @@ process {
     ]

     withName: MMAP {
-        publishDir = [
-            path: { "${params.outdir}/mmap/mmap" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.endsWith('.poly.cov.csv') ? filename : null }
-        ]
+        publishDir = [
+            path: { "${params.outdir}/mmap/mmap" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.endsWith('.poly.cov.csv') ? filename : null }
+        ]
     }

     withName: MMAP_PARSE {
-        publishDir = [
-            path: { "${params.outdir}/mmap" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.startsWith('parsed_output_') ? filename : null }
-        ]
+        publishDir = [
+            path: { "${params.outdir}/mmap" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.startsWith('parsed_output_') ? filename : null }
+        ]
     }

     withName: PREPROCESS_PASCAL {
-        publishDir = [
-            path: { "${params.outdir}/mea" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
+        publishDir = [
+            path: { "${params.outdir}/mea" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ?
null : filename } + ] } withName: RUN_PASCAL { - publishDir = [ - path: { "${params.outdir}/mea" }, - mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.equals('versions.yml')) { null } - else if (filename.startsWith('GO_')) { null } - else if (filename.startsWith('GS_')) { null } - else { filename } - } - ] + publishDir = [ + path: { "${params.outdir}/mea" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { null } + else if (filename.startsWith('GO_')) { null } + else if (filename.startsWith('GS_')) { null } + else { filename } + } + ] } withName: POSTPROCESS_PASCAL { - publishDir = [ - path: { "${params.outdir}/mea" }, - mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.equals('versions.yml')) { null } - else if (filename.startsWith('GO_')) { null } - else if (filename.startsWith('GS_')) { null } - else { filename } - } - ] + publishDir = [ + path: { "${params.outdir}/mea" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { null } + else if (filename.startsWith('GO_')) { null } + else if (filename.startsWith('GS_')) { null } + else { filename } + } + ] } withName: GO_ANALYSIS { - publishDir = [ - path: { "${params.outdir}/mea/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.equals('versions.yml')) { null } - else if (filename.endsWith(".txt")) { null } - else if (filename.startsWith('GS_')) { null } - else { filename } - } - ] + publishDir = [ + path: { "${params.outdir}/mea/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { null } + else if (filename.endsWith(".txt")) { null } + else if (filename.startsWith('GS_')) { null } + else { filename } + } + ] } withName: MERGE_ORA_AND_SUMMARY { - publishDir = [ - path: { "${params.outdir}/mea/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.equals('versions.yml')) { null } - else if (filename.startsWith('GO_')) { null } - else if (filename.startsWith('GS_')) { null } - else { filename } - } - ] + publishDir = [ + path: { "${params.outdir}/mea/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { null } + else if (filename.startsWith('GO_')) { null } + else if (filename.startsWith('GS_')) { null } + else { filename } + } + ] } withName: SAMPLESHEET_CHECK { @@ -101,25 +101,4 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - //withName: FASTQC { - // ext.args = '--quiet' - //} - - //withName: CUSTOM_DUMPSOFTWAREVERSIONS { - // publishDir = [ - // path: { "${params.outdir}/pipeline_info" }, - // mode: params.publish_dir_mode, - // pattern: '*_versions.yml' - // ] - //} - - //withName: 'MULTIQC' { - // ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - // publishDir = [ - // path: { "${params.outdir}/multiqc" }, - // mode: params.publish_dir_mode, - // saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - // ] - //} } diff --git a/conf/test_full.config b/conf/test_full.config index 784948a..bbb9f12 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,8 +15,6 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' // Genome references diff --git a/conf/test_local.config b/conf/test_local.config index 01c7b88..8b78ee8 100644 --- a/conf/test_local.config +++ b/conf/test_local.config @@ -1,65 +1,65 @@ params { - // pipeline parameters - trait = 'fhshdl' + // pipeline parameters + trait = 'fhshdl' - // PASCAL - pascal_gwas_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gwasA.csv.gz' - pascal_gene_annotation = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gene_annotation.tsv' - // nextflow does not support s3 glob operations (tarballed) - pascal_ref_panel = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/EUR_simulated.tar.gz' - pascal_header = 0 - pascal_pval_col = 1 + // PASCAL + pascal_gwas_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gwasA.csv.gz' + pascal_gene_annotation = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gene_annotation.tsv' + // nextflow does not support s3 glob operations (tarballed) + pascal_ref_panel = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/EUR_simulated.tar.gz' + pascal_header = 0 + pascal_pval_col = 1 - // MMAP - mmap_gene_list = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/gene_list_gwasA_twas.txt' - mmap_pheno_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap_pheno_adjusted_fhshdl_HGNC_gwasA_genes.csv' - mmap_pedigree_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap.ped.v5.csv' - mmap_cov_matrix_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/llfs.kinship.bin' - mmap_header = 1 - mmap_pval_col = 'p_vals' - mmap_beta_col = 'betas_genes' - mmap_se_genes = 'se_genes' + // MMAP + mmap_gene_list = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/gene_list_gwasA_twas.txt' + mmap_pheno_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap_pheno_adjusted_fhshdl_HGNC_gwasA_genes.csv' + mmap_pedigree_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap.ped.v5.csv' + mmap_cov_matrix_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/llfs.kinship.bin' + mmap_header = 1 + mmap_pval_col = 'p_vals' + mmap_beta_col = 'betas_genes' + mmap_se_genes = 'se_genes' - // CMA module test files from S3 - cma_two_traits = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/two_traits/fhshdl/' - cma_three_complete_corr = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_complete_correlation/' - cma_three_missing_obs = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_missing_observations/' + // CMA module test files from S3 + cma_two_traits = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/two_traits/fhshdl/' + cma_three_complete_corr = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_complete_correlation/' + cma_three_missing_obs = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_missing_observations/' - // MEA preprocess - pipeline = 'cma' - module_file_dir = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mea/preprocess/cherryPickModules/' - gene_col_name = 'markname' - 
pval_col_name = 'meta_p' + // MEA preprocess + pipeline = 'cma' + module_file_dir = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mea/preprocess/cherryPickModules/' + gene_col_name = 'markname' + pval_col_name = 'meta_p' - // MEA postprocess - numtests = 17551 - alpha = 0.05 + // MEA postprocess + numtests = 17551 + alpha = 0.05 - // Boilerplate options - publish_dir_mode = 'copy' - email = 'jungw@wustl.edu' - //email_on_fail = null - outdir = 'results' - //monochrome_logs = null - //hook_url = null + // Boilerplate options + publish_dir_mode = 'copy' + email = 'jungw@wustl.edu' + //email_on_fail = null + outdir = 'results' + //monochrome_logs = null + //hook_url = null } includeConfig 'modules.config' process { - executor = 'local' - publishDir = [ - mode: "copy", - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - ] + executor = 'local' + publishDir = [ + mode: "copy", + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + ] } singularity { - enabled = true - autoMounts = true - cacheDir = "singularity_images" + enabled = true + autoMounts = true + cacheDir = "singularity_images" } conda.enabled = true diff --git a/docs/README.md b/docs/README.md index f69f875..b2af6e5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,9 +2,9 @@ The nf-core/omicsgenetraitassociation documentation is split into the following pages: -- [Usage](usage.md) - - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. -- [Output](output.md) - - An overview of the different results produced by the pipeline and how to interpret them. +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/nf-core-omicgenetraitassociation_metro_map.png b/docs/images/nf-core-omicgenetraitassociation_metro_map.png new file mode 100644 index 0000000..6de8d1b Binary files /dev/null and b/docs/images/nf-core-omicgenetraitassociation_metro_map.png differ diff --git a/docs/images/nf-core-omicsgenetraitassociation_logo_dark.png b/docs/images/nf-core-omicsgenetraitassociation_logo_dark.png index a575419..f36fb77 100644 Binary files a/docs/images/nf-core-omicsgenetraitassociation_logo_dark.png and b/docs/images/nf-core-omicsgenetraitassociation_logo_dark.png differ diff --git a/docs/images/nf-core-omicsgenetraitassociation_logo_light.png b/docs/images/nf-core-omicsgenetraitassociation_logo_light.png index 0291fbd..9790431 100644 Binary files a/docs/images/nf-core-omicsgenetraitassociation_logo_light.png and b/docs/images/nf-core-omicsgenetraitassociation_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index c4c6b9e..9e8bb9e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -4,67 +4,104 @@ This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
-
-
+In the output directory of a given run of this pipeline, there will be a subdirectory for each sample in the samplesheet, where the directory name is the sample name (first field in the samplesheet).
+
+For example, if your samplesheet has two samples, like so:
+
+```csv title="samplesheet.csv"
+sample,trait,pascal,twas,additional_sources
+llfs_fhshdl,fhshdl,data/llfs/gwas.csv.gz,data/llfs/twas.csv,
+fhs_fhshdl,fhshdl,data/fhs/gwas.csv.gz,data/fhs/twas.csv,
+```
+
+Then the output directory would have the following structure:
+
+```tree
+example_results/
+├── llfs_fhshdl
+│   ├── cma/
+│   ├── mmap/
+│   ├── pascal/
+│   └── mea/
+├── fhs_fhshdl
+│   ├── cma/
+│   ├── mmap/
+│   ├── pascal/
+│   └── mea/
+└── pipeline_info
+```

 ## Pipeline overview

 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:

-- [FastQC](#fastqc) - Raw read QC
-- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
-- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+1. Prepare gene-level trait-association P-values
+
+- [PASCAL](#pascal)
+- [MMAP](#mmap)
+
+2. Perform (correlated) Meta-Analysis
+
+- [CMA](#cma)

-### FastQC
+3. Enrichment Analyses
+
+- [MEA](#mea)
+- [GOEA](#mea)
+
+### PASCAL
+
+This step aggregates GWAS summary statistics P-values to gene-level scores using PASCAL, which accounts for linkage disequilibrium between markers. It provides aggregated gene-level P-values and a Manhattan plot for visualization.
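To make the aggregation step concrete, the sketch below shows the chi-squared sum idea that PASCAL builds on, under the simplifying assumption of independent SNPs (an identity LD matrix). The pipeline itself calls PascalX's `genescorer.chi2sum` (see `modules/local/mea/pascal` later in this diff), which additionally corrects for LD using a reference panel; the numbers here are made up.

```python
import numpy as np
from scipy.stats import chi2, norm

def naive_gene_pvalue(snp_pvals) -> float:
    """Combine per-SNP P-values within a gene via a chi-squared sum, assuming
    independent SNPs; PASCAL instead weights the sum using the eigenvalues of
    the local LD matrix estimated from a reference panel."""
    z2 = norm.isf(np.asarray(snp_pvals) / 2.0) ** 2  # two-sided P -> chi2(1) stats
    return float(chi2.sf(z2.sum(), df=len(snp_pvals)))

print(naive_gene_pvalue([1e-5, 0.02, 0.3]))  # toy SNP P-values for one gene
```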
Output files

-- `fastqc/`
-  - `*_fastqc.html`: FastQC report containing quality metrics.
-  - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+- `pascal/`
+  - `pascal_out.tsv`: PASCAL gene-level P-values
+  - `manhattan_plot.png`: Manhattan plot of the gene-level P-values
+### MMAP

-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
+This step uses MMAP to perform linear mixed model analysis with gene expression as the main predictor and the trait as the outcome variable. It provides a directory of LMM results for each gene, as well as a single parsed CSV output file.

-![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
-
-![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png)
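As a rough sketch of the model this step fits per gene: trait as the outcome, gene expression as the predictor, and a random effect absorbing relatedness. MMAP uses the full pedigree/kinship matrix (see the `mmap_pedigree_file` and `mmap_cov_matrix_file` parameters earlier in this diff); the stand-in below uses statsmodels' `MixedLM` with a simple family grouping and simulated data, so it is illustrative only.

```python
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
n = 40
df = pd.DataFrame({
    "expression": rng.normal(size=n),
    "family": np.repeat([f"fam{i}" for i in range(10)], 4),  # toy pedigree proxy
})
df["trait"] = 0.5 * df["expression"] + rng.normal(size=n)

# a random intercept per family stands in for the kinship covariance MMAP uses
fit = smf.mixedlm("trait ~ expression", df, groups=df["family"]).fit()
print(fit.pvalues["expression"])  # the per-gene P-value passed on to the meta-analysis
```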
+Output files -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) +- `mmap/` + - `mmap/`: directory containing single-gene results + - `parsed_output_mmap_results.csv`: parsed output file -:::note -The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. -::: +### CMA -### MultiQC +This step performs correlated meta-analysis using the CMA package. It provides an output file with the meta P-value and a matrix of tetrachoric correlation coefficients.
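The tetrachoric correlation matrix is what lets the meta-analysis discount studies that share samples. Below is a minimal sketch of the correlated Stouffer combination behind this idea; it is an illustration only (the pipeline uses the `corrmeta` R package), with an assumed correlation of 0.4 between two overlapping studies.

```python
import numpy as np
from scipy.stats import norm

def correlated_stouffer(pvals, sigma) -> float:
    """Stouffer's Z with a correlation adjustment: when study statistics are
    correlated, Var(sum of Z) is 1' Sigma 1 rather than the number of studies."""
    z = norm.isf(np.asarray(pvals))              # per-study Z-scores
    ones = np.ones(len(z))
    z_meta = z.sum() / np.sqrt(ones @ np.asarray(sigma) @ ones)
    return float(norm.sf(z_meta))                # meta-analysis P-value

sigma = np.array([[1.0, 0.4], [0.4, 1.0]])       # assumed inter-study correlation
print(correlated_stouffer([1e-4, 3e-3], sigma))
```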
Output files -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `cma/` + - `CMA_meta.csv`: file with meta p-value + - `tetrachor_sigma.txt`: tetrachoric correlations between input datasets -
+### MEA -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +This step performs module and gene ontology (GO) enrichment analyses. It provides summaries of each enrichment analysis. + +
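The GO step runs WebGestaltR's over-representation analysis (ORA, as configured in `modules/local/mea/go_analysis` later in this diff). At its core, ORA is a one-sided hypergeometric test; the sketch below uses made-up counts, with a background size mirroring the `numtests = 17551` default shown earlier.

```python
from scipy.stats import hypergeom

def ora_pvalue(k: int, n: int, K: int, N: int) -> float:
    """P(overlap >= k) when n module genes are drawn from N background genes,
    K of which belong to the GO term (one-sided hypergeometric test)."""
    return float(hypergeom.sf(k - 1, N, K, n))

# e.g. 12 genes of a 100-gene module fall in a 400-gene GO term
print(ora_pvalue(12, 100, 400, 17551))
```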
+Output files -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +- `mea/` + - `master_summary_.csv`: master summary file with all enrichment analysis results ### Pipeline information
Output files

-- `pipeline_info/`
-  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
-  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
-  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
-  - Parameters used by the pipeline run: `params.json`.
+- `pipeline_info/`
+  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
+  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+  - Parameters used by the pipeline run: `params.json`.
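Given the per-sample layout documented above, downstream consumers can gather the MEA master summaries across samples with a few lines. This is a convenience sketch, not part of the pipeline; the glob pattern assumes the `<outdir>/<sample>/mea/master_summary_*.csv` naming shown in this document.

```python
from pathlib import Path
import pandas as pd

def collect_mea_summaries(outdir: str) -> pd.DataFrame:
    """Concatenate every sample's MEA master summary into one table."""
    frames = []
    for csv in Path(outdir).glob("*/mea/master_summary_*.csv"):
        df = pd.read_csv(csv)
        df["sample"] = csv.parts[-3]  # the sample-named subdirectory
        frames.append(df)
    if not frames:
        raise FileNotFoundError(f"no MEA summaries found under {outdir}")
    return pd.concat(frames, ignore_index=True)

print(collect_mea_summaries("results").head())  # 'results' is the assumed --outdir
```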
diff --git a/docs/usage.md b/docs/usage.md
index 4d1e446..61a7b3f 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -6,49 +6,32 @@

 ## Introduction

-
+Omicsgenetraitassociation may be performed for any set of omics which can be analyzed at a common omic unit of inference (e.g. genes).

 ## Samplesheet input

-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row as shown in the examples below.

-```bash
---input '[path to samplesheet file]'
-```
-
-### Multiple runs of the same sample
-
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
-
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
-```
+**Note**: Currently, the pipeline supports additional sources of evidence. If these are not desired, `additional_sources` should simply be left blank.

 ### Full samplesheet

-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
-
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+The pipeline will auto-detect whether a sample contains PASCAL, TWAS, or additional sources of evidence.
+A final samplesheet file consisting of PASCAL, TWAS, and additional sources of evidence for sample `llfs_fhshdl`, and no additional sources of evidence for `fhs_lnTG`, would look like so:

 ```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
+sample,trait,pascal,twas,additional_sources
+llfs_fhshdl,fhshdl,data/llfs/fhshdl/gwas.csv,data/llfs/fhshdl/twas.csv,data/llfs/additional_sources.txt
+fhs_lnTG,lnTG,data/fhs/lnTG/gwas.csv,data/fhs/lnTG/twas.csv,
 ```

-| Column    | Description                                                                                                                                                                             |
-| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| Column               | Description                                                                                                                           |
+| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample`             | Custom sample name.                                                                                                                   |
+| `trait`              | Trait name.                                                                                                                           |
+| `pascal`             | Full path to GWAS summary statistics. File can either be `.csv` or gzipped `.csv.gz`.                                                 |
+| `twas`               | Full path to TWAS phenotype file. File must be `.csv` and must contain a column whose name matches `trait`, plus one column per gene. |
+| `additional_sources` | Full path to a text file that lists, one per line, the full paths to files with additional omic association p-values.                 |

 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.

@@ -57,7 +40,10 @@

 The typical command for running the pipeline is as follows:

 ```bash
-nextflow run nf-core/omicsgenetraitassociation --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
+nextflow run nf-core/omicsgenetraitassociation \
+    --input /path/to/your_samplesheet.csv \
+    --outdir results \
+    -profile docker
 ```

 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.

@@ -66,7 +52,7 @@

 Note that the pipeline will create the following files in your working directory:

 ```bash
 work                # Directory containing the nextflow working files
-                    # Finished results in specified location (defined with --outdir)
+results             # Finished results in specified location (defined with --outdir)
 .nextflow_log       # Log file from Nextflow
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```

@@ -90,7 +76,6 @@

 with `params.yaml` containing:

 ```yaml
 input: './samplesheet.csv'
 outdir: './results/'
-genome: 'GRCh37'
 <...>
 ```

@@ -141,23 +126,28 @@

 They are loaded in sequence, so later profiles can overwrite earlier profiles.

 If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.

-- `test`
-  - A profile with a complete configuration for automated testing
-  - Includes links to test data so needs no other parameters
-- `docker`
-  - A generic configuration profile to be used with [Docker](https://docker.com/)
-- `singularity`
-  - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
-- `podman`
-  - A generic configuration profile to be used with [Podman](https://podman.io/)
-- `shifter`
-  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
-- `charliecloud`
-  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
-- `apptainer`
-  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
-- `conda`
-  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e.
when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
+
+**Omicsgenetraitassociation-specific profiles**
+
+- `test`
+  - A minimal test profile for the full workflow

 ### `-resume`

diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 083d586..151032b 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -24,7 +24,7 @@ class WorkflowMain {
     //
     // Validate parameters and print summary to screen
     //
-    public static void initialise(workflow, params, log) {
+    public static void initialise(workflow, params, log, args) {

         // Print workflow version and exit on --version
         if (params.version) {
@@ -35,6 +35,8 @@ class WorkflowMain {

         // Check that a -profile or Nextflow config has been provided to run the pipeline
         NfcoreTemplate.checkConfigProvided(workflow, log)
+        // Check that the profile doesn't contain spaces and doesn't end with a trailing comma
+        checkProfile(workflow.profile, args, log)

         // Check that conda channels are set-up correctly
         if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
@@ -60,4 +62,16 @@
         }
         return null
     }
+
+    //
+    // Exit pipeline if --profile contains spaces
+    //
+    private static void checkProfile(profile, args, log) {
+        if (profile.endsWith(',')) {
+            Nextflow.error "Profile cannot end with a trailing comma. Please remove the comma from the end of the profile string.\nHint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead, e.g., `-profile docker,test`."
+        }
+        if (args[0]) {
+            log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\nHint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead, e.g., `-profile docker,test`."
+        }
+    }
 }
diff --git a/lib/WorkflowOmicsgenetraitassociation.groovy b/lib/WorkflowOmicsgenetraitassociation.groovy
index a759ea7..ca57c18 100755
--- a/lib/WorkflowOmicsgenetraitassociation.groovy
+++ b/lib/WorkflowOmicsgenetraitassociation.groovy
@@ -47,13 +47,10 @@ class WorkflowOmicsgenetraitassociation {

     public static String toolCitationText(params) {

-        // TODO nf-core: Optionally add in-text citation tools to this list.
         // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al.
2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", "." ].join(' ').trim() diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 4a218cf..5dde50e 100644 --- a/main.nf +++ b/main.nf @@ -17,10 +17,6 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -// params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -28,23 +24,23 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// include { validateParameters; paramsHelp } from 'plugin/nf-validation' +include { validateParameters; paramsHelp } from 'plugin/nf-validation' -// // Print help message if needed -// if (params.help) { -// def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -// def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -// def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" -// log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) -// System.exit(0) -// } +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} -// // Validate input parameters +// Validate input parameters // if (params.validate_params) { // validateParameters() // } -// WorkflowMain.initialise(workflow, params, log) +WorkflowMain.initialise(workflow, params, log, args) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/modules.json b/modules.json index 5aa4b5d..9bc627b 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", + "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "ccacf6f5de6df3bc6d73b665c1fd2933d8bbc290", "installed_by": ["modules"] } } diff --git a/modules/local/cma/cma/main.nf b/modules/local/cma/cma/main.nf index ad683e6..35abd65 100644 --- a/modules/local/cma/cma/main.nf +++ b/modules/local/cma/cma/main.nf @@ -2,7 +2,7 @@ process CMA { label 'process_low' - container 'docker://jungwooseok/cma:1.2.5' + container 'jungwooseok/cma:1.2.7' // TODO: replace when released on bioconductor input: path input_files // list of input 
files to CMA (to accommodate an arbitrary number of files)

@@ -34,7 +34,7 @@
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         R: \$(R --version | head -n1)
-        CMA: \$(Rscript -e "print(packageVersion('CMA'))")
+        CMA: \$(Rscript -e "print(packageVersion('corrmeta'))")
         dplyr: \$(Rscript -e "print(packageVersion('dplyr'))")
     END_VERSIONS
     """
diff --git a/modules/local/cma/format_cma_input/bin/format_cma_input.py b/modules/local/cma/format_cma_input/bin/format_cma_input.py
index 226fcf8..670ee3d 100644
--- a/modules/local/cma/format_cma_input/bin/format_cma_input.py
+++ b/modules/local/cma/format_cma_input/bin/format_cma_input.py
@@ -1,65 +1,64 @@
 import pandas as pd
-import argparse
 import os

 def format_cma_input(input_file, name, header, pval_col, beta_col, se_col):
     """
     Formats the input file for CMA analysis
     args
-    @input_file: input file with p-values
-    @name: name of output file
-    @header: header exists (1) or not (0)
-    @pval_col: column name or number of p-value
-    @beta_col: column name or number of beta value
-    @se_col: column name or number of SE value
+        @input_file: input file with p-values
+        @name: name of output file
+        @header: header exists (1) or not (0)
+        @pval_col: column name or number of p-value
+        @beta_col: column name or number of beta value
+        @se_col: column name or number of SE value
     """
     _, ext = os.path.splitext(input_file)
     sep = ","
     if ext == ".tsv":
-        sep = "\t"
+        sep = "\t"
     if header == 1:
-        df = pd.read_csv(input_file, sep=sep)
-        print(df)
-        print(df.columns)
-        df['n'] = 0
+        df = pd.read_csv(input_file, sep=sep)
+        print(df)
+        print(df.columns)
+        df['n'] = 0

-        df['markname'] = df.iloc[:, 0]
+        df['markname'] = df.iloc[:, 0]

-        df['pval'] = df[pval_col]
+        df['pval'] = df[pval_col]

-        if beta_col == "[]":
-            df['beta'] = 0
-        else:
-            df['beta'] = df[beta_col]
+        if beta_col == "[]":
+            df['beta'] = 0
+        else:
+            df['beta'] = df[beta_col]

-        if se_col == "[]":
-            df['se'] = 0
-        else:
-            df['se'] = df[se_col]
+        if se_col == "[]":
+            df['se'] = 0
+        else:
+            df['se'] = df[se_col]
     else:
-        df = pd.read_csv(input_file, sep=sep, header=None)
+        df = pd.read_csv(input_file, sep=sep, header=None)

-        df['n'] = 0
-        df['markname'] = df.iloc[:, 0]
+        df['n'] = 0
+        df['markname'] = df.iloc[:, 0]

-        df['pval'] = df.iloc[:, int(pval_col)]
+        df['pval'] = df.iloc[:, int(pval_col)]

-        if beta_col == "[]":
-            df['beta'] = 0
-        else:
-            df['beta'] = df.iloc[:, int(beta_col)]
+        if beta_col == "[]":
+            df['beta'] = 0
+        else:
+            df['beta'] = df.iloc[:, int(beta_col)]

-        if se_col == "[]":
-            df['se'] = 0
-        else:
-            df['se'] = df.iloc[:, int(se_col)]
+        if se_col == "[]":
+            df['se'] = 0
+        else:
+            df['se'] = df.iloc[:, int(se_col)]

     df = df[['markname','beta','se','pval','n']]
     df.to_csv(f'{name}.csv', index=False)

 if __name__ == "__main__":
-    from argparse import ArgumentParser
+    from argparse import ArgumentParser
     parser = ArgumentParser()
     parser.add_argument('--input_file', help = "input file")
     parser.add_argument("--name", help = "name of output csv file")
@@ -69,4 +68,4 @@
     parser.add_argument("--se_col", help = "column name or number (0-based) of SE value")
     args = parser.parse_args()
-    format_cma_input(args.input_file, args.name, args.header, args.pval_col, args.beta_col, args.se_col)
\ No newline at end of file
+    format_cma_input(args.input_file, args.name, args.header, args.pval_col, args.beta_col, args.se_col)
diff --git a/modules/local/cma/format_cma_input/environment.yml b/modules/local/cma/format_cma_input/environment.yml
index
5af78b4..581befe 100644 --- a/modules/local/cma/format_cma_input/environment.yml +++ b/modules/local/cma/format_cma_input/environment.yml @@ -1,8 +1,8 @@ name: format_cma_input channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda + - defaults dependencies: - - python=3.8.3 - - pandas=1.1.5 \ No newline at end of file + - python=3.8.3 + - pandas=1.1.5 diff --git a/modules/local/cma/format_cma_input/main.nf b/modules/local/cma/format_cma_input/main.nf index 8661dfc..459461c 100644 --- a/modules/local/cma/format_cma_input/main.nf +++ b/modules/local/cma/format_cma_input/main.nf @@ -2,7 +2,6 @@ process FORMAT_CMA_INPUT { label 'process_medium' - // include an image docker/singularity from biocontainers conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pandas:1.1.5' : diff --git a/modules/local/mea/go_analysis/main.nf b/modules/local/mea/go_analysis/main.nf index 90da05f..09cb599 100644 --- a/modules/local/mea/go_analysis/main.nf +++ b/modules/local/mea/go_analysis/main.nf @@ -2,7 +2,7 @@ process GO_ANALYSIS { label 'process_low' - container 'docker://jungwooseok/webgestalt:1.0.3' + container 'jungwooseok/webgestalt:1.0.3' // TODO: requested BioContainers input: diff --git a/modules/local/mea/go_analysis/templates/ORA_cmd.R b/modules/local/mea/go_analysis/templates/ORA_cmd.R index 39f8d94..d88e194 100755 --- a/modules/local/mea/go_analysis/templates/ORA_cmd.R +++ b/modules/local/mea/go_analysis/templates/ORA_cmd.R @@ -16,10 +16,10 @@ library("WebGestaltR") # read in nextflow parameters oraSummaryDir <- file.path("GO_summaries", '$trait') opt <- list( - sigModuleDir = '$sigModuleDir', - backGroundGenesFile = '$goFile', - summaryRoot = oraSummaryDir, - reportRoot = 'GO_reports/' + sigModuleDir = '$sigModuleDir', + backGroundGenesFile = '$goFile', + summaryRoot = oraSummaryDir, + reportRoot = 'GO_reports/' ) METHOD = "ORA" # ORA | GSEA | NTA @@ -47,7 +47,7 @@ for(fileName in list.files(INPUT_PATH)){ name <- "" if(grepl("sig_", fileName)){ ## get name of input file - name <- tools::file_path_sans_ext(fileName) + name <- tools::file_path_sans_ext(fileName) tf_method = paste0(name, '_', METHOD) tryCatch( # perform enrichment analysis @@ -70,30 +70,30 @@ for(fileName in list.files(INPUT_PATH)){ print(paste0("ERROR while running WebGestalt for ",tf_method)) enrich_df = NULL } - ) + ) }else{ - name <- tools::file_path_sans_ext(fileName) + name <- tools::file_path_sans_ext(fileName) } # save summary as a .csv file if (!is.null(enrich_df)) { - # remove link column - sig_df <- subset(enrich_df, select = -c(link)) - # affinity propagation - idsInSet <- sapply(sig_df\$overlapId, strsplit, split=";") - names(idsInSet) <- sig_df\$geneSet - minusLogP <- -log(sig_df\$pValue) - minusLogP[minusLogP == Inf] <- -log(.Machine\$double.eps) - apRes <- affinityPropagation(idsInSet, minusLogP) - # subset GO terms for exemplar terms - apGO_full <- sig_df[sig_df\$geneSet %in% apRes\$representatives,] - if (nrow(apGO_full) > 0) { - apGO_full['database'] <- rep(DATABASE, nrow(apGO_full)) - write.csv(apGO_full,file.path(SUMMARIES_PATH,paste0(name,".csv")),row.names = FALSE) - } else { - print("NO SIGNIFICANT OVERLAPS") - write.csv(NULL,file.path(SUMMARIES_PATH,paste0(name,".csv")),row.names = FALSE) - } - } + # remove link column + sig_df <- subset(enrich_df, select = -c(link)) + # affinity propagation + idsInSet <- sapply(sig_df\$overlapId, strsplit, split=";") + 
names(idsInSet) <- sig_df\$geneSet + minusLogP <- -log(sig_df\$pValue) + minusLogP[minusLogP == Inf] <- -log(.Machine\$double.eps) + apRes <- affinityPropagation(idsInSet, minusLogP) + # subset GO terms for exemplar terms + apGO_full <- sig_df[sig_df\$geneSet %in% apRes\$representatives,] + if (nrow(apGO_full) > 0) { + apGO_full['database'] <- rep(DATABASE, nrow(apGO_full)) + write.csv(apGO_full,file.path(SUMMARIES_PATH,paste0(name,".csv")),row.names = FALSE) + } else { + print("NO SIGNIFICANT OVERLAPS") + write.csv(NULL,file.path(SUMMARIES_PATH,paste0(name,".csv")),row.names = FALSE) + } + } else { print("NO SIGNIFICANT OVERLAPS") write.csv(NULL,file.path(SUMMARIES_PATH,paste0(name,".csv")),row.names = FALSE) @@ -108,9 +108,9 @@ r.version <- strsplit(version[['version.string']], ' ')[[1]][3] webgestalt.version <- as.character(packageVersion('WebGestaltR')) writeLines( - c( - '"${task.process}":', - paste(' r-base:', r.version), - paste(' webgestalt:', webgestalt.version) - ), -'versions.yml') \ No newline at end of file + c( + '"${task.process}":', + paste(' r-base:', r.version), + paste(' webgestalt:', webgestalt.version) + ), +'versions.yml') diff --git a/modules/local/mea/merge_ora_and_summary/environment.yml b/modules/local/mea/merge_ora_and_summary/environment.yml new file mode 100644 index 0000000..97bbe02 --- /dev/null +++ b/modules/local/mea/merge_ora_and_summary/environment.yml @@ -0,0 +1,8 @@ +name: merge_ora_and_summary +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - python=3.8.3 + - pandas=1.1.5 diff --git a/modules/local/mea/merge_ora_and_summary/main.nf b/modules/local/mea/merge_ora_and_summary/main.nf index aa95e92..0b0eee1 100644 --- a/modules/local/mea/merge_ora_and_summary/main.nf +++ b/modules/local/mea/merge_ora_and_summary/main.nf @@ -2,7 +2,7 @@ process MERGE_ORA_AND_SUMMARY { label 'process_low' - // container 'docker://jungwooseok/mea:1.0.0' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pandas:1.1.5' : 'quay.io/biocontainers/pandas:1.1.5' }" @@ -12,11 +12,11 @@ process MERGE_ORA_AND_SUMMARY { tuple val(pipeline), val(trait) output: - path("summary/"), emit: summary_dir - path("summary/*"), emit: summary_files - tuple val(pipeline), val(trait), emit: meta - val(trait), emit: trait - path("versions.yml"), emit: versions + path("summary/") , emit: summary_dir + path("summary/*") , emit: summary_files + tuple val(pipeline), val(trait) , emit: meta + val(trait) , emit: trait + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -24,4 +24,4 @@ process MERGE_ORA_AND_SUMMARY { script: def args = task.ext.args ?: '' template 'mergeORAandSummary.py' -} \ No newline at end of file +} diff --git a/modules/local/mea/merge_ora_and_summary/templates/mergeORAandSummary.py b/modules/local/mea/merge_ora_and_summary/templates/mergeORAandSummary.py index 14a0870..7f80014 100755 --- a/modules/local/mea/merge_ora_and_summary/templates/mergeORAandSummary.py +++ b/modules/local/mea/merge_ora_and_summary/templates/mergeORAandSummary.py @@ -21,7 +21,7 @@ def countGOterms(DIRPATH:str)-> int: def outputMergableORA_df(module_ora_file:str, study:str, trait:str, network:str, moduleIndex:int): - + dict_ora = {'study':[], 'trait':[], 'network':[], 'moduleIndex':[], 'geneontology_Biological_Process':[], 'BPminCorrectedPval':[], 'BPminFDREnrichmentRatio':[], 'BPmaxEnrichmentRatio':[]} @@ -37,25 +37,25 @@ def outputMergableORA_df(module_ora_file:str, study:str, trait:str, network:str, dict_ora[GOtype].append(GOcount) return pd.DataFrame(dict_ora) - + def main(): - + # parse nextflow parameters masterSummaryPiece = '$masterSummaryPiece' oraResultsDir = '$oraSummaryDir' output_directory = "summary/" goFile = '$goFile' - + # Check if the output directory exists, if not create it if not os.path.exists(output_directory): os.makedirs(output_directory) - + df_summary_piece = pd.read_csv(masterSummaryPiece) df_summary_piece[['study', 'trait', 'network', 'moduleIndex']] = df_summary_piece[['study', 'trait', 'network', 'moduleIndex']].astype(str) study, trait, network = os.path.basename(goFile).split(".")[0].split("_")[1:4] - + ora_dfs = [] if len(os.listdir(oraResultsDir)) == 0: # if there are no significant modules from the ORA result ora_dfs.append(pd.DataFrame({'study':[study], 'trait':[trait], 'network':[network], 'moduleIndex':[0], @@ -67,20 +67,20 @@ def main(): moduleIndex = os.path.basename(file).split(".")[0].split("_")[4] df_ora = outputMergableORA_df(file, study, trait, network, moduleIndex) ora_dfs.append(df_ora) - + df_ora_merged = pd.concat(ora_dfs, ignore_index=True) df_ora_merged[['study', 'trait', 'network', 'moduleIndex']] = df_ora_merged[['study', 'trait', 'network', 'moduleIndex']].astype(str) df_merge = pd.merge(df_summary_piece, df_ora_merged, how='left', on=['study','trait','network', 'moduleIndex']) df_merge.fillna("NA", inplace=True) mergedFileName = f"{study}_{trait}_{network}.csv" df_merge.to_csv(os.path.join(output_directory, mergedFileName), index=False) - + def print_versions(): import sys with open("versions.yml", "w") as file: - file.write('"${task.process}"\\n') - file.write(f' python: {sys.version}\\n') - file.write(f' pandas: {pd.__version__}\\n') + file.write('"${task.process}":\\n') + file.write(f' python: {sys.version}\\n') + file.write(f' pandas: {pd.__version__}\\n') if __name__ == "__main__": main() diff --git a/modules/local/mea/pascal/main.nf b/modules/local/mea/pascal/main.nf index 3aa6778..265808f
100644 --- a/modules/local/mea/pascal/main.nf +++ b/modules/local/mea/pascal/main.nf @@ -2,7 +2,7 @@ process RUN_PASCAL { label 'process_low' - container 'docker://jungwooseok/mea_pascal:1.1' + container 'jungwooseok/mea_pascal:1.1' input: tuple val(module_id), path(geneScoreFile), path(moduleFile), path(goFile) @@ -19,4 +19,4 @@ process RUN_PASCAL { script: def args = task.ext.args ?: '' template 'runPascal.py' -} \ No newline at end of file +} diff --git a/modules/local/mea/pascal/templates/runPascal.py b/modules/local/mea/pascal/templates/runPascal.py index 80981bb..fbb6f70 100755 --- a/modules/local/mea/pascal/templates/runPascal.py +++ b/modules/local/mea/pascal/templates/runPascal.py @@ -14,14 +14,14 @@ def main(): moduleFile = '$moduleFile' outputPath = 'pascalOutput/' - print(scoreFile) + print(scoreFile) print(moduleFile) print(outputPath) # Check if the output directory exists, if not create it if not os.path.exists(outputPath): print("creating outputPath") os.makedirs(outputPath) - + #for moduleFile, scoreFile in zip(moduleFiles, scoreFiles): Scorer = genescorer.chi2sum() Scorer.load_scores(scoreFile) @@ -37,11 +37,11 @@ def print_versions(): import sys import PascalX with open("versions.yml", "w") as file: - file.write('"${task.process}"\\n') - file.write(f' python: {sys.version}\\n') - file.write(f' PascalX: {PascalX.__version__}\\n') + file.write('"${task.process}":\\n') + file.write(f' python: {sys.version}\\n') + file.write(f' PascalX: {PascalX.__version__}\\n') if __name__ == "__main__": main() - print_versions() \ No newline at end of file + print_versions() diff --git a/modules/local/mea/postprocess/environment.yml b/modules/local/mea/postprocess/environment.yml new file mode 100644 index 0000000..6509854 --- /dev/null +++ b/modules/local/mea/postprocess/environment.yml @@ -0,0 +1,10 @@ +name: postprocess +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - python=3.8.3 + - pandas=2.2.0 + - numpy=1.26.3 + - statsmodels=0.14.1 diff --git a/modules/local/mea/postprocess/main.nf b/modules/local/mea/postprocess/main.nf index 8decab9..6bf7be3 100644 --- a/modules/local/mea/postprocess/main.nf +++ b/modules/local/mea/postprocess/main.nf @@ -2,8 +2,13 @@ process POSTPROCESS_PASCAL { label 'process_low' - container 'docker://jungwooseok/mea:1.0.0' - // TODO: requested BioContainer + // container 'jungwooseok/mea:1.0.0' + conda "${moduleDir}/environment.yml" + // TODO: BioContainers image requested + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-9d836da785124bb367cbe6fbfc00dddd2107a4da:b033d6a4ea3a42a6f5121a82b262800f1219b382-0' : + 'quay.io/biocontainers/mulled-v2-9d836da785124bb367cbe6fbfc00dddd2107a4da:b033d6a4ea3a42a6f5121a82b262800f1219b382-0' }" + input: tuple val(module_id), path(pascalOutputFile), path(geneScoreFilePascalInput), path(goFile) @@ -22,4 +27,4 @@ process POSTPROCESS_PASCAL { script: def args = task.ext.args ?: '' template 'processPascalOutput.py' -} \ No newline at end of file +} diff --git a/modules/local/mea/postprocess/templates/processPascalOutput.py b/modules/local/mea/postprocess/templates/processPascalOutput.py index 1931bd1..545238d 100755 --- a/modules/local/mea/postprocess/templates/processPascalOutput.py +++ b/modules/local/mea/postprocess/templates/processPascalOutput.py @@ -10,7 +10,7 @@ def extractGenesBasedOnPval(DIRPATH:str, pval:float): """ - Given a GeneScore file in tsv file format without header and a pvalue threshold, + Given a GeneScore file in tsv file format without header and a pvalue threshold, output list of genes with pval less than the threshold Args: @@ -61,8 +61,8 @@ def recordModulesFromPascalResult(result, OUTPUTPATH, sigGenesList, almostSigGen if gene in sig3GenesList: sig3Genes.append(gene) if gene in sig2GenesList: - sig2Genes.append(gene) - + sig2Genes.append(gene) + # assumes index of 2 represents bool indicating significance of the module if item[2]: moduleIndexToSigFlag[item[0]] = True @@ -124,17 +124,17 @@ def processOnePascalOutput(DIRPATH:str, alpha:float, outputPATH:str): pathwayIndexList.append(module_idx) pathwayGenesList.append(module_genes) pathwayPvalList.append(module_pval) - + # multiple testing correction: BH (FDR) or Bonferroni correctedPathwayPvalList = multipletests(pathwayPvalList, alpha, method='bonferroni') #Bonferroni - - # output csv file + + # output csv file df = pd.DataFrame(list(zip(pathwayIndexList, pathwayGenesList,pathwayPvalList, correctedPathwayPvalList[1])), - columns=['moduleIndex', 'moduleGenes', 'modulePval', 'correctedModulePval']) + columns=['moduleIndex', 'moduleGenes', 'modulePval', 'correctedModulePval']) df.to_csv(outputPATH) - + result = [] - + numSigPathway = sum(correctedPathwayPvalList[0]) for tup in zip(pathwayIndexList, pathwayGenesList, correctedPathwayPvalList[0], correctedPathwayPvalList[1], pathwayPvalList): result.append(tup) @@ -147,13 +147,13 @@ def print_versions(): import sys import statsmodels with open("versions.yml", "w") as file: - file.write('"${task.process}"\\n') - file.write(f' python: {sys.version}\\n') - file.write(f' pandas: {pd.__version__}\\n') - file.write(f' statsmodels: {statsmodels.__version__}\\n') + file.write('"${task.process}":\\n') + file.write(f' python: {sys.version}\\n') + file.write(f' pandas: {pd.__version__}\\n') + file.write(f' statsmodels: {statsmodels.__version__}\\n') def main(): - + # parse nextflow input variables pascalOutputFile = '$pascalOutputFile' alpha = float('$alpha') @@ -161,14 +161,14 @@ def main(): geneScoreFilePath = '$geneScoreFilePascalInput' significantModulesOutDir = 'significantModules/' numTests = int('$numTests') - + study = pascalOutputFile.split("_")[0] trait = pascalOutputFile.split("_")[1] network = pascalOutputFile.split("_")[2].replace(".txt", "") rpIndex = trait.split("-")[0] - + sigPvalThreshold = 0.05 / numTests - + # Check if the output directory exists, if not create it if not os.path.exists(outputPath): os.makedirs(outputPath) @@ -191,7 +191,7 @@ def main(): 'sig3Genes':[], 'sig4Genes':[] } - + # create summary file
for one pascal output file. result, numSigPathway = processOnePascalOutput(pascalOutputFile, alpha, os.path.join(outputPath, "pascalResult.csv")) sigGenesList = extractGenesBasedOnPval(geneScoreFilePath,sigPvalThreshold) @@ -218,11 +218,11 @@ def main(): summary_dict['sig2Genes'].append(sig2GenesDict[moduleIndex]) summary_dict['sig3Genes'].append(sig3GenesDict[moduleIndex]) summary_dict['sig4Genes'].append(sig4GenesDict[moduleIndex]) - + df_summary = pd.DataFrame(summary_dict) #df_summary.to_csv(os.path.join(outputPath, f"master_summary_slice_{rpIndex}.csv"), index=False) df_summary.to_csv(os.path.join(outputPath, f"master_summary_slice_{trait}_{network}.csv"), index=False) if __name__ == "__main__": main() - print_versions() \ No newline at end of file + print_versions() diff --git a/modules/local/mea/preprocess/environment.yml b/modules/local/mea/preprocess/environment.yml new file mode 100644 index 0000000..97bbe02 --- /dev/null +++ b/modules/local/mea/preprocess/environment.yml @@ -0,0 +1,8 @@ +name: preprocess +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - python=3.8.3 + - pandas=1.1.5 diff --git a/modules/local/mea/preprocess/main.nf b/modules/local/mea/preprocess/main.nf index e4885a3..3a92cee 100644 --- a/modules/local/mea/preprocess/main.nf +++ b/modules/local/mea/preprocess/main.nf @@ -2,7 +2,7 @@ process PREPROCESS_PASCAL { label 'process_low' - // container 'docker://jungwooseok/mea:1.0.0' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pandas:1.1.5' : 'quay.io/biocontainers/pandas:1.1.5' }" @@ -23,4 +23,4 @@ process PREPROCESS_PASCAL { script: def args = task.ext.args ?: '' template 'preProcessForPascal.py' -} \ No newline at end of file +} diff --git a/modules/local/mea/preprocess/templates/preProcessForPascal.py b/modules/local/mea/preprocess/templates/preProcessForPascal.py index 4d9f876..c72ee1b 100755 --- a/modules/local/mea/preprocess/templates/preProcessForPascal.py +++ b/modules/local/mea/preprocess/templates/preProcessForPascal.py @@ -6,7 +6,7 @@ def extractGeneSetFromModuleFile(MODULEPATH:str): """ - Read a module file and extract a set of genes in the file. + Read a module file and extract a set of genes in the file. It assumes the input file is in tsv format where the gene name starts to appear from the third column Args: @@ -43,25 +43,25 @@ def pairwiseProcessGeneScoreAndModule(GSPATH: str, MODULEPATH: str, OUTPUTPATH: Returns: None. The processed gene score file, processed module file, and the GO background set file are saved to the corresponding directories.
""" - + # Read the gene score file - df_gs = pd.read_csv(GSPATH, sep=sep) + df_gs = pd.read_csv(GSPATH, sep=sep) genesWithScore = set(df_gs[geneNameCol]) genesInModule = extractGeneSetFromModuleFile(MODULEPATH) intersectingGenes = genesWithScore.intersection(genesInModule) - + moduleFileName = MODULEPATH.split("/")[-1] # Output processed gene score file to be used for PASCAL with open(os.path.join(OUTPUTPATH, f"GS_{pipeline}_{trait}_{moduleFileName[:-4]}.tsv"), "w") as f: for index, row in df_gs.iterrows(): f.write(f"{row[geneNameCol]}\\t{str(row[pvalCol])}\\n") - + # Output GO background set file with open(os.path.join(OUTPUTPATH, f"GO_{pipeline}_{trait}_{moduleFileName[:-4]}.txt"), "w") as f: for index, row in df_gs.iterrows(): if row[geneNameCol] in intersectingGenes: f.write(f"{row[geneNameCol]}\\n") - + # Output processed module file after intersecting with the gene score file with open(os.path.join(OUTPUTPATH, f"Module_{pipeline}_{trait}_{moduleFileName[:-4]}.tsv"), "w") as f: with open(MODULEPATH, "r") as g: @@ -84,7 +84,7 @@ def write_versions_file(): f.write(f' pandas: "{pd.__version__}"\\n') def main(): - + # parse nextflow process input variables scoreFile = '$gene_score_file' moduleFileDir = '$module_file_dir' @@ -100,10 +100,10 @@ def main(): # Check if the output directory exists, if not create it if not os.path.exists(outputPath): os.makedirs(outputPath) - + filePath = moduleFileDir pairwiseProcessGeneScoreAndModule(scoreFile, filePath, outputPath, pipelineName, traitName, geneColName, pvalColName) - - + + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/modules/local/mmap/mmap/main.nf b/modules/local/mmap/mmap/main.nf index a7cde6f..83813ee 100644 --- a/modules/local/mmap/mmap/main.nf +++ b/modules/local/mmap/mmap/main.nf @@ -2,7 +2,7 @@ process MMAP { label 'process_low' - container 'docker://jungwooseok/mmap:1.0.2' + container 'jungwooseok/mmap:1.0.2' input: val gene @@ -30,13 +30,13 @@ process MMAP { covariates=" $gene " \$mmap \\ - --ped "${pedigree_file}" \\ - --trait ${trait} \\ - --covariates \$covariates \\ - --phenotype_filename "${phenotype_file}" \\ - --read_binary_covariance_file "${covariance_matrix_file}" \\ - --single_pedigree \\ - --file_suffix "kinship_${gene}" + --ped "${pedigree_file}" \\ + --trait ${trait} \\ + --covariates \$covariates \\ + --phenotype_filename "${phenotype_file}" \\ + --read_binary_covariance_file "${covariance_matrix_file}" \\ + --single_pedigree \\ + --file_suffix "kinship_${gene}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/mmap/mmap_parse/bin/parse_MMAP_output.py b/modules/local/mmap/mmap_parse/bin/parse_MMAP_output.py index 04dea24..df0f09d 100644 --- a/modules/local/mmap/mmap_parse/bin/parse_MMAP_output.py +++ b/modules/local/mmap/mmap_parse/bin/parse_MMAP_output.py @@ -9,7 +9,7 @@ def generate_statistics(output_MMAP): # read unparsed MMAP output file output_file = pd.read_csv(output_MMAP, header = None) - + #extract relevant columns and rows from the unparsed file output_file_pval_genes = output_file[[4,5,6,22,23,24]] output_file_gene_names_to_parse = output_file_pval_genes[output_file_pval_genes[4] == "h2"] @@ -46,10 +46,10 @@ def generate_statistics(output_MMAP): #write the new dataframe/parsed mmap output to the output path file_identifier = output_MMAP.split("/")[len(output_MMAP.split("/")) - 1].split(".")[0] - parsed_output.to_csv(f"parsed_output_{str(file_identifier)}.csv", index = None) + 
parsed_output.to_csv(f"parsed_output_{str(file_identifier)}.csv", index = None) if __name__ == "__main__": - from argparse import ArgumentParser + from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument('--output_MMAP', '-output_MMAP', help='aggregated and unparsed MMAP output for different individual gene models') args = parser.parse_args() diff --git a/modules/local/mmap/mmap_parse/environment.yml b/modules/local/mmap/mmap_parse/environment.yml index 4a15882..1f7c586 100644 --- a/modules/local/mmap/mmap_parse/environment.yml +++ b/modules/local/mmap/mmap_parse/environment.yml @@ -1,9 +1,9 @@ name: mmap_parse channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda + - defaults dependencies: - - python=3.8.3 - - numpy=1.19.5 - - pandas=1.1.5 \ No newline at end of file + - python=3.8.3 + - numpy=1.19.5 + - pandas=1.1.5 diff --git a/modules/local/mmap/mmap_parse/main.nf b/modules/local/mmap/mmap_parse/main.nf index 4763f36..a2ea5e2 100644 --- a/modules/local/mmap/mmap_parse/main.nf +++ b/modules/local/mmap/mmap_parse/main.nf @@ -5,7 +5,7 @@ process MMAP_PARSE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-344874846f44224e5f0b7b741eacdddffe895d1e:d3fff24ee1297b4c3bcef48354c2a30f0c82007a-0' : - 'biocontainers/mulled-v2-344874846f44224e5f0b7b741eacdddffe895d1e:d3fff24ee1297b4c3bcef48354c2a30f0c82007a-0' }" + 'quay.io/biocontainers/mulled-v2-344874846f44224e5f0b7b741eacdddffe895d1e:d3fff24ee1297b4c3bcef48354c2a30f0c82007a-0' }" input: path output_MMAP @@ -21,7 +21,7 @@ process MMAP_PARSE { def args = task.ext.args ?: '' """ python3 ${moduleDir}/bin/parse_MMAP_output.py \\ - --output_MMAP ${output_MMAP} + --output_MMAP ${output_MMAP} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/pascal/bin/pascal.py b/modules/local/pascal/bin/pascal.py index 01e923d..440b6ad 100755 --- a/modules/local/pascal/bin/pascal.py +++ b/modules/local/pascal/bin/pascal.py @@ -15,7 +15,7 @@ def pascal(gwas_file, manhattan_plot_file, gene_annotation, ref_panel, output_fi Scorer = genescorer.chi2sum(window=50000, varcutoff=0.99, MAF=0.05, genome=None, gpu=False) print("Gene level scoring starts...") - #Load reference panel + #Load reference panel Scorer.load_refpanel(ref_panel, qualityT = None, parallel = 1, keepfile=None) # Scorer.load_refpanel(ref_panel, qualityT = None, parallel = 1, keepfile=None, chrlist=[1]) ## TESTING:: CHR22 ONLY FOR TESTING print("Done importing reference panel") @@ -43,7 +43,7 @@ def pascal(gwas_file, manhattan_plot_file, gene_annotation, ref_panel, output_fi plt.savefig(manhattan_plot_file) if __name__ == "__main__": - from argparse import ArgumentParser + from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument('--gwas_file', '-gwas_file', help='gwas') parser.add_argument('--manhattan_plot_file') @@ -52,10 +52,10 @@ def pascal(gwas_file, manhattan_plot_file, gene_annotation, ref_panel, output_fi parser.add_argument('--output_file') args = parser.parse_args() - pascal(gwas_file = args.gwas_file, manhattan_plot_file = args.manhattan_plot_file, gene_annotation = args.gene_annotation, ref_panel = args.ref_panel, output_file = args.output_file) + pascal(gwas_file = args.gwas_file, manhattan_plot_file = args.manhattan_plot_file, gene_annotation = args.gene_annotation, ref_panel = args.ref_panel, output_file = args.output_file) print("done") - - 
+ + diff --git a/modules/local/pascal/main.nf b/modules/local/pascal/main.nf index 4090ad6..46cc879 100644 --- a/modules/local/pascal/main.nf +++ b/modules/local/pascal/main.nf @@ -1,7 +1,7 @@ process PASCAL { label 'process_medium' - container 'docker://jungwooseok/pascal:1.0.3' + container 'jungwooseok/pascal:1.0.3' // publishDir "results/pascal", mode:'copy', saveAs: { filename -> filename.endsWith(".csv") ? "PASCAL.csv" : filename} @@ -32,11 +32,11 @@ process PASCAL { tar -xzvf ${ref_panel} python3 ${moduleDir}/bin/pascal.py \\ - --gwas_file $gwas_file \\ - --gene_annotation $gene_annotation \\ - --ref_panel $ref_panel_name \\ - --manhattan_plot_file manhattan_plot.png \\ - --output_file pascal_out.tsv + --gwas_file $gwas_file \\ + --gene_annotation $gene_annotation \\ + --ref_panel $ref_panel_name \\ + --manhattan_plot_file manhattan_plot.png \\ + --output_file pascal_out.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index f0c63f6..4c6c492 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -1,7 +1,7 @@ name: custom_dumpsoftwareversions channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda + - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 7685b33..105f926 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 5f15a5f..ea89d55 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -2,36 +2,36 @@ name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - - custom - - dump - - version + - custom + - dump + - version tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@drpatelh" - - "@grst" + - "@drpatelh" + - "@grst" maintainers: - - "@drpatelh" - - "@grst" + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test index eec1db1..b1e1630 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -31,7 +31,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } ) } } diff --git 
a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 4274ed5..5f59a93 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - } + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-11-03T14:43:22.157011" + "timestamp": "2024-01-09T23:01:18.710682" } -} +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml index 405aa24..12c9030 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -1,2 +1,2 @@ custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 1787b38..0c6458a 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,7 +1,7 @@ name: fastqc channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda + - defaults dependencies: - - bioconda::fastqc=0.12.1 + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e..6bada9f 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -1,57 +1,57 @@ name: fastqc description: Run FastQC on sequenced reads keywords: - - quality control - - qc - - adapters - - fastq + - quality control + - qc + - adapters + - fastq tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. 
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index b9e8f92..70edae4 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,24 +3,20 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = [ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) """ } } @@ -28,82 +24,189 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
// <div id="header_filename">Mon 2 Oct 2023</div><br/>test.gz
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } ) } } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], 
checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { -// input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + 
then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..86f7c31 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,88 @@ { - "versions": { + "fastqc_versions_interleaved": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" } } \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml index 7834294..da7a9d6 100644 --- a/modules/nf-core/fastqc/tests/tags.yml +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -1,2 +1,2 @@ fastqc: - - modules/nf-core/fastqc/** + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index bc0bdb5..06c4db4 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,7 @@ name: multiqc channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda + - defaults dependencies: - - bioconda::multiqc=1.18 + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d..354f443 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : - 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660..7943322 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,59 +1,58 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. 
+ pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" maintainers: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad21..f1c4242 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -3,19 +3,17 @@ nextflow_process { name "Test Process MULTIQC" script "../main.nf" process "MULTIQC" + tag "modules" tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -26,23 +24,20 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] @@ -53,9 +48,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options 
"-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..c204b48 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:28:51.744211298" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:29:28.847433492" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:29:13.223621555" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml index bea6c0d..788f3e4 100644 --- a/modules/nf-core/multiqc/tests/tags.yml +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -1,2 +1,2 @@ multiqc: - - modules/nf-core/multiqc/** + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index d0a6316..7a87452 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,13 +9,12 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null // pascal options pascal_header = 0 - pascal_pval_col = 1 + pascal_pval_col = 1 // mmap options mmap_header = 1 @@ -47,7 +46,6 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten @@ -76,7 +74,7 @@ try { } // Load nf-core/omicsgenetraitassociation custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
// try { // includeConfig "${params.custom_config_base}/pipeline/omicsgenetraitassociation.config" // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/omicsgenetraitassociation profiles: ${params.custom_config_base}/pipeline/omicsgenetraitassociation.config") // } @@ -96,6 +94,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -156,16 +155,6 @@ profiles { singularity.enabled = false podman.enabled = false shifter.enabled = false - apptainer.enabled = false - } - apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false charliecloud.enabled = false } gitpod { @@ -181,21 +170,19 @@ profiles { // Will not be used unless Apptainer / Docker / Podman / Singularity are enabled // Set to your registry if you have a mirror of containers apptainer.registry = 'quay.io' -docker.registry = 'quay.io' +docker.registry = 'docker.io' podman.registry = 'quay.io' -singularity.registry = 'quay.io' +singularity.registry = 'docker.io' // Nextflow plugins plugins { id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } -//// Load igenomes.config if required -//if (!params.igenomes_ignore) { -// includeConfig 'conf/igenomes.config' -//} else { -// params.genomes = [:] -//} // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. diff --git a/nextflow_schema.json b/nextflow_schema.json index 6ef914e..b865a9f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -206,7 +206,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { diff --git a/pyproject.toml b/pyproject.toml index 0d62beb..5611062 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
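+# Ruff rule groups selected below: "I" (import sorting, replacing isort), "E"/"F" (pycodestyle/Pyflakes), "UP" (pyupgrade), "N" (pep8-naming)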
-[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/cma.nf b/subworkflows/local/cma.nf index 89223e3..f2903b2 100644 --- a/subworkflows/local/cma.nf +++ b/subworkflows/local/cma.nf @@ -1,12 +1,12 @@ // -// test CMA +// test CMA // include { CMA } from '../../modules/local/cma/cma' workflow CMA_SUBWORKFLOW { take: - input_files + input_files trait category @@ -17,24 +17,24 @@ workflow CMA_SUBWORKFLOW { ch_tetrachor = Channel.empty() if (params.cma_test) { - input_files = Channel.fromPath("${params.cma_two_traits}/*.csv").toList() + input_files = Channel.fromPath("${params.cma_two_traits}/*.csv").toList() } // CMA CMA ( - input_files, - trait, - category + input_files, + trait, + category ) ch_pval = CMA.out.pval ch_tetrachor = CMA.out.tetrachor ch_versions = ch_versions.mix(CMA.out.versions) - + emit: pval = ch_pval tetrachor = ch_tetrachor - versions = ch_versions -} \ No newline at end of file + versions = ch_versions +} diff --git a/subworkflows/local/mmap.nf b/subworkflows/local/mmap.nf index 87544bc..4056a22 100644 --- a/subworkflows/local/mmap.nf +++ b/subworkflows/local/mmap.nf @@ -16,9 +16,9 @@ workflow MMAP_SUBWORKFLOW { main: ch_versions = Channel.empty() ch_concatenated_mmap = Channel.empty() - ch_mmap_genes = Channel.fromPath(gene_list_file) - .splitText() - .map ( gene -> gene.trim() ) + ch_mmap_genes = Channel.fromPath(gene_list_file) + .splitText() + .map ( gene -> gene.trim() ) ch_mmap_cma_format = Channel.empty() // @@ -26,10 +26,10 @@ workflow MMAP_SUBWORKFLOW { // // TODO: add gene to meta field MMAP ( - ch_mmap_genes, trait, phenotype_file.first(), pedigree_file, covariance_matrix_file + ch_mmap_genes, trait, phenotype_file.first(), pedigree_file, covariance_matrix_file ) ch_concatenated_mmap = MMAP.out.csv - .collectFile(name: 'mmap_results.csv', cache:false) + .collectFile(name: 'mmap_results.csv', cache:false) ch_versions = ch_versions.mix(MMAP.out.versions) // @@ -37,7 +37,7 @@ workflow MMAP_SUBWORKFLOW { // // TODO: propagate meta MMAP_PARSE ( - ch_concatenated_mmap + ch_concatenated_mmap ) ch_mmap_parsed = MMAP_PARSE.out.mmap_parsed_output ch_versions = ch_versions.mix(MMAP_PARSE.out.versions) @@ -47,12 +47,12 @@ workflow MMAP_SUBWORKFLOW { // // TODO: propagate meta FORMAT_CMA_INPUT ( - ch_mmap_parsed, - "MMAP", - params.mmap_header, - params.mmap_pval_col, - params.mmap_beta_col, - params.mmap_se_genes + ch_mmap_parsed, + "MMAP", + params.mmap_header, + params.mmap_pval_col, + params.mmap_beta_col, + params.mmap_se_genes ) ch_mmap_cma_format = FORMAT_CMA_INPUT.out.csv ch_versions = ch_versions.mix(FORMAT_CMA_INPUT.out.versions) @@ -62,4 +62,4 @@ workflow MMAP_SUBWORKFLOW { parsed_mmap_output = ch_mmap_parsed cma_format_output = ch_mmap_cma_format versions = ch_versions -} \ No newline at end of file +} diff --git a/subworkflows/local/pascal.nf b/subworkflows/local/pascal.nf index e501343..ceba5a4 100644 --- a/subworkflows/local/pascal.nf +++ b/subworkflows/local/pascal.nf @@ -15,19 +15,19 @@ workflow PASCAL_SUBWORKFLOW { ch_pascal_out = Channel.empty() PASCAL ( - gwas_file, gene_annotation, ref_panel + gwas_file, gene_annotation, ref_panel ) 
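+ // PASCAL emits [ meta, file ] tuples; the map below drops the meta so FORMAT_CMA_INPUT receives plain files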
diff --git a/subworkflows/local/pascal.nf b/subworkflows/local/pascal.nf
index e501343..ceba5a4 100644
--- a/subworkflows/local/pascal.nf
+++ b/subworkflows/local/pascal.nf
@@ -15,19 +15,19 @@ workflow PASCAL_SUBWORKFLOW {
     ch_pascal_out = Channel.empty()
 
     PASCAL (
-        gwas_file, gene_annotation, ref_panel
+        gwas_file, gene_annotation, ref_panel
     )
     ch_pascal_out = PASCAL.out.tsv
-        .map { meta, file -> file}
+        .map { meta, file -> file}
     ch_versions = ch_versions.mix(PASCAL.out.versions)
 
     FORMAT_CMA_INPUT (
-        ch_pascal_out,
-        "PASCAL",
-        params.pascal_header,
-        params.pascal_pval_col,
-        [],
-        []
+        ch_pascal_out,
+        "PASCAL",
+        params.pascal_header,
+        params.pascal_pval_col,
+        [],
+        []
     )
     ch_pascal_cma_format = FORMAT_CMA_INPUT.out.csv
     ch_versions = ch_versions.mix(FORMAT_CMA_INPUT.out.versions)
@@ -36,4 +36,4 @@ workflow PASCAL_SUBWORKFLOW {
     pascal_output = ch_pascal_out
     cma_format_output = ch_pascal_cma_format
     versions = ch_versions
-}
\ No newline at end of file
+}
diff --git a/tower.yml b/tower.yml
index 787aedf..8a2441d 100644
--- a/tower.yml
+++ b/tower.yml
@@ -1,5 +1,5 @@
 reports:
-    multiqc_report.html:
-        display: "MultiQC HTML report"
-    samplesheet.csv:
-        display: "Auto-created samplesheet with collated metadata and FASTQ paths"
+  multiqc_report.html:
+    display: "MultiQC HTML report"
+  samplesheet.csv:
+    display: "Auto-created samplesheet with collated metadata and FASTQ paths"
diff --git a/workflows/omicsgenetraitassociation.nf b/workflows/omicsgenetraitassociation.nf
index 524dff3..1d28997 100644
--- a/workflows/omicsgenetraitassociation.nf
+++ b/workflows/omicsgenetraitassociation.nf
@@ -36,7 +36,7 @@ WorkflowOmicsgenetraitassociation.initialise(params, log)
 // MODULES: local modules
 //
 include { PASCAL } from '../modules/local/pascal'
-include { MMAP } from '../modules/local/mmap/mmap'
+include { MMAP } from '../modules/local/mmap/mmap'
 include { MMAP_PARSE } from '../modules/local/mmap/mmap_parse'
 include { PREPROCESS_PASCAL } from '../modules/local/mea/preprocess'
 include { RUN_PASCAL } from '../modules/local/mea/pascal'
@@ -48,7 +48,7 @@ include { MERGE_ORA_AND_SUMMARY } from '../modules/local/mea/merge_ora_and
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
 // include { INPUT_CHECK } from '../subworkflows/local/input_check'
-include { PASCAL_SUBWORKFLOW } from '../subworkflows/local/pascal'
+include { PASCAL_SUBWORKFLOW } from '../subworkflows/local/pascal'
 include { MMAP_SUBWORKFLOW } from '../subworkflows/local/mmap'
 include { CMA_SUBWORKFLOW } from '../subworkflows/local/cma'
 
@@ -104,9 +104,9 @@ workflow OMICSGENETRAITASSOCIATION {
     // MODULE: PASCAL
     //
     PASCAL_SUBWORKFLOW (
-        ch_input.pascal,
-        params.pascal_gene_annotation,
-        params.pascal_ref_panel
+        ch_input.pascal,
+        params.pascal_gene_annotation,
+        params.pascal_ref_panel
     )
     ch_pascal_output = PASCAL_SUBWORKFLOW.out.pascal_output
     ch_pascal_cma_format = PASCAL_SUBWORKFLOW.out.cma_format_output
@@ -118,11 +118,11 @@ workflow OMICSGENETRAITASSOCIATION {
     // SUBWORKFLOW: MMAP_SUBWORKFLOW
     //
     MMAP_SUBWORKFLOW (
-        params.mmap_gene_list,
-        params.trait,
-        ch_input.twas,
-        params.mmap_pedigree_file,
-        params.mmap_cov_matrix_file
+        params.mmap_gene_list,
+        params.trait,
+        ch_input.twas,
+        params.mmap_pedigree_file,
+        params.mmap_cov_matrix_file
     )
     ch_mmap_parsed = MMAP_SUBWORKFLOW.out.parsed_mmap_output
     ch_mmap_cma_format = MMAP_SUBWORKFLOW.out.cma_format_output
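Most of the `pascal.nf` and workflow hunks above only re-indent call sites, but the `.map { meta, file -> file}` line deserves reviewer attention: it strips the `meta` map from `PASCAL.out.tsv` before the file is handed to CMA formatting, so nothing downstream of this point can recover per-run metadata from the channel itself. A toy illustration with hypothetical values:

```groovy
// Toy illustration (hypothetical values) of the meta-dropping pattern in
// PASCAL_SUBWORKFLOW: a [meta, file] tuple channel is reduced to a bare file
// channel, discarding the metadata element of each tuple.
workflow {
    Channel.of([[id: 'traitA'], file('pascal_scores.tsv')])
        .map { meta, scores -> scores }   // keep the file, drop the meta map
        .view()
}
```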
@@ -136,40 +136,40 @@ workflow OMICSGENETRAITASSOCIATION {
     // ch_mmap_cma_format.view()
 
     ch_cma_input_files = ch_pascal_cma_format
-        .mix(ch_mmap_cma_format)
-        .toList()
+        .mix(ch_mmap_cma_format)
+        .toList()
 
     CMA_SUBWORKFLOW (
-        ch_cma_input_files,
-        params.trait,
-        []
+        ch_cma_input_files,
+        params.trait,
+        []
     )
     ch_pval = CMA_SUBWORKFLOW.out.pval
-        .collect()
+        .collect()
     ch_versions = ch_versions.mix(CMA_SUBWORKFLOW.out.versions)
 
     //
-    // MODULE: PREPROCESSFORPASCAL
+    // MODULE: PREPROCESSFORPASCAL
     //
     ch_mea_preprocess_input = ch_pval
-        .multiMap{ pval ->
-            gene_score_file: pval
-            meta: tuple ( params.pipeline, params.trait, params.gene_col_name, params.pval_col_name)
-        }
+        .multiMap{ pval ->
+            gene_score_file: pval
+            meta: tuple ( params.pipeline, params.trait, params.gene_col_name, params.pval_col_name)
+        }
 
     ch_module_files = Channel.fromPath("${params.module_file_dir}/*.txt")
-        .map { module_file ->
-            tuple ( module_file.baseName, module_file)
-        }
+        .map { module_file ->
+            tuple ( module_file.baseName, module_file)
+        }
 
     ch_preprocess_input = ch_mea_preprocess_input.gene_score_file
-        .combine(ch_module_files)
-        .combine(ch_mea_preprocess_input.meta)
-        .multiMap { gene_score_file, module_id, module_file_dir, pipeline, trait, gene_col_name, pval_col_name ->
-            gene_score_file: gene_score_file
-            module_file: tuple (module_id, module_file_dir)
-            meta: tuple (pipeline, trait, gene_col_name, pval_col_name)
-        }
+        .combine(ch_module_files)
+        .combine(ch_mea_preprocess_input.meta)
+        .multiMap { gene_score_file, module_id, module_file_dir, pipeline, trait, gene_col_name, pval_col_name ->
+            gene_score_file: gene_score_file
+            module_file: tuple (module_id, module_file_dir)
+            meta: tuple (pipeline, trait, gene_col_name, pval_col_name)
+        }
 
     PREPROCESS_PASCAL (
         ch_preprocess_input
@@ -182,8 +182,8 @@ workflow OMICSGENETRAITASSOCIATION {
     // MODULE: MEA PASCAL
     //
     RUN_PASCAL (
-        ch_mea_paths,
-        ch_mea_meta
+        ch_mea_paths,
+        ch_mea_meta
     )
     ch_pascal_paths = RUN_PASCAL.out.paths
 
@@ -194,10 +194,10 @@ workflow OMICSGENETRAITASSOCIATION {
     // MODULE: POSTPROCESS_PASCAL
     //
     POSTPROCESS_PASCAL (
-        ch_pascal_paths,
-        ch_pascal_meta,
-        params.numtests,
-        params.alpha
+        ch_pascal_paths,
+        ch_pascal_meta,
+        params.numtests,
+        params.alpha
     )
     ch_postprocess_paths = POSTPROCESS_PASCAL.out.paths
     ch_postprocess_meta = POSTPROCESS_PASCAL.out.meta
@@ -207,8 +207,8 @@ workflow OMICSGENETRAITASSOCIATION {
     // MODULE: GO analysis
     //
     GO_ANALYSIS (
-        ch_postprocess_paths,
-        ch_postprocess_meta
+        ch_postprocess_paths,
+        ch_postprocess_meta
     )
     ch_go_paths = GO_ANALYSIS.out.paths
     ch_go_meta = GO_ANALYSIS.out.meta
@@ -219,8 +219,8 @@ workflow OMICSGENETRAITASSOCIATION {
     // TODO: each run of MERGE_ORA_AND_SUMMARY overwrites contents of summary_dir. should not happen
     //
     MERGE_ORA_AND_SUMMARY (
-        ch_go_paths,
-        ch_go_meta
+        ch_go_paths,
+        ch_go_meta
     )
     ch_merge_summary_dir = MERGE_ORA_AND_SUMMARY.out.summary_dir
     ch_merge_summary_files = MERGE_ORA_AND_SUMMARY.out.summary_files
@@ -231,13 +231,12 @@ workflow OMICSGENETRAITASSOCIATION {
 
     // concatenate summary slices and write to master_summary_<trait>.csv
     ch_master_summary = ch_merge_summary_files
-        .collectFile(name: "master_summary_${params.trait}.csv",
-            cache: false,
-            keepHeader: true,
-            skip: 1,
-            storeDir: "${params.outdir}/mea/"
-        )
-        // .view()
+        .collectFile(name: "master_summary_${params.trait}.csv",
+            cache: false,
+            keepHeader: true,
+            skip: 1,
+            storeDir: "${params.outdir}/mea/"
+        )
 }
 
 /*
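The `combine` + `multiMap` fan-out in this last hunk is the densest channel logic in the diff: every module file is paired with the single gene-score file, the shared meta tuple is carried along, and `multiMap` then splits each combined row into three index-aligned output channels, which is what lets `PREPROCESS_PASCAL` consume them positionally. A reduced sketch of the same shape, with hypothetical values:

```groovy
// Reduced sketch (hypothetical values) of the combine + multiMap fan-out above.
// combine concatenates tuple elements, so each emitted row is
// [scores, module_id, module_file, pipeline, trait, gene_col, pval_col];
// multiMap then splits every row into three parallel, index-aligned channels.
workflow {
    ch_scores  = Channel.of(file('scores.csv'))
    ch_modules = Channel.of(['mod1', file('mod1.txt')], ['mod2', file('mod2.txt')])
    ch_meta    = Channel.of(['cma', 'traitA', 'gene', 'pval'])

    ch_fanout = ch_scores
        .combine(ch_modules)   // -> [scores, module_id, module_file]
        .combine(ch_meta)      // -> [..., pipeline, trait, gene_col, pval_col]
        .multiMap { scores, module_id, module_file, pipeline, trait, gene_col, pval_col ->
            gene_score_file: scores
            module_file: tuple(module_id, module_file)
            meta: tuple(pipeline, trait, gene_col, pval_col)
        }

    ch_fanout.module_file.view()
}
```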