From 0a4c6e479abfa18e632fefca707336c75274c57c Mon Sep 17 00:00:00 2001 From: Neil Horner Date: Fri, 13 Sep 2024 22:19:38 +0000 Subject: [PATCH] Template update --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 4 ++ README.md | 2 +- bin/workflow_glue/check_bam_headers_in_dir.py | 8 ++- bin/workflow_glue/check_xam_index.py | 12 ++-- bin/workflow_glue/report.py | 21 +++---- docs/04_install_and_run.md | 2 +- lib/common.nf | 1 + lib/ingress.nf | 61 +++++++++++++------ main.nf | 1 - nextflow.config | 4 +- 11 files changed, 77 insertions(+), 41 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 87626df..e950f47 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: always_run: true pass_filenames: false additional_dependencies: - - epi2melabs==0.0.56 + - epi2melabs==0.0.57 - id: build_models name: build_models entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 32a7a02..cc5032a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) +## [v1.1.2] +### Updated +- Updated Ezcharts to v0.11.2. + ## [v1.1.1] ## Added - Publish report missing from v1.0.3. diff --git a/README.md b/README.md index 1b7a57a..9e2adc0 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ therefore Nextflow will need to be installed before attempting to run the workflow. The workflow can currently be run using either -[Docker](https://www.docker.com/products/docker-desktop +[Docker](https://www.docker.com/products/docker-desktop) or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/index.html) to provide isolation of the required software. Both methods are automated out-of-the-box provided diff --git a/bin/workflow_glue/check_bam_headers_in_dir.py b/bin/workflow_glue/check_bam_headers_in_dir.py index 44e689b..199e056 100755 --- a/bin/workflow_glue/check_bam_headers_in_dir.py +++ b/bin/workflow_glue/check_bam_headers_in_dir.py @@ -29,7 +29,13 @@ def main(args): for xam_file in target_files: # get the `@SQ` and `@HD` lines in the header with pysam.AlignmentFile(xam_file, check_sq=False) as f: - sq_lines = f.header.get("SQ") + # compare only the SN/LN/M5 elements of SQ to avoid labelling XAM with + # same reference but different SQ.UR as mixed_header (see CW-4842) + sq_lines = [{ + "SN": sq["SN"], + "LN": sq["LN"], + "M5": sq.get("M5"), + } for sq in f.header.get("SQ", [])] hd_lines = f.header.get("HD") # Check if it is sorted. # When there is more than one BAM, merging/sorting diff --git a/bin/workflow_glue/check_xam_index.py b/bin/workflow_glue/check_xam_index.py index 3beae14..f9f631e 100755 --- a/bin/workflow_glue/check_xam_index.py +++ b/bin/workflow_glue/check_xam_index.py @@ -14,12 +14,12 @@ def validate_xam_index(xam_file): Invalid indexes will fail the call with a ValueError: ValueError: fetch called on bamfile without index """ - alignments = pysam.AlignmentFile(xam_file, check_sq=False) - try: - alignments.fetch() - has_valid_index = True - except ValueError: - has_valid_index = False + with pysam.AlignmentFile(xam_file, check_sq=False) as alignments: + try: + alignments.fetch() + has_valid_index = True + except ValueError: + has_valid_index = False return has_valid_index diff --git a/bin/workflow_glue/report.py b/bin/workflow_glue/report.py index 7f71381..af75696 100755 --- a/bin/workflow_glue/report.py +++ b/bin/workflow_glue/report.py @@ -1,5 +1,6 @@ """Create workflow report.""" import json +import math from dominate.tags import p import ezcharts as ezc @@ -42,9 +43,8 @@ def plot_trucations(report, truncations_file): with tabs.add_dropdown_tab(sample): df_sample.drop(columns=['sample_id'], inplace=True) plt = ezc.histplot(data=df_sample, binwidth=5) - plt.xAxis = dict(name='Transgene cassette genome position') - plt.yAxis = dict(name='Number of alignments') - plt.legend = dict(orient='horizontal', top=30) + plt._fig.xaxis.axis_label = 'Transgene cassette genome position' + plt._fig.yaxis.axis_label = 'Number of alignments' EZChart(plt, theme='epi2melabs') @@ -74,13 +74,12 @@ def plot_itr_coverage(report, coverage_file): with tabs.add_dropdown_tab(sample): with Grid(columns=1): for ref, df_ref in df_sample.groupby('ref'): + plt = ezc.lineplot( - data=df_ref, x='pos', y='depth', hue='strand') - plt.title = dict(text=ref) - plt.legend = dict( - orient='horizontal', top=30, icon='rect') - for s in plt.series: - s.showSymbol = False + data=df_ref, title=ref, + x='pos', y='depth', hue='strand', + marker=False, s=2 + ) EZChart(plt, theme='epi2melabs', height='300px') @@ -175,11 +174,11 @@ def plot_aav_structures(report, structures_file): x='Assigned_genome_type', y='percentage') plt.title = dict(text='Genome types') - plt._fig.xaxis.major_label_orientation = 45 + plt._fig.xaxis.major_label_orientation = 45 * (math.pi / 180) EZChart(plt, theme='epi2melabs') # Table with counts and percentages - # (in lieu of being able to annotate bar plots in ezchrts) + # (in lieu of being able to annotate bar plots in ezcharts) df_sample = df_sample.round({'count': 2, 'percentage': 2}) DataTable.from_pandas(df_sample, use_index=False) diff --git a/docs/04_install_and_run.md b/docs/04_install_and_run.md index 372c28d..88f5b9a 100644 --- a/docs/04_install_and_run.md +++ b/docs/04_install_and_run.md @@ -9,7 +9,7 @@ therefore Nextflow will need to be installed before attempting to run the workflow. The workflow can currently be run using either -[Docker](https://www.docker.com/products/docker-desktop +[Docker](https://www.docker.com/products/docker-desktop) or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/index.html) to provide isolation of the required software. Both methods are automated out-of-the-box provided diff --git a/lib/common.nf b/lib/common.nf index 2a31d49..3a8568d 100644 --- a/lib/common.nf +++ b/lib/common.nf @@ -15,6 +15,7 @@ process getParams { } process configure_igv { + publishDir "${params.out_dir}/", mode: 'copy', pattern: 'igv.json', enabled: params.containsKey("igv") && params.igv label "wf_common" cpus 1 memory "2 GB" diff --git a/lib/ingress.nf b/lib/ingress.nf index 6d14a83..2931357 100644 --- a/lib/ingress.nf +++ b/lib/ingress.nf @@ -197,15 +197,15 @@ def fastq_ingress(Map arguments) .map { meta, files, stats -> // new `arity: '1..*'` would be nice here files = files instanceof List ? files : [files] - new_keys = [ + def new_keys = [ "group_key": groupKey(meta["alias"], files.size()), "n_fastq": files.size()] - grp_index = (0.. - new_keys = [ + def new_keys = [ "group_index": "${meta["alias"]}_${grp_i}"] [meta + new_keys, files, stats] } @@ -279,17 +279,19 @@ def xam_ingress(Map arguments) // sorted, the index will be used. meta, paths -> boolean is_array = paths instanceof ArrayList - String xai_fn + String src_xam + String src_xai // Using `.uri` or `.Uri()` leads to S3 paths to be prefixed with `s3:///` // instead of `s3://`, causing the workflow to not find the index file. // `.toUriString()` returns the correct path. if (!is_array){ + src_xam = paths.toUriString() def xai = file(paths.toUriString() + ".bai") if (xai.exists()){ - xai_fn = xai.toUriString() + src_xai = xai.toUriString() } } - [meta + [xai_fn: xai_fn], paths] + [meta + [src_xam: src_xam, src_xai: src_xai], paths] } | checkBamHeaders | map { meta, paths, is_unaligned_env, mixed_headers_env, is_sorted_env -> @@ -331,9 +333,9 @@ def xam_ingress(Map arguments) // - between 1 and `N_OPEN_FILES_LIMIT` aligned files no_files: n_files == 0 indexed: \ - n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && meta["xai_fn"] - to_index: - n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && !meta["xai_fn"] + n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && meta["src_xai"] + to_index: \ + n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && !meta["src_xai"] to_catsort: \ (n_files == 1) || (n_files > N_OPEN_FILES_LIMIT) || meta["is_unaligned"] to_merge: true @@ -358,20 +360,20 @@ def xam_ingress(Map arguments) .map { meta, files, stats -> // new `arity: '1..*'` would be nice here files = files instanceof List ? files : [files] - new_keys = [ + def new_keys = [ "group_key": groupKey(meta["alias"], files.size()), "n_fastq": files.size()] - grp_index = (0.. - new_keys = [ + def new_keys = [ "group_index": "${meta["alias"]}_${grp_i}"] [meta + new_keys, files, stats] } .map { meta, path, stats -> - [meta.findAll { it.key !in ['xai_fn', 'is_sorted'] }, path, stats] + [meta.findAll { it.key !in ['is_sorted', 'src_xam', 'src_xai'] }, path, stats] } // add number of reads, run IDs, and basecall models to meta @@ -388,10 +390,18 @@ def xam_ingress(Map arguments) | sortBam | groupTuple | mergeBams + | map{ + meta, bam, bai -> + [meta + [src_xam: null, src_xai: null], bam, bai] + } // now handle samples with too many files for `samtools merge` ch_catsorted = ch_result.to_catsort | catSortBams + | map{ + meta, bam, bai -> + [meta + [src_xam: null, src_xai: null], bam, bai] + } // Validate the index of the input BAM. // If the input BAM index is invalid, regenerate it. @@ -399,7 +409,7 @@ def xam_ingress(Map arguments) ch_to_validate = ch_result.indexed | map{ meta, paths -> - bai = paths && meta.xai_fn ? file(meta.xai_fn) : null + def bai = paths && meta.src_xai ? file(meta.src_xai) : null [meta, paths, bai] } | branch { @@ -429,6 +439,10 @@ def xam_ingress(Map arguments) ch_indexed = ch_result.to_index | mix( ch_validated.invalid_idx ) | samtools_index + | map{ + meta, bam, bai -> + [meta + [src_xai: null], bam, bai] + } // Add extra null for the missing index to input.missing // as well as the missing metadata. @@ -439,7 +453,7 @@ def xam_ingress(Map arguments) ) | map{ meta, paths -> - [meta + [xai_fn: null, is_sorted: false], paths, null] + [meta + [src_xam: null, src_xai: null, is_sorted: false], paths, null] } // Combine all possible inputs @@ -480,7 +494,7 @@ def xam_ingress(Map arguments) } // Remove metadata that are unnecessary downstream: - // meta.xai_fn: not needed, as it will be part of the channel as a file + // meta.src_xai: not needed, as it will be part of the channel as a file // meta.is_sorted: if data are aligned, they will also be sorted/indexed // // The output meta can contain the following flags: @@ -498,7 +512,7 @@ def xam_ingress(Map arguments) ch_result | map{ meta, bam, bai, stats -> - [meta.findAll { it.key !in ['xai_fn', 'is_sorted'] }, [bam, bai], stats] + [meta.findAll { it.key !in ['is_sorted'] }, [bam, bai], stats] }, "xam" ) @@ -508,6 +522,19 @@ def xam_ingress(Map arguments) | map{ it.flatten() } + // Final check to ensure that src_xam/src_xai is not an s3 + // path. If so, drop it. We check src_xam also for src_xai + // as, the latter is irrelevant if the former is in s3. + | map{ + meta, bam, bai, stats -> + def xam = meta.src_xam + def xai = meta.src_xai + if (meta.src_xam){ + xam = meta.src_xam.startsWith('s3://') ? null : meta.src_xam + xai = meta.src_xam.startsWith('s3://') ? null : meta.src_xai + } + [ meta + [src_xam: xam, src_xai: xai], bam, bai, stats ] + } return ch_result } diff --git a/main.nf b/main.nf index e5420e8..b95426b 100644 --- a/main.nf +++ b/main.nf @@ -393,7 +393,6 @@ process makeReport { String report_name = "wf-aav-qc-report.html" String metadata = new JsonBuilder(metadata).toPrettyString() """ - touch 1 echo '${metadata}' > metadata.json workflow-glue report $report_name \ --wf_version $wf_version \ diff --git a/nextflow.config b/nextflow.config index 2fe37da..b2ba2bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -57,7 +57,7 @@ params { "--ref_transgene_plasmid 'wf-aav-qc-demo/transgene.fasta'" ] container_sha = "sha9b4b07df083ba71262da90933ce692b9ae0154ee" - common_sha = "shad399cf22079b5b153920ac39ee40095a677933f1" + common_sha = "shad28e55140f75a68f59bbecc74e880aeab16ab158" container_sha_medaka = "sha3486abaab0d3b90351617eb8622acf2028edb154" agent = null } @@ -70,7 +70,7 @@ manifest { description = 'AAV plasmid quality control workflow' mainScript = 'main.nf' nextflowVersion = '>=23.04.2' - version = 'v1.1.1' + version = 'v1.1.2' } // used by default for "standard" (docker) and singularity profiles,