-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8ab10e4
commit ab45cd1
Showing
6 changed files
with
257 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
functionality: | ||
name: cellranger_atac_count | ||
namespace: mapping | ||
description: Align fastq files using Cell Ranger ATAC count. | ||
authors: | ||
- __merge__: /src/authors/vladimir_shitov.yaml | ||
roles: [ author ] | ||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- type: file | ||
name: --input | ||
required: true | ||
multiple: true | ||
example: [ "sample_S1_L001_R1_001.fastq.gz", "sample_S1_L001_R2_001.fastq.gz" ] | ||
description: The fastq.gz files to align. Can also be a single directory containing fastq.gz files. | ||
- type: file | ||
name: --reference | ||
required: true | ||
description: The path to Cell Ranger reference tar.gz file. Can also be a directory. | ||
example: reference.tar.gz | ||
- name: Outputs | ||
arguments: | ||
- type: file | ||
name: --output | ||
direction: output | ||
description: The folder to store the alignment results. | ||
example: "/path/to/output" | ||
required: true | ||
- name: Arguments | ||
arguments: | ||
- type: string | ||
name: --description | ||
description: Sample description to embed in output files | ||
default: "" | ||
- type: integer | ||
name: --force_cells | ||
description: "Define the top N barcodes with the most fragments overlapping peaks as cells and override the cell calling algorithm. N must be a positive integer <= 20,000. Use this option if the number of cells estimated by Cell Ranger ATAC is not consistent with the barcode rank plot" | ||
max: 20000 | ||
- type: file | ||
name: --peaks | ||
description: "Override peak caller: specify peaks to use in downstream analyses from supplied 3-column BED file. The supplied peaks file must be sorted by position and not contain overlapping peaks; comment lines beginning with # are allowed" | ||
- type: string | ||
name: --dim_reduce | ||
description: "Dimensionality reduction mode for clustering" | ||
choices: [ lsa, pca, plsa ] | ||
default: lsa | ||
- type: double | ||
name: --subsample_rate | ||
description: "Downsample to preserve this fraction of reads" | ||
example: 0.1 | ||
- type: string | ||
multiple: true | ||
name: --lanes | ||
description: bcl2fastq option. Semicolon-delimited series of lanes to demultiplex. Use this if you have a sample sheet for an entire flow cell but only want to generate a few lanes for further 10x Genomics analysis. | ||
example: 1,3 | ||
resources: | ||
- type: bash_script | ||
path: script.sh | ||
test_resources: | ||
- type: python_script | ||
path: test.py | ||
- path: /resources_test/cellranger_atac_tiny_bcl | ||
- path: /src/utils/setup_logger.py | ||
- path: /resources_test/reference_gencodev41_chr1 | ||
platforms: | ||
- type: docker | ||
image: ghcr.io/data-intuitive/cellranger_atac:2.1 | ||
setup: | ||
- type: docker | ||
run: | | ||
DEBIAN_FRONTEND=noninteractive apt update \ | ||
&& apt upgrade -y && apt install -y procps && rm -rf /var/lib/apt/lists/* | ||
- type: nextflow | ||
directives: | ||
label: [ highmem, highcpu ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/bin/bash | ||
|
||
set -eo pipefail | ||
|
||
## VIASH START | ||
par_input='resources_test/cellranger_atac_tiny_bcl/fastqs/' | ||
par_reference='resources_test/reference_gencodev41_chr1/' | ||
par_output='resources_test/cellranger_atac_tiny_bcl/bam' | ||
## VIASH END | ||
|
||
# just to make sure paths are absolute | ||
par_reference=`realpath $par_reference` | ||
par_output=`realpath $par_output` | ||
|
||
echo "Creating temporary directory" | ||
tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") | ||
function clean_up { | ||
rm -rf "$tmpdir" | ||
} | ||
trap clean_up EXIT | ||
|
||
# process inputs | ||
# for every fastq file found, make a symlink into the tempdir | ||
echo "Locating fastqs" | ||
fastq_dir="$tmpdir/fastqs" | ||
mkdir -p "$fastq_dir" | ||
IFS=";" | ||
for var in $par_input; do | ||
unset IFS | ||
abs_path=`realpath $var` | ||
if [ -d "$abs_path" ]; then | ||
find "$abs_path" -name *.fastq.gz -exec ln -s {} "$fastq_dir" \; | ||
else | ||
ln -s "$abs_path" "$fastq_dir" | ||
fi | ||
done | ||
|
||
echo "fastq_dir content: $(ls $fastq_dir)" | ||
|
||
echo "Processing reference" | ||
# process reference | ||
if file $par_reference | grep -q 'gzip compressed data'; then | ||
echo "Untarring genome" | ||
reference_dir="$tmpdir/fastqs" | ||
mkdir -p "$reference_dir" | ||
tar -xvf "$par_reference" -C "$reference_dir" --strip-components=1 | ||
par_reference="$reference_dir" | ||
fi | ||
|
||
# cd into tempdir | ||
cd "$tmpdir" | ||
|
||
if [ ! -z "$meta_memory_gb" ]; then | ||
# always keep 2gb for the OS itself | ||
memory_gb=`python -c "print(int('$meta_memory_gb') - 2)"` | ||
fi | ||
|
||
echo "Running cellranger-atac count" | ||
|
||
id=myoutput | ||
cellranger-atac count \ | ||
--id "$id" \ | ||
--fastqs "$fastq_dir" \ | ||
--reference "$par_reference" \ | ||
--dim-reduce "$par_dim_reduce" \ | ||
--description "$par_description" \ | ||
${par_lanes:+--lanes=${par_lanes[*]}} \ | ||
${par_force_cells:+--force-cells=$par_force_cells} \ | ||
${par_subsample_rate:+--subsample-rate=$par_subsample_rate} \ | ||
${memory_gb:+--localmem=$memory_gb} \ | ||
${meta_cpus:+--localcores=$meta_cpus} \ | ||
${par_lanes:+--lanes=${par_lanes[*]}} | ||
|
||
echo "Copying output" | ||
if [ -d "$id/outs/" ]; then | ||
if [ ! -d "$par_output" ]; then | ||
mkdir -p "$par_output" | ||
fi | ||
mv "$id/outs/"* "$par_output" | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import subprocess | ||
from os import path | ||
import sys | ||
from itertools import zip_longest, chain | ||
|
||
## VIASH START | ||
meta = { | ||
"functionality_name": "cellranger_atac_count", | ||
"resources_dir": "resources_test" | ||
} | ||
## VIASH END | ||
|
||
sys.path.append(meta["resources_dir"]) | ||
# START TEMPORARY WORKAROUND setup_logger | ||
# reason: resources aren't available when using Nextflow fusion | ||
# from setup_logger import setup_logger | ||
def setup_logger(): | ||
import logging | ||
from sys import stdout | ||
|
||
logger = logging.getLogger() | ||
logger.setLevel(logging.INFO) | ||
console_handler = logging.StreamHandler(stdout) | ||
logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") | ||
console_handler.setFormatter(logFormatter) | ||
logger.addHandler(console_handler) | ||
|
||
return logger | ||
# END TEMPORARY WORKAROUND setup_logger | ||
logger = setup_logger() | ||
|
||
logger.info("> Running command with folder") | ||
input = meta["resources_dir"] + "/cellranger_atac_tiny_bcl/fastqs/HJN3KBCX2/test_sample/" | ||
reference = meta["resources_dir"] + "/reference_gencodev41_chr1/reference_cellranger.tar.gz" | ||
output = "test_output" | ||
cmd_pars = [ | ||
meta["executable"], | ||
"--input", input, | ||
"--reference", reference, | ||
"--output", output | ||
] | ||
if meta.get("cpus"): | ||
cmd_pars.extend(["---cpus", str(meta["cpus"])]) | ||
if meta.get("memory_gb"): | ||
cmd_pars.extend(["---memory", f"{meta['memory_gb']}gb"]) | ||
try: | ||
out = subprocess.check_output(cmd_pars).decode("utf-8") | ||
except subprocess.CalledProcessError as e: | ||
logger.error(e.output) | ||
raise e | ||
logger.info("> Check if file exists") | ||
assert path.exists(output + "/filtered_peak_bc_matrix.h5"), "No output was created." | ||
assert path.exists(output + "/fragments.tsv.gz"), "No fragments file was created." | ||
|
||
|
||
logger.info("> Running command with fastq files") | ||
# test_sample_S1_L001_R2_001.fastq.gz test_sample_S1_L001_R1_001.fastq.gz test_sample_S1_L001_R3_001.fastq.gz | ||
input_files = [ | ||
input + "test_sample_S1_L001_I1_001.fastq.gz", | ||
input + "test_sample_S1_L001_R1_001.fastq.gz", | ||
input + "test_sample_S1_L001_R2_001.fastq.gz", | ||
input + "test_sample_S1_L001_R3_001.fastq.gz", | ||
] | ||
output = "test_output2" | ||
|
||
|
||
cmd_pars = [ | ||
meta["executable"], | ||
*chain.from_iterable([("--input", input_file) for input_file in input_files]), | ||
"--reference", reference, | ||
"--output", output | ||
] | ||
if meta.get("cpus"): | ||
cmd_pars.extend(["---cpus", str(meta["cpus"])]) | ||
if meta.get("memory_gb"): | ||
cmd_pars.extend(["---memory", f"{meta['memory_gb']}gb"]) | ||
out = subprocess.check_output(cmd_pars).decode("utf-8") | ||
|
||
logger.info("> Check if file exists") | ||
assert path.exists(output + "/filtered_peak_bc_matrix.h5"), "No output was created." | ||
assert path.exists(output + "/fragments.tsv.gz"), "No fragments file was created." | ||
|
||
logger.info("> Completed Successfully!") |