-
Notifications
You must be signed in to change notification settings - Fork 3
/
eval_assembly_custom.smk
70 lines (65 loc) · 2.19 KB
/
eval_assembly_custom.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
include: "rules/load_config_custom.smk"
# Parse the comma-separated assembly scaffold paths from config["scaffolds"].
# Whitespace around each entry is stripped and empty entries (e.g. produced by
# a trailing comma) are dropped so they never reach the rules as "" paths.
try:
    scaffolds = [
        path.strip()
        for path in config["scaffolds"].split(",")
        if path.strip()
    ]
except (AttributeError, KeyError) as e:
    # AttributeError: the key exists but holds no string (e.g. None).
    # KeyError: the key is absent from the config altogether.
    raise PathNotGiven(
        "The scaffold files from assembly are not specified.") from e
if not scaffolds:
    # The key was present but contained no usable path.
    raise PathNotGiven(
        "The scaffold files from assembly are not specified.")
# Directory that receives the MetaQUAST output (passed to metaquast.py via -o).
metaquast_dir = results_dir + "/metaquast"

# Names of the per-criterion summary tables (<metaquast_dir>/summary/TSV/<name>.tsv)
# that the workflow expects MetaQUAST to produce and that rule visualize consumes.
metaquast_criteria = [
    "num_contigs",
    "Genome_fraction",
    "Duplication_ratio",
    "Largest_alignment",
    "NGA50",
    "num_mismatches_per_100_kbp",
]
# Default target rule: requests the final benchmark figure and the two
# MetaQUAST-based tables stored under results_dir.
rule all:
    input:
        assembly_benchmark_figure=(
            results_dir + "/final_figures/assembly_benchmark.pdf"
        ),
        assembly_rank_table=(
            results_dir + "/final_tables/assembly_metaquast_ranking.tsv"
        ),
        assembly_score=(
            results_dir + "/final_tables/assembly_metaquast_score.tsv"
        )
# Create a samtools FASTA index (<ref>.fai) for every reference file in `refs`.
rule build_idx:
    input:
        refs
    output:
        fa_idx = [ref + ".fai" for ref in refs]
    conda:
        "config/conda_env.yaml"
    # {input:q} shell-quotes each input path and "$fa" keeps it a single word,
    # so reference paths containing spaces or shell metacharacters still work.
    shell:
        """
        for fa in {input:q}
        do
            samtools faidx "$fa"
        done
        """
# Evaluate the assembly scaffolds against the reference genomes with MetaQUAST.
# Declares the HTML report and one summary TSV per entry of metaquast_criteria
# as outputs; the .fai indexes are listed as inputs so build_idx runs first.
rule metaquast:
    input:
        refs = refs,
        scaffolds = scaffolds,
        refs_fai = [ref + ".fai" for ref in refs]
    output:
        report = metaquast_dir + "/report.html",
        table = expand(metaquast_dir + "/summary/TSV/{criteria}.tsv",
                       criteria=metaquast_criteria)
    conda:
        "config/conda_env.yaml"
    threads: threads
    params:
        metaquast_outdir = metaquast_dir,
        # Re-join the comma-separated reference list after stripping whitespace
        # around each entry (same normalisation as applied to the scaffolds), so
        # a config value such as "a.fa, b.fa" still forms one valid -R argument.
        refs_comb = ",".join(
            ref.strip() for ref in config["refs"].split(",") if ref.strip()
        )
    # The :q modifier shell-quotes paths so spaces/metacharacters are preserved.
    shell:
        """
        metaquast.py --unique-mapping -o {params.metaquast_outdir:q} -R {params.refs_comb:q} \
        {input.scaffolds:q} -t {threads}
        """
# Visualize the evaluation: run scripts/custom_assembly_benchmark.R on the
# MetaQUAST summary tables to produce the benchmark figure and the
# ranking/score tables. The number of reference files is handed to the
# script as params.number_ref.
rule visualize:
    input:
        expand(
            metaquast_dir + "/summary/TSV/{criteria}.tsv",
            criteria=metaquast_criteria
        )
    output:
        assembly_benchmark_figure=(
            results_dir + "/final_figures/assembly_benchmark.pdf"
        ),
        assembly_rank_table=(
            results_dir + "/final_tables/assembly_metaquast_ranking.tsv"
        ),
        assembly_score=(
            results_dir + "/final_tables/assembly_metaquast_score.tsv"
        )
    params:
        number_ref=len(refs)
    conda:
        "config/conda_env.yaml"
    script:
        "scripts/custom_assembly_benchmark.R"