-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummary.smk
168 lines (149 loc) · 8.42 KB
/
summary.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""
Rules for select final output files per-subject-per-lineage.
Everything here is just copying over lightweight files created in other rules.
"""
def set_chain_type(w):
"""Set the chain_type wildcard based on chain and lineage and return as dict"""
w = dict(w)
for lineage, attrs in ANTIBODY_LINEAGES.items():
if w["antibody_lineage"] == lineage:
break
else:
raise ValueError(f"Can't find lineage {antibody_lineage}")
w["chain_type"] = "gamma"
if w["chain"] != "heavy" and attrs["LightLocus"]:
w["chain_type"] = {"IGK": "kappa", "IGL": "lambda"}[attrs["LightLocus"]]
return w
def summary_setup_lineage_helper_rules():
"""Define per-lineage summary rules dynamically"""
for lineage, attrs in ANTIBODY_LINEAGES.items():
subject = attrs["Subject"]
targets = []
for chain in ["heavy", "light"]:
targets_chain = []
w = set_chain_type({"subject": attrs["Subject"], "antibody_lineage": lineage, "chain": chain})
# will default to just igphyml-based outputs for automatic alignment,
# and only expect the custom alignment outputs if a custom alignment is
# already in place
things = [""]
custom_aln = Path(expand(
"analysis/sonar/{subject}.{chain_type}/alignment.{antibody_lineage}.custom.fa", **w)[0])
if custom_aln.exists():
things += [".custom"]
targets_chain += expand(
"summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_igphyml{thing}.{ext}",
thing=things, ext=["tree", "tree.pdf"], **w)
targets_chain += expand(
"summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_aligned{thing}.afa",
thing=things, **w)
targets_chain += expand(
"summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_collected.fa", **w)
targets_chain += expand(
"summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_inferredAncestors{thing}.fa",
thing=things, **w)
targets_chain += expand(
"summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_inferredAncestors{thing}.common.fa",
thing=things, **w)
targets += targets_chain
rule:
f"Summary outputs for subject {subject} lineage {lineage} chain {chain}"
name: f"summary_lineage_{lineage}_{chain}"
input: targets_chain
rule:
f"Summary outputs for subject {subject} lineage {lineage}"
name: f"summary_lineage_{lineage}"
input: targets + [f"summary/{subject}/{lineage}/{lineage}_divergence.pdf"]
def summary_setup_subject_helper_rules():
"""Define per-subject summary rules dynamically"""
# note which subjects have IgM reads sequenced for which chain
m_subjects = {"mu": set(), "kappa": set(), "lambda": set()}
for sample, attrs in SAMPLES.items():
if "SpecimenAttrs" not in attrs:
continue
if attrs["Run"] and "IgM" in attrs["SpecimenAttrs"]["CellType"]:
m_subjects[attrs["Type"]].add(attrs["SpecimenAttrs"]["Subject"])
# organize by locus rather than chain type
m_subjects["IGH"] = m_subjects.pop("mu")
m_subjects["IGK"] = m_subjects.pop("kappa")
m_subjects["IGL"] = m_subjects.pop("lambda")
for subject in {attrs["Subject"] for attrs in SPECIMENS.values()}:
targets = []
# MINING-D output from heavy chain IgM
if subject in m_subjects["IGH"]:
targets += expand(
"summary/{subject}/{subject}.miningd.{pval}.txt",
subject=subject, pval=["default", "sensitive"])
# IgDiscover output from IgM for whichever loci were sequenced
loci = {locus for locus in m_subjects.keys() if subject in m_subjects[locus]}
for locus in loci:
segments = ["V", "J"]
if locus == "IGH":
segments += "D"
targets += expand(
"summary/{subject}/{subject}.germline.{locus}{segment}.fasta",
subject=subject, locus=locus, segment=segments)
rule:
f"Summary outputs for subject {subject}"
name: f"summary_subject_{subject}"
input: targets
summary_setup_lineage_helper_rules()
summary_setup_subject_helper_rules()
### Antibody Lineage summary rules
rule summary_tree:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_igphyml.{ext}"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/longitudinal.auto.{antibody_lineage}/output/longitudinal.auto.{antibody_lineage}_igphyml.{ext}", **set_chain_type(w))
shell: "cp {input} {output}"
rule summary_tree_custom:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_igphyml.custom.{ext}"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/longitudinal.custom.{antibody_lineage}/output/longitudinal.custom.{antibody_lineage}_igphyml.{ext}", **set_chain_type(w))
shell: "cp {input} {output}"
# Using `igseq convert` for FASTA files to ensure we get unwrapped versions
rule summary_aln_auto:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_aligned.afa"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/longitudinal.auto.{antibody_lineage}/work/phylo/longitudinal.auto.{antibody_lineage}_aligned.afa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_aln_custom:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_aligned.custom.afa"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/alignment.custom.{antibody_lineage}.fa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_collected:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_collected.fa"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/longitudinal.auto.{antibody_lineage}/output/sequences/nucleotide/longitudinal.auto.{antibody_lineage}-collected.fa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_ancestors_auto:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_inferredAncestors.fa"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/longitudinal.auto.{antibody_lineage}/output/sequences/nucleotide/longitudinal.auto.{antibody_lineage}_inferredAncestors.fa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_ancestors_custom:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_inferredAncestors.custom.fa"
input: lambda w: expand("analysis/sonar/{subject}.{chain_type}/longitudinal.custom.{antibody_lineage}/output/sequences/nucleotide/longitudinal.custom.{antibody_lineage}_inferredAncestors.fa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_ancestors_common:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_inferredAncestors.common.fa"
input: lambda w: expand("analysis/reporting/sonar/{antibody_lineage}.{chain_type}/igphyml_ancestors.common.fa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_ancestors_custom_common:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_{chain}_inferredAncestors.custom.common.fa"
input: lambda w: expand("analysis/reporting/sonar/{antibody_lineage}.{chain_type}/igphyml_ancestors.custom.common.fa", **set_chain_type(w))
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"
rule summary_germline_divergence_plot:
output: "summary/{subject}/{antibody_lineage}/{antibody_lineage}_divergence.pdf"
input: "analysis/reporting/by-lineage/{antibody_lineage}.divergence.pdf"
shell: "cp {input} {output}"
### Subject summary rules
rule summary_miningd:
output: "summary/{subject}/{subject}.miningd.{thing}.txt"
input: "analysis/mining-d/{subject}.output.{thing}.txt"
shell: "cp {input} {output}"
rule summary_germline:
output: "summary/{subject}/{subject}.germline.{locus}{segment}.fasta"
input: "analysis/germline/{subject}.{locus}/{segment}.fasta"
conda: "envs/igseq.yaml"
shell: "igseq convert {input} {output}"