-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04-reduce_homology.sh
executable file
·46 lines (34 loc) · 1.15 KB
/
04-reduce_homology.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env bash
#
# Pool protein sequences from several sources into a single FASTA file and
# cluster them with MMseqs2, writing the cluster membership table to
# processed/clusters.tsv.
#
# Inputs (paths relative to the current working directory):
#   raw/fungal_effectors.tsv            columns 5 and 20 are used; first line skipped
#   processed/localised.tsv             columns 2 and 3 are used
#   processed/proteomes.tsv             columns 2 and 3 are used
#   processed/effector_homologues.fasta appended as-is
#
# Requires: mmseqs on PATH and an executable bin/tsv_to_fasta.sh.
set -euo pipefail

readonly EFFECTORS="raw/fungal_effectors.tsv"
readonly LOCALIZED="processed/localised.tsv"
readonly PROTEOMES="processed/proteomes.tsv"
readonly HOMOLOGS="processed/effector_homologues.fasta"
readonly OUTCLUSTERS="processed/clusters.tsv"
readonly TSV_TO_FASTA="bin/tsv_to_fasta.sh"

# Print a message to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Fail early, with a clear message, rather than half-way through a pipeline.
command -v mmseqs >/dev/null || die "mmseqs not found on PATH"
[[ -x "${TSV_TO_FASTA}" ]] || die "missing or non-executable ${TSV_TO_FASTA}"
for input in "${EFFECTORS}" "${LOCALIZED}" "${PROTEOMES}" "${HOMOLOGS}"; do
  [[ -r "${input}" ]] || die "cannot read input file: ${input}"
done

# Scratch space. Deliberately NOT named TMPDIR: that variable is consulted by
# mktemp and other tools, and overwriting it would leak into child processes.
# The directory is created in the current directory (as before, so large
# MMseqs2 intermediates do not land in /tmp), but with an unpredictable name,
# and it is removed on every exit path.
WORKDIR="$(mktemp -d "tmp.XXXXXXXX")"
readonly WORKDIR

cleanup() {
  rm -rf -- "${WORKDIR:?}"
}
trap cleanup EXIT

mkdir -p "$(dirname "${OUTCLUSTERS}")"
mkdir -p "${WORKDIR}/seqs" "${WORKDIR}/clu" "${WORKDIR}/tmp"

readonly COMBINED="${WORKDIR}/combined.fasta"

{
  # Effectors: take columns 5 and 20, drop the first line (presumably a
  # header row), and strip a trailing '*' plus any trailing whitespace
  # (presumably a stop-codon marker at the end of the sequence).
  awk -F'\t' '{print $5"\t"$20}' "${EFFECTORS}" \
    | tail -n+2 \
    | sed 's/\*[[:space:]]*$//' \
    | "${TSV_TO_FASTA}"

  # NOTE(review): unlike the effectors table, the first line of these two
  # tables is not skipped — confirm they really have no header row.
  awk -F'\t' 'BEGIN {OFS="\t"} {print $2, $3}' "${LOCALIZED}" \
    | "${TSV_TO_FASTA}"
  awk -F'\t' 'BEGIN {OFS="\t"} {print $2, $3}' "${PROTEOMES}" \
    | "${TSV_TO_FASTA}"

  # Already FASTA; append unchanged.
  cat "${HOMOLOGS}"
} > "${COMBINED}"

mmseqs createdb "${COMBINED}" "${WORKDIR}/seqs/db"

# Clustering thresholds: 0.3 minimum sequence identity and 0.7 coverage.
# See the MMseqs2 documentation for the exact meaning of --cov-mode 0 and
# --cluster-mode 0.
mmseqs cluster \
  "${WORKDIR}/seqs/db" \
  "${WORKDIR}/clu/db" \
  "${WORKDIR}/tmp" \
  --min-seq-id 0.3 \
  --cov-mode 0 \
  -c 0.7 \
  --cluster-mode 0

# The sequence DB is passed as both query and target because the clustering
# was run on a single database.
mmseqs createtsv \
  "${WORKDIR}/seqs/db" \
  "${WORKDIR}/seqs/db" \
  "${WORKDIR}/clu/db" \
  "${OUTCLUSTERS}"