-
Notifications
You must be signed in to change notification settings - Fork 10
/
Part4a_coverage.sh
70 lines (53 loc) · 1.89 KB
/
Part4a_coverage.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
#SBATCH --job-name=coverage
#SBATCH -n 1
#SBATCH -N 1
#SBATCH -c 7
#SBATCH --mem=5G
#SBATCH --qos=general
#SBATCH --partition=general
#SBATCH --mail-user=
#SBATCH --mail-type=ALL
#SBATCH -o %x_%j.out
#SBATCH -e %x_%j.err
hostname
date
# load required software
module load bedtools
module load bamtools
module load htslib
mkdir -p ../coverage_stats
cd ../coverage_stats
# genome index file from samtools faidx
FAI=/UCHC/PublicShare/Variant_Detection_Tutorials/Variant-Detection-Introduction-GATK_all/resources_all/Homo_sapiens_assembly38.fasta.fai
# make a "genome" file, required by bedtools makewindows command, set variable for location
cut -f 1-2 $FAI > Homo_sapiens_assembly38.fasta.genome
GFILE=../coverage_stats/Homo_sapiens_assembly38.fasta.genome
# make 1kb window bed file, set variable for location
bedtools makewindows -g $GFILE -w 1000 >hg38_1kb.bed
WIN1KB=../coverage_stats/hg38_1kb.bed
# go to sequence alignment folder
cd ../align_pipe
# make a list of bam files
find ../align_pipe/ -name "*bam" >bam.list
# pipe:
# 1) merge bam files
# 2) filter by quality and proper pairing
# 3) convert alignments to bed format
# 4) map alignments to 1kb windows, counting (but also getting the mean and median of the mapping quality score from column 5)
bamtools merge -list bam.list | \
bamtools filter -in - -mapQuality ">30" -isDuplicate false -isProperPair true | \
bedtools bamtobed -i stdin | \
bedtools map \
-a $WIN1KB \
-b stdin \
-c 5 -o mean,median,count \
-g $GFILE \
>../coverage_stats/coverage_1kb.bed
# bgzip compress and tabix index the resulting file
bgzip ../coverage_stats/coverage_1kb.bed
tabix -p bed ../coverage_stats/coverage_1kb.bed.gz
# select and merge outlier windows
zcat ../coverage_stats/coverage_1kb.bed.gz | awk '$6 < 850 || $6 > 2550' | bedtools merge | bgzip >../coverage_stats/coverage_outliers.bed.gz
tabix -p bed ../coverage_stats/coverage_outliers.bed.gz
date