-
Notifications
You must be signed in to change notification settings - Fork 5
/
scans2bigBed
executable file
·117 lines (95 loc) · 2.3 KB
/
scans2bigBed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash
## JASPAR-UCSC-tracks
## Author: Oriol Fornes
## Contact: [email protected]
##
## Create a genome browser bigBed track from individual TF scans
##
################### Initialize ###################
SOFT="scans2bigBed"
VERSION="2.0.1"
# Print the one-line command synopsis to stdout.
# Globals: SOFT (read) - program name shown in the synopsis.
usage() {
  local synopsis="usage: $SOFT -c CHROM_SIZES -i INPUT_DIR [-h]"
  echo -e "$synopsis"
}
# Print the full help text (synopsis followed by the option list) to stdout
# and terminate the script.
# Calls: usage - for the synopsis line.
help() {
  echo
  usage
  # One argument per output line; the empty strings produce the blank lines.
  printf '%s\n' \
    "" \
    " -c CHROM_SIZES chrom sizes file (e.g. from hg38.sh)" \
    " -i INPUT_DIR input directory (from scan_sequence.py)" \
    "" \
    "optional arguments:" \
    " -h, --help show this help message and exit" \
    " -d DUMMY_DIR dummy directory (default = /tmp/)" \
    " -m MEM memory to use (in Gb; default = 4)" \
    " -o OUT_FILE output file (default = ./bigBed.bb)" \
    " -t THREADS threads to use (default = 1)" \
    " -v, --version version" \
    ""
  exit
}
# Print "<program> version <version>" to stdout and terminate the script.
# Globals: SOFT (read), VERSION (read).
version() {
  local banner="$SOFT version $VERSION"
  echo -e "$banner"
  exit
}
# Report invalid command-line parameters and abort the script.
# Globals: SOFT (read) - program name used in the hint.
# Outputs: both diagnostic lines go to stderr so they never mix with data
#          written to stdout.
# Returns: never returns; exits with status 1 so callers can detect the
#          failure.
opts_error() {
  echo -e "Error : invalid parameters !" >&2
  echo -e "Use $SOFT -h for help" >&2
  exit 1
}
##################### Inputs #####################
# Defaults for the optional arguments (same values as shown by -h).
DUMMY_DIR=/tmp
# MEM previously had no default; a value already present in the environment
# is kept, otherwise the documented default of 4 (Gb) is used.
MEM=${MEM:-4}
OUT_FILE=./bigBed.bb
THREADS=1

# Called without any argument: print the synopsis and stop.
if [[ $# -lt 1 ]]; then
  usage
  exit
fi

# The leading ':' in the option string puts getopts in silent mode, so a
# missing option argument (':') or an unknown option ('?') must be handled
# explicitly by the case arms below.
while getopts ":c:i:d:m:o:t:vh" OPT; do
  case "$OPT" in
    c) CHROM_SIZES=$OPTARG ;;
    i) INPUT_DIR=$OPTARG ;;
    d) DUMMY_DIR=$OPTARG ;;
    m) MEM=$OPTARG ;;
    o) OUT_FILE=$OPTARG ;;
    t) THREADS=$OPTARG ;;
    v) version ;;
    h) help ;;
    :|\?) opts_error ;;
  esac
done

# -c and -i are both required. The original test read "-z CHROM_SIZES"
# (no '$'), which checks a literal string and therefore never fired.
if [[ -z "$CHROM_SIZES" || -z "$INPUT_DIR" ]]; then
  usage >&2
  exit 1
fi
###################### Work ######################
##
## Extract genome assembly
##
# The assembly name is captured from the chrom sizes file *name* (the part
# matched by (\w+) before "chrom.sizes"); perl only opens the file so that
# $ARGV is populated.
GENOME=$(perl -ne 'if($ARGV=~/(\w+).chrom.sizes/){print $1;exit;}' "$CHROM_SIZES")
##
## Initialize
##
BED_FILE="$DUMMY_DIR/$GENOME.bed"
SORTED_BED_FILE="$DUMMY_DIR/$GENOME.sorted.bed"
# Make a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail
##
## Merge all TFBSs into an unsorted BED file
##
# Keep input columns 1-4, 6 and 7 and cap the 5th output column at 1000.
# awk splits and joins on tabs only, matching cut and "bedToBigBed -tab", so
# empty or space-containing fields cannot shift columns.
gzip -cd -- "$INPUT_DIR"/*.tsv.gz \
  | cut -f 1-4,6,7 \
  | awk 'BEGIN { FS = OFS = "\t"; max = 1000 }
         { if ($5 > max) { $5 = max } print $1, $2, $3, $4, $5, $6 }' \
  > "$BED_FILE" \
  || { echo "Error : could not merge scans from $INPUT_DIR" >&2; exit 1; }
##
## Sort BED file
##
# ${MEM:-4} falls back to the documented default so that an unset MEM cannot
# produce the invalid option "--buffer-size=G".
LC_ALL=C sort --parallel="$THREADS" --buffer-size="${MEM:-4}G" -T "$DUMMY_DIR" \
  -k1,1 -k2,2n "$BED_FILE" > "$SORTED_BED_FILE" \
  || { echo "Error : could not sort $BED_FILE" >&2; exit 1; }
##
## Remove BED file
##
#rm -- "$BED_FILE"
##
## Create bigBed
##
bedToBigBed -type=bed6 -tab "$SORTED_BED_FILE" "$CHROM_SIZES" "$OUT_FILE" \
  || { echo "Error : bedToBigBed failed for $SORTED_BED_FILE" >&2; exit 1; }
##
## Remove sorted BED file
##
#rm -- "$SORTED_BED_FILE"