forked from tatumdmortimer/popgen-stats
-
Notifications
You must be signed in to change notification settings - Fork 0
/
slidingWindowStats.py
executable file
·88 lines (77 loc) · 2.31 KB
/
slidingWindowStats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python
import sys
import os
import getopt
import egglib
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
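# This script reads in an alignment and calculates diversity and selection
# statistics (theta, pi, Tajima's D) in sliding windows whose width and step
# are given by the user. Fay and Wu's H is also calculated if an outgroup is
# provided. The per-window values are written to a tab-delimited text file and
# Tajima's D is plotted against window position.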
def get_arguments(argv):
    """Parse the command-line options of slidingWindowStats.py.

    Recognised options (each takes a value):
        -a  alignment file
        -w  window width (positive integer, default 1000)
        -s  window step (positive integer, default 300)
        -o  outgroup name

    Args:
        argv: list of command-line tokens without the program name.

    Returns:
        The tuple ``(alignment, winWidth, winStep, outgroup)``. ``alignment``
        and ``outgroup`` are None when their option was not supplied.

    Exits with status 2 after printing the usage text when ``argv`` is empty,
    when getopt rejects the options, or when -w / -s is not a positive
    integer.
    """
    if not argv:
        usage()
        sys.exit(2)

    try:
        opts, _ = getopt.getopt(argv, "a:w:s:o:")
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    alignment = None
    outgroup = None
    # Integer-valued options keyed by flag, pre-loaded with their defaults.
    sizes = {'-w': 1000, '-s': 300}

    for opt, arg in opts:
        if opt == '-a':
            alignment = arg
        elif opt == '-o':
            outgroup = arg
        elif opt in sizes:
            try:
                value = int(arg)
            except ValueError:
                value = None
            # A non-numeric value would otherwise escape as a traceback, and
            # a zero or negative width/step cannot describe a window scan.
            if value is None or value <= 0:
                sys.stderr.write(
                    "Option %s expects a positive integer, got '%s'\n"
                    % (opt, arg))
                usage()
                sys.exit(2)
            sizes[opt] = value

    return (alignment, sizes['-w'], sizes['-s'], outgroup)
def usage():
    """Print the command-line help for slidingWindowStats.py to stdout."""
    # Adjacent literals are concatenated at compile time, so (unlike a
    # backslash-continued string) the text never picks up source indentation.
    # A parenthesised single argument prints identically whether print is a
    # statement (Python 2) or a function (Python 3).
    print("slidingWindowStats.py\n"
          " -a <fasta alignment>\n"
          " -w <window width default = 1000>\n"
          " -s <window step default = 300>\n"
          " -o <outgroup>")
def calc_stats(a):
    """Collect the statistics reported for one window.

    Args:
        a: object exposing ``polymorphism()``, which must return a mapping
           containing the keys 'thetaW', 'Pi', 'D' and 'H'.

    Returns:
        A dict with the keys 'theta', 'pi', 'tajimaD' and 'FayWuH' holding
        the corresponding values from ``a.polymorphism()``.
    """
    poly = a.polymorphism()
    # (key in the returned dict, key in the polymorphism() result)
    key_map = (
        ('theta', 'thetaW'),
        ('pi', 'Pi'),
        ('tajimaD', 'D'),
        ('FayWuH', 'H'),
    )
    return dict((out_key, poly[src_key]) for out_key, src_key in key_map)
# Script body: parse the options, scan the alignment window by window, write
# the per-window table and plot Tajima's D along the alignment.
alignment, winWidth, winStep, outgroup = get_arguments(sys.argv[1:])

if alignment is None:
    # -a is mandatory; its absence is a usage error, so exit non-zero like
    # the other usage exits.
    usage()
    sys.exit(2)

# Name the table after the alignment file itself. Dropping the directory part
# keeps an input such as "data/aln.fasta" from producing the path
# "windowStats_data/aln.txt".
alignBase = os.path.splitext(os.path.basename(alignment))[0]

align = egglib.Align(alignment)
# Upper-case every sequence in place.
for i in range(align.ns()):
    align.sequence(i, sequence=align.sequence(i).upper())

if outgroup is not None:
    outgroupIndex = align.find(outgroup, strict=False)
    # NOTE(review): find() is expected to return None when no sequence name
    # matches -- confirm against the egglib version in use.
    if outgroupIndex is None:
        sys.stderr.write("Outgroup '%s' not found in %s\n"
                         % (outgroup, alignment))
        sys.exit(1)
    # Group label 999 is presumably what egglib treats as the outgroup.
    align.group(outgroupIndex, group=999)

start = 0
stop = winWidth
location = []
TD = []
# The with-block closes the table even if a window fails part-way through.
with open('windowStats_' + alignBase + '.txt', 'w') as outfile:
    outfile.write("Start\tStop\tTheta\tPi\tTajimasD\tFay&WuH\n")
    for window in align.slider(winWidth, winStep):
        stats = calc_stats(window)
        outfile.write("%i\t%i\t%s\t%s\t%s\t%s\n" % (
            start, stop, stats['theta'], stats['pi'], stats['tajimaD'],
            stats['FayWuH']))
        # Floor division keeps the midpoint an integer on Python 2 and 3.
        location.append((start + stop) // 2)
        TD.append(stats['tajimaD'])
        # Advance only after reporting, so the first row carries the initial
        # 0..winWidth coordinates rather than being shifted by one step.
        # NOTE(review): stop is not clamped to the alignment length, so the
        # final row may overstate the end of a shorter last window.
        start += winStep
        stop += winStep

plt.plot(location, TD)
plt.xlabel('Location')
plt.ylabel('Tajima\'s D')
plt.savefig("slidingWindowTajimasD.png")
plt.close()