-
Notifications
You must be signed in to change notification settings - Fork 0
/
Statin_Analyses.py
104 lines (69 loc) · 3.82 KB
/
Statin_Analyses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Statin analysis: number needed to treat, odds ratio, risk reduction, absolute risk reduction
#import the libraries
import pandas as pd
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
# Apply matplotlib's 'fivethirtyeight' style sheet to every figure created
# below (plot style chosen to follow the Charite template, per the original
# author's note).
plt.style.use('fivethirtyeight')
# Display floating-point values with zero decimal places in pandas output.
# The fully qualified option key is used on purpose: newer pandas releases
# reject the bare 'precision' shorthand because it no longer identifies a
# single option, while 'display.precision' is accepted by old and new versions.
pd.set_option('display.precision', 0)
# Load the Excel workbooks into DataFrames.
# `doc` is the main workbook; J_1, I_1, H_1 and D_1 are the per-cohort tables
# that are merged with the matching `doc` columns further down.
doc = pd.read_excel('file:///C:/Users/Onotation/Documents/Internship/AllMalignantCancer/allMalignantCancer_newDB.xls')
J_1, I_1, H_1, D_1 = (
    pd.read_excel(workbook_url)
    for workbook_url in (
        'file:///C:/Users/Onotation/Documents/Internship/tables/J-1.xlsx',
        'file:///C:/Users/Onotation/Documents/Internship/tables/I-1.xlsx',
        'file:///C:/Users/Onotation/Documents/Internship/tables/H-1.xlsx',
        'file:///C:/Users/Onotation/Documents/Internship/tables/D-1.xlsx',
    )
)
# One column of `doc` per cohort, in the same J / I / H / D order as the
# tables above.
J, I, H, D = (
    doc[column_label]
    for column_label in (
        'Cocktails-w-Cancer-w/o-Statins',
        'Cocktails-w/o-Cancer-w-Statins',
        'Cocktails-w/o-Cancer/Statins',
        'Cocktails-w-Statins-and-Cancer',
    )
)
# Attach the drug frequencies to each cohort table.
# Every cohort table keeps all of its rows (left join); its 'AgeGroups' column
# is matched against the index of the corresponding frequency series, and the
# matched values arrive in a new 'Frequency' column.
# NOTE(review): the join key on the right-hand side is the series index, so
# this assumes the index of J / I / H / D carries the age-group labels --
# confirm against how `doc` is loaded.
_merge_opts = dict(left_on='AgeGroups', right_index=True, how='left')
JJ1 = pd.merge(J_1, J.to_frame('Frequency'), **_merge_opts)
II1 = pd.merge(I_1, I.to_frame('Frequency'), **_merge_opts)
HH1 = pd.merge(H_1, H.to_frame('Frequency'), **_merge_opts)
DD1 = pd.merge(D_1, D.to_frame('Frequency'), **_merge_opts)
# Stack the four cohorts on top of each other (row labels are not reset).
frames = (JJ1, II1, HH1, DD1)
frames_concat = pd.concat(frames)
#frames_concat.to_csv('out.csv',index = False)
#df = pd.read_csv('file:///C:/Users/Onotation/Documents/Internship/out.CSV')
#df.values
# Zero-pad single-digit bounds in the age-group labels: a lone digit before
# the first underscore or after the last one gets a leading '0'
# (presumably so the labels sort in age order -- confirm with the data).
_single_digit_bounds = [r'^(\d{1})\_', r'_(\d{1})$']
_zero_padded_bounds = [r'0\1_', r'_0\1']
frames_concat['AgeGroups'] = frames_concat['AgeGroups'].replace(
    _single_digit_bounds, _zero_padded_bounds, regex=True)
#frames_concat.to_csv('out_changed0.csv',index = False)
# Total frequency per (age group, statin factor, cancer status).
grp = frames_concat.groupby(['AgeGroups', 'Factor', 'Cancer'])['Frequency'].sum()
# Two wide views of the same totals:
#   counts  -> rows (AgeGroups, Factor), one column per Cancer value
#   counts1 -> rows (AgeGroups, Cancer), one column per Factor value
counts = grp.unstack('Cancer')
counts1 = grp.unstack('Factor')
# Odds ratio of cancer for statin users versus non-users, with the p-value of
# Fisher's exact test (scipy's default two-sided alternative).
# Summing over all age groups leaves a 2x2 table: rows = Cancer, columns = Factor.
_totals = counts1.groupby(level="Cancer").sum()
# Pick rows and columns explicitly instead of relying on alphabetical sort
# order.  With rows ['Yes', 'No'] and columns ['w-statin', 'wo-statin'] the
# statistic is odds(cancer | w-statin) / odds(cancer | wo-statin), which is
# what the printed label announces.  The sorted default (rows 'No', 'Yes')
# yields the reciprocal of that ratio; the p-value is the same either way.
table = _totals.loc[['Yes', 'No'], ['w-statin', 'wo-statin']].values
oddsratio, pvalue = stats.fisher_exact(table)
print("OddsR(w-statin/wo-statin): ", oddsratio, "p-Value for confidence interval 95%:", pvalue)
#plot test
#counts1['sumwwoStatin']= counts1['w-statin']+counts1['wo-statin']
#counts1['oddRatio']=((counts1['w-statin']/counts1['sumwwoStatin'])/(counts1['wo-statin']/counts1['sumwwoStatin']))
#ax = counts.plot(kind='bar',stacked=True,colormap='Paired',rot = 45)
#for p in ax.patches:
#ax.annotate(np.round(p.get_height(),decimals=0).astype(np.int64), (p.get_x()+p.get_width()/2., p.get_y()), ha='center', va='center', xytext=(2, 10), textcoords='offset points', fontsize=10)
# Draw one stacked bar chart per statin factor, side by side with shared axes:
# age groups on the x axis, cancer counts stacked within each bar.
by_factor = counts.groupby(level='Factor')
k = by_factor.ngroups
# squeeze=False makes subplots() return an array even when there is only one
# factor; without it a single Axes object comes back and `axes[i]` fails.
# ravel() flattens the 1 x k grid to the 1-D array the loop indexes.
fig, axes = plt.subplots(1, k, sharex=True, sharey=True, figsize=(15, 8),
                         squeeze=False)
axes = axes.ravel()
# The loop variable has its own name so the module-level `grp` Series is not
# overwritten by the last sub-frame.
for i, (gname, factor_counts) in enumerate(by_factor):
    # Drop the constant 'Factor' level so bars are labelled by age group only.
    factor_counts.xs(gname, level='Factor').plot.bar(
        stacked=True, rot=45, ax=axes[i], title=gname)
fig.tight_layout()
# Cumulative cancer incidence, in percent, per (age group, statin factor).
counts['sumwwoCancer'] = counts['No'] + counts['Yes']
test = (counts['Yes'] / counts['sumwwoCancer']) * 100
counts['cumInci'] = test
# One row per age group, one incidence column per statin factor.
testUnstacked = test.unstack(level=[1])
# Absolute risk reduction (ARR) in percentage points: incidence without
# statins minus incidence with statins.
ok = testUnstacked['wo-statin'] - testUnstacked['w-statin']
testUnstacked['AbsoluteRR'] = ok
trialokay = ok.mean()
# Number needed to treat: NNT = 1 / ARR with ARR as a proportion.  ARR is held
# in percent here, so the conversion is 100 / ARR; plain 1 / ARR would be a
# factor of 100 too small.
NumNeedTreat = 100 / trialokay
pot = np.ceil(NumNeedTreat)
# A zero or missing mean ARR gives inf / NaN, which cannot be cast to int;
# keep the textual value in that case instead of raising.
pot1 = str(int(pot)) if np.isfinite(pot) else str(pot)
# Per-age-group NNT, stored on the same (corrected) scale that is printed, so
# the column and the report agree.  The printed values match the previous
# `(1 / ARR) * 100` output up to floating-point rounding.
testUnstacked['NNT'] = 100 / testUnstacked['AbsoluteRR']
print ("Number Needed to Treat for each of the", testUnstacked['NNT'])