sensitivity_analysis.py
"""
This file manages the global sensitivity analysis (GSA).
"""
import seaborn as sns
import matplotlib.pyplot as plt
from SALib import ProblemSpec
from SALib.util import extract_group_names, _check_groups
import jax
import equinox
import numpy as np
import pandas as pd
import os
import csv
from math import ceil
import model_main
from jax import jit, vmap, pmap
from jax.experimental.maps import xmap
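# Note: jax.experimental.maps.xmap is an experimental API that has been deprecated
# and removed in newer JAX releases; this module assumes a JAX version that still
# provides it.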
from jax.sharding import Mesh
from joblib import Parallel, delayed
from scipy.stats.qmc import LatinHypercube
from SALib.sample import saltelli
from SALib.analyze import sobol
import file_manager
import initialization
jax.config.update("jax_enable_x64", True)
os.environ["XLA_FLAGS"] = f'--xla_force_host_platform_device_count={int(os.environ["SLURM_NTASKS"]) if os.environ.get("SLURM_JOB_ID") else int(os.cpu_count())}'
class GSA_pytree(equinox.Module):
""" Initializes the GSATree of the model (a pytree) which contains every information (clinicalData, resultsTmp, paramSensivitity)
Args:
- clinicalData: dict() -> clinical data of the current patient
- resultsTmp: dict() -> output results for GSA (datas comparable to clinicalData)
- params: dict() -> names and bounds of parameters for sensivitiy analysis
- perturbation: jax.Array -> perturbation of the parameters for sensitivity analysis
- convergenceAnalysis: bool() -> if True, the convergence analysis is conducted
- includeECLSGSA: bool() -> if True, ECMO and CRRT parameters are included in GSA
Returns:
An instance of the class (an GSATree object).
"""
    clinicalData: dict
    resultsTmp: dict
    params: dict
    perturbation: jax.Array
    convergenceAnalysis: bool
    includeECLSGSA: bool
def __init__(self, clinicalData, resultsTmp, paramGSAOrdered, perturbation, \
convergenceAnalysis, includeECLSGSA):
self.clinicalData = clinicalData
self.resultsTmp = resultsTmp
self.params = paramGSAOrdered
self.perturbation = perturbation
self.convergenceAnalysis = convergenceAnalysis
self.includeECLSGSA = includeECLSGSA
def update(self, update):
        # Update the temporary results dict of the GSA for each sample, so that it can be stored in the results array Y later
keys = tuple(self.resultsTmp.keys())
vals = tuple([update[key] for key in keys])
replace = dict(zip(keys, vals))
return equinox.tree_at(lambda tree: tree.resultsTmp, self, replace)
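# Usage sketch for GSA_pytree.update (hypothetical values): given a tree whose
# resultsTmp is {'CO': 0.0, 'MAP': 0.0}, calling
#     tree = tree.update({'CO': 5.1, 'MAP': 92.0, 'SP': 120.0})
# returns a new GSA_pytree whose resultsTmp is {'CO': 5.1, 'MAP': 92.0}; keys that
# are not part of resultsTmp ('SP' here) are ignored, and the original tree is left
# untouched because equinox.tree_at builds a new, immutable pytree.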
def main(GSATree, sample_size, initTree, simPostTree, simCompTree, paramGroupGSA):
""" Orders the parameters, bounds and outputs as a problem (ProblemSpec),
Divides and distributes the problem through local devices.
Launch the GSA.
Plots the results.
Args:
- GSATree: pytree() instance of GSA_pytree
- sample_size: [int] -> [2**1], [2**2], ... [2**10]
- initTree: pytree() -> initTree instance from the model main
- simPostTree: pytree() -> simPostTree instance from the model main
- simCompTree: pytree() -> simCompTree instance from the model main
    - paramGroupGSA: dict() -> containing parameter names and their groups
Returns:
ProblemSpec() instance -> problem.
"""
problem = ProblemSpec(
{
'num_vars': len(GSATree.params),
'names': list(GSATree.params.keys()),
'bounds': list(GSATree.params.values()),
'outputs': list(GSATree.resultsTmp.keys()),
'groups': list(paramGroupGSA.values()) if paramGroupGSA is not None else None
}
)
problem.sample_sobol(sample_size)
    if GSATree.includeECLSGSA:
problem, param_indices_ECLS, param_indices_CVS = sample_ECLS(problem, GSATree)
results = Parallel(n_jobs=int(os.environ["XLA_FLAGS"].split("=")[1]), backend="threading")(delayed(process_sample)(problem.samples[i], param_indices_ECLS, param_indices_CVS, GSATree, \
initTree, simPostTree, simCompTree) for i in range(len(problem.samples)))
        # np.array(results) stacks the per-sample output lists row-wise, which is
        # equivalent to np.transpose(np.stack(results, axis=1))
Y = np.array(results)
else:
all_devices = jax.local_devices()
sample_count = len(problem.samples)
device_count = len(all_devices)
        # Use at most one device per sample; otherwise pick the largest number of
        # devices num_devices <= len(all_devices) that evenly divides len(problem.samples)
        if sample_count <= device_count:
            num_devices = sample_count
        else:
            for divisor in range(device_count, 0, -1):
                if sample_count % divisor == 0:
                    num_devices = divisor
                    break
devices = np.array(all_devices[:num_devices])
print("\n-----------------------------------------")
print("-----------------------------------------\n")
print(f"Devices used for sensitivity analysis:")
print(f"{devices}")
print("\n-----------------------------------------")
print("-----------------------------------------\n")
problem.set_samples(problem.samples)
eval_xmap = xmap(evaluate, in_axes=(['Sample', ...], [...], [...], [...], [...]), out_axes=(['Sample', ...]),
axis_resources={'Sample': 'x'})
with Mesh(devices, ('x',)):
Y = eval_xmap(problem.samples, GSATree, initTree, simPostTree, simCompTree)
Y = np.transpose(np.array(Y))
problem.set_results(Y)
problem.analyze_sobol()
post_processing(problem, GSATree)
return problem
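# Illustrative call sketch for main() (not executed here); it assumes the surrounding
# model pipeline has already produced clinicalData, resultsTmp, paramGSAOrdered,
# initTree, simPostTree and simCompTree:
#
#     GSATree = GSA_pytree(clinicalData, resultsTmp, paramGSAOrdered,
#                          perturbation=10, convergenceAnalysis=False,
#                          includeECLSGSA=False)
#     problem = main(GSATree, sample_size=2**6, initTree=initTree,
#                    simPostTree=simPostTree, simCompTree=simCompTree,
#                    paramGroupGSA=None)
#
# With SALib's default calc_second_order=True, sample_sobol(N) generates
# N * (2 * D + 2) model evaluations, where D is the number of parameters
# (or parameter groups).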
@jit
def evaluate(SensitivitySample, GSATree, initTree, simPostTree, simCompTree):
""" Evaluate the model for the specified SensitivitySample.
Args:
    - SensitivitySample: jax.Array -> one row of the GSA sample matrix (parameter values for one model run)
- GSATree: pytree() -> instance of GSA_pytree
- initTree: pytree() -> initTree instance from the model main
- simPostTree: pytree() -> simPostTree instance from the model main
- simCompTree: pytree() -> simCompTree instance from the model main
Returns:
list() -> output results for the specified SensitivitySample.
"""
initTree = initTree.update(dict(zip((list(GSATree.params.keys())), SensitivitySample)))
simulatedData = model_main.modeling(initTree, simPostTree, simCompTree)[0]
GSATree = GSATree.update(simulatedData.results)
    return list(GSATree.resultsTmp.values())
def evaluateEC(SensitivitySample, index, indexBasicParams, GSATree, initTree, simPostTree, simCompTree):
""" Evaluate the model for the specified SensitivitySample.
Args:
    - SensitivitySample: jax.Array -> one row of the GSA sample matrix (parameter values for one model run)
    - index: dict() -> indices of the ECMO rpm and CRRT drain/return parameters in the sample
    - indexBasicParams: [int] -> indices of all other (non-ECLS) parameters
- GSATree: pytree() -> instance of GSA_pytree
- initTree: pytree() -> initTree instance from the model main
- simPostTree: pytree() -> simPostTree instance from the model main
- simCompTree: pytree() -> simCompTree instance from the model main
Returns:
list() -> output results for the specified SensitivitySample.
"""
    # index = {'rpm': ..., 'drain': ..., 'return': ...} -> positions of the ECLS parameters in the sample vector
initTree = initTree.update(dict(zip((np.take(list(GSATree.params.keys()), indexBasicParams)), np.take(SensitivitySample, indexBasicParams))))
initTree.ECMO['pump'].update({'rpm': SensitivitySample[index['rpm']]})
initTree.CRRT['access'] = {'drain': {initialization.convert_index_to_access(SensitivitySample[index['drain']]): 1}, 'return': {initialization.convert_index_to_access(SensitivitySample[index['return']]): 1}}
simulatedData = model_main.modeling(initTree, simPostTree, simCompTree)[0]
GSATree = GSATree.update(simulatedData.results)
    return list(GSATree.resultsTmp.values())
def process_sample(sample, index, indexBasicParams, GSATree, initTree, simPostTree, simCompTree):
return evaluateEC(sample, index, indexBasicParams, GSATree, initTree, simPostTree, simCompTree)
def sample_ECLS(problem, GSATree):
"""
    Creates samples for the drain and return access sites of the CRRT when ECLS is included in the sensitivity analysis.
    Args:
    - problem: ProblemSpec() -> The problem object of the GSA containing the samples.
    - GSATree: pytree() -> instance of GSA_pytree.
    Returns:
    - problem: ProblemSpec() -> The updated problem object with the modified samples.
    - param_indices_ECLS: dict() -> A dictionary mapping the ECLS parameter names ('rpm', 'drain', 'return') to their indices in the GSATree.
    - param_indices_CVS: list() -> A list of indices of the non-ECLS parameters in the GSATree.
"""
param_names = ['rpm', 'drain', 'return']
combinations = [
(13, 12), (12, 12), (15, 14), (14, 15),
(14, 16), (15, 16), (15, 12), (14, 12), (13, 14)
]
param_indices_ECLS = {name: list(GSATree.params.keys()).index(name) for name in param_names}
param_indices_CVS = [i for i in range(len(list(GSATree.params.keys()))) if i not in list(param_indices_ECLS.values())]
sampler = LatinHypercube(d=1)
sample_indices_combinations = sampler.integers(l_bounds=0, \
u_bounds=len(combinations) -1, \
n=len(problem.samples[:, 0]), endpoint=True)
sample_combinations = np.array([combinations[i[0]] for i in sample_indices_combinations])
samples = problem.samples.copy()
samples[:, param_indices_ECLS['drain']] = sample_combinations[:, 0]
samples[:, param_indices_ECLS['return']] = sample_combinations[:, 1]
problem.set_samples(samples)
print('Total GSA samples', len(samples))
return problem, param_indices_ECLS, param_indices_CVS
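# Illustrative, standalone sketch of the combination sampling used above:
#
#     from scipy.stats.qmc import LatinHypercube
#     sampler = LatinHypercube(d=1)
#     idx = sampler.integers(l_bounds=0, u_bounds=8, n=4, endpoint=True)  # shape (4, 1)
#
# Each row of idx selects one (drain, return) combination, e.g. (13, 12), which
# sample_ECLS then writes into the 'drain' and 'return' columns of the Sobol'
# sample matrix in place of the continuous values generated by sample_sobol.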
def post_processing(problem, GSATree):
""" Post process the problem, printing and plotting.
Args:
- problem: ProblemSpec() instance -> the problem
- GSATree: pytree() instance of GSA_pytree
Returns:
    -
"""
categories = {
'Volume': {
'keys': ['ESVLV', 'EDVLV', 'ESVLA', 'EDVLA', 'ESVRV', 'EDVRV', 'ESVRA', 'EDVRA'],
'unit': 'mL'
},
'Pressure': {
'keys': ['SP', 'DP', 'MAP', 'SPAP', 'DPAP', 'MPAP', 'PCWP'],
'unit': 'mmHg'
},
'Flow': {
'keys': ['CO', 'PF'],
'unit': 'L/min'
}
}
# Create sorting of outputs w.r.t. categories
sorted_outputs = []
outputs_index_map = [] # To track the original indices for sorting ST and SDiff
for category, details in categories.items():
for key in details['keys']:
if key in problem['outputs']:
sorted_outputs.append((key, details['unit']))
outputs_index_map.append(problem['outputs'].index(key))
# Create directory
base_dir = file_manager.sensitivityFolder + 'Results'
perturbation_dir = os.path.join(base_dir, str(GSATree.perturbation) + '%')
if not os.path.exists(perturbation_dir):
os.makedirs(perturbation_dir)
# Plotting
plot_density_functions(problem, GSATree, perturbation_dir, sorted_outputs)
print(problem)
heatmap(problem, GSATree, perturbation_dir, sorted_outputs, outputs_index_map)
def heatmap(problem, GSATree, perturbation_dir, sorted_outputs, outputs_index_map):
""" Plots the problem through a heatmap.
Args:
- problem: ProblemSpec() instance -> the problem
- GSATree: pytree() instance of GSA_pytree
- perturbation_dir: str() -> path to the directory where the plots and csv are stored.
    - sorted_outputs: list() -> output (name, unit) tuples sorted by category.
    - outputs_index_map: list() -> indices of the sorted outputs in problem['outputs'].
Returns:
-
"""
# Get names in sorted_outputs
    sorted_outputs = [output[0] for output in sorted_outputs]
groups = _check_groups(problem)
if not groups:
namesGSA = problem['names']
else:
namesGSA, _ = extract_group_names(groups)
# Get most important parameters making up 90% of total sensitivity for each output
ST, SDiff = get_important_params(problem, GSATree, perturbation_dir, namesGSA)
# Reorder ST and SDiff according to sorted_outputs
ST = ST[outputs_index_map, :]
SDiff = SDiff[outputs_index_map, :]
# Write ST and SDiff to .csv
if GSATree.convergenceAnalysis: write_sensitivities(problem, ST, SDiff, GSATree, perturbation_dir, \
namesGSA, sorted_outputs)
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['font.size'] = 10
cm = 1/2.54
fig_height = 9 * cm
if ST.shape[1] > 8:
fig_width = 19 * cm
aspect = 40
else:
fig_width = 9 * cm
aspect = 20
# Plotting heatmap for ST
fig1, axs1 = plt.subplots(1, 1, figsize=(fig_width, fig_height))
im1 = axs1.imshow(ST)
axs1.yaxis.set_ticks(range(0, len(sorted_outputs)))
axs1.yaxis.set_ticklabels(sorted_outputs)
axs1.xaxis.set_ticks(range(0, ST.shape[1]))
axs1.xaxis.set_ticklabels(namesGSA, rotation=45, ha='right', va='top', rotation_mode='anchor')
if not GSATree.convergenceAnalysis: im1.set_clim(0, 1)
fig1.tight_layout()
fig1.colorbar(im1, ax=axs1, location='top', orientation='horizontal',
label='Total Order Sensitivity Index', aspect=aspect)
file_name1 = os.path.join(perturbation_dir, 'Result_ST_' + str(len(problem.samples)) + \
'_perturb_' + str(GSATree.perturbation) + '%_patient_'+ str(GSATree.clinicalData['patientId']))
fig1.savefig(file_name1 + '.pdf')
fig1.savefig(file_name1 + '.tiff')
fig1.savefig(file_name1 + '.jpg')
# Plotting heatmap for ST-S1
fig2, axs2 = plt.subplots(1, 1, figsize=(fig_width, fig_height))
im2 = axs2.imshow(SDiff)
axs2.yaxis.set_ticks(range(0, len(sorted_outputs)))
axs2.yaxis.set_ticklabels(sorted_outputs)
axs2.xaxis.set_ticks(range(0, SDiff.shape[1]))
axs2.xaxis.set_ticklabels(namesGSA, rotation=45, ha='right', va='top', rotation_mode='anchor')
if not GSATree.convergenceAnalysis: im2.set_clim(0, 1)
fig2.tight_layout()
fig2.colorbar(im2, ax=axs2, location='top', orientation='horizontal',
label='Difference Between Total and First Order Sensitivity Index', aspect=40)
file_name2 = os.path.join(perturbation_dir, 'Result_SDiff_' + str(len(problem.samples)) + \
'_perturb_' + str(GSATree.perturbation) + '%_patient_'+ str(GSATree.clinicalData['patientId']))
fig2.savefig(file_name2 + '.pdf')
fig2.savefig(file_name2 + '.tiff')
fig2.savefig(file_name2 + '.jpg')
def get_important_params(problem, GSATree, perturbation_dir, namesGSA):
"""
Calculate the important parameters based on sensitivity analysis.
Args:
- problem: dict() -> The problem dictionary containing the analysis results.
- GSATree: pytree() -> Instance of GSA_pytree
    - perturbation_dir: str -> The directory for results with the given perturbation.
- namesGSA: list() -> A list containing parameter or group names.
Returns:
- ST: np.array() -> A numpy array containing the total order sensitivity indices (ST) for each output.
- SDiff: np.array() -> A numpy array containing the difference between total order sensitivity indices
(ST) and first order sensitivity indices (S1) for each output.
"""
ST = [None]*len(problem['outputs'])
S1 = [None]*len(problem['outputs'])
most_important_parameters_per_output = []
print("Sum of Total Order Sensitivity Indices for each output:")
# Get parameters that make up 90% of ST for each output
ii = 0
for key in problem.analysis.keys():
ST[ii] = problem.analysis[key]['ST']
S1[ii] = problem.analysis[key]['S1']
total_sensitivity = np.sum(ST[ii])
print(f"{key}: {np.round(total_sensitivity, 4)}")
# Pair parameters with their total order sensitivity (ST)
param_ST_pairs = list(zip(namesGSA, ST[ii]))
# Sort parameters by their total order sensitivity in descending order
sorted_parameters = sorted(param_ST_pairs, key=lambda x: x[1], reverse=True)
# Accumulate parameters until reaching 90% of total sensitivity
cum_sum = 0
important_parameters = []
for param, sensitivity in sorted_parameters:
cum_sum += sensitivity
important_parameters.append(param)
if cum_sum >= 0.9 * total_sensitivity:
break
most_important_parameters_per_output.append(important_parameters)
ii += 1
# Union of all important parameters across all outputs
MIP_overall = set.union(*map(set, most_important_parameters_per_output))
print("\nMost important parameters overall:\n", MIP_overall)
ST = np.array(ST)
S1 = np.array(S1)
SDiff = ST - S1
plot_important_params(problem, GSATree, MIP_overall, perturbation_dir, namesGSA)
return ST, SDiff
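# Illustrative example of the 90% selection above (hypothetical numbers): for
# ST = [0.50, 0.30, 0.15, 0.05] the sorted cumulative sums are 0.50, 0.80, 0.95,
# 1.00, so the first three parameters already cover >= 0.9 * sum(ST) = 0.9 and
# are kept as "important" for that output.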
def plot_important_params(problem, GSATree, MIP_overall, perturbation_dir, namesGSA):
"""
Plot important parameters based on sensitivity analysis.
Args:
- problem: dict() -> The problem dictionary containing the analysis results.
- GSATree: pytree() -> Instance of GSA_pytree
- MIP_overall: set() -> Set of most important parameters across all outputs.
    - perturbation_dir: str -> The directory for results with the given perturbation.
- namesGSA: list() -> A list containing parameter or group names.
Returns:
-
"""
# Calculate the sum of ST for all outputs for each parameter and sort in descending order
ST_sums = {name: 0 for name in namesGSA}
for key in problem.analysis.keys():
for i, name in enumerate(namesGSA):
ST_sums[name] += problem.analysis[key]['ST'][i]
sorted_params = sorted(ST_sums.items(), key=lambda x: x[1], reverse=True)
# Assign colors based on whether the parameter is in the important parameters set
colors = ['orange' if param[0] in MIP_overall else 'gray' for param in sorted_params]
# Create the barplot
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['font.size'] = 10
cm = 1/2.54
fig_height = 9 * cm
if len(namesGSA) > 8:
fig_width = 19 * cm
else:
fig_width = 9 * cm
plt.figure(figsize=(fig_width, fig_height))
plt.bar(range(len(sorted_params)), [param[1] for param in sorted_params], color=colors)
important_params_count = len(MIP_overall)
plt.axvline(x=important_params_count - 0.5, color='black', linestyle='--', linewidth=1)
plt.xticks(range(len(sorted_params)), [param[0] for param in sorted_params], rotation=45, \
ha='right', va='top', rotation_mode='anchor')
plt.ylabel('Total order sensitivity index $S_T$')
plt.tight_layout()
file_name = os.path.join(perturbation_dir, 'Result_Important_Parameters_' + str(len(problem.samples)) + \
'_perturb_' + str(GSATree.perturbation) + '%_patient_'+ str(GSATree.clinicalData['patientId']))
plt.savefig(file_name + '.pdf')
plt.savefig(file_name + '.tiff')
plt.savefig(file_name + '.jpg')
def plot_density_functions(problem, GSATree, perturbation_dir, sorted_outputs):
"""
Generates a grid of density plots for each output parameter, based on the data stored in problem['results'].
The names of the outputs are taken from problem['outputs'].
Args:
- problem: ProblemSpec() -> Dictionary containing problem specification of GSA using SALib.
- GSATree: pytree() -> instance of GSA_pytree
- perturbation_dir: str() -> path to the directory where the plots are stored.
    - sorted_outputs: list() -> output (name, unit) tuples sorted by category.
Returns:
Plots containing PDFs for each output.
"""
Y = problem.results
output_names = problem['outputs']
# Calculate the grid size
num_outputs = len(output_names)
num_columns = 2
num_rows = ceil(num_outputs / num_columns)
# Set plot formatting
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['font.size'] = 10
cm = 1/2.54
fig_width = 19 * cm
fig_height = 23 * cm
fig, axs = plt.subplots(num_rows, num_columns, figsize=(fig_width, fig_height), constrained_layout=True)
# Make extra axes invisible if not needed
if num_outputs % num_columns:
for ax in axs.flat[num_outputs:]:
ax.set_visible(False)
# Flatten the axes array for easier indexing
axs = axs.flatten()
# Plot density for each output
for i, (output_name, unit) in enumerate(sorted_outputs):
# Locate the index of the output_name in the original results array
original_index = output_names.index(output_name)
sns.kdeplot(x=Y[:, original_index], ax=axs[i], fill=True)
axs[i].set_xlabel(f"{output_name} in {unit}", fontsize=10)
axs[i].set_ylabel('Density', fontsize=10)
plt.tight_layout()
file_name = os.path.join(perturbation_dir, 'Result_Density_' + str(len(problem.samples)) + \
'_perturb_' + str(GSATree.perturbation) + '%_patient_'+ str(GSATree.clinicalData['patientId']))
fig.savefig(file_name + '.pdf')
fig.savefig(file_name + '.tiff')
fig.savefig(file_name + '.jpg')
def write_sensitivities(problem, ST, SDiff, GSATree, perturbation_dir, namesGSA, sorted_outputs):
"""
Writes sensitivity indices ST and SDiff to csv files.
Args:
- problem: ProblemSpec() -> Dictionary containing problem specification of GSA using SALib.
- ST: array() -> An array of total order sensitivity indices.
- SDiff: array() -> An array of total order sensitivity indices minus first order sensitivity indices.
- GSATree: pytree() -> instance of GSA_pytree
- perturbation_dir: str() -> path to the directory where the csv files are stored.
- namesGSA: list() -> A list containing parameter or group names.
- sorted_outputs: list() -> A list of outputs sorted by categories.
Returns:
-
"""
convergence_dir = os.path.join(perturbation_dir, '01_Convergence_Analysis')
if not os.path.exists(convergence_dir):
os.makedirs(convergence_dir)
df_ST = pd.DataFrame(ST, columns=namesGSA, index=sorted_outputs)
df_SDiff = pd.DataFrame(SDiff, columns=namesGSA, index=sorted_outputs)
file_ST = os.path.join(convergence_dir, 'ST_' + str(len(problem.samples)) + \
'_perturb_' + str(GSATree.perturbation) + '%_patient_'+ str(GSATree.clinicalData['patientId']) + '.csv')
file_SDiff = os.path.join(convergence_dir, 'SDiff_' + str(len(problem.samples)) + \
'_perturb_' + str(GSATree.perturbation) + '%_patient_'+ str(GSATree.clinicalData['patientId']) + '.csv')
df_ST.to_csv(file_ST)
df_SDiff.to_csv(file_SDiff)
def read_param_file_GSA(filename, delimiter=' '):
"""
Read parameter file for Global Sensitivity Analysis (GSA).
    Args:
    - filename: str() -> The path to the parameter file.
    - delimiter: str(), optional -> The delimiter used in the parameter file. Default is a space (' ').
    Returns:
    - names: list() -> A list of parameter names.
    - groups: list() or None -> A list of group names corresponding to each parameter. If each parameter forms its own group, groups is set to None.
Raises:
- ValueError: If the groups are not defined correctly in the file or if only one group is defined.
"""
names = []
groups = []
with open(filename, "r") as csvfile:
sample = csvfile.read(1024)
if all(len(line.split()) > 1 for line in sample.split('\n') if line.strip() and not line.strip().startswith('#')):
has_group = True
fieldnames = ["name", "group"]
elif any(len(line.split()) > 1 for line in sample.split('\n') if line.strip() and not line.strip().startswith('#')):
raise ValueError("Sensitivity Analysis: Groups not defined correctly in file. Check input file again!")
else:
has_group = False
fieldnames = ["name"]
csvfile.seek(0)
reader = csv.DictReader(csvfile, fieldnames=fieldnames, delimiter=delimiter)
        for row in reader:
            # Skip comment lines
            if row["name"].strip().startswith("#"):
                continue
            names.append(row["name"])
            # If the second column does not contain a group name, fall back to
            # the parameter name
            if has_group:
                if row["group"] is None or row["group"] == "NA":
                    groups.append(row["name"])
                else:
                    groups.append(row["group"])
            else:
                groups.append(row["name"])
if groups == names:
groups = None
elif len(set(groups)) == 1:
raise ValueError(
"""Only one group defined, results will not be
meaningful"""
)
return names, groups
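# Example of the parameter file layout expected by read_param_file_GSA (hypothetical
# content, space-delimited):
#
#     # name group
#     Emaxlv heart
#     Emaxrv heart
#     Rsys vessels
#
# If no group column is present, or every group entry just repeats the parameter
# name, each parameter forms its own group and `groups` is returned as None; a
# group entry of "NA" likewise falls back to the parameter name.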