-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdbscan_analysis_randomness_check.py
73 lines (62 loc) · 3.12 KB
/
dbscan_analysis_randomness_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Main script for the project to explore t-SNE'd data
# The main script for running everything will be a bash script
import dbscan_ratio
import spectra
from sklearn.cluster import DBSCAN
import directories
import numpy as np
import pandas as pd
import os
import sys
# ___ Variables _______________________________________________________________ Variables
# These are needed for the naming scheme and for the run itself.
# Command line: RANGE_START RANGE_END SNR PERPLEXITY NUMBER_RANDOMNESS
if len(sys.argv) < 6:
    # Fail with a readable message instead of a bare IndexError below.
    sys.exit('Usage: {} RANGE_START RANGE_END SNR PERPLEXITY NUMBER_RANDOMNESS'.format(
        sys.argv[0]))

SELECTED_RANGES = [(float(sys.argv[1]), float(sys.argv[2]))]
SNR = int(sys.argv[3])
PERPLEXITY = int(sys.argv[4])
NUMBER_RANDOMNESS = int(sys.argv[5])

# ___ Directory paths _________________________________________________________ Directory paths
dirs = directories.directories()

# ___ Import the t-SNE data ____________________________________________________ Import t-SNE results
print('='*40)
print('Loading data..')
tsne_data = np.load(
    dirs.tsne_results + 'tSNE_results_range_{}_perplexity_{}_SNRof{}.npy'.format(
        SELECTED_RANGES, PERPLEXITY, SNR))
stellar_parameters = pd.read_csv(
    dirs.data + 'stellar_parameters_duchenekrauspopulation.csv')

# DBSCAN parameters ____________________________________________________________
minEpsilon = 0.1
maxEpsilon = 0.75
min_minSamples = 25
max_minSamples = 125
# Settings applied to the dbscan object before the parameter-space exploration.
# They are also part of the output file name, so they are defined once here.
RATIO = 0.9
ITERATIONS = 10

# Run the main DBSCAN
dbscan = dbscan_ratio.dbscan_method(tsne_data[:, 0], tsne_data[:, 1],
                                    stellar_parameters['binarity'], minEpsilon,
                                    maxEpsilon, min_minSamples, max_minSamples,
                                    stellar_parameters, SELECTED_RANGES, SNR, PERPLEXITY)

# Build the output name from the values this run is configured with, so that
# the existence check and the save below refer to the same file.
# NOTE(review): assumes explore_parameter_space() does not change
# dbscan.ratio / dbscan.iterations - confirm against dbscan_ratio.
FILENAME_DBSCAN_RESULT = (
    dirs.dbscan_results +
    'DBSCAN_parameterspace_range_{}_perplexity_{}_SNRof{}_ratio_{}_iterations_{}_randomnesscheck_{}.csv'.format(
        SELECTED_RANGES, PERPLEXITY, SNR, RATIO, ITERATIONS, NUMBER_RANDOMNESS))

if os.path.isfile(FILENAME_DBSCAN_RESULT):
    print('File exists already, skipping this analysis..')
    # sys.exit() rather than the interactive-only exit() helper from `site`.
    sys.exit()

dbscan.normalize_data_tSNE()
dbscan.iterations = ITERATIONS
dbscan.ratio = RATIO
dbscan.explore_parameter_space()
# Alternative: load a previously saved parameter space instead of recomputing it.
# dbscan.parameter_space = pd.read_csv(
#     dirs.data + 'DBSCAN_parameterspace_range_{}_perplexity_{}_SNRof{}_ratio_{}.csv'.format(SELECTED_RANGES,
#         PERPLEXITY, SNR, dbscan.ratio))

# Save the results from the parameter space exploration
dbscan.parameter_space.to_csv(FILENAME_DBSCAN_RESULT, index=False)

# print("="*40)
# print("Plotting...")
# dbscan.plot_tsne_maps()
# dbscan.plot_parameter_space()
# dbscan.plot_histograms()
# print("="*40)
# print("Done!")