# parse_csv.py
# This script is run separately from app.py. It reads and processes the data files.
# Errors are added manually for now.
import pandas as pd
# Load the two raw inputs. Per the original notes, the observed land-ocean
# temperatures are in degrees Celsius and the forcing responses are in kelvin.
land_ocean_data = pd.read_csv("./data/observed_land-ocean_temperature.csv")
climate_forcings_data = pd.read_csv("./data/responses_to_climate_forcings.csv")

# Working copies that become the output CSVs: one Celsius (_c) and one
# Fahrenheit (_f) version per input. Each is an independent copy, so the
# raw frames loaded above are never modified.
land_ocean_data_c, land_ocean_data_f = (
    land_ocean_data.copy(),
    land_ocean_data.copy(),
)
climate_forcings_data_c, climate_forcings_data_f = (
    climate_forcings_data.copy(),
    climate_forcings_data.copy(),
)
# Functions to convert between temperature units.
def kelvin_to_celsius(x):
    """Convert a temperature from kelvin to degrees Celsius.

    Uses plain arithmetic only, so it works on a single number as well as on
    anything that supports element-wise subtraction (this script passes it
    to ``DataFrame.apply``).

    Args:
        x: Temperature(s) in kelvin.

    Returns:
        The same temperature(s) expressed in degrees Celsius.
    """
    kelvin_at_zero_celsius = 273.15
    return x - kelvin_at_zero_celsius
def kelvin_to_fahrenheit(x):
    """Convert a temperature from kelvin to degrees Fahrenheit.

    Uses plain arithmetic only, so it works on a single number as well as on
    anything that supports element-wise arithmetic (this script passes it
    to ``DataFrame.apply``).

    Args:
        x: Temperature(s) in kelvin.

    Returns:
        The same temperature(s) expressed in degrees Fahrenheit.
    """
    fahrenheit_per_kelvin = 1.8
    absolute_zero_offset = 459.67  # 0 K corresponds to -459.67 degrees F
    scaled = x * fahrenheit_per_kelvin
    return scaled - absolute_zero_offset
def celsius_to_fahrenheit(x):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit.

    Uses plain arithmetic only, so it works on a single number as well as on
    anything that supports element-wise arithmetic (this script passes it
    to ``DataFrame.apply``).

    Args:
        x: Temperature(s) in degrees Celsius.

    Returns:
        The same temperature(s) expressed in degrees Fahrenheit.
    """
    fahrenheit_per_celsius = 1.8
    freezing_point_fahrenheit = 32
    scaled = x * fahrenheit_per_celsius
    return scaled + freezing_point_fahrenheit
# Bring every working copy into its target unit.
# Forcing responses: every column after the first is converted from kelvin.
forcings_in_celsius = climate_forcings_data_c.iloc[:, 1:].apply(kelvin_to_celsius)
climate_forcings_data_c.iloc[:, 1:] = forcings_in_celsius

forcings_in_fahrenheit = climate_forcings_data_f.iloc[:, 1:].apply(
    kelvin_to_fahrenheit
)
climate_forcings_data_f.iloc[:, 1:] = forcings_in_fahrenheit

# Observed temperatures: only the Fahrenheit copy needs converting, and only
# its second and third columns (positions 1 and 2) are touched.
observed_in_fahrenheit = land_ocean_data_f.iloc[:, [1, 2]].apply(
    celsius_to_fahrenheit
)
land_ocean_data_f.iloc[:, [1, 2]] = observed_in_fahrenheit
# Trim each dataset to the year range used for the outputs.
# An explicit .copy() is taken after every boolean-mask selection so that each
# result is an independent frame: it can be modified afterwards without pandas
# emitting SettingWithCopyWarning ("a value is trying to be set on a copy of a
# slice from a DataFrame").

# Remove data before 1880 from the climate forcings data.
climate_forcings_data_c = climate_forcings_data_c.loc[
    climate_forcings_data_c["Year"] >= 1880
].copy()
climate_forcings_data_f = climate_forcings_data_f.loc[
    climate_forcings_data_f["Year"] >= 1880
].copy()

# Remove data after 2005 from the observed temperature data.
land_ocean_data_c = land_ocean_data_c.loc[
    land_ocean_data_c["Year"] <= 2005
].copy()
land_ocean_data_f = land_ocean_data_f.loc[
    land_ocean_data_f["Year"] <= 2005
].copy()
def _subtract_baseline(frame, start_year=1880, end_year=1910):
    """Return a copy of *frame* with each data column offset by its baseline mean.

    For every column after the first, the mean of that column over the rows
    whose "Year" lies in ``[start_year, end_year]`` (both ends inclusive) is
    computed and subtracted from the whole column, so the values become
    differences from the baseline-period average.

    The first column is left untouched; it is assumed to be the year axis,
    as everywhere else in this script.

    Args:
        frame: DataFrame with a "Year" column and the data columns after the
            first column.
        start_year: First year of the baseline period (inclusive).
        end_year: Last year of the baseline period (inclusive).

    Returns:
        A new DataFrame; *frame* itself is not modified.

    Raises:
        KeyError: If *frame* has no "Year" column.
    """
    in_baseline = frame["Year"].between(start_year, end_year)
    # Work on a copy so we never write into a filtered slice of another frame.
    anomalies = frame.copy()
    for column in frame.columns[1:]:
        baseline_mean = frame.loc[in_baseline, column].mean()
        # Vectorised subtraction; same result as subtracting value by value.
        anomalies[column] = frame[column] - baseline_mean
    return anomalies


# Re-express every dataset relative to its own 1880-1910 average.
# Subtracting a column's own mean also cancels any constant offset added by
# the unit conversion (e.g. the +32 in Celsius -> Fahrenheit).

# Climate forcings, Celsius and Fahrenheit.
climate_forcings_data_c = _subtract_baseline(climate_forcings_data_c)
climate_forcings_data_f = _subtract_baseline(climate_forcings_data_f)

# Observed temperatures, Celsius and Fahrenheit.
land_ocean_data_c = _subtract_baseline(land_ocean_data_c)
land_ocean_data_f = _subtract_baseline(land_ocean_data_f)
# Write the processed frames out as CSV files (without the index column),
# one Celsius and one Fahrenheit file per dataset.
for csv_name, processed_frame in (
    ("land_ocean_c_filtered.csv", land_ocean_data_c),
    ("land_ocean_f_filtered.csv", land_ocean_data_f),
    ("climate_forcings_c_filtered.csv", climate_forcings_data_c),
    ("climate_forcings_f_filtered.csv", climate_forcings_data_f),
):
    processed_frame.to_csv(csv_name, index=False)