-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculate_phys_statistics.py
executable file
·251 lines (232 loc) · 12.5 KB
/
calculate_phys_statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/usr/bin/env ipython
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 3 14:11:45 2018
@author: siirias
"""
import datetime
import matplotlib as mp
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import netcdf
import netCDF4 as nc4
from netCDF4 import Dataset
from smartseahelper import smh
import os
import cmocean
import pandas as pd
# --- Run configuration -------------------------------------------------------
# Helper object from smartseahelper. The attributes assigned here are starting
# values only: file_name_format, interval, grid_type and save_interval are
# reassigned inside the processing loops further down.
ss = smh()
ss.grid_type = 'T'
ss.interval = 'd'
ss.save_interval = 'year'
ss.file_name_format = "NORDIC-GoB_1{}_{}_{}_grid_{}.nc"

just_bottom = False  # used to get bottom values from 3d grid

# Prefix put in front of each series marker when the input folder is built.
#folder_start='OUTPUT'
#name_markers=['A001','A002']
folder_start = ''

# Data series to process; earlier selections are kept for reference.
#name_markers=['REANALYSIS','A001','B001','C001']
#name_markers=['A001','B001','C001', 'D001']
#name_markers=['A005','A002','A001','B005','B002','B001','C001','C002','D001','D002','D005']
name_markers = ['B001', 'B005', 'B002', 'D001', 'D005', 'D002']

# Variables to process for every series; earlier selections kept for reference.
#variables = ['SSS', 'SST', 'SSH_inst']
#variables= ['icecon','icevolume','SSS','SST','SBS','vosaline','votemper']
variables = ['SSS', 'SST', 'SBS', 'vosaline', 'votemper']
#variables= ['vosaline', 'votemper']
#variables= ['SBS','SSS']

make_climatology = True

# Prefix for the climatology output file names.
extra_definition = 'c20v_'
#extra_definition = ''
#name_marker='A001' #this one tells which dataseries is handled.

# One climatology slot per day of a (leap) year, or one per month when the
# data interval is monthly.
climatology_time_slots = 12 if ss.interval == 'm' else 366
def _layer_volumes(areas, depths):
    """Return per-layer cell volumes with the depth layer as leading axis.

    `areas` is the 2-D array of horizontal cell areas and `depths` the 1-D
    depth axis of the file.  The first layer is taken to reach from the
    surface down to depths[0]; every other layer spans the distance to the
    previous depth value.  The result lines up with one time frame of a
    (time, deptht, y, x) field.
    """
    volumes = np.repeat(areas[np.newaxis, :, :], len(depths), axis=0)
    for layer in range(len(depths)):
        if layer > 0:
            layer_depth = depths[layer] - depths[layer - 1]
        else:
            layer_depth = depths[layer]
        # Depth is axis 0 of `volumes`; scaling along the last axis (as the
        # code did before) would scale columns of x instead of layers.
        # 0.001: "because km" per the original author (presumably m -> km).
        volumes[layer, :, :] *= layer_depth * 0.001
    return volumes


def _weighted_mean(field, weights):
    """Return the mean of masked `field` over unmasked cells, weighted by `weights`."""
    return np.sum(field * weights) / np.sum(~field.mask * weights)


def _create_climatology_file(path, source, var_name, source_var_name,
                             lats, lons, time_slots, data_3d):
    """Create the climatology NetCDF file and return its handles.

    Dimension sizes and the units of the mean variable are copied from the
    open input dataset `source`.  Returns a tuple
    (dataset, sample_counter_variable, mean_variable); the mean variable is
    initialised to 0.0 and the sample counter to zeros.
    """
    clim = nc4.Dataset(path, 'w', format='NETCDF4')
    clim.createDimension('x', source.dimensions['x'].size)
    clim.createDimension('y', source.dimensions['y'].size)
    clim.createDimension('time', time_slots)
    if data_3d:
        clim.createDimension('deptht', source.dimensions['deptht'].size)
        variable_axis = ('time', 'deptht', 'y', 'x')
    else:
        variable_axis = ('time', 'y', 'x')
    clim_lat = clim.createVariable('nav_lat', 'f4', ('y', 'x'))
    clim_lon = clim.createVariable('nav_lon', 'f4', ('y', 'x'))
    clim_samples = clim.createVariable('sample_number', 'i4', ('time',))
    # NOTE(review): the name of the slot axis follows the dimensionality of
    # the data, not ss.interval, and its units are always 'day of year'.
    # Kept as is so existing readers of these files keep working.
    slot_axis_name = 'month_of_year' if data_3d else 'day_of_year'
    clim_day = clim.createVariable(slot_axis_name, 'i4', ('time',))
    clim_day[:] = np.arange(1, time_slots + 1)
    clim_samples[:] = np.zeros((time_slots,))
    clim_mean = clim.createVariable('{}_mean'.format(var_name), 'f4', variable_axis)
    clim_lat[:] = lats
    clim_lon[:] = lons
    clim_mean[:] = 0.0
    clim_lon.units = 'degrees east'
    clim_lat.units = 'degrees north'
    clim_day.units = 'day of year'
    clim_mean.units = source.variables[source_var_name].units
    clim_mean.coordinates = "time_centered nav_lon nav_lat"
    return clim, clim_samples, clim_mean


# Main processing: for every data series and every variable, compute the
# area/volume weighted mean of each time frame, yearly min/max/mean of those
# means, and (optionally) a per-slot climatology written to NetCDF.
for name_marker in name_markers:
    # Markers containing 'A' or 'B' use the NORDIC-GOB prefix, all others SS-GOB.
    if 'A' in name_marker or 'B' in name_marker:
        ss.file_name_format = "NORDIC-GOB_1{}_{}_{}_grid_{}.nc"
    else:
        ss.file_name_format = "SS-GOB_1{}_{}_{}_grid_{}.nc"
    for var1 in variables:
        var1_actual = var1  # some variables load actually a different name
        if '1' in name_marker:  # the 001 series are hindcasts, all other scenarios
            startdate = datetime.datetime(1975, 1, 1)
            enddate = datetime.datetime(2005, 12, 31)
        elif 'REANALYSIS' in name_marker:
            startdate = datetime.datetime(1980, 1, 1)
            enddate = datetime.datetime(2012, 12, 31)
            ss.save_interval = 'year'
        else:
            startdate = datetime.datetime(2048, 1, 1)  # should be 2006 #2028 for earlier
            enddate = datetime.datetime(2058, 12, 31)
        datadir = ss.root_data_out + "/derived_data/test/"  # where every output is stored
        ss.main_data_folder = ss.root_data_in + "/{}{}/".format(folder_start, name_marker)
        depth_ax = 'deptht'  # not used further in this script

        # NOTE(review): a variable listed in neither group below (e.g. 'SBS')
        # keeps whatever interval the previously processed variable selected.
        if var1 in ['SST', 'SSS', 'SSH_inst', 'SBT', 'icecon', 'icevolume']:
            ss.grid_type = 'T'
            ss.interval = 'd'
            climatology_time_slots = 366
        if var1 in ['vosaline', 'votemper', 'SBT']:
            ss.grid_type = 'T'
            ss.interval = 'm'
            climatology_time_slots = 12
            depth_ax = 'deptht'
        # Decide per variable whether bottom values are wanted.  The global
        # just_bottom switch is honoured but no longer modified, so handling
        # 'SBT' once does not turn every later variable into bottom values.
        use_bottom = just_bottom
        if var1 in ['SBT']:
            use_bottom = True
            var1_actual = 'votemper'

        # Descriptive series label; not used further in this script.
        if '1' in name_marker:  # the 001 series are hindcasts, all other scenarios
            series_name = 'Hindcast_{}_{}'.format(name_marker, var1)
        elif 'REANALYSIS' in name_marker:
            series_name = '{}_{}'.format(name_marker, var1)
        else:
            series_name = 'Scenario_{}_{}'.format(name_marker, var1)

        print("Analysing files from {}".format(ss.main_data_folder))
        filenames = ss.filenames_between(startdate, enddate)
        files_working = []
        for f in filenames:
            if os.path.isfile(ss.main_data_folder + f):
                files_working.append(f)
            else:
                print(f)  # report the expected file that is missing
        ok_files = len(files_working)
        print()
        print("ok {} out of {}".format(ok_files, len(filenames)))

        running_number = 0
        just_one = False  # debugging switch: first file and first time frame only
        if just_one:
            files_working = files_working[:1]
        mean_variables = []
        time_axis = []
        yearly_variable_data = {}
        # The climatology file is created when the first usable time frame is
        # met, because its dimensions are copied from that input file.
        climatology_data = None
        clim_samples = None
        clim_variable_mean = None

        for f in files_working:
            # The context manager closes the input file even if a step fails.
            with Dataset(ss.main_data_folder + f) as data:
                d = data.variables[var1_actual][:]
                if use_bottom:  # this is 3d value, where bottom is wanted
                    d = ss.give_bottom_values(d)
                d = np.ma.masked_where(d == 0.0, d)  # exact zeros count as "no data"
                lons = data.variables['nav_lon'][:]
                lats = data.variables['nav_lat'][:]
                lats, lons = ss.fix_latslons(lats, lons)
                times = data.variables['time_counter'][:].data
                areas = ss.give_areas(lats, lons)
                data_3d = len(d.shape) == 4
                if data_3d:
                    depths = data.variables['deptht'][:]
                    volumes = _layer_volumes(areas, depths)
                if just_one:
                    times = times[:1]

                for time_frame in range(len(times)):
                    time = ss.nemo_time_to_datetime(times[time_frame])
                    frame = d[time_frame]
                    if not np.sum(~frame.mask) > 0:
                        continue  # no actual numbers in this frame

                    mean_variable = _weighted_mean(frame, volumes if data_3d else areas)

                    year_stats = yearly_variable_data.get(time.year)
                    if year_stats is None:
                        yearly_variable_data[time.year] = {'year': time.year,
                                                           'min': mean_variable,
                                                           'max': mean_variable,
                                                           'mean': mean_variable,
                                                           'samples': 1}
                    else:
                        year_stats['samples'] += 1
                        year_stats['mean'] += mean_variable  # turned into a mean after the loop
                        if year_stats['min'] > mean_variable:
                            year_stats['min'] = mean_variable
                        if year_stats['max'] < mean_variable:
                            year_stats['max'] = mean_variable

                    mean_variables.append(mean_variable)
                    time_axis.append(time)

                    if make_climatology:
                        if climatology_data is None:
                            climatology_data, clim_samples, clim_variable_mean = \
                                _create_climatology_file(
                                    datadir + '{}climatology_{}_{}_{}.nc'.format(
                                        extra_definition, name_marker, ss.interval, var1),
                                    data, var1, var1_actual, lats, lons,
                                    climatology_time_slots, data_3d)
                        # Zero-based slot: month of year or day of year.
                        if ss.interval == 'm':
                            slot = time.month - 1
                        elif ss.interval == 'd':
                            slot = time.timetuple().tm_yday - 1
                        else:
                            slot = None  # other intervals are not accumulated
                        if slot is not None:
                            clim_variable_mean[slot, ...] += frame
                            clim_samples[slot] += 1

                    running_number += 1
                    print("Analysing {} ({} of approx {})".format(
                        time, running_number, int(len(files_working) * 30.5)))

        if not mean_variables:
            # Nothing usable was read; there is nothing to average or write.
            print("No usable data for {} {}, no output written".format(name_marker, var1))
            continue

        # fix the averages:
        for year_stats in yearly_variable_data.values():
            year_stats['mean'] /= float(year_stats['samples'])
        if make_climatology:
            for slot in range(climatology_time_slots):
                if clim_samples[slot] > 1:  # sums with 0 or 1 samples need no division
                    # Ellipsis keeps this valid for both 3-D and 4-D variables.
                    clim_variable_mean[slot, ...] = \
                        clim_variable_mean[slot, ...] / float(clim_samples[slot])
            climatology_data.close()

        mean_variables = np.array(mean_variables)
        time_axis = np.array(time_axis)
        full_data = pd.DataFrame({'time': time_axis, 'mean_{}'.format(var1): mean_variables})
        full_data.to_csv(datadir + 'mean_{}_{}.csv'.format(var1, name_marker), index=False)
        yearly_data = pd.DataFrame(yearly_variable_data).transpose()
        yearly_data.to_csv(datadir + 'yearly_{}_{}.csv'.format(var1, name_marker), index=False)