-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathintensity_hdf5_file.cpp
339 lines (257 loc) · 11 KB
/
intensity_hdf5_file.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
#include <iostream>
#include "ch_frb_io_internals.hpp"
using namespace std;
namespace ch_frb_io {
#if 0
}; // pacify emacs c-mode!
#endif
// Helper function for intensity_hdf5_file constructor
static void _init_frequencies(intensity_hdf5_file &f, hdf5_group &g)
{
vector<hsize_t> freq_shape;
g.get_dataset_shape("freq", freq_shape);
if (freq_shape.size() != 1)
throw runtime_error(f.filename + ": expected index_map.freq.ndim == 1");
f.nfreq = freq_shape[0];
vector<double> frequency_Hz(f.nfreq);
g.read_dataset("freq", &frequency_Hz[0], freq_shape);
if (f.nfreq < 2)
throw runtime_error(f.filename + ": expected nfreq >= 2");
double nu_lo = 1.0e-6 * frequency_Hz[0]; // Hz -> MHz
double nu_hi = 1.0e-6 * frequency_Hz[f.nfreq-1]; // Hz -> MHz
nu_hi += (nu_hi - nu_lo) / (f.nfreq-1); // actual endpoint of band
for (int i = 0; i < f.nfreq; i++) {
// Expected and actual frequency
double nu_exp = ((f.nfreq-i)*nu_lo + i*nu_hi) / (double)f.nfreq;
double nu_act = 1.0e-6 * frequency_Hz[i]; // Hz -> MHz
if (fabs(nu_exp-nu_act) > 1.0e-6 * fabs(nu_hi-nu_lo))
throw runtime_error(f.filename + ": expected equally spaced frequencies");
}
if (nu_lo == nu_hi)
throw runtime_error(f.filename + ": frequencies in file are all equal?!");
f.frequencies_are_increasing = (nu_lo < nu_hi);
f.freq_lo_MHz = min(nu_lo, nu_hi);
f.freq_hi_MHz = max(nu_lo, nu_hi);
}
// Helper function for intensity_hdf5_file constructor
static void _init_polarizations(intensity_hdf5_file &f, hdf5_group &g)
{
vector<hsize_t> pol_shape;
g.get_dataset_shape("pol", pol_shape);
if (pol_shape.size() != 1)
throw runtime_error(f.filename + ": expected index_map.pol.ndim == 1");
if ((pol_shape[0] != 1) && (pol_shape[0] != 2))
throw runtime_error(f.filename + ": expected npol==1 or npol==2");
f.npol = pol_shape[0];
g.read_string_dataset("pol", f.polarizations, pol_shape);
for (int ipol = 0; ipol < f.npol; ipol++) {
if ((f.polarizations[ipol] != "XX") && (f.polarizations[ipol] != "YY"))
throw runtime_error(f.filename + ": expected polarization string to be either 'XX' or 'YY', got '" + f.polarizations[ipol] + "'");
for (int jpol = 0; jpol < ipol; jpol++)
if (f.polarizations[ipol] == f.polarizations[jpol])
throw runtime_error(f.filename + ": duplicate polarizations in file?!");
}
}
// Helper function for intensity_hdf5_file constructor
static void _init_times(intensity_hdf5_file &f, hdf5_group &g)
{
vector<hsize_t> time_shape;
g.get_dataset_shape("time", time_shape);
if (time_shape.size() != 1)
throw runtime_error(f.filename + ": expected index_map.time.ndim == 1");
f.nt_file = time_shape[0];
if (f.nt_file < 2)
throw runtime_error(f.filename + ": expected nt >= 2");
f.times.resize(f.nt_file);
g.read_dataset("time", &f.times[0], time_shape);
// provisional dt_sample = min_i(dt[i+1]-dt[i])
double dt_p = f.times[1] - f.times[0];
for (int i = 2; i < f.nt_file; i++)
dt_p = min(dt_p, f.times[i] - f.times[i-1]);
if (dt_p <= 0.0)
throw runtime_error(f.filename + ": expected time array to be monotone increasing");
f.time_index_mapping.resize(f.nt_file, 0);
for (int i = 1; i < f.nt_file; i++) {
double dt = f.times[i] - f.times[i-1];
if (dt > 1.0e6 * dt_p)
throw runtime_error(f.filename + ": file has unreasonably large time gap");
int nstep = (int)(dt/dt_p + 0.5);
if ((nstep < 1) || (nstep > 1000000))
throw runtime_error(f.filename + ": internal error ('impossible' nstep)");
if (fabs(dt - nstep*dt_p) > 0.01*dt_p) {
cerr << "debug: " << dt << " " << dt_p << " " << nstep << endl;
throw runtime_error(f.filename + ": hmmm, time steps in file do not appear to be integer multiples of a common step size ");
}
f.time_index_mapping[i] = f.time_index_mapping[i-1] + nstep;
if (f.time_index_mapping[i] > 100000000)
throw runtime_error(f.filename + ": file is unreasonably gappy");
}
f.nt_logical = f.time_index_mapping[f.nt_file-1] + 1;
f.time_lo = f.times[0];
f.time_hi = f.times[f.nt_file-1] + dt_p;
f.dt_sample = (f.time_hi - f.time_lo) / f.nt_logical;
}
intensity_hdf5_file::intensity_hdf5_file(const string &filename_, bool noisy) :
filename(filename_)
{
hdf5_file f(filename);
hdf5_group g_root(f, ".");
hdf5_group g_im(f, "index_map");
_init_frequencies(*this, g_im);
_init_polarizations(*this, g_im);
_init_times(*this, g_im);
vector<hsize_t> data_shape(3);
data_shape[0] = nfreq;
data_shape[1] = npol;
data_shape[2] = nt_file;
this->intensity.resize(nfreq * npol * nt_file);
this->weights.resize(nfreq * npol * nt_file);
g_root.read_dataset("intensity", &intensity[0], data_shape);
g_root.read_dataset("weight", &weights[0], data_shape);
// Note: these need to be double-precision
double wsum = 0.0;
double wmax = 0.0;
for (int i = 0; i < nfreq*npol*nt_file; i++) {
if (weights[i] < 0.0)
throw runtime_error(filename + ": negative value in weight array, this is currently considered an error");
wsum += weights[i];
wmax = max(wmax, (double)weights[i]);
}
if (wmax == 0.0)
cerr << (filename + ": warning: weight array is all zeros\n");
this->frac_ungapped = (double)nt_file / (double)nt_logical;
this->frac_unmasked = (wmax > 0.0) ? (wsum / wmax / (float)(nfreq*npol*nt_file)) : 0.0;
if (noisy)
cout << "read " << filename << ", frac_ungapped=" << frac_ungapped << ", frac_unmasked=" << frac_unmasked << endl;
}
void intensity_hdf5_file::get_unpolarized_intensity(float *out_int, float *out_wt, int out_t0, int out_nt, int out_istride, int out_wstride) const
{
if (out_istride == 0)
out_istride = out_nt;
if (out_wstride == 0)
out_wstride = out_istride;
if (out_int==nullptr || out_wt==nullptr)
throw runtime_error("NULL pointer passed to intensity_hdf5_file::get_unpolarized_intensity()");
if (out_nt <= 0)
throw runtime_error("intensity_hdf5_file::get_unpolarized_intensity(): expected out_nt > 0");
if ((out_t0 < 0) || (out_t0 + out_nt > this->nt_logical))
throw runtime_error("intensity_hdf5_file::get_unpolarized_intensity(): requested time range overflows file");
if (abs(out_istride) < out_nt)
throw runtime_error("intensity_hdf5_file::get_unpolarized_intensity(): out_istride is too small");
if (abs(out_wstride) < out_nt)
throw runtime_error("intensity_hdf5_file::get_unpolarized_intensity(): out_wstride is too small");
for (int ifreq = 0; ifreq < nfreq; ifreq++) {
memset(out_int + ifreq*out_istride, 0, out_nt * sizeof(*out_int));
memset(out_wt + ifreq*out_wstride, 0, out_nt * sizeof(*out_wt));
}
//
// First pass: dst_int = (weight*intensity) and dst_wt = (weight).
// FIXME this loop could be written significantly more efficiently!
//
for (int it_f = 0; it_f < nt_file; it_f++) {
int it_l = time_index_mapping[it_f];
if ((it_l < out_t0) || (it_l >= out_t0+out_nt))
continue;
// 1D arrays with length=nfreq and stride=(out_istride,out_wstride).
float *dst_int = out_int + (it_l - out_t0);
float *dst_wt = out_wt + (it_l - out_t0);
for (int ipol = 0; ipol < npol; ipol++) {
// 1D arrays with length=nfreq and stride=in_stride
const float *src_int = &this->intensity[0] + ipol*nt_file + it_f;
const float *src_wt = &this->weights[0] + ipol*nt_file + it_f;
const int in_stride = npol * nt_file;
for (int ifreq = 0; ifreq < nfreq; ifreq++) {
int s = ifreq * in_stride;
dst_int[ifreq*out_istride] += src_int[s] * src_wt[s];
dst_wt[ifreq*out_wstride] += src_wt[s];
}
}
}
// Second pass: dst_int = (intensity)
for (int ifreq = 0; ifreq < nfreq; ifreq++) {
// 1D arrays with length=out_nt
float *dst_int = out_int + ifreq * out_istride;
float *dst_wt = out_wt + ifreq * out_wstride;
for (int it = 0; it < out_nt; it++) {
if (dst_wt[it] > 0.0)
dst_int[it] /= dst_wt[it];
}
}
}
void intensity_hdf5_file::run_unit_tests() const
{
cerr << filename << ": starting unit test\n";
std::random_device rd;
std::mt19937 rng(rd());
for (int it_f = 0; it_f < nt_file; it_f++) {
int it_l = time_index_mapping[it_f];
if ((it_l < 0) || (it_l >= nt_logical))
throw runtime_error(filename + ": time_index_mapping index out of range");
// Expected and actual times
double t_exp = ((nt_logical-it_l)*time_lo + it_l*time_hi) / (double)nt_logical;
double t_act = times[it_f];
if (fabs(t_exp-t_act) > 0.01*dt_sample)
throw runtime_error(filename + ": timestamp check failed");
}
// The rest of this routine tests get_unpolarized_intensity()
// First we populate "reference" buffers for the whole time range,
// using the most boneheaded procedure possible.
vector<float> ref_int(nfreq * nt_logical, 0.0);
vector<float> ref_wt(nfreq * nt_logical, 0.0);
for (int ifreq = 0; ifreq < nfreq; ifreq++) {
for (int ipol = 0; ipol < npol; ipol++) {
for (int it_f = 0; it_f < nt_file; it_f++) {
int it_l = time_index_mapping[it_f];
int s = ifreq*npol*nt_file + ipol*nt_file + it_f;
int d = ifreq*nt_logical + it_l;
ref_int[d] += weights[s] * intensity[s];
ref_wt[d] += weights[s];
}
}
}
for (int i = 0; i < nfreq*nt_logical; i++) {
if (ref_wt[i] > 0.0)
ref_int[i] /= ref_wt[i];
}
// Next we make some random calls to intensity_hdf5_file::get_unpolarized_intensity()
// and compare to the reference buffers.
vector<float> v_int(4 * nfreq * nt_logical, 0.0);
vector<float> v_wt(4 * nfreq * nt_logical, 0.0);
// These pointers have been chosen so that strides in the range [-2*nt_logical,2*nt_logical] will not overflow
float *buf_int = &v_int[0] + 2 * nfreq * nt_logical;
float *buf_wt = &v_wt[0] + 2 * nfreq * nt_logical;
for (int n = 0; n < 100; n++) {
cerr << ".";
// make random (it0, nt)
int buf_it0 = randint(rng, 0, nt_logical-1);
int buf_it1 = randint(rng, 0, nt_logical-1);
if (buf_it0 > buf_it1) std::swap(buf_it0, buf_it1);
int buf_nt = buf_it1 - buf_it0 + 1;
// random strides, can be positive or negative
int istride = randint(rng, buf_nt, 2*nt_logical);
int wstride = randint(rng, buf_nt, 2*nt_logical);
if (uniform_rand(rng) > 0.5) istride *= -1;
if (uniform_rand(rng) > 0.5) wstride *= -1;
// randomize buffer
for (int ifreq = 0; ifreq < nfreq; ifreq++) {
uniform_rand(rng, buf_int + ifreq*istride, buf_nt);
uniform_rand(rng, buf_wt + ifreq*wstride, buf_nt);
}
// randomized call to intensity_hdf5_file::get_unpolarized_intensity()
this->get_unpolarized_intensity(buf_int, buf_wt, buf_it0, buf_nt, istride, wstride);
// compare with reference
for (int ifreq = 0; ifreq < nfreq; ifreq++) {
for (int it = 0; it < buf_nt; it++) {
int sr = ifreq*nt_logical + it + buf_it0;
int si = ifreq*istride + it;
int sw = ifreq*wstride + it;
if (fabs(ref_int[sr] - buf_int[si]) > 1.0e-3)
throw runtime_error(filename + ": intensity consistency check failed");
if (fabs(ref_wt[sr] - buf_wt[sw]) > 1.0e-3)
throw runtime_error(filename + ": weights consistency check failed");
}
}
}
cerr << "\n" << filename << ": pass\n";
}
} // namespace ch_frb_io