-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathpchave.m
343 lines (312 loc) · 12.3 KB
/
pchave.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
function varargout=pchave(X,lwin,olap,nfft,Fs,dval,winfun,winopt,clev)
% [SD,F,Ulog,Llog,Ulin,Llin,Snon,Qnon,Qrob,Qchi]=...
% PCHAVE(X,lwin,olap,nfft,Fs,dval,winfun,winopt,clev)
%
% Computation of SPECTRAL DENSITY (UNIT^2/HZ) with the method of CHAVE (1987).
%
% INPUT:
%
% X Cell array with data SECTIONS, each of which will be SEGMENTed
% lwin Length of windowed segment, in samples [default: 256]
% olap Overlap of data segments, in percent [default: 70]
% nfft Number of frequencies [default: 256]
% Fs Sampling frequency (Hz) [default: 1]
% dval 'MAD' for Mean Absolute Deviation scale estimate (Chave Eq. 20)
% 'IQ' for InterQuartile scale estimate (Chave Eq. 21)
% winfun Window function name (string) [default: 'DPSS']
% winopt Window function parameter, such as the time-bandwidth product
% for DPSS [Default: NW=4 and only first taper used]
% clev Is the confidence level in percent for the uncertainties
% under the jackknifing error estimation [default: 95]
%
% OUTPUT:
%
% SD Robust estimate of SPECTRAL DENSITY (UNIT^2/Hz) [abs(fft(X).^2)]
% F Frequency axis, goes from Fs/lwin to Fs/2 in nfft steps
% Ulog Upper confidence limit for use in logscale
% Llog Lower confidence limit for use in logscale
% Ulin Upper confidence limit for use on linear scale
% Llin Lower confidence limit for use on linear scale
% Snon Non-robust estimate for comparison only
% Qnon Order statistic for the non-robust spectral density
% Qrob Order statistic for the robust spectral density
% Qchi Order statistic for the chi2(2) distribution
%
% Returns ROBUST and NONROBUST spectral estimates and FREQUENCIES
% for REAL signals contained in a cell array. Assumed is all cells
% are from the same stochastic process. Each SECTION will be SEGMENTED
% in overlapping windows and TAPERED with a DPSS. Starting from an
% initial LOCATION and SCALE estimate, iterations are performed to
% come up with more ROBUST estimates for the ensemble over all SEGMENTS
% and SECTIONS. The NONROBUST spectral average is just the mean.
%
% See also: SPECTROGRAM
% Reference:
%
% @Article{Chave+87,
% author = "Alan D. Chave and David J. Thomson and Mark E. Ander",
% title = "On the robust estimation of power spectra,
% coherences, and transfer functions",
% journal = JGR,
% year = 1987,
% volume = 92,
% number = "B1",
% pages = "633--648"
% }
%
% Last modified by fjsimons-at-alum.mit.edu, 01/04/2019
% The program, not the demo (see at the end)
if ~isstr(X)
defval('lwin',256)
defval('olap',70)
defval('nfft',256)
defval('Fs',1)
defval('winfun','dpss')
defval('dval','MAD')
defval('clev',95)
if ~iscell(X)
y=X;
clear X
X{1}=y;
end
nfft
if nfft>lwin
warning('PCHAVE: NFFT is larger than window length')
end
% PART I: WELCH OVERLAPPING SEGMENT ANALYSIS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Overlap in samples
olap=floor(olap/100*lwin);
% Compute window; if 'DPSS' use option or default option,
% and want only one taper
if strcmp(winfun,'dpss')
defval('winopt',4)
dwin=feval(winfun,lwin,winopt,1);
else % If not, only at most one option allowed or needed
if exist('winopt')
dwin=feval(winfun,lwin,winopt);
else
dwin=feval(winfun,lwin);
end
end
% Normalize window so the sum of squares is unity (PW 208a)
dwin=dwin/sqrt(dwin'*dwin);
% Start of the loop over the elements of cell X
% Initialize Power Spectral Density matrix with window
Pw=[];
for index=1:length(X)
x=X{index}(:);
npts=length(x);
% If npts equals lwin any amount of overlap still only produces one
% window
checkit=(npts-olap)/(lwin-olap);
nwin=floor(checkit);
disp(sprintf('Window size for spectral density: %8.1f',lwin/Fs))
disp(sprintf('Number of overlapping data segments: %i',nwin))
if nwin~=checkit
disp(sprintf(...
'Number of segments is not integer: %i / %i points truncated',...
npts-(nwin*(lwin-olap)+olap),npts))
end
if nwin<=1; error('Data sequence not long enough'); end
% Make matrix out of suitably repeated windowed segments
% of the data 'xsdw' is x segmented THEN detrended THEN
% windowed with normalized window; not quite BLOCKTILE or BLOCKMEAN but
% of course it's what PAULI makes that does go inside BLOCKMEAN
xsd=detrend(...
x(repmat([1:lwin]',1,nwin)+...
repmat([0:(nwin-1)]*(lwin-olap),lwin,1)));
xsdw=xsd.*repmat(dwin,1,nwin);
% Check segmented THEN detrended THEN windowed
% with normalized boxcar window
xsdb=xsd/sqrt(lwin);
% Fill power matrix up progressively - initialization would speed this up
Pw=[Pw (abs(fft(xsdw,nfft,1)).^2)];
% For this cell section, compare with the boxcar version
Pb=abs(fft(xsdb,nfft,1)).^2;
% You can verify Percival and Walden (Eq. 134):
% $\var\{x\}=\int_{-f_N}^{f_N}S(f)\,df$ by checking var(x) against
% sum(mean(Pb,2))*(Fs/nfft) which equals mean(mean(Pb,2))*Fs
% or of course mean(mean(Pb,2)) - if you've used a boxcar.
% This checks how closely the total variance of x is captured
% by the overlapping detrended boxcar windowing scheme.
% Variations are due to taper forms, overlap, etc.
% This is why you better don't compare absolute values of the
% spectral density, but normalize them on a decibel scale
% disp(sprintf(...
% 'Parseval check: %8.3e (time) vs %8.3e (frequency)',...
% var(x(1:nwin*(lwin-olap)+olap)),mean(Pb(:))))
end
% Total number of estimates available
nwint=size(Pw,2);
% P is the POWER SPECTRAL DENSITY or the ENERGY/SECOND/FREQUENCY
% Units of ENERGY thus UNIT^2
% S=P*Dt=P/Fs is the SPECTRAL DENSITY or ENERGY/FREQUENCY
% So that its integral over all frequencies int(S(f)df) equals variance
% Units are UNIT^2/HZ or UNITS^2*SECOND
% Note that the area in frequency space is given by 2*fN, which is 1/Dt
Sw=Pw/Fs;
% PART II: Chave's method of making the estimate robust (PW p 294)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% End up with a bunch of modified periodograms per segment and per
% section in the cell array. Now Chave tells us how to average these.
% First start with the median as an initial robust LOCATION estimate
% The non-robust is just the unweighted average (the mean)
Snon=mean(Sw,2);
Sloc=median(Sw,2);
SD=zeros(size(Sloc));
% I'm doing the iteration for all frequencies at the same time
% Could rewrite this for every frequency at once
% Convergence criterion for all frequencies at once based on
% relative difference wrt previous iteration in percent
relperc=1;
iter=0;
while any(abs(SD-Sloc)./Sloc*100>relperc) & iter<=499
disp(sprintf('Iteration %3.3i mandated by %3.3i / %3.3i frequencies',...
iter+1,sum(abs(SD-Sloc)./Sloc*100>relperc),length(SD)))
if iter>0; Sloc=SD; end
iter=iter+1;
% Now come up with a "practical" but "lower efficiency" SCALE estimate
% Calculate residuals from the location estimate
Sres=Sw-repmat(Sloc,1,size(Sw,2));
if strcmp(upper(dval),'MAD')
% Scale estimate is median absolute deviation of residual from
% the median over value expected for a chi-squared distribution
% with 2 degrees of freedom (Chave Eq. 20 and Eq. 30)
% This is because power spectra are the sums of squares of almost
% normally distributed variates, and hence distributed as
% chi-squared. The number of degrees of freedom is 2 at each
% frequency, except for the DC and Nyquist components, which have
% only 1 degree of freedom. The chi-squared 2-distribution is
% equivalent to the exponential distribution and thus easy to
% calculate. We compare the scale estimates to the chi-squared
% 2-distribution and use this as our estimate of scale.
Sscale=median(abs(Sres),2)/(2*asinh(1/2));
elseif strcmp(upper(dval),'IQ')
% Scale estimate is interquartile range of residuals over expected
% value for chi-squared distribution with 2 degrees of freedom
% (Chave Eq. 21 and Eq. 30)
for index=1:nfft
Sscale(index,1)=iqr(Sres(index,:))/(2*log(3));
end
end
% Now iterate with Huber weights (Chave Eq. 26)
% How far out are the residuals in multiples of the scale estimate?
% Use these to construct weights - far out values are downweighted
% This number is given by Huber and assures the efficiency of the estimate
k=1.5;
Wght=Sres./repmat(Sscale,1,nwint);
buv=Wght>k;
blo=Wght<=k;
Wght(buv)=sqrt(k*sign(Wght(buv))./Wght(buv));
Wght(blo)=1;
% Construct robust estimate as weighted average of segments - in one go
% This is the new LOCATION estimate
SD=sum(Wght.*Sw,2)./sum(Wght,2);
% plot(abs(SD-Sloc)./Sloc*100); pause
end
% Part III: QUALITY CONTROL ON THE WEIGHTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% If the weighting has been performed correctly, then the final
% quantile-quantile plot of the weighted spectral estimates ('wse')
% (except those with zero weights - but this doesn't happen with the
% Huber weights ) against the chi-square quantiles. Also calculate the
% same statistic for the unweighted (nonrobust) spectral estimates
% ('use'). Return their rank.
[use,user]=sort((Wght.*Sw)./repmat(sum(Wght,2),1,nwint),2);
[wse,wser]=sort(Sw,2);
% Scale to have sum of squares of 8, which is the expected raw second
% moment of a chi2(2) variate.
Qnon=use.*repmat(sqrt(8./sum(use.^2,2)),1,nwint)*sqrt(nwint);
Qrob=wse.*repmat(sqrt(8./sum(wse.^2,2)),1,nwint)*sqrt(nwint);
% Compare with the quantiles of the chi2(2) distribution
% You think with the reweighting your measures should follow this
% distribution and if they do their order statistics would plot on a
% straight line compared to Qchi.
Qchi=2*log(nwint./(nwint-[1:nwint]+0.5));
% PART IV: JACKKNIFE STATISTICS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% See Efron and Stein, 1981
% Note that BOOTSTRAP is apparently even more efficient
% See also Efron, 1979
% Loop over frequencies
% Get number of standard deviations out to attain confidence level
% Start with some precomputed values
switch clev
case 99
kcon=2.5759;
case 95
kcon=1.96;
case 68
kcon=0.9945;
otherwise
kcon=norminv(1-(1-clev/100)/2,0,1);
end
disp(sprintf('Error bounds reflect confidence level of %5.1f%s',...
(1-2*(1-normcdf(kcon,0,1)))*100,str2mat(37)))
for index=1:nfft
% Work with Sw and Wght
S=Sw(index,:);
W=Wght(index,:);
% Make jackknifing sampler
J=jackknife(nwint);
% Compute S(i) in Efron's notation:
% Now it's like you have more than one estimation of SD
Si=sum(S(J).*W(J),2)./sum(W(J),2);
% Compute S(.) in Efron's notation
% It's like taking the mean of your new estimates
Sdot=mean(Si);
% Compute Efron's jackknifing variance
% This is like the variance of the mean of the new estimates
% "perhaps more appropriately" so - but we think of it as the
% variance on the estimate SD itself; adjusted for sample size
varjkS=(nwint-1)/nwint*sum((Si-Sdot).^2);
% Jackknife standard deviation of the estimate SD
stdjkS=sqrt(varjkS);
% Then you assume that you've obtained the value under a normal
% distribution and hence
% clev in percent falls within +/- kcon times the stdev
errlin(index,1)=kcon*(stdjkS);
% Also make errors for natural logscale
Silog=log(Si);
Sdotlog=mean(Silog);
varjkSlog=(nwint-1)/nwint*sum((Silog-Sdotlog).^2);
errlog(index,1)=exp(kcon*sqrt(varjkSlog));
end
% Note that:
% decibel(Ulog)-decibel(SD) is equal to decibel(SD)-decibel(Llog)
% whereas
% Ulin-SD is equal to SD-Llin
Ulin=SD+errlin;
Llin=SD-errlin;
Ulog=SD.*errlog;
Llog=SD./errlog;
% PART V: Collect frequency information and select output
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Calculate frequency vector for real signals
% and get rid of periodicity in spectrum
selekt=[1:floor(nfft/2)+1];
F=(selekt-1)'*Fs/nfft;
Snon=Snon(selekt);
SD=SD(selekt);
Ulin=Ulin(selekt);
Llin=Llin(selekt);
Ulog=Ulog(selekt);
Llog=Llog(selekt);
% Need to scale frequencies except 0 and Nyquist by a factor of two
% if you only take half of the spectrum (for real signals)
% Compute one-sided spectrum (Bendat and Piersol Eq. 5.33 and page 424.)
Snon=[Snon(1); 2*Snon(2:end-1); Snon(end)];
SD=[SD(1); 2*SD(2:end-1); SD(end)];
Ulin=[Ulin(1); 2*Ulin(2:end-1); Ulin(end)];
Llin=[Llin(1); 2*Llin(2:end-1); Llin(end)];
Ulog=[Ulog(1); 2*Ulog(2:end-1); Ulog(end)];
Llog=[Llog(1); 2*Llog(2:end-1); Llog(end)];
% Provide output
vars={SD,F,Ulog,Llog,Ulin,Llin,Snon,Qnon,Qrob,Qchi};
varargout=vars(1:nargout);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Demos
elseif isstr(X)
pchavedemo(X);
end