forked from TASBE/TASBEFlowAnalytics
-
Notifications
You must be signed in to change notification settings - Fork 1
/
subpopulation_statistics.m
70 lines (55 loc) · 2.25 KB
/
subpopulation_statistics.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
% Copyright (C) 2010-2017, Raytheon BBN Technologies and contributors listed
% in the AUTHORS file in TASBE analytics package distribution's top directory.
%
% This file is part of the TASBE analytics package, and is distributed
% under the terms of the GNU General Public License, with a linking
% exception, as described in the file LICENSE in the TASBE analytics
% package distribution's top directory.
function [counts, means, stds, excluded] = subpopulation_statistics(BSeq,data,selector,mode)
% SUBPOPULATION_STATISTICS  Per-bin counts, means and standard deviations.
%
% Rows of DATA are grouped into bins according to the value found in column
% SELECTOR. With bedges = get_bin_edges(BSeq), bin i contains the rows whose
% selector value v satisfies  bedges(i) < v <= bedges(i+1)  (lower edge
% exclusive, upper edge inclusive). Statistics are then computed for every
% column of DATA over the rows of each bin.
%
% Inputs:
%   BSeq     - bin sequence, queried through get_bin_edges and get_n_bins
%   data     - numeric matrix; rows are binned, columns are summarized
%   selector - index of the column of DATA whose values decide the bin
%              (used as data(:,selector); expected to be a single column)
%   mode     - 'geometric'  -> geomean / geostd
%              'arithmetic' -> mean / std
%
% Outputs:
%   counts   - get_n_bins(BSeq) x 1, number of rows falling in each bin
%   means    - get_n_bins(BSeq) x size(data,2), per-bin mean of each column
%   stds     - same size as means, per-bin standard deviation of each column
%   excluded - number of rows of DATA that did not fall in any bin
%
% Errors:
%   'Unknown statistical mode ...' if MODE is neither of the names above.

    bedges = get_bin_edges(BSeq);
    n = numel(bedges)-1;          % number of intervals described by the edges
    ncol = size(data,2);
    nbins = get_n_bins(BSeq);

    % Pick the pair of statistic functions once; the binning loop below is
    % identical for both modes.
    switch(mode)
        case 'geometric'
            mean_fn = @geomean;
            std_fn = @geostd;
        case 'arithmetic'
            mean_fn = @mean;
            std_fn = @std;
        otherwise
            error('Unknown statistical mode %s',mode);
    end

    % Preallocate outputs at their final size: one row per bin, and for the
    % means/stds one column per column of data.
    counts = zeros(nbins,1);
    means = zeros(nbins,ncol);
    stds = zeros(nbins,ncol);

    % The selector column does not change between bins, so extract it once.
    selected = data(:,selector);

    for i=1:n
        % Logical mask of the rows belonging to bin i.
        in_bin = selected>bedges(i) & selected<=bedges(i+1);
        counts(i) = nnz(in_bin);
        for j=1:ncol
            % Outlier trimming is currently disabled; every row in the bin
            % is used. The disabled sketch is kept here for reference:
            % % to exclude outliers, drop top and bottom 0.1% of data
            % sorted = sort(data(in_bin,j));
            % dropsize = ceil(numel(sorted)*0.001);
            % if(numel(sorted)-2*dropsize > 0)
            %     trimmed = sorted(dropsize:(numel(sorted)-dropsize));
            % else
            %     trimmed = sorted;
            % end
            members = data(in_bin,j);
            means(i,j) = mean_fn(members);
            stds(i,j) = std_fn(members);
        end
    end

    % Every row not counted in some bin is reported as excluded.
    excluded = size(data,1) - sum(counts);
end