-
Notifications
You must be signed in to change notification settings - Fork 3
/
Samplers.h
145 lines (126 loc) · 4.13 KB
/
Samplers.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#ifndef _SAMPLERS_H_
#define _SAMPLERS_H_
#include <vector>
#include <limits>
#include <cstdlib>
#include <assert.h>
// continuous uniform sampler with range [0,maxValue]
struct UniformSampler {
double maxValue;
UniformSampler(double maxValue = 1.0) {
this->maxValue = maxValue;
}
double Draw() const {
// generate a random number between 0 and maxValue
double randomScore = ((double) rand() / (RAND_MAX)) * maxValue;
return randomScore;
}
};
// an approximation of gaussian that generates data in the range [-width/2, +width/2]
struct GaussianSampler {
UniformSampler uniformSampler_;
double mean_, deviation_;
const unsigned width_;
GaussianSampler(double mean=0.0, double deviation=1.0) : width_(12) {
mean_ = mean;
deviation_ = deviation;
}
double Draw() const {
// generate an Irwin-Hall sample
double sample = 0;
for(unsigned i = 0; i < width_; i++) {
sample += uniformSampler_.Draw();
}
// adjust the range in order to approximate standard gaussian
sample -= 0.5 * width_;
// scale the specified mean/deviation
sample = mean_ + sample * deviation_;
return sample;
}
};
// dumb multinomial sampler with range {0,...,probs.size()-1}
// note: the probabilities need not be normalized, but none of them is allowed to be zero (TODO: allow zeros)
struct MultinomialSampler {
std::vector<double> probs;
double totalProb;
UniformSampler uniformSampler;
MultinomialSampler(std::vector<double>& probs) {
this->probs = probs;
totalProb = 0;
for(unsigned i = 0; i < probs.size(); i++) {
assert(probs[i] > 0);
totalProb += probs[i];
}
}
virtual unsigned Draw() const {
double shoot = uniformSampler.Draw() * totalProb;
for(unsigned i = 0; i < probs.size(); i++) {
if(probs[i] >= shoot) {
return i;
} else {
shoot -= probs[i];
}
}
assert(false);
}
};
// rich in the sense that users can specify values for the multinomial variable, instead of assuming the first
// probability corresponds to value 0, next to value 1, ...etc.
struct RichMultinomialSampler : public MultinomialSampler {
std::vector<unsigned> labels;
RichMultinomialSampler(std::vector<double>& probs, std::vector<unsigned>& labels)
: MultinomialSampler(probs) {
assert(probs.size() == labels.size());
this->labels = labels;
}
virtual unsigned Draw() const {
unsigned labelIndex = MultinomialSampler::Draw();
assert(labelIndex < labels.size());
return labels[labelIndex];
}
};
// R. A. Kronmal and A. V. Peterson, Jr. (1977) On the alias method for
// generating random variables from a discrete distribution. In The American
// Statistician, Vol. 33, No. 4. Pages 214--218.
//
// Intuition: a multinomial with N outcomes can be rewritten as a uniform
// mixture of N Bernoulli distributions. The ith Bernoulli returns i with
// probability F[i], otherwise it returns an "alias" value L[i]. The
// constructor computes the F's and L's given an arbitrary multionimial p in
// O(n) time and Draw returns samples in O(1) time.
//
// copied from http://cdec-decoder.org/
struct AliasSampler {
AliasSampler() {}
explicit AliasSampler(const std::vector<double>& p) { Init(p); }
void Init(const std::vector<double>& p) {
const unsigned N = p.size();
cutoffs_.resize(p.size());
aliases_.clear();
aliases_.resize(p.size(), std::numeric_limits<unsigned>::max());
std::vector<unsigned> s,g;
for (unsigned i = 0; i < N; ++i) {
const double cutoff = cutoffs_[i] = N * p[i];
if (cutoff >= 1.0) g.push_back(i); else s.push_back(i);
}
while(!s.empty() && !g.empty()) {
const unsigned k = g.back();
const unsigned j = s.back();
aliases_[j] = k;
cutoffs_[k] -= 1.0 - cutoffs_[j];
s.pop_back();
if (cutoffs_[k] < 1.0) {
g.pop_back();
s.push_back(k);
}
}
}
unsigned Draw() const {
const unsigned n = uniformSampler.Draw() * cutoffs_.size();
if (uniformSampler.Draw() > cutoffs_[n]) return aliases_[n]; else return n;
}
std::vector<double> cutoffs_; // F
std::vector<unsigned> aliases_; // L
UniformSampler uniformSampler;
};
#endif