-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add distribution metrics #39
base: project-bkjg
Are you sure you want to change the base?
Changes from 33 commits
5670907
e060f37
25be9e0
0144c2f
d14f5b9
3721e29
8922e23
7ac8dfd
4c155e5
7578c49
27bdc61
009a5bb
ae32597
40bc6eb
bba200a
18fe3a4
6108428
618c944
52c8e49
7169ca3
b8a766b
2fb1e01
fc5e6e6
6c91ed6
43cdba6
d080a28
0e7ff74
200cd28
791f9bf
980aa9d
5157345
13cfe62
c2aa581
37d5df8
4b05e22
013e65b
48617a1
93d8a7a
8ed4657
959f65e
4f9777e
3897da0
e3982c3
c32146e
66b81a0
02b61bb
923a41e
5c2551a
f603aa8
b6fb8fd
e5ad13b
e883c54
93cd883
2770ba9
2de7767
faff4db
0caa54b
f8ce011
f714545
3b52335
36177a9
a568b19
3950d11
567ca20
6e94531
61caf83
6d4afb2
fcbe7f0
557dbd3
5ed12d8
8abc56e
0fb5f5d
4e2fedd
15c47fd
44991a3
e0246de
350e9f5
50cc412
286f762
b8a3350
5c162bd
06c542e
8e5f95b
06a62fd
17385fa
cb6c55d
e77c4bd
7e3ad57
9608d1b
16af7af
2926727
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
/** | ||
* collectd - src/daemon/distribution.c | ||
* Copyright (C) 2020 Google LLC | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a | ||
* copy of this software and associated documentation files (the "Software"), | ||
* to deal in the Software without restriction, including without limitation | ||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
* and/or sell copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
* DEALINGS IN THE SOFTWARE. | ||
* | ||
* Authors: | ||
* Barbara bkjg Kaczorowska <bkjg at google.com> | ||
*/ | ||
|
||
#include "distribution.h" | ||
#include <math.h> | ||
#include <pthread.h> | ||
|
||
typedef struct { | ||
double max_boundary; | ||
uint64_t counter; | ||
} bucket_t; | ||
Comment on lines
+35
to
+38
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please add a comment about how does the buckets look like. What are the minimal boundaries? Are minimal and maximal boundaries inclusive or exclusive? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, I will add that. Thanks! |
||
|
||
struct distribution_s { | ||
bucket_t *buckets; | ||
double sum_gauges; | ||
size_t num_buckets; | ||
pthread_mutex_t mutex; | ||
}; | ||
|
||
static bucket_t *bucket_new_linear(size_t num_buckets, double size) { | ||
bucket_t *buckets = (bucket_t *)calloc(num_buckets, sizeof(bucket_t)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At your option: Here and below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed, thanks! |
||
|
||
if (buckets == NULL) { | ||
return NULL; | ||
} | ||
|
||
for (size_t i = 0; i < num_buckets - 1; ++i) { | ||
buckets[i].max_boundary = (double)(i + 1) * size; | ||
} | ||
|
||
buckets[num_buckets - 1].max_boundary = INFINITY; | ||
|
||
return buckets; | ||
} | ||
|
||
static bucket_t *bucket_new_exponential(size_t num_buckets, double initial_size, | ||
double factor) { | ||
bucket_t *buckets = (bucket_t *)calloc(num_buckets, sizeof(bucket_t)); | ||
|
||
if (buckets == NULL) { | ||
return NULL; | ||
} | ||
|
||
double multiplier = initial_size; | ||
|
||
for (size_t i = 0; i < num_buckets - 1; ++i) { | ||
buckets[i].max_boundary = factor * multiplier; | ||
multiplier *= initial_size; | ||
} | ||
|
||
buckets[num_buckets - 1].max_boundary = INFINITY; | ||
|
||
return buckets; | ||
} | ||
|
||
static bucket_t *bucket_new_custom(size_t num_boundaries, | ||
const double *custom_buckets_boundaries) { | ||
bucket_t *buckets = (bucket_t *)calloc(num_boundaries + 1, sizeof(bucket_t)); | ||
|
||
if (buckets == NULL) { | ||
return NULL; | ||
} | ||
|
||
if (custom_buckets_boundaries[0] <= 0) { | ||
free(buckets); | ||
errno = EINVAL; | ||
return NULL; | ||
} | ||
|
||
buckets[0].max_boundary = custom_buckets_boundaries[0]; | ||
|
||
for (size_t i = 1; i < num_boundaries; ++i) { | ||
if (custom_buckets_boundaries[i] <= 0 || | ||
custom_buckets_boundaries[i - 1] >= custom_buckets_boundaries[i]) { | ||
free(buckets); | ||
errno = EINVAL; | ||
return NULL; | ||
} | ||
|
||
buckets[i].max_boundary = custom_buckets_boundaries[i]; | ||
} | ||
|
||
buckets[num_boundaries].max_boundary = INFINITY; | ||
|
||
return buckets; | ||
} | ||
|
||
distribution_t *distribution_new_linear(size_t num_buckets, double size) { | ||
octo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
distribution_t *d = (distribution_t *)calloc(1, sizeof(distribution_t)); | ||
|
||
if (d == NULL) { | ||
return NULL; | ||
} | ||
|
||
d->buckets = bucket_new_linear(num_buckets, size); | ||
|
||
if (d->buckets == NULL) { | ||
free(d); | ||
return NULL; | ||
} | ||
|
||
d->num_buckets = num_buckets; | ||
pthread_mutex_init(&d->mutex, NULL); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens on line 228? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I initialise the mutex. When you create it, you should initialise it, to be sure that it will work correctly. |
||
|
||
return d; | ||
} | ||
|
||
distribution_t *distribution_new_exponential(size_t num_buckets, | ||
double initial_size, | ||
double factor) { | ||
distribution_t *d = (distribution_t *)calloc(1, sizeof(distribution_t)); | ||
|
||
if (d == NULL) { | ||
return NULL; | ||
} | ||
|
||
d->buckets = bucket_new_exponential(num_buckets, initial_size, factor); | ||
|
||
if (d->buckets == NULL) { | ||
free(d); | ||
return NULL; | ||
} | ||
|
||
d->num_buckets = num_buckets; | ||
pthread_mutex_init(&d->mutex, NULL); | ||
|
||
return d; | ||
} | ||
|
||
distribution_t *distribution_new_custom(size_t num_boundaries, | ||
double *custom_buckets_boundaries) { | ||
distribution_t *d = (distribution_t *)calloc(1, sizeof(distribution_t)); | ||
|
||
if (d == NULL) { | ||
return NULL; | ||
} | ||
|
||
d->buckets = bucket_new_custom(num_boundaries, custom_buckets_boundaries); | ||
|
||
if (d->buckets == NULL) { | ||
free(d); | ||
return NULL; | ||
} | ||
|
||
d->num_buckets = num_boundaries; | ||
pthread_mutex_init(&d->mutex, NULL); | ||
|
||
return d; | ||
} | ||
|
||
static void bucket_update(bucket_t *buckets, size_t num_buckets, double gauge) { | ||
int ptr = (int)num_buckets - 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "ptr" sounds like "pointer" but is actually an integer. How about "index" instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great point! Changed! |
||
|
||
while (buckets[ptr].max_boundary > gauge && ptr >= 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At your option: consider a for (size_t i = num_buckets - 1; i >= 0 && buckets[i].max_boundary > gauge; i--) {
buckets[i].counter++;
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately this for loop doesn't work and will call segmentation fault I think. That's because i is the size_t type and it imply that i will always be greater equal zero. If we subtract 1 from 0, then we will underflow, get the maximum value of size_t and then buckets[i].max_boundary will call seg fault But I know what you mean, I will consider it. Thanks! |
||
buckets[ptr].counter++; | ||
ptr--; | ||
} | ||
} | ||
|
||
void distribution_update(distribution_t *d, double gauge) { | ||
if (d == NULL) { | ||
errno = EINVAL; | ||
return; | ||
} | ||
|
||
pthread_mutex_lock(&d->mutex); | ||
|
||
bucket_update(d->buckets, d->num_buckets, gauge); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like the idea of exporting the bucket update to a separate function, great! |
||
|
||
d->sum_gauges += gauge; | ||
pthread_mutex_unlock(&d->mutex); | ||
} | ||
|
||
static double find_percentile(bucket_t *buckets, size_t num_buckets, | ||
uint64_t quantity) { | ||
size_t left = 0; | ||
size_t right = num_buckets - 1; | ||
size_t middle; | ||
|
||
while (left < right) { | ||
middle = (left + right) / 2; | ||
|
||
if (buckets[middle].counter >= quantity) { | ||
left = middle; | ||
} else { | ||
right = middle - 1; | ||
} | ||
} | ||
|
||
return buckets[left].max_boundary; | ||
} | ||
|
||
double distribution_percentile(distribution_t *d, double percent) { | ||
if (d == NULL || percent > 100.0) { | ||
errno = EINVAL; | ||
return NAN; | ||
} | ||
|
||
pthread_mutex_lock(&d->mutex); | ||
|
||
uint64_t quantity = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recommend to add a cast to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed, thanks! |
||
(percent / 100.0) * d->buckets[d->num_buckets - 1].counter; | ||
|
||
percent = find_percentile(d->buckets, d->num_buckets, quantity); | ||
|
||
pthread_mutex_unlock(&d->mutex); | ||
return percent; | ||
} | ||
|
||
double distribution_average(distribution_t *d) { | ||
if (d == NULL) { | ||
errno = EINVAL; | ||
return NAN; | ||
} | ||
|
||
pthread_mutex_lock(&d->mutex); | ||
|
||
double average = | ||
d->sum_gauges / (double)d->buckets[d->num_buckets - 1].counter; | ||
|
||
pthread_mutex_unlock(&d->mutex); | ||
|
||
return average; | ||
} | ||
|
||
distribution_t *distribution_clone(distribution_t *d) { | ||
if (d == NULL) { | ||
errno = EINVAL; | ||
return NULL; | ||
} | ||
|
||
distribution_t *distribution = calloc(1, sizeof(distribution_t)); | ||
octo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if (distribution == NULL) { | ||
return NULL; | ||
} | ||
|
||
pthread_mutex_lock(&d->mutex); | ||
|
||
distribution->sum_gauges = d->sum_gauges; | ||
distribution->num_buckets = d->num_buckets; | ||
|
||
distribution->buckets = calloc(d->num_buckets, sizeof(bucket_t)); | ||
|
||
if (distribution->buckets == NULL) { | ||
free(distribution); | ||
pthread_mutex_unlock(&d->mutex); | ||
return NULL; | ||
} | ||
|
||
memcpy(distribution->buckets, d->buckets, d->num_buckets * sizeof(bucket_t)); | ||
|
||
pthread_mutex_init(&distribution->mutex, NULL); | ||
|
||
pthread_mutex_unlock(&d->mutex); | ||
|
||
return distribution; | ||
} | ||
|
||
void distribution_destroy(distribution_t *d) { | ||
if (d == NULL) { | ||
return; | ||
} | ||
|
||
pthread_mutex_destroy(&d->mutex); | ||
free(d->buckets); | ||
free(d); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/** | ||
* collectd - src/daemon/distribution.h | ||
* Copyright (C) 2020 Google LLC | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a | ||
* copy of this software and associated documentation files (the "Software"), | ||
* to deal in the Software without restriction, including without limitation | ||
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
* and/or sell copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
* DEALINGS IN THE SOFTWARE. | ||
* | ||
* Authors: | ||
* Barbara bkjg Kaczorowska <bkjg at google.com> | ||
*/ | ||
|
||
#ifndef DISTRIBUTION_H | ||
#define DISTRIBUTION_H | ||
|
||
#include "collectd.h" | ||
|
||
struct distribution_s; | ||
typedef struct distribution_s distribution_t; | ||
|
||
/* function that create new distribution structure and initialize buckets using | ||
* linear function | ||
* it will return null if any error occurred, for example - num_buckets is zero | ||
* or OS couldn't allocate the memory - then errno will contain the error code | ||
*/ | ||
distribution_t *distribution_new_linear(size_t num_buckets, double size); | ||
/* function that create new distribution structure and initialize buckets using | ||
* exponential function | ||
* it will return null if any error occurred, for example - num_buckets is zero | ||
* or OS couldn't allocate the memory - then errno will contain the error code | ||
*/ | ||
distribution_t *distribution_new_exponential(size_t num_buckets, | ||
double initial_size, | ||
double factor); | ||
/* function that create new distribution structure and initialize buckets using | ||
* custom buckets sizes given by the user | ||
* It will return null if any error occurred, for example - num_boundaries is | ||
* less than zero or OS couldn't allocate the memory - then errno will contain | ||
* the error code There is also one case when the function can return with some | ||
* error - if the custom buckets boundaries aren't in the ascending order or | ||
* some boundaries are less than zero, then the function will return null and | ||
* will set errno to EINVAL | ||
*/ | ||
distribution_t *distribution_new_custom(size_t num_boundaries, | ||
double *custom_buckets_boundaries); | ||
|
||
/* function for updating the buckets | ||
* if the user will give the wrong argument, i.e. d will be null, then the | ||
* function will return and the errno will be set to EINVAL*/ | ||
void distribution_update(distribution_t *d, double gauge); | ||
|
||
/* function for getting the percentile | ||
* if the user will give the wrong argument, i.e. d will be null or percent | ||
* will be greater than 100.0, then the function will return NaN and the errno | ||
* will be set to EINVAL*/ | ||
double distribution_percentile(distribution_t *d, double percent); | ||
|
||
/* function that calculates average of gauges | ||
* if the user will pass the wrong argument to the function, i.e. d will be | ||
* null, then the function will return NaN and the errno will be set to EINVAL | ||
*/ | ||
double distribution_average(distribution_t *d); | ||
|
||
/* function that do the clone of distribution structure | ||
* if the user will pass the wrong argument to this function, i.e. d will be | ||
* null, then the function will return null and the errno will be set to EINVAL | ||
* there is also the possibility that the user will pass the proper argument but | ||
* the function will fail anyway, i.e. calloc will return null, then the | ||
* function will return null and the errno will be set by calloc system call | ||
*/ | ||
distribution_t *distribution_clone(distribution_t *d); | ||
|
||
/* function that do clean up and free all the memory | ||
* if the user will pass the null as an argument to this function, then the | ||
* function | ||
* will return without setting any errno like the OS do it when have to free a | ||
* null pointer */ | ||
void distribution_destroy(distribution_t *d); | ||
|
||
#endif // DISTRIBUTION_H |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first include here should be
"collectd.h"
. That should pull in<math.h>
and<pthread.h>
already.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately it doesn't work for me :( And if I replace include <math.h> and include <pthread.h> with include "collectd.h" it doesn't compile in my case