diff --git a/Makefile.am b/Makefile.am index fdc2908f53..52c079dfee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -123,7 +123,8 @@ sbin_PROGRAMS = \ bin_PROGRAMS = \ collectd-nagios \ collectd-tg \ - collectdctl + collectdctl \ + distribution_benchmark endif # BUILD_WIN32 @@ -151,6 +152,7 @@ check_LTLIBRARIES = \ check_PROGRAMS = \ test_common \ + test_distribution \ test_format_graphite \ test_meta_data \ test_metric \ @@ -356,6 +358,17 @@ test_meta_data_SOURCES = \ src/testing.h test_meta_data_LDADD = libmetadata.la libplugin_mock.la +test_distribution_SOURCES = \ + src/daemon/distribution_test.c \ + src/testing.h +test_distribution_LDADD = libmetric.la libplugin_mock.la + +distribution_benchmark_SOURCES = \ + src/daemon/distribution_benchmark.c + +distribution_benchmark_LDADD = \ + libmetric.la + test_metric_SOURCES = \ src/daemon/metric_test.c \ src/testing.h @@ -426,6 +439,8 @@ libmetadata_la_SOURCES = \ src/utils/metadata/meta_data.h libmetric_la_SOURCES = \ + src/daemon/distribution.c \ + src/daemon/distribution.h \ src/daemon/metric.c \ src/daemon/metric.h libmetric_la_LIBADD = libmetadata.la $(COMMON_LIBS) diff --git a/run_benchmark.sh b/run_benchmark.sh new file mode 100755 index 0000000000..a205e7b993 --- /dev/null +++ b/run_benchmark.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +for i in {1..4000..50} +do + ./distribution_benchmark $i +done diff --git a/src/daemon/distribution.c b/src/daemon/distribution.c new file mode 100644 index 0000000000..31a52aee44 --- /dev/null +++ b/src/daemon/distribution.c @@ -0,0 +1,415 @@ +/** + * collectd - src/daemon/distribution.c + * Copyright (C) 2020 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Barbara bkjg Kaczorowska + */ + +#include "distribution.h" +#include +#include + +/* maximum boundary of bucket is exclusive and minimum boundary of bucket is the + * maximum boundary of previous bucket inclusive + * special cases: minimum boundary of first bucket is 0 and maximum boundary of + * last bucket is INFINITY */ +typedef struct { + double max_boundary; + uint64_t counter; +} bucket_t; + +struct distribution_s { + bucket_t *buckets; + double sum_gauges; + size_t num_buckets; + pthread_mutex_t mutex; +}; + +double *distribution_get_buckets_boundaries(distribution_t *d) { + if (d == NULL) { + errno = EINVAL; + return NULL; + } + + double *boundaries = calloc(d->num_buckets, sizeof(double)); + + if (boundaries == NULL) { + return NULL; + } + + /* boundaries won't change, so we don't have to lock the mutex */ + for (size_t i = 0; i < d->num_buckets; ++i) { + boundaries[i] = d->buckets[i].max_boundary; + } + + return boundaries; +} + +uint64_t *distribution_get_buckets_counters(distribution_t *d) { + if (d == NULL) { + errno = EINVAL; + return NULL; + } + + uint64_t *counters = calloc(d->num_buckets, sizeof(uint64_t)); + + if (counters == NULL) { + return NULL; + } + + pthread_mutex_lock(&d->mutex); + for (size_t i = 0; i < d->num_buckets; ++i) { + counters[i] = d->buckets[i].counter; + } + pthread_mutex_unlock(&d->mutex); + + return counters; +} + +size_t distribution_get_num_buckets(distribution_t *d) { + if (d == NULL) { + errno = EINVAL; + return 0; + } + + return d->num_buckets; +} + +double distribution_get_sum_gauges(distribution_t *d) { + if (d == NULL) { + errno = EINVAL; + return NAN; + } + + return d->sum_gauges; +} + +bool distribution_check_equal(distribution_t *d1, distribution_t *d2) { + if ((d1 == NULL && d2 != NULL) || (d1 != NULL && d2 == NULL)) { + return false; + } + + /* thanks to previous condition we know that if d1 is NULL then d2 is NULL too + */ + if (d1 == NULL) { + return true; + } + + pthread_mutex_lock(&d1->mutex); + pthread_mutex_lock(&d2->mutex); + + if (d1->sum_gauges != d2->sum_gauges) { + pthread_mutex_unlock(&d2->mutex); + pthread_mutex_unlock(&d1->mutex); + return false; + } + + if (d1->num_buckets != d2->num_buckets) { + pthread_mutex_unlock(&d2->mutex); + pthread_mutex_unlock(&d1->mutex); + return false; + } + + for (size_t i = 0; i < d1->num_buckets; ++i) { + if (d1->buckets[i].max_boundary != d2->buckets[i].max_boundary || + d1->buckets[i].counter != d2->buckets[i].counter) { + pthread_mutex_unlock(&d2->mutex); + pthread_mutex_unlock(&d1->mutex); + return false; + } + } + + pthread_mutex_unlock(&d2->mutex); + pthread_mutex_unlock(&d1->mutex); + + return true; +} + +static bucket_t *bucket_new_linear(size_t num_buckets, double size) { + bucket_t *buckets = calloc(num_buckets, sizeof(bucket_t)); + + if (buckets == NULL) { + return NULL; + } + + for (size_t i = 0; i < num_buckets - 1; ++i) { + buckets[i].max_boundary = (double)(i + 1) * size; + } + + buckets[num_buckets - 1].max_boundary = INFINITY; + + return buckets; +} + +static bucket_t *bucket_new_exponential(size_t num_buckets, double base, + double factor) { + bucket_t *buckets = calloc(num_buckets, sizeof(bucket_t)); + + if (buckets == NULL) { + return NULL; + } + + double multiplier = 1.0; + + for (size_t i = 0; i < num_buckets - 1; ++i) { + buckets[i].max_boundary = factor * multiplier; + multiplier *= base; + } + + buckets[num_buckets - 1].max_boundary = INFINITY; + + return buckets; +} + +static bucket_t *bucket_new_custom(size_t num_boundaries, + const double *custom_buckets_boundaries) { + if (num_boundaries > 0) { + if (custom_buckets_boundaries[0] <= 0 || + custom_buckets_boundaries[0] == INFINITY) { + errno = EINVAL; + return NULL; + } + + for (size_t i = 1; i < num_boundaries; ++i) { + if (custom_buckets_boundaries[i] <= 0 || + custom_buckets_boundaries[i] == INFINITY || + custom_buckets_boundaries[i - 1] >= custom_buckets_boundaries[i]) { + errno = EINVAL; + return NULL; + } + } + } + + bucket_t *buckets = calloc(num_boundaries + 1, sizeof(bucket_t)); + + if (buckets == NULL) { + return NULL; + } + + for (size_t i = 0; i < num_boundaries; ++i) { + buckets[i].max_boundary = custom_buckets_boundaries[i]; + } + buckets[num_boundaries].max_boundary = INFINITY; + + return buckets; +} + +distribution_t *distribution_new_linear(size_t num_buckets, double size) { + if (num_buckets == 0 || size <= 0) { + errno = EINVAL; + return NULL; + } + + distribution_t *d = calloc(1, sizeof(distribution_t)); + + if (d == NULL) { + return NULL; + } + + d->buckets = bucket_new_linear(num_buckets, size); + + if (d->buckets == NULL) { + free(d); + return NULL; + } + + d->num_buckets = num_buckets; + pthread_mutex_init(&d->mutex, NULL); + + return d; +} + +distribution_t *distribution_new_exponential(size_t num_buckets, double base, + double factor) { + if (num_buckets == 0 || base <= 1 || factor <= 0) { + errno = EINVAL; + return NULL; + } + + distribution_t *d = calloc(1, sizeof(distribution_t)); + + if (d == NULL) { + return NULL; + } + + /* as in distribution_new_linear: it would be nice to check if base + * and factor are greater than zero, for consideration: one of them also + * greater than one */ + d->buckets = bucket_new_exponential(num_buckets, base, factor); + + if (d->buckets == NULL) { + free(d); + return NULL; + } + + d->num_buckets = num_buckets; + pthread_mutex_init(&d->mutex, NULL); + + return d; +} + +distribution_t *distribution_new_custom(size_t num_boundaries, + double *custom_buckets_boundaries) { + distribution_t *d = calloc(1, sizeof(distribution_t)); + + if (d == NULL) { + return NULL; + } + + d->buckets = bucket_new_custom(num_boundaries, custom_buckets_boundaries); + + if (d->buckets == NULL) { + free(d); + return NULL; + } + + d->num_buckets = num_boundaries + 1; + pthread_mutex_init(&d->mutex, NULL); + + return d; +} + +static void bucket_update(bucket_t *buckets, size_t num_buckets, double gauge) { + int idx = (int)num_buckets - 1; + + while (idx >= 0 && buckets[idx].max_boundary > gauge) { + buckets[idx].counter++; + idx--; + } +} + +int distribution_update(distribution_t *d, double gauge) { + if (d == NULL || gauge < 0) { + errno = EINVAL; + return EXIT_FAILURE; + } + + pthread_mutex_lock(&d->mutex); + + bucket_update(d->buckets, d->num_buckets, gauge); + + d->sum_gauges += gauge; + pthread_mutex_unlock(&d->mutex); + + return EXIT_SUCCESS; +} + +static double find_percentile(bucket_t *buckets, size_t num_buckets, + uint64_t quantity) { + size_t left = 0; + size_t right = num_buckets - 1; + size_t middle; + + while (left < right) { + middle = (left + right) / 2; + + if (buckets[middle].counter >= quantity) { + right = middle; + } else { + left = middle + 1; + } + } + + return buckets[left].max_boundary; +} + +double distribution_percentile(distribution_t *d, double percent) { + if (d == NULL || percent > 100.0 || percent < 0) { + errno = EINVAL; + return NAN; + } + + pthread_mutex_lock(&d->mutex); + + uint64_t quantity = (uint64_t)( + (percent / 100.0) * (double)d->buckets[d->num_buckets - 1].counter); + + percent = find_percentile(d->buckets, d->num_buckets, quantity); + + pthread_mutex_unlock(&d->mutex); + return percent; +} + +double distribution_average(distribution_t *d) { + if (d == NULL) { + errno = EINVAL; + return NAN; + } + + pthread_mutex_lock(&d->mutex); + + if (d->buckets[d->num_buckets - 1].counter == 0) { + pthread_mutex_unlock(&d->mutex); + return NAN; + } + + double average = + d->sum_gauges / (double)d->buckets[d->num_buckets - 1].counter; + + pthread_mutex_unlock(&d->mutex); + + return average; +} + +distribution_t *distribution_clone(distribution_t *d) { + if (d == NULL) { + errno = EINVAL; + return NULL; + } + + distribution_t *distribution = calloc(1, sizeof(distribution_t)); + + if (distribution == NULL) { + return NULL; + } + + pthread_mutex_lock(&d->mutex); + + distribution->sum_gauges = d->sum_gauges; + distribution->num_buckets = d->num_buckets; + + distribution->buckets = calloc(d->num_buckets, sizeof(bucket_t)); + + if (distribution->buckets == NULL) { + free(distribution); + pthread_mutex_unlock(&d->mutex); + return NULL; + } + + memcpy(distribution->buckets, d->buckets, d->num_buckets * sizeof(bucket_t)); + + pthread_mutex_init(&distribution->mutex, NULL); + + pthread_mutex_unlock(&d->mutex); + + return distribution; +} + +void distribution_destroy(distribution_t *d) { + if (d == NULL) { + return; + } + + pthread_mutex_destroy(&d->mutex); + free(d->buckets); + free(d); +} diff --git a/src/daemon/distribution.h b/src/daemon/distribution.h new file mode 100644 index 0000000000..55e4b3259b --- /dev/null +++ b/src/daemon/distribution.h @@ -0,0 +1,267 @@ +/** + * collectd - src/daemon/distribution.h + * Copyright (C) 2020 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Barbara bkjg Kaczorowska + */ + +#ifndef DISTRIBUTION_H +#define DISTRIBUTION_H + +#include "collectd.h" + +struct distribution_s; +typedef struct distribution_s distribution_t; + +/* + * NAME + * distribution_new_linear + * + * DESCRIPTION + * Creates new distribution structure and initialize buckets using linear + * function. + * + * ARGUMENTS + * num_buckets Number of the buckets in the distribution. + * size Size of every bucket. + * + * RETURN VALUE + * Returns pointer to the new distribution_t upon success or NULL if an error + * occurred. Error could occurred when num_buckets was zero or if OS couldn't + * allocate the memory - then errno will contain the error code. + */ +distribution_t *distribution_new_linear(size_t num_buckets, double size); +/* + * NAME + * distribution_new_exponential + * + * DESCRIPTION + * Creates new distribution structure and initialize buckets using exponential + * function. + * + * ARGUMENTS + * num_buckets Number of the buckets in the distribution. + * base Base for calculating the size of bucket. + * factor Multiplier for calculating the size of bucket. + * + * RETURN VALUE + * Returns pointer to the new distribution_t upon success or NULL if an error + * occurred. Error could occurred when num_buckets was zero or if OS couldn't + * allocate the memory - then errno will contain the error code. + */ +distribution_t *distribution_new_exponential(size_t num_buckets, double base, + double factor); +/* + * NAME + * distribution_new_custom + * + * DESCRIPTION + * Creates new distribution structure and initialize buckets using custom + * buckets boundaries given by the user + * + * ARGUMENTS + * num_buckets Number of the buckets in the distribution. + * custom_buckets_boundaries Boundaries of buckets in ascending order. + * + * RETURN VALUE + * Returns pointer to the new distribution_t upon success or NULL if an error + * occurred. Error could occurred when num_buckets was zero or if OS couldn't + * allocate the memory - then errno will contain the error code. There is also + * one case when the function can return with some error - if the custom buckets + * boundaries aren't in the ascending order or some boundaries are less than + * zero or are equal to INFINITY, then the function will return null and will + * set errno to EINVAL. + */ +distribution_t *distribution_new_custom(size_t num_boundaries, + double *custom_buckets_boundaries); +/* + * NAME + * distribution_update + * + * DESCRIPTION + * Updates the given distribution with provided gauge. Increase the counter of + * the proper bucket for given gauge. + * + * ARGUMENTS + * d Pointer to the distribution. + * gauge Value of gauge. + * + * RETURN VALUE + * Returns zero upon success or -1 if an error occurred. Error could occurred + * when the user will give the wrong argument, i.e. d will be null, then the + * function will return -1 and the errno will be set to EINVAL. + */ +int distribution_update(distribution_t *d, double gauge); +/* + * NAME + * distribution_percentile + * + * DESCRIPTION + * Calculates the percentile for the given distribution and provided percent. + * + * ARGUMENTS + * d Pointer to the distribution. + * percent Percentile that the function should return. + * + * RETURN VALUE + * Returns the maximum boundary of the bucket in which should be the percent + * given as an argument upon success or NAN if an error occurred. Error could + * occurred when the user will give the wrong argument, i.e. d will be null or + * percent will be greater than 100.0, then the function will return NaN and the + * errno will be set to EINVAL. + */ +double distribution_percentile(distribution_t *d, double percent); +/* + * NAME + * distribution_average + * + * DESCRIPTION + * Calculates average of gauges in the given distribution. + * + * ARGUMENTS + * d Pointer to the distribution. + * + * RETURN VALUE + * Returns the average of all gauges in the given distribution upon success or + * NAN if an error occurred. Error could occurred when the user will pass the + * wrong argument to the function, i.e. d will be null, then the function will + * return NaN and the errno will be set to EINVAL or there were zero updates in + * this distribution. + */ +double distribution_average(distribution_t *d); +/* + * NAME + * distribution_clone + * + * DESCRIPTION + * Clones the distribution given as an argument. + * + * ARGUMENTS + * d Pointer to the distribution. + * + * RETURN VALUE + * Returns the pointer to the cloned distribution upon success or + * NULL if an error occurred. Error could occurred when the user will pass the + * wrong argument to this function, i.e. d will be null, then the function will + * return null and the errno will be set to EINVAL there is also the possibility + * that the user will pass the proper argument but the function will fail + * anyway, i.e. calloc will return null, then the function will return null and + * the errno will be set by calloc. + */ +distribution_t *distribution_clone(distribution_t *d); +/* + * NAME + * distribution_destroy + * + * DESCRIPTION + * Cleans up and frees all the memory + * + * ARGUMENTS + * d Pointer to the distribution. + */ +void distribution_destroy(distribution_t *d); +/* + * NAME + * distribution_check_equal + * + * DESCRIPTION + * Checks if the distributions given as arguments are equal. + * + * ARGUMENTS + * d1 Pointer to the first distribution. + * d2 Pointer to the second distribution. + * + * RETURN VALUE + * Returns the true if the distributions given as arguments are equal + * or false if they aren't. + */ +bool distribution_check_equal(distribution_t *d1, distribution_t *d2); +/* + * NAME + * distribution_get_buckets_boundaries + * + * DESCRIPTION + * Gets buckets' boundaries. + * + * ARGUMENTS + * d Pointer to the distribution. + * + * RETURN VALUE + * Returns the pointer to the array with boundaries of buckets upon success or + * NULL if an error occurred. Error could occurred when the user gave the NULL + * as an argument, then function will return NULL and set errno to EINVAL or if + * calling calloc inside function failed. Then errno will be set by calloc and + * NULL be returned. + */ +double *distribution_get_buckets_boundaries(distribution_t *d); +/* + * NAME + * distribution_get_buckets_counters + * + * DESCRIPTION + * Gets buckets' counters. + * + * ARGUMENTS + * d Pointer to the distribution. + * + * RETURN VALUE + * Returns the pointer to the array with counters of buckets upon success or + * NULL if an error occurred. Error could occurred when the user gave the NULL + * as an argument, then function will return NULL and set errno to EINVAL or if + * calling calloc inside function failed. Then errno will be set by calloc and + * NULL be returned. + */ +uint64_t *distribution_get_buckets_counters(distribution_t *d); +/* + * NAME + * distribution_get_num_buckets + * + * DESCRIPTION + * Gets number of buckets in the distribution. + * + * ARGUMENTS + * d Pointer to the distribution. + * + * RETURN VALUE + * Returns the number of buckets in the distribution upon success or 0 + * if an error occurred. Error could occurred when the user gave the NULL as an + * argument, then function will return 0. + */ +size_t distribution_get_num_buckets(distribution_t *d); +/* + * NAME + * distribution_get_sum_gauges + * + * DESCRIPTION + * Gets sum of gauges in the distribution. + * + * ARGUMENTS + * d Pointer to the distribution. + * + * RETURN VALUE + * Returns the sum of gauges in the distribution upon success or 0 + * if an error occurred. Error could occurred when the user gave the NULL as an + * argument, then function will return 0. + */ +double distribution_get_sum_gauges(distribution_t *d); + +#endif // DISTRIBUTION_H diff --git a/src/daemon/distribution_benchmark.c b/src/daemon/distribution_benchmark.c new file mode 100644 index 0000000000..551dceaecd --- /dev/null +++ b/src/daemon/distribution_benchmark.c @@ -0,0 +1,197 @@ +#include "collectd.h" +#include "distribution.h" + +#define MAX_SIZE 1000000 +#define LINEAR_DISTRIBUTION 0 +#define EXPONENTIAL_DISTRIBUTION 1 +#define CUSTOM_DISTRIBUTION 2 + +double updates[MAX_SIZE]; +double percents[MAX_SIZE]; +/* variable to which we save the return value from the measured functions to + * make sure that compiler won't delete these lines */ +volatile double result; + +void run_distribution_mixed(distribution_t *d, int max_size, + uint64_t *elapsed_time) { + if (d == NULL || elapsed_time == NULL) { + return; + } + + struct timespec start, end; + + for (int i = 0; i < max_size; ++i) { + for (int j = 0; j < 9; ++j) { + updates[i * 9 + j] = (rand() / (double)RAND_MAX) + (rand() % (int)1e6); + } + + percents[i] = (rand() / (double)RAND_MAX) + (rand() % 100); + } + + clock_gettime(CLOCK_MONOTONIC, &start); + for (int j = 0; j < max_size; ++j) { + for (int i = 0; i < 9; ++i) { + result = distribution_update(d, updates[j * 9 + i]); + } + result = distribution_percentile(d, percents[j]); + } + clock_gettime(CLOCK_MONOTONIC, &end); + + *elapsed_time += (uint64_t)1e9 * (end.tv_sec - start.tv_sec) + + (end.tv_nsec - start.tv_nsec); +} + +void run_distribution_update(distribution_t *d, int max_size, + uint64_t *elapsed_time) { + if (d == NULL || elapsed_time == NULL) { + return; + } + + struct timespec start, end; + + for (int i = 0; i < max_size; ++i) { + updates[i] = (rand() / (double)RAND_MAX) + (rand() % (int)1e6); + } + + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < max_size; ++i) { + result = distribution_update(d, updates[i]); + } + clock_gettime(CLOCK_MONOTONIC, &end); + + *elapsed_time += (uint64_t)1e9 * (end.tv_sec - start.tv_sec) + + (end.tv_nsec - start.tv_nsec); +} + +void run_distribution_percentile(distribution_t *d, int max_size, + uint64_t *elapsed_time) { + if (d == NULL || elapsed_time == NULL) { + return; + } + + struct timespec start, end; + + for (int i = 0; i < max_size; ++i) { + percents[i] = (rand() / (double)RAND_MAX) + (rand() % 100); + } + + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < max_size; ++i) { + result = distribution_percentile(d, percents[i]); + } + clock_gettime(CLOCK_MONOTONIC, &end); + + *elapsed_time += (uint64_t)1e9 * (end.tv_sec - start.tv_sec) + + (end.tv_nsec - start.tv_nsec); +} + +int main(int argc, char *argv[]) { + static const int MAX_TURNS = 1; + static const int NUM_DISTS = 3; + + uint64_t elapsed_time_update = 0; + uint64_t elapsed_time_percentile = 0; + uint64_t elapsed_time_mixed = 0; + + if (argc < 2) { + fprintf(stderr, "Usage %s NUM_BUCKETS\n", argv[0]); + exit(EXIT_FAILURE); + } + + int num_buckets = atoi(argv[1]); + + if (num_buckets <= 0) { + fprintf(stderr, "num_buckets have to be greater than zero!\n"); + fprintf(stderr, "Usage %s NUM_BUCKETS\n", argv[0]); + exit(EXIT_FAILURE); + } + + /* if num_buckets will be 1, then calloc can return a null or a valid pointer + * that can be freed however it won't be any issue with that because the + * distribution_new_custom function can handle that */ + double *custom_values = calloc(num_buckets - 1, sizeof(double)); + + srand(time(NULL)); + double prev = 0; + for (int i = 0; i < num_buckets - 1; ++i) { + custom_values[i] = + (rand() / (double)RAND_MAX) + (rand() % (int)1e6 + prev) + 1; + prev = custom_values[i]; + } + + distribution_t *dists[NUM_DISTS]; + dists[0] = distribution_new_linear(num_buckets, 124.543); + dists[1] = distribution_new_exponential(num_buckets, 1.24543, 9); + dists[2] = distribution_new_custom(num_buckets - 1, custom_values); + + for (int i = 0; i < NUM_DISTS; ++i) { + if (dists[i] == NULL) { + fprintf(stderr, "Creating distribution failed! Exited!\n"); + + for (int j = 0; j < i; ++j) { + distribution_destroy(dists[j]); + } + + for (int j = i + 1; j < NUM_DISTS; ++j) { + distribution_destroy(dists[j]); + } + + free(custom_values); + exit(EXIT_FAILURE); + } + } + + for (int i = 0; i < MAX_TURNS; ++i) { + run_distribution_update(dists[LINEAR_DISTRIBUTION], MAX_SIZE, + &elapsed_time_update); + run_distribution_percentile(dists[LINEAR_DISTRIBUTION], MAX_SIZE, + &elapsed_time_percentile); + run_distribution_mixed(dists[LINEAR_DISTRIBUTION], (MAX_SIZE / 10), + &elapsed_time_mixed); + } + + printf("%d,%lf,%lf,%lf,", num_buckets, (double)elapsed_time_update / 1e9, + (double)elapsed_time_percentile / 1e9, + (double)elapsed_time_mixed / 1e9); + + elapsed_time_update = 0; + elapsed_time_percentile = 0; + elapsed_time_mixed = 0; + + for (int i = 0; i < MAX_TURNS; ++i) { + run_distribution_update(dists[EXPONENTIAL_DISTRIBUTION], MAX_SIZE, + &elapsed_time_update); + run_distribution_percentile(dists[EXPONENTIAL_DISTRIBUTION], MAX_SIZE, + &elapsed_time_percentile); + run_distribution_mixed(dists[EXPONENTIAL_DISTRIBUTION], (MAX_SIZE / 10), + &elapsed_time_mixed); + } + + printf("%d,%lf,%lf,%lf,", num_buckets, (double)elapsed_time_update / 1e9, + (double)elapsed_time_percentile / 1e9, + (double)elapsed_time_mixed / 1e9); + + elapsed_time_update = 0; + elapsed_time_percentile = 0; + elapsed_time_mixed = 0; + + for (int i = 0; i < MAX_TURNS; ++i) { + run_distribution_update(dists[CUSTOM_DISTRIBUTION], MAX_SIZE, + &elapsed_time_update); + run_distribution_percentile(dists[CUSTOM_DISTRIBUTION], MAX_SIZE, + &elapsed_time_percentile); + run_distribution_mixed(dists[CUSTOM_DISTRIBUTION], (MAX_SIZE / 10), + &elapsed_time_mixed); + } + + printf("%d,%lf,%lf,%lf\n", num_buckets, (double)elapsed_time_update / 1e9, + (double)elapsed_time_percentile / 1e9, + (double)elapsed_time_mixed / 1e9); + + for (int i = 0; i < NUM_DISTS; ++i) { + distribution_destroy(dists[i]); + } + + free(custom_values); + return 0; +} diff --git a/src/daemon/distribution_test.c b/src/daemon/distribution_test.c new file mode 100644 index 0000000000..65b1f66cf0 --- /dev/null +++ b/src/daemon/distribution_test.c @@ -0,0 +1,1067 @@ +#include "collectd.h" +#include "distribution.h" +#include "testing.h" + +#include + +/* TODO(bkjg): add checking if sum of gauges is equal to zero in constructor + * functions :) */ +double *array_new_linear(size_t size, double diff) { + if (size == 0 || diff <= 0) { + return NULL; + } + + double *arr = calloc(size, sizeof(double)); + + if (arr == NULL) { + return NULL; + } + + for (size_t i = 0; i < size - 1; ++i) { + arr[i] = (double)(i + 1) * diff; + } + + arr[size - 1] = INFINITY; + + return arr; +} + +double *array_new_exponential(size_t size, double base, double factor) { + double *arr = calloc(size, sizeof(double)); + + if (arr == NULL) { + return NULL; + } + + arr[0] = factor; + for (size_t i = 1; i < size - 1; ++i) { + arr[i] = arr[i - 1] * base; + } + + arr[size - 1] = INFINITY; + + return arr; +} + +DEF_TEST(distribution_new_linear) { + struct { + size_t num_buckets; + double size; + int want_err; + double *want_get; + } cases[] = { + { + .num_buckets = 0, + .want_err = EINVAL, + .want_get = NULL, + }, + { + .num_buckets = 10, + .size = -5, + .want_err = EINVAL, + .want_get = NULL, + }, + {.num_buckets = 8, .size = 0, .want_err = EINVAL, .want_get = NULL}, + { + .num_buckets = 10, + .size = 2, + .want_get = array_new_linear(10, 2), + }, + { + .num_buckets = 20, + .size = 0.67, + .want_get = array_new_linear(20, 0.67), + }, + { + .num_buckets = 48, + .size = 8.259, + .want_get = array_new_linear(48, 8.259), + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + distribution_t *d = + distribution_new_linear(cases[i].num_buckets, cases[i].size); + + if (cases[i].want_get == NULL) { + EXPECT_EQ_PTR(cases[i].want_get, d); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + } else { + CHECK_NOT_NULL(d); + EXPECT_EQ_UINT64(cases[i].num_buckets, distribution_get_num_buckets(d)); + + double *boundaries = distribution_get_buckets_boundaries(d); + uint64_t *counters = distribution_get_buckets_counters(d); + + for (size_t j = 0; j < cases[i].num_buckets; ++j) { + EXPECT_EQ_DOUBLE(cases[i].want_get[j], boundaries[j]); + EXPECT_EQ_UINT64(0, counters[j]); + } + + EXPECT_EQ_DOUBLE(0, distribution_get_sum_gauges(d)); + free(boundaries); + free(counters); + } + + free(cases[i].want_get); + distribution_destroy(d); + } + + return 0; +} + +DEF_TEST(distribution_new_exponential) { + struct { + size_t num_buckets; + double base; + double factor; + int want_err; + double *want_get; + } cases[] = { + { + .num_buckets = 0, + .want_err = EINVAL, + .want_get = NULL, + }, + { + .num_buckets = 10, + .base = -52, + .want_err = EINVAL, + .want_get = NULL, + }, + {.num_buckets = 8, .base = 0, .want_err = EINVAL, .want_get = NULL}, + {.num_buckets = 12, + .base = 2, + .factor = 0, + .want_err = EINVAL, + .want_get = NULL}, + {.num_buckets = 33, + .base = 7, + .factor = -5, + .want_err = EINVAL, + .want_get = NULL}, + {.num_buckets = 75, + .base = 0.7, + .factor = 45, + .want_err = EINVAL, + .want_get = NULL}, + {.num_buckets = 106, + .base = 1, + .factor = 5, + .want_err = EINVAL, + .want_get = NULL}, + { + .num_buckets = 6, + .base = 2, + .factor = 3, + .want_get = array_new_exponential(6, 2, 3), + }, + { + .num_buckets = 10, + .base = 5, + .factor = 6.75, + .want_get = array_new_exponential(10, 5, 6.75), + }, + { + .num_buckets = 26, + .base = 1.01, + .factor = 4.64, + .want_get = array_new_exponential(26, 1.01, 4.64), + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + distribution_t *d = distribution_new_exponential( + cases[i].num_buckets, cases[i].base, cases[i].factor); + + if (cases[i].want_get == NULL) { + EXPECT_EQ_PTR(cases[i].want_get, d); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + } else { + CHECK_NOT_NULL(d); + + EXPECT_EQ_UINT64(cases[i].num_buckets, distribution_get_num_buckets(d)); + double *boundaries = distribution_get_buckets_boundaries(d); + uint64_t *counters = distribution_get_buckets_counters(d); + + for (size_t j = 0; j < cases[i].num_buckets; ++j) { + EXPECT_EQ_DOUBLE(cases[i].want_get[j], boundaries[j]); + EXPECT_EQ_UINT64(0, counters[j]); + } + + EXPECT_EQ_DOUBLE(0, distribution_get_sum_gauges(d)); + free(boundaries); + free(counters); + } + + free(cases[i].want_get); + distribution_destroy(d); + } + return 0; +} + +DEF_TEST(distribution_new_custom) { + struct { + size_t num_boundaries; + double *given_boundaries; + int want_err; + double *want_get; + } cases[] = { + { + .num_boundaries = 0, + .want_get = (double[]){INFINITY}, + }, + {.num_boundaries = 5, + .given_boundaries = (double[]){1.6, 9.54, 56.23, 698.90, INFINITY}, + .want_err = EINVAL, + .want_get = NULL}, + { + .num_boundaries = 5, + .given_boundaries = (double[]){5, 4, 6, 7, 8}, + .want_err = EINVAL, + .want_get = NULL, + }, + {.num_boundaries = 4, + .given_boundaries = (double[]){-2, 4, 5, 6}, + .want_err = EINVAL, + .want_get = NULL}, + {.num_boundaries = 7, + .given_boundaries = + (double[]){1.23, 4.76, 6.324, 8.324, 9.342, 16.4234, 90.4234}, + .want_get = (double[]){1.23, 4.76, 6.324, 8.324, 9.342, 16.4234, 90.4234, + INFINITY}}, + }; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + distribution_t *d = distribution_new_custom(cases[i].num_boundaries, + cases[i].given_boundaries); + + if (cases[i].want_get == NULL) { + EXPECT_EQ_PTR(cases[i].want_get, d); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + } else { + CHECK_NOT_NULL(d); + EXPECT_EQ_UINT64(cases[i].num_boundaries + 1, + distribution_get_num_buckets(d)); + double *boundaries = distribution_get_buckets_boundaries(d); + uint64_t *counters = distribution_get_buckets_counters(d); + + for (size_t j = 0; j < cases[i].num_boundaries + 1; ++j) { + EXPECT_EQ_DOUBLE(cases[i].want_get[j], boundaries[j]); + EXPECT_EQ_UINT64(0, counters[j]); + } + + EXPECT_EQ_DOUBLE(0, distribution_get_sum_gauges(d)); + free(boundaries); + free(counters); + } + + distribution_destroy(d); + } + + return 0; +} + +DEF_TEST(distribution_update) { + distribution_t *dist_test1 = distribution_new_linear(15, 34.834); + distribution_t *dist_test2 = distribution_new_exponential(25, 1.673, 6.8); + distribution_t *dist_test3 = distribution_new_custom( + 18, (double[]){1, 5, 25, 125, 625, 1000, 1001, 1005, 1025, 1125, 1625, + 2000, 2001, 2005, 2025, 2125, 2625, 3000}); + struct { + size_t num_buckets; + uint64_t *counters; + distribution_t *input_dist; + bool input_dist_is_null; + double *gauges; + int num_queries; + int *status_codes; + int *want_err; + double want_sum; + } cases[] = { + { + .num_buckets = 0, + .num_queries = 1, + .input_dist = NULL, + .input_dist_is_null = true, + .status_codes = (int[]){EXIT_FAILURE}, + .want_err = (int[]){EINVAL}, + .gauges = (double[]){54.6}, + .want_sum = NAN, + }, + { + .num_buckets = 15, + .input_dist = dist_test1, + .num_queries = 16, + .gauges = (double[]){5, 1, 6.74, 23.54, 52.6435, 23.523, 6554.534, + 87.543, 135.34, 280.43, 100.624, 40.465, -78.213, + -90.423, -1423.423, -9.432}, + .status_codes = + (int[]){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXIT_FAILURE, + EXIT_FAILURE, EXIT_FAILURE, EXIT_FAILURE}, + .want_err = (int[]){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EINVAL, + EINVAL, EINVAL, EINVAL}, + .want_sum = 7311.3825, + .counters = (uint64_t[]){5, 7, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 12}, + }, + { + .num_buckets = 25, + .input_dist = dist_test2, + .gauges = (double[]){10.45, 26.43, 98.84, + 1067.27, 905.326, 46.7242, + 205.653, 542.876, 24543.543, + 35262.6345, 64262.24624, 8753.635, + 26.264, 675645.346346, 764.436, + 2345.56, 23456.789, 65543.6456, + 10583.72023, 896496.6532, 738563.5723, + 23562.534652, 5325927.253234, 5237452.523523, + 462383.25235, 7936583.3734475, 9000, + 6.8, 11.3764, 19.0327172}, + .num_queries = 30, + .want_sum = 21544097.760940, + .status_codes = + (int[]){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + .want_err = (int[]){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + .counters = + (uint64_t[]){0, 2, 3, 6, 7, 7, 8, 9, 9, 10, 13, 13, 14, + 14, 16, 17, 20, 21, 23, 23, 23, 23, 24, 27, 30}, + }, + { + .num_buckets = 19, + .input_dist = dist_test3, + .gauges = (double[]){1, 5.43, 6.42626, 625, 625.1, 624.999999, 1000, + 999.999999, 0, 999999, 1001, -1}, + .status_codes = + (int[]){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXIT_FAILURE}, + .want_err = (int[]){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EINVAL}, + .num_queries = 12, + .want_sum = 1004887.956258, + .counters = (uint64_t[]){1, 2, 4, 4, 5, 8, 9, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 11}, + }, + }; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + if (cases[i].input_dist_is_null == false) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + for (int j = 0; j < cases[i].num_queries; ++j) { + EXPECT_EQ_INT( + cases[i].status_codes[j], + distribution_update(cases[i].input_dist, cases[i].gauges[j])); + + if (cases[i].want_err[j] != 0) { + EXPECT_EQ_INT(cases[i].want_err[j], errno); + } + } + + EXPECT_EQ_UINT64(cases[i].num_buckets, + distribution_get_num_buckets(cases[i].input_dist)); + + uint64_t *counters = distribution_get_buckets_counters(cases[i].input_dist); + + if (counters != NULL) { + for (size_t j = 0; j < cases[i].num_buckets; ++j) { + EXPECT_EQ_UINT64(cases[i].counters[j], counters[j]); + } + free(counters); + } + + static const int MAX_BUFFER = 256; + char buffer[MAX_BUFFER]; + double sum = distribution_get_sum_gauges(cases[i].input_dist); + + snprintf(buffer, MAX_BUFFER, "%.6lf", sum); + sscanf(buffer, "%lf", &sum); + + EXPECT_EQ_DOUBLE(cases[i].want_sum, sum); + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_clone) { + distribution_t *dist_test1 = distribution_new_linear(15, 63.734); + distribution_t *dist_test2 = distribution_new_exponential(9, 2.27, 21); + + struct { + distribution_t *input_dist; + bool input_dist_is_null; + int num_queries; + double *gauges; + int want_err; + int want_null; + } cases[] = { + { + .input_dist = NULL, + .input_dist_is_null = true, + .want_err = EINVAL, + .want_null = 1, + }, + { + .input_dist = dist_test1, + .num_queries = 45, + .gauges = + (double[]){ + 401.786335, 428.364577, 739.931567, 537.877521, 393.784363, + 499.047904, 194.578455, 454.562191, 78.147954, 105.139291, + 549.147894, 101.646588, 278.557795, 334.998973, 771.196134, + 223.185497, 535.803708, 855.076769, 543.160251, 830.504115, + 52.544655, 698.238304, 122.609439, 43.863821, 557.741052, + 506.076551, 435.950159, 700.606309, 523.611113, 581.227883, + 34.957304, 345.55901, 899.636021, 933.738096, 0.165581, + 180.74248, 884.141168, 633.300635, 864.866767, 149.111413, + 313.099403, 307.779694, 844.226904, 622.521474, 569.599438}, + }, + { + .input_dist = dist_test2, + .num_queries = 28, + .gauges = + (double[]){ + 4712.37512, 11220.360862, 6010.987436, 25040.804706, + 1208.025605, 21976.834303, 25464.390888, 5042.098611, + 11671.433834, 33330.679342, 32864.766883, 26837.13805, + 17829.808981, 19009.054978, 7263.350883, 21006.620631, + 15666.541598, 15607.779603, 4561.255709, 10624.390723, + 15794.977271, 21377.752258, 447.526797, 5436.961247, + 14718.381958, 6941.786583, 31236.108615, 11542.587007}, + }, + }; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + if (cases[i].input_dist_is_null == false) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + for (int j = 0; j < cases[i].num_queries; ++j) { + distribution_update(cases[i].input_dist, cases[i].gauges[j]); + } + + distribution_t *d = distribution_clone(cases[i].input_dist); + + if (cases[i].want_null == 1) { + EXPECT_EQ_PTR(NULL, d); + + if (cases[i].want_err) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + } else { + CHECK_NOT_NULL(d); + EXPECT_EQ_INT(1, distribution_check_equal(cases[i].input_dist, d)); + } + + distribution_destroy(d); + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_average) { + distribution_t *dist_test1 = distribution_new_linear(5, 15.0); + distribution_t *dist_test2 = distribution_new_exponential(14, 1.5, 4); + + struct { + distribution_t *input_dist; + bool input_dist_is_null; + int num_queries; + double *gauges; + int want_err; + double want_get; + } cases[] = { + { + .input_dist = NULL, + .input_dist_is_null = true, + .want_err = EINVAL, + .want_get = NAN, + }, + { + .input_dist = dist_test1, + .num_queries = 15, + .gauges = + (double[]){96.830114, 76.390477, 67.423375, 92.294301, 94.709233, + 58.866879, 96.35488, 7.608579, 11.252106, 7.997911, + 93.895876, 71.741534, 3.379895, 52.557907, 80.819279}, + .want_get = 60.808156, + }, + { + .input_dist = dist_test2, + .num_queries = 24, + .gauges = + (double[]){35.949496, 11.585167, 366.281973, 462.337073, + 647.582073, 587.025411, 401.665193, 596.943991, + 648.546032, 782.821562, 140.631269, 846.614329, + 767.833053, 1071.824215, 58.17617, 604.110249, + 2.884479, 1003.369684, 1159.951421, 1114.897931, + 987.155078, 63.23755, 261.644787, 315.01636}, + .want_get = 539.086856, + }, + }; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + if (cases[i].input_dist_is_null == false) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + for (int j = 0; j < cases[i].num_queries; ++j) { + distribution_update(cases[i].input_dist, cases[i].gauges[j]); + } + + static const int MAX_BUFFER = 256; + char buffer[MAX_BUFFER]; + double average = distribution_average(cases[i].input_dist); + snprintf(buffer, MAX_BUFFER, "%.6lf", average); + sscanf(buffer, "%lf", &average); + + EXPECT_EQ_DOUBLE(cases[i].want_get, average); + + if (cases[i].want_err) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_percentile) { + distribution_t *dist_test1 = distribution_new_exponential(10, 1.54, 8); + distribution_t *dist_test2 = distribution_new_linear(40, 2.34); + distribution_t *dist_test3 = distribution_new_exponential(5, 5.43, 9); + distribution_t *dist_test4 = distribution_new_linear(5, 15.0); + distribution_t *dist_test5 = distribution_new_exponential(7, 6.7, 2); + + struct { + distribution_t *input_dist; + bool input_dist_is_null; + double percent; + double want_percentile; + double *gauges; + int want_err; + int num_queries; + } cases[] = { + { + .input_dist = NULL, + .input_dist_is_null = true, + .want_percentile = NAN, + .want_err = EINVAL, + }, + { + .input_dist = dist_test1, + .percent = -5, + .want_percentile = NAN, + .want_err = EINVAL, + }, + { + .input_dist = dist_test2, + .num_queries = 4, + .gauges = (double[]){2.43, 8.98, 76.432, 423.4323}, + .percent = 110.9, + .want_percentile = NAN, + .want_err = EINVAL, + }, + { + .input_dist = dist_test3, + .num_queries = 1, + .gauges = (double[]){84.90}, + .percent = -0.12, + .want_percentile = NAN, + .want_err = EINVAL, + }, + { + .input_dist = dist_test4, + .percent = 5.67, + .want_percentile = 15, + .num_queries = 15, + .gauges = (double[]){4.576, 6.432, 90.4235456, 8.432, 10.423, + 11.54, 20.423, 29.312, 40.231, 42.423, + 44.432, 50.12, 53.32, 54.543, 57.423, + 58.423, 59.2141, 80.342, 100.3425, 150.34}, + }, + { + .input_dist = dist_test5, + .percent = 100, + .want_percentile = 2, + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + if (cases[i].input_dist_is_null == false) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + for (int j = 0; j < cases[i].num_queries; ++j) { + distribution_update(cases[i].input_dist, cases[i].gauges[j]); + } + + double percentile = + distribution_percentile(cases[i].input_dist, cases[i].percent); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + + EXPECT_EQ_DOUBLE(cases[i].want_percentile, percentile); + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_get_num_buckets) { + distribution_t *dist_test1 = distribution_new_linear(5, 15.0); + distribution_t *dist_test2 = distribution_new_exponential(14, 1.5, 4); + distribution_t *dist_test3 = distribution_new_custom( + 28, + (double[]){1, 4, 6, 19.3, 65.35, + 98.9423, 904.4321, 1000.432, 7894.90145, 8000.5472, + 9000.852, 10942.11, 11443, 89002.432, 90423.62, + 95326.54, 97642.90, 100432.75, 109543.62, 209536.3543, + 500426.626, 635690.62, 790426.268, 800738.374, 1000436.637, + 1111111.98, 1234567.890, 2345678.901}); + distribution_t *dist_test4 = distribution_new_linear(30, 1.5); + + struct { + distribution_t *input_dist; + int want_err; + int want_num_buckets; + } cases[] = {{ + .input_dist = NULL, + .want_err = EINVAL, + .want_num_buckets = 0, + }, + { + .input_dist = dist_test1, + .want_num_buckets = 5, + }, + { + .input_dist = dist_test2, + .want_num_buckets = 14, + }, + { + .input_dist = dist_test3, + .want_num_buckets = 29, + }, + { + .input_dist = dist_test4, + .want_num_buckets = 30, + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + if (cases[i].want_err == 0) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + EXPECT_EQ_UINT64(cases[i].want_num_buckets, + distribution_get_num_buckets(cases[i].input_dist)); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_get_buckets_boundaries) { + distribution_t *dist_test1 = distribution_new_linear(5, 15.0); + distribution_t *dist_test2 = distribution_new_exponential(14, 1.5, 4); + distribution_t *dist_test3 = distribution_new_custom( + 28, + (double[]){1, 4, 6, 19.3, 65.35, + 98.9423, 904.4321, 1000.432, 7894.90145, 8000.5472, + 9000.852, 10942.11, 11443, 89002.432, 90423.62, + 95326.54, 97642.90, 100432.75, 109543.62, 209536.3543, + 500426.626, 635690.62, 790426.268, 800738.374, 1000436.637, + 1111111.98, 1234567.890, 2345678.901}); + distribution_t *dist_test4 = distribution_new_linear(30, 1.5); + + struct { + distribution_t *input_dist; + int num_buckets; + int want_err; + double *want_boundaries; + int cannot_be_free; + } cases[] = { + { + .input_dist = NULL, + .want_boundaries = NULL, + .want_err = EINVAL, + .num_buckets = 0, + }, + { + .input_dist = dist_test1, + .num_buckets = 5, + .want_boundaries = array_new_linear(5, 15.0), + }, + { + .input_dist = dist_test2, + .num_buckets = 14, + .want_boundaries = array_new_exponential(14, 1.5, 4), + }, + { + .input_dist = dist_test3, + .num_buckets = 29, + .want_boundaries = + (double[]){ + 1, 4, 6, 19.3, 65.35, + 98.9423, 904.4321, 1000.432, 7894.90145, 8000.5472, + 9000.852, 10942.11, 11443, 89002.432, 90423.62, + 95326.54, 97642.90, 100432.75, 109543.62, 209536.3543, + 500426.626, 635690.62, 790426.268, 800738.374, 1000436.637, + 1111111.98, 1234567.890, 2345678.901, INFINITY}, + .cannot_be_free = 1, + }, + { + .input_dist = dist_test4, + .num_buckets = 30, + .want_boundaries = array_new_linear(30, 1.5), + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + if (cases[i].want_err == 0) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + EXPECT_EQ_UINT64(cases[i].num_buckets, + distribution_get_num_buckets(cases[i].input_dist)); + + double *boundaries = + distribution_get_buckets_boundaries(cases[i].input_dist); + + if (cases[i].want_boundaries == NULL) { + EXPECT_EQ_PTR(cases[i].want_boundaries, boundaries); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + } else { + CHECK_NOT_NULL(boundaries); + for (size_t j = 0; j < cases[i].num_buckets; ++j) { + EXPECT_EQ_DOUBLE(cases[i].want_boundaries[j], boundaries[j]); + } + free(boundaries); + + if (cases[i].cannot_be_free == 0) { + free(cases[i].want_boundaries); + } + } + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_get_buckets_counters) { + distribution_t *dist_test1 = distribution_new_linear(10, 5.0); + distribution_t *dist_test2 = distribution_new_exponential(8, 1.5, 2); + distribution_t *dist_test3 = distribution_new_custom( + 11, (double[]){1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144}); + + struct { + distribution_t *input_dist; + double *gauges; + int num_queries; + uint64_t *want_counters; + int num_buckets; + int want_err; + } cases[] = { + { + .input_dist = NULL, + .num_queries = 0, + .num_buckets = 0, + .want_err = EINVAL, + }, + { + .input_dist = dist_test1, + .num_queries = 11, + .gauges = (double[]){1, 2, 3, 5, 10, 90, 8, 45, 44, 41.45, 40.5}, + .num_buckets = 10, + .want_counters = (uint64_t[]){3, 5, 6, 6, 6, 6, 6, 6, 9, 11}, + }, + { + .input_dist = dist_test2, + .num_queries = 16, + .gauges = + (double[]){1.5, 1.23, 1.67, 2, 24.532, 25, 28.43, 98.43, 10.43, + 7.53, 11.235, 4.43256, 7.432, 3, 3.01, 2.98}, + .num_buckets = 8, + .want_counters = (uint64_t[]){3, 5, 8, 8, 10, 12, 12, 16}, + }, + { + .input_dist = dist_test3, + .num_queries = 15, + .gauges = (double[]){0, 0.65, 0.7, 0.99, 0.999999, 1, 2.65, 3, 3.1123, + 10.923, 90.432, 145.90, 144, 143.999999, 190}, + .num_buckets = 12, + .want_counters = + (uint64_t[]){5, 6, 7, 9, 9, 10, 10, 10, 10, 10, 12, 15}, + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + if (cases[i].want_err == 0) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + for (int j = 0; j < cases[i].num_queries; ++j) { + distribution_update(cases[i].input_dist, cases[i].gauges[j]); + } + + EXPECT_EQ_UINT64(cases[i].num_buckets, + distribution_get_num_buckets(cases[i].input_dist)); + + uint64_t *counters = distribution_get_buckets_counters(cases[i].input_dist); + + if (cases[i].want_counters == NULL) { + EXPECT_EQ_PTR(cases[i].want_counters, counters); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + } else { + CHECK_NOT_NULL(counters); + for (size_t j = 0; j < cases[i].num_buckets; ++j) { + EXPECT_EQ_UINT64(cases[i].want_counters[j], counters[j]); + } + free(counters); + } + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +DEF_TEST(distribution_check_equal) { + distribution_t *dist_test11 = distribution_new_linear(15, 17.97); + distribution_t *dist_test22 = distribution_new_exponential(19, 1.497, 7.9); + distribution_t *dist_test31 = distribution_new_exponential(15, 2.1, 3); + distribution_t *dist_test32 = distribution_new_exponential(15, 2.1, 3.0001); + distribution_t *dist_test41 = distribution_new_linear(8, 21); + distribution_t *dist_test42 = + distribution_new_custom(7, (double[]){21, 42, 63, 84, 105, 126, 147}); + + struct { + distribution_t *input_dist1; + distribution_t *input_dist2; + double *gauges1; + double *gauges2; + int num_queries1; + int num_queries2; + int want_get; + } cases[] = { + { + .input_dist1 = NULL, + .input_dist2 = NULL, + .num_queries1 = 0, + .num_queries2 = 0, + .want_get = 1, + }, + { + .input_dist1 = dist_test11, + .input_dist2 = NULL, + .num_queries1 = 17, + .num_queries2 = 0, + .gauges1 = + (double[]){64.986822, 75.361073, 291.412027, 0.209184, 25.542358, + 207.719335, 228.715725, 120.862435, 50.335099, + 78.147062, 103.449701, 45.456052, 120.827738, + 39.133311, 66.804762, 256.750525, 42.075292}, + }, + { + .input_dist1 = NULL, + .input_dist2 = dist_test22, + .num_queries1 = 0, + .num_queries2 = 21, + .gauges2 = + (double[]){39443.618618, 24339.664702, 21573.536089, 29609.30347, + 2926.352621, 14058.673966, 4660.770634, 29783.728304, + 34311.842208, 8530.487236, 19927.009242, 15188.572656, + 971.651245, 16269.067161, 2591.089086, 22718.987438, + 28344.842898, 17402.4872, 31390.581462, 24385.094319, + 29730.582344}, + }, + { + .input_dist1 = dist_test31, + .input_dist2 = dist_test32, + .num_queries1 = 15, + .num_queries2 = 15, + .gauges1 = + (double[]){91162.43496, 72940.539939, 84641.174039, 97027.221525, + 84159.235853, 91894.852013, 52426.443153, 27785.207936, + 14766.938133, 94843.147406, 79763.869899, 32806.450583, + 74097.374659, 3293.59171, 6341.594074}, + .gauges2 = + (double[]){91162.43496, 72940.539939, 84641.174039, 97027.221525, + 84159.235853, 91894.852013, 52426.443153, 27785.207936, + 14766.938133, 94843.147406, 79763.869899, 32806.450583, + 74097.374659, 3293.59171, 6341.594074}, + }, + { + .input_dist1 = dist_test41, + .input_dist2 = dist_test42, + .num_queries1 = 17, + .num_queries2 = 17, + .gauges1 = (double[]){122.793488, 73.629423, 85.238252, 171.841943, + 189.006106, 92.612949, 83.502165, 139.368244, + 27.286445, 77.298995, 56.650835, 163.273312, + 142.017526, 162.949669, 31.717699, 38.69047, + 175.971837}, + .gauges2 = (double[]){122.793488, 73.629423, 85.238252, 171.841943, + 189.006106, 92.612949, 83.502165, 139.368244, + 27.286445, 77.298995, 56.650835, 163.273312, + 142.017526, 162.949669, 31.717699, 38.69047, + 175.971837}, + .want_get = 1, + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + for (int j = 0; j < cases[i].num_queries1; ++j) { + distribution_update(cases[i].input_dist1, cases[i].gauges1[j]); + } + + for (int j = 0; j < cases[i].num_queries2; ++j) { + distribution_update(cases[i].input_dist2, cases[i].gauges2[j]); + } + + EXPECT_EQ_INT( + cases[i].want_get, + distribution_check_equal(cases[i].input_dist1, cases[i].input_dist2)); + + distribution_destroy(cases[i].input_dist1); + distribution_destroy(cases[i].input_dist2); + } + + return 0; +} + +DEF_TEST(distribution_get_sum_gauges) { + distribution_t *dist_test1 = distribution_new_linear(10, 5.0); + distribution_t *dist_test2 = distribution_new_exponential(8, 1.5, 2); + distribution_t *dist_test3 = distribution_new_custom( + 11, (double[]){1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144}); + + struct { + distribution_t *input_dist; + double *gauges; + int num_queries; + int want_err; + double want_sum_gauges; + } cases[] = { + { + .input_dist = NULL, + .num_queries = 0, + .want_err = EINVAL, + .want_sum_gauges = NAN, + }, + { + .input_dist = dist_test1, + .num_queries = 19, + .gauges = (double[]){103.022105, 171.636117, 116.488605, 28.172234, + 36.809295, 105.699156, 95.190406, 173.762403, + 105.859558, 105.500904, 42.080885, 145.297908, + 109.747067, 183.684136, 27.112998, 43.693238, + 184.177938, 138.033766, 171.255309}, + .want_sum_gauges = 2087.224028, + }, + { + .input_dist = dist_test2, + .num_queries = 18, + .gauges = (double[]){1314.386028, 713.040721, 388.600533, 2194.733454, + 62.713018, 436.076538, 154.274781, 467.571249, + 1956.153932, 1884.719494, 1744.740075, + 715.797969, 686.73603, 223.723816, 202.431405, + 1640.915258, 1236.700456, 1328.934664}, + .want_sum_gauges = 17352.249421, + }, + { + .input_dist = dist_test3, + .num_queries = 21, + .gauges = (double[]){85.440604, 89.039631, 197.239067, 20.809416, + 23.130686, 109.073608, 236.542966, 158.416868, + 30.537857, 155.668704, 202.337704, 127.671802, + 33.857584, 95.56406, 63.416192, 188.410385, + 140.583885, 96.033746, 70.10835, 110.265126, + 95.71921}, + .want_sum_gauges = 2329.867451, + }}; + + for (size_t i = 0; i < (sizeof(cases) / sizeof(cases[0])); ++i) { + printf("## Case %zu:\n", i); + + if (cases[i].want_err == 0) { + CHECK_NOT_NULL(cases[i].input_dist); + } + + for (int j = 0; j < cases[i].num_queries; ++j) { + distribution_update(cases[i].input_dist, cases[i].gauges[j]); + } + + static const int MAX_BUFFER = 256; + char buffer[MAX_BUFFER]; + double sum = distribution_get_sum_gauges(cases[i].input_dist); + snprintf(buffer, MAX_BUFFER, "%.6lf", sum); + sscanf(buffer, "%lf", &sum); + EXPECT_EQ_DOUBLE(cases[i].want_sum_gauges, sum); + + if (cases[i].want_err != 0) { + EXPECT_EQ_INT(cases[i].want_err, errno); + } + + distribution_destroy(cases[i].input_dist); + } + + return 0; +} + +int main(void) { + RUN_TEST(distribution_new_linear); + RUN_TEST(distribution_new_exponential); + RUN_TEST(distribution_new_custom); + RUN_TEST(distribution_update); + RUN_TEST(distribution_percentile); + RUN_TEST(distribution_average); + RUN_TEST(distribution_clone); + RUN_TEST(distribution_get_num_buckets); + RUN_TEST(distribution_get_buckets_boundaries); + RUN_TEST(distribution_get_buckets_counters); + RUN_TEST(distribution_get_sum_gauges); + RUN_TEST(distribution_check_equal); + + END_TEST; +} \ No newline at end of file