1 /*
2 * Copyright (c) 2022 Meta
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 #include <stdlib.h>
8
9 #include <zephyr/random/random.h>
10 #include <zephyr/sys/hash_function.h>
11 #include <zephyr/ztest.h>
12
13 BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_ENTRIES > 0);
14 BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_BUCKETS > 0);
15 BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_ENTRIES >= 10 * CONFIG_TEST_HASH_FUNC_NUM_BUCKETS);
16
print_buckets(const char * label,float * buckets,size_t n)17 static void print_buckets(const char *label, float *buckets, size_t n)
18 {
19 if (IS_ENABLED(CONFIG_TEST_HASH_FUNC_DEBUG)) {
20 printk("%s\n", label);
21
22 for (size_t i = 0; i < n; ++i) {
23 printk("%f, ", (double)buckets[i]);
24 }
25
26 printk("\n");
27 }
28 }
29
create_histogram(float * buckets,size_t n)30 static void create_histogram(float *buckets, size_t n)
31 {
32 uint32_t entry;
33 uint32_t hash;
34 size_t bucket;
35
36 for (size_t i = 0; i < CONFIG_TEST_HASH_FUNC_NUM_ENTRIES; ++i) {
37 /* generate a random value (note: could be any random data source) */
38 entry = sys_rand32_get();
39 /* hash the random value */
40 hash = sys_hash32(&entry, sizeof(entry));
41 /* mod the hash to determine which bucket to increment */
42 bucket = hash % CONFIG_TEST_HASH_FUNC_NUM_BUCKETS;
43
44 buckets[bucket]++;
45 }
46 }
47
compare_floats(const void * a,const void * b)48 static int compare_floats(const void *a, const void *b)
49 {
50 float aa = *(float *)a;
51 float bb = *(float *)b;
52
53 return (aa > bb) - (aa < bb);
54 }
55
kolmogorov_smirnov_test(float * buckets,size_t n)56 static int kolmogorov_smirnov_test(float *buckets, size_t n)
57 {
58 float d;
59 float f_x;
60 float prev;
61 float f0_x;
62 float d_max;
63 float d_alpha_sq;
64
65 /* sort observations in ascending order */
66 qsort(buckets, n, sizeof(*buckets), compare_floats);
67
68 /* calculate the cdf of observations */
69 prev = 0;
70 for (size_t i = 0; i < n; ++i) {
71 buckets[i] += prev;
72 prev = buckets[i];
73 }
74
75 for (size_t i = 0; i < n; ++i) {
76 buckets[i] /= buckets[n - 1];
77 }
78
79 print_buckets("cdf", buckets, n);
80
81 /* Compute the absolute differences */
82 d_max = 0;
83 for (size_t i = 0; i < n; ++i) {
84 /* cdf of hypothesized distribution (uniform, in this case) */
85 f0_x = (float)(i + 1) / n;
86 /* cdf of empirical distribution */
87 f_x = buckets[i];
88
89 d = (f_x >= f0_x) ? (f_x - f0_x) : (f0_x - f_x);
90 if (d > d_max) {
91 d_max = d;
92 }
93
94 buckets[i] = d;
95 }
96
97 print_buckets("differences", buckets, n);
98
99 /*
100 * Calculate the critical value
101 *
102 * For n >= 40, the critical value can be estimated for various alpha.
103 *
104 * http://oak.ucc.nau.edu/rh83/Statistics/ks1/
105 *
106 * E.g. For alpha = 0.05, the estimator is 1.36 / sqrt(n)
107 *
108 * However, since we lack sqrt(n), we have to square both sides
109 * of the comparison. So,
110 *
111 * D > 1.36 / sqrt(n)
112 * D^2 > (1.36 / sqrt(n))^2
113 * D^2 > 1.8496 / n
114 */
115
116 d_alpha_sq = 1.8496f / n;
117
118 if (IS_ENABLED(CONFIG_TEST_HASH_FUNC_DEBUG)) {
119 printk("d_max^2: %f\n", (double)(d_max * d_max));
120 printk("d_alpha^2: %f\n", (double)d_alpha_sq);
121 }
122
123 if (d_max * d_max > d_alpha_sq) {
124 return -EINVAL;
125 }
126
127 return 0;
128 }
129
ZTEST(hash_function,test_sys_hash32)130 ZTEST(hash_function, test_sys_hash32)
131 {
132 float buckets[CONFIG_TEST_HASH_FUNC_NUM_BUCKETS] = {0};
133
134 create_histogram(buckets, ARRAY_SIZE(buckets));
135
136 print_buckets("histogram", buckets, ARRAY_SIZE(buckets));
137
138 zassert_ok(kolmogorov_smirnov_test(buckets, ARRAY_SIZE(buckets)));
139 }
140
141 ZTEST_SUITE(hash_function, NULL, NULL, NULL, NULL, NULL);
142