1 /*
2  * Copyright (c) 2022 Meta
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #include <stdlib.h>
8 
9 #include <zephyr/random/random.h>
10 #include <zephyr/sys/hash_function.h>
11 #include <zephyr/ztest.h>
12 
13 BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_ENTRIES > 0);
14 BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_BUCKETS > 0);
15 BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_ENTRIES >= 10 * CONFIG_TEST_HASH_FUNC_NUM_BUCKETS);
16 
print_buckets(const char * label,float * buckets,size_t n)17 static void print_buckets(const char *label, float *buckets, size_t n)
18 {
19 	if (IS_ENABLED(CONFIG_TEST_HASH_FUNC_DEBUG)) {
20 		printk("%s\n", label);
21 
22 		for (size_t i = 0; i < n; ++i) {
23 			printk("%f, ", (double)buckets[i]);
24 		}
25 
26 		printk("\n");
27 	}
28 }
29 
create_histogram(float * buckets,size_t n)30 static void create_histogram(float *buckets, size_t n)
31 {
32 	uint32_t entry;
33 	uint32_t hash;
34 	size_t bucket;
35 
36 	for (size_t i = 0; i < CONFIG_TEST_HASH_FUNC_NUM_ENTRIES; ++i) {
37 		/* generate a random value (note: could be any random data source) */
38 		entry = sys_rand32_get();
39 		/* hash the random value */
40 		hash = sys_hash32(&entry, sizeof(entry));
41 		/* mod the hash to determine which bucket to increment */
42 		bucket = hash % CONFIG_TEST_HASH_FUNC_NUM_BUCKETS;
43 
44 		buckets[bucket]++;
45 	}
46 }
47 
compare_floats(const void * a,const void * b)48 static int compare_floats(const void *a, const void *b)
49 {
50 	float aa = *(float *)a;
51 	float bb = *(float *)b;
52 
53 	return (aa > bb) - (aa < bb);
54 }
55 
kolmogorov_smirnov_test(float * buckets,size_t n)56 static int kolmogorov_smirnov_test(float *buckets, size_t n)
57 {
58 	float d;
59 	float f_x;
60 	float prev;
61 	float f0_x;
62 	float d_max;
63 	float d_alpha_sq;
64 
65 	/* sort observations in ascending order */
66 	qsort(buckets, n, sizeof(*buckets), compare_floats);
67 
68 	/* calculate the cdf of observations */
69 	prev = 0;
70 	for (size_t i = 0; i < n; ++i) {
71 		buckets[i] += prev;
72 		prev = buckets[i];
73 	}
74 
75 	for (size_t i = 0; i < n; ++i) {
76 		buckets[i] /= buckets[n - 1];
77 	}
78 
79 	print_buckets("cdf", buckets, n);
80 
81 	/* Compute the absolute differences */
82 	d_max = 0;
83 	for (size_t i = 0; i < n; ++i) {
84 		/* cdf of hypothesized distribution (uniform, in this case) */
85 		f0_x = (float)(i + 1) / n;
86 		/* cdf of empirical distribution */
87 		f_x = buckets[i];
88 
89 		d = (f_x >= f0_x) ? (f_x - f0_x) : (f0_x - f_x);
90 		if (d > d_max) {
91 			d_max = d;
92 		}
93 
94 		buckets[i] = d;
95 	}
96 
97 	print_buckets("differences", buckets, n);
98 
99 	/*
100 	 * Calculate the critical value
101 	 *
102 	 * For n >= 40, the critical value can be estimated for various alpha.
103 	 *
104 	 * http://oak.ucc.nau.edu/rh83/Statistics/ks1/
105 	 *
106 	 * E.g. For alpha = 0.05, the estimator is 1.36 / sqrt(n)
107 	 *
108 	 * However, since we lack sqrt(n), we have to square both sides
109 	 * of the comparison. So,
110 	 *
111 	 * D   >  1.36 / sqrt(n)
112 	 * D^2 > (1.36 / sqrt(n))^2
113 	 * D^2 > 1.8496 / n
114 	 */
115 
116 	d_alpha_sq = 1.8496f / n;
117 
118 	if (IS_ENABLED(CONFIG_TEST_HASH_FUNC_DEBUG)) {
119 		printk("d_max^2: %f\n", (double)(d_max * d_max));
120 		printk("d_alpha^2: %f\n", (double)d_alpha_sq);
121 	}
122 
123 	if (d_max * d_max > d_alpha_sq) {
124 		return -EINVAL;
125 	}
126 
127 	return 0;
128 }
129 
ZTEST(hash_function,test_sys_hash32)130 ZTEST(hash_function, test_sys_hash32)
131 {
132 	float buckets[CONFIG_TEST_HASH_FUNC_NUM_BUCKETS] = {0};
133 
134 	create_histogram(buckets, ARRAY_SIZE(buckets));
135 
136 	print_buckets("histogram", buckets, ARRAY_SIZE(buckets));
137 
138 	zassert_ok(kolmogorov_smirnov_test(buckets, ARRAY_SIZE(buckets)));
139 }
140 
141 ZTEST_SUITE(hash_function, NULL, NULL, NULL, NULL, NULL);
142