1 /*
2  * Copyright (c) 2020 Stephanos Ioannidis <root@stephanos.io>
3  * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
4  *
5  * SPDX-License-Identifier: Apache-2.0
6  */
7 
8 #include <zephyr/ztest.h>
9 #include <zephyr/kernel.h>
10 #include <stdlib.h>
11 #include <arm_math.h>
12 #include "../../common/benchmark_common.h"
13 
/* Number of elements processed by each benchmarked call in this file. */
#define PATTERN_LENGTH	(256)
15 
/*
 * First input vector: 256 raw 32-bit words that the test cases in this file
 * reinterpret as float32_t by casting the array pointer. It is the first
 * source operand of the add/sub/mult/dot-product benchmarks and the only
 * source operand of the abs/negate/offset/scale benchmarks.
 *
 * NOTE(review): the words look like IEEE 754 single-precision bit patterns
 * (0xbf800000 would be -1.0f) -- confirm against the pattern generator.
 */
static const uint32_t input1[256] = {
	0xbe7615dd, 0x3e0a5b1e, 0x3e9425e8, 0x3f433216,
	0x3e816b4e, 0xbe143e49, 0x3e877d60, 0xbd29de75,
	0xbebc69ec, 0x3e687466, 0xbe2e1b66, 0x3e0346d2,
	0xbe48345c, 0x3f2f2dc5, 0xbf229368, 0xbdd18caa,
	0xbae856c5, 0xbe779abe, 0xbe6e7711, 0x3caa302e,
	0x3e85375c, 0xbe15bd57, 0xbd92253b, 0xbed2cedf,
	0xbf2cbc1e, 0x3e2a4449, 0xbdd0693a, 0x3e16b249,
	0xbe256ea3, 0x3ed42777, 0x3e895be0, 0x3de3e095,
	0xbee09687, 0xbd84a2da, 0x3d966cc5, 0xbd97ea7a,
	0xbea4b437, 0xbd0cbb5d, 0x3df36637, 0xbe20fa46,
	0x3de5a59f, 0xbedc8418, 0x3ce7901e, 0x3e870ef3,
	0xbeeaed98, 0x3e69fc9d, 0x3bef5be4, 0xbe71aa2c,
	0x3dfc9df4, 0x3e14edd4, 0xbe90e704, 0x3df1ade8,
	0xbecb71a1, 0xbee60066, 0x3d8ed065, 0xbe66f074,
	0x3f1d1d4a, 0x3eb0ee33, 0x3e5b4d66, 0x3e80ac40,
	0xbea71b63, 0xbebfaf5c, 0x3eb7c5e8, 0x3c286fe6,
	0xbe4542ec, 0xbdf4c1cb, 0xbe8a2982, 0x3ebf99e4,
	0xbeb9b986, 0x3e91869e, 0x3e3126c8, 0x3e678642,
	0x3e83bfc5, 0x3e2fb9b5, 0xbe9f1f35, 0xbe412ac1,
	0x3ee470f5, 0xbf1ba9c4, 0x3e8ba6eb, 0x3daf6728,
	0x3e379687, 0x3db5db1c, 0xbe0454cc, 0x3e8ffa21,
	0xbed3eb22, 0xbd8c756b, 0xbd37f46f, 0x3f1acc52,
	0x3e6761ef, 0x3dc8f4c2, 0xbb86a0cb, 0xbed741fd,
	0x3ed334d2, 0x3e591fee, 0x3da630be, 0xbd465007,
	0xbe2e6781, 0x3e354ba2, 0x3ee78e9f, 0x3e38e1fc,
	0xbd5b45af, 0xbdc42924, 0xbe487972, 0x3d42fc8c,
	0x3d39ddea, 0xbf800000, 0x3e47bc81, 0xbe6d3ea6,
	0xbe346a5d, 0x3e4bc4aa, 0x3ece4e6a, 0x3ef730a6,
	0x3d323126, 0xbe1f69b2, 0xbf2c5298, 0x3e851dca,
	0x3dd99be4, 0x3ee352ed, 0x3e300faf, 0x3deb85b2,
	0xbef35a4d, 0xbea2e18e, 0x3e0c1422, 0x3ded684f,
	0xbd9d3a04, 0xbe748c80, 0xbd49d3fc, 0x3eae4b91,
	0xbdd13f56, 0x3ec00687, 0xbe9dd67b, 0xbee078e5,
	0xbea7ecd6, 0xbe31f0fb, 0x3c9fab4c, 0xbf5b1cab,
	0xbe3127e2, 0xbd89aa70, 0x3e7bd318, 0x3c387d43,
	0xbd84e04b, 0x3ecab9d8, 0x3edb926f, 0xbec88560,
	0xbe168183, 0x3e50ee4c, 0x3d15fc44, 0x3df7a4f6,
	0xbbeab7df, 0xbd89602e, 0xbe46f8cf, 0xbca78eee,
	0x3e95f997, 0xbeef9320, 0xbb719415, 0x3ed7afd5,
	0x3e00886c, 0xbd8e2cbe, 0x3da33a7c, 0x3da3200f,
	0xbebf7183, 0x3d365e99, 0xbe09538e, 0xbcf48bc6,
	0xbd51b1b8, 0x3d017f4c, 0x3e2b81bb, 0xbed81367,
	0xbe069cd5, 0x3e0c8740, 0xbd6860f3, 0x3dae7824,
	0x3ea05b45, 0x3ce7c1f1, 0x3ef90c86, 0x3cae676d,
	0xbeb633be, 0x3d91a2ac, 0x3d302b7f, 0x3e0439b3,
	0xbd9b621c, 0x3e988c26, 0xbdaba1d2, 0x3e3893fd,
	0x3ed57561, 0xbeaa3c3b, 0xbd0fd35d, 0xbe971a92,
	0x3f068098, 0x3e97eccf, 0x3d7fb7e7, 0x3eb0e749,
	0x3e861bd2, 0x3db8027d, 0x3e86a3ab, 0xbe9b4617,
	0xbc060bb8, 0x3c973710, 0xbc94e165, 0x3edcc8e4,
	0xbe856ffe, 0x3eae969f, 0x3dc64eb0, 0x3f0d4886,
	0xbb5951f3, 0xbd545f5a, 0x3c8020ec, 0xbec63ba0,
	0xbd226696, 0xbbf74b24, 0x3e789184, 0xbe779dfb,
	0x3e27d468, 0xbe8d34a6, 0xbe5a31aa, 0xbe9347f4,
	0x3ec9bf45, 0x3dec7be4, 0x3e8573e7, 0x3dac8f5e,
	0x3e133275, 0x3efabbd4, 0x3e144acb, 0xbf10beb2,
	0xbe8bc904, 0xbd9e491d, 0x3e83271c, 0xbf625b5d,
	0x3e918f0c, 0xbe09bf4b, 0xbe8f8404, 0xbec1dd10,
	0x3dcc691b, 0x3decd44e, 0x3e52ef40, 0x3dc53970,
	0x3e5e7918, 0xbde1a7f5, 0xbe902c3f, 0x3e185dff,
	0x3f053faf, 0x3de7951b, 0x3e431459, 0xbdfd5fd5,
	0xbf08176e, 0xbf068074, 0x3eb79e7a, 0xbd1d80ed,
	0xbe5a7559, 0x3eb3aa3f, 0xbe8db06c, 0xbd90187e,
	0xbf03caa7, 0xbd92238d, 0xbe7cdc64, 0xbe2ac8aa
	};
82 
/*
 * Second input vector: 256 raw 32-bit words that the test cases in this file
 * reinterpret as float32_t by casting the array pointer. It is the second
 * source operand of the add/sub/mult/dot-product benchmarks.
 *
 * NOTE(review): the words look like IEEE 754 single-precision bit patterns
 * (0xbf800000 would be -1.0f) -- confirm against the pattern generator.
 */
static const uint32_t input2[256] = {
	0x3efd7b68, 0xbf3396c6, 0x3ecf0d7f, 0xbce77d0c,
	0xbe8045d0, 0xbea8c5d4, 0x3e6a3aaa, 0xbeb9c2f5,
	0x3ce7f210, 0xbf0f4737, 0xbee71c87, 0xbdcd4ed3,
	0xbda07eb9, 0xbe22d09c, 0x3da60119, 0x3f2e0015,
	0x3cf9d138, 0x3ea7295d, 0x3e958f9e, 0x3ec76134,
	0x3caec34b, 0xbe0fc780, 0xbe34442b, 0x3ea30a9e,
	0xbd2f09ac, 0x3f0436b2, 0x3ea6974d, 0x3be04c86,
	0xbe67da27, 0x3f21e9d0, 0x3dc7b20f, 0x3f110e3f,
	0x3ee11ddc, 0x3ed16699, 0x3ecafd42, 0xbf4465ee,
	0x3e113928, 0xbea7fe43, 0xbf018159, 0xbba151c0,
	0xbd04ad55, 0xbeb940d9, 0x3d8f7268, 0xbe4b929e,
	0xbe3cb12a, 0x3edded21, 0x3ecb11e5, 0x3f24b3ab,
	0x3e5e8c42, 0xbe1606df, 0x3f0ea0db, 0xbe348348,
	0xbdea1459, 0x3e3dd968, 0x3eba6bd5, 0x3efca1b6,
	0x3e19c20c, 0x3ee2fa95, 0xbde65541, 0xbe9755e7,
	0x3d802e15, 0x3e1d2343, 0x3b1fc1ee, 0xbf067900,
	0x3c72e743, 0xbe95ee27, 0xbe8dffe3, 0xbec569ed,
	0xbecfe08d, 0x3e6d545e, 0x3e8b3467, 0x3eb5b621,
	0xbe1c1a0f, 0x3e071a92, 0xbe292f55, 0xbd841e3d,
	0x3eb4d636, 0x3e896277, 0x3f0d088e, 0x3e89c512,
	0xbe66e0d9, 0xbda5c7db, 0xbee26d88, 0xbf67e7cf,
	0x3e7b8eee, 0x3e878f80, 0xbe176f02, 0xbead74b7,
	0x3f40d7b0, 0x3e6684ec, 0xbdcf3d89, 0xbe2b185e,
	0x3ea0851a, 0x3e574528, 0xbf25ae3e, 0xbc1d8c89,
	0x3e563384, 0xbea6d40d, 0xbeac99c0, 0x3f229549,
	0x3e522113, 0xbf2c23b1, 0xbe9d1389, 0x3e4e85f4,
	0x3eb0eb91, 0xbf17b075, 0x3e9fa5d3, 0xbdc541ee,
	0xbe79ceb4, 0xbef0e805, 0x3d78e944, 0xbe84a108,
	0x3f04a4e5, 0xbe0d9ea0, 0xbe8ed7c7, 0xbe380a23,
	0xbde28369, 0x3f01dc75, 0xbef7f6f3, 0x3ea0f201,
	0xbf13519c, 0xbedb0feb, 0x3eac3cf7, 0xbe5f8009,
	0xbd57b158, 0x3e9d8a4e, 0xbe96c82e, 0x3f5bc10d,
	0x3cf6474c, 0x3e4364c3, 0xbeaa2ea4, 0xbd53f6ab,
	0xbf1fdc71, 0x3ee86194, 0xbf0132aa, 0xbee53cc6,
	0x3e82836b, 0xbe9a7a9a, 0xbe494f30, 0xbe24c1c6,
	0x3f76962b, 0x3e11f661, 0xbec6d557, 0xbdb3603e,
	0x3df40af7, 0x3f5229b9, 0x3e576be7, 0x3e5de873,
	0x3ecbb13e, 0x3d9d2b57, 0xbe58e633, 0xbe42c779,
	0x3ea4539f, 0xbec1326d, 0x3e9d7323, 0xbe05b725,
	0x3c814ef6, 0x3d1b483c, 0x3e036f96, 0x3e5d4cca,
	0xbf1511f1, 0x3dd2188e, 0x3e0c87b4, 0x3dd47a1e,
	0x3de1ed5e, 0xbf086942, 0x3f14ab85, 0xbed3a836,
	0x3db26a76, 0x3e7e0bf6, 0x3f59c9f4, 0xbee53819,
	0xbee72aeb, 0xbd08cc78, 0x3dc6bd55, 0x3e0d0cd2,
	0xbf00d3c6, 0x3ec9b7be, 0xbeef4f2a, 0xbe91f0f3,
	0x3e770521, 0xbd1015f5, 0x3f42a1bc, 0xbf31354a,
	0xbe277544, 0xbf21a07d, 0x3d16c40b, 0x3d9a5ef8,
	0xbd375473, 0xbe8a73e0, 0xbf04d275, 0x3e00e10c,
	0xbf04a1c1, 0x3e5ab3ef, 0xbe79fe0e, 0xbf0ff988,
	0xbea9a481, 0xbd7b414f, 0xbdc0561a, 0xbee876f9,
	0x3e96901f, 0xbf1922b3, 0xbe4696f0, 0xbec91e95,
	0xbcecd009, 0x3e27120e, 0x3d51f700, 0xbf724df3,
	0xbf16fe3f, 0xbe68fe8c, 0x3eec30d2, 0x3e8fd73f,
	0xbdb5a832, 0x3f3cb41d, 0xbf3582b0, 0xbe7d0a43,
	0x3e6e2b26, 0x3d924119, 0xbe64103c, 0x3e1094c3,
	0x3f130299, 0xbe9a77df, 0x3e62628d, 0xbf800000,
	0xbf1bc246, 0xbe9f54be, 0x3d9be53a, 0xbf1a9110,
	0x3c09e4b3, 0xbe43e5d1, 0x3dd67477, 0xbeebdf5a,
	0x3d445065, 0xbf24b81b, 0x3f442915, 0xbe439b36,
	0x3ebdb5d4, 0x3f3245c5, 0x3dde2d5d, 0x3dd499a6,
	0xbe16e5e8, 0xbda4dbc5, 0x3ea0851a, 0xbf187446,
	0xbf5e8c93, 0xbea10739, 0xbe22001f, 0xbe20c313,
	0xbeee242f, 0x3d076127, 0x3e7c303c, 0x3ed97950,
	0xbf6a57bf, 0xbe9b9d13, 0x3ddf83ce, 0xbec67d55
	};
149 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_add_f32)150 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_add_f32)
151 {
152 	uint32_t irq_key, timestamp, timespan;
153 	float32_t *output;
154 
155 	/* Allocate output buffer */
156 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
157 	zassert_not_null(output, "output buffer allocation failed");
158 
159 	/* Begin benchmark */
160 	benchmark_begin(&irq_key, &timestamp);
161 
162 	/* Execute function */
163 	arm_add_f32(
164 		(float32_t *)input1, (float32_t *)input2, output,
165 		PATTERN_LENGTH);
166 
167 	/* End benchmark */
168 	timespan = benchmark_end(irq_key, timestamp);
169 
170 	/* Free output buffer */
171 	free(output);
172 
173 	/* Print result */
174 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
175 }
176 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_sub_f32)177 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_sub_f32)
178 {
179 	uint32_t irq_key, timestamp, timespan;
180 	float32_t *output;
181 
182 	/* Allocate output buffer */
183 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
184 	zassert_not_null(output, "output buffer allocation failed");
185 
186 	/* Begin benchmark */
187 	benchmark_begin(&irq_key, &timestamp);
188 
189 	/* Execute function */
190 	arm_sub_f32(
191 		(float32_t *)input1, (float32_t *)input2, output,
192 		PATTERN_LENGTH);
193 
194 	/* End benchmark */
195 	timespan = benchmark_end(irq_key, timestamp);
196 
197 	/* Free output buffer */
198 	free(output);
199 
200 	/* Print result */
201 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
202 }
203 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_mult_f32)204 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_mult_f32)
205 {
206 	uint32_t irq_key, timestamp, timespan;
207 	float32_t *output;
208 
209 	/* Allocate output buffer */
210 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
211 	zassert_not_null(output, "output buffer allocation failed");
212 
213 	/* Begin benchmark */
214 	benchmark_begin(&irq_key, &timestamp);
215 
216 	/* Execute function */
217 	arm_mult_f32(
218 		(float32_t *)input1, (float32_t *)input2, output,
219 		PATTERN_LENGTH);
220 
221 	/* End benchmark */
222 	timespan = benchmark_end(irq_key, timestamp);
223 
224 	/* Free output buffer */
225 	free(output);
226 
227 	/* Print result */
228 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
229 }
230 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_abs_f32)231 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_abs_f32)
232 {
233 	uint32_t irq_key, timestamp, timespan;
234 	float32_t *output;
235 
236 	/* Allocate output buffer */
237 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
238 	zassert_not_null(output, "output buffer allocation failed");
239 
240 	/* Begin benchmark */
241 	benchmark_begin(&irq_key, &timestamp);
242 
243 	/* Execute function */
244 	arm_abs_f32((float32_t *)input1, output, PATTERN_LENGTH);
245 
246 	/* End benchmark */
247 	timespan = benchmark_end(irq_key, timestamp);
248 
249 	/* Free output buffer */
250 	free(output);
251 
252 	/* Print result */
253 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
254 }
255 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_negate_f32)256 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_negate_f32)
257 {
258 	uint32_t irq_key, timestamp, timespan;
259 	float32_t *output;
260 
261 	/* Allocate output buffer */
262 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
263 	zassert_not_null(output, "output buffer allocation failed");
264 
265 	/* Begin benchmark */
266 	benchmark_begin(&irq_key, &timestamp);
267 
268 	/* Execute function */
269 	arm_negate_f32((float32_t *)input1, output, PATTERN_LENGTH);
270 
271 	/* End benchmark */
272 	timespan = benchmark_end(irq_key, timestamp);
273 
274 	/* Free output buffer */
275 	free(output);
276 
277 	/* Print result */
278 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
279 }
280 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_offset_f32)281 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_offset_f32)
282 {
283 	uint32_t irq_key, timestamp, timespan;
284 	float32_t *output;
285 
286 	/* Allocate output buffer */
287 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
288 	zassert_not_null(output, "output buffer allocation failed");
289 
290 	/* Begin benchmark */
291 	benchmark_begin(&irq_key, &timestamp);
292 
293 	/* Execute function */
294 	arm_offset_f32((float32_t *)input1, 1.0, output, PATTERN_LENGTH);
295 
296 	/* End benchmark */
297 	timespan = benchmark_end(irq_key, timestamp);
298 
299 	/* Free output buffer */
300 	free(output);
301 
302 	/* Print result */
303 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
304 }
305 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_scale_f32)306 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_scale_f32)
307 {
308 	uint32_t irq_key, timestamp, timespan;
309 	float32_t *output;
310 
311 	/* Allocate output buffer */
312 	output = malloc(PATTERN_LENGTH * sizeof(float32_t));
313 	zassert_not_null(output, "output buffer allocation failed");
314 
315 	/* Begin benchmark */
316 	benchmark_begin(&irq_key, &timestamp);
317 
318 	/* Execute function */
319 	arm_scale_f32((float32_t *)input1, 1.0, output, PATTERN_LENGTH);
320 
321 	/* End benchmark */
322 	timespan = benchmark_end(irq_key, timestamp);
323 
324 	/* Free output buffer */
325 	free(output);
326 
327 	/* Print result */
328 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
329 }
330 
ZTEST(basicmath_f32_benchmark,test_benchmark_vec_dot_prod_f32)331 ZTEST(basicmath_f32_benchmark, test_benchmark_vec_dot_prod_f32)
332 {
333 	uint32_t irq_key, timestamp, timespan;
334 	float32_t output;
335 
336 	/* Begin benchmark */
337 	benchmark_begin(&irq_key, &timestamp);
338 
339 	/* Execute function */
340 	arm_dot_prod_f32(
341 		(float32_t *)input1, (float32_t *)input2, PATTERN_LENGTH,
342 		&output);
343 
344 	/* End benchmark */
345 	timespan = benchmark_end(irq_key, timestamp);
346 
347 	/* Print result */
348 	TC_PRINT(BENCHMARK_TYPE " = %u\n", timespan);
349 }
350 
/*
 * Register the basicmath_f32_benchmark suite named by the ZTEST() cases in
 * this file. Every optional argument after the suite name is left NULL.
 */
ZTEST_SUITE(basicmath_f32_benchmark, NULL, NULL, NULL, NULL, NULL);
352