1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_boolean_distance.c
5 * Description: Templates for boolean distances
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30
31
32
33 /**
34 * @defgroup DISTANCEF Distance Functions
35 *
36 * Computes Distances between vectors.
37 *
38 * Distance functions are useful in a lot of algorithms.
39 *
40 */
41
42
43 /**
44 * @addtogroup DISTANCEF
45 * @{
46 */
47
48
49
50
/*
 * Two-step token pasting: FUNC() forwards its argument to _FUNC() so that
 * the argument is macro-expanded before ## glues it to the base name.
 */
#define _FUNC(A,B) A##B

/*
 * Name of the function produced by this file: arm_boolean_distance followed
 * by the given suffix. It is used below as FUNC(EXT); EXT is not defined in
 * this file and is presumably supplied by the including source - confirm
 * against the includer.
 */
#define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
54
55 extern void FUNC(EXT)(const uint32_t *pA
56 , const uint32_t *pB
57 , uint32_t numberOfBools
58 #ifdef TT
59 , uint32_t *cTT
60 #endif
61 #ifdef FF
62 , uint32_t *cFF
63 #endif
64 #ifdef TF
65 , uint32_t *cTF
66 #endif
67 #ifdef FT
68 , uint32_t *cFT
69 #endif
70 );
71
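/**
 * @brief Elements of boolean distances
 *
 * Different values which are used to compute boolean distances.
 * Each output parameter below exists only in the variants compiled with
 * the matching TT, FF, TF or FT macro defined.
 *
 * @param[in]  pA             First vector of packed booleans
 * @param[in]  pB             Second vector of packed booleans
 * @param[in]  numberOfBools  Number of booleans
 * @param[out] cTT            Number of positions where both booleans are true
 * @param[out] cFF            Number of positions where both booleans are false
 * @param[out] cTF            Number of positions true in pA and false in pB
 * @param[out] cFT            Number of positions false in pA and true in pB
 *
 */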
82
83 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
84
85 #include "arm_common_tables.h"
86
FUNC(EXT)87 void FUNC(EXT)(const uint32_t *pA
88 , const uint32_t *pB
89 , uint32_t numberOfBools
90 #ifdef TT
91 , uint32_t *cTT
92 #endif
93 #ifdef FF
94 , uint32_t *cFF
95 #endif
96 #ifdef TF
97 , uint32_t *cTF
98 #endif
99 #ifdef FT
100 , uint32_t *cFT
101 #endif
102 )
103 {
104
105 #ifdef TT
106 uint32_t _ctt=0;
107 #endif
108 #ifdef FF
109 uint32_t _cff=0;
110 #endif
111 #ifdef TF
112 uint32_t _ctf=0;
113 #endif
114 #ifdef FT
115 uint32_t _cft=0;
116 #endif
117 uint32_t a, b, ba, bb;
118 int shift;
119 const uint8_t *pA8 = (const uint8_t *) pA;
120 const uint8_t *pB8 = (const uint8_t *) pB;
121
122 /* handle vector blocks */
123 uint32_t blkCnt = numberOfBools / 128;
124
125
126
127 while (blkCnt > 0U) {
128 uint8x16_t vecA = vld1q((const uint8_t *) pA8);
129 uint8x16_t vecB = vld1q((const uint8_t *) pB8);
130
131 #ifdef TT
132 uint8x16_t vecTT = vecA & vecB;
133 vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
134 _ctt += vaddvq(vecTT);
135 #endif
136 #ifdef FF
137 uint8x16_t vecFF = vmvnq(vecA) & vmvnq(vecB);
138 vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
139 _cff += vaddvq(vecFF);
140 #endif
141 #ifdef TF
142 uint8x16_t vecTF = vecA & vmvnq(vecB);
143 vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
144 _ctf += vaddvq(vecTF);
145 #endif
146 #ifdef FT
147 uint8x16_t vecFT = vmvnq(vecA) & vecB;
148 vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
149 _cft += vaddvq(vecFT);
150 #endif
151
152 pA8 += 16;
153 pB8 += 16;
154 blkCnt--;
155
156 }
157
158 pA = (const uint32_t *)pA8;
159 pB = (const uint32_t *)pB8;
160
161 blkCnt = numberOfBools & 0x7F;
162 while(blkCnt >= 32)
163 {
164 a = *pA++;
165 b = *pB++;
166 shift = 0;
167 while(shift < 32)
168 {
169 ba = a & 1;
170 bb = b & 1;
171 a = a >> 1;
172 b = b >> 1;
173
174 #ifdef TT
175 _ctt += (ba && bb);
176 #endif
177 #ifdef FF
178 _cff += ((1 ^ ba) && (1 ^ bb));
179 #endif
180 #ifdef TF
181 _ctf += (ba && (1 ^ bb));
182 #endif
183 #ifdef FT
184 _cft += ((1 ^ ba) && bb);
185 #endif
186 shift ++;
187 }
188
189 blkCnt -= 32;
190 }
191
192 a = *pA++;
193 b = *pB++;
194
195 a = a >> (32 - blkCnt);
196 b = b >> (32 - blkCnt);
197
198 while(blkCnt > 0)
199 {
200 ba = a & 1;
201 bb = b & 1;
202 a = a >> 1;
203
204 b = b >> 1;
205 #ifdef TT
206 _ctt += (ba && bb);
207 #endif
208 #ifdef FF
209 _cff += ((1 ^ ba) && (1 ^ bb));
210 #endif
211 #ifdef TF
212 _ctf += (ba && (1 ^ bb));
213 #endif
214 #ifdef FT
215 _cft += ((1 ^ ba) && bb);
216 #endif
217 blkCnt --;
218 }
219
220 #ifdef TT
221 *cTT = _ctt;
222 #endif
223 #ifdef FF
224 *cFF = _cff;
225 #endif
226 #ifdef TF
227 *cTF = _ctf;
228 #endif
229 #ifdef FT
230 *cFT = _cft;
231 #endif
232 }
233
234 #else
235 #if defined(ARM_MATH_NEON)
236
237
FUNC(EXT)238 void FUNC(EXT)(const uint32_t *pA
239 , const uint32_t *pB
240 , uint32_t numberOfBools
241 #ifdef TT
242 , uint32_t *cTT
243 #endif
244 #ifdef FF
245 , uint32_t *cFF
246 #endif
247 #ifdef TF
248 , uint32_t *cTF
249 #endif
250 #ifdef FT
251 , uint32_t *cFT
252 #endif
253 )
254 {
255 #ifdef TT
256 uint32_t _ctt=0;
257 #endif
258 #ifdef FF
259 uint32_t _cff=0;
260 #endif
261 #ifdef TF
262 uint32_t _ctf=0;
263 #endif
264 #ifdef FT
265 uint32_t _cft=0;
266 #endif
267 uint32_t nbBoolBlock;
268 uint32_t a,b,ba,bb;
269 int shift;
270 uint32x4_t aV, bV;
271 #ifdef TT
272 uint32x4_t cttV;
273 #endif
274 #ifdef FF
275 uint32x4_t cffV;
276 #endif
277 #ifdef TF
278 uint32x4_t ctfV;
279 #endif
280 #ifdef FT
281 uint32x4_t cftV;
282 #endif
283 uint8x16_t tmp;
284 uint16x8_t tmp2;
285 uint32x4_t tmp3;
286 uint64x2_t tmp4;
287 #ifdef TT
288 uint64x2_t tmp4tt;
289 #endif
290 #ifdef FF
291 uint64x2_t tmp4ff;
292 #endif
293 #ifdef TF
294 uint64x2_t tmp4tf;
295 #endif
296 #ifdef FT
297 uint64x2_t tmp4ft;
298 #endif
299
300 #ifdef TT
301 tmp4tt = vdupq_n_u64(0);
302 #endif
303 #ifdef FF
304 tmp4ff = vdupq_n_u64(0);
305 #endif
306 #ifdef TF
307 tmp4tf = vdupq_n_u64(0);
308 #endif
309 #ifdef FT
310 tmp4ft = vdupq_n_u64(0);
311 #endif
312
313 nbBoolBlock = numberOfBools >> 7;
314 while(nbBoolBlock > 0)
315 {
316 aV = vld1q_u32(pA);
317 bV = vld1q_u32(pB);
318 pA += 4;
319 pB += 4;
320
321 #ifdef TT
322 cttV = vandq_u32(aV,bV);
323 #endif
324 #ifdef FF
325 cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
326 #endif
327 #ifdef TF
328 ctfV = vandq_u32(aV,vmvnq_u32(bV));
329 #endif
330 #ifdef FT
331 cftV = vandq_u32(vmvnq_u32(aV),bV);
332 #endif
333
334 #ifdef TT
335 tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
336 tmp2 = vpaddlq_u8(tmp);
337 tmp3 = vpaddlq_u16(tmp2);
338 tmp4 = vpaddlq_u32(tmp3);
339 tmp4tt = vaddq_u64(tmp4tt, tmp4);
340 #endif
341
342 #ifdef FF
343 tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
344 tmp2 = vpaddlq_u8(tmp);
345 tmp3 = vpaddlq_u16(tmp2);
346 tmp4 = vpaddlq_u32(tmp3);
347 tmp4ff = vaddq_u64(tmp4ff, tmp4);
348 #endif
349
350 #ifdef TF
351 tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
352 tmp2 = vpaddlq_u8(tmp);
353 tmp3 = vpaddlq_u16(tmp2);
354 tmp4 = vpaddlq_u32(tmp3);
355 tmp4tf = vaddq_u64(tmp4tf, tmp4);
356 #endif
357
358 #ifdef FT
359 tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
360 tmp2 = vpaddlq_u8(tmp);
361 tmp3 = vpaddlq_u16(tmp2);
362 tmp4 = vpaddlq_u32(tmp3);
363 tmp4ft = vaddq_u64(tmp4ft, tmp4);
364 #endif
365
366
367 nbBoolBlock --;
368 }
369
370 #ifdef TT
371 _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
372 #endif
373 #ifdef FF
374 _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
375 #endif
376 #ifdef TF
377 _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
378 #endif
379 #ifdef FT
380 _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
381 #endif
382
383 nbBoolBlock = numberOfBools & 0x7F;
384 while(nbBoolBlock >= 32)
385 {
386 a = *pA++;
387 b = *pB++;
388 shift = 0;
389 while(shift < 32)
390 {
391 ba = a & 1;
392 bb = b & 1;
393 a = a >> 1;
394 b = b >> 1;
395
396 #ifdef TT
397 _ctt += (ba && bb);
398 #endif
399 #ifdef FF
400 _cff += ((1 ^ ba) && (1 ^ bb));
401 #endif
402 #ifdef TF
403 _ctf += (ba && (1 ^ bb));
404 #endif
405 #ifdef FT
406 _cft += ((1 ^ ba) && bb);
407 #endif
408 shift ++;
409 }
410
411 nbBoolBlock -= 32;
412 }
413
414 a = *pA++;
415 b = *pB++;
416
417 a = a >> (32 - nbBoolBlock);
418 b = b >> (32 - nbBoolBlock);
419
420 while(nbBoolBlock > 0)
421 {
422 ba = a & 1;
423 bb = b & 1;
424 a = a >> 1;
425
426 b = b >> 1;
427 #ifdef TT
428 _ctt += (ba && bb);
429 #endif
430 #ifdef FF
431 _cff += ((1 ^ ba) && (1 ^ bb));
432 #endif
433 #ifdef TF
434 _ctf += (ba && (1 ^ bb));
435 #endif
436 #ifdef FT
437 _cft += ((1 ^ ba) && bb);
438 #endif
439 nbBoolBlock --;
440 }
441
442 #ifdef TT
443 *cTT = _ctt;
444 #endif
445 #ifdef FF
446 *cFF = _cff;
447 #endif
448 #ifdef TF
449 *cTF = _ctf;
450 #endif
451 #ifdef FT
452 *cFT = _cft;
453 #endif
454 }
455
456 #else
457
FUNC(EXT)458 void FUNC(EXT)(const uint32_t *pA
459 , const uint32_t *pB
460 , uint32_t numberOfBools
461 #ifdef TT
462 , uint32_t *cTT
463 #endif
464 #ifdef FF
465 , uint32_t *cFF
466 #endif
467 #ifdef TF
468 , uint32_t *cTF
469 #endif
470 #ifdef FT
471 , uint32_t *cFT
472 #endif
473 )
474 {
475
476 #ifdef TT
477 uint32_t _ctt=0;
478 #endif
479 #ifdef FF
480 uint32_t _cff=0;
481 #endif
482 #ifdef TF
483 uint32_t _ctf=0;
484 #endif
485 #ifdef FT
486 uint32_t _cft=0;
487 #endif
488 uint32_t a,b,ba,bb;
489 int shift;
490
491 while(numberOfBools >= 32)
492 {
493 a = *pA++;
494 b = *pB++;
495 shift = 0;
496 while(shift < 32)
497 {
498 ba = a & 1;
499 bb = b & 1;
500 a = a >> 1;
501 b = b >> 1;
502 #ifdef TT
503 _ctt += (ba && bb);
504 #endif
505 #ifdef FF
506 _cff += ((1 ^ ba) && (1 ^ bb));
507 #endif
508 #ifdef TF
509 _ctf += (ba && (1 ^ bb));
510 #endif
511 #ifdef FT
512 _cft += ((1 ^ ba) && bb);
513 #endif
514 shift ++;
515 }
516
517 numberOfBools -= 32;
518 }
519
520 a = *pA++;
521 b = *pB++;
522
523 a = a >> (32 - numberOfBools);
524 b = b >> (32 - numberOfBools);
525
526 while(numberOfBools > 0)
527 {
528 ba = a & 1;
529 bb = b & 1;
530 a = a >> 1;
531 b = b >> 1;
532
533 #ifdef TT
534 _ctt += (ba && bb);
535 #endif
536 #ifdef FF
537 _cff += ((1 ^ ba) && (1 ^ bb));
538 #endif
539 #ifdef TF
540 _ctf += (ba && (1 ^ bb));
541 #endif
542 #ifdef FT
543 _cft += ((1 ^ ba) && bb);
544 #endif
545 numberOfBools --;
546 }
547
548 #ifdef TT
549 *cTT = _ctt;
550 #endif
551 #ifdef FF
552 *cFF = _cff;
553 #endif
554 #ifdef TF
555 *cTF = _ctf;
556 #endif
557 #ifdef FT
558 *cFT = _cft;
559 #endif
560 }
561 #endif
562 #endif /* defined(ARM_MATH_MVEI) */
563
564
565 /**
566 * @} end of DISTANCEF group
567 */
568