1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_boolean_distance.c
5 * Description: Templates for boolean distances
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30
31
32
33 /**
34 * @defgroup DISTANCEF Distance Functions
35 *
36 * Computes Distances between vectors.
37 *
38 * Distance functions are useful in a lot of algorithms.
39 *
40 */
41
42
43 /**
44 * @addtogroup DISTANCEF
45 * @{
46 */
47
48
49
50
/*
 * Token-pasting helper: glues PREFIX and SUFFIX into a single identifier.
 * It is kept separate from FUNC() so that the argument handed to FUNC() is
 * macro-expanded before the paste takes place.
 */
#define _FUNC(PREFIX,SUFFIX) PREFIX##SUFFIX

/*
 * Builds the name of a generated function: arm_boolean_distance followed by
 * SUFFIX. The functions below are all named FUNC(EXT); EXT is presumably
 * defined by the file that includes this template -- confirm against the
 * includer.
 */
#define FUNC(SUFFIX) _FUNC(arm_boolean_distance, SUFFIX)
54
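/**
 * @brief         Elements of boolean distances
 *
 * Counts how often each combination of values (true/true, false/false,
 * true/false, false/true) occurs at the same position in the two vectors.
 * These counts are the quantities from which the boolean distances are
 * computed. Each count is written through the matching output pointer
 * (cTT, cFF, cTF, cFT); an output pointer is only part of the signature
 * when the corresponding macro (TT, FF, TF, FT) is defined.
 *
 * @param[in]     pA              First vector of packed booleans
 * @param[in]     pB              Second vector of packed booleans
 * @param[in]     numberOfBools   Number of booleans
 * @return        None
 *
 */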
66
67 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
68
69 #include "arm_common_tables.h"
70
FUNC(EXT)71 void FUNC(EXT)(const uint32_t *pA
72 , const uint32_t *pB
73 , uint32_t numberOfBools
74 #ifdef TT
75 , uint32_t *cTT
76 #endif
77 #ifdef FF
78 , uint32_t *cFF
79 #endif
80 #ifdef TF
81 , uint32_t *cTF
82 #endif
83 #ifdef FT
84 , uint32_t *cFT
85 #endif
86 )
87 {
88
89 #ifdef TT
90 uint32_t _ctt=0;
91 #endif
92 #ifdef FF
93 uint32_t _cff=0;
94 #endif
95 #ifdef TF
96 uint32_t _ctf=0;
97 #endif
98 #ifdef FT
99 uint32_t _cft=0;
100 #endif
101 uint32_t a, b, ba, bb;
102 int shift;
103 const uint8_t *pA8 = (const uint8_t *) pA;
104 const uint8_t *pB8 = (const uint8_t *) pB;
105
106 /* handle vector blocks */
107 uint32_t blkCnt = numberOfBools / 128;
108
109
110
111 while (blkCnt > 0U) {
112 uint8x16_t vecA = vld1q((const uint8_t *) pA8);
113 uint8x16_t vecB = vld1q((const uint8_t *) pB8);
114
115 #ifdef TT
116 uint8x16_t vecTT = vecA & vecB;
117 vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
118 _ctt += vaddvq(vecTT);
119 #endif
120 #ifdef FF
121 uint8x16_t vecFF = vmvnq(vecA) & vmvnq(vecB);
122 vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
123 _cff += vaddvq(vecFF);
124 #endif
125 #ifdef TF
126 uint8x16_t vecTF = vecA & vmvnq(vecB);
127 vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
128 _ctf += vaddvq(vecTF);
129 #endif
130 #ifdef FT
131 uint8x16_t vecFT = vmvnq(vecA) & vecB;
132 vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
133 _cft += vaddvq(vecFT);
134 #endif
135
136 pA8 += 16;
137 pB8 += 16;
138 blkCnt--;
139
140 }
141
142 pA = (const uint32_t *)pA8;
143 pB = (const uint32_t *)pB8;
144
145 blkCnt = numberOfBools & 0x7F;
146 while(blkCnt >= 32)
147 {
148 a = *pA++;
149 b = *pB++;
150 shift = 0;
151 while(shift < 32)
152 {
153 ba = a & 1;
154 bb = b & 1;
155 a = a >> 1;
156 b = b >> 1;
157
158 #ifdef TT
159 _ctt += (ba && bb);
160 #endif
161 #ifdef FF
162 _cff += ((1 ^ ba) && (1 ^ bb));
163 #endif
164 #ifdef TF
165 _ctf += (ba && (1 ^ bb));
166 #endif
167 #ifdef FT
168 _cft += ((1 ^ ba) && bb);
169 #endif
170 shift ++;
171 }
172
173 blkCnt -= 32;
174 }
175
176 a = *pA++;
177 b = *pB++;
178
179 a = a >> (32 - blkCnt);
180 b = b >> (32 - blkCnt);
181
182 while(blkCnt > 0)
183 {
184 ba = a & 1;
185 bb = b & 1;
186 a = a >> 1;
187
188 b = b >> 1;
189 #ifdef TT
190 _ctt += (ba && bb);
191 #endif
192 #ifdef FF
193 _cff += ((1 ^ ba) && (1 ^ bb));
194 #endif
195 #ifdef TF
196 _ctf += (ba && (1 ^ bb));
197 #endif
198 #ifdef FT
199 _cft += ((1 ^ ba) && bb);
200 #endif
201 blkCnt --;
202 }
203
204 #ifdef TT
205 *cTT = _ctt;
206 #endif
207 #ifdef FF
208 *cFF = _cff;
209 #endif
210 #ifdef TF
211 *cTF = _ctf;
212 #endif
213 #ifdef FT
214 *cFT = _cft;
215 #endif
216 }
217
218 #else
219 #if defined(ARM_MATH_NEON)
220
221
FUNC(EXT)222 void FUNC(EXT)(const uint32_t *pA
223 , const uint32_t *pB
224 , uint32_t numberOfBools
225 #ifdef TT
226 , uint32_t *cTT
227 #endif
228 #ifdef FF
229 , uint32_t *cFF
230 #endif
231 #ifdef TF
232 , uint32_t *cTF
233 #endif
234 #ifdef FT
235 , uint32_t *cFT
236 #endif
237 )
238 {
239 #ifdef TT
240 uint32_t _ctt=0;
241 #endif
242 #ifdef FF
243 uint32_t _cff=0;
244 #endif
245 #ifdef TF
246 uint32_t _ctf=0;
247 #endif
248 #ifdef FT
249 uint32_t _cft=0;
250 #endif
251 uint32_t nbBoolBlock;
252 uint32_t a,b,ba,bb;
253 int shift;
254 uint32x4_t aV, bV;
255 #ifdef TT
256 uint32x4_t cttV;
257 #endif
258 #ifdef FF
259 uint32x4_t cffV;
260 #endif
261 #ifdef TF
262 uint32x4_t ctfV;
263 #endif
264 #ifdef FT
265 uint32x4_t cftV;
266 #endif
267 uint8x16_t tmp;
268 uint16x8_t tmp2;
269 uint32x4_t tmp3;
270 uint64x2_t tmp4;
271 #ifdef TT
272 uint64x2_t tmp4tt;
273 #endif
274 #ifdef FF
275 uint64x2_t tmp4ff;
276 #endif
277 #ifdef TF
278 uint64x2_t tmp4tf;
279 #endif
280 #ifdef FT
281 uint64x2_t tmp4ft;
282 #endif
283
284 #ifdef TT
285 tmp4tt = vdupq_n_u64(0);
286 #endif
287 #ifdef FF
288 tmp4ff = vdupq_n_u64(0);
289 #endif
290 #ifdef TF
291 tmp4tf = vdupq_n_u64(0);
292 #endif
293 #ifdef FT
294 tmp4ft = vdupq_n_u64(0);
295 #endif
296
297 nbBoolBlock = numberOfBools >> 7;
298 while(nbBoolBlock > 0)
299 {
300 aV = vld1q_u32(pA);
301 bV = vld1q_u32(pB);
302 pA += 4;
303 pB += 4;
304
305 #ifdef TT
306 cttV = vandq_u32(aV,bV);
307 #endif
308 #ifdef FF
309 cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
310 #endif
311 #ifdef TF
312 ctfV = vandq_u32(aV,vmvnq_u32(bV));
313 #endif
314 #ifdef FT
315 cftV = vandq_u32(vmvnq_u32(aV),bV);
316 #endif
317
318 #ifdef TT
319 tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
320 tmp2 = vpaddlq_u8(tmp);
321 tmp3 = vpaddlq_u16(tmp2);
322 tmp4 = vpaddlq_u32(tmp3);
323 tmp4tt = vaddq_u64(tmp4tt, tmp4);
324 #endif
325
326 #ifdef FF
327 tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
328 tmp2 = vpaddlq_u8(tmp);
329 tmp3 = vpaddlq_u16(tmp2);
330 tmp4 = vpaddlq_u32(tmp3);
331 tmp4ff = vaddq_u64(tmp4ff, tmp4);
332 #endif
333
334 #ifdef TF
335 tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
336 tmp2 = vpaddlq_u8(tmp);
337 tmp3 = vpaddlq_u16(tmp2);
338 tmp4 = vpaddlq_u32(tmp3);
339 tmp4tf = vaddq_u64(tmp4tf, tmp4);
340 #endif
341
342 #ifdef FT
343 tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
344 tmp2 = vpaddlq_u8(tmp);
345 tmp3 = vpaddlq_u16(tmp2);
346 tmp4 = vpaddlq_u32(tmp3);
347 tmp4ft = vaddq_u64(tmp4ft, tmp4);
348 #endif
349
350
351 nbBoolBlock --;
352 }
353
354 #ifdef TT
355 _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
356 #endif
357 #ifdef FF
358 _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
359 #endif
360 #ifdef TF
361 _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
362 #endif
363 #ifdef FT
364 _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
365 #endif
366
367 nbBoolBlock = numberOfBools & 0x7F;
368 while(nbBoolBlock >= 32)
369 {
370 a = *pA++;
371 b = *pB++;
372 shift = 0;
373 while(shift < 32)
374 {
375 ba = a & 1;
376 bb = b & 1;
377 a = a >> 1;
378 b = b >> 1;
379
380 #ifdef TT
381 _ctt += (ba && bb);
382 #endif
383 #ifdef FF
384 _cff += ((1 ^ ba) && (1 ^ bb));
385 #endif
386 #ifdef TF
387 _ctf += (ba && (1 ^ bb));
388 #endif
389 #ifdef FT
390 _cft += ((1 ^ ba) && bb);
391 #endif
392 shift ++;
393 }
394
395 nbBoolBlock -= 32;
396 }
397
398 a = *pA++;
399 b = *pB++;
400
401 a = a >> (32 - nbBoolBlock);
402 b = b >> (32 - nbBoolBlock);
403
404 while(nbBoolBlock > 0)
405 {
406 ba = a & 1;
407 bb = b & 1;
408 a = a >> 1;
409
410 b = b >> 1;
411 #ifdef TT
412 _ctt += (ba && bb);
413 #endif
414 #ifdef FF
415 _cff += ((1 ^ ba) && (1 ^ bb));
416 #endif
417 #ifdef TF
418 _ctf += (ba && (1 ^ bb));
419 #endif
420 #ifdef FT
421 _cft += ((1 ^ ba) && bb);
422 #endif
423 nbBoolBlock --;
424 }
425
426 #ifdef TT
427 *cTT = _ctt;
428 #endif
429 #ifdef FF
430 *cFF = _cff;
431 #endif
432 #ifdef TF
433 *cTF = _ctf;
434 #endif
435 #ifdef FT
436 *cFT = _cft;
437 #endif
438 }
439
440 #else
441
FUNC(EXT)442 void FUNC(EXT)(const uint32_t *pA
443 , const uint32_t *pB
444 , uint32_t numberOfBools
445 #ifdef TT
446 , uint32_t *cTT
447 #endif
448 #ifdef FF
449 , uint32_t *cFF
450 #endif
451 #ifdef TF
452 , uint32_t *cTF
453 #endif
454 #ifdef FT
455 , uint32_t *cFT
456 #endif
457 )
458 {
459
460 #ifdef TT
461 uint32_t _ctt=0;
462 #endif
463 #ifdef FF
464 uint32_t _cff=0;
465 #endif
466 #ifdef TF
467 uint32_t _ctf=0;
468 #endif
469 #ifdef FT
470 uint32_t _cft=0;
471 #endif
472 uint32_t a,b,ba,bb;
473 int shift;
474
475 while(numberOfBools >= 32)
476 {
477 a = *pA++;
478 b = *pB++;
479 shift = 0;
480 while(shift < 32)
481 {
482 ba = a & 1;
483 bb = b & 1;
484 a = a >> 1;
485 b = b >> 1;
486 #ifdef TT
487 _ctt += (ba && bb);
488 #endif
489 #ifdef FF
490 _cff += ((1 ^ ba) && (1 ^ bb));
491 #endif
492 #ifdef TF
493 _ctf += (ba && (1 ^ bb));
494 #endif
495 #ifdef FT
496 _cft += ((1 ^ ba) && bb);
497 #endif
498 shift ++;
499 }
500
501 numberOfBools -= 32;
502 }
503
504 a = *pA++;
505 b = *pB++;
506
507 a = a >> (32 - numberOfBools);
508 b = b >> (32 - numberOfBools);
509
510 while(numberOfBools > 0)
511 {
512 ba = a & 1;
513 bb = b & 1;
514 a = a >> 1;
515 b = b >> 1;
516
517 #ifdef TT
518 _ctt += (ba && bb);
519 #endif
520 #ifdef FF
521 _cff += ((1 ^ ba) && (1 ^ bb));
522 #endif
523 #ifdef TF
524 _ctf += (ba && (1 ^ bb));
525 #endif
526 #ifdef FT
527 _cft += ((1 ^ ba) && bb);
528 #endif
529 numberOfBools --;
530 }
531
532 #ifdef TT
533 *cTT = _ctt;
534 #endif
535 #ifdef FF
536 *cFF = _cff;
537 #endif
538 #ifdef TF
539 *cTF = _ctf;
540 #endif
541 #ifdef FT
542 *cFT = _cft;
543 #endif
544 }
545 #endif
546 #endif /* defined(ARM_MATH_MVEI) */
547
548
549 /**
550 * @} end of DISTANCEF group
551 */
552