1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_boolean_distance.c
5 * Description: Templates for boolean distances
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30
31
32
33 /**
34 * @defgroup DISTANCEF Distance Functions
35 *
36 * Computes Distances between vectors.
37 *
38 * Distance functions are useful in a lot of algorithms.
39 *
40 */
41
42
43 /**
44 * @addtogroup DISTANCEF
45 * @{
46 */
47
48
49
50
/* Token-pasting helper: glues PREFIX and SUFFIX into a single identifier. */
#define _FUNC(PREFIX, SUFFIX) PREFIX##SUFFIX

/*
 * Builds the name of the generated function: arm_boolean_distance<SUFFIX>.
 * The extra indirection through _FUNC lets SUFFIX be macro-expanded before
 * it is pasted, so FUNC(EXT) uses the value of EXT rather than the token EXT.
 */
#define FUNC(SUFFIX) _FUNC(arm_boolean_distance, SUFFIX)
54
55 /**
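 * @brief         Elements of boolean distances
 *
 * Counts, over all boolean positions of the two packed vectors, how many
 * positions fall into each (A, B) truth combination. These counts are the
 * values used to compute the boolean distances. Only the counters whose
 * macro (TT, FF, TF, FT) is defined when this template is compiled are
 * computed and present in the function signature.
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @param[out]   cTT             Count of positions where A is true and B is true (only if TT is defined)
 * @param[out]   cFF             Count of positions where A is false and B is false (only if FF is defined)
 * @param[out]   cTF             Count of positions where A is true and B is false (only if TF is defined)
 * @param[out]   cFT             Count of positions where A is false and B is true (only if FT is defined)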
63 *
64 */
65
66 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
67
68 #include "arm_common_tables.h"
69
FUNC(EXT)70 void FUNC(EXT)(const uint32_t *pA
71 , const uint32_t *pB
72 , uint32_t numberOfBools
73 #ifdef TT
74 , uint32_t *cTT
75 #endif
76 #ifdef FF
77 , uint32_t *cFF
78 #endif
79 #ifdef TF
80 , uint32_t *cTF
81 #endif
82 #ifdef FT
83 , uint32_t *cFT
84 #endif
85 )
86 {
87
88 #ifdef TT
89 uint32_t _ctt=0;
90 #endif
91 #ifdef FF
92 uint32_t _cff=0;
93 #endif
94 #ifdef TF
95 uint32_t _ctf=0;
96 #endif
97 #ifdef FT
98 uint32_t _cft=0;
99 #endif
100 uint32_t a, b, ba, bb;
101 int shift;
102 const uint8_t *pA8 = (const uint8_t *) pA;
103 const uint8_t *pB8 = (const uint8_t *) pB;
104
105 /* handle vector blocks */
106 uint32_t blkCnt = numberOfBools / 128;
107
108
109
110 while (blkCnt > 0U) {
111 uint8x16_t vecA = vld1q((const uint8_t *) pA8);
112 uint8x16_t vecB = vld1q((const uint8_t *) pB8);
113
114 #ifdef TT
115 uint8x16_t vecTT = vecA & vecB;
116 vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
117 _ctt += vaddvq(vecTT);
118 #endif
119 #ifdef FF
120 uint8x16_t vecFF = vmvnq(vecA) & vmvnq(vecB);
121 vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
122 _cff += vaddvq(vecFF);
123 #endif
124 #ifdef TF
125 uint8x16_t vecTF = vecA & vmvnq(vecB);
126 vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
127 _ctf += vaddvq(vecTF);
128 #endif
129 #ifdef FT
130 uint8x16_t vecFT = vmvnq(vecA) & vecB;
131 vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
132 _cft += vaddvq(vecFT);
133 #endif
134
135 pA8 += 16;
136 pB8 += 16;
137 blkCnt--;
138
139 }
140
141 pA = (const uint32_t *)pA8;
142 pB = (const uint32_t *)pB8;
143
144 blkCnt = numberOfBools & 0x7F;
145 while(blkCnt >= 32)
146 {
147 a = *pA++;
148 b = *pB++;
149 shift = 0;
150 while(shift < 32)
151 {
152 ba = a & 1;
153 bb = b & 1;
154 a = a >> 1;
155 b = b >> 1;
156
157 #ifdef TT
158 _ctt += (ba && bb);
159 #endif
160 #ifdef FF
161 _cff += ((1 ^ ba) && (1 ^ bb));
162 #endif
163 #ifdef TF
164 _ctf += (ba && (1 ^ bb));
165 #endif
166 #ifdef FT
167 _cft += ((1 ^ ba) && bb);
168 #endif
169 shift ++;
170 }
171
172 blkCnt -= 32;
173 }
174
175 a = *pA++;
176 b = *pB++;
177
178 a = a >> (32 - blkCnt);
179 b = b >> (32 - blkCnt);
180
181 while(blkCnt > 0)
182 {
183 ba = a & 1;
184 bb = b & 1;
185 a = a >> 1;
186
187 b = b >> 1;
188 #ifdef TT
189 _ctt += (ba && bb);
190 #endif
191 #ifdef FF
192 _cff += ((1 ^ ba) && (1 ^ bb));
193 #endif
194 #ifdef TF
195 _ctf += (ba && (1 ^ bb));
196 #endif
197 #ifdef FT
198 _cft += ((1 ^ ba) && bb);
199 #endif
200 blkCnt --;
201 }
202
203 #ifdef TT
204 *cTT = _ctt;
205 #endif
206 #ifdef FF
207 *cFF = _cff;
208 #endif
209 #ifdef TF
210 *cTF = _ctf;
211 #endif
212 #ifdef FT
213 *cFT = _cft;
214 #endif
215 }
216
217 #else
218 #if defined(ARM_MATH_NEON)
219
220
FUNC(EXT)221 void FUNC(EXT)(const uint32_t *pA
222 , const uint32_t *pB
223 , uint32_t numberOfBools
224 #ifdef TT
225 , uint32_t *cTT
226 #endif
227 #ifdef FF
228 , uint32_t *cFF
229 #endif
230 #ifdef TF
231 , uint32_t *cTF
232 #endif
233 #ifdef FT
234 , uint32_t *cFT
235 #endif
236 )
237 {
238 #ifdef TT
239 uint32_t _ctt=0;
240 #endif
241 #ifdef FF
242 uint32_t _cff=0;
243 #endif
244 #ifdef TF
245 uint32_t _ctf=0;
246 #endif
247 #ifdef FT
248 uint32_t _cft=0;
249 #endif
250 uint32_t nbBoolBlock;
251 uint32_t a,b,ba,bb;
252 int shift;
253 uint32x4_t aV, bV;
254 #ifdef TT
255 uint32x4_t cttV;
256 #endif
257 #ifdef FF
258 uint32x4_t cffV;
259 #endif
260 #ifdef TF
261 uint32x4_t ctfV;
262 #endif
263 #ifdef FT
264 uint32x4_t cftV;
265 #endif
266 uint8x16_t tmp;
267 uint16x8_t tmp2;
268 uint32x4_t tmp3;
269 uint64x2_t tmp4;
270 #ifdef TT
271 uint64x2_t tmp4tt;
272 #endif
273 #ifdef FF
274 uint64x2_t tmp4ff;
275 #endif
276 #ifdef TF
277 uint64x2_t tmp4tf;
278 #endif
279 #ifdef FT
280 uint64x2_t tmp4ft;
281 #endif
282
283 #ifdef TT
284 tmp4tt = vdupq_n_u64(0);
285 #endif
286 #ifdef FF
287 tmp4ff = vdupq_n_u64(0);
288 #endif
289 #ifdef TF
290 tmp4tf = vdupq_n_u64(0);
291 #endif
292 #ifdef FT
293 tmp4ft = vdupq_n_u64(0);
294 #endif
295
296 nbBoolBlock = numberOfBools >> 7;
297 while(nbBoolBlock > 0)
298 {
299 aV = vld1q_u32(pA);
300 bV = vld1q_u32(pB);
301 pA += 4;
302 pB += 4;
303
304 #ifdef TT
305 cttV = vandq_u32(aV,bV);
306 #endif
307 #ifdef FF
308 cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
309 #endif
310 #ifdef TF
311 ctfV = vandq_u32(aV,vmvnq_u32(bV));
312 #endif
313 #ifdef FT
314 cftV = vandq_u32(vmvnq_u32(aV),bV);
315 #endif
316
317 #ifdef TT
318 tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
319 tmp2 = vpaddlq_u8(tmp);
320 tmp3 = vpaddlq_u16(tmp2);
321 tmp4 = vpaddlq_u32(tmp3);
322 tmp4tt = vaddq_u64(tmp4tt, tmp4);
323 #endif
324
325 #ifdef FF
326 tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
327 tmp2 = vpaddlq_u8(tmp);
328 tmp3 = vpaddlq_u16(tmp2);
329 tmp4 = vpaddlq_u32(tmp3);
330 tmp4ff = vaddq_u64(tmp4ff, tmp4);
331 #endif
332
333 #ifdef TF
334 tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
335 tmp2 = vpaddlq_u8(tmp);
336 tmp3 = vpaddlq_u16(tmp2);
337 tmp4 = vpaddlq_u32(tmp3);
338 tmp4tf = vaddq_u64(tmp4tf, tmp4);
339 #endif
340
341 #ifdef FT
342 tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
343 tmp2 = vpaddlq_u8(tmp);
344 tmp3 = vpaddlq_u16(tmp2);
345 tmp4 = vpaddlq_u32(tmp3);
346 tmp4ft = vaddq_u64(tmp4ft, tmp4);
347 #endif
348
349
350 nbBoolBlock --;
351 }
352
353 #ifdef TT
354 _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
355 #endif
356 #ifdef FF
357 _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
358 #endif
359 #ifdef TF
360 _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
361 #endif
362 #ifdef FT
363 _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
364 #endif
365
366 nbBoolBlock = numberOfBools & 0x7F;
367 while(nbBoolBlock >= 32)
368 {
369 a = *pA++;
370 b = *pB++;
371 shift = 0;
372 while(shift < 32)
373 {
374 ba = a & 1;
375 bb = b & 1;
376 a = a >> 1;
377 b = b >> 1;
378
379 #ifdef TT
380 _ctt += (ba && bb);
381 #endif
382 #ifdef FF
383 _cff += ((1 ^ ba) && (1 ^ bb));
384 #endif
385 #ifdef TF
386 _ctf += (ba && (1 ^ bb));
387 #endif
388 #ifdef FT
389 _cft += ((1 ^ ba) && bb);
390 #endif
391 shift ++;
392 }
393
394 nbBoolBlock -= 32;
395 }
396
397 a = *pA++;
398 b = *pB++;
399
400 a = a >> (32 - nbBoolBlock);
401 b = b >> (32 - nbBoolBlock);
402
403 while(nbBoolBlock > 0)
404 {
405 ba = a & 1;
406 bb = b & 1;
407 a = a >> 1;
408
409 b = b >> 1;
410 #ifdef TT
411 _ctt += (ba && bb);
412 #endif
413 #ifdef FF
414 _cff += ((1 ^ ba) && (1 ^ bb));
415 #endif
416 #ifdef TF
417 _ctf += (ba && (1 ^ bb));
418 #endif
419 #ifdef FT
420 _cft += ((1 ^ ba) && bb);
421 #endif
422 nbBoolBlock --;
423 }
424
425 #ifdef TT
426 *cTT = _ctt;
427 #endif
428 #ifdef FF
429 *cFF = _cff;
430 #endif
431 #ifdef TF
432 *cTF = _ctf;
433 #endif
434 #ifdef FT
435 *cFT = _cft;
436 #endif
437 }
438
439 #else
440
FUNC(EXT)441 void FUNC(EXT)(const uint32_t *pA
442 , const uint32_t *pB
443 , uint32_t numberOfBools
444 #ifdef TT
445 , uint32_t *cTT
446 #endif
447 #ifdef FF
448 , uint32_t *cFF
449 #endif
450 #ifdef TF
451 , uint32_t *cTF
452 #endif
453 #ifdef FT
454 , uint32_t *cFT
455 #endif
456 )
457 {
458
459 #ifdef TT
460 uint32_t _ctt=0;
461 #endif
462 #ifdef FF
463 uint32_t _cff=0;
464 #endif
465 #ifdef TF
466 uint32_t _ctf=0;
467 #endif
468 #ifdef FT
469 uint32_t _cft=0;
470 #endif
471 uint32_t a,b,ba,bb;
472 int shift;
473
474 while(numberOfBools >= 32)
475 {
476 a = *pA++;
477 b = *pB++;
478 shift = 0;
479 while(shift < 32)
480 {
481 ba = a & 1;
482 bb = b & 1;
483 a = a >> 1;
484 b = b >> 1;
485 #ifdef TT
486 _ctt += (ba && bb);
487 #endif
488 #ifdef FF
489 _cff += ((1 ^ ba) && (1 ^ bb));
490 #endif
491 #ifdef TF
492 _ctf += (ba && (1 ^ bb));
493 #endif
494 #ifdef FT
495 _cft += ((1 ^ ba) && bb);
496 #endif
497 shift ++;
498 }
499
500 numberOfBools -= 32;
501 }
502
503 a = *pA++;
504 b = *pB++;
505
506 a = a >> (32 - numberOfBools);
507 b = b >> (32 - numberOfBools);
508
509 while(numberOfBools > 0)
510 {
511 ba = a & 1;
512 bb = b & 1;
513 a = a >> 1;
514 b = b >> 1;
515
516 #ifdef TT
517 _ctt += (ba && bb);
518 #endif
519 #ifdef FF
520 _cff += ((1 ^ ba) && (1 ^ bb));
521 #endif
522 #ifdef TF
523 _ctf += (ba && (1 ^ bb));
524 #endif
525 #ifdef FT
526 _cft += ((1 ^ ba) && bb);
527 #endif
528 numberOfBools --;
529 }
530
531 #ifdef TT
532 *cTT = _ctt;
533 #endif
534 #ifdef FF
535 *cFF = _cff;
536 #endif
537 #ifdef TF
538 *cTF = _ctf;
539 #endif
540 #ifdef FT
541 *cFT = _cft;
542 #endif
543 }
544 #endif
545 #endif /* defined(ARM_MATH_MVEI) */
546
547
548 /**
549 * @} end of DISTANCEF group
550 */
551