1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_boolean_distance.c
5  * Description:  Templates for boolean distances
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 
31 
32 
33 /**
34  * @defgroup DISTANCEF Distance Functions
35  *
36  * Computes Distances between vectors.
37  *
38  * Distance functions are useful in a lot of algorithms.
39  *
40  */
41 
42 
43 /**
44  * @addtogroup DISTANCEF
45  * @{
46  */
47 
48 
49 
50 
/* Token-pasting helper: glues its two arguments into a single identifier. */
#define _FUNC(PREFIX,SUFFIX) PREFIX##SUFFIX

/*
 * Builds the name of the generated function: "arm_boolean_distance"
 * followed by the (macro-expanded) argument.
 * NOTE(review): EXT is not defined in this file; presumably the source that
 * includes this template defines it together with TT / FF / TF / FT.
 */
#define FUNC(SUFFIX) _FUNC(arm_boolean_distance, SUFFIX)

/*
 * Forward declaration of the generated function.
 * An output counter pointer is part of the signature only when the macro of
 * the same name (TT, FF, TF, FT) is defined, always in that order.
 */
void FUNC(EXT)(const uint32_t *pA,
               const uint32_t *pB,
               uint32_t numberOfBools
#ifdef TT
               , uint32_t *cTT
#endif
#ifdef FF
               , uint32_t *cFF
#endif
#ifdef TF
               , uint32_t *cTF
#endif
#ifdef FT
               , uint32_t *cFT
#endif
               );
71 
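/**
 * @brief        Elements of boolean distances
 *
 * Counts, position by position, how the booleans of two packed vectors
 * combine. These counts are the values from which the boolean distances
 * are computed. Only the counters whose macro (TT, FF, TF, FT) is defined
 * when this template is compiled are part of the generated function.
 *
 * Booleans are packed 32 per word. When numberOfBools is not a multiple
 * of 32, the booleans of the last word are read from its most significant
 * bits.
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @param[out]   cTT             Number of positions where A is true and B is true (only if TT is defined)
 * @param[out]   cFF             Number of positions where A is false and B is false (only if FF is defined)
 * @param[out]   cTF             Number of positions where A is true and B is false (only if TF is defined)
 * @param[out]   cFT             Number of positions where A is false and B is true (only if FT is defined)
 *
 */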
82 
83 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
84 
85 #include "arm_common_tables.h"
86 
FUNC(EXT)87 void FUNC(EXT)(const uint32_t *pA
88        , const uint32_t *pB
89        , uint32_t numberOfBools
90 #ifdef TT
91        , uint32_t *cTT
92 #endif
93 #ifdef FF
94        , uint32_t *cFF
95 #endif
96 #ifdef TF
97        , uint32_t *cTF
98 #endif
99 #ifdef FT
100        , uint32_t *cFT
101 #endif
102        )
103 {
104 
105 #ifdef TT
106     uint32_t _ctt=0;
107 #endif
108 #ifdef FF
109     uint32_t _cff=0;
110 #endif
111 #ifdef TF
112     uint32_t _ctf=0;
113 #endif
114 #ifdef FT
115     uint32_t _cft=0;
116 #endif
117     uint32_t        a, b, ba, bb;
118     int shift;
119     const uint8_t  *pA8 = (const uint8_t *) pA;
120     const uint8_t  *pB8 = (const uint8_t *) pB;
121 
122     /* handle vector blocks */
123     uint32_t         blkCnt = numberOfBools / 128;
124 
125 
126 
127     while (blkCnt > 0U) {
128         uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
129         uint8x16_t      vecB = vld1q((const uint8_t *) pB8);
130 
131 #ifdef TT
132         uint8x16_t      vecTT = vecA & vecB;
133         vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
134         _ctt += vaddvq(vecTT);
135 #endif
136 #ifdef FF
137         uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
138         vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
139         _cff += vaddvq(vecFF);
140 #endif
141 #ifdef TF
142         uint8x16_t      vecTF = vecA & vmvnq(vecB);
143         vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
144         _ctf += vaddvq(vecTF);
145 #endif
146 #ifdef FT
147         uint8x16_t      vecFT = vmvnq(vecA) & vecB;
148         vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
149         _cft += vaddvq(vecFT);
150 #endif
151 
152         pA8 += 16;
153         pB8 += 16;
154         blkCnt--;
155 
156     }
157 
158     pA = (const uint32_t *)pA8;
159     pB = (const uint32_t *)pB8;
160 
161     blkCnt = numberOfBools & 0x7F;
162     while(blkCnt >= 32)
163     {
164        a = *pA++;
165        b = *pB++;
166        shift = 0;
167        while(shift < 32)
168        {
169           ba = a & 1;
170           bb = b & 1;
171           a = a >> 1;
172           b = b >> 1;
173 
174 #ifdef TT
175           _ctt += (ba && bb);
176 #endif
177 #ifdef FF
178           _cff += ((1 ^ ba) && (1 ^ bb));
179 #endif
180 #ifdef TF
181           _ctf += (ba && (1 ^ bb));
182 #endif
183 #ifdef FT
184           _cft += ((1 ^ ba) && bb);
185 #endif
186           shift ++;
187        }
188 
189        blkCnt -= 32;
190     }
191 
192     a = *pA++;
193     b = *pB++;
194 
195     a = a >> (32 - blkCnt);
196     b = b >> (32 - blkCnt);
197 
198     while(blkCnt > 0)
199     {
200           ba = a & 1;
201           bb = b & 1;
202           a = a >> 1;
203 
204           b = b >> 1;
205 #ifdef TT
206           _ctt += (ba && bb);
207 #endif
208 #ifdef FF
209           _cff += ((1 ^ ba) && (1 ^ bb));
210 #endif
211 #ifdef TF
212           _ctf += (ba && (1 ^ bb));
213 #endif
214 #ifdef FT
215           _cft += ((1 ^ ba) && bb);
216 #endif
217           blkCnt --;
218     }
219 
220 #ifdef TT
221     *cTT = _ctt;
222 #endif
223 #ifdef FF
224     *cFF = _cff;
225 #endif
226 #ifdef TF
227     *cTF = _ctf;
228 #endif
229 #ifdef FT
230     *cFT = _cft;
231 #endif
232 }
233 
234 #else
235 #if defined(ARM_MATH_NEON)
236 
237 
FUNC(EXT)238 void FUNC(EXT)(const uint32_t *pA
239        , const uint32_t *pB
240        , uint32_t numberOfBools
241 #ifdef TT
242        , uint32_t *cTT
243 #endif
244 #ifdef FF
245        , uint32_t *cFF
246 #endif
247 #ifdef TF
248        , uint32_t *cTF
249 #endif
250 #ifdef FT
251        , uint32_t *cFT
252 #endif
253        )
254 {
255 #ifdef TT
256     uint32_t _ctt=0;
257 #endif
258 #ifdef FF
259     uint32_t _cff=0;
260 #endif
261 #ifdef TF
262     uint32_t _ctf=0;
263 #endif
264 #ifdef FT
265     uint32_t _cft=0;
266 #endif
267     uint32_t nbBoolBlock;
268     uint32_t a,b,ba,bb;
269     int shift;
270     uint32x4_t aV, bV;
271 #ifdef TT
272     uint32x4_t cttV;
273 #endif
274 #ifdef FF
275     uint32x4_t cffV;
276 #endif
277 #ifdef TF
278     uint32x4_t ctfV;
279 #endif
280 #ifdef FT
281     uint32x4_t cftV;
282 #endif
283     uint8x16_t tmp;
284     uint16x8_t tmp2;
285     uint32x4_t tmp3;
286     uint64x2_t tmp4;
287 #ifdef TT
288     uint64x2_t tmp4tt;
289 #endif
290 #ifdef FF
291     uint64x2_t tmp4ff;
292 #endif
293 #ifdef TF
294     uint64x2_t tmp4tf;
295 #endif
296 #ifdef FT
297     uint64x2_t tmp4ft;
298 #endif
299 
300 #ifdef TT
301     tmp4tt = vdupq_n_u64(0);
302 #endif
303 #ifdef FF
304     tmp4ff = vdupq_n_u64(0);
305 #endif
306 #ifdef TF
307     tmp4tf = vdupq_n_u64(0);
308 #endif
309 #ifdef FT
310     tmp4ft = vdupq_n_u64(0);
311 #endif
312 
313     nbBoolBlock = numberOfBools >> 7;
314     while(nbBoolBlock > 0)
315     {
316        aV = vld1q_u32(pA);
317        bV = vld1q_u32(pB);
318        pA += 4;
319        pB += 4;
320 
321 #ifdef TT
322        cttV = vandq_u32(aV,bV);
323 #endif
324 #ifdef FF
325        cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
326 #endif
327 #ifdef TF
328        ctfV = vandq_u32(aV,vmvnq_u32(bV));
329 #endif
330 #ifdef FT
331        cftV = vandq_u32(vmvnq_u32(aV),bV);
332 #endif
333 
334 #ifdef TT
335        tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
336        tmp2 = vpaddlq_u8(tmp);
337        tmp3 = vpaddlq_u16(tmp2);
338        tmp4 = vpaddlq_u32(tmp3);
339        tmp4tt = vaddq_u64(tmp4tt, tmp4);
340 #endif
341 
342 #ifdef FF
343        tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
344        tmp2 = vpaddlq_u8(tmp);
345        tmp3 = vpaddlq_u16(tmp2);
346        tmp4 = vpaddlq_u32(tmp3);
347        tmp4ff = vaddq_u64(tmp4ff, tmp4);
348 #endif
349 
350 #ifdef TF
351        tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
352        tmp2 = vpaddlq_u8(tmp);
353        tmp3 = vpaddlq_u16(tmp2);
354        tmp4 = vpaddlq_u32(tmp3);
355        tmp4tf = vaddq_u64(tmp4tf, tmp4);
356 #endif
357 
358 #ifdef FT
359        tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
360        tmp2 = vpaddlq_u8(tmp);
361        tmp3 = vpaddlq_u16(tmp2);
362        tmp4 = vpaddlq_u32(tmp3);
363        tmp4ft = vaddq_u64(tmp4ft, tmp4);
364 #endif
365 
366 
367        nbBoolBlock --;
368     }
369 
370 #ifdef TT
371     _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
372 #endif
373 #ifdef FF
374     _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
375 #endif
376 #ifdef TF
377     _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
378 #endif
379 #ifdef FT
380     _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
381 #endif
382 
383     nbBoolBlock = numberOfBools & 0x7F;
384     while(nbBoolBlock >= 32)
385     {
386        a = *pA++;
387        b = *pB++;
388        shift = 0;
389        while(shift < 32)
390        {
391           ba = a & 1;
392           bb = b & 1;
393           a = a >> 1;
394           b = b >> 1;
395 
396 #ifdef TT
397           _ctt += (ba && bb);
398 #endif
399 #ifdef FF
400           _cff += ((1 ^ ba) && (1 ^ bb));
401 #endif
402 #ifdef TF
403           _ctf += (ba && (1 ^ bb));
404 #endif
405 #ifdef FT
406           _cft += ((1 ^ ba) && bb);
407 #endif
408           shift ++;
409        }
410 
411        nbBoolBlock -= 32;
412     }
413 
414     a = *pA++;
415     b = *pB++;
416 
417     a = a >> (32 - nbBoolBlock);
418     b = b >> (32 - nbBoolBlock);
419 
420     while(nbBoolBlock > 0)
421     {
422           ba = a & 1;
423           bb = b & 1;
424           a = a >> 1;
425 
426           b = b >> 1;
427 #ifdef TT
428           _ctt += (ba && bb);
429 #endif
430 #ifdef FF
431           _cff += ((1 ^ ba) && (1 ^ bb));
432 #endif
433 #ifdef TF
434           _ctf += (ba && (1 ^ bb));
435 #endif
436 #ifdef FT
437           _cft += ((1 ^ ba) && bb);
438 #endif
439           nbBoolBlock --;
440     }
441 
442 #ifdef TT
443     *cTT = _ctt;
444 #endif
445 #ifdef FF
446     *cFF = _cff;
447 #endif
448 #ifdef TF
449     *cTF = _ctf;
450 #endif
451 #ifdef FT
452     *cFT = _cft;
453 #endif
454 }
455 
456 #else
457 
/*
 * Portable scalar implementation.
 *
 * Walks the packed inputs one 32-bit word at a time and classifies every
 * pair of booleans. Full words contribute all 32 bits; a final partial word
 * contributes only its most significant bits. No input word is read when no
 * boolean remains to be counted, so a count that is a multiple of 32
 * (including 0) never reads past the end of pA / pB.
 */
void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{

#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t a, b, ba, bb;
    uint32_t nbBits, bit;

    while (numberOfBools > 0U)
    {
        /* number of booleans taken from the current word */
        nbBits = (numberOfBools >= 32U) ? 32U : numberOfBools;

        a = *pA++;
        b = *pB++;

        /*
         * A partial word keeps its booleans in the most significant bits.
         * The guard also keeps the shift amount in 1..31 (a shift by 32
         * would be undefined).
         */
        if (nbBits < 32U)
        {
            a >>= (32U - nbBits);
            b >>= (32U - nbBits);
        }

        for (bit = 0U; bit < nbBits; bit++)
        {
            ba = a & 1U;
            bb = b & 1U;
            a >>= 1;
            b >>= 1;

#ifdef TT
            _ctt += (ba & bb);
#endif
#ifdef FF
            _cff += ((1U ^ ba) & (1U ^ bb));
#endif
#ifdef TF
            _ctf += (ba & (1U ^ bb));
#endif
#ifdef FT
            _cft += ((1U ^ ba) & bb);
#endif
        }

        numberOfBools -= nbBits;
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}
561 #endif
562 #endif /* defined(ARM_MATH_MVEI) */
563 
564 
565 /**
566  * @} end of DISTANCEF group
567  */
568