1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_boolean_distance.c
5  * Description:  Templates for boolean distances
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 
31 
32 
33 /**
34  * @defgroup DISTANCEF Distance Functions
35  *
36  * Computes Distances between vectors.
37  *
38  * Distance functions are useful in a lot of algorithms.
39  *
40  */
41 
42 
43 /**
44  * @addtogroup DISTANCEF
45  * @{
46  */
47 
48 
49 
50 
/*
 * Token-pasting helper: glues A and B into a single identifier.
 * It is kept as a separate macro so that the arguments handed over by FUNC
 * are macro-expanded before the ## operator is applied.
 * NOTE(review): an identifier starting with an underscore followed by an
 * uppercase letter is reserved by the C standard; the name is kept because
 * code outside this file may rely on it.
 */
#define _FUNC(A,B) A##B

/*
 * Builds the name of the generated function: arm_boolean_distance followed by
 * the expansion of EXT. EXT is not defined in this file; presumably the
 * including translation unit defines it before pulling this template in.
 */
#define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
54 
55 /**
56  * @brief        Elements of boolean distances
57  *
58  * Different values which are used to compute boolean distances
59  *
60  * @param[in]    pA              First vector of packed booleans
61  * @param[in]    pB              Second vector of packed booleans
62  * @param[in]    numberOfBools   Number of booleans
63  *
64  */
65 
66 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
67 
68 #include "arm_common_tables.h"
69 
FUNC(EXT)70 void FUNC(EXT)(const uint32_t *pA
71        , const uint32_t *pB
72        , uint32_t numberOfBools
73 #ifdef TT
74        , uint32_t *cTT
75 #endif
76 #ifdef FF
77        , uint32_t *cFF
78 #endif
79 #ifdef TF
80        , uint32_t *cTF
81 #endif
82 #ifdef FT
83        , uint32_t *cFT
84 #endif
85        )
86 {
87 
88 #ifdef TT
89     uint32_t _ctt=0;
90 #endif
91 #ifdef FF
92     uint32_t _cff=0;
93 #endif
94 #ifdef TF
95     uint32_t _ctf=0;
96 #endif
97 #ifdef FT
98     uint32_t _cft=0;
99 #endif
100     uint32_t        a, b, ba, bb;
101     int shift;
102     const uint8_t  *pA8 = (const uint8_t *) pA;
103     const uint8_t  *pB8 = (const uint8_t *) pB;
104 
105     /* handle vector blocks */
106     uint32_t         blkCnt = numberOfBools / 128;
107 
108 
109 
110     while (blkCnt > 0U) {
111         uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
112         uint8x16_t      vecB = vld1q((const uint8_t *) pB8);
113 
114 #ifdef TT
115         uint8x16_t      vecTT = vecA & vecB;
116         vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
117         _ctt += vaddvq(vecTT);
118 #endif
119 #ifdef FF
120         uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
121         vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
122         _cff += vaddvq(vecFF);
123 #endif
124 #ifdef TF
125         uint8x16_t      vecTF = vecA & vmvnq(vecB);
126         vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
127         _ctf += vaddvq(vecTF);
128 #endif
129 #ifdef FT
130         uint8x16_t      vecFT = vmvnq(vecA) & vecB;
131         vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
132         _cft += vaddvq(vecFT);
133 #endif
134 
135         pA8 += 16;
136         pB8 += 16;
137         blkCnt--;
138 
139     }
140 
141     pA = (const uint32_t *)pA8;
142     pB = (const uint32_t *)pB8;
143 
144     blkCnt = numberOfBools & 0x7F;
145     while(blkCnt >= 32)
146     {
147        a = *pA++;
148        b = *pB++;
149        shift = 0;
150        while(shift < 32)
151        {
152           ba = a & 1;
153           bb = b & 1;
154           a = a >> 1;
155           b = b >> 1;
156 
157 #ifdef TT
158           _ctt += (ba && bb);
159 #endif
160 #ifdef FF
161           _cff += ((1 ^ ba) && (1 ^ bb));
162 #endif
163 #ifdef TF
164           _ctf += (ba && (1 ^ bb));
165 #endif
166 #ifdef FT
167           _cft += ((1 ^ ba) && bb);
168 #endif
169           shift ++;
170        }
171 
172        blkCnt -= 32;
173     }
174 
175     a = *pA++;
176     b = *pB++;
177 
178     a = a >> (32 - blkCnt);
179     b = b >> (32 - blkCnt);
180 
181     while(blkCnt > 0)
182     {
183           ba = a & 1;
184           bb = b & 1;
185           a = a >> 1;
186 
187           b = b >> 1;
188 #ifdef TT
189           _ctt += (ba && bb);
190 #endif
191 #ifdef FF
192           _cff += ((1 ^ ba) && (1 ^ bb));
193 #endif
194 #ifdef TF
195           _ctf += (ba && (1 ^ bb));
196 #endif
197 #ifdef FT
198           _cft += ((1 ^ ba) && bb);
199 #endif
200           blkCnt --;
201     }
202 
203 #ifdef TT
204     *cTT = _ctt;
205 #endif
206 #ifdef FF
207     *cFF = _cff;
208 #endif
209 #ifdef TF
210     *cTF = _ctf;
211 #endif
212 #ifdef FT
213     *cFT = _cft;
214 #endif
215 }
216 
217 #else
218 #if defined(ARM_MATH_NEON)
219 
220 
FUNC(EXT)221 void FUNC(EXT)(const uint32_t *pA
222        , const uint32_t *pB
223        , uint32_t numberOfBools
224 #ifdef TT
225        , uint32_t *cTT
226 #endif
227 #ifdef FF
228        , uint32_t *cFF
229 #endif
230 #ifdef TF
231        , uint32_t *cTF
232 #endif
233 #ifdef FT
234        , uint32_t *cFT
235 #endif
236        )
237 {
238 #ifdef TT
239     uint32_t _ctt=0;
240 #endif
241 #ifdef FF
242     uint32_t _cff=0;
243 #endif
244 #ifdef TF
245     uint32_t _ctf=0;
246 #endif
247 #ifdef FT
248     uint32_t _cft=0;
249 #endif
250     uint32_t nbBoolBlock;
251     uint32_t a,b,ba,bb;
252     int shift;
253     uint32x4_t aV, bV;
254 #ifdef TT
255     uint32x4_t cttV;
256 #endif
257 #ifdef FF
258     uint32x4_t cffV;
259 #endif
260 #ifdef TF
261     uint32x4_t ctfV;
262 #endif
263 #ifdef FT
264     uint32x4_t cftV;
265 #endif
266     uint8x16_t tmp;
267     uint16x8_t tmp2;
268     uint32x4_t tmp3;
269     uint64x2_t tmp4;
270 #ifdef TT
271     uint64x2_t tmp4tt;
272 #endif
273 #ifdef FF
274     uint64x2_t tmp4ff;
275 #endif
276 #ifdef TF
277     uint64x2_t tmp4tf;
278 #endif
279 #ifdef FT
280     uint64x2_t tmp4ft;
281 #endif
282 
283 #ifdef TT
284     tmp4tt = vdupq_n_u64(0);
285 #endif
286 #ifdef FF
287     tmp4ff = vdupq_n_u64(0);
288 #endif
289 #ifdef TF
290     tmp4tf = vdupq_n_u64(0);
291 #endif
292 #ifdef FT
293     tmp4ft = vdupq_n_u64(0);
294 #endif
295 
296     nbBoolBlock = numberOfBools >> 7;
297     while(nbBoolBlock > 0)
298     {
299        aV = vld1q_u32(pA);
300        bV = vld1q_u32(pB);
301        pA += 4;
302        pB += 4;
303 
304 #ifdef TT
305        cttV = vandq_u32(aV,bV);
306 #endif
307 #ifdef FF
308        cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
309 #endif
310 #ifdef TF
311        ctfV = vandq_u32(aV,vmvnq_u32(bV));
312 #endif
313 #ifdef FT
314        cftV = vandq_u32(vmvnq_u32(aV),bV);
315 #endif
316 
317 #ifdef TT
318        tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
319        tmp2 = vpaddlq_u8(tmp);
320        tmp3 = vpaddlq_u16(tmp2);
321        tmp4 = vpaddlq_u32(tmp3);
322        tmp4tt = vaddq_u64(tmp4tt, tmp4);
323 #endif
324 
325 #ifdef FF
326        tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
327        tmp2 = vpaddlq_u8(tmp);
328        tmp3 = vpaddlq_u16(tmp2);
329        tmp4 = vpaddlq_u32(tmp3);
330        tmp4ff = vaddq_u64(tmp4ff, tmp4);
331 #endif
332 
333 #ifdef TF
334        tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
335        tmp2 = vpaddlq_u8(tmp);
336        tmp3 = vpaddlq_u16(tmp2);
337        tmp4 = vpaddlq_u32(tmp3);
338        tmp4tf = vaddq_u64(tmp4tf, tmp4);
339 #endif
340 
341 #ifdef FT
342        tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
343        tmp2 = vpaddlq_u8(tmp);
344        tmp3 = vpaddlq_u16(tmp2);
345        tmp4 = vpaddlq_u32(tmp3);
346        tmp4ft = vaddq_u64(tmp4ft, tmp4);
347 #endif
348 
349 
350        nbBoolBlock --;
351     }
352 
353 #ifdef TT
354     _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
355 #endif
356 #ifdef FF
357     _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
358 #endif
359 #ifdef TF
360     _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
361 #endif
362 #ifdef FT
363     _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
364 #endif
365 
366     nbBoolBlock = numberOfBools & 0x7F;
367     while(nbBoolBlock >= 32)
368     {
369        a = *pA++;
370        b = *pB++;
371        shift = 0;
372        while(shift < 32)
373        {
374           ba = a & 1;
375           bb = b & 1;
376           a = a >> 1;
377           b = b >> 1;
378 
379 #ifdef TT
380           _ctt += (ba && bb);
381 #endif
382 #ifdef FF
383           _cff += ((1 ^ ba) && (1 ^ bb));
384 #endif
385 #ifdef TF
386           _ctf += (ba && (1 ^ bb));
387 #endif
388 #ifdef FT
389           _cft += ((1 ^ ba) && bb);
390 #endif
391           shift ++;
392        }
393 
394        nbBoolBlock -= 32;
395     }
396 
397     a = *pA++;
398     b = *pB++;
399 
400     a = a >> (32 - nbBoolBlock);
401     b = b >> (32 - nbBoolBlock);
402 
403     while(nbBoolBlock > 0)
404     {
405           ba = a & 1;
406           bb = b & 1;
407           a = a >> 1;
408 
409           b = b >> 1;
410 #ifdef TT
411           _ctt += (ba && bb);
412 #endif
413 #ifdef FF
414           _cff += ((1 ^ ba) && (1 ^ bb));
415 #endif
416 #ifdef TF
417           _ctf += (ba && (1 ^ bb));
418 #endif
419 #ifdef FT
420           _cft += ((1 ^ ba) && bb);
421 #endif
422           nbBoolBlock --;
423     }
424 
425 #ifdef TT
426     *cTT = _ctt;
427 #endif
428 #ifdef FF
429     *cFF = _cff;
430 #endif
431 #ifdef TF
432     *cTF = _ctf;
433 #endif
434 #ifdef FT
435     *cFT = _cft;
436 #endif
437 }
438 
439 #else
440 
FUNC(EXT)441 void FUNC(EXT)(const uint32_t *pA
442        , const uint32_t *pB
443        , uint32_t numberOfBools
444 #ifdef TT
445        , uint32_t *cTT
446 #endif
447 #ifdef FF
448        , uint32_t *cFF
449 #endif
450 #ifdef TF
451        , uint32_t *cTF
452 #endif
453 #ifdef FT
454        , uint32_t *cFT
455 #endif
456        )
457 {
458 
459 #ifdef TT
460     uint32_t _ctt=0;
461 #endif
462 #ifdef FF
463     uint32_t _cff=0;
464 #endif
465 #ifdef TF
466     uint32_t _ctf=0;
467 #endif
468 #ifdef FT
469     uint32_t _cft=0;
470 #endif
471     uint32_t a,b,ba,bb;
472     int shift;
473 
474     while(numberOfBools >= 32)
475     {
476        a = *pA++;
477        b = *pB++;
478        shift = 0;
479        while(shift < 32)
480        {
481           ba = a & 1;
482           bb = b & 1;
483           a = a >> 1;
484           b = b >> 1;
485 #ifdef TT
486           _ctt += (ba && bb);
487 #endif
488 #ifdef FF
489           _cff += ((1 ^ ba) && (1 ^ bb));
490 #endif
491 #ifdef TF
492           _ctf += (ba && (1 ^ bb));
493 #endif
494 #ifdef FT
495           _cft += ((1 ^ ba) && bb);
496 #endif
497           shift ++;
498        }
499 
500        numberOfBools -= 32;
501     }
502 
503     a = *pA++;
504     b = *pB++;
505 
506     a = a >> (32 - numberOfBools);
507     b = b >> (32 - numberOfBools);
508 
509     while(numberOfBools > 0)
510     {
511           ba = a & 1;
512           bb = b & 1;
513           a = a >> 1;
514           b = b >> 1;
515 
516 #ifdef TT
517           _ctt += (ba && bb);
518 #endif
519 #ifdef FF
520           _cff += ((1 ^ ba) && (1 ^ bb));
521 #endif
522 #ifdef TF
523           _ctf += (ba && (1 ^ bb));
524 #endif
525 #ifdef FT
526           _cft += ((1 ^ ba) && bb);
527 #endif
528           numberOfBools --;
529     }
530 
531 #ifdef TT
532     *cTT = _ctt;
533 #endif
534 #ifdef FF
535     *cFF = _cff;
536 #endif
537 #ifdef TF
538     *cTF = _ctf;
539 #endif
540 #ifdef FT
541     *cFT = _cft;
542 #endif
543 }
544 #endif
545 #endif /* defined(ARM_MATH_MVEI) */
546 
547 
548 /**
549  * @} end of DISTANCEF group
550  */
551