1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_boolean_distance.c
5  * Description:  Templates for boolean distances
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 
31 
32 
/**
 * @defgroup DISTANCEF Distance Functions
 *
 * Computes distances between vectors.
 *
 * Distance functions are useful in many algorithms.
 *
 */
41 
42 
43 /**
44  * @addtogroup DISTANCEF
45  * @{
46  */
47 
48 
49 
50 
/* Token-pasting helper. It is kept separate from FUNC so that an argument
 * which is itself a macro gets expanded before the ## concatenation. */
#define _FUNC(A,B) A##B

/* Name of the generated function: arm_boolean_distance followed by EXT. */
#define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
54 
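/**
 * @brief        Elements of boolean distances
 *
 * Counts the bit-pair combinations from which boolean distances are computed.
 * Only the counters whose macro (TT, FF, TF, FT) is defined when this
 * template is compiled are computed, and only those output parameters exist.
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @param[out]   cTT             Count of positions true in both vectors (TT defined)
 * @param[out]   cFF             Count of positions false in both vectors (FF defined)
 * @param[out]   cTF             Count of positions true in pA and false in pB (TF defined)
 * @param[out]   cFT             Count of positions false in pA and true in pB (FT defined)
 * @return None
 *
 */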
66 
67 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
68 
69 #include "arm_common_tables.h"
70 
FUNC(EXT)71 void FUNC(EXT)(const uint32_t *pA
72        , const uint32_t *pB
73        , uint32_t numberOfBools
74 #ifdef TT
75        , uint32_t *cTT
76 #endif
77 #ifdef FF
78        , uint32_t *cFF
79 #endif
80 #ifdef TF
81        , uint32_t *cTF
82 #endif
83 #ifdef FT
84        , uint32_t *cFT
85 #endif
86        )
87 {
88 
89 #ifdef TT
90     uint32_t _ctt=0;
91 #endif
92 #ifdef FF
93     uint32_t _cff=0;
94 #endif
95 #ifdef TF
96     uint32_t _ctf=0;
97 #endif
98 #ifdef FT
99     uint32_t _cft=0;
100 #endif
101     uint32_t        a, b, ba, bb;
102     int shift;
103     const uint8_t  *pA8 = (const uint8_t *) pA;
104     const uint8_t  *pB8 = (const uint8_t *) pB;
105 
106     /* handle vector blocks */
107     uint32_t         blkCnt = numberOfBools / 128;
108 
109 
110 
111     while (blkCnt > 0U) {
112         uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
113         uint8x16_t      vecB = vld1q((const uint8_t *) pB8);
114 
115 #ifdef TT
116         uint8x16_t      vecTT = vecA & vecB;
117         vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
118         _ctt += vaddvq(vecTT);
119 #endif
120 #ifdef FF
121         uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
122         vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
123         _cff += vaddvq(vecFF);
124 #endif
125 #ifdef TF
126         uint8x16_t      vecTF = vecA & vmvnq(vecB);
127         vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
128         _ctf += vaddvq(vecTF);
129 #endif
130 #ifdef FT
131         uint8x16_t      vecFT = vmvnq(vecA) & vecB;
132         vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
133         _cft += vaddvq(vecFT);
134 #endif
135 
136         pA8 += 16;
137         pB8 += 16;
138         blkCnt--;
139 
140     }
141 
142     pA = (const uint32_t *)pA8;
143     pB = (const uint32_t *)pB8;
144 
145     blkCnt = numberOfBools & 0x7F;
146     while(blkCnt >= 32)
147     {
148        a = *pA++;
149        b = *pB++;
150        shift = 0;
151        while(shift < 32)
152        {
153           ba = a & 1;
154           bb = b & 1;
155           a = a >> 1;
156           b = b >> 1;
157 
158 #ifdef TT
159           _ctt += (ba && bb);
160 #endif
161 #ifdef FF
162           _cff += ((1 ^ ba) && (1 ^ bb));
163 #endif
164 #ifdef TF
165           _ctf += (ba && (1 ^ bb));
166 #endif
167 #ifdef FT
168           _cft += ((1 ^ ba) && bb);
169 #endif
170           shift ++;
171        }
172 
173        blkCnt -= 32;
174     }
175 
176     a = *pA++;
177     b = *pB++;
178 
179     a = a >> (32 - blkCnt);
180     b = b >> (32 - blkCnt);
181 
182     while(blkCnt > 0)
183     {
184           ba = a & 1;
185           bb = b & 1;
186           a = a >> 1;
187 
188           b = b >> 1;
189 #ifdef TT
190           _ctt += (ba && bb);
191 #endif
192 #ifdef FF
193           _cff += ((1 ^ ba) && (1 ^ bb));
194 #endif
195 #ifdef TF
196           _ctf += (ba && (1 ^ bb));
197 #endif
198 #ifdef FT
199           _cft += ((1 ^ ba) && bb);
200 #endif
201           blkCnt --;
202     }
203 
204 #ifdef TT
205     *cTT = _ctt;
206 #endif
207 #ifdef FF
208     *cFF = _cff;
209 #endif
210 #ifdef TF
211     *cTF = _ctf;
212 #endif
213 #ifdef FT
214     *cFT = _cft;
215 #endif
216 }
217 
218 #else
219 #if defined(ARM_MATH_NEON)
220 
221 
FUNC(EXT)222 void FUNC(EXT)(const uint32_t *pA
223        , const uint32_t *pB
224        , uint32_t numberOfBools
225 #ifdef TT
226        , uint32_t *cTT
227 #endif
228 #ifdef FF
229        , uint32_t *cFF
230 #endif
231 #ifdef TF
232        , uint32_t *cTF
233 #endif
234 #ifdef FT
235        , uint32_t *cFT
236 #endif
237        )
238 {
239 #ifdef TT
240     uint32_t _ctt=0;
241 #endif
242 #ifdef FF
243     uint32_t _cff=0;
244 #endif
245 #ifdef TF
246     uint32_t _ctf=0;
247 #endif
248 #ifdef FT
249     uint32_t _cft=0;
250 #endif
251     uint32_t nbBoolBlock;
252     uint32_t a,b,ba,bb;
253     int shift;
254     uint32x4_t aV, bV;
255 #ifdef TT
256     uint32x4_t cttV;
257 #endif
258 #ifdef FF
259     uint32x4_t cffV;
260 #endif
261 #ifdef TF
262     uint32x4_t ctfV;
263 #endif
264 #ifdef FT
265     uint32x4_t cftV;
266 #endif
267     uint8x16_t tmp;
268     uint16x8_t tmp2;
269     uint32x4_t tmp3;
270     uint64x2_t tmp4;
271 #ifdef TT
272     uint64x2_t tmp4tt;
273 #endif
274 #ifdef FF
275     uint64x2_t tmp4ff;
276 #endif
277 #ifdef TF
278     uint64x2_t tmp4tf;
279 #endif
280 #ifdef FT
281     uint64x2_t tmp4ft;
282 #endif
283 
284 #ifdef TT
285     tmp4tt = vdupq_n_u64(0);
286 #endif
287 #ifdef FF
288     tmp4ff = vdupq_n_u64(0);
289 #endif
290 #ifdef TF
291     tmp4tf = vdupq_n_u64(0);
292 #endif
293 #ifdef FT
294     tmp4ft = vdupq_n_u64(0);
295 #endif
296 
297     nbBoolBlock = numberOfBools >> 7;
298     while(nbBoolBlock > 0)
299     {
300        aV = vld1q_u32(pA);
301        bV = vld1q_u32(pB);
302        pA += 4;
303        pB += 4;
304 
305 #ifdef TT
306        cttV = vandq_u32(aV,bV);
307 #endif
308 #ifdef FF
309        cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
310 #endif
311 #ifdef TF
312        ctfV = vandq_u32(aV,vmvnq_u32(bV));
313 #endif
314 #ifdef FT
315        cftV = vandq_u32(vmvnq_u32(aV),bV);
316 #endif
317 
318 #ifdef TT
319        tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
320        tmp2 = vpaddlq_u8(tmp);
321        tmp3 = vpaddlq_u16(tmp2);
322        tmp4 = vpaddlq_u32(tmp3);
323        tmp4tt = vaddq_u64(tmp4tt, tmp4);
324 #endif
325 
326 #ifdef FF
327        tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
328        tmp2 = vpaddlq_u8(tmp);
329        tmp3 = vpaddlq_u16(tmp2);
330        tmp4 = vpaddlq_u32(tmp3);
331        tmp4ff = vaddq_u64(tmp4ff, tmp4);
332 #endif
333 
334 #ifdef TF
335        tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
336        tmp2 = vpaddlq_u8(tmp);
337        tmp3 = vpaddlq_u16(tmp2);
338        tmp4 = vpaddlq_u32(tmp3);
339        tmp4tf = vaddq_u64(tmp4tf, tmp4);
340 #endif
341 
342 #ifdef FT
343        tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
344        tmp2 = vpaddlq_u8(tmp);
345        tmp3 = vpaddlq_u16(tmp2);
346        tmp4 = vpaddlq_u32(tmp3);
347        tmp4ft = vaddq_u64(tmp4ft, tmp4);
348 #endif
349 
350 
351        nbBoolBlock --;
352     }
353 
354 #ifdef TT
355     _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
356 #endif
357 #ifdef FF
358     _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
359 #endif
360 #ifdef TF
361     _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
362 #endif
363 #ifdef FT
364     _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
365 #endif
366 
367     nbBoolBlock = numberOfBools & 0x7F;
368     while(nbBoolBlock >= 32)
369     {
370        a = *pA++;
371        b = *pB++;
372        shift = 0;
373        while(shift < 32)
374        {
375           ba = a & 1;
376           bb = b & 1;
377           a = a >> 1;
378           b = b >> 1;
379 
380 #ifdef TT
381           _ctt += (ba && bb);
382 #endif
383 #ifdef FF
384           _cff += ((1 ^ ba) && (1 ^ bb));
385 #endif
386 #ifdef TF
387           _ctf += (ba && (1 ^ bb));
388 #endif
389 #ifdef FT
390           _cft += ((1 ^ ba) && bb);
391 #endif
392           shift ++;
393        }
394 
395        nbBoolBlock -= 32;
396     }
397 
398     a = *pA++;
399     b = *pB++;
400 
401     a = a >> (32 - nbBoolBlock);
402     b = b >> (32 - nbBoolBlock);
403 
404     while(nbBoolBlock > 0)
405     {
406           ba = a & 1;
407           bb = b & 1;
408           a = a >> 1;
409 
410           b = b >> 1;
411 #ifdef TT
412           _ctt += (ba && bb);
413 #endif
414 #ifdef FF
415           _cff += ((1 ^ ba) && (1 ^ bb));
416 #endif
417 #ifdef TF
418           _ctf += (ba && (1 ^ bb));
419 #endif
420 #ifdef FT
421           _cft += ((1 ^ ba) && bb);
422 #endif
423           nbBoolBlock --;
424     }
425 
426 #ifdef TT
427     *cTT = _ctt;
428 #endif
429 #ifdef FF
430     *cFF = _cff;
431 #endif
432 #ifdef TF
433     *cTF = _ctf;
434 #endif
435 #ifdef FT
436     *cFT = _cft;
437 #endif
438 }
439 
440 #else
441 
FUNC(EXT)442 void FUNC(EXT)(const uint32_t *pA
443        , const uint32_t *pB
444        , uint32_t numberOfBools
445 #ifdef TT
446        , uint32_t *cTT
447 #endif
448 #ifdef FF
449        , uint32_t *cFF
450 #endif
451 #ifdef TF
452        , uint32_t *cTF
453 #endif
454 #ifdef FT
455        , uint32_t *cFT
456 #endif
457        )
458 {
459 
460 #ifdef TT
461     uint32_t _ctt=0;
462 #endif
463 #ifdef FF
464     uint32_t _cff=0;
465 #endif
466 #ifdef TF
467     uint32_t _ctf=0;
468 #endif
469 #ifdef FT
470     uint32_t _cft=0;
471 #endif
472     uint32_t a,b,ba,bb;
473     int shift;
474 
475     while(numberOfBools >= 32)
476     {
477        a = *pA++;
478        b = *pB++;
479        shift = 0;
480        while(shift < 32)
481        {
482           ba = a & 1;
483           bb = b & 1;
484           a = a >> 1;
485           b = b >> 1;
486 #ifdef TT
487           _ctt += (ba && bb);
488 #endif
489 #ifdef FF
490           _cff += ((1 ^ ba) && (1 ^ bb));
491 #endif
492 #ifdef TF
493           _ctf += (ba && (1 ^ bb));
494 #endif
495 #ifdef FT
496           _cft += ((1 ^ ba) && bb);
497 #endif
498           shift ++;
499        }
500 
501        numberOfBools -= 32;
502     }
503 
504     a = *pA++;
505     b = *pB++;
506 
507     a = a >> (32 - numberOfBools);
508     b = b >> (32 - numberOfBools);
509 
510     while(numberOfBools > 0)
511     {
512           ba = a & 1;
513           bb = b & 1;
514           a = a >> 1;
515           b = b >> 1;
516 
517 #ifdef TT
518           _ctt += (ba && bb);
519 #endif
520 #ifdef FF
521           _cff += ((1 ^ ba) && (1 ^ bb));
522 #endif
523 #ifdef TF
524           _ctf += (ba && (1 ^ bb));
525 #endif
526 #ifdef FT
527           _cft += ((1 ^ ba) && bb);
528 #endif
529           numberOfBools --;
530     }
531 
532 #ifdef TT
533     *cTT = _ctt;
534 #endif
535 #ifdef FF
536     *cFF = _cff;
537 #endif
538 #ifdef TF
539     *cTF = _ctf;
540 #endif
541 #ifdef FT
542     *cFT = _cft;
543 #endif
544 }
545 #endif
546 #endif /* defined(ARM_MATH_MVEI) */
547 
548 
549 /**
550  * @} end of DISTANCEF group
551  */
552