1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_chebyshev_distance_f16.c
5  * Description:  Chebyshev distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions_f16.h"
31 
32 #if defined(ARM_FLOAT16_SUPPORTED)
33 
34 #include <limits.h>
35 #include <math.h>
36 
37 /**
38   @ingroup FloatDist
39  */
40 
41 /**
42   @defgroup Chebyshev Chebyshev distance
43 
44   Chebyshev distance
45  */
46 
47 /**
48   @addtogroup Chebyshev
49   @{
50  */
51 
52 
53 /**
54  * @brief        Chebyshev distance between two vectors
55  * @param[in]    pA         First vector
56  * @param[in]    pB         Second vector
57  * @param[in]    blockSize  vector length
58  * @return distance
59  *
60  */
61 
62 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
63 
64 #include "arm_helium_utils.h"
65 #include "arm_vec_math.h"
66 
arm_chebyshev_distance_f16(const float16_t * pA,const float16_t * pB,uint32_t blockSize)67 float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
68 {
69     uint32_t        blkCnt;     /* loop counters */
70     f16x8_t         vecA, vecB;
71     f16x8_t         vecDiff = vdupq_n_f16(0.0);
72     float16_t       maxValue = 0.0f16;
73 
74 
75     blkCnt = blockSize >> 3;
76     while (blkCnt > 0U) {
77         vecA = vld1q(pA);
78         pA += 8;
79         vecB = vld1q(pB);
80         pB += 8;
81         /*
82          * update per-lane max.
83          */
84         vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff);
85         /*
86          * Decrement the blockSize loop counter
87          */
88         blkCnt--;
89     }
90     /*
91      * tail
92      * (will be merged thru tail predication)
93      */
94     blkCnt = blockSize & 7;
95     if (blkCnt > 0U) {
96         mve_pred16_t    p0 = vctp16q(blkCnt);
97 
98         vecA = vldrhq_z_f16(pA, p0);
99         vecB = vldrhq_z_f16(pB, p0);
100 
101         /*
102          * Get current max per lane and current index per lane
103          * when a max is selected
104          */
105         vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0);
106     }
107     /*
108      * Get max value across the vector
109      */
110     return vmaxnmavq(maxValue, vecDiff);
111 }
112 
113 #else
arm_chebyshev_distance_f16(const float16_t * pA,const float16_t * pB,uint32_t blockSize)114 float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
115 {
116    _Float16 diff=0.0f,  maxVal,tmpA, tmpB;
117 
118    tmpA = *pA++;
119    tmpB = *pB++;
120    diff = fabsf(tmpA - tmpB);
121    maxVal = diff;
122    blockSize--;
123 
124    while(blockSize > 0)
125    {
126       tmpA = *pA++;
127       tmpB = *pB++;
128       diff = fabsf(tmpA - tmpB);
129       if (diff > maxVal)
130       {
131         maxVal = diff;
132       }
133       blockSize --;
134    }
135 
136    return(maxVal);
137 }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
139 
140 
141 /**
142  * @} end of Chebyshev group
143  */
144 
145 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
146 
147