1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_abs_f16.c
4  * Description:  Floating-point vector absolute value
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions_f16.h"
30 #include <math.h>
31 
32 /**
33   @ingroup groupMath
34  */
35 
36 
37 /**
38   @addtogroup BasicAbs
39   @{
40  */
41 
42 /**
43   @brief         Floating-point vector absolute value.
44   @param[in]     pSrc       points to the input vector
45   @param[out]    pDst       points to the output vector
46   @param[in]     blockSize  number of samples in each vector
47  */
48 
49 
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
53 
/*
 * Helium (MVE) implementation of the half-precision vector absolute value.
 *
 * pSrc       points to the input vector
 * pDst       points to the output vector
 * blockSize  number of samples in each vector
 *
 * The bulk of the data is processed 8 samples (one 128-bit vector) at a
 * time; the remaining 1..7 samples are handled by a single predicated
 * load/store so that no element beyond blockSize is read or written.
 */
void arm_abs_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                               /* Loop counter */
    f16x8_t vec1;
    f16x8_t res;


    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        /* C = |A| */

        /* Calculate absolute values and then store the results in the destination buffer. */
        vec1 = vld1q(pSrc);
        res = vabsq(vec1);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrc += 8;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 0..7 samples left */
    blkCnt = blockSize & 0x7U;


    if (blkCnt > 0U)
    {
      /* C = |A| */
      /* Predicate enabling only the first blkCnt 16-bit lanes */
      mve_pred16_t p0 = vctp16q(blkCnt);

      /*
       * Predicated (zeroing) load: lanes disabled by p0 are not fetched,
       * so the source buffer is never read past its last element.
       */
      vec1 = vld1q_z(pSrc, p0);

      /* Predicated store: only the first blkCnt lanes are written */
      vstrhq_p(pDst, vabsq(vec1), p0);
    }

}
97 
98 #else
99 #if defined(ARM_FLOAT16_SUPPORTED)
/*
 * Neon / scalar implementation of the half-precision vector absolute value.
 *
 * pSrc       points to the input vector
 * pDst       points to the output vector
 * blockSize  number of samples in each vector
 *
 * Depending on the build configuration the bulk of the data is processed
 * either with 128-bit Neon vectors (8 samples per iteration), with a 4x
 * unrolled scalar loop, or not at all; whatever is left over is finished
 * by the scalar loop at the end of the function.
 */
void arm_abs_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t blockSize)
{
        uint32_t blkCnt;                               /* Loop counter */

#if defined(ARM_MATH_NEON_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
    f16x8_t vec1;
    f16x8_t res;

    /*
     * Compute 8 outputs at a time: vld1q_f16 / vst1q_f16 move a full
     * 128-bit vector, i.e. 8 half-precision samples, so the loop count,
     * the pointer increment and the tail mask must all be based on 8.
     */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        /* C = |A| */

        /* Calculate absolute values and then store the results in the destination buffer. */
        vec1 = vld1q_f16(pSrc);
        res = vabsq_f16(vec1);
        vst1q_f16(pDst, res);

        /* Increment pointers */
        pSrc += 8;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 0..7 samples left for the scalar loop below */
    blkCnt = blockSize & 0x7U;

#else
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = blockSize >> 2U;

  while (blkCnt > 0U)
  {
    /* C = |A| */

    /* Calculate absolute and store result in destination buffer. */
    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);

    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);

    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);

    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = blockSize % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */
#endif /* #if defined(ARM_MATH_NEON_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */

  /* Scalar loop: processes the samples not covered by the code above */
  while (blkCnt > 0U)
  {
    /* C = |A| */

    /* Calculate absolute and store result in destination buffer. */
    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);

    /* Decrement loop counter */
    blkCnt--;
  }

}
#endif /* defined(ARM_FLOAT16_SUPPORTED) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
182 /**
183   @} end of BasicAbs group
184  */
185