1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_offset_f32.c
4  * Description:  Floating-point vector offset
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 /**
36   @defgroup BasicOffset Vector Offset
37 
38   Adds a constant offset to each element of a vector.
39 
40   <pre>
41       pDst[n] = pSrc[n] + offset,   0 <= n < blockSize.
42   </pre>
43 
44   The functions support in-place computation allowing the source and
45   destination pointers to reference the same memory buffer.
46   There are separate functions for floating-point, Q7, Q15, and Q31 data types.
47  */
48 
49 /**
50   @addtogroup BasicOffset
51   @{
52  */
53 
54 /**
55   @brief         Adds a constant offset to a floating-point vector.
56   @param[in]     pSrc       points to the input vector
57   @param[in]     offset     is the offset to be added
58   @param[out]    pDst       points to the output vector
59   @param[in]     blockSize  number of samples in each vector
60  */
61 
62 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
63 
64 #include "arm_helium_utils.h"
65 
arm_offset_f32(const float32_t * pSrc,float32_t offset,float32_t * pDst,uint32_t blockSize)66 ARM_DSP_ATTRIBUTE void arm_offset_f32(
67   const float32_t * pSrc,
68         float32_t offset,
69         float32_t * pDst,
70         uint32_t blockSize)
71 {
72         uint32_t blkCnt;                               /* Loop counter */
73 
74     f32x4_t vec1;
75     f32x4_t res;
76 
77     /* Compute 4 outputs at a time */
78     blkCnt = blockSize >> 2U;
79     while (blkCnt > 0U)
80     {
81         /* C = A + offset */
82 
83         /* Add offset and then store the results in the destination buffer. */
84         vec1 = vld1q(pSrc);
85         res = vaddq(vec1,offset);
86         vst1q(pDst, res);
87 
88         /* Increment pointers */
89         pSrc += 4;
90         pDst += 4;
91 
92         /* Decrement the loop counter */
93         blkCnt--;
94     }
95 
96     /* Tail */
97     blkCnt = blockSize & 0x3;
98 
99     if (blkCnt > 0U)
100     {
101         mve_pred16_t p0 = vctp32q(blkCnt);
102         vec1 = vld1q((float32_t const *) pSrc);
103         vstrwq_p(pDst, vaddq(vec1, offset), p0);
104     }
105 
106 
107 }
108 
109 #else
arm_offset_f32(const float32_t * pSrc,float32_t offset,float32_t * pDst,uint32_t blockSize)110 ARM_DSP_ATTRIBUTE void arm_offset_f32(
111   const float32_t * pSrc,
112         float32_t offset,
113         float32_t * pDst,
114         uint32_t blockSize)
115 {
116         uint32_t blkCnt;                               /* Loop counter */
117 
118 #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
119     f32x4_t vec1;
120     f32x4_t res;
121 
122     /* Compute 4 outputs at a time */
123     blkCnt = blockSize >> 2U;
124 
125     while (blkCnt > 0U)
126     {
127         /* C = A + offset */
128 
129         /* Add offset and then store the results in the destination buffer. */
130         vec1 = vld1q_f32(pSrc);
131         res = vaddq_f32(vec1,vdupq_n_f32(offset));
132         vst1q_f32(pDst, res);
133 
134         /* Increment pointers */
135         pSrc += 4;
136         pDst += 4;
137 
138         /* Decrement the loop counter */
139         blkCnt--;
140     }
141 
142     /* Tail */
143     blkCnt = blockSize & 0x3;
144 
145 #else
146 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
147 
148   /* Loop unrolling: Compute 4 outputs at a time */
149   blkCnt = blockSize >> 2U;
150 
151   while (blkCnt > 0U)
152   {
153     /* C = A + offset */
154 
155     /* Add offset and store result in destination buffer. */
156     *pDst++ = (*pSrc++) + offset;
157 
158     *pDst++ = (*pSrc++) + offset;
159 
160     *pDst++ = (*pSrc++) + offset;
161 
162     *pDst++ = (*pSrc++) + offset;
163 
164     /* Decrement loop counter */
165     blkCnt--;
166   }
167 
168   /* Loop unrolling: Compute remaining outputs */
169   blkCnt = blockSize % 0x4U;
170 
171 #else
172 
173   /* Initialize blkCnt with number of samples */
174   blkCnt = blockSize;
175 
176 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
177 #endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
178 
179   while (blkCnt > 0U)
180   {
181     /* C = A + offset */
182 
183     /* Add offset and store result in destination buffer. */
184     *pDst++ = (*pSrc++) + offset;
185 
186     /* Decrement loop counter */
187     blkCnt--;
188   }
189 
190 }
191 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
192 
193 /**
194   @} end of BasicOffset group
195  */
196