1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_power_f16.c
4  * Description:  Sum of the squares of the elements of a floating-point vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 
34 /**
35   @ingroup groupStats
36  */
37 
38 
39 
40 /**
41   @addtogroup power
42   @{
43  */
44 
45 /**
  @brief         Sum of the squares of the elements of a half-precision floating-point vector.
47   @param[in]     pSrc       points to the input vector
48   @param[in]     blockSize  number of samples in input vector
49   @param[out]    pResult    sum of the squares value returned here
50   @return        none
51  */
52 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
53 
54 #include "arm_helium_utils.h"
55 
/*
 * Helium (MVE) implementation: accumulates the squares of the input samples
 * lane by lane in a float16 vector, then collapses that vector into the
 * single value written to *pResult.
 */
void arm_power_f16(
  const float16_t * pSrc,
  uint32_t blockSize,
  float16_t * pResult)
{
    f16x8_t         accVec = vdupq_n_f16(0.0f);   /* per-lane running sums, start at zero */
    f16x8_t         inVec;                        /* samples fetched in the current pass */
    int32_t         remaining = blockSize;        /* samples not yet consumed */

    /*
     * The body always runs at least once; each pass consumes up to 8
     * samples and the predicate is rebuilt from the remaining count.
     */
    do
    {
        /* Predicate derived from the number of samples still pending */
        mve_pred16_t    lanes = vctp16q(remaining);

        /* Predicated (zeroing) load of the next group of samples */
        inVec = vldrhq_z_f16((float16_t const *) pSrc, lanes);

        /* Predicated multiply-accumulate: accVec += inVec * inVec */
        accVec = vfmaq_m(accVec, inVec, inVec, lanes);

        /* Advance to the next group of 8 samples */
        pSrc += 8;
        remaining -= 8;
    }
    while (remaining > 0);

    /* Collapse the accumulator vector to a scalar and store it */
    *pResult = vecAddAcrossF16Mve(accVec);
}
83 #else
84 
/*
 * Scalar implementation: walks the input once, adding the square of every
 * sample to a half-precision accumulator, and stores the total in *pResult.
 * With ARM_MATH_LOOPUNROLL (and no auto-vectorisation) the bulk of the
 * samples is handled four per iteration; the leftovers go through the
 * single-sample loop below.
 */
void arm_power_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
        _Float16 acc = 0.0f16;                         /* Running sum of squares */
        _Float16 x;                                    /* Sample currently being squared */
        uint32_t count;                                /* Iterations left in the active loop */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled section: one iteration per group of four samples */
  for (count = blockSize >> 2U; count != 0U; count--)
  {
    /* acc += A[k]^2 for the four samples of this group, in order */
    x = pSrc[0];
    acc += x * x;

    x = pSrc[1];
    acc += x * x;

    x = pSrc[2];
    acc += x * x;

    x = pSrc[3];
    acc += x * x;

    /* Step past the group just processed */
    pSrc += 4;
  }

  /* Samples left over after the groups of four (blockSize modulo 4) */
  count = blockSize & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = blockSize;

#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

  /* One sample per iteration: acc += A[k]^2 */
  for (; count != 0U; count--)
  {
    x = *pSrc++;
    acc += x * x;
  }

  /* Hand the accumulated sum back to the caller */
  *pResult = acc;
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
146 
147 /**
148   @} end of power group
149  */
150 
151 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
152 
153