1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_max_no_idx_f16.c
4  * Description:  Maximum value of a floating-point vector without returning the index
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
34 #include <limits.h>
35 #endif
36 
37 /**
38   @ingroup groupStats
39  */
40 
41 
42 /**
43   @addtogroup Max
44   @{
45  */
46 
47 /**
48   @brief         Maximum value of a floating-point vector.
49   @param[in]     pSrc       points to the input vector
50   @param[in]     blockSize  number of samples in input vector
51   @param[out]    pResult    maximum value returned here
52  */
53 
54 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
55 
arm_max_no_idx_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult)56 ARM_DSP_ATTRIBUTE void arm_max_no_idx_f16(
57     const float16_t *pSrc,
58     uint32_t   blockSize,
59     float16_t *pResult)
60 {
61    f16x8_t     vecSrc;
62    f16x8_t     curExtremValVec = vdupq_n_f16(F16_MIN);
63    float16_t   maxValue = F16_MIN;
64    float16_t   newVal;
65    uint32_t    blkCnt;
66 
67    /* Loop unrolling: Compute 4 outputs at a time */
68    blkCnt = blockSize >> 3U;
69 
70    while (blkCnt > 0U)
71    {
72 
73         vecSrc = vldrhq_f16(pSrc);
74         /*
75          * update per-lane max.
76          */
77         curExtremValVec = vmaxnmq(vecSrc, curExtremValVec);
78         /*
79          * Decrement the blockSize loop counter
80          * Advance vector source and destination pointers
81          */
82         pSrc += 8;
83         blkCnt --;
84     }
85     /*
86      * Get max value across the vector
87      */
88     maxValue = vmaxnmvq(maxValue, curExtremValVec);
89 
90     blkCnt = blockSize & 7;
91 
92     while (blkCnt > 0U)
93     {
94         newVal = *pSrc++;
95 
96         /* compare for the maximum value */
97         if ((_Float16)maxValue < (_Float16)newVal)
98         {
99             /* Update the maximum value and it's index */
100             maxValue = newVal;
101         }
102 
103         blkCnt --;
104     }
105 
106     *pResult = maxValue;
107 }
108 
109 #else
110 
arm_max_no_idx_f16(const float16_t * pSrc,uint32_t blockSize,float16_t * pResult)111 ARM_DSP_ATTRIBUTE void arm_max_no_idx_f16(
112     const float16_t *pSrc,
113     uint32_t   blockSize,
114     float16_t *pResult)
115 {
116    float16_t   maxValue = F16_MIN;
117    float16_t   newVal;
118 
119    while (blockSize > 0U)
120    {
121        newVal = *pSrc++;
122 
123        /* compare for the maximum value */
124        if ((_Float16)maxValue < (_Float16)newVal)
125        {
126            /* Update the maximum value and it's index */
127            maxValue = newVal;
128        }
129 
130        blockSize --;
131    }
132 
133    *pResult = maxValue;
134 }
135 
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
137 
138 /**
139   @} end of Max group
140  */
141 
142 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
143 
144