1 /******************************************************************************
2  * @file     arm_sorting.h
3  * @brief    Private header file for CMSIS DSP Library
4  * @version  V1.7.0
5  * @date     2019
6  ******************************************************************************/
7 /*
8  * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
9  *
10  * SPDX-License-Identifier: Apache-2.0
11  *
12  * Licensed under the Apache License, Version 2.0 (the License); you may
13  * not use this file except in compliance with the License.
14  * You may obtain a copy of the License at
15  *
16  * www.apache.org/licenses/LICENSE-2.0
17  *
18  * Unless required by applicable law or agreed to in writing, software
19  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
20  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  * See the License for the specific language governing permissions and
22  * limitations under the License.
23  */
24 
25 #ifndef _ARM_SORTING_H_
26 #define _ARM_SORTING_H_
27 
28 #include "arm_math.h"
29 
30 #ifdef   __cplusplus
31 extern "C"
32 {
33 #endif
34 
35   /**
36    * @param[in]  S          points to an instance of the sorting structure.
37    * @param[in]  pSrc       points to the block of input data.
38    * @param[out] pDst       points to the block of output data.
39    * @param[in]  blockSize  number of samples to process.
40    */
41   void arm_bubble_sort_f32(
42     const arm_sort_instance_f32 * S,
43           float32_t * pSrc,
44           float32_t * pDst,
45     uint32_t blockSize);
46 
47    /**
48    * @param[in]  S          points to an instance of the sorting structure.
49    * @param[in]  pSrc       points to the block of input data.
50    * @param[out] pDst       points to the block of output data.
51    * @param[in]  blockSize  number of samples to process.
52    */
53   void arm_heap_sort_f32(
54     const arm_sort_instance_f32 * S,
55           float32_t * pSrc,
56           float32_t * pDst,
57     uint32_t blockSize);
58 
59   /**
60    * @param[in]  S          points to an instance of the sorting structure.
61    * @param[in]  pSrc       points to the block of input data.
62    * @param[out] pDst       points to the block of output data.
63    * @param[in]  blockSize  number of samples to process.
64    */
65   void arm_insertion_sort_f32(
66     const arm_sort_instance_f32 * S,
67           float32_t *pSrc,
68           float32_t* pDst,
69     uint32_t blockSize);
70 
71   /**
72    * @param[in]  S          points to an instance of the sorting structure.
73    * @param[in]  pSrc       points to the block of input data.
74    * @param[out] pDst       points to the block of output data
75    * @param[in]  blockSize  number of samples to process.
76    */
77   void arm_quick_sort_f32(
78     const arm_sort_instance_f32 * S,
79           float32_t * pSrc,
80           float32_t * pDst,
81     uint32_t blockSize);
82 
83   /**
84    * @param[in]  S          points to an instance of the sorting structure.
85    * @param[in]  pSrc       points to the block of input data.
86    * @param[out] pDst       points to the block of output data
87    * @param[in]  blockSize  number of samples to process.
88    */
89   void arm_selection_sort_f32(
90     const arm_sort_instance_f32 * S,
91           float32_t * pSrc,
92           float32_t * pDst,
93     uint32_t blockSize);
94 
95   /**
96    * @param[in]  S          points to an instance of the sorting structure.
97    * @param[in]  pSrc       points to the block of input data.
98    * @param[out] pDst       points to the block of output data
99    * @param[in]  blockSize  number of samples to process.
100    */
101   void arm_bitonic_sort_f32(
102     const arm_sort_instance_f32 * S,
103           float32_t * pSrc,
104           float32_t * pDst,
105           uint32_t blockSize);
106 
107 #if defined(ARM_MATH_NEON)
108 
/*
 * vtrn256_128q(a, b): exchange (a).val[1] with (b).val[0] through a
 * float32x4_t temporary; (a).val[0] and (b).val[1] are not touched.
 *
 * Both arguments must be modifiable lvalues with a `val[2]` member and are
 * evaluated twice, so pass expressions without side effects. The arguments
 * are parenthesised so that non-primary expressions such as `*ptr` expand
 * correctly.
 */
#define vtrn256_128q(a, b)                     \
do {                                           \
    float32x4_t vtrn128_temp = (a).val[1];     \
    (a).val[1] = (b).val[0];                   \
    (b).val[0] = vtrn128_temp;                 \
} while (0)
115 
/*
 * vtrn128_64q(a, b): regroup the 64-bit halves of two float32x4_t values.
 * After the macro:
 *   a = vcombine_f32(low half of old a,  low half of old b)
 *   b = vcombine_f32(high half of old a, high half of old b)
 *
 * All four halves are read before either argument is written. The
 * temporaries carry a macro-specific prefix so they cannot capture a caller
 * variable that happens to be called `ab`, `cd`, `ef` or `gh`.
 * Each argument must be a modifiable lvalue and is evaluated three times.
 */
#define vtrn128_64q(a, b)                                  \
do {                                                       \
    float32x2_t vtrn64_a_lo = vget_low_f32(a);             \
    float32x2_t vtrn64_b_lo = vget_low_f32(b);             \
    float32x2_t vtrn64_a_hi = vget_high_f32(a);            \
    float32x2_t vtrn64_b_hi = vget_high_f32(b);            \
    (a) = vcombine_f32(vtrn64_a_lo, vtrn64_b_lo);          \
    (b) = vcombine_f32(vtrn64_a_hi, vtrn64_b_hi);          \
} while (0)
126 
/*
 * vtrn256_64q(a, b): regroup the 64-bit halves of two float32x4x2_t values.
 * With aN / bN denoting the N-th 64-bit half (0..3) of the old a / b:
 *   a.val[0] = { a0, b0 }    a.val[1] = { a2, b2 }
 *   b.val[0] = { a1, b1 }    b.val[1] = { a3, b3 }
 *
 * Every half is read before any store. The temporaries use a macro-specific
 * prefix so that caller variables named `a_0`, `b_1`, ... are not captured,
 * and the arguments are parenthesised so expressions such as `*ptr` work.
 * Arguments must be modifiable lvalues and are evaluated several times.
 */
#define vtrn256_64q(a, b)                                           \
do {                                                                \
    float32x2_t vtrn64_a0 = vget_low_f32((a).val[0]);               \
    float32x2_t vtrn64_a1 = vget_high_f32((a).val[0]);              \
    float32x2_t vtrn64_a2 = vget_low_f32((a).val[1]);               \
    float32x2_t vtrn64_a3 = vget_high_f32((a).val[1]);              \
    float32x2_t vtrn64_b0 = vget_low_f32((b).val[0]);               \
    float32x2_t vtrn64_b1 = vget_high_f32((b).val[0]);              \
    float32x2_t vtrn64_b2 = vget_low_f32((b).val[1]);               \
    float32x2_t vtrn64_b3 = vget_high_f32((b).val[1]);              \
    (a).val[0] = vcombine_f32(vtrn64_a0, vtrn64_b0);                \
    (a).val[1] = vcombine_f32(vtrn64_a2, vtrn64_b2);                \
    (b).val[0] = vcombine_f32(vtrn64_a1, vtrn64_b1);                \
    (b).val[1] = vcombine_f32(vtrn64_a3, vtrn64_b3);                \
} while (0)
144 
/*
 * vtrn128_32q(a, b): in-place wrapper around vtrnq_f32.
 * Calls vtrnq_f32(a, b) once and stores the returned pair back:
 * a receives .val[0] and b receives .val[1].
 * Both arguments must be modifiable float32x4_t lvalues; each is
 * evaluated twice.
 */
#define vtrn128_32q(a, b)                                  \
do {                                                       \
    float32x4x2_t vtrn32_pair = vtrnq_f32((a), (b));       \
    (a) = vtrn32_pair.val[0];                              \
    (b) = vtrn32_pair.val[1];                              \
} while (0)
151 
/*
 * vtrn256_32q(a, b): apply vtrnq_f32 to the matching halves of two
 * float32x4x2_t values and store the results back in place.
 *   t1 = vtrnq_f32(a.val[0], b.val[0]);  t2 = vtrnq_f32(a.val[1], b.val[1]);
 *   a.val[0] = t1.val[0];  a.val[1] = t2.val[0];
 *   b.val[0] = t1.val[1];  b.val[1] = t2.val[1];
 *
 * The arguments are parenthesised before `.val[...]` is applied so that
 * expressions such as `*ptr` expand correctly. Arguments must be
 * modifiable lvalues and are evaluated several times.
 */
#define vtrn256_32q(a, b)                                                   \
do {                                                                        \
    float32x4x2_t vtrn32_tmp_1 = vtrnq_f32((a).val[0], (b).val[0]);         \
    float32x4x2_t vtrn32_tmp_2 = vtrnq_f32((a).val[1], (b).val[1]);         \
    (a).val[0] = vtrn32_tmp_1.val[0];                                       \
    (a).val[1] = vtrn32_tmp_2.val[0];                                       \
    (b).val[0] = vtrn32_tmp_1.val[1];                                       \
    (b).val[1] = vtrn32_tmp_2.val[1];                                       \
} while (0)
161 
/*
 * vminmaxq(a, b): in-place min/max of two float32x4_t values.
 * A copy of a is taken first; then a receives vminq_f32(a, b) and
 * b receives vmaxq_f32(saved a, b).
 * Both arguments must be modifiable lvalues and are evaluated more than once.
 */
#define vminmaxq(a, b)                               \
do {                                                 \
    float32x4_t vminmaxq_saved_a = (a);              \
    (a) = vminq_f32((a), (b));                       \
    (b) = vmaxq_f32(vminmaxq_saved_a, (b));          \
} while (0)
168 
/*
 * vminmax256q(a, b): in-place min/max of two float32x4x2_t values.
 * A copy of a is taken first; then each half of a receives
 * vminq_f32(a.val[i], b.val[i]) and each half of b receives
 * vmaxq_f32(saved a.val[i], b.val[i]).
 *
 * The arguments are parenthesised before `.val[...]` is applied so that
 * expressions such as `*ptr` expand correctly. Arguments must be
 * modifiable lvalues and are evaluated several times.
 */
#define vminmax256q(a, b)                                          \
do {                                                               \
    float32x4x2_t minmax256_tmp = (a);                             \
    (a).val[0] = vminq_f32((a).val[0], (b).val[0]);                \
    (a).val[1] = vminq_f32((a).val[1], (b).val[1]);                \
    (b).val[0] = vmaxq_f32(minmax256_tmp.val[0], (b).val[0]);      \
    (b).val[1] = vmaxq_f32(minmax256_tmp.val[1], (b).val[1]);      \
} while (0)
177 
/*
 * vrev128q_f32(a): expression macro yielding
 *   vcombine_f32(vrev64_f32(high half of a), vrev64_f32(low half of a)),
 * i.e. the two 64-bit halves are swapped and each is passed through
 * vrev64_f32. The argument itself is not modified.
 * Note: `a` is evaluated twice, so it must not have side effects.
 */
#define vrev128q_f32(a)                                  \
    vcombine_f32(vrev64_f32(vget_high_f32(a)),           \
                 vrev64_f32(vget_low_f32(a)))
180 
/*
 * vrev256q_f32(a): in-place reversal step for a float32x4x2_t value.
 * Each half is rebuilt as
 *   vcombine_f32(vrev64_f32(high half), vrev64_f32(low half))
 * and the two rebuilt halves are then stored in swapped positions
 * (old val[0] ends up in val[1] and vice versa).
 *
 * The argument is parenthesised before `.val[...]` is applied so that
 * expressions such as `*ptr` expand correctly. It must be a modifiable
 * lvalue and is evaluated several times.
 */
#define vrev256q_f32(a)                                                          \
do {                                                                             \
    float32x4_t rev_tmp = vcombine_f32(vrev64_f32(vget_high_f32((a).val[0])),    \
                                       vrev64_f32(vget_low_f32((a).val[0])));    \
    (a).val[0] = vcombine_f32(vrev64_f32(vget_high_f32((a).val[1])),             \
                              vrev64_f32(vget_low_f32((a).val[1])));             \
    (a).val[1] = rev_tmp;                                                        \
} while (0)
187 
/*
 * vldrev128q_f32(a, p): load-and-reverse helper.
 * First assigns vld1q_f32(p) to a, then replaces a with vrev128q_f32(a)
 * (the macro of that name defined in this header).
 * `a` must be a modifiable float32x4_t lvalue and is evaluated several
 * times; `p` is evaluated once.
 */
#define vldrev128q_f32(a, p)            \
do {                                    \
    (a) = vld1q_f32(p);                 \
    (a) = vrev128q_f32(a);              \
} while (0)
193 
194 #endif /* ARM_MATH_NEON */
195 
196 #ifdef   __cplusplus
197 }
198 #endif
199 
#endif /* _ARM_SORTING_H_ */
201