1 /****************************************************************************** 2 * @file arm_sorting.h 3 * @brief Private header file for CMSIS DSP Library 4 * @version V1.7.0 5 * @date 2019 6 ******************************************************************************/ 7 /* 8 * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved. 9 * 10 * SPDX-License-Identifier: Apache-2.0 11 * 12 * Licensed under the Apache License, Version 2.0 (the License); you may 13 * not use this file except in compliance with the License. 14 * You may obtain a copy of the License at 15 * 16 * www.apache.org/licenses/LICENSE-2.0 17 * 18 * Unless required by applicable law or agreed to in writing, software 19 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 20 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 * See the License for the specific language governing permissions and 22 * limitations under the License. 23 */ 24 25 #ifndef _ARM_SORTING_H_ 26 #define _ARM_SORTING_H_ 27 28 #include "arm_math.h" 29 30 #ifdef __cplusplus 31 extern "C" 32 { 33 #endif 34 35 /** 36 * @param[in] S points to an instance of the sorting structure. 37 * @param[in] pSrc points to the block of input data. 38 * @param[out] pDst points to the block of output data. 39 * @param[in] blockSize number of samples to process. 40 */ 41 void arm_bubble_sort_f32( 42 const arm_sort_instance_f32 * S, 43 float32_t * pSrc, 44 float32_t * pDst, 45 uint32_t blockSize); 46 47 /** 48 * @param[in] S points to an instance of the sorting structure. 49 * @param[in] pSrc points to the block of input data. 50 * @param[out] pDst points to the block of output data. 51 * @param[in] blockSize number of samples to process. 52 */ 53 void arm_heap_sort_f32( 54 const arm_sort_instance_f32 * S, 55 float32_t * pSrc, 56 float32_t * pDst, 57 uint32_t blockSize); 58 59 /** 60 * @param[in] S points to an instance of the sorting structure. 61 * @param[in] pSrc points to the block of input data. 62 * @param[out] pDst points to the block of output data. 63 * @param[in] blockSize number of samples to process. 64 */ 65 void arm_insertion_sort_f32( 66 const arm_sort_instance_f32 * S, 67 float32_t *pSrc, 68 float32_t* pDst, 69 uint32_t blockSize); 70 71 /** 72 * @param[in] S points to an instance of the sorting structure. 73 * @param[in] pSrc points to the block of input data. 74 * @param[out] pDst points to the block of output data 75 * @param[in] blockSize number of samples to process. 76 */ 77 void arm_quick_sort_f32( 78 const arm_sort_instance_f32 * S, 79 float32_t * pSrc, 80 float32_t * pDst, 81 uint32_t blockSize); 82 83 /** 84 * @param[in] S points to an instance of the sorting structure. 85 * @param[in] pSrc points to the block of input data. 86 * @param[out] pDst points to the block of output data 87 * @param[in] blockSize number of samples to process. 88 */ 89 void arm_selection_sort_f32( 90 const arm_sort_instance_f32 * S, 91 float32_t * pSrc, 92 float32_t * pDst, 93 uint32_t blockSize); 94 95 /** 96 * @param[in] S points to an instance of the sorting structure. 97 * @param[in] pSrc points to the block of input data. 98 * @param[out] pDst points to the block of output data 99 * @param[in] blockSize number of samples to process. 100 */ 101 void arm_bitonic_sort_f32( 102 const arm_sort_instance_f32 * S, 103 float32_t * pSrc, 104 float32_t * pDst, 105 uint32_t blockSize); 106 107 #if defined(ARM_MATH_NEON) 108 109 #define vtrn256_128q(a, b) \ 110 do { \ 111 float32x4_t vtrn128_temp = a.val[1]; \ 112 a.val[1] = b.val[0]; \ 113 b.val[0] = vtrn128_temp ; \ 114 } while (0) 115 116 #define vtrn128_64q(a, b) \ 117 do { \ 118 float32x2_t ab, cd, ef, gh; \ 119 ab = vget_low_f32(a); \ 120 ef = vget_low_f32(b); \ 121 cd = vget_high_f32(a); \ 122 gh = vget_high_f32(b); \ 123 a = vcombine_f32(ab, ef); \ 124 b = vcombine_f32(cd, gh); \ 125 } while (0) 126 127 #define vtrn256_64q(a, b) \ 128 do { \ 129 float32x2_t a_0, a_1, a_2, a_3; \ 130 float32x2_t b_0, b_1, b_2, b_3; \ 131 a_0 = vget_low_f32(a.val[0]); \ 132 a_1 = vget_high_f32(a.val[0]); \ 133 a_2 = vget_low_f32(a.val[1]); \ 134 a_3 = vget_high_f32(a.val[1]); \ 135 b_0 = vget_low_f32(b.val[0]); \ 136 b_1 = vget_high_f32(b.val[0]); \ 137 b_2 = vget_low_f32(b.val[1]); \ 138 b_3 = vget_high_f32(b.val[1]); \ 139 a.val[0] = vcombine_f32(a_0, b_0); \ 140 a.val[1] = vcombine_f32(a_2, b_2); \ 141 b.val[0] = vcombine_f32(a_1, b_1); \ 142 b.val[1] = vcombine_f32(a_3, b_3); \ 143 } while (0) 144 145 #define vtrn128_32q(a, b) \ 146 do { \ 147 float32x4x2_t vtrn32_tmp = vtrnq_f32((a), (b)); \ 148 (a) = vtrn32_tmp.val[0]; \ 149 (b) = vtrn32_tmp.val[1]; \ 150 } while (0) 151 152 #define vtrn256_32q(a, b) \ 153 do { \ 154 float32x4x2_t vtrn32_tmp_1 = vtrnq_f32((a.val[0]), (b.val[0])); \ 155 float32x4x2_t vtrn32_tmp_2 = vtrnq_f32((a.val[1]), (b.val[1])); \ 156 a.val[0] = vtrn32_tmp_1.val[0]; \ 157 a.val[1] = vtrn32_tmp_2.val[0]; \ 158 b.val[0] = vtrn32_tmp_1.val[1]; \ 159 b.val[1] = vtrn32_tmp_2.val[1]; \ 160 } while (0) 161 162 #define vminmaxq(a, b) \ 163 do { \ 164 float32x4_t minmax_tmp = (a); \ 165 (a) = vminq_f32((a), (b)); \ 166 (b) = vmaxq_f32(minmax_tmp, (b)); \ 167 } while (0) 168 169 #define vminmax256q(a, b) \ 170 do { \ 171 float32x4x2_t minmax256_tmp = (a); \ 172 a.val[0] = vminq_f32(a.val[0], b.val[0]); \ 173 a.val[1] = vminq_f32(a.val[1], b.val[1]); \ 174 b.val[0] = vmaxq_f32(minmax256_tmp.val[0], b.val[0]); \ 175 b.val[1] = vmaxq_f32(minmax256_tmp.val[1], b.val[1]); \ 176 } while (0) 177 178 #define vrev128q_f32(a) \ 179 vcombine_f32(vrev64_f32(vget_high_f32(a)), vrev64_f32(vget_low_f32(a))) 180 181 #define vrev256q_f32(a) \ 182 do { \ 183 float32x4_t rev_tmp = vcombine_f32(vrev64_f32(vget_high_f32(a.val[0])), vrev64_f32(vget_low_f32(a.val[0]))); \ 184 a.val[0] = vcombine_f32(vrev64_f32(vget_high_f32(a.val[1])), vrev64_f32(vget_low_f32(a.val[1]))); \ 185 a.val[1] = rev_tmp; \ 186 } while (0) 187 188 #define vldrev128q_f32(a, p) \ 189 do { \ 190 a = vld1q_f32(p); \ 191 a = vrev128q_f32(a); \ 192 } while (0) 193 194 #endif /* ARM_MATH_NEON */ 195 196 #ifdef __cplusplus 197 } 198 #endif 199 200 #endif /* _ARM_SORTING_H */ 201