Lines Matching +full:- +full:l
1 /* ----------------------------------------------------------------------
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 * SPDX-License-Identifier: Apache-2.0
20 * www.apache.org/licenses/LICENSE-2.0
102 (0 - 16) * (int32_t)sizeof(q31_t *), in _arm_radix4_butterfly_f32_mve()
103 (1 - 16) * (int32_t)sizeof(q31_t *), in _arm_radix4_butterfly_f32_mve()
104 (8 - 16) * (int32_t)sizeof(q31_t *), in _arm_radix4_butterfly_f32_mve()
105 (9 - 16) * (int32_t)sizeof(q31_t *) in _arm_radix4_butterfly_f32_mve()
114 &S->rearranged_twiddle_stride1[ in _arm_radix4_butterfly_f32_mve()
115 S->rearranged_twiddle_tab_stride1_arr[stage]]; in _arm_radix4_butterfly_f32_mve()
117 &S->rearranged_twiddle_stride2[ in _arm_radix4_butterfly_f32_mve()
118 S->rearranged_twiddle_tab_stride2_arr[stage]]; in _arm_radix4_butterfly_f32_mve()
120 &S->rearranged_twiddle_stride3[ in _arm_radix4_butterfly_f32_mve()
121 S->rearranged_twiddle_tab_stride3_arr[stage]]; in _arm_radix4_butterfly_f32_mve()
147 vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ in _arm_radix4_butterfly_f32_mve()
150 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_f32_mve()
159 * [ 1 -1 1 -1 ] * [ A B C D ]' in _arm_radix4_butterfly_f32_mve()
161 vecTmp0 = vecSum0 - vecSum1; in _arm_radix4_butterfly_f32_mve()
163 * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 in _arm_radix4_butterfly_f32_mve()
172 * [ 1 -i -1 +i ] * [ A B C D ]' in _arm_radix4_butterfly_f32_mve()
176 * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 in _arm_radix4_butterfly_f32_mve()
185 * [ 1 +i -1 -i ] * [ A B C D ]' in _arm_radix4_butterfly_f32_mve()
189 * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 in _arm_radix4_butterfly_f32_mve()
200 blkCnt--; in _arm_radix4_butterfly_f32_mve()
224 vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ in _arm_radix4_butterfly_f32_mve()
230 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_f32_mve()
232 /* pre-load for next iteration */ in _arm_radix4_butterfly_f32_mve()
237 vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0); in _arm_radix4_butterfly_f32_mve()
239 vecTmp0 = vecSum0 - vecSum1; in _arm_radix4_butterfly_f32_mve()
240 vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0); in _arm_radix4_butterfly_f32_mve()
243 vstrwq_scatter_base_f32(vecScGathAddr, -64 + 16, vecTmp0); in _arm_radix4_butterfly_f32_mve()
246 vstrwq_scatter_base_f32(vecScGathAddr, -64 + 24, vecTmp0); in _arm_radix4_butterfly_f32_mve()
248 blkCnt--; in _arm_radix4_butterfly_f32_mve()
259 float32_t const *pCoef = S->pTwiddle; in arm_cfft_radix4by2_f32_mve()
281 vecDiff = vecIn0 - vecIn1; in arm_cfft_radix4by2_f32_mve()
290 blkCnt--; in arm_cfft_radix4by2_f32_mve()
310 (0 - 16) * (int32_t)sizeof(q31_t *), in _arm_radix4_butterfly_inverse_f32_mve()
311 (1 - 16) * (int32_t)sizeof(q31_t *), in _arm_radix4_butterfly_inverse_f32_mve()
312 (8 - 16) * (int32_t)sizeof(q31_t *), in _arm_radix4_butterfly_inverse_f32_mve()
313 (9 - 16) * (int32_t)sizeof(q31_t *) in _arm_radix4_butterfly_inverse_f32_mve()
322 &S->rearranged_twiddle_stride1[ in _arm_radix4_butterfly_inverse_f32_mve()
323 S->rearranged_twiddle_tab_stride1_arr[stage]]; in _arm_radix4_butterfly_inverse_f32_mve()
325 &S->rearranged_twiddle_stride2[ in _arm_radix4_butterfly_inverse_f32_mve()
326 S->rearranged_twiddle_tab_stride2_arr[stage]]; in _arm_radix4_butterfly_inverse_f32_mve()
328 &S->rearranged_twiddle_stride3[ in _arm_radix4_butterfly_inverse_f32_mve()
329 S->rearranged_twiddle_tab_stride3_arr[stage]]; in _arm_radix4_butterfly_inverse_f32_mve()
355 vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ in _arm_radix4_butterfly_inverse_f32_mve()
358 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_inverse_f32_mve()
366 * [ 1 -1 1 -1 ] * [ A B C D ]' in _arm_radix4_butterfly_inverse_f32_mve()
368 vecTmp0 = vecSum0 - vecSum1; in _arm_radix4_butterfly_inverse_f32_mve()
370 * [ 1 -1 1 -1 ] * [ A B C D ]'.* W1 in _arm_radix4_butterfly_inverse_f32_mve()
379 * [ 1 -i -1 +i ] * [ A B C D ]' in _arm_radix4_butterfly_inverse_f32_mve()
383 * [ 1 -i -1 +i ] * [ A B C D ]'.* W2 in _arm_radix4_butterfly_inverse_f32_mve()
392 * [ 1 +i -1 -i ] * [ A B C D ]' in _arm_radix4_butterfly_inverse_f32_mve()
396 * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 in _arm_radix4_butterfly_inverse_f32_mve()
407 blkCnt--; in _arm_radix4_butterfly_inverse_f32_mve()
433 vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ in _arm_radix4_butterfly_inverse_f32_mve()
439 vecDiff1 = vecB - vecD; in _arm_radix4_butterfly_inverse_f32_mve()
446 vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0); in _arm_radix4_butterfly_inverse_f32_mve()
448 vecTmp0 = vecSum0 - vecSum1; in _arm_radix4_butterfly_inverse_f32_mve()
450 vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0); in _arm_radix4_butterfly_inverse_f32_mve()
454 vstrwq_scatter_base_f32(vecScGathAddr, -64 + 16, vecTmp0); in _arm_radix4_butterfly_inverse_f32_mve()
458 vstrwq_scatter_base_f32(vecScGathAddr, -64 + 24, vecTmp0); in _arm_radix4_butterfly_inverse_f32_mve()
460 blkCnt--; in _arm_radix4_butterfly_inverse_f32_mve()
471 float32_t const *pCoef = S->pTwiddle; in arm_cfft_radix4by2_inverse_f32_mve()
494 vecDiff = vecIn0 - vecIn1; in arm_cfft_radix4by2_inverse_f32_mve()
503 blkCnt--; in arm_cfft_radix4by2_inverse_f32_mve()
518 @brief Processing function for the floating-point complex FFT.
519 @param[in] S points to an instance of the floating-point CFFT structure
520 … points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
522 - value = 0: forward transform
523 - value = 1: inverse transform
525 - value = 0: disables bit reversal of output
526 - value = 1: enables bit reversal of output
536 uint32_t fftLen = S->fftLen; in arm_cfft_f32()
546 … _arm_radix4_butterfly_inverse_f32_mve(S, pSrc, fftLen, arm_inverse_fft_length_f32(S->fftLen)); in arm_cfft_f32()
579 arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable); in arm_cfft_f32()
612 There are separate algorithms for handling floating-point, Q15, and Q31 data
615 The FFT functions operate in-place. That is, the array holding the input data
622 @par Floating-point
623 The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
624 stages are performed along with a single radix-2 or radix-4 stage, as needed.
638 For not MVE versions, pre-initialized data structures containing twiddle factors
645 computes a 64-point inverse complex FFT including bit reversal.
650 Earlier releases of the library provided separate radix-2 and radix-4
651 algorithms that operated on floating-point data. These functions are still
693 The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
694 stages are performed along with a single radix-2 stage, as needed.
703 Pre-initialized data structures containing twiddle factors and bit reversal
710 computes a 64-point inverse complex FFT including bit reversal.
715 Earlier releases of the library provided separate radix-2 and radix-4
716 algorithms that operated on floating-point data. These functions are still
759 uint32_t L = S->fftLen; in arm_cfft_radix8by2_f32() local
761 float32_t * p2 = p1 + L; in arm_cfft_radix8by2_f32()
762 const float32_t * tw = (float32_t *) S->pTwiddle; in arm_cfft_radix8by2_f32()
765 uint32_t l; in arm_cfft_radix8by2_f32() local
771 L >>= 1; in arm_cfft_radix8by2_f32()
774 pMid1 = p1 + L; in arm_cfft_radix8by2_f32()
775 pMid2 = p2 + L; in arm_cfft_radix8by2_f32()
778 for (l = L >> 2; l > 0; l-- ) in arm_cfft_radix8by2_f32()
805 t2[0] = t1[0] - t2[0]; in arm_cfft_radix8by2_f32()
806 t2[1] = t1[1] - t2[1]; in arm_cfft_radix8by2_f32()
807 t2[2] = t1[2] - t2[2]; in arm_cfft_radix8by2_f32()
808 t2[3] = t1[3] - t2[3]; /* for col 2 */ in arm_cfft_radix8by2_f32()
815 t4[0] = t4[0] - t3[0]; in arm_cfft_radix8by2_f32()
816 t4[1] = t4[1] - t3[1]; in arm_cfft_radix8by2_f32()
817 t4[2] = t4[2] - t3[2]; in arm_cfft_radix8by2_f32()
818 t4[3] = t4[3] - t3[3]; /* for col 2 */ in arm_cfft_radix8by2_f32()
829 /* R = R * Tr - I * Ti */ in arm_cfft_radix8by2_f32()
832 *p2++ = m2 - m3; in arm_cfft_radix8by2_f32()
835 /* 0.9988 - 0.0491i <==> -0.0491 - 0.9988i */ in arm_cfft_radix8by2_f32()
841 *pMid2++ = m0 - m1; in arm_cfft_radix8by2_f32()
853 *p2++ = m2 - m3; in arm_cfft_radix8by2_f32()
860 *pMid2++ = m0 - m1; in arm_cfft_radix8by2_f32()
865 arm_radix8_butterfly_f32 (pCol1, L, (float32_t *) S->pTwiddle, 2U); in arm_cfft_radix8by2_f32()
868 arm_radix8_butterfly_f32 (pCol2, L, (float32_t *) S->pTwiddle, 2U); in arm_cfft_radix8by2_f32()
873 uint32_t L = S->fftLen >> 1; in arm_cfft_radix8by4_f32() local
876 float32_t * p2 = p1 + L; in arm_cfft_radix8by4_f32()
877 float32_t * p3 = p2 + L; in arm_cfft_radix8by4_f32()
878 float32_t * p4 = p3 + L; in arm_cfft_radix8by4_f32()
882 uint32_t l, twMod2, twMod3, twMod4; in arm_cfft_radix8by4_f32() local
888 pEnd1 = p2 - 1; /* points to imaginary values by default */ in arm_cfft_radix8by4_f32()
889 pEnd2 = p3 - 1; in arm_cfft_radix8by4_f32()
890 pEnd3 = p4 - 1; in arm_cfft_radix8by4_f32()
891 pEnd4 = pEnd3 + L; in arm_cfft_radix8by4_f32()
893 tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle; in arm_cfft_radix8by4_f32()
895 L >>= 1; in arm_cfft_radix8by4_f32()
905 p1sp3_0 = p1[0] - p3[0]; in arm_cfft_radix8by4_f32()
907 p1sp3_1 = p1[1] - p3[1]; in arm_cfft_radix8by4_f32()
910 t2[0] = p1sp3_0 + p2[1] - p4[1]; in arm_cfft_radix8by4_f32()
911 t2[1] = p1sp3_1 - p2[0] + p4[0]; in arm_cfft_radix8by4_f32()
913 t3[0] = p1ap3_0 - p2[0] - p4[0]; in arm_cfft_radix8by4_f32()
914 t3[1] = p1ap3_1 - p2[1] - p4[1]; in arm_cfft_radix8by4_f32()
916 t4[0] = p1sp3_0 - p2[1] + p4[1]; in arm_cfft_radix8by4_f32()
917 t4[1] = p1sp3_1 + p2[0] - p4[0]; in arm_cfft_radix8by4_f32()
934 for (l = (L - 2) >> 1; l > 0; l-- ) in arm_cfft_radix8by4_f32()
938 p1sp3_0 = p1[0] - p3[0]; in arm_cfft_radix8by4_f32()
940 p1sp3_1 = p1[1] - p3[1]; in arm_cfft_radix8by4_f32()
942 t2[0] = p1sp3_0 + p2[1] - p4[1]; in arm_cfft_radix8by4_f32()
943 t2[1] = p1sp3_1 - p2[0] + p4[0]; in arm_cfft_radix8by4_f32()
945 t3[0] = p1ap3_0 - p2[0] - p4[0]; in arm_cfft_radix8by4_f32()
946 t3[1] = p1ap3_1 - p2[1] - p4[1]; in arm_cfft_radix8by4_f32()
948 t4[0] = p1sp3_0 - p2[1] + p4[1]; in arm_cfft_radix8by4_f32()
949 t4[1] = p1sp3_1 + p2[0] - p4[0]; in arm_cfft_radix8by4_f32()
950 /* col 1 - top */ in arm_cfft_radix8by4_f32()
955 p1ap3_1 = pEnd1[-1] + pEnd3[-1]; in arm_cfft_radix8by4_f32()
956 p1sp3_1 = pEnd1[-1] - pEnd3[-1]; in arm_cfft_radix8by4_f32()
958 p1sp3_0 = pEnd1[ 0] - pEnd3[0]; in arm_cfft_radix8by4_f32()
960 t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1; in arm_cfft_radix8by4_f32()
961 t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1]; in arm_cfft_radix8by4_f32()
963 t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1]; in arm_cfft_radix8by4_f32()
964 t3[3] = p1ap3_0 - pEnd2[ 0] - pEnd4[ 0]; in arm_cfft_radix8by4_f32()
966 t4[2] = pEnd2[ 0] - pEnd4[ 0] - p1sp3_1; in arm_cfft_radix8by4_f32()
967 t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0; in arm_cfft_radix8by4_f32()
968 /* col 1 - Bottom */ in arm_cfft_radix8by4_f32()
969 *pEnd1-- = p1ap3_0 + pEnd2[ 0] + pEnd4[ 0]; in arm_cfft_radix8by4_f32()
970 *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1]; in arm_cfft_radix8by4_f32()
978 /* => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d) */ in arm_cfft_radix8by4_f32()
987 *p2++ = m2 - m3; in arm_cfft_radix8by4_f32()
989 /* 0.9997 - 0.0245i <==> 0.0245 - 0.9997i */ in arm_cfft_radix8by4_f32()
996 *pEnd2-- = m0 - m1; in arm_cfft_radix8by4_f32()
997 *pEnd2-- = m2 + m3; in arm_cfft_radix8by4_f32()
1010 *p3++ = m2 - m3; in arm_cfft_radix8by4_f32()
1012 /* 0.9988 - 0.0491i <==> -0.9988 - 0.0491i */ in arm_cfft_radix8by4_f32()
1014 m0 = -t3[3] * twR; in arm_cfft_radix8by4_f32()
1019 *pEnd3-- = m0 - m1; in arm_cfft_radix8by4_f32()
1020 *pEnd3-- = m3 - m2; in arm_cfft_radix8by4_f32()
1033 *p4++ = m2 - m3; in arm_cfft_radix8by4_f32()
1035 /* 0.9973 - 0.0736i <==> -0.0736 + 0.9973i */ in arm_cfft_radix8by4_f32()
1042 *pEnd4-- = m0 - m1; in arm_cfft_radix8by4_f32()
1043 *pEnd4-- = m2 + m3; in arm_cfft_radix8by4_f32()
1048 /* 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i */ in arm_cfft_radix8by4_f32()
1050 p1sp3_0 = p1[0] - p3[0]; in arm_cfft_radix8by4_f32()
1052 p1sp3_1 = p1[1] - p3[1]; in arm_cfft_radix8by4_f32()
1055 t2[0] = p1sp3_0 + p2[1] - p4[1]; in arm_cfft_radix8by4_f32()
1056 t2[1] = p1sp3_1 - p2[0] + p4[0]; in arm_cfft_radix8by4_f32()
1058 t3[0] = p1ap3_0 - p2[0] - p4[0]; in arm_cfft_radix8by4_f32()
1059 t3[1] = p1ap3_1 - p2[1] - p4[1]; in arm_cfft_radix8by4_f32()
1061 t4[0] = p1sp3_0 - p2[1] + p4[1]; in arm_cfft_radix8by4_f32()
1062 t4[1] = p1sp3_1 + p2[0] - p4[0]; in arm_cfft_radix8by4_f32()
1063 /* col 1 - Top */ in arm_cfft_radix8by4_f32()
1077 *p2++ = m2 - m3; in arm_cfft_radix8by4_f32()
1088 *p3++ = m2 - m3; in arm_cfft_radix8by4_f32()
1099 *p4++ = m2 - m3; in arm_cfft_radix8by4_f32()
1102 arm_radix8_butterfly_f32 (pCol1, L, (float32_t *) S->pTwiddle, 4U); in arm_cfft_radix8by4_f32()
1105 arm_radix8_butterfly_f32 (pCol2, L, (float32_t *) S->pTwiddle, 4U); in arm_cfft_radix8by4_f32()
1108 arm_radix8_butterfly_f32 (pCol3, L, (float32_t *) S->pTwiddle, 4U); in arm_cfft_radix8by4_f32()
1111 arm_radix8_butterfly_f32 (pCol4, L, (float32_t *) S->pTwiddle, 4U); in arm_cfft_radix8by4_f32()
1120 @brief Processing function for the floating-point complex FFT.
1121 @param[in] S points to an instance of the floating-point CFFT structure
1122 … points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
1124 - value = 0: forward transform
1125 - value = 1: inverse transform
1127 - value = 0: disables bit reversal of output
1128 - value = 1: enables bit reversal of output
1137 uint32_t L = S->fftLen, l; in arm_cfft_f32() local
1144 for (l = 0; l < L; l++) in arm_cfft_f32()
1146 *pSrc = -*pSrc; in arm_cfft_f32()
1151 switch (L) in arm_cfft_f32()
1166 arm_radix8_butterfly_f32 ( p1, L, (float32_t *) S->pTwiddle, 1); in arm_cfft_f32()
1171 arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable); in arm_cfft_f32()
1175 invL = 1.0f / (float32_t)L; in arm_cfft_f32()
1179 for (l= 0; l < L; l++) in arm_cfft_f32()
1182 *pSrc = -(*pSrc) * invL; in arm_cfft_f32()