Lines Matching refs:CMPLX_DIM

81         MATRIX_DIM * CMPLX_DIM, MATRIX_DIM * CMPLX_DIM + 1,  in arm_mat_cmplx_mult_f16_2x2_mve()
83 MATRIX_DIM * CMPLX_DIM + 2 , MATRIX_DIM * CMPLX_DIM + 3, in arm_mat_cmplx_mult_f16_2x2_mve()
92 tmp = (CMPLX_DIM * MATRIX_DIM); in arm_mat_cmplx_mult_f16_2x2_mve()
93 vecColAOffs1 = vecColAOffs0 + (uint16_t)(CMPLX_DIM * MATRIX_DIM); in arm_mat_cmplx_mult_f16_2x2_mve()
121 *(float32_t *)(&pOut[0 * CMPLX_DIM * MATRIX_DIM]) = ((f32x4_t)vecTmp)[0]; in arm_mat_cmplx_mult_f16_2x2_mve()
122 *(float32_t *)(&pOut[0 * CMPLX_DIM * MATRIX_DIM + CMPLX_DIM]) = ((f32x4_t)vecTmp)[2]; in arm_mat_cmplx_mult_f16_2x2_mve()
127 *(float32_t *)(&pOut[1 * CMPLX_DIM * MATRIX_DIM]) = ((f32x4_t)vecTmp)[0]; in arm_mat_cmplx_mult_f16_2x2_mve()
128 *(float32_t *)(&pOut[1 * CMPLX_DIM * MATRIX_DIM + CMPLX_DIM]) = ((f32x4_t)vecTmp)[2]; in arm_mat_cmplx_mult_f16_2x2_mve()
150 float16_t *pInA1 = pInA0 + CMPLX_DIM * MATRIX_DIM; in arm_mat_cmplx_mult_f16_3x3_mve()
151 float16_t *pInA2 = pInA1 + CMPLX_DIM * MATRIX_DIM; in arm_mat_cmplx_mult_f16_3x3_mve()
155 MATRIX_DIM * CMPLX_DIM, MATRIX_DIM * CMPLX_DIM + 1, in arm_mat_cmplx_mult_f16_3x3_mve()
156 2 * MATRIX_DIM * CMPLX_DIM, 2 * MATRIX_DIM * CMPLX_DIM + 1, in arm_mat_cmplx_mult_f16_3x3_mve()
162 mve_pred16_t p0 = vctp16q(MATRIX_DIM * CMPLX_DIM); in arm_mat_cmplx_mult_f16_3x3_mve()
183 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
184 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
185 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
186 pOut += CMPLX_DIM; in arm_mat_cmplx_mult_f16_3x3_mve()
190 pInB = pInB + CMPLX_DIM; in arm_mat_cmplx_mult_f16_3x3_mve()
203 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
204 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
205 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
206 pOut += CMPLX_DIM; in arm_mat_cmplx_mult_f16_3x3_mve()
210 pInB = pInB + CMPLX_DIM; in arm_mat_cmplx_mult_f16_3x3_mve()
223 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
224 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
225 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_3x3_mve()
247 float16_t *pInA1 = pInA0 + CMPLX_DIM * MATRIX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
248 float16_t *pInA2 = pInA1 + CMPLX_DIM * MATRIX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
249 float16_t *pInA3 = pInA2 + CMPLX_DIM * MATRIX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
253 MATRIX_DIM * CMPLX_DIM, MATRIX_DIM * CMPLX_DIM + 1, in arm_mat_cmplx_mult_f16_4x4_mve()
254 2 * MATRIX_DIM * CMPLX_DIM, 2 * MATRIX_DIM * CMPLX_DIM + 1, in arm_mat_cmplx_mult_f16_4x4_mve()
255 3 * MATRIX_DIM * CMPLX_DIM, 3 * MATRIX_DIM * CMPLX_DIM + 1 in arm_mat_cmplx_mult_f16_4x4_mve()
281 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
282 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
283 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
284 mve_cmplx_sum_intra_vec_f16(acc3, &pOut[3 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
285 pOut += CMPLX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
289 pInB = pInB + CMPLX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
310 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
311 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
312 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
313 mve_cmplx_sum_intra_vec_f16(acc3, &pOut[3 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
314 pOut += CMPLX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
318 pInB = pInB + CMPLX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
339 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
340 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
341 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
342 mve_cmplx_sum_intra_vec_f16(acc3, &pOut[3 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
343 pOut += CMPLX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
347 pInB = pInB + CMPLX_DIM; in arm_mat_cmplx_mult_f16_4x4_mve()
368 mve_cmplx_sum_intra_vec_f16(acc0, &pOut[0 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
369 mve_cmplx_sum_intra_vec_f16(acc1, &pOut[1 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
370 mve_cmplx_sum_intra_vec_f16(acc2, &pOut[2 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
371 mve_cmplx_sum_intra_vec_f16(acc3, &pOut[3 * CMPLX_DIM * MATRIX_DIM]); in arm_mat_cmplx_mult_f16_4x4_mve()
435 vecColBOffs[2] = numColsB * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
436 vecColBOffs[3] = (numColsB * CMPLX_DIM) + 1; in arm_mat_cmplx_mult_f16()
437 vecColBOffs[4] = 2*numColsB * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
438 vecColBOffs[5] = 2*(numColsB * CMPLX_DIM) + 1; in arm_mat_cmplx_mult_f16()
439 vecColBOffs[6] = 3*numColsB * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
440 vecColBOffs[7] = 3*(numColsB * CMPLX_DIM) + 1; in arm_mat_cmplx_mult_f16()
455 px = pOut + i * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
480 float16_t const *pInA1 = pInA0 + numColsA * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
481 float16_t const *pInA2 = pInA1 + numColsA * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
482 float16_t const *pInA3 = pInA2 + numColsA * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
500 blkCnt = (numColsA * CMPLX_DIM) >> 3; in arm_mat_cmplx_mult_f16()
509 vecOffs = vaddq_n_u16(vecOffs , (uint16_t) (numColsB * 4 * CMPLX_DIM)); in arm_mat_cmplx_mult_f16()
536 blkCnt = (numColsA * CMPLX_DIM) & 7; in arm_mat_cmplx_mult_f16()
546 vecOffs = vaddq_n_u16(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM)); in arm_mat_cmplx_mult_f16()
567 mve_cmplx_sum_intra_vec_f16(acc0, &px[0 * CMPLX_DIM * numColsB + 0]); in arm_mat_cmplx_mult_f16()
568 mve_cmplx_sum_intra_vec_f16(acc1, &px[1 * CMPLX_DIM * numColsB + 0]); in arm_mat_cmplx_mult_f16()
569 mve_cmplx_sum_intra_vec_f16(acc2, &px[2 * CMPLX_DIM * numColsB + 0]); in arm_mat_cmplx_mult_f16()
570 mve_cmplx_sum_intra_vec_f16(acc3, &px[3 * CMPLX_DIM * numColsB + 0]); in arm_mat_cmplx_mult_f16()
572 px += CMPLX_DIM; in arm_mat_cmplx_mult_f16()
580 pInB = (float16_t const *) pSrcB->pData + (numColsB - col) * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
586 pInA += (numColsA * 4) * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
600 px = pOut + i * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
636 blkCnt = (numColsA * CMPLX_DIM) >> 3; in arm_mat_cmplx_mult_f16()
645 vecOffs = vaddq_n_u16(vecOffs, (uint16_t) (4*numColsB * CMPLX_DIM)); in arm_mat_cmplx_mult_f16()
660 blkCnt = (numColsA * CMPLX_DIM) & 7; in arm_mat_cmplx_mult_f16()
677 px += CMPLX_DIM; in arm_mat_cmplx_mult_f16()
685 pInB = (float16_t const *) pSrcB->pData + (numColsB - col) * CMPLX_DIM; in arm_mat_cmplx_mult_f16()
691 pInA += numColsA * CMPLX_DIM; in arm_mat_cmplx_mult_f16()