1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mat_inverse_f16.c
4 * Description: Floating-point matrix inverse
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/matrix_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33
34 /**
35 @ingroup groupMatrix
36 */
37
38
39 /**
40 @addtogroup MatrixInv
41 @{
42 */
43
44 /**
45 @brief Floating-point matrix inverse.
46 @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
47 @param[out] pDst points to output matrix structure
48 @return execution status
49 - \ref ARM_MATH_SUCCESS : Operation successful
50 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
51 - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
52 */
53 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
54
arm_mat_inverse_f16(const arm_matrix_instance_f16 * pSrc,arm_matrix_instance_f16 * pDst)55 arm_status arm_mat_inverse_f16(
56 const arm_matrix_instance_f16 * pSrc,
57 arm_matrix_instance_f16 * pDst)
58 {
59 float16_t *pIn = pSrc->pData; /* input data matrix pointer */
60 float16_t *pOut = pDst->pData; /* output data matrix pointer */
61 float16_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
62 float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
63 float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
64
65 uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
66 uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
67 float16_t *pTmpA, *pTmpB;
68
69 _Float16 in = 0.0f16; /* Temporary input values */
70 uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */
71 arm_status status; /* status of matrix inverse */
72 uint32_t blkCnt;
73
74 #ifdef ARM_MATH_MATRIX_CHECK
75 /* Check for matrix mismatch condition */
76 if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
77 || (pSrc->numRows != pDst->numRows))
78 {
79 /* Set status as ARM_MATH_SIZE_MISMATCH */
80 status = ARM_MATH_SIZE_MISMATCH;
81 }
82 else
83 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
84 {
85
86 /*--------------------------------------------------------------------------------------------------------------
87 * Matrix Inverse can be solved using elementary row operations.
88 *
89 * Gauss-Jordan Method:
90 *
91 * 1. First combine the identity matrix and the input matrix separated by a bar to form an
92 * augmented matrix as follows:
93 * _ _ _ _ _ _ _ _
94 * | | a11 a12 | | | 1 0 | | | X11 X12 |
95 * | | | | | | | = | |
96 * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _|
97 *
98 * 2. In our implementation, pDst Matrix is used as identity matrix.
99 *
100 * 3. Begin with the first row. Let i = 1.
101 *
102 * 4. Check to see if the pivot for row i is zero.
103 * The pivot is the element of the main diagonal that is on the current row.
104 * For instance, if working with row i, then the pivot element is aii.
105 * If the pivot is zero, exchange that row with a row below it that does not
106 * contain a zero in column i. If this is not possible, then an inverse
107 * to that matrix does not exist.
108 *
109 * 5. Divide every element of row i by the pivot.
110 *
111 * 6. For every row below and row i, replace that row with the sum of that row and
112 * a multiple of row i so that each new element in column i below row i is zero.
113 *
114 * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
115 * for every element below and above the main diagonal.
116 *
117 * 8. Now an identical matrix is formed to the left of the bar(input matrix, src).
118 * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
119 *----------------------------------------------------------------------------------------------------------------*/
120
121 /*
122 * Working pointer for destination matrix
123 */
124 pOutT1 = pOut;
125 /*
126 * Loop over the number of rows
127 */
128 rowCnt = numRows;
129 /*
130 * Making the destination matrix as identity matrix
131 */
132 while (rowCnt > 0U)
133 {
134 /*
135 * Writing all zeroes in lower triangle of the destination matrix
136 */
137 j = numRows - rowCnt;
138 while (j > 0U)
139 {
140 *pOutT1++ = 0.0f16;
141 j--;
142 }
143 /*
144 * Writing all ones in the diagonal of the destination matrix
145 */
146 *pOutT1++ = 1.0f16;
147 /*
148 * Writing all zeroes in upper triangle of the destination matrix
149 */
150 j = rowCnt - 1U;
151 while (j > 0U)
152 {
153 *pOutT1++ = 0.0f16;
154 j--;
155 }
156 /*
157 * Decrement the loop counter
158 */
159 rowCnt--;
160 }
161
162 /*
163 * Loop over the number of columns of the input matrix.
164 * All the elements in each column are processed by the row operations
165 */
166 loopCnt = numCols;
167 /*
168 * Index modifier to navigate through the columns
169 */
170 l = 0U;
171 while (loopCnt > 0U)
172 {
173 /*
174 * Check if the pivot element is zero..
175 * If it is zero then interchange the row with non zero row below.
176 * If there is no non zero element to replace in the rows below,
177 * then the matrix is Singular.
178 */
179
180 /*
181 * Working pointer for the input matrix that points
182 * * to the pivot element of the particular row
183 */
184 pInT1 = pIn + (l * numCols);
185 /*
186 * Working pointer for the destination matrix that points
187 * * to the pivot element of the particular row
188 */
189 pOutT1 = pOut + (l * numCols);
190 /*
191 * Temporary variable to hold the pivot value
192 */
193 in = *pInT1;
194
195
196 /*
197 * Check if the pivot element is zero
198 */
199 if (*pInT1 == 0.0f16)
200 {
201 /*
202 * Loop over the number rows present below
203 */
204 for (i = 1U; i < numRows-l; i++)
205 {
206 /*
207 * Update the input and destination pointers
208 */
209 pInT2 = pInT1 + (numCols * i);
210 pOutT2 = pOutT1 + (numCols * i);
211 /*
212 * Check if there is a non zero pivot element to
213 * * replace in the rows below
214 */
215 if (*pInT2 != 0.0f16)
216 {
217 f16x8_t vecA, vecB;
218 /*
219 * Loop over number of columns
220 * * to the right of the pilot element
221 */
222 pTmpA = pInT1;
223 pTmpB = pInT2;
224 blkCnt = (numCols - l) >> 3;
225 while (blkCnt > 0U)
226 {
227
228 vecA = vldrhq_f16(pTmpA);
229 vecB = vldrhq_f16(pTmpB);
230 vstrhq_f16(pTmpB, vecA);
231 vstrhq_f16(pTmpA, vecB);
232
233 pTmpA += 8;
234 pTmpB += 8;
235 /*
236 * Decrement the blockSize loop counter
237 */
238 blkCnt--;
239 }
240 /*
241 * tail
242 * (will be merged thru tail predication)
243 */
244 blkCnt = (numCols - l) & 7;
245 if (blkCnt > 0U)
246 {
247 mve_pred16_t p0 = vctp16q(blkCnt);
248
249 vecA = vldrhq_f16(pTmpA);
250 vecB = vldrhq_f16(pTmpB);
251 vstrhq_p_f16(pTmpB, vecA, p0);
252 vstrhq_p_f16(pTmpA, vecB, p0);
253 }
254
255 pInT1 += numCols - l;
256 pInT2 += numCols - l;
257 pTmpA = pOutT1;
258 pTmpB = pOutT2;
259 blkCnt = numCols >> 3;
260 while (blkCnt > 0U)
261 {
262
263 vecA = vldrhq_f16(pTmpA);
264 vecB = vldrhq_f16(pTmpB);
265 vstrhq_f16(pTmpB, vecA);
266 vstrhq_f16(pTmpA, vecB);
267 pTmpA += 8;
268 pTmpB += 8;
269 /*
270 * Decrement the blockSize loop counter
271 */
272 blkCnt--;
273 }
274 /*
275 * tail
276 */
277 blkCnt = numCols & 7;
278 if (blkCnt > 0U)
279 {
280 mve_pred16_t p0 = vctp16q(blkCnt);
281
282 vecA = vldrhq_f16(pTmpA);
283 vecB = vldrhq_f16(pTmpB);
284 vstrhq_p_f16(pTmpB, vecA, p0);
285 vstrhq_p_f16(pTmpA, vecB, p0);
286 }
287
288 pOutT1 += numCols;
289 pOutT2 += numCols;
290 /*
291 * Flag to indicate whether exchange is done or not
292 */
293 flag = 1U;
294
295 /*
296 * Break after exchange is done
297 */
298 break;
299 }
300
301 }
302 }
303
304 /*
305 * Update the status if the matrix is singular
306 */
307 if ((flag != 1U) && (in == 0.0f16))
308 {
309 return ARM_MATH_SINGULAR;
310 }
311
312 /*
313 * Points to the pivot row of input and destination matrices
314 */
315 pPivotRowIn = pIn + (l * numCols);
316 pPivotRowDst = pOut + (l * numCols);
317
318 /*
319 * Temporary pointers to the pivot row pointers
320 */
321 pInT1 = pPivotRowIn;
322 pOutT1 = pPivotRowDst;
323
324 /*
325 * Pivot element of the row
326 */
327 in = *(pIn + (l * numCols));
328
329 pTmpA = pInT1;
330
331 f16x8_t invIn = vdupq_n_f16(1.0f16 / in);
332
333 blkCnt = (numCols - l) >> 3;
334 f16x8_t vecA;
335 while (blkCnt > 0U)
336 {
337 *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA * invIn;
338 pTmpA += 8;
339 /*
340 * Decrement the blockSize loop counter
341 */
342 blkCnt--;
343 }
344 /*
345 * tail
346 */
347 blkCnt = (numCols - l) & 7;
348 if (blkCnt > 0U)
349 {
350 mve_pred16_t p0 = vctp16q(blkCnt);
351
352
353 vecA = vldrhq_f16(pTmpA);
354 vecA = vecA * invIn;
355 vstrhq_p_f16(pTmpA, vecA, p0);
356 }
357
358 pInT1 += numCols - l;
359 /*
360 * Loop over number of columns
361 * * to the right of the pilot element
362 */
363
364 pTmpA = pOutT1;
365 blkCnt = numCols >> 3;
366 while (blkCnt > 0U)
367 {
368 *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA *invIn;
369 pTmpA += 8;
370 /*
371 * Decrement the blockSize loop counter
372 */
373 blkCnt--;
374 }
375 /*
376 * tail
377 * (will be merged thru tail predication)
378 */
379 blkCnt = numCols & 7;
380 if (blkCnt > 0U)
381 {
382 mve_pred16_t p0 = vctp16q(blkCnt);
383
384 vecA = vldrhq_f16(pTmpA);
385 vecA = vecA * invIn;
386 vstrhq_p_f16(pTmpA, vecA, p0);
387 }
388
389 pOutT1 += numCols;
390
391 /*
392 * Replace the rows with the sum of that row and a multiple of row i
393 * * so that each new element in column i above row i is zero.
394 */
395
396 /*
397 * Temporary pointers for input and destination matrices
398 */
399 pInT1 = pIn;
400 pOutT1 = pOut;
401
402 for (i = 0U; i < numRows; i++)
403 {
404 /*
405 * Check for the pivot element
406 */
407 if (i == l)
408 {
409 /*
410 * If the processing element is the pivot element,
411 * only the columns to the right are to be processed
412 */
413 pInT1 += numCols - l;
414 pOutT1 += numCols;
415 }
416 else
417 {
418 /*
419 * Element of the reference row
420 */
421
422 /*
423 * Working pointers for input and destination pivot rows
424 */
425 pPRT_in = pPivotRowIn;
426 pPRT_pDst = pPivotRowDst;
427 /*
428 * Loop over the number of columns to the right of the pivot element,
429 * to replace the elements in the input matrix
430 */
431
432 in = *pInT1;
433 f16x8_t tmpV = vdupq_n_f16(in);
434
435 blkCnt = (numCols - l) >> 3;
436 while (blkCnt > 0U)
437 {
438 f16x8_t vec1, vec2;
439 /*
440 * Replace the element by the sum of that row
441 * and a multiple of the reference row
442 */
443 vec1 = vldrhq_f16(pInT1);
444 vec2 = vldrhq_f16(pPRT_in);
445 vec1 = vfmsq_f16(vec1, tmpV, vec2);
446 vstrhq_f16(pInT1, vec1);
447 pPRT_in += 8;
448 pInT1 += 8;
449 /*
450 * Decrement the blockSize loop counter
451 */
452 blkCnt--;
453 }
454 /*
455 * tail
456 * (will be merged thru tail predication)
457 */
458 blkCnt = (numCols - l) & 7;
459 if (blkCnt > 0U)
460 {
461 f16x8_t vec1, vec2;
462 mve_pred16_t p0 = vctp16q(blkCnt);
463
464 vec1 = vldrhq_f16(pInT1);
465 vec2 = vldrhq_f16(pPRT_in);
466 vec1 = vfmsq_f16(vec1, tmpV, vec2);
467 vstrhq_p_f16(pInT1, vec1, p0);
468 pInT1 += blkCnt;
469 }
470
471 blkCnt = numCols >> 3;
472 while (blkCnt > 0U)
473 {
474 f16x8_t vec1, vec2;
475
476 /*
477 * Replace the element by the sum of that row
478 * and a multiple of the reference row
479 */
480 vec1 = vldrhq_f16(pOutT1);
481 vec2 = vldrhq_f16(pPRT_pDst);
482 vec1 = vfmsq_f16(vec1, tmpV, vec2);
483 vstrhq_f16(pOutT1, vec1);
484 pPRT_pDst += 8;
485 pOutT1 += 8;
486 /*
487 * Decrement the blockSize loop counter
488 */
489 blkCnt--;
490 }
491 /*
492 * tail
493 * (will be merged thru tail predication)
494 */
495 blkCnt = numCols & 7;
496 if (blkCnt > 0U)
497 {
498 f16x8_t vec1, vec2;
499 mve_pred16_t p0 = vctp16q(blkCnt);
500
501 vec1 = vldrhq_f16(pOutT1);
502 vec2 = vldrhq_f16(pPRT_pDst);
503 vec1 = vfmsq_f16(vec1, tmpV, vec2);
504 vstrhq_p_f16(pOutT1, vec1, p0);
505
506 pInT2 += blkCnt;
507 pOutT1 += blkCnt;
508 }
509 }
510 /*
511 * Increment the temporary input pointer
512 */
513 pInT1 = pInT1 + l;
514 }
515 /*
516 * Increment the input pointer
517 */
518 pIn++;
519 /*
520 * Decrement the loop counter
521 */
522 loopCnt--;
523 /*
524 * Increment the index modifier
525 */
526 l++;
527 }
528
529 /*
530 * Set status as ARM_MATH_SUCCESS
531 */
532 status = ARM_MATH_SUCCESS;
533
534 if ((flag != 1U) && (in == 0.0f16))
535 {
536 pIn = pSrc->pData;
537 for (i = 0; i < numRows * numCols; i++)
538 {
539 if (pIn[i] != 0.0f16)
540 break;
541 }
542
543 if (i == numRows * numCols)
544 status = ARM_MATH_SINGULAR;
545 }
546 }
547 /* Return to application */
548 return (status);
549 }
550
551 #else
552
arm_mat_inverse_f16(const arm_matrix_instance_f16 * pSrc,arm_matrix_instance_f16 * pDst)553 arm_status arm_mat_inverse_f16(
554 const arm_matrix_instance_f16 * pSrc,
555 arm_matrix_instance_f16 * pDst)
556 {
557 float16_t *pIn = pSrc->pData; /* input data matrix pointer */
558 float16_t *pOut = pDst->pData; /* output data matrix pointer */
559 float16_t *pInT1, *pInT2; /* Temporary input data matrix pointer */
560 float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
561 float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */
562 uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */
563 uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
564
565 _Float16 Xchg, in = 0.0f16, in1; /* Temporary input values */
566 uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l; /* loop counters */
567 arm_status status; /* status of matrix inverse */
568
569 #ifdef ARM_MATH_MATRIX_CHECK
570
571 /* Check for matrix mismatch condition */
572 if ((pSrc->numRows != pSrc->numCols) ||
573 (pDst->numRows != pDst->numCols) ||
574 (pSrc->numRows != pDst->numRows) )
575 {
576 /* Set status as ARM_MATH_SIZE_MISMATCH */
577 status = ARM_MATH_SIZE_MISMATCH;
578 }
579 else
580
581 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
582
583 {
584
585 /*--------------------------------------------------------------------------------------------------------------
586 * Matrix Inverse can be solved using elementary row operations.
587 *
588 * Gauss-Jordan Method:
589 *
590 * 1. First combine the identity matrix and the input matrix separated by a bar to form an
591 * augmented matrix as follows:
592 * _ _ _ _
593 * | a11 a12 | 1 0 | | X11 X12 |
594 * | | | = | |
595 * |_ a21 a22 | 0 1 _| |_ X21 X21 _|
596 *
597 * 2. In our implementation, pDst Matrix is used as identity matrix.
598 *
599 * 3. Begin with the first row. Let i = 1.
600 *
601 * 4. Check to see if the pivot for row i is zero.
602 * The pivot is the element of the main diagonal that is on the current row.
603 * For instance, if working with row i, then the pivot element is aii.
604 * If the pivot is zero, exchange that row with a row below it that does not
605 * contain a zero in column i. If this is not possible, then an inverse
606 * to that matrix does not exist.
607 *
608 * 5. Divide every element of row i by the pivot.
609 *
610 * 6. For every row below and row i, replace that row with the sum of that row and
611 * a multiple of row i so that each new element in column i below row i is zero.
612 *
613 * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
614 * for every element below and above the main diagonal.
615 *
616 * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
617 * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
618 *----------------------------------------------------------------------------------------------------------------*/
619
620 /* Working pointer for destination matrix */
621 pOutT1 = pOut;
622
623 /* Loop over the number of rows */
624 rowCnt = numRows;
625
626 /* Making the destination matrix as identity matrix */
627 while (rowCnt > 0U)
628 {
629 /* Writing all zeroes in lower triangle of the destination matrix */
630 j = numRows - rowCnt;
631 while (j > 0U)
632 {
633 *pOutT1++ = 0.0f16;
634 j--;
635 }
636
637 /* Writing all ones in the diagonal of the destination matrix */
638 *pOutT1++ = 1.0f16;
639
640 /* Writing all zeroes in upper triangle of the destination matrix */
641 j = rowCnt - 1U;
642 while (j > 0U)
643 {
644 *pOutT1++ = 0.0f16;
645 j--;
646 }
647
648 /* Decrement loop counter */
649 rowCnt--;
650 }
651
652 /* Loop over the number of columns of the input matrix.
653 All the elements in each column are processed by the row operations */
654 loopCnt = numCols;
655
656 /* Index modifier to navigate through the columns */
657 l = 0U;
658
659 while (loopCnt > 0U)
660 {
661 /* Check if the pivot element is zero..
662 * If it is zero then interchange the row with non zero row below.
663 * If there is no non zero element to replace in the rows below,
664 * then the matrix is Singular. */
665
666 /* Working pointer for the input matrix that points
667 * to the pivot element of the particular row */
668 pInT1 = pIn + (l * numCols);
669
670 /* Working pointer for the destination matrix that points
671 * to the pivot element of the particular row */
672 pOutT1 = pOut + (l * numCols);
673
674 /* Temporary variable to hold the pivot value */
675 in = *pInT1;
676
677
678 /* Check if the pivot element is zero */
679 if (*pInT1 == 0.0f16)
680 {
681 /* Loop over the number rows present below */
682
683 for (i = 1U; i < numRows-l; i++)
684 {
685 /* Update the input and destination pointers */
686 pInT2 = pInT1 + (numCols * i);
687 pOutT2 = pOutT1 + (numCols * i);
688
689 /* Check if there is a non zero pivot element to
690 * replace in the rows below */
691 if (*pInT2 != 0.0f16)
692 {
693 /* Loop over number of columns
694 * to the right of the pilot element */
695 j = numCols - l;
696
697 while (j > 0U)
698 {
699 /* Exchange the row elements of the input matrix */
700 Xchg = *pInT2;
701 *pInT2++ = *pInT1;
702 *pInT1++ = Xchg;
703
704 /* Decrement the loop counter */
705 j--;
706 }
707
708 /* Loop over number of columns of the destination matrix */
709 j = numCols;
710
711 while (j > 0U)
712 {
713 /* Exchange the row elements of the destination matrix */
714 Xchg = *pOutT2;
715 *pOutT2++ = *pOutT1;
716 *pOutT1++ = Xchg;
717
718 /* Decrement loop counter */
719 j--;
720 }
721
722 /* Flag to indicate whether exchange is done or not */
723 flag = 1U;
724
725 /* Break after exchange is done */
726 break;
727 }
728
729 }
730 }
731
732 /* Update the status if the matrix is singular */
733 if ((flag != 1U) && (in == 0.0f16))
734 {
735 return ARM_MATH_SINGULAR;
736 }
737
738 /* Points to the pivot row of input and destination matrices */
739 pPivotRowIn = pIn + (l * numCols);
740 pPivotRowDst = pOut + (l * numCols);
741
742 /* Temporary pointers to the pivot row pointers */
743 pInT1 = pPivotRowIn;
744 pInT2 = pPivotRowDst;
745
746 /* Pivot element of the row */
747 in = *pPivotRowIn;
748
749 /* Loop over number of columns
750 * to the right of the pilot element */
751 j = (numCols - l);
752
753 while (j > 0U)
754 {
755 /* Divide each element of the row of the input matrix
756 * by the pivot element */
757 in1 = *pInT1;
758 *pInT1++ = in1 / in;
759
760 /* Decrement the loop counter */
761 j--;
762 }
763
764 /* Loop over number of columns of the destination matrix */
765 j = numCols;
766
767 while (j > 0U)
768 {
769 /* Divide each element of the row of the destination matrix
770 * by the pivot element */
771 in1 = *pInT2;
772 *pInT2++ = in1 / in;
773
774 /* Decrement the loop counter */
775 j--;
776 }
777
778 /* Replace the rows with the sum of that row and a multiple of row i
779 * so that each new element in column i above row i is zero.*/
780
781 /* Temporary pointers for input and destination matrices */
782 pInT1 = pIn;
783 pInT2 = pOut;
784
785 /* index used to check for pivot element */
786 i = 0U;
787
788 /* Loop over number of rows */
789 /* to be replaced by the sum of that row and a multiple of row i */
790 k = numRows;
791
792 while (k > 0U)
793 {
794 /* Check for the pivot element */
795 if (i == l)
796 {
797 /* If the processing element is the pivot element,
798 only the columns to the right are to be processed */
799 pInT1 += numCols - l;
800
801 pInT2 += numCols;
802 }
803 else
804 {
805 /* Element of the reference row */
806 in = *pInT1;
807
808 /* Working pointers for input and destination pivot rows */
809 pPRT_in = pPivotRowIn;
810 pPRT_pDst = pPivotRowDst;
811
812 /* Loop over the number of columns to the right of the pivot element,
813 to replace the elements in the input matrix */
814 j = (numCols - l);
815
816 while (j > 0U)
817 {
818 /* Replace the element by the sum of that row
819 and a multiple of the reference row */
820 in1 = *pInT1;
821 *pInT1++ = in1 - (in * *pPRT_in++);
822
823 /* Decrement the loop counter */
824 j--;
825 }
826
827 /* Loop over the number of columns to
828 replace the elements in the destination matrix */
829 j = numCols;
830
831 while (j > 0U)
832 {
833 /* Replace the element by the sum of that row
834 and a multiple of the reference row */
835 in1 = *pInT2;
836 *pInT2++ = in1 - (in * *pPRT_pDst++);
837
838 /* Decrement loop counter */
839 j--;
840 }
841
842 }
843
844 /* Increment temporary input pointer */
845 pInT1 = pInT1 + l;
846
847 /* Decrement loop counter */
848 k--;
849
850 /* Increment pivot index */
851 i++;
852 }
853
854 /* Increment the input pointer */
855 pIn++;
856
857 /* Decrement the loop counter */
858 loopCnt--;
859
860 /* Increment the index modifier */
861 l++;
862 }
863
864 /* Set status as ARM_MATH_SUCCESS */
865 status = ARM_MATH_SUCCESS;
866
867 if ((flag != 1U) && (in == 0.0f16))
868 {
869 pIn = pSrc->pData;
870 for (i = 0; i < numRows * numCols; i++)
871 {
872 if (pIn[i] != 0.0f16)
873 break;
874 }
875
876 if (i == numRows * numCols)
877 status = ARM_MATH_SINGULAR;
878 }
879 }
880
881 /* Return to application */
882 return (status);
883 }
884 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
885
886 /**
887 @} end of MatrixInv group
888 */
889
890 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
891
892