1 #include "UnaryTestsF32.h"
2 #include "Error.h"
3
4
/*
 * Comparison tolerances used by the tests in this file.
 *
 * SNR_* values are passed to ASSERT_SNR, the ABS_* / REL_* pairs are passed
 * to ASSERT_CLOSE_ERROR.
 */

/* Default tolerances. */
#define SNR_THRESHOLD 120

/* Reference patterns are generated with a double precision computation. */
#define REL_ERROR (1.0e-5)
#define ABS_ERROR (1.0e-5)

/* Comparisons for Householder. */
#define SNR_HOUSEHOLDER_THRESHOLD 140
#define REL_HOUSEHOLDER_ERROR (1.0e-7)
#define ABS_HOUSEHOLDER_ERROR (1.0e-7)

/* Comparisons for QR decomposition. */
#define SNR_QR_THRESHOLD 90
#define REL_QR_ERROR (1.0e-4)
#define ABS_QR_ERROR (2.0e-4)

/*
 * Comparisons for inverse.
 *
 * Not very accurate for big matrixes, but big matrixes are needed
 * for checking the vectorized code.
 */
#define SNR_THRESHOLD_INV 99
#define REL_ERROR_INV (3.0e-5)
#define ABS_ERROR_INV (2.0e-5)

/* Comparison for Cholesky. */
#define SNR_THRESHOLD_CHOL 92
#define REL_ERROR_CHOL (1.0e-5)
#define ABS_ERROR_CHOL (5.0e-4)

/* LDLT comparison (definite positive case). */
#define REL_ERROR_LDLT (1e-5)
#define ABS_ERROR_LDLT (1e-5)

/* LDLT comparison (semi definite positive case). */
#define REL_ERROR_LDLT_SDPO (1e-5)
#define ABS_ERROR_LDLT_SDPO (2e-1)

/*
 * Historical spelling ("SPDO" instead of "SDPO") that is still referenced
 * in this file. Kept as an alias so both spellings resolve to the same value.
 */
#define REL_ERROR_LDLT_SPDO REL_ERROR_LDLT_SDPO

/* Upper bound of maximum matrix dimension used by Python */
#define MAXMATRIXDIM 40
66
checkInnerTailOverflow(float32_t * b)67 static void checkInnerTailOverflow(float32_t *b)
68 {
69 ASSERT_TRUE(b[0] == 0);
70 ASSERT_TRUE(b[1] == 0);
71 ASSERT_TRUE(b[2] == 0);
72 ASSERT_TRUE(b[3] == 0);
73 }
74
/*
 * Declares the locals used by the two-operand matrix tests:
 *   inp1 / inp2 : read pointers into the input1 / input2 patterns
 *   ap / bp     : pointers to the a / b scratch buffers
 *   outp        : write cursor into the output pattern
 *   dimsp       : cursor into the dims pattern
 *   nbMatrixes  : half the number of dims samples (two entries per matrix)
 *   rows, columns, i : left uninitialized for the calling test to fill in
 */
#define LOADDATA2() \
    const float32_t *inp1 = input1.ptr(); \
    const float32_t *inp2 = input2.ptr(); \
    float32_t *ap = a.ptr(); \
    float32_t *bp = b.ptr(); \
    float32_t *outp = output.ptr(); \
    int16_t *dimsp = dims.ptr(); \
    int nbMatrixes = dims.nbSamples() / 2; \
    int rows; \
    int columns; \
    int i;
87
/*
 * Declares the locals used by the single-operand matrix tests:
 *   inp1        : read pointer into the input1 pattern
 *   ap          : pointer to the a scratch buffer
 *   outp        : write cursor into the output pattern
 *   dimsp       : cursor into the dims pattern
 *   nbMatrixes  : half the number of dims samples (two entries per matrix)
 *   rows, columns, i : left uninitialized for the calling test to fill in
 */
#define LOADDATA1() \
    const float32_t *inp1 = input1.ptr(); \
    float32_t *ap = a.ptr(); \
    float32_t *outp = output.ptr(); \
    int16_t *dimsp = dims.ptr(); \
    int nbMatrixes = dims.nbSamples() / 2; \
    int rows; \
    int columns; \
    int i;
98
/*
 * Sets up in1, in2 and out as rows x columns matrixes.
 *
 * The first rows*columns samples at inp1 / inp2 are copied into the scratch
 * buffers ap / bp, which become the data of in1 / in2. out writes at outp.
 */
#define PREPAREDATA2() \
    memcpy(ap, inp1, sizeof(float32_t) * rows * columns); \
    in1.pData = ap; \
    in1.numRows = rows; \
    in1.numCols = columns; \
    \
    memcpy(bp, inp2, sizeof(float32_t) * rows * columns); \
    in2.pData = bp; \
    in2.numRows = rows; \
    in2.numCols = columns; \
    \
    out.pData = outp; \
    out.numRows = rows; \
    out.numCols = columns;
113
/*
 * Sets up the operands of the triangular solvers:
 *   in1 : square rows x rows matrix, data copied from inp1 into ap
 *   in2 : rows x columns matrix, data copied from inp2 into bp
 *   out : rows x columns matrix writing at outp
 */
#define PREPAREDATALT() \
    memcpy(ap, inp1, sizeof(float32_t) * rows * rows); \
    in1.pData = ap; \
    in1.numRows = rows; \
    in1.numCols = rows; \
    \
    memcpy(bp, inp2, sizeof(float32_t) * rows * columns); \
    in2.pData = bp; \
    in2.numRows = rows; \
    in2.numCols = columns; \
    \
    out.pData = outp; \
    out.numRows = rows; \
    out.numCols = columns;
128
/*
 * Sets up in1 as a rows x columns matrix whose data is copied from inp1
 * into ap, and out as the destination writing at outp.
 *
 * When TRANSPOSED is true the output dimensions are swapped
 * (columns x rows), otherwise they match the input.
 */
#define PREPAREDATA1(TRANSPOSED) \
    memcpy(ap, inp1, sizeof(float32_t) * rows * columns); \
    in1.pData = ap; \
    in1.numRows = rows; \
    in1.numCols = columns; \
    \
    out.pData = outp; \
    out.numRows = (TRANSPOSED) ? columns : rows; \
    out.numCols = (TRANSPOSED) ? rows : columns;
146
/*
 * Same as PREPAREDATA1 except that twice as many float32 samples
 * (2 * rows * columns) are copied from inp1 into ap, as used by the
 * complex transpose test.
 *
 * When TRANSPOSED is true the output dimensions are swapped
 * (columns x rows), otherwise they match the input.
 */
#define PREPAREDATA1C(TRANSPOSED) \
    memcpy(ap, inp1, 2 * sizeof(float32_t) * rows * columns); \
    in1.pData = ap; \
    in1.numRows = rows; \
    in1.numCols = columns; \
    \
    out.pData = outp; \
    out.numRows = (TRANSPOSED) ? columns : rows; \
    out.numCols = (TRANSPOSED) ? rows : columns;
164
/*
 * Declares the locals used by the matrix-vector test:
 *   inp1 / inp2 : read pointers into the input1 / input2 patterns
 *   ap / bp     : pointers to the a / b scratch buffers
 *   outp        : write cursor into the output pattern
 *   dimsp       : cursor into the dims pattern
 *   nbMatrixes  : half the number of dims samples (two entries per matrix)
 *   rows, internal, i : left uninitialized for the calling test to fill in
 */
#define LOADVECDATA2() \
    const float32_t *inp1 = input1.ptr(); \
    const float32_t *inp2 = input2.ptr(); \
    float32_t *ap = a.ptr(); \
    float32_t *bp = b.ptr(); \
    float32_t *outp = output.ptr(); \
    int16_t *dimsp = dims.ptr(); \
    int nbMatrixes = dims.nbSamples() / 2; \
    int rows; \
    int internal; \
    int i;
177
/*
 * Sets up the operands of the matrix-vector product:
 *   in1 : rows x internal matrix, data copied from inp1 into ap
 *   bp  : receives the first `internal` samples found at inp2
 */
#define PREPAREVECDATA2() \
    memcpy(ap, inp1, sizeof(float32_t) * rows * internal); \
    in1.pData = ap; \
    in1.numRows = rows; \
    in1.numCols = internal; \
    \
    memcpy(bp, inp2, sizeof(float32_t) * internal);
185
/*
 * Sets up the operands used by the LDLT test:
 *   in1   : rows x columns matrix, data copied from inp1 into ap
 *   outll : rows x columns matrix writing at outllp
 */
#define PREPAREDATALL1() \
    memcpy(ap, inp1, sizeof(float32_t) * rows * columns); \
    in1.pData = ap; \
    in1.numRows = rows; \
    in1.numCols = columns; \
    \
    outll.pData = outllp; \
    outll.numRows = rows; \
    outll.numCols = columns;
196
/*
 * Swaps rows i and j of the row-major matrix A, element by element.
 *
 * The row length `n` is NOT a parameter: it is taken from the scope in
 * which the macro is expanded.
 *
 * The arguments are parenthesized so that expressions such as `k + 1`
 * select the intended row, and the body is wrapped in do { } while(0) so
 * the macro behaves as one statement (safe inside an unbraced if/else).
 * The invocation must be terminated by a semicolon. The arguments are
 * evaluated several times, so they must not have side effects.
 */
#define SWAP_ROWS(A,i,j) \
    do \
    { \
        for(int w=0;w < n; w++) \
        { \
            float64_t tmp; \
            tmp = (A)[(i)*n + w]; \
            (A)[(i)*n + w] = (A)[(j)*n + w]; \
            (A)[(j)*n + w] = tmp; \
        } \
    } while(0)
205
test_householder_f32()206 void UnaryTestsF32::test_householder_f32()
207 {
208 int32_t vecDim;
209 const int16_t *dimsp = dims.ptr();
210 const int nbVectors = dims.nbSamples();
211 const float32_t *inp1=input1.ptr();
212
213 float32_t *outp=output.ptr();
214 float32_t *outBetap=outputBeta.ptr();
215
216
217 for(int i=0; i < nbVectors ; i++)
218 {
219 vecDim = *dimsp++;
220
221 float32_t beta = arm_householder_f32(inp1,DEFAULT_HOUSEHOLDER_THRESHOLD_F32,vecDim,outp);
222 *outBetap = beta;
223
224 outp += vecDim;
225 inp1 += vecDim;
226 outBetap++;
227 checkInnerTailOverflow(outp);
228 checkInnerTailOverflow(outBetap);
229
230 }
231
232 ASSERT_EMPTY_TAIL(output);
233 ASSERT_EMPTY_TAIL(outputBeta);
234
235 ASSERT_SNR(output,ref,(float32_t)SNR_HOUSEHOLDER_THRESHOLD);
236 ASSERT_SNR(outputBeta,refBeta,(float32_t)SNR_HOUSEHOLDER_THRESHOLD);
237
238 ASSERT_CLOSE_ERROR(output,ref,ABS_HOUSEHOLDER_ERROR,REL_HOUSEHOLDER_ERROR);
239 ASSERT_CLOSE_ERROR(outputBeta,refBeta,ABS_HOUSEHOLDER_ERROR,REL_HOUSEHOLDER_ERROR);
240
241
242 }
243
244
test_mat_qr_f32()245 void UnaryTestsF32::test_mat_qr_f32()
246 {
247 int32_t rows, columns, rank;
248 int nb;
249 const int16_t *dimsp = dims.ptr();
250 const int nbMatrixes = dims.nbSamples() / 3;
251 const float32_t *inp1=input1.ptr();
252
253 float32_t *outTaup=outputTau.ptr();
254 float32_t *outRp=outputR.ptr();
255 float32_t *outQp=outputQ.ptr();
256
257 float32_t *pTmpA=a.ptr();
258 float32_t *pTmpB=b.ptr();
259
260 (void) outTaup;
261 (void) outRp;
262 (void) outQp;
263 (void)nbMatrixes;
264 (void)nb;
265
266 nb=0;
267 for(int i=0; i < nbMatrixes ; i++)
268 //for(int i=0; i < 1 ; i++)
269 {
270 rows = *dimsp++;
271 columns = *dimsp++;
272 rank = *dimsp++;
273 (void)rank;
274
275 //printf("--> %d %d\n",nb,i);
276 nb += rows * columns;
277
278
279 in1.numRows=rows;
280 in1.numCols=columns;
281 in1.pData = (float32_t*)inp1;
282
283 outR.numRows = rows;
284 outR.numCols = columns;
285 outR.pData = (float32_t*)outRp;
286
287 outQ.numRows = rows;
288 outQ.numCols = rows;
289 outQ.pData = (float32_t*)outQp;
290
291
292 arm_status status=arm_mat_qr_f32(&in1,DEFAULT_HOUSEHOLDER_THRESHOLD_F32,&outR,&outQ,outTaup,pTmpA,pTmpB);
293 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
294
295
296 inp1 += rows * columns;
297 outRp += rows * columns;
298 outQp += rows * rows;
299 outTaup += columns;
300
301 checkInnerTailOverflow(outRp);
302 checkInnerTailOverflow(outQp);
303 checkInnerTailOverflow(outTaup);
304
305 }
306
307 ASSERT_EMPTY_TAIL(outputR);
308 ASSERT_EMPTY_TAIL(outputQ);
309 ASSERT_EMPTY_TAIL(outputTau);
310
311 ASSERT_SNR(refQ,outputQ,(float32_t)SNR_QR_THRESHOLD);
312 ASSERT_SNR(refR,outputR,(float32_t)SNR_QR_THRESHOLD);
313 ASSERT_SNR(refTau,outputTau,(float32_t)SNR_QR_THRESHOLD);
314
315 ASSERT_CLOSE_ERROR(refQ,outputQ,ABS_QR_ERROR,REL_QR_ERROR);
316 ASSERT_CLOSE_ERROR(refR,outputR,ABS_QR_ERROR,REL_QR_ERROR);
317 ASSERT_CLOSE_ERROR(refTau,outputTau,ABS_QR_ERROR,REL_QR_ERROR);
318 }
319
test_mat_vec_mult_f32()320 void UnaryTestsF32::test_mat_vec_mult_f32()
321 {
322 LOADVECDATA2();
323
324 for(i=0;i < nbMatrixes ; i ++)
325 {
326 rows = *dimsp++;
327 internal = *dimsp++;
328
329 PREPAREVECDATA2();
330
331 arm_mat_vec_mult_f32(&this->in1, bp, outp);
332
333 outp += rows ;
334 checkInnerTailOverflow(outp);
335
336 }
337
338 ASSERT_EMPTY_TAIL(output);
339
340 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
341
342 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
343
344 }
345
test_mat_add_f32()346 void UnaryTestsF32::test_mat_add_f32()
347 {
348 LOADDATA2();
349 arm_status status;
350
351 for(i=0;i < nbMatrixes ; i ++)
352 {
353 rows = *dimsp++;
354 columns = *dimsp++;
355
356 PREPAREDATA2();
357
358 status=arm_mat_add_f32(&this->in1,&this->in2,&this->out);
359 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
360
361 outp += (rows * columns);
362 checkInnerTailOverflow(outp);
363
364 }
365
366 ASSERT_EMPTY_TAIL(output);
367
368 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
369
370 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
371
372 }
373
test_mat_sub_f32()374 void UnaryTestsF32::test_mat_sub_f32()
375 {
376 LOADDATA2();
377 arm_status status;
378
379 for(i=0;i < nbMatrixes ; i ++)
380 {
381 rows = *dimsp++;
382 columns = *dimsp++;
383
384 PREPAREDATA2();
385
386 status=arm_mat_sub_f32(&this->in1,&this->in2,&this->out);
387 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
388
389 outp += (rows * columns);
390 checkInnerTailOverflow(outp);
391
392 }
393
394 ASSERT_EMPTY_TAIL(output);
395
396 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
397
398 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
399
400 }
401
test_mat_scale_f32()402 void UnaryTestsF32::test_mat_scale_f32()
403 {
404 LOADDATA1();
405 arm_status status;
406
407 for(i=0;i < nbMatrixes ; i ++)
408 {
409 rows = *dimsp++;
410 columns = *dimsp++;
411
412 PREPAREDATA1(false);
413
414 status=arm_mat_scale_f32(&this->in1,0.5f,&this->out);
415 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
416
417 outp += (rows * columns);
418 checkInnerTailOverflow(outp);
419
420 }
421
422 ASSERT_EMPTY_TAIL(output);
423
424 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
425
426 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
427
428 }
429
test_mat_trans_f32()430 void UnaryTestsF32::test_mat_trans_f32()
431 {
432 LOADDATA1();
433 arm_status status;
434
435 for(i=0;i < nbMatrixes ; i ++)
436 {
437 rows = *dimsp++;
438 columns = *dimsp++;
439
440 PREPAREDATA1(true);
441
442 status=arm_mat_trans_f32(&this->in1,&this->out);
443 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
444
445 outp += (rows * columns);
446 checkInnerTailOverflow(outp);
447
448 }
449
450 ASSERT_EMPTY_TAIL(output);
451
452 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
453
454 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
455
456 }
457
test_mat_cmplx_trans_f32()458 void UnaryTestsF32::test_mat_cmplx_trans_f32()
459 {
460 LOADDATA1();
461 arm_status status;
462
463 for(i=0;i < nbMatrixes ; i ++)
464 {
465 rows = *dimsp++;
466 columns = *dimsp++;
467
468 PREPAREDATA1C(true);
469
470 status=arm_mat_cmplx_trans_f32(&this->in1,&this->out);
471 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
472
473 outp += 2*(rows * columns);
474 checkInnerTailOverflow(outp);
475
476 }
477
478 ASSERT_EMPTY_TAIL(output);
479
480 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
481
482 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
483
484 }
485
refInnerTail(float32_t * b)486 static void refInnerTail(float32_t *b)
487 {
488 b[0] = 1.0f;
489 b[1] = -2.0f;
490 b[2] = 3.0f;
491 b[3] = -4.0f;
492 }
493
checkInnerTail(float32_t * b)494 static void checkInnerTail(float32_t *b)
495 {
496 ASSERT_TRUE(b[0] == 1.0f);
497 ASSERT_TRUE(b[1] == -2.0f);
498 ASSERT_TRUE(b[2] == 3.0f);
499 ASSERT_TRUE(b[3] == -4.0f);
500 }
501
502
503
504
test_mat_inverse_f32()505 void UnaryTestsF32::test_mat_inverse_f32()
506 {
507 const float32_t *inp1=input1.ptr();
508
509 float32_t *ap=a.ptr();
510
511 float32_t *outp=output.ptr();
512 int16_t *dimsp = dims.ptr();
513 int nbMatrixes = dims.nbSamples();
514 int rows,columns;
515 int i;
516 arm_status status;
517
518 // Non singular matrixes
519 // Last matrix is singular
520 for(i=0;i < nbMatrixes-1 ; i ++)
521 {
522 rows = *dimsp++;
523 columns = rows;
524
525 PREPAREDATA1(false);
526
527 refInnerTail(outp+(rows * columns));
528
529 status=arm_mat_inverse_f32(&this->in1,&this->out);
530 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
531
532 outp += (rows * columns);
533 inp1 += (rows * columns);
534
535 checkInnerTail(outp);
536
537 }
538
539 /*** Singular matrix **/
540 rows = *dimsp++;
541 columns = rows;
542
543 PREPAREDATA1(false);
544
545 refInnerTail(outp+(rows * columns));
546
547 status=arm_mat_inverse_f32(&this->in1,&this->out);
548 ASSERT_TRUE(status==ARM_MATH_SINGULAR);
549
550 outp += (rows * columns);
551 inp1 += (rows * columns);
552
553 checkInnerTail(outp);
554
555 /*********************/
556
557 ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR_INV,REL_ERROR_INV);
558
559 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD_INV);
560
561
562 }
563
test_mat_cholesky_dpo_f32()564 void UnaryTestsF32::test_mat_cholesky_dpo_f32()
565 {
566 float32_t *ap=a.ptr();
567 const float32_t *inp1=input1.ptr();
568
569
570 float32_t *outp=output.ptr();
571 int16_t *dimsp = dims.ptr();
572 int nbMatrixes = dims.nbSamples();
573
574 int rows,columns;
575 int i;
576 arm_status status;
577
578 for(i=0;i < nbMatrixes ; i ++)
579 {
580 rows = *dimsp++;
581 columns = rows;
582
583 PREPAREDATA1(false);
584
585 status=arm_mat_cholesky_f32(&this->in1,&this->out);
586 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
587
588 outp += (rows * columns);
589 inp1 += (rows * columns);
590 checkInnerTailOverflow(outp);
591
592 }
593
594 ASSERT_EMPTY_TAIL(output);
595
596 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD_CHOL);
597
598 ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR_CHOL,REL_ERROR_CHOL);
599 }
600
test_solve_upper_triangular_f32()601 void UnaryTestsF32::test_solve_upper_triangular_f32()
602 {
603 float32_t *ap=a.ptr();
604 const float32_t *inp1=input1.ptr();
605
606 float32_t *bp=b.ptr();
607 const float32_t *inp2=input2.ptr();
608
609
610 float32_t *outp=output.ptr();
611 int16_t *dimsp = dims.ptr();
612 int nbMatrixes = dims.nbSamples()>>1;
613
614 int rows,columns;
615 int i;
616 arm_status status;
617
618 for(i=0;i < nbMatrixes ; i ++)
619 {
620 rows = *dimsp++;
621 columns = *dimsp++;
622
623 PREPAREDATALT();
624
625 status=arm_mat_solve_upper_triangular_f32(&this->in1,&this->in2,&this->out);
626 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
627
628 outp += (rows * columns);
629 inp1 += (rows * rows);
630 inp2 += (rows * columns);
631 checkInnerTailOverflow(outp);
632
633 }
634
635 ASSERT_EMPTY_TAIL(output);
636
637 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
638
639 ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
640 }
641
test_solve_lower_triangular_f32()642 void UnaryTestsF32::test_solve_lower_triangular_f32()
643 {
644 float32_t *ap=a.ptr();
645 const float32_t *inp1=input1.ptr();
646
647 float32_t *bp=b.ptr();
648 const float32_t *inp2=input2.ptr();
649
650
651 float32_t *outp=output.ptr();
652 int16_t *dimsp = dims.ptr();
653 int nbMatrixes = dims.nbSamples() >> 1;
654
655 int rows,columns;
656 int i;
657 arm_status status;
658
659 for(i=0;i < nbMatrixes ; i ++)
660 {
661 rows = *dimsp++;
662 columns = *dimsp++;
663
664 PREPAREDATALT();
665
666 status=arm_mat_solve_lower_triangular_f32(&this->in1,&this->in2,&this->out);
667 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
668
669 outp += (rows * columns);
670 inp1 += (rows * rows);
671 inp2 += (rows * columns);
672 checkInnerTailOverflow(outp);
673
674 }
675
676 ASSERT_EMPTY_TAIL(output);
677
678 ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
679
680 ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
681 }
682
trans_f64(const float64_t * src,float64_t * dst,int n)683 static void trans_f64(const float64_t *src, float64_t *dst, int n)
684 {
685 for(int r=0; r<n ; r++)
686 {
687 for(int c=0; c<n ; c++)
688 {
689 dst[c*n+r] = src[r*n+c];
690 }
691 }
692 }
693
trans_f32_f64(const float32_t * src,float64_t * dst,int n)694 static void trans_f32_f64(const float32_t *src, float64_t *dst, int n)
695 {
696 for(int r=0; r<n ; r++)
697 {
698 for(int c=0; c<n ; c++)
699 {
700 dst[c*n+r] = (float64_t)src[r*n+c];
701 }
702 }
703 }
704
mult_f32_f64(const float32_t * srcA,const float64_t * srcB,float64_t * dst,int n)705 static void mult_f32_f64(const float32_t *srcA, const float64_t *srcB, float64_t *dst,int n)
706 {
707 for(int r=0; r<n ; r++)
708 {
709 for(int c=0; c<n ; c++)
710 {
711 float64_t sum=0.0;
712 for(int k=0; k < n ; k++)
713 {
714 sum += (float64_t)srcA[r*n+k] * srcB[k*n+c];
715 }
716 dst[r*n+c] = sum;
717 }
718 }
719 }
720
mult_f64_f64(const float64_t * srcA,const float64_t * srcB,float64_t * dst,int n)721 static void mult_f64_f64(const float64_t *srcA, const float64_t *srcB, float64_t *dst,int n)
722 {
723 for(int r=0; r<n ; r++)
724 {
725 for(int c=0; c<n ; c++)
726 {
727 float64_t sum=0.0;
728 for(int k=0; k < n ; k++)
729 {
730 sum += srcA[r*n+k] * srcB[k*n+c];
731 }
732 dst[r*n+c] = sum;
733 }
734 }
735 }
736
compute_ldlt_error(const int n,const int16_t * outpp)737 void UnaryTestsF32::compute_ldlt_error(const int n,const int16_t *outpp)
738 {
739 float64_t *tmpa = tmpapat.ptr() ;
740 float64_t *tmpb = tmpbpat.ptr() ;
741 float64_t *tmpc = tmpcpat.ptr() ;
742
743
744 /* Compute P A P^t */
745
746 // Create identiy matrix
747 for(int r=0; r < n; r++)
748 {
749 for(int c=0; c < n; c++)
750 {
751 if (r == c)
752 {
753 tmpa[r*n+c] = 1.0;
754 }
755 else
756 {
757 tmpa[r*n+c] = 0.0;
758 }
759 }
760 }
761
762
763
764 // Create permutation matrix
765
766 for(int r=0;r < n; r++)
767 {
768 SWAP_ROWS(tmpa,r,outpp[r]);
769 }
770
771
772 trans_f64((const float64_t*)tmpa,tmpb,n);
773 mult_f32_f64((const float32_t*)this->in1.pData,(const float64_t*)tmpb,tmpc,n);
774 mult_f64_f64((const float64_t*)tmpa,(const float64_t*)tmpc,outa,n);
775
776
777
778 /* Compute L D L^t */
779 trans_f32_f64((const float32_t*)this->outll.pData,tmpc,n);
780 mult_f32_f64((const float32_t*)this->outd.pData,(const float64_t*)tmpc,tmpa,n);
781 mult_f32_f64((const float32_t*)this->outll.pData,(const float64_t*)tmpa,outb,n);
782
783
784
785 }
786
787
test_mat_ldl_f32()788 void UnaryTestsF32::test_mat_ldl_f32()
789 {
790 float32_t *ap=a.ptr();
791 const float32_t *inp1=input1.ptr();
792
793
794 float32_t *outllp=outputll.ptr();
795 float32_t *outdp=outputd.ptr();
796 int16_t *outpp=outputp.ptr();
797
798
799 outa=outputa.ptr();
800 outb=outputb.ptr();
801
802 int16_t *dimsp = dims.ptr();
803 int nbMatrixes = dims.nbSamples();
804
805 int rows,columns;
806 int i;
807 arm_status status;
808
809
810 for(i=0;i < nbMatrixes ; i ++)
811 {
812 rows = *dimsp++;
813 columns = rows;
814
815 PREPAREDATALL1();
816
817 outd.numRows=rows;
818 outd.numCols=columns;
819 outd.pData=outdp;
820
821 memset(outpp,0,rows*sizeof(uint16_t));
822 memset(outdp,0,columns*rows*sizeof(float32_t));
823
824 status=arm_mat_ldlt_f32(&this->in1,&this->outll,&this->outd,(uint16_t*)outpp);
825 ASSERT_TRUE(status==ARM_MATH_SUCCESS);
826
827
828 compute_ldlt_error(rows,outpp);
829
830
831 outllp += (rows * columns);
832 outdp += (rows * columns);
833 outpp += rows;
834
835 outa += (rows * columns);
836 outb +=(rows * columns);
837
838 inp1 += (rows * columns);
839
840 checkInnerTailOverflow(outllp);
841 checkInnerTailOverflow(outdp);
842
843
844 }
845
846 ASSERT_EMPTY_TAIL(outputll);
847 ASSERT_EMPTY_TAIL(outputd);
848 ASSERT_EMPTY_TAIL(outputp);
849 ASSERT_EMPTY_TAIL(outputa);
850 ASSERT_EMPTY_TAIL(outputb);
851
852
853 ASSERT_CLOSE_ERROR(outputa,outputb,snrAbs,snrRel);
854
855
856
857 }
858
setUp(Testing::testID_t id,std::vector<Testing::param_t> & params,Client::PatternMgr * mgr)859 void UnaryTestsF32::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
860 {
861
862
863 (void)params;
864 switch(id)
865 {
866 case TEST_MAT_ADD_F32_1:
867 input1.reload(UnaryTestsF32::INPUTS1_F32_ID,mgr);
868 input2.reload(UnaryTestsF32::INPUTS2_F32_ID,mgr);
869 dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
870
871 ref.reload(UnaryTestsF32::REFADD1_F32_ID,mgr);
872
873 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
874 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
875 b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
876 break;
877
878 case TEST_MAT_SUB_F32_2:
879 input1.reload(UnaryTestsF32::INPUTS1_F32_ID,mgr);
880 input2.reload(UnaryTestsF32::INPUTS2_F32_ID,mgr);
881 dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
882
883 ref.reload(UnaryTestsF32::REFSUB1_F32_ID,mgr);
884
885 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
886 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
887 b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
888 break;
889
890 case TEST_MAT_SCALE_F32_3:
891 input1.reload(UnaryTestsF32::INPUTS1_F32_ID,mgr);
892 dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
893
894 ref.reload(UnaryTestsF32::REFSCALE1_F32_ID,mgr);
895
896 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
897 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
898 break;
899
900 case TEST_MAT_TRANS_F32_4:
901 input1.reload(UnaryTestsF32::INPUTS1_F32_ID,mgr);
902 dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
903
904 ref.reload(UnaryTestsF32::REFTRANS1_F32_ID,mgr);
905
906 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
907 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
908 break;
909
910 case TEST_MAT_INVERSE_F32_5:
911 input1.reload(UnaryTestsF32::INPUTSINV_F32_ID,mgr);
912 dims.reload(UnaryTestsF32::DIMSINVERT1_S16_ID,mgr);
913
914 ref.reload(UnaryTestsF32::REFINV1_F32_ID,mgr);
915
916 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
917 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
918 break;
919
920 case TEST_MAT_VEC_MULT_F32_6:
921 input1.reload(UnaryTestsF32::INPUTS1_F32_ID,mgr);
922 input2.reload(UnaryTestsF32::INPUTVEC1_F32_ID,mgr);
923 dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
924
925 ref.reload(UnaryTestsF32::REFVECMUL1_F32_ID,mgr);
926
927 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
928 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
929 b.create(MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
930 break;
931
932 case TEST_MAT_CMPLX_TRANS_F32_7:
933 input1.reload(UnaryTestsF32::INPUTSC1_F32_ID,mgr);
934 dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
935
936 ref.reload(UnaryTestsF32::REFTRANSC1_F32_ID,mgr);
937
938 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
939 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
940 break;
941
942 case TEST_MAT_CHOLESKY_DPO_F32_8:
943 input1.reload(UnaryTestsF32::INPUTSCHOLESKY1_DPO_F32_ID,mgr);
944 dims.reload(UnaryTestsF32::DIMSCHOLESKY1_DPO_S16_ID,mgr);
945
946 ref.reload(UnaryTestsF32::REFCHOLESKY1_DPO_F32_ID,mgr);
947
948 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
949 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
950
951
952 break;
953
954 case TEST_SOLVE_UPPER_TRIANGULAR_F32_9:
955 input1.reload(UnaryTestsF32::INPUT_MAT_UTSOLVE_F32_ID,mgr);
956 input2.reload(UnaryTestsF32::INPUT_VEC_LTSOLVE_F32_ID,mgr);
957 dims.reload(UnaryTestsF32::DIM_LTSOLVE_F32_ID,mgr);
958
959 ref.reload(UnaryTestsF32::REF_UT_SOLVE_F32_ID,mgr);
960
961 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
962 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
963 b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
964 break;
965
966 case TEST_SOLVE_LOWER_TRIANGULAR_F32_10:
967 input1.reload(UnaryTestsF32::INPUT_MAT_LTSOLVE_F32_ID,mgr);
968 input2.reload(UnaryTestsF32::INPUT_VEC_LTSOLVE_F32_ID,mgr);
969 dims.reload(UnaryTestsF32::DIM_LTSOLVE_F32_ID,mgr);
970
971 ref.reload(UnaryTestsF32::REF_LT_SOLVE_F32_ID,mgr);
972
973 output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
974 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
975 b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
976 break;
977
978 case TEST_MAT_LDL_F32_11:
979 // Definite positive test
980 input1.reload(UnaryTestsF32::INPUTSCHOLESKY1_DPO_F32_ID,mgr);
981 dims.reload(UnaryTestsF32::DIMSCHOLESKY1_DPO_S16_ID,mgr);
982
983 outputll.create(input1.nbSamples(),UnaryTestsF32::LL_F32_ID,mgr);
984 outputd.create(input1.nbSamples(),UnaryTestsF32::D_F32_ID,mgr);
985 outputp.create(input1.nbSamples(),UnaryTestsF32::PERM_S16_ID,mgr);
986
987 outputa.create(input1.nbSamples(),UnaryTestsF32::OUTA_F64_ID,mgr);
988 outputb.create(input1.nbSamples(),UnaryTestsF32::OUTB_F64_ID,mgr);
989
990 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
991
992 tmpapat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPB_F64_ID,mgr);
993 tmpbpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPC_F64_ID,mgr);
994 tmpcpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPD_F64_ID,mgr);
995
996 this->snrRel=REL_ERROR_LDLT;
997 this->snrAbs=ABS_ERROR_LDLT;
998
999 break;
1000
1001 case TEST_MAT_LDL_F32_12:
1002 // Semi definite positive test
1003 input1.reload(UnaryTestsF32::INPUTSCHOLESKY1_SDPO_F32_ID,mgr);
1004 dims.reload(UnaryTestsF32::DIMSCHOLESKY1_SDPO_S16_ID,mgr);
1005
1006 outputll.create(input1.nbSamples(),UnaryTestsF32::LL_F32_ID,mgr);
1007 outputd.create(input1.nbSamples(),UnaryTestsF32::D_F32_ID,mgr);
1008 outputp.create(input1.nbSamples(),UnaryTestsF32::PERM_S16_ID,mgr);
1009
1010 outputa.create(input1.nbSamples(),UnaryTestsF32::OUTA_F64_ID,mgr);
1011 outputb.create(input1.nbSamples(),UnaryTestsF32::OUTB_F64_ID,mgr);
1012
1013 a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
1014
1015 tmpapat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPB_F64_ID,mgr);
1016 tmpbpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPC_F64_ID,mgr);
1017 tmpcpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPD_F64_ID,mgr);
1018
1019 this->snrRel=REL_ERROR_LDLT_SPDO;
1020 this->snrAbs=ABS_ERROR_LDLT_SDPO;
1021
1022
1023 break;
1024
1025 case TEST_HOUSEHOLDER_F32_13:
1026 input1.reload(UnaryTestsF32::INPUTS_HOUSEHOLDER_F32_ID,mgr);
1027 dims.reload(UnaryTestsF32::DIMS_HOUSEHOLDER_S16_ID,mgr);
1028 ref.reload(UnaryTestsF32::REF_HOUSEHOLDER_V_F32_ID,mgr);
1029 refBeta.reload(UnaryTestsF32::REF_HOUSEHOLDER_BETA_F32_ID,mgr);
1030
1031
1032 output.create(ref.nbSamples(),UnaryTestsF32::TMPA_F32_ID,mgr);
1033 outputBeta.create(refBeta.nbSamples(),UnaryTestsF32::TMPB_F32_ID,mgr);
1034 break;
1035
1036
1037 case TEST_MAT_QR_F32_14:
1038 input1.reload(UnaryTestsF32::INPUTS_QR_F32_ID,mgr);
1039 dims.reload(UnaryTestsF32::DIMS_QR_S16_ID,mgr);
1040 refTau.reload(UnaryTestsF32::REF_QR_TAU_F32_ID,mgr);
1041 refR.reload(UnaryTestsF32::REF_QR_R_F32_ID,mgr);
1042 refQ.reload(UnaryTestsF32::REF_QR_Q_F32_ID,mgr);
1043
1044
1045 outputTau.create(refTau.nbSamples(),UnaryTestsF32::TMPA_F32_ID,mgr);
1046 outputR.create(refR.nbSamples(),UnaryTestsF32::TMPB_F32_ID,mgr);
1047 outputQ.create(refQ.nbSamples(),UnaryTestsF32::TMPC_F32_ID,mgr);
1048
1049 a.create(47,UnaryTestsF32::TMPC_F32_ID,mgr);
1050 b.create(47,UnaryTestsF32::TMPD_F32_ID,mgr);
1051 break;
1052
1053
1054 }
1055
1056
1057
1058 }
1059
tearDown(Testing::testID_t id,Client::PatternMgr * mgr)1060 void UnaryTestsF32::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
1061 {
1062 (void)id;
1063 (void)mgr;
1064
1065 switch(id)
1066 {
1067 case TEST_MAT_LDL_F32_11:
1068 //outputll.dump(mgr);
1069 break;
1070 case TEST_MAT_QR_F32_14:
1071 //outputR.dump(mgr);
1072 break;
1073 }
1074 //output.dump(mgr);
1075 }
1076