// make test-div64_64.bin && qemu-system-arm -M lm3s6965evb -cpu cortex-m3 -nographic -serial null -monitor null -semihosting -kernel test-div64_64.bin
2 #include <stdio.h>
3 #include <math.h>
4 #include "pico/divider.h"
5 #include "pico/stdlib.h"
6 
7 #ifdef TURBO
8 #include "hardware/vreg.h"
9 #endif
10 
/* Short aliases for the fixed-width integer types used throughout this file. */
typedef uint64_t ui64;
typedef int64_t i64;
typedef uint32_t ui32;
typedef int32_t i32;
15 
/*
 * Unsigned 64-bit division under test.
 *
 * Calls divmod_u64u64_rem() with dividend *y and divisor *x, stores the
 * value it returns in *q and hands r to the library call as the place
 * to deposit the remainder.
 */
void test_mulib_divu64u64(ui64 *y, ui64 *x, ui64 *q, ui64 *r) {
    *q = divmod_u64u64_rem(*y, *x, r);
}
/*
 * Signed 64-bit division under test.
 *
 * Calls divmod_s64s64_rem() with dividend *y and divisor *x, stores the
 * value it returns in *q and hands r to the library call as the place
 * to deposit the remainder.
 */
void test_mulib_divs64s64(i64 *y, i64 *x, i64 *q, i64 *r) {
    *q = divmod_s64s64_rem(*y, *x, r);
}
22 
23 ui32 hwdiv_data[4];
24 
hwdiv_sim()25 void hwdiv_sim() {
26   hwdiv_data[2]=hwdiv_data[0]/hwdiv_data[1];
27   hwdiv_data[3]=hwdiv_data[0]%hwdiv_data[1];
28 //  ostr("HWS: ");
29 //  o8hex(hwdiv_data[0]); osp();
30 //  o8hex(hwdiv_data[1]); osp();
31 //  o8hex(hwdiv_data[2]); osp();
32 //  o8hex(hwdiv_data[3]); onl();
33 }
34 
35 ui64 ntests=0;
36 
37 
38 #ifdef uart_default
o1ch(int c)39 void o1ch(int c) {
40     uart_putc(uart_default, c);
41 }
42 
/*
 * Write the NUL-terminated string p, one character at a time.
 * The pointer is const-qualified because the string is only read and every
 * caller in this file passes a string literal.
 */
void ostr(const char *p) {
    while (*p) {
        o1ch(*p++);
    }
}

/* Write a CR/LF line ending. */
void onl() {
    ostr("\r\n");
}

/* Write a single space. */
void osp() {
    o1ch(' ');
}

/* Write the string p followed by a CR/LF line ending. */
void ostrnl(const char *p) {
    ostr(p);
    onl();
}
o1hex(int u)47 void o1hex(int u) {u&=0x0f; if(u>=10) o1ch(u-10+'A'); else o1ch(u+'0');}
o2hex(int u)48 void o2hex(int u) {o1hex(u>> 4); o1hex(u);}
o4hex(int u)49 void o4hex(int u) {o2hex(u>> 8); o2hex(u);}
o8hex(int u)50 void o8hex(int u) {o4hex(u>>16); o4hex(u);}
o16hex(ui64 u)51 void o16hex(ui64 u) {o8hex(u>>32); o8hex(u);}
/*
 * Print one decimal digit of *pv for the place value d.
 *
 * The digit is obtained by repeatedly subtracting d from *pv; the reduced
 * value is written back to *pv.
 *
 * zf is the "still in leading zeros" flag: when it is 1 and the digit is
 * '0' a space is printed instead of the digit. Otherwise the digit is
 * printed and the flag is cleared.
 *
 * Returns the updated flag.
 */
unsigned int odig(unsigned int *pv, unsigned int d, int zf) {
    unsigned int v = *pv;
    char c = '0';

    while (v >= d) {
        v -= d;
        c++;
    }

    if (zf == 1 && c == '0') {
        osp();
    } else {
        o1ch(c);
        zf = 0;
    }

    *pv = v;
    return zf;
}
61 
/*
 * Print u in decimal using ten digit positions, with leading zeros shown as
 * spaces. For negative u a '-' is written first (before the padding) and
 * the magnitude is printed.
 */
void odec(int u) {
    unsigned int v = u;
    int zf = 1;

    if (u < 0) {
        o1ch('-');
        v = -v;  /* unsigned negation: well defined even for the most negative int */
    }

    /* Place values 10^9 down to 10, suppressing leading zeros. */
    for (unsigned int d = 1000000000; d >= 10; d /= 10) {
        zf = odig(&v, d, zf);
    }

    /* The units digit is always printed, even when it is zero. */
    odig(&v, 1, 0);
}
77 #endif
78 
79 
/*
 * Return the numeric value (0..15) of the hexadecimal digit character c,
 * accepting both upper and lower case, or -1 if c is not a hex digit.
 */
int xdigval(int c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}
86 
87 ui64 seed;
88 
rnd64()89 ui64 rnd64() {
90   if(seed&1) seed=(seed>>1)^0x800000000000000dULL;
91   else       seed= seed>>1;
92   return seed;
93 }
94 
/* Advance the generator via rnd64() and return the result truncated to unsigned int. */
unsigned int rnd32() {
    return (unsigned int)rnd64();
}
98 
99 //#define RANDOMISE
100 //#define rfn "/dev/random"
101 
102 
103 #ifdef uart_default
/*
 * Check one unsigned 64-bit division y / x.
 *
 * The library result is compared with the compiler's own `/` and `%`.
 * On a mismatch a line is printed:
 *   "U " y x q r " : " expected-quotient expected-remainder   (all in hex)
 * ntests is incremented for every comparison performed.
 */
void test_divu64u64(ui64 y, ui64 x) {
    ui64 q, r;

    test_mulib_divu64u64(&y, &x, &q, &r);
#if !PICO_ON_DEVICE
    /* Off-device the reference y/x below must not be evaluated with a zero
       divisor, so the library is called but its result is not checked. */
    if (!x) return;
#endif
    if (q != y / x || r != y % x) {
        ostr("U ");
        o16hex(y); osp();
        o16hex(x); osp();
        o16hex(q); osp();
        o16hex(r);
        ostr(" : ");
        o16hex(y / x); osp();
        o16hex(y % x); onl();
    }
    ntests++;
}
123 
/*
 * Check one signed 64-bit division y / x.
 *
 * The library result is compared with the compiler's own `/` and `%`.
 * On a mismatch a line is printed:
 *   "S " y x q r " : " expected-quotient expected-remainder   (all in hex)
 * ntests is incremented for every comparison performed.
 */
void test_divs64s64(i64 y, i64 x) {
    i64 q, r;

#if !PICO_ON_DEVICE
    /* Off-device the most negative dividend is skipped entirely. */
    if (y == INT64_MIN) return;
#endif
    test_mulib_divs64s64(&y, &x, &q, &r);
#if !PICO_ON_DEVICE
    /* Off-device the reference y/x below must not be evaluated with a zero
       divisor, so the library is called but its result is not checked. */
    if (!x) return;
#endif
    if (q != y / x || r != y % x) {
        ostr("S ");
        o16hex(y); osp();
        o16hex(x); osp();
        o16hex(q); osp();
        o16hex(r);
        ostr(" : ");
        o16hex(y / x); osp();
        o16hex(y % x); onl();
    }
    ntests++;
}
146 
147 
148 // for all x and y consisting of a single run of 1:s, test a region around (x,y)
test_special()149 void test_special() {
150   int i0,j0,i1,j1,dy,dx;
151   ui64 y,x;
152   for(i0=0;i0<64;i0++) {
153     y=0;
154     for(i1=i0;i1<65;i1++) {
155       for(j0=0;j0<64;j0++) {
156         x=0;
157         for(j1=j0;j1<65;j1++) {
158 #define A 2
159           for(dy=-A;dy<=A;dy++) {
160             for(dx=-A;dx<=A;dx++) {
161               test_divu64u64( y+dy, x+dx);
162               test_divs64s64( y+dy, x+dx);
163               test_divs64s64( y+dy,-x-dx);
164               test_divs64s64(-y-dy, x+dx);
165               test_divs64s64(-y-dy,-x-dx);
166               }
167             }
168           x|=1ULL<<j1;
169           }
170         }
171       y|=1ULL<<i1;
172       }
173       odec(i0+1); ostr(" "); odec(i1+1); ostr(" specials\n");
174     }
175 }
176 
test_random()177 void test_random() {
178   int i,j;
179   ui64 y,x,m;
180   for(i=0;;i++) {
181     for(j=0;j<200000;j++) {
182       m=1ULL<<(rnd32()%48+15); m+=m-1; y=rnd64()&m;
183       m=1ULL<<(rnd32()%48+15); m+=m-1; x=rnd64()&m;
184       test_divu64u64( y, x);
185       test_divs64s64( y, x);
186       test_divs64s64( y,-x);
187       test_divs64s64(-y, x);
188       test_divs64s64(-y,-x);
189       }
190     odec(i+1); ostr("M\n");
191     }
192 }
193 #endif
194 
/*
 * Time one call of func(a, b).
 *
 * Arm: a and b are still in r0/r1 and func in r2 on entry, so func is
 * called directly with the caller's arguments. The word at 0xe000e018
 * (the SysTick current-value register, enabled by perf_test()) is read
 * before and after the call; the function returns (before - after),
 * sign-extended from 24 bits. func's own result is discarded.
 *
 * RISC-V: no timing is performed; 0 is returned.
 *
 * The function is naked, so the assembly must contain its own return.
 */
uint32_t __attribute__((naked)) time_32(uint32_t a, uint32_t b, uint32_t (*func)(uint32_t a, uint32_t b)) {
#ifndef __riscv
    pico_default_asm (
        "push {r4, r5, lr}\n"
        "ldr r4, =0xe000e018\n"     // r4 = address of the counter
        "ldr r5, [r4]\n"            // r5 = count before the call
        "blx r2\n"                  // func(a, b)
        "ldr r0, [r4]\n"            // r0 = count after the call
        "subs r5, r0\n"             // r5 = before - after
        "lsls r0, r5, #8\n"         // keep the low 24 bits ...
        "asrs r0, #8\n"             // ... and sign-extend them
        "pop {r4, r5, pc}\n"
    );
#else
    pico_default_asm (
        "li a0, 0\n"
        "ret\n"                     // naked: no compiler-generated return
    );
#endif
}
214 
/*
 * Time one call of func64(a, b).
 *
 * Arm: a and b occupy r0-r3 on entry and are passed through untouched;
 * func64 is the first stack argument, found at [sp, #16] once four
 * registers have been pushed. The word at 0xe000e018 (the SysTick
 * current-value register, enabled by perf_test()) is read before and after
 * the call; the function returns (before - after), sign-extended from
 * 24 bits. func64's own result is discarded.
 *
 * RISC-V: no timing is performed; 0 is returned.
 *
 * The function is naked, so the assembly must contain its own return.
 */
uint32_t __attribute__((naked)) time_64(uint64_t a, uint64_t b, uint64_t (*func64)(uint64_t a, uint64_t b)) {
#ifndef __riscv
    pico_default_asm (
        "push {r4-r6, lr}\n"
        "ldr r6, [sp, #16]\n"       // r6 = func64 (first stack argument)
        "ldr r4, =0xe000e018\n"     // r4 = address of the counter
        "ldr r5, [r4]\n"            // r5 = count before the call
        "blx r6\n"                  // func64(a, b)
        "ldr r0, [r4]\n"            // r0 = count after the call
        "subs r5, r0\n"             // r5 = before - after
        "lsls r0, r5, #8\n"         // keep the low 24 bits ...
        "asrs r0, #8\n"             // ... and sign-extend them
        "pop {r4-r6, pc}\n"
    );
#else
    pico_default_asm (
        "li a0, 0\n"
        "ret\n"                     // naked: no compiler-generated return
    );
#endif
}
235 
/*
 * Reference signed 32-bit division using the compiler's `/` operator.
 * Both arguments are reinterpreted as int32_t; the quotient is returned as
 * uint32_t so the function matches the callback type taken by time_32().
 */
uint32_t compiler_div_s32(uint32_t a, uint32_t b) {
    const int32_t dividend = (int32_t)a;
    const int32_t divisor = (int32_t)b;
    return dividend / divisor;
}
239 
/*
 * Signed 32-bit division through div_s32s32(), wrapped so it matches the
 * callback type taken by time_32().
 */
uint32_t pico_div_s32(uint32_t a, uint32_t b) {
    return div_s32s32(a, b);
}
243 
/* Reference unsigned 32-bit division using the compiler's `/` operator. */
uint32_t compiler_div_u32(uint32_t a, uint32_t b) {
    const uint32_t quotient = a / b;
    return quotient;
}
247 
/*
 * Unsigned 32-bit division through div_u32u32(), wrapped so it matches the
 * callback type taken by time_32().
 */
uint32_t pico_div_u32(uint32_t a, uint32_t b) {
    return div_u32u32(a, b);
}
251 
/*
 * Reference signed 64-bit division using the compiler's `/` operator.
 * Both arguments are reinterpreted as int64_t; the quotient is returned as
 * uint64_t so the function matches the callback type taken by time_64().
 */
uint64_t compiler_div_s64(uint64_t a, uint64_t b) {
    const int64_t dividend = (int64_t)a;
    const int64_t divisor = (int64_t)b;
    return dividend / divisor;
}
255 
/*
 * Signed 64-bit division through div_s64s64(), wrapped so it matches the
 * callback type taken by time_64().
 */
uint64_t pico_div_s64(uint64_t a, uint64_t b) {
    return div_s64s64(a, b);
}
259 
/* Reference unsigned 64-bit division using the compiler's `/` operator. */
uint64_t compiler_div_u64(uint64_t a, uint64_t b) {
    const uint64_t quotient = a / b;
    return quotient;
}
263 
/*
 * Unsigned 64-bit division through div_u64u64(), wrapped so it matches the
 * callback type taken by time_64().
 */
uint64_t pico_div_u64(uint64_t a, uint64_t b) {
    return div_u64u64(a, b);
}
267 
268 
perf_test()269 void perf_test() {
270     *(volatile unsigned int *)0xe000e010=5; // enable SYSTICK at core clock
271 
272     for(int bit = 30; bit>=0; bit--) {
273         int div = 1u << (31-bit);
274         const int N = 1000;
275         int tc = 0, tp = 0;
276         for (int i = 0; i < N; i++) {
277             int a = rnd32();
278             int b;
279             do {
280                 b = rnd32() / div;
281             } while (b == 0);
282             tc += time_32(a, b, compiler_div_s32);
283             tp += time_32(a, b, pico_div_s32);
284         }
285         printf("  S32 %d %f\t%f\n", bit, tc / 1000.0, tp / 1000.0);
286     }
287 
288     for(int bit = 30; bit>=0; bit--) {
289         int div = 1u << (31-bit);
290         const int N = 1000;
291         int tc = 0, tp = 0;
292         for (int i = 0; i < N; i++) {
293             int a = rnd32();
294             int b;
295             do {
296                 b = rnd32() / div;
297             } while (b == 0);
298             tc += time_32(a, b, compiler_div_u32);
299             tp += time_32(a, b, pico_div_u32);
300         }
301         printf("  U32 %d %f\t%f\n", bit, tc / 1000.0, tp / 1000.0);
302     }
303 
304     for(int extra = 0; extra <= 48; extra+=16)
305     {
306         for(int bit = 62; bit>=0; bit--) {
307             int64_t div = 1ull << (62-bit);
308             const int N = 1000;
309             int tc = 0, tp = 0;
310             for (int i = 0; i < N; i++) {
311                 int64_t a = rnd64() / (1u << extra);
312                 int64_t b;
313                 do {
314                     b = ((int64_t)rnd64()) / div;
315                 } while (b == 0);
316                 tc += time_64(a, b, compiler_div_s64);
317                 tp += time_64(a, b, pico_div_s64);
318             }
319             printf("  S64 %d %d %f\t%f\n", extra, bit, tc / 1000.0, tp / 1000.0);
320         }
321 
322         for(int bit = 62; bit>=0; bit--) {
323             int64_t div = 1ull << (62-bit);
324             const int N = 1000;
325             int tc = 0, tp = 0;
326             for (int i = 0; i < N; i++) {
327                 uint64_t a = rnd64();
328                 uint64_t b;
329                 do {
330                     b = rnd64() / div;
331                 } while (b == 0);
332                 tc += time_64(a, b, compiler_div_u64);
333                 tp += time_64(a, b, pico_div_u64);
334             }
335             printf("  U64 %d %d %f\t%f\n", extra, bit, tc / 1000.0, tp / 1000.0);
336         }
337     }
338 }
339 
/*
 * Test entry point.
 *
 * Without a default UART the program is empty apart from a build warning.
 * Otherwise it sets up the UART, seeds the generator, runs perf_test(),
 * a few fixed divisions, test_special(), a sweep of wide divisors and
 * finally test_random().
 */
int main() {
#ifndef uart_default
#warning test/pico_divider requires a default uart
#else
#ifdef TURBO
    vreg_set_voltage(VREG_VOLTAGE_MAX);
    set_sys_clock_khz(48000*8, true);
#endif
    setup_default_uart();
#ifdef RANDOMISE
    // NOTE(review): this branch relies on sys_host(), SYS_OPEN/SYS_READ/
    // SYS_CLOSE, rfn and strlen(), none of which are declared in this file.
    int u, ifh;
    ifh=sys_host(SYS_OPEN,(int)rfn,0,strlen(rfn));
    u=sys_host(SYS_READ,ifh,(int)&seed,sizeof(seed));
    if(u) {ostrnl("Error reading random stream"); return 16;}
    sys_host(SYS_CLOSE,ifh,0,0);
#else
    seed=12233524287791987605ULL;
#endif
    perf_test();
    ostr("begin\n");
    test_divu64u64( 38, 6);
    test_divs64s64( 38, 6);
    test_divs64s64( 38,-6);
    test_divs64s64(-38, 6);
    test_divs64s64(-38,-6);
    test_divu64u64(1234567890123ULL,6);
    test_divu64u64(0x0000000100000000ULL,6);
    test_divu64u64(0xffffffffffffffffULL,6);
    test_special();
    o16hex(ntests);
    ostr(" special tests done; starting random tests\n");

    // Fixed dividend against 2^32, then against all-ones divisors of
    // 33 to 48 bits (0x1ffffffff ... 0xffffffffffff).
    test_divu64u64(0xf123456789abcdefULL,0x0000000100000000ULL);
    for(ui64 x=0x00000001ffffffffULL; x<=0x0000ffffffffffffULL; x=(x<<1)|1) {
        test_divu64u64(0xf123456789abcdefULL,x);
    }

    // test_random() loops forever, so the lines below are not reached.
    test_random();

    ostr("END\n");
    return 0;
#endif
}
395 
396