1 /* memcpy for the Visium processor.
2 
3    Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4    All rights reserved.
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are met:
8 
9      * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11      * Redistributions in binary form must reproduce the above copyright
12        notice, this list of conditions and the following disclaimer in the
13        documentation and/or other materials provided with the distribution.
14      * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15        the names of its contributors may be used to endorse or promote products
16        derived from this software without specific prior written permission.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28    THE POSSIBILITY OF SUCH DAMAGE.  */
29 
30 /* This file must be kept in sync with libgcc/config/visium/memcpy.c  */
31 
#include <picolibc.h>

#include <stddef.h>
#include <stdint.h>
#include "memcpy.h"
#include "../../string/local.h"
37 
/* Compiler-level barrier: an empty asm statement with a "memory" clobber.
   The terminating semicolon is part of the macro, so it is written WITHOUT
   a trailing semicolon at its points of use in the MOVE_* macros.  */
#define INST_BARRIER __asm__ __volatile__ ("" : : : "memory");
39 
/* Copy 32 consecutive objects from SRC to DST, then advance both pointers
   by 32.  The data moves in eight groups of four loads followed by four
   stores, each group fenced by INST_BARRIER.  SRC and DST must be
   modifiable pointer lvalues; the temporaries m0, m1, m2 and m3 are taken
   from the enclosing scope.  */
#define MOVE_32_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  dst[7] = m3; \
  INST_BARRIER \
  m0 = src[8]; \
  m1 = src[9]; \
  m2 = src[10]; \
  m3 = src[11]; \
  dst[8] = m0; \
  dst[9] = m1; \
  dst[10] = m2; \
  dst[11] = m3; \
  INST_BARRIER \
  m0 = src[12]; \
  m1 = src[13]; \
  m2 = src[14]; \
  m3 = src[15]; \
  dst[12] = m0; \
  dst[13] = m1; \
  dst[14] = m2; \
  dst[15] = m3; \
  INST_BARRIER \
  m0 = src[16]; \
  m1 = src[17]; \
  m2 = src[18]; \
  m3 = src[19]; \
  dst[16] = m0; \
  dst[17] = m1; \
  dst[18] = m2; \
  dst[19] = m3; \
  INST_BARRIER \
  m0 = src[20]; \
  m1 = src[21]; \
  m2 = src[22]; \
  m3 = src[23]; \
  dst[20] = m0; \
  dst[21] = m1; \
  dst[22] = m2; \
  dst[23] = m3; \
  INST_BARRIER \
  m0 = src[24]; \
  m1 = src[25]; \
  m2 = src[26]; \
  m3 = src[27]; \
  dst[24] = m0; \
  dst[25] = m1; \
  dst[26] = m2; \
  dst[27] = m3; \
  INST_BARRIER \
  m0 = src[28]; \
  m1 = src[29]; \
  m2 = src[30]; \
  m3 = src[31]; \
  dst[28] = m0; \
  dst[29] = m1; \
  dst[30] = m2; \
  dst[31] = m3; \
  INST_BARRIER \
  src += 32; \
  dst += 32; \
} while (0)
118 
/* Copy 16 consecutive objects from SRC to DST, then advance both pointers
   by 16.  The data moves in four groups of four loads followed by four
   stores, each group fenced by INST_BARRIER.  Uses the temporaries m0..m3
   of the enclosing scope.  */
#define MOVE_16_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  dst[7] = m3; \
  INST_BARRIER \
  m0 = src[8]; \
  m1 = src[9]; \
  m2 = src[10]; \
  m3 = src[11]; \
  dst[8] = m0; \
  dst[9] = m1; \
  dst[10] = m2; \
  dst[11] = m3; \
  INST_BARRIER \
  m0 = src[12]; \
  m1 = src[13]; \
  m2 = src[14]; \
  m3 = src[15]; \
  dst[12] = m0; \
  dst[13] = m1; \
  dst[14] = m2; \
  dst[15] = m3; \
  INST_BARRIER \
  src += 16; \
  dst += 16; \
} while (0)
161 
/* Copy 12 consecutive objects from SRC to DST, then advance both pointers
   by 12.  The data moves in three groups of four loads followed by four
   stores, each group fenced by INST_BARRIER.  Uses the temporaries m0..m3
   of the enclosing scope.  */
#define MOVE_12_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  dst[7] = m3; \
  INST_BARRIER \
  m0 = src[8]; \
  m1 = src[9]; \
  m2 = src[10]; \
  m3 = src[11]; \
  dst[8] = m0; \
  dst[9] = m1; \
  dst[10] = m2; \
  dst[11] = m3; \
  INST_BARRIER \
  src += 12; \
  dst += 12; \
} while (0)
195 
/* Copy 11 consecutive objects from SRC to DST, then advance both pointers
   by 11.  Two fenced groups of four are followed by a fenced group of
   three that leaves m3 untouched.  Uses the temporaries m0..m3 of the
   enclosing scope.  */
#define MOVE_11_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  dst[7] = m3; \
  INST_BARRIER \
  m0 = src[8]; \
  m1 = src[9]; \
  m2 = src[10]; \
  dst[8] = m0; \
  dst[9] = m1; \
  dst[10] = m2; \
  INST_BARRIER \
  src += 11; \
  dst += 11; \
} while (0)
227 
/* Copy 10 consecutive objects from SRC to DST, then advance both pointers
   by 10.  After the first fenced group of four, the loads of objects 8
   and 9 are interleaved with the stores of objects 4..7 inside a single
   fenced region; m0 and m1 are reused as soon as their previous values
   have been stored.  The statement order is deliberate and must be kept.
   Uses the temporaries m0..m3 of the enclosing scope.  */
#define MOVE_10_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  m0 = src[8]; \
  dst[5] = m1; \
  m1 = src[9]; \
  dst[6] = m2; \
  dst[7] = m3; \
  dst[8] = m0; \
  dst[9] = m1; \
  INST_BARRIER \
  src += 10; \
  dst += 10; \
} while (0)
256 
/* Copy 9 consecutive objects from SRC to DST, then advance both pointers
   by 9.  Two fenced groups of four are followed by a single load/store
   pair for the ninth object.  Uses the temporaries m0..m3 of the enclosing
   scope.

   The last load/store pair is closed with INST_BARRIER like the final
   group of every other MOVE_* macro, so the fencing no longer depends on
   whatever code happens to follow the expansion.  */
#define MOVE_9_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  dst[7] = m3; \
  INST_BARRIER \
  m0 = src[8]; \
  dst[8] = m0; \
  INST_BARRIER \
  src += 9; \
  dst += 9; \
} while (0)
283 
/* Copy 8 consecutive objects from SRC to DST, then advance both pointers
   by 8.  The data moves in two groups of four loads followed by four
   stores, each group fenced by INST_BARRIER.  Uses the temporaries m0..m3
   of the enclosing scope.  */
#define MOVE_8_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  m3 = src[7]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  dst[7] = m3; \
  INST_BARRIER \
  src += 8; \
  dst += 8; \
} while (0)
308 
/* Copy 7 consecutive objects from SRC to DST, then advance both pointers
   by 7.  A fenced group of four is followed by a fenced group of three
   that leaves m3 untouched.  Uses the temporaries m0..m3 of the enclosing
   scope.  */
#define MOVE_7_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  m0 = src[4]; \
  m1 = src[5]; \
  m2 = src[6]; \
  dst[4] = m0; \
  dst[5] = m1; \
  dst[6] = m2; \
  INST_BARRIER \
  src += 7; \
  dst += 7; \
} while (0)
331 
/* Copy 6 consecutive objects from SRC to DST, then advance both pointers
   by 6.  Four objects are loaded up front; the loads of objects 4 and 5
   then reuse m0 and m1 once their previous values have been stored, with
   extra barriers pinning that interleaving.  The statement order is
   deliberate and must be kept.  Uses the temporaries m0..m3 of the
   enclosing scope.  */
#define MOVE_6_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  INST_BARRIER \
  m0 = src[4]; \
  dst[1] = m1; \
  INST_BARRIER \
  m1 = src[5]; \
  dst[2] = m2; \
  dst[3] = m3; \
  dst[4] = m0; \
  dst[5] = m1; \
  INST_BARRIER \
  src += 6; \
  dst += 6; \
} while (0)
353 
/* Copy 5 consecutive objects from SRC to DST, then advance both pointers
   by 5.  Four objects are loaded up front; m0 is stored and immediately
   reloaded with the fifth object inside its own fenced region before the
   remaining stores are issued.  The statement order is deliberate and must
   be kept.  Uses the temporaries m0..m3 of the enclosing scope.  */
#define MOVE_5_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  INST_BARRIER \
  dst[0] = m0; \
  m0 = src[4]; \
  INST_BARRIER \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  dst[4] = m0; \
  INST_BARRIER \
  src += 5; \
  dst += 5; \
} while (0)
373 
/* Copy 4 consecutive objects from SRC to DST as one fenced group of four
   loads followed by four stores, then advance both pointers by 4.  Uses
   the temporaries m0..m3 of the enclosing scope.  */
#define MOVE_4_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  m3 = src[3]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  dst[3] = m3; \
  INST_BARRIER \
  src += 4; \
  dst += 4; \
} while (0)
389 
/* Copy 3 consecutive objects from SRC to DST as one fenced group of three
   loads followed by three stores, then advance both pointers by 3.  Uses
   the temporaries m0, m1 and m2 of the enclosing scope.  */
#define MOVE_3_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  m2 = src[2]; \
  dst[0] = m0; \
  dst[1] = m1; \
  dst[2] = m2; \
  INST_BARRIER \
  src += 3; \
  dst += 3; \
} while (0)
403 
/* Copy 2 consecutive objects from SRC to DST as one fenced pair of loads
   followed by a pair of stores, then advance both pointers by 2.  Uses the
   temporaries m0 and m1 of the enclosing scope.  */
#define MOVE_2_OBJECTS(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  m1 = src[1]; \
  dst[0] = m0; \
  dst[1] = m1; \
  INST_BARRIER \
  src += 2; \
  dst += 2; \
} while (0)
415 
/* Copy a single object from SRC to DST inside a fenced region, then
   advance both pointers by 1.  Uses the temporary m0 of the enclosing
   scope.  */
#define MOVE_1_OBJECT(src, dst) \
do { \
  INST_BARRIER \
  m0 = src[0]; \
  dst[0] = m0; \
  INST_BARRIER \
  src += 1; \
  dst += 1; \
} while (0)
425 
426 
427 static inline void
428 __inhibit_loop_to_libcall
__int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)429 __int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
430 {
431   int value = n;
432   int loop_var;
433   const int *in = s2;
434   int *out = s1;
435   int count;
436   int m0,m1,m2,m3;
437 
438   /* This code currently give a stall for any value with a 1->2 in the low 5
439      bits, i.e.  1,2, 33,34 ? not acceptable!  */
440   switch (value & 0x1f)
441     {
442     case 0:
443       break;
444     case 1:
445       MOVE_1_OBJECT (in, out);
446       break;
447     case 2:
448       MOVE_2_OBJECTS (in, out);
449       break;
450     case 3:
451       MOVE_3_OBJECTS (in, out);
452       break;
453     case 4:
454       MOVE_4_OBJECTS (in, out);
455       break;
456     case 5:
457       MOVE_5_OBJECTS (in, out);
458       break;
459     case 6:
460       MOVE_6_OBJECTS (in, out);
461       break;
462     case 7:
463       MOVE_7_OBJECTS (in, out);
464       break;
465     case 8:
466       MOVE_8_OBJECTS (in, out);
467       break;
468     case 9:
469       MOVE_9_OBJECTS (in, out);
470       break;
471     case 10:
472       MOVE_10_OBJECTS (in, out);
473       break;
474     case 11:
475       MOVE_11_OBJECTS (in, out);
476       break;
477     case 12:
478       MOVE_12_OBJECTS (in, out);
479       break;
480     case 13:
481       MOVE_9_OBJECTS (in, out);
482       MOVE_4_OBJECTS (in, out);
483       break;
484     case 14:
485       MOVE_12_OBJECTS (in, out);
486       MOVE_2_OBJECTS (in, out);
487       break;
488     case 15:
489       MOVE_11_OBJECTS (in, out);
490       MOVE_4_OBJECTS (in, out);
491       break;
492     case 16:
493       MOVE_16_OBJECTS (in, out);
494       break;
495     case 17:
496       MOVE_11_OBJECTS (in, out);
497       MOVE_6_OBJECTS (in, out);
498       break;
499     case 18:
500       MOVE_9_OBJECTS (in, out);
501       MOVE_9_OBJECTS (in, out);
502       break;
503     case 19:
504       MOVE_16_OBJECTS (in, out);
505       MOVE_3_OBJECTS (in, out);
506       break;
507     case 20:
508       MOVE_16_OBJECTS (in, out);
509       MOVE_4_OBJECTS (in, out);
510       break;
511     case 21:
512       MOVE_16_OBJECTS (in, out);
513       MOVE_5_OBJECTS (in, out);
514       break;
515     case 22:
516       MOVE_16_OBJECTS (in, out);
517       MOVE_6_OBJECTS (in, out);
518       break;
519     case 23:
520       MOVE_16_OBJECTS (in, out);
521       MOVE_7_OBJECTS (in, out);
522       break;
523     case 24:
524       MOVE_16_OBJECTS (in, out);
525       MOVE_8_OBJECTS (in, out);
526       break;
527     case 25:
528       MOVE_16_OBJECTS (in, out);
529       MOVE_9_OBJECTS (in, out);
530       break;
531     case 26:
532       MOVE_16_OBJECTS (in, out);
533       MOVE_10_OBJECTS (in, out);
534       break;
535     case 27:
536       MOVE_16_OBJECTS (in, out);
537       MOVE_11_OBJECTS (in, out);
538       break;
539     case 28:
540       MOVE_16_OBJECTS (in, out);
541       MOVE_8_OBJECTS (in, out);
542       MOVE_4_OBJECTS (in, out);
543       break;
544     case 29:
545       MOVE_16_OBJECTS (in, out);
546       MOVE_9_OBJECTS (in, out);
547       MOVE_4_OBJECTS (in, out);
548       break;
549     case 30:
550       MOVE_16_OBJECTS (in, out);
551       MOVE_12_OBJECTS (in, out);
552       MOVE_2_OBJECTS (in, out);
553       break;
554     case 31:
555       MOVE_16_OBJECTS (in, out);
556       MOVE_11_OBJECTS (in, out);
557       MOVE_4_OBJECTS (in, out);
558       break;
559     }
560 
561   /* This loop governs the asmptoptic behaviour of this algorithm, for long
562      word copies.  */
563   count = value >> 5;
564   for (loop_var = 0; loop_var < count; loop_var++)
565     MOVE_32_OBJECTS (in, out);
566 }
567 
568 static inline void
569 __inhibit_loop_to_libcall
__shrt_int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)570 __shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
571 {
572   int value = n;
573   int loop_var;
574   const short int *in = s2;
575   int short *out = s1;
576   int count;
577   int m0,m1,m2,m3;
578 
579  /* This code currently give a stall for any value with a 1->2 in the low 5
580     bits, i.e.  1,2, 33,34 ? not acceptable!  */
581   switch (value & 0x1f)
582     {
583     case 0:
584       break;
585     case 1:
586       MOVE_1_OBJECT (in, out);
587       break;
588     case 2:
589       MOVE_2_OBJECTS (in, out);
590       break;
591     case 3:
592       MOVE_3_OBJECTS (in, out);
593       break;
594     case 4:
595       MOVE_4_OBJECTS (in, out);
596       break;
597     case 5:
598       MOVE_5_OBJECTS (in, out);
599       break;
600     case 6:
601       MOVE_6_OBJECTS (in, out);
602       break;
603     case 7:
604       MOVE_7_OBJECTS (in, out);
605       break;
606     case 8:
607       MOVE_8_OBJECTS (in, out);
608       break;
609     case 9:
610       MOVE_9_OBJECTS (in, out);
611       break;
612     case 10:
613       MOVE_10_OBJECTS (in, out);
614       break;
615     case 11:
616       MOVE_11_OBJECTS (in, out);
617       break;
618     case 12:
619       MOVE_12_OBJECTS (in, out);
620       break;
621     case 13:
622       MOVE_9_OBJECTS (in, out);
623       MOVE_4_OBJECTS (in, out);
624       break;
625     case 14:
626       MOVE_12_OBJECTS (in, out);
627       MOVE_2_OBJECTS (in, out);
628       break;
629     case 15:
630       MOVE_11_OBJECTS (in, out);
631       MOVE_4_OBJECTS (in, out);
632       break;
633     case 16:
634       MOVE_16_OBJECTS (in, out);
635       break;
636     case 17:
637       MOVE_11_OBJECTS (in, out);
638       MOVE_6_OBJECTS (in, out);
639       break;
640     case 18:
641       MOVE_9_OBJECTS (in, out);
642       MOVE_9_OBJECTS (in, out);
643       break;
644     case 19:
645       MOVE_16_OBJECTS (in, out);
646       MOVE_3_OBJECTS (in, out);
647       break;
648     case 20:
649       MOVE_16_OBJECTS (in, out);
650       MOVE_4_OBJECTS (in, out);
651       break;
652     case 21:
653       MOVE_16_OBJECTS (in, out);
654       MOVE_5_OBJECTS (in, out);
655       break;
656     case 22:
657       MOVE_16_OBJECTS (in, out);
658       MOVE_6_OBJECTS (in, out);
659       break;
660     case 23:
661       MOVE_16_OBJECTS (in, out);
662       MOVE_7_OBJECTS (in, out);
663       break;
664     case 24:
665       MOVE_16_OBJECTS (in, out);
666       MOVE_8_OBJECTS (in, out);
667       break;
668     case 25:
669       MOVE_16_OBJECTS (in, out);
670       MOVE_9_OBJECTS (in, out);
671       break;
672     case 26:
673       MOVE_16_OBJECTS (in, out);
674       MOVE_10_OBJECTS (in, out);
675       break;
676     case 27:
677       MOVE_16_OBJECTS (in, out);
678       MOVE_11_OBJECTS (in, out);
679       break;
680     case 28:
681       MOVE_16_OBJECTS (in, out);
682       MOVE_8_OBJECTS (in, out);
683       MOVE_4_OBJECTS (in, out);
684       break;
685     case 29:
686       MOVE_16_OBJECTS (in, out);
687       MOVE_9_OBJECTS (in, out);
688       MOVE_4_OBJECTS (in, out);
689       break;
690     case 30:
691       MOVE_16_OBJECTS (in, out);
692       MOVE_12_OBJECTS (in, out);
693       MOVE_2_OBJECTS (in, out);
694       break;
695     case 31:
696       MOVE_16_OBJECTS (in, out);
697       MOVE_11_OBJECTS (in, out);
698       MOVE_4_OBJECTS (in, out);
699       break;
700     }
701 
702   /* This loop governs the asmptoptic behaviour of this algorithm, for long
703      word copies.  */
704   count = value >> 5;
705   for (loop_var = 0; loop_var < count; loop_var++)
706     MOVE_32_OBJECTS (in, out);
707 }
708 
709 
710 static inline void
711 __inhibit_loop_to_libcall
__byte_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)712 __byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
713 {
714   int value = n;
715   int loop_var;
716   const char *in = s2;
717   char *out = s1;
718   int count;
719   int m0,m1,m2,m3;
720 
721  /* This code currently give a stall for any value with a 1->2 in the low 5
722     bits, i.e.  1,2, 33,34 ? not acceptable!  */
723   switch (value & 0x1f)
724     {
725     case 0:
726       break;
727     case 1:
728       MOVE_1_OBJECT (in, out);
729       break;
730     case 2:
731       MOVE_2_OBJECTS (in, out);
732       break;
733     case 3:
734       MOVE_3_OBJECTS (in, out);
735       break;
736     case 4:
737       MOVE_4_OBJECTS (in, out);
738       break;
739     case 5:
740       MOVE_5_OBJECTS (in, out);
741       break;
742     case 6:
743       MOVE_6_OBJECTS (in, out);
744       break;
745     case 7:
746       MOVE_7_OBJECTS (in, out);
747       break;
748     case 8:
749       MOVE_8_OBJECTS (in, out);
750       break;
751     case 9:
752       MOVE_9_OBJECTS (in, out);
753       break;
754     case 10:
755       MOVE_10_OBJECTS (in, out);
756       break;
757     case 11:
758       MOVE_11_OBJECTS (in, out);
759       break;
760     case 12:
761       MOVE_12_OBJECTS (in, out);
762       break;
763     case 13:
764       MOVE_9_OBJECTS (in, out);
765       MOVE_4_OBJECTS (in, out);
766       break;
767     case 14:
768       MOVE_12_OBJECTS (in, out);
769       MOVE_2_OBJECTS (in, out);
770       break;
771     case 15:
772       MOVE_11_OBJECTS (in, out);
773       MOVE_4_OBJECTS (in, out);
774       break;
775     case 16:
776       MOVE_16_OBJECTS (in, out);
777       break;
778     case 17:
779       MOVE_11_OBJECTS (in, out);
780       MOVE_6_OBJECTS (in, out);
781       break;
782     case 18:
783       MOVE_9_OBJECTS (in, out);
784       MOVE_9_OBJECTS (in, out);
785       break;
786     case 19:
787       MOVE_16_OBJECTS (in, out);
788       MOVE_3_OBJECTS (in, out);
789       break;
790     case 20:
791       MOVE_16_OBJECTS (in, out);
792       MOVE_4_OBJECTS (in, out);
793       break;
794     case 21:
795       MOVE_16_OBJECTS (in, out);
796       MOVE_5_OBJECTS (in, out);
797       break;
798     case 22:
799       MOVE_16_OBJECTS (in, out);
800       MOVE_6_OBJECTS (in, out);
801       break;
802     case 23:
803       MOVE_16_OBJECTS (in, out);
804       MOVE_7_OBJECTS (in, out);
805       break;
806     case 24:
807       MOVE_16_OBJECTS (in, out);
808       MOVE_8_OBJECTS (in, out);
809       break;
810     case 25:
811       MOVE_16_OBJECTS (in, out);
812       MOVE_9_OBJECTS (in, out);
813       break;
814     case 26:
815       MOVE_16_OBJECTS (in, out);
816       MOVE_10_OBJECTS (in, out);
817       break;
818     case 27:
819       MOVE_16_OBJECTS (in, out);
820       MOVE_11_OBJECTS (in, out);
821       break;
822     case 28:
823       MOVE_16_OBJECTS (in, out);
824       MOVE_8_OBJECTS (in, out);
825       MOVE_4_OBJECTS (in, out);
826       break;
827     case 29:
828       MOVE_16_OBJECTS (in, out);
829       MOVE_9_OBJECTS (in, out);
830       MOVE_4_OBJECTS (in, out);
831       break;
832     case 30:
833       MOVE_16_OBJECTS (in, out);
834       MOVE_12_OBJECTS (in, out);
835       MOVE_2_OBJECTS (in, out);
836       break;
837     case 31:
838       MOVE_16_OBJECTS (in, out);
839       MOVE_11_OBJECTS (in, out);
840       MOVE_4_OBJECTS (in, out);
841       break;
842     }
843 
844   /* This loop governs the asmptoptic behaviour of this algorithm, for long
845      word copies.  */
846   count = value >> 5;
847   for (loop_var = 0; loop_var < count; loop_var++)
848     MOVE_32_OBJECTS (in, out);
849 }
850 
851 
852 /* Exposed interface.  */
853 
854 void *
855 __inhibit_loop_to_libcall
memcpy(void * __restrict s1,const void * __restrict s2,size_t n)856 memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
857 {
858   void *result = s1;
859 
860   /* None of the following handles copying zero bytes.  */
861   if (n != 0)
862     {
863       unsigned test = (unsigned) s2 | (unsigned) s1 | (unsigned) n;
864 
865       if (test & 1)
866 	__byte_memcpy (s1, s2, n);
867       else if (test & 2)
868 	__shrt_int_memcpy (s1, s2, n >> 1);
869       else
870 #ifdef __VISIUM_ARCH_BMI__
871 	__asm__ __volatile__ ("bmd     %0,%1,%2"
872 			      : "+t" (s1), "+u" (s2), "+v" (n)
873 			      :
874 			      : "r4", "r5", "r6", "memory");
875 #else
876 	__int_memcpy (s1, s2, n >> 2);
877 #endif /* __VISIUM_ARCH_BMI__ */
878     }
879 
880   return result;
881 }
882