1 /* memcpy for the Visium processor.
2 
3    Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4    All rights reserved.
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are met:
8 
9      * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11      * Redistributions in binary form must reproduce the above copyright
12        notice, this list of conditions and the following disclaimer in the
13        documentation and/or other materials provided with the distribution.
14      * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15        the names of its contributors may be used to endorse or promote products
16        derived from this software without specific prior written permission.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28    THE POSSIBILITY OF SUCH DAMAGE.  */
29 
30 /* This file must be kept in sync with libgcc/config/visium/memcpy.c  */
31 
#include <stddef.h>
#include <stdint.h>
#include "memcpy.h"
#include "../../string/local.h"
35 
/* Compiler-level barrier: an empty volatile asm with a "memory" clobber.
   The definition carries its own terminating semicolon, so every use below
   is written without one.  */
#define INST_BARRIER   __asm__ __volatile__ ("":::"memory");

/* Building blocks for the MOVE_<n>_OBJECTS macros.

   Each one loads 4, 3, 2 or 1 consecutive objects starting at index I of IN
   into the scratch variables m0..m3 and then stores them at the same indices
   of OUT.  The scratch variables are NOT declared here: m0, m1, m2 and m3
   must be in scope wherever these macros are expanded.

   These helpers expand to a bare statement sequence (no do/while wrapper and
   no trailing semicolon), so they are only meant to be used inside the
   braced bodies of the MOVE_<n>_OBJECTS macros.  Arguments are
   parenthesised so that any lvalue expression (e.g. *pp) indexes
   correctly.  */
#define MOVE_QUAD_AT(in, out, i)        \
  m0 = (in)[(i) + 0];                   \
  m1 = (in)[(i) + 1];                   \
  m2 = (in)[(i) + 2];                   \
  m3 = (in)[(i) + 3];                   \
  (out)[(i) + 0] = m0;                  \
  (out)[(i) + 1] = m1;                  \
  (out)[(i) + 2] = m2;                  \
  (out)[(i) + 3] = m3

#define MOVE_TRIPLE_AT(in, out, i)      \
  m0 = (in)[(i) + 0];                   \
  m1 = (in)[(i) + 1];                   \
  m2 = (in)[(i) + 2];                   \
  (out)[(i) + 0] = m0;                  \
  (out)[(i) + 1] = m1;                  \
  (out)[(i) + 2] = m2

#define MOVE_PAIR_AT(in, out, i)        \
  m0 = (in)[(i) + 0];                   \
  m1 = (in)[(i) + 1];                   \
  (out)[(i) + 0] = m0;                  \
  (out)[(i) + 1] = m1

#define MOVE_SINGLE_AT(in, out, i)      \
  m0 = (in)[(i) + 0];                   \
  (out)[(i) + 0] = m0

/* MOVE_<n>_OBJECTS (in, out): copy <n> consecutive objects from IN to OUT
   through the scratch variables m0..m3, then advance both IN and OUT by
   <n>.  IN and OUT must be modifiable pointer lvalues; the object size is
   whatever their pointee type is.  The order of loads, stores and barriers
   in each macro is deliberate and is preserved exactly.  */

#define MOVE_32_OBJECTS(in,out)         \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 4);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 8);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 12);           \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 16);           \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 20);           \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 24);           \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 28);           \
  INST_BARRIER                          \
  (in) += 32;                           \
  (out) += 32;                          \
} while(0)

#define MOVE_16_OBJECTS(in,out)         \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 4);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 8);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 12);           \
  INST_BARRIER                          \
  (in) += 16;                           \
  (out) += 16;                          \
} while(0)

#define MOVE_12_OBJECTS(in,out)         \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 4);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 8);            \
  INST_BARRIER                          \
  (in) += 12;                           \
  (out) += 12;                          \
} while(0)

#define MOVE_11_OBJECTS(in,out)         \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 4);            \
  INST_BARRIER                          \
  MOVE_TRIPLE_AT (in, out, 8);          \
  INST_BARRIER                          \
  (in) += 11;                           \
  (out) += 11;                          \
} while(0)

/* The second half interleaves the loads of objects 8 and 9 with the stores
   of objects 4..7, reusing m0 and m1 as soon as they have been stored.  */
#define MOVE_10_OBJECTS(in,out)         \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  m0 = (in)[4];                         \
  m1 = (in)[5];                         \
  m2 = (in)[6];                         \
  m3 = (in)[7];                         \
  (out)[4] = m0;                        \
  m0 = (in)[8];                         \
  (out)[5] = m1;                        \
  m1 = (in)[9];                         \
  (out)[6] = m2;                        \
  (out)[7] = m3;                        \
  (out)[8] = m0;                        \
  (out)[9] = m1;                        \
  INST_BARRIER                          \
  (in) += 10;                           \
  (out) += 10;                          \
} while(0)

/* Unlike its siblings, this macro has no barrier between the final store
   and the pointer updates.  */
#define MOVE_9_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 4);            \
  INST_BARRIER                          \
  MOVE_SINGLE_AT (in, out, 8);          \
  (in) += 9;                            \
  (out) += 9;                           \
} while(0)

#define MOVE_8_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 4);            \
  INST_BARRIER                          \
  (in) += 8;                            \
  (out) += 8;                           \
} while(0)

#define MOVE_7_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  MOVE_TRIPLE_AT (in, out, 4);          \
  INST_BARRIER                          \
  (in) += 7;                            \
  (out) += 7;                           \
} while(0)

/* Loads of objects 4 and 5 are interleaved with the stores of objects
   0..3, with barriers pinning that interleaving in place.  */
#define MOVE_6_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  m0 = (in)[0];                         \
  m1 = (in)[1];                         \
  m2 = (in)[2];                         \
  m3 = (in)[3];                         \
  (out)[0] = m0;                        \
  INST_BARRIER                          \
  m0 = (in)[4];                         \
  (out)[1] = m1;                        \
  INST_BARRIER                          \
  m1 = (in)[5];                         \
  (out)[2] = m2;                        \
  (out)[3] = m3;                        \
  (out)[4] = m0;                        \
  (out)[5] = m1;                        \
  INST_BARRIER                          \
  (in) += 6;                            \
  (out) += 6;                           \
} while(0)

/* The load of object 4 reuses m0 immediately after object 0 is stored.  */
#define MOVE_5_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  m0 = (in)[0];                         \
  m1 = (in)[1];                         \
  m2 = (in)[2];                         \
  m3 = (in)[3];                         \
  INST_BARRIER                          \
  (out)[0] = m0;                        \
  m0 = (in)[4];                         \
  INST_BARRIER                          \
  (out)[1] = m1;                        \
  (out)[2] = m2;                        \
  (out)[3] = m3;                        \
  (out)[4] = m0;                        \
  INST_BARRIER                          \
  (in) += 5;                            \
  (out) += 5;                           \
} while(0)

#define MOVE_4_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  MOVE_QUAD_AT (in, out, 0);            \
  INST_BARRIER                          \
  (in) += 4;                            \
  (out) += 4;                           \
} while(0)

#define MOVE_3_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  MOVE_TRIPLE_AT (in, out, 0);          \
  INST_BARRIER                          \
  (in) += 3;                            \
  (out) += 3;                           \
} while(0)

#define MOVE_2_OBJECTS(in,out)          \
do {                                    \
  INST_BARRIER                          \
  MOVE_PAIR_AT (in, out, 0);            \
  INST_BARRIER                          \
  (in) += 2;                            \
  (out) += 2;                           \
} while(0)

#define MOVE_1_OBJECT(in,out)           \
do {                                    \
  INST_BARRIER                          \
  MOVE_SINGLE_AT (in, out, 0);          \
  INST_BARRIER                          \
  (in) += 1;                            \
  (out) += 1;                           \
} while(0)
423 
424 
425 static inline void
426 __inhibit_loop_to_libcall
__int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)427 __int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
428 {
429   int value = n;
430   int loop_var;
431   const int *in = s2;
432   int *out = s1;
433   int count;
434   int m0,m1,m2,m3;
435 
436   /* This code currently give a stall for any value with a 1->2 in the low 5
437      bits, i.e.  1,2, 33,34 ? not acceptable!  */
438   switch (value & 0x1f)
439     {
440     case 0:
441       break;
442     case 1:
443       MOVE_1_OBJECT (in, out);
444       break;
445     case 2:
446       MOVE_2_OBJECTS (in, out);
447       break;
448     case 3:
449       MOVE_3_OBJECTS (in, out);
450       break;
451     case 4:
452       MOVE_4_OBJECTS (in, out);
453       break;
454     case 5:
455       MOVE_5_OBJECTS (in, out);
456       break;
457     case 6:
458       MOVE_6_OBJECTS (in, out);
459       break;
460     case 7:
461       MOVE_7_OBJECTS (in, out);
462       break;
463     case 8:
464       MOVE_8_OBJECTS (in, out);
465       break;
466     case 9:
467       MOVE_9_OBJECTS (in, out);
468       break;
469     case 10:
470       MOVE_10_OBJECTS (in, out);
471       break;
472     case 11:
473       MOVE_11_OBJECTS (in, out);
474       break;
475     case 12:
476       MOVE_12_OBJECTS (in, out);
477       break;
478     case 13:
479       MOVE_9_OBJECTS (in, out);
480       MOVE_4_OBJECTS (in, out);
481       break;
482     case 14:
483       MOVE_12_OBJECTS (in, out);
484       MOVE_2_OBJECTS (in, out);
485       break;
486     case 15:
487       MOVE_11_OBJECTS (in, out);
488       MOVE_4_OBJECTS (in, out);
489       break;
490     case 16:
491       MOVE_16_OBJECTS (in, out);
492       break;
493     case 17:
494       MOVE_11_OBJECTS (in, out);
495       MOVE_6_OBJECTS (in, out);
496       break;
497     case 18:
498       MOVE_9_OBJECTS (in, out);
499       MOVE_9_OBJECTS (in, out);
500       break;
501     case 19:
502       MOVE_16_OBJECTS (in, out);
503       MOVE_3_OBJECTS (in, out);
504       break;
505     case 20:
506       MOVE_16_OBJECTS (in, out);
507       MOVE_4_OBJECTS (in, out);
508       break;
509     case 21:
510       MOVE_16_OBJECTS (in, out);
511       MOVE_5_OBJECTS (in, out);
512       break;
513     case 22:
514       MOVE_16_OBJECTS (in, out);
515       MOVE_6_OBJECTS (in, out);
516       break;
517     case 23:
518       MOVE_16_OBJECTS (in, out);
519       MOVE_7_OBJECTS (in, out);
520       break;
521     case 24:
522       MOVE_16_OBJECTS (in, out);
523       MOVE_8_OBJECTS (in, out);
524       break;
525     case 25:
526       MOVE_16_OBJECTS (in, out);
527       MOVE_9_OBJECTS (in, out);
528       break;
529     case 26:
530       MOVE_16_OBJECTS (in, out);
531       MOVE_10_OBJECTS (in, out);
532       break;
533     case 27:
534       MOVE_16_OBJECTS (in, out);
535       MOVE_11_OBJECTS (in, out);
536       break;
537     case 28:
538       MOVE_16_OBJECTS (in, out);
539       MOVE_8_OBJECTS (in, out);
540       MOVE_4_OBJECTS (in, out);
541       break;
542     case 29:
543       MOVE_16_OBJECTS (in, out);
544       MOVE_9_OBJECTS (in, out);
545       MOVE_4_OBJECTS (in, out);
546       break;
547     case 30:
548       MOVE_16_OBJECTS (in, out);
549       MOVE_12_OBJECTS (in, out);
550       MOVE_2_OBJECTS (in, out);
551       break;
552     case 31:
553       MOVE_16_OBJECTS (in, out);
554       MOVE_11_OBJECTS (in, out);
555       MOVE_4_OBJECTS (in, out);
556       break;
557     }
558 
559   /* This loop governs the asmptoptic behaviour of this algorithm, for long
560      word copies.  */
561   count = value >> 5;
562   for (loop_var = 0; loop_var < count; loop_var++)
563     MOVE_32_OBJECTS (in, out);
564 }
565 
566 static inline void
567 __inhibit_loop_to_libcall
__shrt_int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)568 __shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
569 {
570   int value = n;
571   int loop_var;
572   const short int *in = s2;
573   int short *out = s1;
574   int count;
575   int m0,m1,m2,m3;
576 
577  /* This code currently give a stall for any value with a 1->2 in the low 5
578     bits, i.e.  1,2, 33,34 ? not acceptable!  */
579   switch (value & 0x1f)
580     {
581     case 0:
582       break;
583     case 1:
584       MOVE_1_OBJECT (in, out);
585       break;
586     case 2:
587       MOVE_2_OBJECTS (in, out);
588       break;
589     case 3:
590       MOVE_3_OBJECTS (in, out);
591       break;
592     case 4:
593       MOVE_4_OBJECTS (in, out);
594       break;
595     case 5:
596       MOVE_5_OBJECTS (in, out);
597       break;
598     case 6:
599       MOVE_6_OBJECTS (in, out);
600       break;
601     case 7:
602       MOVE_7_OBJECTS (in, out);
603       break;
604     case 8:
605       MOVE_8_OBJECTS (in, out);
606       break;
607     case 9:
608       MOVE_9_OBJECTS (in, out);
609       break;
610     case 10:
611       MOVE_10_OBJECTS (in, out);
612       break;
613     case 11:
614       MOVE_11_OBJECTS (in, out);
615       break;
616     case 12:
617       MOVE_12_OBJECTS (in, out);
618       break;
619     case 13:
620       MOVE_9_OBJECTS (in, out);
621       MOVE_4_OBJECTS (in, out);
622       break;
623     case 14:
624       MOVE_12_OBJECTS (in, out);
625       MOVE_2_OBJECTS (in, out);
626       break;
627     case 15:
628       MOVE_11_OBJECTS (in, out);
629       MOVE_4_OBJECTS (in, out);
630       break;
631     case 16:
632       MOVE_16_OBJECTS (in, out);
633       break;
634     case 17:
635       MOVE_11_OBJECTS (in, out);
636       MOVE_6_OBJECTS (in, out);
637       break;
638     case 18:
639       MOVE_9_OBJECTS (in, out);
640       MOVE_9_OBJECTS (in, out);
641       break;
642     case 19:
643       MOVE_16_OBJECTS (in, out);
644       MOVE_3_OBJECTS (in, out);
645       break;
646     case 20:
647       MOVE_16_OBJECTS (in, out);
648       MOVE_4_OBJECTS (in, out);
649       break;
650     case 21:
651       MOVE_16_OBJECTS (in, out);
652       MOVE_5_OBJECTS (in, out);
653       break;
654     case 22:
655       MOVE_16_OBJECTS (in, out);
656       MOVE_6_OBJECTS (in, out);
657       break;
658     case 23:
659       MOVE_16_OBJECTS (in, out);
660       MOVE_7_OBJECTS (in, out);
661       break;
662     case 24:
663       MOVE_16_OBJECTS (in, out);
664       MOVE_8_OBJECTS (in, out);
665       break;
666     case 25:
667       MOVE_16_OBJECTS (in, out);
668       MOVE_9_OBJECTS (in, out);
669       break;
670     case 26:
671       MOVE_16_OBJECTS (in, out);
672       MOVE_10_OBJECTS (in, out);
673       break;
674     case 27:
675       MOVE_16_OBJECTS (in, out);
676       MOVE_11_OBJECTS (in, out);
677       break;
678     case 28:
679       MOVE_16_OBJECTS (in, out);
680       MOVE_8_OBJECTS (in, out);
681       MOVE_4_OBJECTS (in, out);
682       break;
683     case 29:
684       MOVE_16_OBJECTS (in, out);
685       MOVE_9_OBJECTS (in, out);
686       MOVE_4_OBJECTS (in, out);
687       break;
688     case 30:
689       MOVE_16_OBJECTS (in, out);
690       MOVE_12_OBJECTS (in, out);
691       MOVE_2_OBJECTS (in, out);
692       break;
693     case 31:
694       MOVE_16_OBJECTS (in, out);
695       MOVE_11_OBJECTS (in, out);
696       MOVE_4_OBJECTS (in, out);
697       break;
698     }
699 
700   /* This loop governs the asmptoptic behaviour of this algorithm, for long
701      word copies.  */
702   count = value >> 5;
703   for (loop_var = 0; loop_var < count; loop_var++)
704     MOVE_32_OBJECTS (in, out);
705 }
706 
707 
708 static inline void
709 __inhibit_loop_to_libcall
__byte_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)710 __byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
711 {
712   int value = n;
713   int loop_var;
714   const char *in = s2;
715   char *out = s1;
716   int count;
717   int m0,m1,m2,m3;
718 
719  /* This code currently give a stall for any value with a 1->2 in the low 5
720     bits, i.e.  1,2, 33,34 ? not acceptable!  */
721   switch (value & 0x1f)
722     {
723     case 0:
724       break;
725     case 1:
726       MOVE_1_OBJECT (in, out);
727       break;
728     case 2:
729       MOVE_2_OBJECTS (in, out);
730       break;
731     case 3:
732       MOVE_3_OBJECTS (in, out);
733       break;
734     case 4:
735       MOVE_4_OBJECTS (in, out);
736       break;
737     case 5:
738       MOVE_5_OBJECTS (in, out);
739       break;
740     case 6:
741       MOVE_6_OBJECTS (in, out);
742       break;
743     case 7:
744       MOVE_7_OBJECTS (in, out);
745       break;
746     case 8:
747       MOVE_8_OBJECTS (in, out);
748       break;
749     case 9:
750       MOVE_9_OBJECTS (in, out);
751       break;
752     case 10:
753       MOVE_10_OBJECTS (in, out);
754       break;
755     case 11:
756       MOVE_11_OBJECTS (in, out);
757       break;
758     case 12:
759       MOVE_12_OBJECTS (in, out);
760       break;
761     case 13:
762       MOVE_9_OBJECTS (in, out);
763       MOVE_4_OBJECTS (in, out);
764       break;
765     case 14:
766       MOVE_12_OBJECTS (in, out);
767       MOVE_2_OBJECTS (in, out);
768       break;
769     case 15:
770       MOVE_11_OBJECTS (in, out);
771       MOVE_4_OBJECTS (in, out);
772       break;
773     case 16:
774       MOVE_16_OBJECTS (in, out);
775       break;
776     case 17:
777       MOVE_11_OBJECTS (in, out);
778       MOVE_6_OBJECTS (in, out);
779       break;
780     case 18:
781       MOVE_9_OBJECTS (in, out);
782       MOVE_9_OBJECTS (in, out);
783       break;
784     case 19:
785       MOVE_16_OBJECTS (in, out);
786       MOVE_3_OBJECTS (in, out);
787       break;
788     case 20:
789       MOVE_16_OBJECTS (in, out);
790       MOVE_4_OBJECTS (in, out);
791       break;
792     case 21:
793       MOVE_16_OBJECTS (in, out);
794       MOVE_5_OBJECTS (in, out);
795       break;
796     case 22:
797       MOVE_16_OBJECTS (in, out);
798       MOVE_6_OBJECTS (in, out);
799       break;
800     case 23:
801       MOVE_16_OBJECTS (in, out);
802       MOVE_7_OBJECTS (in, out);
803       break;
804     case 24:
805       MOVE_16_OBJECTS (in, out);
806       MOVE_8_OBJECTS (in, out);
807       break;
808     case 25:
809       MOVE_16_OBJECTS (in, out);
810       MOVE_9_OBJECTS (in, out);
811       break;
812     case 26:
813       MOVE_16_OBJECTS (in, out);
814       MOVE_10_OBJECTS (in, out);
815       break;
816     case 27:
817       MOVE_16_OBJECTS (in, out);
818       MOVE_11_OBJECTS (in, out);
819       break;
820     case 28:
821       MOVE_16_OBJECTS (in, out);
822       MOVE_8_OBJECTS (in, out);
823       MOVE_4_OBJECTS (in, out);
824       break;
825     case 29:
826       MOVE_16_OBJECTS (in, out);
827       MOVE_9_OBJECTS (in, out);
828       MOVE_4_OBJECTS (in, out);
829       break;
830     case 30:
831       MOVE_16_OBJECTS (in, out);
832       MOVE_12_OBJECTS (in, out);
833       MOVE_2_OBJECTS (in, out);
834       break;
835     case 31:
836       MOVE_16_OBJECTS (in, out);
837       MOVE_11_OBJECTS (in, out);
838       MOVE_4_OBJECTS (in, out);
839       break;
840     }
841 
842   /* This loop governs the asmptoptic behaviour of this algorithm, for long
843      word copies.  */
844   count = value >> 5;
845   for (loop_var = 0; loop_var < count; loop_var++)
846     MOVE_32_OBJECTS (in, out);
847 }
848 
849 
850 /* Exposed interface.  */
851 
852 void *
853 __inhibit_loop_to_libcall
memcpy(void * __restrict s1,const void * __restrict s2,size_t n)854 memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
855 {
856   void *result = s1;
857 
858   /* None of the following handles copying zero bytes.  */
859   if (n != 0)
860     {
861       unsigned test = (unsigned) s2 | (unsigned) s1 | (unsigned) n;
862 
863       if (test & 1)
864 	__byte_memcpy (s1, s2, n);
865       else if (test & 2)
866 	__shrt_int_memcpy (s1, s2, n >> 1);
867       else
868 #ifdef __VISIUM_ARCH_BMI__
869 	__asm__ __volatile__ ("bmd     %0,%1,%2"
870 			      : "+t" (s1), "+u" (s2), "+v" (n)
871 			      :
872 			      : "r4", "r5", "r6", "memory");
873 #else
874 	__int_memcpy (s1, s2, n >> 2);
875 #endif /* __VISIUM_ARCH_BMI__ */
876     }
877 
878   return result;
879 }
880