1 /* memcpy for the Visium processor.
2
3 Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15 the names of its contributors may be used to endorse or promote products
16 derived from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 THE POSSIBILITY OF SUCH DAMAGE. */
29
30 /* This file must be kept in sync with libgcc/config/visium/memcpy.c */
31
#include <stddef.h>
#include <stdint.h>
#include "memcpy.h"
#include "../../string/local.h"
35
/* Compiler-level memory barrier: an empty asm statement with a "memory"
   clobber.  It emits no instructions, but the compiler may not move or
   combine memory accesses across it.  The trailing semicolon is part of
   the expansion on purpose: every use in this file is written without
   one.  */
#define INST_BARRIER __asm__ __volatile__ ("":::"memory");
37
/* Copy 32 consecutive objects from IN to OUT, then advance both pointers
   by 32.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved in batches of four
   (four loads, then four stores) and each batch is fenced with
   INST_BARRIER so the compiler keeps exactly this grouping.  */
#define MOVE_32_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  m3 = in [11]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  out [11] = m3; \
  INST_BARRIER \
  m0 = in [12]; \
  m1 = in [13]; \
  m2 = in [14]; \
  m3 = in [15]; \
  out [12] = m0; \
  out [13] = m1; \
  out [14] = m2; \
  out [15] = m3; \
  INST_BARRIER \
  m0 = in [16]; \
  m1 = in [17]; \
  m2 = in [18]; \
  m3 = in [19]; \
  out [16] = m0; \
  out [17] = m1; \
  out [18] = m2; \
  out [19] = m3; \
  INST_BARRIER \
  m0 = in [20]; \
  m1 = in [21]; \
  m2 = in [22]; \
  m3 = in [23]; \
  out [20] = m0; \
  out [21] = m1; \
  out [22] = m2; \
  out [23] = m3; \
  INST_BARRIER \
  m0 = in [24]; \
  m1 = in [25]; \
  m2 = in [26]; \
  m3 = in [27]; \
  out [24] = m0; \
  out [25] = m1; \
  out [26] = m2; \
  out [27] = m3; \
  INST_BARRIER \
  m0 = in [28]; \
  m1 = in [29]; \
  m2 = in [30]; \
  m3 = in [31]; \
  out [28] = m0; \
  out [29] = m1; \
  out [30] = m2; \
  out [31] = m3; \
  INST_BARRIER \
  in += 32; \
  out += 32; \
} while(0)
116
/* Copy 16 consecutive objects from IN to OUT, then advance both pointers
   by 16.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved in four batches of
   four, each fenced with INST_BARRIER.  */
#define MOVE_16_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  m3 = in [11]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  out [11] = m3; \
  INST_BARRIER \
  m0 = in [12]; \
  m1 = in [13]; \
  m2 = in [14]; \
  m3 = in [15]; \
  out [12] = m0; \
  out [13] = m1; \
  out [14] = m2; \
  out [15] = m3; \
  INST_BARRIER \
  in += 16; \
  out += 16; \
} while(0)
159
/* Copy 12 consecutive objects from IN to OUT, then advance both pointers
   by 12.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved in three batches of
   four, each fenced with INST_BARRIER.  */
#define MOVE_12_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  m3 = in [11]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  out [11] = m3; \
  INST_BARRIER \
  in += 12; \
  out += 12; \
} while(0)
193
/* Copy 11 consecutive objects from IN to OUT, then advance both pointers
   by 11.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved as two batches of
   four followed by a batch of three, each fenced with INST_BARRIER.  */
#define MOVE_11_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  m1 = in [9]; \
  m2 = in [10]; \
  out [8] = m0; \
  out [9] = m1; \
  out [10] = m2; \
  INST_BARRIER \
  in += 11; \
  out += 11; \
} while(0)
225
/* Copy 10 consecutive objects from IN to OUT, then advance both pointers
   by 10.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  The first four objects are moved as
   one fenced batch.  In the second batch the loads of objects 8 and 9 are
   interleaved with the stores of objects 4..7, reusing m0 and m1 as soon
   as their previous values have been stored; keep this order as is.  */
#define MOVE_10_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  m0 = in [8]; \
  out [5] = m1; \
  m1 = in [9]; \
  out [6] = m2; \
  out [7] = m3; \
  out [8] = m0; \
  out [9] = m1; \
  INST_BARRIER \
  in += 10; \
  out += 10; \
} while(0)
254
/* Copy 9 consecutive objects from IN to OUT, then advance both pointers
   by 9.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved as two batches of
   four followed by a single object.  The sequence ends with an
   INST_BARRIER before the pointer updates, as every other
   MOVE_n_OBJECTS macro in this file does.  */
#define MOVE_9_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  out [8] = m0; \
  INST_BARRIER \
  in += 9; \
  out += 9; \
} while(0)
281
/* Copy 8 consecutive objects from IN to OUT, then advance both pointers
   by 8.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved in two batches of
   four, each fenced with INST_BARRIER.  */
#define MOVE_8_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  in += 8; \
  out += 8; \
} while(0)
306
/* Copy 7 consecutive objects from IN to OUT, then advance both pointers
   by 7.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Objects are moved as a batch of four
   followed by a batch of three, each fenced with INST_BARRIER.  */
#define MOVE_7_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  INST_BARRIER \
  in += 7; \
  out += 7; \
} while(0)
329
/* Copy 6 consecutive objects from IN to OUT, then advance both pointers
   by 6.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  After the first four loads, the loads
   of objects 4 and 5 are interleaved with the stores of objects 0 and 1
   (reusing m0 and m1 once their values are stored), with barriers pinning
   that interleaving; keep this order as is.  */
#define MOVE_6_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  INST_BARRIER \
  m0 = in [4]; \
  out [1] = m1; \
  INST_BARRIER \
  m1 = in [5]; \
  out [2] = m2; \
  out [3] = m3; \
  out [4] = m0; \
  out [5] = m1; \
  INST_BARRIER \
  in += 6; \
  out += 6; \
} while(0)
351
/* Copy 5 consecutive objects from IN to OUT, then advance both pointers
   by 5.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  Four objects are loaded first; m0 is
   then stored and immediately reused to load the fifth object before the
   remaining stores.  Barriers pin that order; keep it as is.  */
#define MOVE_5_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  INST_BARRIER \
  out [0] = m0; \
  m0 = in [4]; \
  INST_BARRIER \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  out [4] = m0; \
  INST_BARRIER \
  in += 5; \
  out += 5; \
} while(0)
371
/* Copy 4 consecutive objects from IN to OUT, then advance both pointers
   by 4.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1, m2 and m3, which must be
   declared in the enclosing scope.  All four loads are issued before the
   four stores, fenced on both sides with INST_BARRIER.  */
#define MOVE_4_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  in += 4; \
  out += 4; \
} while(0)
387
/* Copy 3 consecutive objects from IN to OUT, then advance both pointers
   by 3.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0, m1 and m2, which must be declared
   in the enclosing scope.  All three loads are issued before the three
   stores, fenced on both sides with INST_BARRIER.  */
#define MOVE_3_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  INST_BARRIER \
  in += 3; \
  out += 3; \
} while(0)
401
/* Copy 2 consecutive objects from IN to OUT, then advance both pointers
   by 2.

   IN and OUT must be modifiable pointer lvalues; they are evaluated many
   times.  The expansion assigns to m0 and m1, which must be declared in
   the enclosing scope.  Both loads are issued before the two stores,
   fenced on both sides with INST_BARRIER.  */
#define MOVE_2_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  out [0] = m0; \
  out [1] = m1; \
  INST_BARRIER \
  in += 2; \
  out += 2; \
} while(0)
413
/* Copy a single object from IN to OUT, then advance both pointers by 1.

   IN and OUT must be modifiable pointer lvalues; they are evaluated
   several times.  The expansion assigns to m0, which must be declared in
   the enclosing scope.  The load/store pair is fenced on both sides with
   INST_BARRIER.  */
#define MOVE_1_OBJECT(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  out [0] = m0; \
  INST_BARRIER \
  in += 1; \
  out += 1; \
} while(0)
423
424
425 static inline void
426 __inhibit_loop_to_libcall
__int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)427 __int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
428 {
429 int value = n;
430 int loop_var;
431 const int *in = s2;
432 int *out = s1;
433 int count;
434 int m0,m1,m2,m3;
435
436 /* This code currently give a stall for any value with a 1->2 in the low 5
437 bits, i.e. 1,2, 33,34 ? not acceptable! */
438 switch (value & 0x1f)
439 {
440 case 0:
441 break;
442 case 1:
443 MOVE_1_OBJECT (in, out);
444 break;
445 case 2:
446 MOVE_2_OBJECTS (in, out);
447 break;
448 case 3:
449 MOVE_3_OBJECTS (in, out);
450 break;
451 case 4:
452 MOVE_4_OBJECTS (in, out);
453 break;
454 case 5:
455 MOVE_5_OBJECTS (in, out);
456 break;
457 case 6:
458 MOVE_6_OBJECTS (in, out);
459 break;
460 case 7:
461 MOVE_7_OBJECTS (in, out);
462 break;
463 case 8:
464 MOVE_8_OBJECTS (in, out);
465 break;
466 case 9:
467 MOVE_9_OBJECTS (in, out);
468 break;
469 case 10:
470 MOVE_10_OBJECTS (in, out);
471 break;
472 case 11:
473 MOVE_11_OBJECTS (in, out);
474 break;
475 case 12:
476 MOVE_12_OBJECTS (in, out);
477 break;
478 case 13:
479 MOVE_9_OBJECTS (in, out);
480 MOVE_4_OBJECTS (in, out);
481 break;
482 case 14:
483 MOVE_12_OBJECTS (in, out);
484 MOVE_2_OBJECTS (in, out);
485 break;
486 case 15:
487 MOVE_11_OBJECTS (in, out);
488 MOVE_4_OBJECTS (in, out);
489 break;
490 case 16:
491 MOVE_16_OBJECTS (in, out);
492 break;
493 case 17:
494 MOVE_11_OBJECTS (in, out);
495 MOVE_6_OBJECTS (in, out);
496 break;
497 case 18:
498 MOVE_9_OBJECTS (in, out);
499 MOVE_9_OBJECTS (in, out);
500 break;
501 case 19:
502 MOVE_16_OBJECTS (in, out);
503 MOVE_3_OBJECTS (in, out);
504 break;
505 case 20:
506 MOVE_16_OBJECTS (in, out);
507 MOVE_4_OBJECTS (in, out);
508 break;
509 case 21:
510 MOVE_16_OBJECTS (in, out);
511 MOVE_5_OBJECTS (in, out);
512 break;
513 case 22:
514 MOVE_16_OBJECTS (in, out);
515 MOVE_6_OBJECTS (in, out);
516 break;
517 case 23:
518 MOVE_16_OBJECTS (in, out);
519 MOVE_7_OBJECTS (in, out);
520 break;
521 case 24:
522 MOVE_16_OBJECTS (in, out);
523 MOVE_8_OBJECTS (in, out);
524 break;
525 case 25:
526 MOVE_16_OBJECTS (in, out);
527 MOVE_9_OBJECTS (in, out);
528 break;
529 case 26:
530 MOVE_16_OBJECTS (in, out);
531 MOVE_10_OBJECTS (in, out);
532 break;
533 case 27:
534 MOVE_16_OBJECTS (in, out);
535 MOVE_11_OBJECTS (in, out);
536 break;
537 case 28:
538 MOVE_16_OBJECTS (in, out);
539 MOVE_8_OBJECTS (in, out);
540 MOVE_4_OBJECTS (in, out);
541 break;
542 case 29:
543 MOVE_16_OBJECTS (in, out);
544 MOVE_9_OBJECTS (in, out);
545 MOVE_4_OBJECTS (in, out);
546 break;
547 case 30:
548 MOVE_16_OBJECTS (in, out);
549 MOVE_12_OBJECTS (in, out);
550 MOVE_2_OBJECTS (in, out);
551 break;
552 case 31:
553 MOVE_16_OBJECTS (in, out);
554 MOVE_11_OBJECTS (in, out);
555 MOVE_4_OBJECTS (in, out);
556 break;
557 }
558
559 /* This loop governs the asmptoptic behaviour of this algorithm, for long
560 word copies. */
561 count = value >> 5;
562 for (loop_var = 0; loop_var < count; loop_var++)
563 MOVE_32_OBJECTS (in, out);
564 }
565
566 static inline void
567 __inhibit_loop_to_libcall
__shrt_int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)568 __shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
569 {
570 int value = n;
571 int loop_var;
572 const short int *in = s2;
573 int short *out = s1;
574 int count;
575 int m0,m1,m2,m3;
576
577 /* This code currently give a stall for any value with a 1->2 in the low 5
578 bits, i.e. 1,2, 33,34 ? not acceptable! */
579 switch (value & 0x1f)
580 {
581 case 0:
582 break;
583 case 1:
584 MOVE_1_OBJECT (in, out);
585 break;
586 case 2:
587 MOVE_2_OBJECTS (in, out);
588 break;
589 case 3:
590 MOVE_3_OBJECTS (in, out);
591 break;
592 case 4:
593 MOVE_4_OBJECTS (in, out);
594 break;
595 case 5:
596 MOVE_5_OBJECTS (in, out);
597 break;
598 case 6:
599 MOVE_6_OBJECTS (in, out);
600 break;
601 case 7:
602 MOVE_7_OBJECTS (in, out);
603 break;
604 case 8:
605 MOVE_8_OBJECTS (in, out);
606 break;
607 case 9:
608 MOVE_9_OBJECTS (in, out);
609 break;
610 case 10:
611 MOVE_10_OBJECTS (in, out);
612 break;
613 case 11:
614 MOVE_11_OBJECTS (in, out);
615 break;
616 case 12:
617 MOVE_12_OBJECTS (in, out);
618 break;
619 case 13:
620 MOVE_9_OBJECTS (in, out);
621 MOVE_4_OBJECTS (in, out);
622 break;
623 case 14:
624 MOVE_12_OBJECTS (in, out);
625 MOVE_2_OBJECTS (in, out);
626 break;
627 case 15:
628 MOVE_11_OBJECTS (in, out);
629 MOVE_4_OBJECTS (in, out);
630 break;
631 case 16:
632 MOVE_16_OBJECTS (in, out);
633 break;
634 case 17:
635 MOVE_11_OBJECTS (in, out);
636 MOVE_6_OBJECTS (in, out);
637 break;
638 case 18:
639 MOVE_9_OBJECTS (in, out);
640 MOVE_9_OBJECTS (in, out);
641 break;
642 case 19:
643 MOVE_16_OBJECTS (in, out);
644 MOVE_3_OBJECTS (in, out);
645 break;
646 case 20:
647 MOVE_16_OBJECTS (in, out);
648 MOVE_4_OBJECTS (in, out);
649 break;
650 case 21:
651 MOVE_16_OBJECTS (in, out);
652 MOVE_5_OBJECTS (in, out);
653 break;
654 case 22:
655 MOVE_16_OBJECTS (in, out);
656 MOVE_6_OBJECTS (in, out);
657 break;
658 case 23:
659 MOVE_16_OBJECTS (in, out);
660 MOVE_7_OBJECTS (in, out);
661 break;
662 case 24:
663 MOVE_16_OBJECTS (in, out);
664 MOVE_8_OBJECTS (in, out);
665 break;
666 case 25:
667 MOVE_16_OBJECTS (in, out);
668 MOVE_9_OBJECTS (in, out);
669 break;
670 case 26:
671 MOVE_16_OBJECTS (in, out);
672 MOVE_10_OBJECTS (in, out);
673 break;
674 case 27:
675 MOVE_16_OBJECTS (in, out);
676 MOVE_11_OBJECTS (in, out);
677 break;
678 case 28:
679 MOVE_16_OBJECTS (in, out);
680 MOVE_8_OBJECTS (in, out);
681 MOVE_4_OBJECTS (in, out);
682 break;
683 case 29:
684 MOVE_16_OBJECTS (in, out);
685 MOVE_9_OBJECTS (in, out);
686 MOVE_4_OBJECTS (in, out);
687 break;
688 case 30:
689 MOVE_16_OBJECTS (in, out);
690 MOVE_12_OBJECTS (in, out);
691 MOVE_2_OBJECTS (in, out);
692 break;
693 case 31:
694 MOVE_16_OBJECTS (in, out);
695 MOVE_11_OBJECTS (in, out);
696 MOVE_4_OBJECTS (in, out);
697 break;
698 }
699
700 /* This loop governs the asmptoptic behaviour of this algorithm, for long
701 word copies. */
702 count = value >> 5;
703 for (loop_var = 0; loop_var < count; loop_var++)
704 MOVE_32_OBJECTS (in, out);
705 }
706
707
708 static inline void
709 __inhibit_loop_to_libcall
__byte_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)710 __byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
711 {
712 int value = n;
713 int loop_var;
714 const char *in = s2;
715 char *out = s1;
716 int count;
717 int m0,m1,m2,m3;
718
719 /* This code currently give a stall for any value with a 1->2 in the low 5
720 bits, i.e. 1,2, 33,34 ? not acceptable! */
721 switch (value & 0x1f)
722 {
723 case 0:
724 break;
725 case 1:
726 MOVE_1_OBJECT (in, out);
727 break;
728 case 2:
729 MOVE_2_OBJECTS (in, out);
730 break;
731 case 3:
732 MOVE_3_OBJECTS (in, out);
733 break;
734 case 4:
735 MOVE_4_OBJECTS (in, out);
736 break;
737 case 5:
738 MOVE_5_OBJECTS (in, out);
739 break;
740 case 6:
741 MOVE_6_OBJECTS (in, out);
742 break;
743 case 7:
744 MOVE_7_OBJECTS (in, out);
745 break;
746 case 8:
747 MOVE_8_OBJECTS (in, out);
748 break;
749 case 9:
750 MOVE_9_OBJECTS (in, out);
751 break;
752 case 10:
753 MOVE_10_OBJECTS (in, out);
754 break;
755 case 11:
756 MOVE_11_OBJECTS (in, out);
757 break;
758 case 12:
759 MOVE_12_OBJECTS (in, out);
760 break;
761 case 13:
762 MOVE_9_OBJECTS (in, out);
763 MOVE_4_OBJECTS (in, out);
764 break;
765 case 14:
766 MOVE_12_OBJECTS (in, out);
767 MOVE_2_OBJECTS (in, out);
768 break;
769 case 15:
770 MOVE_11_OBJECTS (in, out);
771 MOVE_4_OBJECTS (in, out);
772 break;
773 case 16:
774 MOVE_16_OBJECTS (in, out);
775 break;
776 case 17:
777 MOVE_11_OBJECTS (in, out);
778 MOVE_6_OBJECTS (in, out);
779 break;
780 case 18:
781 MOVE_9_OBJECTS (in, out);
782 MOVE_9_OBJECTS (in, out);
783 break;
784 case 19:
785 MOVE_16_OBJECTS (in, out);
786 MOVE_3_OBJECTS (in, out);
787 break;
788 case 20:
789 MOVE_16_OBJECTS (in, out);
790 MOVE_4_OBJECTS (in, out);
791 break;
792 case 21:
793 MOVE_16_OBJECTS (in, out);
794 MOVE_5_OBJECTS (in, out);
795 break;
796 case 22:
797 MOVE_16_OBJECTS (in, out);
798 MOVE_6_OBJECTS (in, out);
799 break;
800 case 23:
801 MOVE_16_OBJECTS (in, out);
802 MOVE_7_OBJECTS (in, out);
803 break;
804 case 24:
805 MOVE_16_OBJECTS (in, out);
806 MOVE_8_OBJECTS (in, out);
807 break;
808 case 25:
809 MOVE_16_OBJECTS (in, out);
810 MOVE_9_OBJECTS (in, out);
811 break;
812 case 26:
813 MOVE_16_OBJECTS (in, out);
814 MOVE_10_OBJECTS (in, out);
815 break;
816 case 27:
817 MOVE_16_OBJECTS (in, out);
818 MOVE_11_OBJECTS (in, out);
819 break;
820 case 28:
821 MOVE_16_OBJECTS (in, out);
822 MOVE_8_OBJECTS (in, out);
823 MOVE_4_OBJECTS (in, out);
824 break;
825 case 29:
826 MOVE_16_OBJECTS (in, out);
827 MOVE_9_OBJECTS (in, out);
828 MOVE_4_OBJECTS (in, out);
829 break;
830 case 30:
831 MOVE_16_OBJECTS (in, out);
832 MOVE_12_OBJECTS (in, out);
833 MOVE_2_OBJECTS (in, out);
834 break;
835 case 31:
836 MOVE_16_OBJECTS (in, out);
837 MOVE_11_OBJECTS (in, out);
838 MOVE_4_OBJECTS (in, out);
839 break;
840 }
841
842 /* This loop governs the asmptoptic behaviour of this algorithm, for long
843 word copies. */
844 count = value >> 5;
845 for (loop_var = 0; loop_var < count; loop_var++)
846 MOVE_32_OBJECTS (in, out);
847 }
848
849
850 /* Exposed interface. */
851
852 void *
853 __inhibit_loop_to_libcall
memcpy(void * __restrict s1,const void * __restrict s2,size_t n)854 memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
855 {
856 void *result = s1;
857
858 /* None of the following handles copying zero bytes. */
859 if (n != 0)
860 {
861 unsigned test = (unsigned) s2 | (unsigned) s1 | (unsigned) n;
862
863 if (test & 1)
864 __byte_memcpy (s1, s2, n);
865 else if (test & 2)
866 __shrt_int_memcpy (s1, s2, n >> 1);
867 else
868 #ifdef __VISIUM_ARCH_BMI__
869 __asm__ __volatile__ ("bmd %0,%1,%2"
870 : "+t" (s1), "+u" (s2), "+v" (n)
871 :
872 : "r4", "r5", "r6", "memory");
873 #else
874 __int_memcpy (s1, s2, n >> 2);
875 #endif /* __VISIUM_ARCH_BMI__ */
876 }
877
878 return result;
879 }
880