1 /* memcpy for the Visium processor.
2
3 Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15 the names of its contributors may be used to endorse or promote products
16 derived from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 THE POSSIBILITY OF SUCH DAMAGE. */
29
30 /* This file must be kept in sync with libgcc/config/visium/memcpy.c */
31
32 #include <picolibc.h>
33
34 #include <stddef.h>
35 #include "memcpy.h"
36 #include "../../string/local.h"
37
/* Empty asm statement with a "memory" clobber, used between groups of
   loads and stores in the MOVE_n macros below.  The expansion carries its
   own trailing semicolon, so uses are written without one.  */
#define INST_BARRIER __asm__ __volatile__ ("" : : : "memory");
39
/* Copy 32 consecutive elements from SRC to DST, then advance both
   pointers by 32.  The work is done in eight groups of four: each group
   loads four elements into m0..m3 and then stores them, and every group
   is bracketed by INST_BARRIER.  SRC and DST must be modifiable pointer
   lvalues; m0..m3 must be declared by the enclosing function.  */
#define MOVE_32_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..7 */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      m3 = (src)[7]; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      (dst)[6] = m2; \
      (dst)[7] = m3; \
      /* elements 8..11 */ \
      INST_BARRIER \
      m0 = (src)[8]; \
      m1 = (src)[9]; \
      m2 = (src)[10]; \
      m3 = (src)[11]; \
      (dst)[8] = m0; \
      (dst)[9] = m1; \
      (dst)[10] = m2; \
      (dst)[11] = m3; \
      /* elements 12..15 */ \
      INST_BARRIER \
      m0 = (src)[12]; \
      m1 = (src)[13]; \
      m2 = (src)[14]; \
      m3 = (src)[15]; \
      (dst)[12] = m0; \
      (dst)[13] = m1; \
      (dst)[14] = m2; \
      (dst)[15] = m3; \
      /* elements 16..19 */ \
      INST_BARRIER \
      m0 = (src)[16]; \
      m1 = (src)[17]; \
      m2 = (src)[18]; \
      m3 = (src)[19]; \
      (dst)[16] = m0; \
      (dst)[17] = m1; \
      (dst)[18] = m2; \
      (dst)[19] = m3; \
      /* elements 20..23 */ \
      INST_BARRIER \
      m0 = (src)[20]; \
      m1 = (src)[21]; \
      m2 = (src)[22]; \
      m3 = (src)[23]; \
      (dst)[20] = m0; \
      (dst)[21] = m1; \
      (dst)[22] = m2; \
      (dst)[23] = m3; \
      /* elements 24..27 */ \
      INST_BARRIER \
      m0 = (src)[24]; \
      m1 = (src)[25]; \
      m2 = (src)[26]; \
      m3 = (src)[27]; \
      (dst)[24] = m0; \
      (dst)[25] = m1; \
      (dst)[26] = m2; \
      (dst)[27] = m3; \
      /* elements 28..31 */ \
      INST_BARRIER \
      m0 = (src)[28]; \
      m1 = (src)[29]; \
      m2 = (src)[30]; \
      m3 = (src)[31]; \
      (dst)[28] = m0; \
      (dst)[29] = m1; \
      (dst)[30] = m2; \
      (dst)[31] = m3; \
      INST_BARRIER \
      (src) += 32; \
      (dst) += 32; \
    } \
  while (0)
118
/* Copy 16 consecutive elements from SRC to DST, then advance both
   pointers by 16.  Four groups of four elements are each loaded into
   m0..m3 and then stored, with INST_BARRIER around every group.  SRC and
   DST must be modifiable pointer lvalues; m0..m3 come from the enclosing
   function.  */
#define MOVE_16_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..7 */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      m3 = (src)[7]; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      (dst)[6] = m2; \
      (dst)[7] = m3; \
      /* elements 8..11 */ \
      INST_BARRIER \
      m0 = (src)[8]; \
      m1 = (src)[9]; \
      m2 = (src)[10]; \
      m3 = (src)[11]; \
      (dst)[8] = m0; \
      (dst)[9] = m1; \
      (dst)[10] = m2; \
      (dst)[11] = m3; \
      /* elements 12..15 */ \
      INST_BARRIER \
      m0 = (src)[12]; \
      m1 = (src)[13]; \
      m2 = (src)[14]; \
      m3 = (src)[15]; \
      (dst)[12] = m0; \
      (dst)[13] = m1; \
      (dst)[14] = m2; \
      (dst)[15] = m3; \
      INST_BARRIER \
      (src) += 16; \
      (dst) += 16; \
    } \
  while (0)
161
/* Copy 12 consecutive elements from SRC to DST, then advance both
   pointers by 12.  Three groups of four elements are each loaded into
   m0..m3 and then stored, with INST_BARRIER around every group.  SRC and
   DST must be modifiable pointer lvalues; m0..m3 come from the enclosing
   function.  */
#define MOVE_12_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..7 */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      m3 = (src)[7]; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      (dst)[6] = m2; \
      (dst)[7] = m3; \
      /* elements 8..11 */ \
      INST_BARRIER \
      m0 = (src)[8]; \
      m1 = (src)[9]; \
      m2 = (src)[10]; \
      m3 = (src)[11]; \
      (dst)[8] = m0; \
      (dst)[9] = m1; \
      (dst)[10] = m2; \
      (dst)[11] = m3; \
      INST_BARRIER \
      (src) += 12; \
      (dst) += 12; \
    } \
  while (0)
195
/* Copy 11 consecutive elements from SRC to DST, then advance both
   pointers by 11.  Two full groups of four are followed by a final group
   of three that uses only m0..m2; INST_BARRIER brackets every group.
   SRC and DST must be modifiable pointer lvalues; m0..m3 come from the
   enclosing function.  */
#define MOVE_11_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..7 */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      m3 = (src)[7]; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      (dst)[6] = m2; \
      (dst)[7] = m3; \
      /* elements 8..10 */ \
      INST_BARRIER \
      m0 = (src)[8]; \
      m1 = (src)[9]; \
      m2 = (src)[10]; \
      (dst)[8] = m0; \
      (dst)[9] = m1; \
      (dst)[10] = m2; \
      INST_BARRIER \
      (src) += 11; \
      (dst) += 11; \
    } \
  while (0)
227
/* Copy 10 consecutive elements from SRC to DST, then advance both
   pointers by 10.  Elements 0..3 are loaded and then stored as one group.
   After the next barrier, elements 4..7 are loaded; the loads of elements
   8 and 9 reuse m0 and m1 and are therefore issued immediately after the
   stores that free those temporaries, before the remaining stores.  SRC
   and DST must be modifiable pointer lvalues; m0..m3 come from the
   enclosing function.  */
#define MOVE_10_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..9, with the last two loads interleaved */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      m3 = (src)[7]; \
      (dst)[4] = m0; \
      m0 = (src)[8]; \
      (dst)[5] = m1; \
      m1 = (src)[9]; \
      (dst)[6] = m2; \
      (dst)[7] = m3; \
      (dst)[8] = m0; \
      (dst)[9] = m1; \
      INST_BARRIER \
      (src) += 10; \
      (dst) += 10; \
    } \
  while (0)
256
/* Copy 9 consecutive elements from IN to OUT, then advance both pointers
   by 9.  Two groups of four elements are each loaded into m0..m3 and then
   stored; the ninth element goes through m0 on its own.  INST_BARRIER
   brackets every group, including a closing barrier before the pointer
   updates as in the other MOVE_n macros.  IN and OUT must be modifiable
   pointer lvalues; m0..m3 come from the enclosing function.  */
#define MOVE_9_OBJECTS(in,out) \
do { \
  INST_BARRIER \
  m0 = in [0]; \
  m1 = in [1]; \
  m2 = in [2]; \
  m3 = in [3]; \
  out [0] = m0; \
  out [1] = m1; \
  out [2] = m2; \
  out [3] = m3; \
  INST_BARRIER \
  m0 = in [4]; \
  m1 = in [5]; \
  m2 = in [6]; \
  m3 = in [7]; \
  out [4] = m0; \
  out [5] = m1; \
  out [6] = m2; \
  out [7] = m3; \
  INST_BARRIER \
  m0 = in [8]; \
  out [8] = m0; \
  INST_BARRIER \
  in += 9; \
  out += 9; \
} while(0)
283
/* Copy 8 consecutive elements from SRC to DST, then advance both pointers
   by 8.  Two groups of four elements are each loaded into m0..m3 and then
   stored, with INST_BARRIER around every group.  SRC and DST must be
   modifiable pointer lvalues; m0..m3 come from the enclosing function.  */
#define MOVE_8_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..7 */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      m3 = (src)[7]; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      (dst)[6] = m2; \
      (dst)[7] = m3; \
      INST_BARRIER \
      (src) += 8; \
      (dst) += 8; \
    } \
  while (0)
308
/* Copy 7 consecutive elements from SRC to DST, then advance both pointers
   by 7.  A full group of four is followed by a group of three that uses
   only m0..m2; INST_BARRIER brackets both groups.  SRC and DST must be
   modifiable pointer lvalues; m0..m3 come from the enclosing function.  */
#define MOVE_7_OBJECTS(src, dst) \
  do \
    { \
      /* elements 0..3 */ \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      /* elements 4..6 */ \
      INST_BARRIER \
      m0 = (src)[4]; \
      m1 = (src)[5]; \
      m2 = (src)[6]; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      (dst)[6] = m2; \
      INST_BARRIER \
      (src) += 7; \
      (dst) += 7; \
    } \
  while (0)
331
/* Copy 6 consecutive elements from SRC to DST, then advance both pointers
   by 6.  Elements 0..3 are loaded first.  m0 and m1 are then recycled for
   elements 4 and 5: each is reloaded right after its first value has been
   stored, with an INST_BARRIER between the two store/reload pairs, and
   the remaining four stores follow.  SRC and DST must be modifiable
   pointer lvalues; m0..m3 come from the enclosing function.  */
#define MOVE_6_OBJECTS(src, dst) \
  do \
    { \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      INST_BARRIER \
      /* m0 is free again: fetch element 4 */ \
      m0 = (src)[4]; \
      (dst)[1] = m1; \
      INST_BARRIER \
      /* m1 is free again: fetch element 5 */ \
      m1 = (src)[5]; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      (dst)[4] = m0; \
      (dst)[5] = m1; \
      INST_BARRIER \
      (src) += 6; \
      (dst) += 6; \
    } \
  while (0)
353
/* Copy 5 consecutive elements from SRC to DST, then advance both pointers
   by 5.  Elements 0..3 are loaded into m0..m3; after a barrier, element 0
   is stored and m0 is reloaded with element 4; after another barrier the
   remaining four stores are issued.  SRC and DST must be modifiable
   pointer lvalues; m0..m3 come from the enclosing function.  */
#define MOVE_5_OBJECTS(src, dst) \
  do \
    { \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      INST_BARRIER \
      (dst)[0] = m0; \
      /* m0 is free again: fetch element 4 */ \
      m0 = (src)[4]; \
      INST_BARRIER \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      (dst)[4] = m0; \
      INST_BARRIER \
      (src) += 5; \
      (dst) += 5; \
    } \
  while (0)
373
/* Copy 4 consecutive elements from SRC to DST, then advance both pointers
   by 4.  All four loads into m0..m3 precede the four stores, and the
   group is bracketed by INST_BARRIER.  SRC and DST must be modifiable
   pointer lvalues; m0..m3 come from the enclosing function.  */
#define MOVE_4_OBJECTS(src, dst) \
  do \
    { \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      m3 = (src)[3]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      (dst)[3] = m3; \
      INST_BARRIER \
      (src) += 4; \
      (dst) += 4; \
    } \
  while (0)
389
/* Copy 3 consecutive elements from SRC to DST, then advance both pointers
   by 3.  The three loads into m0..m2 precede the three stores, and the
   group is bracketed by INST_BARRIER.  SRC and DST must be modifiable
   pointer lvalues; m0..m2 come from the enclosing function.  */
#define MOVE_3_OBJECTS(src, dst) \
  do \
    { \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      m2 = (src)[2]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      (dst)[2] = m2; \
      INST_BARRIER \
      (src) += 3; \
      (dst) += 3; \
    } \
  while (0)
403
/* Copy 2 consecutive elements from SRC to DST, then advance both pointers
   by 2.  Both loads (into m0 and m1) precede both stores, and the pair is
   bracketed by INST_BARRIER.  SRC and DST must be modifiable pointer
   lvalues; m0 and m1 come from the enclosing function.  */
#define MOVE_2_OBJECTS(src, dst) \
  do \
    { \
      INST_BARRIER \
      m0 = (src)[0]; \
      m1 = (src)[1]; \
      (dst)[0] = m0; \
      (dst)[1] = m1; \
      INST_BARRIER \
      (src) += 2; \
      (dst) += 2; \
    } \
  while (0)
415
/* Copy a single element from SRC to DST through m0, then advance both
   pointers by 1.  The load/store pair is bracketed by INST_BARRIER.  SRC
   and DST must be modifiable pointer lvalues; m0 comes from the enclosing
   function.  */
#define MOVE_1_OBJECT(src, dst) \
  do \
    { \
      INST_BARRIER \
      m0 = (src)[0]; \
      (dst)[0] = m0; \
      INST_BARRIER \
      (src) += 1; \
      (dst) += 1; \
    } \
  while (0)
425
426
427 static inline void
428 __inhibit_loop_to_libcall
__int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)429 __int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
430 {
431 int value = n;
432 int loop_var;
433 const int *in = s2;
434 int *out = s1;
435 int count;
436 int m0,m1,m2,m3;
437
438 /* This code currently give a stall for any value with a 1->2 in the low 5
439 bits, i.e. 1,2, 33,34 ? not acceptable! */
440 switch (value & 0x1f)
441 {
442 case 0:
443 break;
444 case 1:
445 MOVE_1_OBJECT (in, out);
446 break;
447 case 2:
448 MOVE_2_OBJECTS (in, out);
449 break;
450 case 3:
451 MOVE_3_OBJECTS (in, out);
452 break;
453 case 4:
454 MOVE_4_OBJECTS (in, out);
455 break;
456 case 5:
457 MOVE_5_OBJECTS (in, out);
458 break;
459 case 6:
460 MOVE_6_OBJECTS (in, out);
461 break;
462 case 7:
463 MOVE_7_OBJECTS (in, out);
464 break;
465 case 8:
466 MOVE_8_OBJECTS (in, out);
467 break;
468 case 9:
469 MOVE_9_OBJECTS (in, out);
470 break;
471 case 10:
472 MOVE_10_OBJECTS (in, out);
473 break;
474 case 11:
475 MOVE_11_OBJECTS (in, out);
476 break;
477 case 12:
478 MOVE_12_OBJECTS (in, out);
479 break;
480 case 13:
481 MOVE_9_OBJECTS (in, out);
482 MOVE_4_OBJECTS (in, out);
483 break;
484 case 14:
485 MOVE_12_OBJECTS (in, out);
486 MOVE_2_OBJECTS (in, out);
487 break;
488 case 15:
489 MOVE_11_OBJECTS (in, out);
490 MOVE_4_OBJECTS (in, out);
491 break;
492 case 16:
493 MOVE_16_OBJECTS (in, out);
494 break;
495 case 17:
496 MOVE_11_OBJECTS (in, out);
497 MOVE_6_OBJECTS (in, out);
498 break;
499 case 18:
500 MOVE_9_OBJECTS (in, out);
501 MOVE_9_OBJECTS (in, out);
502 break;
503 case 19:
504 MOVE_16_OBJECTS (in, out);
505 MOVE_3_OBJECTS (in, out);
506 break;
507 case 20:
508 MOVE_16_OBJECTS (in, out);
509 MOVE_4_OBJECTS (in, out);
510 break;
511 case 21:
512 MOVE_16_OBJECTS (in, out);
513 MOVE_5_OBJECTS (in, out);
514 break;
515 case 22:
516 MOVE_16_OBJECTS (in, out);
517 MOVE_6_OBJECTS (in, out);
518 break;
519 case 23:
520 MOVE_16_OBJECTS (in, out);
521 MOVE_7_OBJECTS (in, out);
522 break;
523 case 24:
524 MOVE_16_OBJECTS (in, out);
525 MOVE_8_OBJECTS (in, out);
526 break;
527 case 25:
528 MOVE_16_OBJECTS (in, out);
529 MOVE_9_OBJECTS (in, out);
530 break;
531 case 26:
532 MOVE_16_OBJECTS (in, out);
533 MOVE_10_OBJECTS (in, out);
534 break;
535 case 27:
536 MOVE_16_OBJECTS (in, out);
537 MOVE_11_OBJECTS (in, out);
538 break;
539 case 28:
540 MOVE_16_OBJECTS (in, out);
541 MOVE_8_OBJECTS (in, out);
542 MOVE_4_OBJECTS (in, out);
543 break;
544 case 29:
545 MOVE_16_OBJECTS (in, out);
546 MOVE_9_OBJECTS (in, out);
547 MOVE_4_OBJECTS (in, out);
548 break;
549 case 30:
550 MOVE_16_OBJECTS (in, out);
551 MOVE_12_OBJECTS (in, out);
552 MOVE_2_OBJECTS (in, out);
553 break;
554 case 31:
555 MOVE_16_OBJECTS (in, out);
556 MOVE_11_OBJECTS (in, out);
557 MOVE_4_OBJECTS (in, out);
558 break;
559 }
560
561 /* This loop governs the asmptoptic behaviour of this algorithm, for long
562 word copies. */
563 count = value >> 5;
564 for (loop_var = 0; loop_var < count; loop_var++)
565 MOVE_32_OBJECTS (in, out);
566 }
567
568 static inline void
569 __inhibit_loop_to_libcall
__shrt_int_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)570 __shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
571 {
572 int value = n;
573 int loop_var;
574 const short int *in = s2;
575 int short *out = s1;
576 int count;
577 int m0,m1,m2,m3;
578
579 /* This code currently give a stall for any value with a 1->2 in the low 5
580 bits, i.e. 1,2, 33,34 ? not acceptable! */
581 switch (value & 0x1f)
582 {
583 case 0:
584 break;
585 case 1:
586 MOVE_1_OBJECT (in, out);
587 break;
588 case 2:
589 MOVE_2_OBJECTS (in, out);
590 break;
591 case 3:
592 MOVE_3_OBJECTS (in, out);
593 break;
594 case 4:
595 MOVE_4_OBJECTS (in, out);
596 break;
597 case 5:
598 MOVE_5_OBJECTS (in, out);
599 break;
600 case 6:
601 MOVE_6_OBJECTS (in, out);
602 break;
603 case 7:
604 MOVE_7_OBJECTS (in, out);
605 break;
606 case 8:
607 MOVE_8_OBJECTS (in, out);
608 break;
609 case 9:
610 MOVE_9_OBJECTS (in, out);
611 break;
612 case 10:
613 MOVE_10_OBJECTS (in, out);
614 break;
615 case 11:
616 MOVE_11_OBJECTS (in, out);
617 break;
618 case 12:
619 MOVE_12_OBJECTS (in, out);
620 break;
621 case 13:
622 MOVE_9_OBJECTS (in, out);
623 MOVE_4_OBJECTS (in, out);
624 break;
625 case 14:
626 MOVE_12_OBJECTS (in, out);
627 MOVE_2_OBJECTS (in, out);
628 break;
629 case 15:
630 MOVE_11_OBJECTS (in, out);
631 MOVE_4_OBJECTS (in, out);
632 break;
633 case 16:
634 MOVE_16_OBJECTS (in, out);
635 break;
636 case 17:
637 MOVE_11_OBJECTS (in, out);
638 MOVE_6_OBJECTS (in, out);
639 break;
640 case 18:
641 MOVE_9_OBJECTS (in, out);
642 MOVE_9_OBJECTS (in, out);
643 break;
644 case 19:
645 MOVE_16_OBJECTS (in, out);
646 MOVE_3_OBJECTS (in, out);
647 break;
648 case 20:
649 MOVE_16_OBJECTS (in, out);
650 MOVE_4_OBJECTS (in, out);
651 break;
652 case 21:
653 MOVE_16_OBJECTS (in, out);
654 MOVE_5_OBJECTS (in, out);
655 break;
656 case 22:
657 MOVE_16_OBJECTS (in, out);
658 MOVE_6_OBJECTS (in, out);
659 break;
660 case 23:
661 MOVE_16_OBJECTS (in, out);
662 MOVE_7_OBJECTS (in, out);
663 break;
664 case 24:
665 MOVE_16_OBJECTS (in, out);
666 MOVE_8_OBJECTS (in, out);
667 break;
668 case 25:
669 MOVE_16_OBJECTS (in, out);
670 MOVE_9_OBJECTS (in, out);
671 break;
672 case 26:
673 MOVE_16_OBJECTS (in, out);
674 MOVE_10_OBJECTS (in, out);
675 break;
676 case 27:
677 MOVE_16_OBJECTS (in, out);
678 MOVE_11_OBJECTS (in, out);
679 break;
680 case 28:
681 MOVE_16_OBJECTS (in, out);
682 MOVE_8_OBJECTS (in, out);
683 MOVE_4_OBJECTS (in, out);
684 break;
685 case 29:
686 MOVE_16_OBJECTS (in, out);
687 MOVE_9_OBJECTS (in, out);
688 MOVE_4_OBJECTS (in, out);
689 break;
690 case 30:
691 MOVE_16_OBJECTS (in, out);
692 MOVE_12_OBJECTS (in, out);
693 MOVE_2_OBJECTS (in, out);
694 break;
695 case 31:
696 MOVE_16_OBJECTS (in, out);
697 MOVE_11_OBJECTS (in, out);
698 MOVE_4_OBJECTS (in, out);
699 break;
700 }
701
702 /* This loop governs the asmptoptic behaviour of this algorithm, for long
703 word copies. */
704 count = value >> 5;
705 for (loop_var = 0; loop_var < count; loop_var++)
706 MOVE_32_OBJECTS (in, out);
707 }
708
709
710 static inline void
711 __inhibit_loop_to_libcall
__byte_memcpy(void * __restrict s1,const void * __restrict s2,size_t n)712 __byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
713 {
714 int value = n;
715 int loop_var;
716 const char *in = s2;
717 char *out = s1;
718 int count;
719 int m0,m1,m2,m3;
720
721 /* This code currently give a stall for any value with a 1->2 in the low 5
722 bits, i.e. 1,2, 33,34 ? not acceptable! */
723 switch (value & 0x1f)
724 {
725 case 0:
726 break;
727 case 1:
728 MOVE_1_OBJECT (in, out);
729 break;
730 case 2:
731 MOVE_2_OBJECTS (in, out);
732 break;
733 case 3:
734 MOVE_3_OBJECTS (in, out);
735 break;
736 case 4:
737 MOVE_4_OBJECTS (in, out);
738 break;
739 case 5:
740 MOVE_5_OBJECTS (in, out);
741 break;
742 case 6:
743 MOVE_6_OBJECTS (in, out);
744 break;
745 case 7:
746 MOVE_7_OBJECTS (in, out);
747 break;
748 case 8:
749 MOVE_8_OBJECTS (in, out);
750 break;
751 case 9:
752 MOVE_9_OBJECTS (in, out);
753 break;
754 case 10:
755 MOVE_10_OBJECTS (in, out);
756 break;
757 case 11:
758 MOVE_11_OBJECTS (in, out);
759 break;
760 case 12:
761 MOVE_12_OBJECTS (in, out);
762 break;
763 case 13:
764 MOVE_9_OBJECTS (in, out);
765 MOVE_4_OBJECTS (in, out);
766 break;
767 case 14:
768 MOVE_12_OBJECTS (in, out);
769 MOVE_2_OBJECTS (in, out);
770 break;
771 case 15:
772 MOVE_11_OBJECTS (in, out);
773 MOVE_4_OBJECTS (in, out);
774 break;
775 case 16:
776 MOVE_16_OBJECTS (in, out);
777 break;
778 case 17:
779 MOVE_11_OBJECTS (in, out);
780 MOVE_6_OBJECTS (in, out);
781 break;
782 case 18:
783 MOVE_9_OBJECTS (in, out);
784 MOVE_9_OBJECTS (in, out);
785 break;
786 case 19:
787 MOVE_16_OBJECTS (in, out);
788 MOVE_3_OBJECTS (in, out);
789 break;
790 case 20:
791 MOVE_16_OBJECTS (in, out);
792 MOVE_4_OBJECTS (in, out);
793 break;
794 case 21:
795 MOVE_16_OBJECTS (in, out);
796 MOVE_5_OBJECTS (in, out);
797 break;
798 case 22:
799 MOVE_16_OBJECTS (in, out);
800 MOVE_6_OBJECTS (in, out);
801 break;
802 case 23:
803 MOVE_16_OBJECTS (in, out);
804 MOVE_7_OBJECTS (in, out);
805 break;
806 case 24:
807 MOVE_16_OBJECTS (in, out);
808 MOVE_8_OBJECTS (in, out);
809 break;
810 case 25:
811 MOVE_16_OBJECTS (in, out);
812 MOVE_9_OBJECTS (in, out);
813 break;
814 case 26:
815 MOVE_16_OBJECTS (in, out);
816 MOVE_10_OBJECTS (in, out);
817 break;
818 case 27:
819 MOVE_16_OBJECTS (in, out);
820 MOVE_11_OBJECTS (in, out);
821 break;
822 case 28:
823 MOVE_16_OBJECTS (in, out);
824 MOVE_8_OBJECTS (in, out);
825 MOVE_4_OBJECTS (in, out);
826 break;
827 case 29:
828 MOVE_16_OBJECTS (in, out);
829 MOVE_9_OBJECTS (in, out);
830 MOVE_4_OBJECTS (in, out);
831 break;
832 case 30:
833 MOVE_16_OBJECTS (in, out);
834 MOVE_12_OBJECTS (in, out);
835 MOVE_2_OBJECTS (in, out);
836 break;
837 case 31:
838 MOVE_16_OBJECTS (in, out);
839 MOVE_11_OBJECTS (in, out);
840 MOVE_4_OBJECTS (in, out);
841 break;
842 }
843
844 /* This loop governs the asmptoptic behaviour of this algorithm, for long
845 word copies. */
846 count = value >> 5;
847 for (loop_var = 0; loop_var < count; loop_var++)
848 MOVE_32_OBJECTS (in, out);
849 }
850
851
852 /* Exposed interface. */
853
854 void *
855 __inhibit_loop_to_libcall
memcpy(void * __restrict s1,const void * __restrict s2,size_t n)856 memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
857 {
858 void *result = s1;
859
860 /* None of the following handles copying zero bytes. */
861 if (n != 0)
862 {
863 unsigned test = (unsigned) s2 | (unsigned) s1 | (unsigned) n;
864
865 if (test & 1)
866 __byte_memcpy (s1, s2, n);
867 else if (test & 2)
868 __shrt_int_memcpy (s1, s2, n >> 1);
869 else
870 #ifdef __VISIUM_ARCH_BMI__
871 __asm__ __volatile__ ("bmd %0,%1,%2"
872 : "+t" (s1), "+u" (s2), "+v" (n)
873 :
874 : "r4", "r5", "r6", "memory");
875 #else
876 __int_memcpy (s1, s2, n >> 2);
877 #endif /* __VISIUM_ARCH_BMI__ */
878 }
879
880 return result;
881 }
882