1 /* memset for the Visium processor.
2
3 Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15 the names of its contributors may be used to endorse or promote products
16 derived from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 THE POSSIBILITY OF SUCH DAMAGE. */
29
30 /* This file must be kept in sync with libgcc/config/visium/memset.c */
31
32 #include <picolibc.h>
33
34 #include <stddef.h>
35 #include "memset.h"
36 #include "../../string/local.h"
37
/* Store m0 into OUT[0] .. OUT[31] and advance OUT by 32 objects.

   OUT must be a modifiable pointer lvalue.  It is evaluated more than
   once, so it must not have side effects.  A variable named m0 holding
   the fill value must be in scope where the macro is expanded.  The
   argument is parenthesized so that expressions such as *pp index the
   pointer they designate.  */
#define SET_32_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out)[11] = m0; \
  (out)[12] = m0; \
  (out)[13] = m0; \
  (out)[14] = m0; \
  (out)[15] = m0; \
  (out)[16] = m0; \
  (out)[17] = m0; \
  (out)[18] = m0; \
  (out)[19] = m0; \
  (out)[20] = m0; \
  (out)[21] = m0; \
  (out)[22] = m0; \
  (out)[23] = m0; \
  (out)[24] = m0; \
  (out)[25] = m0; \
  (out)[26] = m0; \
  (out)[27] = m0; \
  (out)[28] = m0; \
  (out)[29] = m0; \
  (out)[30] = m0; \
  (out)[31] = m0; \
  (out) += 32; \
} while (0)
74
/* Store m0 into OUT[0] .. OUT[15] and advance OUT by 16 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_16_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out)[11] = m0; \
  (out)[12] = m0; \
  (out)[13] = m0; \
  (out)[14] = m0; \
  (out)[15] = m0; \
  (out) += 16; \
} while (0)
95
/* Store m0 into OUT[0] .. OUT[11] and advance OUT by 12 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_12_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out)[11] = m0; \
  (out) += 12; \
} while (0)
112
/* Store m0 into OUT[0] .. OUT[10] and advance OUT by 11 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_11_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out) += 11; \
} while (0)
128
/* Store m0 into OUT[0] .. OUT[9] and advance OUT by 10 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_10_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out) += 10; \
} while (0)
143
/* Store m0 into OUT[0] .. OUT[8] and advance OUT by 9 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_9_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out) += 9; \
} while (0)
157
/* Store m0 into OUT[0] .. OUT[7] and advance OUT by 8 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_8_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out) += 8; \
} while (0)
170
/* Store m0 into OUT[0] .. OUT[6] and advance OUT by 7 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_7_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out) += 7; \
} while (0)
182
/* Store m0 into OUT[0] .. OUT[5] and advance OUT by 6 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_6_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out) += 6; \
} while (0)
193
/* Store m0 into OUT[0] .. OUT[4] and advance OUT by 5 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_5_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out) += 5; \
} while (0)
203
/* Store m0 into OUT[0] .. OUT[3] and advance OUT by 4 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_4_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out) += 4; \
} while (0)
212
/* Store m0 into OUT[0] .. OUT[2] and advance OUT by 3 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_3_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out) += 3; \
} while (0)
220
/* Store m0 into OUT[0] and OUT[1] and advance OUT by 2 objects.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated more than once).  A variable named m0 holding the fill value
   must be in scope at the point of expansion.  */
#define SET_2_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out) += 2; \
} while (0)
227
/* Store m0 into the object OUT points at, then step OUT past it.

   OUT must be a modifiable pointer lvalue without side effects (it is
   evaluated twice).  A variable named m0 holding the fill value must be
   in scope at the point of expansion.  */
#define SET_1_OBJECT(out) \
do { \
  *(out) = m0; \
  ++(out); \
} while (0)
233
234 static inline void
235 __inhibit_loop_to_libcall
__int_memset(void * __restrict s1,int val,size_t n)236 __int_memset (void *__restrict s1, int val, size_t n)
237 {
238 int value = n;
239 int loop_var;
240 int *out = s1;
241 int count;
242 int m0 = val;
243
244 /* This code currently give a stall for any value with a 1->2 in the low 5
245 bits, i.e. 1,2, 33,34 ? not acceptable! */
246 switch (value & 0x1f)
247 {
248 case 0:
249 break;
250 case 1:
251 SET_1_OBJECT (out);
252 break;
253 case 2:
254 SET_2_OBJECTS (out);
255 break;
256 case 3:
257 SET_3_OBJECTS (out);
258 break;
259 case 4:
260 SET_4_OBJECTS (out);
261 break;
262 case 5:
263 SET_5_OBJECTS (out);
264 break;
265 case 6:
266 SET_6_OBJECTS (out);
267 break;
268 case 7:
269 SET_7_OBJECTS (out);
270 break;
271 case 8:
272 SET_8_OBJECTS (out);
273 break;
274 case 9:
275 SET_9_OBJECTS (out);
276 break;
277 case 10:
278 SET_10_OBJECTS (out);
279 break;
280 case 11:
281 SET_11_OBJECTS (out);
282 break;
283 case 12:
284 SET_12_OBJECTS (out);
285 break;
286 case 13:
287 SET_9_OBJECTS (out);
288 SET_4_OBJECTS (out);
289 break;
290 case 14:
291 SET_12_OBJECTS (out);
292 SET_2_OBJECTS (out);
293 break;
294 case 15:
295 SET_11_OBJECTS (out);
296 SET_4_OBJECTS (out);
297 break;
298 case 16:
299 SET_16_OBJECTS (out);
300 break;
301 case 17:
302 SET_11_OBJECTS (out);
303 SET_6_OBJECTS (out);
304 break;
305 case 18:
306 SET_9_OBJECTS (out);
307 SET_9_OBJECTS (out);
308 break;
309 case 19:
310 SET_16_OBJECTS (out);
311 SET_3_OBJECTS (out);
312 break;
313 case 20:
314 SET_16_OBJECTS (out);
315 SET_4_OBJECTS (out);
316 break;
317 case 21:
318 SET_16_OBJECTS (out);
319 SET_5_OBJECTS (out);
320 break;
321 case 22:
322 SET_16_OBJECTS (out);
323 SET_6_OBJECTS (out);
324 break;
325 case 23:
326 SET_16_OBJECTS (out);
327 SET_7_OBJECTS (out);
328 break;
329 case 24:
330 SET_16_OBJECTS (out);
331 SET_8_OBJECTS (out);
332 break;
333 case 25:
334 SET_16_OBJECTS (out);
335 SET_9_OBJECTS (out);
336 break;
337 case 26:
338 SET_16_OBJECTS (out);
339 SET_10_OBJECTS (out);
340 break;
341 case 27:
342 SET_16_OBJECTS (out);
343 SET_11_OBJECTS (out);
344 break;
345 case 28:
346 SET_16_OBJECTS (out);
347 SET_8_OBJECTS (out);
348 SET_4_OBJECTS (out);
349 break;
350 case 29:
351 SET_16_OBJECTS (out);
352 SET_9_OBJECTS (out);
353 SET_4_OBJECTS (out);
354 break;
355 case 30:
356 SET_16_OBJECTS (out);
357 SET_12_OBJECTS (out);
358 SET_2_OBJECTS (out);
359 break;
360 case 31:
361 SET_16_OBJECTS (out);
362 SET_11_OBJECTS (out);
363 SET_4_OBJECTS (out);
364 break;
365 }
366
367 /* This loop governs the asmptoptic behaviour of this algorithm, for long
368 word copies. */
369 count = value >> 5;
370 for (loop_var = 0; loop_var < count; loop_var++)
371 SET_32_OBJECTS (out);
372 }
373
374 static inline void
375 __inhibit_loop_to_libcall
__short_int_memset(void * __restrict s1,int val,size_t n)376 __short_int_memset (void *__restrict s1, int val, size_t n)
377 {
378 int value = n;
379 int loop_var;
380 int short *out = s1;
381 int count;
382 int m0 = val;
383
384 /* This code currently give a stall for any value with a 1->2 in the low 5
385 bits, i.e. 1,2, 33,34 ? not acceptable! */
386 switch (value & 0x1f)
387 {
388 case 0:
389 break;
390 case 1:
391 SET_1_OBJECT (out);
392 break;
393 case 2:
394 SET_2_OBJECTS (out);
395 break;
396 case 3:
397 SET_3_OBJECTS (out);
398 break;
399 case 4:
400 SET_4_OBJECTS (out);
401 break;
402 case 5:
403 SET_5_OBJECTS (out);
404 break;
405 case 6:
406 SET_6_OBJECTS (out);
407 break;
408 case 7:
409 SET_7_OBJECTS (out);
410 break;
411 case 8:
412 SET_8_OBJECTS (out);
413 break;
414 case 9:
415 SET_9_OBJECTS (out);
416 break;
417 case 10:
418 SET_10_OBJECTS (out);
419 break;
420 case 11:
421 SET_11_OBJECTS (out);
422 break;
423 case 12:
424 SET_12_OBJECTS (out);
425 break;
426 case 13:
427 SET_9_OBJECTS (out);
428 SET_4_OBJECTS (out);
429 break;
430 case 14:
431 SET_12_OBJECTS (out);
432 SET_2_OBJECTS (out);
433 break;
434 case 15:
435 SET_11_OBJECTS (out);
436 SET_4_OBJECTS (out);
437 break;
438 case 16:
439 SET_16_OBJECTS (out);
440 break;
441 case 17:
442 SET_11_OBJECTS (out);
443 SET_6_OBJECTS (out);
444 break;
445 case 18:
446 SET_9_OBJECTS (out);
447 SET_9_OBJECTS (out);
448 break;
449 case 19:
450 SET_16_OBJECTS (out);
451 SET_3_OBJECTS (out);
452 break;
453 case 20:
454 SET_16_OBJECTS (out);
455 SET_4_OBJECTS (out);
456 break;
457 case 21:
458 SET_16_OBJECTS (out);
459 SET_5_OBJECTS (out);
460 break;
461 case 22:
462 SET_16_OBJECTS (out);
463 SET_6_OBJECTS (out);
464 break;
465 case 23:
466 SET_16_OBJECTS (out);
467 SET_7_OBJECTS (out);
468 break;
469 case 24:
470 SET_16_OBJECTS (out);
471 SET_8_OBJECTS (out);
472 break;
473 case 25:
474 SET_16_OBJECTS (out);
475 SET_9_OBJECTS (out);
476 break;
477 case 26:
478 SET_16_OBJECTS (out);
479 SET_10_OBJECTS (out);
480 break;
481 case 27:
482 SET_16_OBJECTS (out);
483 SET_11_OBJECTS (out);
484 break;
485 case 28:
486 SET_16_OBJECTS (out);
487 SET_8_OBJECTS (out);
488 SET_4_OBJECTS (out);
489 break;
490 case 29:
491 SET_16_OBJECTS (out);
492 SET_9_OBJECTS (out);
493 SET_4_OBJECTS (out);
494 break;
495 case 30:
496 SET_16_OBJECTS (out);
497 SET_12_OBJECTS (out);
498 SET_2_OBJECTS (out);
499 break;
500 case 31:
501 SET_16_OBJECTS (out);
502 SET_11_OBJECTS (out);
503 SET_4_OBJECTS (out);
504 break;
505 }
506
507 /* This loop governs the asmptoptic behaviour of this algorithm, for long
508 word copies. */
509 count = value >> 5;
510 for (loop_var = 0; loop_var < count; loop_var++)
511 SET_32_OBJECTS (out);
512 }
513
514 static inline void
515 __inhibit_loop_to_libcall
__byte_memset(void * __restrict s1,int val,size_t n)516 __byte_memset (void *__restrict s1, int val, size_t n)
517 {
518 int value = n;
519 int loop_var;
520 char *out = s1;
521 int count;
522 int m0 = val;
523
524 /* This code currently give a stall for any value with a 1->2 in the low 5
525 bits, i.e. 1,2, 33,34 ? not acceptable! */
526 switch (value & 0x1f)
527 {
528 case 0:
529 break;
530 case 1:
531 SET_1_OBJECT (out);
532 break;
533 case 2:
534 SET_2_OBJECTS (out);
535 break;
536 case 3:
537 SET_3_OBJECTS (out);
538 break;
539 case 4:
540 SET_4_OBJECTS (out);
541 break;
542 case 5:
543 SET_5_OBJECTS (out);
544 break;
545 case 6:
546 SET_6_OBJECTS (out);
547 break;
548 case 7:
549 SET_7_OBJECTS (out);
550 break;
551 case 8:
552 SET_8_OBJECTS (out);
553 break;
554 case 9:
555 SET_9_OBJECTS (out);
556 break;
557 case 10:
558 SET_10_OBJECTS (out);
559 break;
560 case 11:
561 SET_11_OBJECTS (out);
562 break;
563 case 12:
564 SET_12_OBJECTS (out);
565 break;
566 case 13:
567 SET_9_OBJECTS (out);
568 SET_4_OBJECTS (out);
569 break;
570 case 14:
571 SET_12_OBJECTS (out);
572 SET_2_OBJECTS (out);
573 break;
574 case 15:
575 SET_11_OBJECTS (out);
576 SET_4_OBJECTS (out);
577 break;
578 case 16:
579 SET_16_OBJECTS (out);
580 break;
581 case 17:
582 SET_11_OBJECTS (out);
583 SET_6_OBJECTS (out);
584 break;
585 case 18:
586 SET_9_OBJECTS (out);
587 SET_9_OBJECTS (out);
588 break;
589 case 19:
590 SET_16_OBJECTS (out);
591 SET_3_OBJECTS (out);
592 break;
593 case 20:
594 SET_16_OBJECTS (out);
595 SET_4_OBJECTS (out);
596 break;
597 case 21:
598 SET_16_OBJECTS (out);
599 SET_5_OBJECTS (out);
600 break;
601 case 22:
602 SET_16_OBJECTS (out);
603 SET_6_OBJECTS (out);
604 break;
605 case 23:
606 SET_16_OBJECTS (out);
607 SET_7_OBJECTS (out);
608 break;
609 case 24:
610 SET_16_OBJECTS (out);
611 SET_8_OBJECTS (out);
612 break;
613 case 25:
614 SET_16_OBJECTS (out);
615 SET_9_OBJECTS (out);
616 break;
617 case 26:
618 SET_16_OBJECTS (out);
619 SET_10_OBJECTS (out);
620 break;
621 case 27:
622 SET_16_OBJECTS (out);
623 SET_11_OBJECTS (out);
624 break;
625 case 28:
626 SET_16_OBJECTS (out);
627 SET_8_OBJECTS (out);
628 SET_4_OBJECTS (out);
629 break;
630 case 29:
631 SET_16_OBJECTS (out);
632 SET_9_OBJECTS (out);
633 SET_4_OBJECTS (out);
634 break;
635 case 30:
636 SET_16_OBJECTS (out);
637 SET_12_OBJECTS (out);
638 SET_2_OBJECTS (out);
639 break;
640 case 31:
641 SET_16_OBJECTS (out);
642 SET_11_OBJECTS (out);
643 SET_4_OBJECTS (out);
644 break;
645 }
646
647 /* This loop governs the asmptoptic behaviour of this algorithm, for long
648 word copies. */
649 count = value >> 5;
650 for (loop_var = 0; loop_var < count; loop_var++)
651 SET_32_OBJECTS (out);
652 }
653
654
655 /* Exposed interface. */
656
657 void *
658 __inhibit_loop_to_libcall
memset(void * s,int c,size_t n)659 memset (void *s, int c, size_t n)
660 {
661 void *result = s;
662
663 /* None of the following handles setting zero bytes. */
664 if (n != 0)
665 {
666 unsigned test = (unsigned) s | (unsigned) n;
667
668 if (test & 1)
669 __byte_memset (s, c, n);
670 else if (test & 2)
671 {
672 short int sc = (short int) ((c << 8) + (char) c);
673 __short_int_memset (s, sc, n >> 1);
674 }
675 else
676 {
677 int ic = (c << 24) + ((char) c << 16) + ((char) c << 8) + (char) c;
678 __int_memset (s, ic, n >> 2);
679 }
680 }
681
682 return result;
683 }
684