1 /* memset for the Visium processor.
2
3 Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15 the names of its contributors may be used to endorse or promote products
16 derived from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 THE POSSIBILITY OF SUCH DAMAGE. */
29
30 /* This file must be kept in sync with libgcc/config/visium/memset.c */
31
32 #include <stddef.h>
33 #include "memset.h"
34 #include "../../string/local.h"
35
/* Store `m0' into OUT[0] through OUT[31], then advance OUT by 32 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_32_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out)[11] = m0; \
  (out)[12] = m0; \
  (out)[13] = m0; \
  (out)[14] = m0; \
  (out)[15] = m0; \
  (out)[16] = m0; \
  (out)[17] = m0; \
  (out)[18] = m0; \
  (out)[19] = m0; \
  (out)[20] = m0; \
  (out)[21] = m0; \
  (out)[22] = m0; \
  (out)[23] = m0; \
  (out)[24] = m0; \
  (out)[25] = m0; \
  (out)[26] = m0; \
  (out)[27] = m0; \
  (out)[28] = m0; \
  (out)[29] = m0; \
  (out)[30] = m0; \
  (out)[31] = m0; \
  (out) += 32; \
} while (0)
72
/* Store `m0' into OUT[0] through OUT[15], then advance OUT by 16 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_16_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out)[11] = m0; \
  (out)[12] = m0; \
  (out)[13] = m0; \
  (out)[14] = m0; \
  (out)[15] = m0; \
  (out) += 16; \
} while (0)
93
/* Store `m0' into OUT[0] through OUT[11], then advance OUT by 12 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_12_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out)[11] = m0; \
  (out) += 12; \
} while (0)
110
/* Store `m0' into OUT[0] through OUT[10], then advance OUT by 11 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_11_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out)[10] = m0; \
  (out) += 11; \
} while (0)
126
/* Store `m0' into OUT[0] through OUT[9], then advance OUT by 10 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_10_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out)[9] = m0; \
  (out) += 10; \
} while (0)
141
/* Store `m0' into OUT[0] through OUT[8], then advance OUT by 9 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_9_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out)[8] = m0; \
  (out) += 9; \
} while (0)
155
/* Store `m0' into OUT[0] through OUT[7], then advance OUT by 8 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_8_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out)[7] = m0; \
  (out) += 8; \
} while (0)
168
/* Store `m0' into OUT[0] through OUT[6], then advance OUT by 7 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_7_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out)[6] = m0; \
  (out) += 7; \
} while (0)
180
/* Store `m0' into OUT[0] through OUT[5], then advance OUT by 6 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_6_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out)[5] = m0; \
  (out) += 6; \
} while (0)
191
/* Store `m0' into OUT[0] through OUT[4], then advance OUT by 5 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_5_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out)[4] = m0; \
  (out) += 5; \
} while (0)
201
/* Store `m0' into OUT[0] through OUT[3], then advance OUT by 4 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_4_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out)[3] = m0; \
  (out) += 4; \
} while (0)
210
/* Store `m0' into OUT[0] through OUT[2], then advance OUT by 3 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_3_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out)[2] = m0; \
  (out) += 3; \
} while (0)
218
/* Store `m0' into OUT[0] and OUT[1], then advance OUT by 2 objects.
   OUT must be a modifiable pointer lvalue; it is evaluated once per store
   and once more for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized so that an expression such as `*pp' indexes correctly.  */
#define SET_2_OBJECTS(out) \
do { \
  (out)[0] = m0; \
  (out)[1] = m0; \
  (out) += 2; \
} while (0)
225
/* Store `m0' into OUT[0], then advance OUT by 1 object.
   OUT must be a modifiable pointer lvalue; it is evaluated once for the
   store and once for the increment.  `m0' is not a parameter: it is
   resolved in the scope where the macro is expanded.  The argument is
   parenthesized, matching the other SET_*_OBJECTS macros.  */
#define SET_1_OBJECT(out) \
do { \
  (out)[0] = m0; \
  (out) += 1; \
} while (0)
231
232 static inline void
233 __inhibit_loop_to_libcall
__int_memset(void * __restrict s1,int val,size_t n)234 __int_memset (void *__restrict s1, int val, size_t n)
235 {
236 int value = n;
237 int loop_var;
238 int *out = s1;
239 int count;
240 int m0 = val;
241
242 /* This code currently give a stall for any value with a 1->2 in the low 5
243 bits, i.e. 1,2, 33,34 ? not acceptable! */
244 switch (value & 0x1f)
245 {
246 case 0:
247 break;
248 case 1:
249 SET_1_OBJECT (out);
250 break;
251 case 2:
252 SET_2_OBJECTS (out);
253 break;
254 case 3:
255 SET_3_OBJECTS (out);
256 break;
257 case 4:
258 SET_4_OBJECTS (out);
259 break;
260 case 5:
261 SET_5_OBJECTS (out);
262 break;
263 case 6:
264 SET_6_OBJECTS (out);
265 break;
266 case 7:
267 SET_7_OBJECTS (out);
268 break;
269 case 8:
270 SET_8_OBJECTS (out);
271 break;
272 case 9:
273 SET_9_OBJECTS (out);
274 break;
275 case 10:
276 SET_10_OBJECTS (out);
277 break;
278 case 11:
279 SET_11_OBJECTS (out);
280 break;
281 case 12:
282 SET_12_OBJECTS (out);
283 break;
284 case 13:
285 SET_9_OBJECTS (out);
286 SET_4_OBJECTS (out);
287 break;
288 case 14:
289 SET_12_OBJECTS (out);
290 SET_2_OBJECTS (out);
291 break;
292 case 15:
293 SET_11_OBJECTS (out);
294 SET_4_OBJECTS (out);
295 break;
296 case 16:
297 SET_16_OBJECTS (out);
298 break;
299 case 17:
300 SET_11_OBJECTS (out);
301 SET_6_OBJECTS (out);
302 break;
303 case 18:
304 SET_9_OBJECTS (out);
305 SET_9_OBJECTS (out);
306 break;
307 case 19:
308 SET_16_OBJECTS (out);
309 SET_3_OBJECTS (out);
310 break;
311 case 20:
312 SET_16_OBJECTS (out);
313 SET_4_OBJECTS (out);
314 break;
315 case 21:
316 SET_16_OBJECTS (out);
317 SET_5_OBJECTS (out);
318 break;
319 case 22:
320 SET_16_OBJECTS (out);
321 SET_6_OBJECTS (out);
322 break;
323 case 23:
324 SET_16_OBJECTS (out);
325 SET_7_OBJECTS (out);
326 break;
327 case 24:
328 SET_16_OBJECTS (out);
329 SET_8_OBJECTS (out);
330 break;
331 case 25:
332 SET_16_OBJECTS (out);
333 SET_9_OBJECTS (out);
334 break;
335 case 26:
336 SET_16_OBJECTS (out);
337 SET_10_OBJECTS (out);
338 break;
339 case 27:
340 SET_16_OBJECTS (out);
341 SET_11_OBJECTS (out);
342 break;
343 case 28:
344 SET_16_OBJECTS (out);
345 SET_8_OBJECTS (out);
346 SET_4_OBJECTS (out);
347 break;
348 case 29:
349 SET_16_OBJECTS (out);
350 SET_9_OBJECTS (out);
351 SET_4_OBJECTS (out);
352 break;
353 case 30:
354 SET_16_OBJECTS (out);
355 SET_12_OBJECTS (out);
356 SET_2_OBJECTS (out);
357 break;
358 case 31:
359 SET_16_OBJECTS (out);
360 SET_11_OBJECTS (out);
361 SET_4_OBJECTS (out);
362 break;
363 }
364
365 /* This loop governs the asmptoptic behaviour of this algorithm, for long
366 word copies. */
367 count = value >> 5;
368 for (loop_var = 0; loop_var < count; loop_var++)
369 SET_32_OBJECTS (out);
370 }
371
372 static inline void
373 __inhibit_loop_to_libcall
__short_int_memset(void * __restrict s1,int val,size_t n)374 __short_int_memset (void *__restrict s1, int val, size_t n)
375 {
376 int value = n;
377 int loop_var;
378 int short *out = s1;
379 int count;
380 int m0 = val;
381
382 /* This code currently give a stall for any value with a 1->2 in the low 5
383 bits, i.e. 1,2, 33,34 ? not acceptable! */
384 switch (value & 0x1f)
385 {
386 case 0:
387 break;
388 case 1:
389 SET_1_OBJECT (out);
390 break;
391 case 2:
392 SET_2_OBJECTS (out);
393 break;
394 case 3:
395 SET_3_OBJECTS (out);
396 break;
397 case 4:
398 SET_4_OBJECTS (out);
399 break;
400 case 5:
401 SET_5_OBJECTS (out);
402 break;
403 case 6:
404 SET_6_OBJECTS (out);
405 break;
406 case 7:
407 SET_7_OBJECTS (out);
408 break;
409 case 8:
410 SET_8_OBJECTS (out);
411 break;
412 case 9:
413 SET_9_OBJECTS (out);
414 break;
415 case 10:
416 SET_10_OBJECTS (out);
417 break;
418 case 11:
419 SET_11_OBJECTS (out);
420 break;
421 case 12:
422 SET_12_OBJECTS (out);
423 break;
424 case 13:
425 SET_9_OBJECTS (out);
426 SET_4_OBJECTS (out);
427 break;
428 case 14:
429 SET_12_OBJECTS (out);
430 SET_2_OBJECTS (out);
431 break;
432 case 15:
433 SET_11_OBJECTS (out);
434 SET_4_OBJECTS (out);
435 break;
436 case 16:
437 SET_16_OBJECTS (out);
438 break;
439 case 17:
440 SET_11_OBJECTS (out);
441 SET_6_OBJECTS (out);
442 break;
443 case 18:
444 SET_9_OBJECTS (out);
445 SET_9_OBJECTS (out);
446 break;
447 case 19:
448 SET_16_OBJECTS (out);
449 SET_3_OBJECTS (out);
450 break;
451 case 20:
452 SET_16_OBJECTS (out);
453 SET_4_OBJECTS (out);
454 break;
455 case 21:
456 SET_16_OBJECTS (out);
457 SET_5_OBJECTS (out);
458 break;
459 case 22:
460 SET_16_OBJECTS (out);
461 SET_6_OBJECTS (out);
462 break;
463 case 23:
464 SET_16_OBJECTS (out);
465 SET_7_OBJECTS (out);
466 break;
467 case 24:
468 SET_16_OBJECTS (out);
469 SET_8_OBJECTS (out);
470 break;
471 case 25:
472 SET_16_OBJECTS (out);
473 SET_9_OBJECTS (out);
474 break;
475 case 26:
476 SET_16_OBJECTS (out);
477 SET_10_OBJECTS (out);
478 break;
479 case 27:
480 SET_16_OBJECTS (out);
481 SET_11_OBJECTS (out);
482 break;
483 case 28:
484 SET_16_OBJECTS (out);
485 SET_8_OBJECTS (out);
486 SET_4_OBJECTS (out);
487 break;
488 case 29:
489 SET_16_OBJECTS (out);
490 SET_9_OBJECTS (out);
491 SET_4_OBJECTS (out);
492 break;
493 case 30:
494 SET_16_OBJECTS (out);
495 SET_12_OBJECTS (out);
496 SET_2_OBJECTS (out);
497 break;
498 case 31:
499 SET_16_OBJECTS (out);
500 SET_11_OBJECTS (out);
501 SET_4_OBJECTS (out);
502 break;
503 }
504
505 /* This loop governs the asmptoptic behaviour of this algorithm, for long
506 word copies. */
507 count = value >> 5;
508 for (loop_var = 0; loop_var < count; loop_var++)
509 SET_32_OBJECTS (out);
510 }
511
512 static inline void
513 __inhibit_loop_to_libcall
__byte_memset(void * __restrict s1,int val,size_t n)514 __byte_memset (void *__restrict s1, int val, size_t n)
515 {
516 int value = n;
517 int loop_var;
518 char *out = s1;
519 int count;
520 int m0 = val;
521
522 /* This code currently give a stall for any value with a 1->2 in the low 5
523 bits, i.e. 1,2, 33,34 ? not acceptable! */
524 switch (value & 0x1f)
525 {
526 case 0:
527 break;
528 case 1:
529 SET_1_OBJECT (out);
530 break;
531 case 2:
532 SET_2_OBJECTS (out);
533 break;
534 case 3:
535 SET_3_OBJECTS (out);
536 break;
537 case 4:
538 SET_4_OBJECTS (out);
539 break;
540 case 5:
541 SET_5_OBJECTS (out);
542 break;
543 case 6:
544 SET_6_OBJECTS (out);
545 break;
546 case 7:
547 SET_7_OBJECTS (out);
548 break;
549 case 8:
550 SET_8_OBJECTS (out);
551 break;
552 case 9:
553 SET_9_OBJECTS (out);
554 break;
555 case 10:
556 SET_10_OBJECTS (out);
557 break;
558 case 11:
559 SET_11_OBJECTS (out);
560 break;
561 case 12:
562 SET_12_OBJECTS (out);
563 break;
564 case 13:
565 SET_9_OBJECTS (out);
566 SET_4_OBJECTS (out);
567 break;
568 case 14:
569 SET_12_OBJECTS (out);
570 SET_2_OBJECTS (out);
571 break;
572 case 15:
573 SET_11_OBJECTS (out);
574 SET_4_OBJECTS (out);
575 break;
576 case 16:
577 SET_16_OBJECTS (out);
578 break;
579 case 17:
580 SET_11_OBJECTS (out);
581 SET_6_OBJECTS (out);
582 break;
583 case 18:
584 SET_9_OBJECTS (out);
585 SET_9_OBJECTS (out);
586 break;
587 case 19:
588 SET_16_OBJECTS (out);
589 SET_3_OBJECTS (out);
590 break;
591 case 20:
592 SET_16_OBJECTS (out);
593 SET_4_OBJECTS (out);
594 break;
595 case 21:
596 SET_16_OBJECTS (out);
597 SET_5_OBJECTS (out);
598 break;
599 case 22:
600 SET_16_OBJECTS (out);
601 SET_6_OBJECTS (out);
602 break;
603 case 23:
604 SET_16_OBJECTS (out);
605 SET_7_OBJECTS (out);
606 break;
607 case 24:
608 SET_16_OBJECTS (out);
609 SET_8_OBJECTS (out);
610 break;
611 case 25:
612 SET_16_OBJECTS (out);
613 SET_9_OBJECTS (out);
614 break;
615 case 26:
616 SET_16_OBJECTS (out);
617 SET_10_OBJECTS (out);
618 break;
619 case 27:
620 SET_16_OBJECTS (out);
621 SET_11_OBJECTS (out);
622 break;
623 case 28:
624 SET_16_OBJECTS (out);
625 SET_8_OBJECTS (out);
626 SET_4_OBJECTS (out);
627 break;
628 case 29:
629 SET_16_OBJECTS (out);
630 SET_9_OBJECTS (out);
631 SET_4_OBJECTS (out);
632 break;
633 case 30:
634 SET_16_OBJECTS (out);
635 SET_12_OBJECTS (out);
636 SET_2_OBJECTS (out);
637 break;
638 case 31:
639 SET_16_OBJECTS (out);
640 SET_11_OBJECTS (out);
641 SET_4_OBJECTS (out);
642 break;
643 }
644
645 /* This loop governs the asmptoptic behaviour of this algorithm, for long
646 word copies. */
647 count = value >> 5;
648 for (loop_var = 0; loop_var < count; loop_var++)
649 SET_32_OBJECTS (out);
650 }
651
652
653 /* Exposed interface. */
654
655 void *
656 __inhibit_loop_to_libcall
memset(void * s,int c,size_t n)657 memset (void *s, int c, size_t n)
658 {
659 void *result = s;
660
661 /* None of the following handles setting zero bytes. */
662 if (n != 0)
663 {
664 unsigned test = (unsigned) s | (unsigned) n;
665
666 if (test & 1)
667 __byte_memset (s, c, n);
668 else if (test & 2)
669 {
670 short int sc = (short int) ((c << 8) + (char) c);
671 __short_int_memset (s, sc, n >> 1);
672 }
673 else
674 {
675 int ic = (c << 24) + ((char) c << 16) + ((char) c << 8) + (char) c;
676 __int_memset (s, ic, n >> 2);
677 }
678 }
679
680 return result;
681 }
682