1 /* memset for the Visium processor.
2 
3    Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4    All rights reserved.
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are met:
8 
9      * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11      * Redistributions in binary form must reproduce the above copyright
12        notice, this list of conditions and the following disclaimer in the
13        documentation and/or other materials provided with the distribution.
14      * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15        the names of its contributors may be used to endorse or promote products
16        derived from this software without specific prior written permission.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28    THE POSSIBILITY OF SUCH DAMAGE.  */
29 
30 /* This file must be kept in sync with libgcc/config/visium/memset.c  */
31 
32 #include <picolibc.h>
33 
34 #include <stddef.h>
35 #include "memset.h"
36 #include "../../string/local.h"
37 
/* Store the enclosing scope's variable `m0' into the 32 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects, because it is evaluated many times;
   its pointee type determines the width of each store.  The argument is
   parenthesised so that expressions such as `*pp' expand correctly.  */
#define SET_32_OBJECTS(out)     \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out)[8] = m0;                \
  (out)[9] = m0;                \
  (out)[10] = m0;               \
  (out)[11] = m0;               \
  (out)[12] = m0;               \
  (out)[13] = m0;               \
  (out)[14] = m0;               \
  (out)[15] = m0;               \
  (out)[16] = m0;               \
  (out)[17] = m0;               \
  (out)[18] = m0;               \
  (out)[19] = m0;               \
  (out)[20] = m0;               \
  (out)[21] = m0;               \
  (out)[22] = m0;               \
  (out)[23] = m0;               \
  (out)[24] = m0;               \
  (out)[25] = m0;               \
  (out)[26] = m0;               \
  (out)[27] = m0;               \
  (out)[28] = m0;               \
  (out)[29] = m0;               \
  (out)[30] = m0;               \
  (out)[31] = m0;               \
  (out) += 32;                  \
} while (0)
74 
/* Store the enclosing scope's variable `m0' into the 16 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_16_OBJECTS(out)     \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out)[8] = m0;                \
  (out)[9] = m0;                \
  (out)[10] = m0;               \
  (out)[11] = m0;               \
  (out)[12] = m0;               \
  (out)[13] = m0;               \
  (out)[14] = m0;               \
  (out)[15] = m0;               \
  (out) += 16;                  \
} while (0)
95 
/* Store the enclosing scope's variable `m0' into the 12 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_12_OBJECTS(out)     \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out)[8] = m0;                \
  (out)[9] = m0;                \
  (out)[10] = m0;               \
  (out)[11] = m0;               \
  (out) += 12;                  \
} while (0)
112 
/* Store the enclosing scope's variable `m0' into the 11 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_11_OBJECTS(out)     \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out)[8] = m0;                \
  (out)[9] = m0;                \
  (out)[10] = m0;               \
  (out) += 11;                  \
} while (0)
128 
/* Store the enclosing scope's variable `m0' into the 10 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_10_OBJECTS(out)     \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out)[8] = m0;                \
  (out)[9] = m0;                \
  (out) += 10;                  \
} while (0)
143 
/* Store the enclosing scope's variable `m0' into the 9 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_9_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out)[8] = m0;                \
  (out) += 9;                   \
} while (0)
157 
/* Store the enclosing scope's variable `m0' into the 8 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_8_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out)[7] = m0;                \
  (out) += 8;                   \
} while (0)
170 
/* Store the enclosing scope's variable `m0' into the 7 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_7_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out)[6] = m0;                \
  (out) += 7;                   \
} while (0)
182 
/* Store the enclosing scope's variable `m0' into the 6 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_6_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out)[5] = m0;                \
  (out) += 6;                   \
} while (0)
193 
/* Store the enclosing scope's variable `m0' into the 5 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_5_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out)[4] = m0;                \
  (out) += 5;                   \
} while (0)
203 
/* Store the enclosing scope's variable `m0' into the 4 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_4_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out)[3] = m0;                \
  (out) += 4;                   \
} while (0)
212 
/* Store the enclosing scope's variable `m0' into the 3 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_3_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out)[2] = m0;                \
  (out) += 3;                   \
} while (0)
220 
/* Store the enclosing scope's variable `m0' into the 2 consecutive elements
   starting at OUT, then advance OUT past them.  OUT must be a modifiable
   pointer lvalue without side effects (it is evaluated many times); the
   argument is parenthesised so that expressions such as `*pp' expand
   correctly.  */
#define SET_2_OBJECTS(out)      \
do {                            \
  (out)[0] = m0;                \
  (out)[1] = m0;                \
  (out) += 2;                   \
} while (0)
227 
/* Store the enclosing scope's variable `m0' into the single element at OUT,
   then advance OUT past it.  OUT must be a modifiable pointer lvalue without
   side effects (it is evaluated twice); the argument is parenthesised so
   that it expands as one operand whatever expression is passed.  */
#define SET_1_OBJECT(out)       \
do {                            \
  (out)[0] = m0;                \
  (out) += 1;                   \
} while (0)
233 
234 static inline void
235 __inhibit_loop_to_libcall
__int_memset(void * __restrict s1,int val,size_t n)236 __int_memset (void *__restrict s1, int val, size_t n)
237 {
238   int value = n;
239   int loop_var;
240   int *out = s1;
241   int count;
242   int m0 = val;
243 
244   /* This code currently give a stall for any value with a 1->2 in the low 5
245      bits, i.e.  1,2, 33,34 ? not acceptable!  */
246   switch (value & 0x1f)
247     {
248     case 0:
249       break;
250     case 1:
251       SET_1_OBJECT (out);
252       break;
253     case 2:
254       SET_2_OBJECTS (out);
255       break;
256     case 3:
257       SET_3_OBJECTS (out);
258       break;
259     case 4:
260       SET_4_OBJECTS (out);
261       break;
262     case 5:
263       SET_5_OBJECTS (out);
264       break;
265     case 6:
266       SET_6_OBJECTS (out);
267       break;
268     case 7:
269       SET_7_OBJECTS (out);
270       break;
271     case 8:
272       SET_8_OBJECTS (out);
273       break;
274     case 9:
275       SET_9_OBJECTS (out);
276       break;
277     case 10:
278       SET_10_OBJECTS (out);
279       break;
280     case 11:
281       SET_11_OBJECTS (out);
282       break;
283     case 12:
284       SET_12_OBJECTS (out);
285       break;
286     case 13:
287       SET_9_OBJECTS (out);
288       SET_4_OBJECTS (out);
289       break;
290     case 14:
291       SET_12_OBJECTS (out);
292       SET_2_OBJECTS (out);
293       break;
294     case 15:
295       SET_11_OBJECTS (out);
296       SET_4_OBJECTS (out);
297       break;
298     case 16:
299       SET_16_OBJECTS (out);
300       break;
301     case 17:
302       SET_11_OBJECTS (out);
303       SET_6_OBJECTS (out);
304       break;
305     case 18:
306       SET_9_OBJECTS (out);
307       SET_9_OBJECTS (out);
308       break;
309     case 19:
310       SET_16_OBJECTS (out);
311       SET_3_OBJECTS (out);
312       break;
313     case 20:
314       SET_16_OBJECTS (out);
315       SET_4_OBJECTS (out);
316       break;
317     case 21:
318       SET_16_OBJECTS (out);
319       SET_5_OBJECTS (out);
320       break;
321     case 22:
322       SET_16_OBJECTS (out);
323       SET_6_OBJECTS (out);
324       break;
325     case 23:
326       SET_16_OBJECTS (out);
327       SET_7_OBJECTS (out);
328       break;
329     case 24:
330       SET_16_OBJECTS (out);
331       SET_8_OBJECTS (out);
332       break;
333     case 25:
334       SET_16_OBJECTS (out);
335       SET_9_OBJECTS (out);
336       break;
337     case 26:
338       SET_16_OBJECTS (out);
339       SET_10_OBJECTS (out);
340       break;
341     case 27:
342       SET_16_OBJECTS (out);
343       SET_11_OBJECTS (out);
344       break;
345     case 28:
346       SET_16_OBJECTS (out);
347       SET_8_OBJECTS (out);
348       SET_4_OBJECTS (out);
349       break;
350     case 29:
351       SET_16_OBJECTS (out);
352       SET_9_OBJECTS (out);
353       SET_4_OBJECTS (out);
354       break;
355     case 30:
356       SET_16_OBJECTS (out);
357       SET_12_OBJECTS (out);
358       SET_2_OBJECTS (out);
359       break;
360     case 31:
361       SET_16_OBJECTS (out);
362       SET_11_OBJECTS (out);
363       SET_4_OBJECTS (out);
364       break;
365     }
366 
367   /* This loop governs the asmptoptic behaviour of this algorithm, for long
368      word copies.  */
369   count = value >> 5;
370   for (loop_var = 0; loop_var < count; loop_var++)
371     SET_32_OBJECTS (out);
372 }
373 
374 static inline void
375 __inhibit_loop_to_libcall
__short_int_memset(void * __restrict s1,int val,size_t n)376 __short_int_memset (void *__restrict s1, int val, size_t n)
377 {
378   int value = n;
379   int loop_var;
380   int short *out = s1;
381   int count;
382   int m0 = val;
383 
384   /* This code currently give a stall for any value with a 1->2 in the low 5
385      bits, i.e.  1,2, 33,34 ? not acceptable!  */
386   switch (value & 0x1f)
387     {
388     case 0:
389       break;
390     case 1:
391       SET_1_OBJECT (out);
392       break;
393     case 2:
394       SET_2_OBJECTS (out);
395       break;
396     case 3:
397       SET_3_OBJECTS (out);
398       break;
399     case 4:
400       SET_4_OBJECTS (out);
401       break;
402     case 5:
403       SET_5_OBJECTS (out);
404       break;
405     case 6:
406       SET_6_OBJECTS (out);
407       break;
408     case 7:
409       SET_7_OBJECTS (out);
410       break;
411     case 8:
412       SET_8_OBJECTS (out);
413       break;
414     case 9:
415       SET_9_OBJECTS (out);
416       break;
417     case 10:
418       SET_10_OBJECTS (out);
419       break;
420     case 11:
421       SET_11_OBJECTS (out);
422       break;
423     case 12:
424       SET_12_OBJECTS (out);
425       break;
426     case 13:
427       SET_9_OBJECTS (out);
428       SET_4_OBJECTS (out);
429       break;
430     case 14:
431       SET_12_OBJECTS (out);
432       SET_2_OBJECTS (out);
433       break;
434     case 15:
435       SET_11_OBJECTS (out);
436       SET_4_OBJECTS (out);
437       break;
438     case 16:
439       SET_16_OBJECTS (out);
440       break;
441     case 17:
442       SET_11_OBJECTS (out);
443       SET_6_OBJECTS (out);
444       break;
445     case 18:
446       SET_9_OBJECTS (out);
447       SET_9_OBJECTS (out);
448       break;
449     case 19:
450       SET_16_OBJECTS (out);
451       SET_3_OBJECTS (out);
452       break;
453     case 20:
454       SET_16_OBJECTS (out);
455       SET_4_OBJECTS (out);
456       break;
457     case 21:
458       SET_16_OBJECTS (out);
459       SET_5_OBJECTS (out);
460       break;
461     case 22:
462       SET_16_OBJECTS (out);
463       SET_6_OBJECTS (out);
464       break;
465     case 23:
466       SET_16_OBJECTS (out);
467       SET_7_OBJECTS (out);
468       break;
469     case 24:
470       SET_16_OBJECTS (out);
471       SET_8_OBJECTS (out);
472       break;
473     case 25:
474       SET_16_OBJECTS (out);
475       SET_9_OBJECTS (out);
476       break;
477     case 26:
478       SET_16_OBJECTS (out);
479       SET_10_OBJECTS (out);
480       break;
481     case 27:
482       SET_16_OBJECTS (out);
483       SET_11_OBJECTS (out);
484       break;
485     case 28:
486       SET_16_OBJECTS (out);
487       SET_8_OBJECTS (out);
488       SET_4_OBJECTS (out);
489       break;
490     case 29:
491       SET_16_OBJECTS (out);
492       SET_9_OBJECTS (out);
493       SET_4_OBJECTS (out);
494       break;
495     case 30:
496       SET_16_OBJECTS (out);
497       SET_12_OBJECTS (out);
498       SET_2_OBJECTS (out);
499       break;
500     case 31:
501       SET_16_OBJECTS (out);
502       SET_11_OBJECTS (out);
503       SET_4_OBJECTS (out);
504       break;
505     }
506 
507   /* This loop governs the asmptoptic behaviour of this algorithm, for long
508      word copies.  */
509   count = value >> 5;
510   for (loop_var = 0; loop_var < count; loop_var++)
511     SET_32_OBJECTS (out);
512 }
513 
514 static inline void
515 __inhibit_loop_to_libcall
__byte_memset(void * __restrict s1,int val,size_t n)516 __byte_memset (void *__restrict s1, int val, size_t n)
517 {
518   int value = n;
519   int loop_var;
520   char *out = s1;
521   int count;
522   int m0 = val;
523 
524   /* This code currently give a stall for any value with a 1->2 in the low 5
525      bits, i.e.  1,2, 33,34 ? not acceptable!  */
526   switch (value & 0x1f)
527     {
528     case 0:
529       break;
530     case 1:
531       SET_1_OBJECT (out);
532       break;
533     case 2:
534       SET_2_OBJECTS (out);
535       break;
536     case 3:
537       SET_3_OBJECTS (out);
538       break;
539     case 4:
540       SET_4_OBJECTS (out);
541       break;
542     case 5:
543       SET_5_OBJECTS (out);
544       break;
545     case 6:
546       SET_6_OBJECTS (out);
547       break;
548     case 7:
549       SET_7_OBJECTS (out);
550       break;
551     case 8:
552       SET_8_OBJECTS (out);
553       break;
554     case 9:
555       SET_9_OBJECTS (out);
556       break;
557     case 10:
558       SET_10_OBJECTS (out);
559       break;
560     case 11:
561       SET_11_OBJECTS (out);
562       break;
563     case 12:
564       SET_12_OBJECTS (out);
565       break;
566     case 13:
567       SET_9_OBJECTS (out);
568       SET_4_OBJECTS (out);
569       break;
570     case 14:
571       SET_12_OBJECTS (out);
572       SET_2_OBJECTS (out);
573       break;
574     case 15:
575       SET_11_OBJECTS (out);
576       SET_4_OBJECTS (out);
577       break;
578     case 16:
579       SET_16_OBJECTS (out);
580       break;
581     case 17:
582       SET_11_OBJECTS (out);
583       SET_6_OBJECTS (out);
584       break;
585     case 18:
586       SET_9_OBJECTS (out);
587       SET_9_OBJECTS (out);
588       break;
589     case 19:
590       SET_16_OBJECTS (out);
591       SET_3_OBJECTS (out);
592       break;
593     case 20:
594       SET_16_OBJECTS (out);
595       SET_4_OBJECTS (out);
596       break;
597     case 21:
598       SET_16_OBJECTS (out);
599       SET_5_OBJECTS (out);
600       break;
601     case 22:
602       SET_16_OBJECTS (out);
603       SET_6_OBJECTS (out);
604       break;
605     case 23:
606       SET_16_OBJECTS (out);
607       SET_7_OBJECTS (out);
608       break;
609     case 24:
610       SET_16_OBJECTS (out);
611       SET_8_OBJECTS (out);
612       break;
613     case 25:
614       SET_16_OBJECTS (out);
615       SET_9_OBJECTS (out);
616       break;
617     case 26:
618       SET_16_OBJECTS (out);
619       SET_10_OBJECTS (out);
620       break;
621     case 27:
622       SET_16_OBJECTS (out);
623       SET_11_OBJECTS (out);
624       break;
625     case 28:
626       SET_16_OBJECTS (out);
627       SET_8_OBJECTS (out);
628       SET_4_OBJECTS (out);
629       break;
630     case 29:
631       SET_16_OBJECTS (out);
632       SET_9_OBJECTS (out);
633       SET_4_OBJECTS (out);
634       break;
635     case 30:
636       SET_16_OBJECTS (out);
637       SET_12_OBJECTS (out);
638       SET_2_OBJECTS (out);
639       break;
640     case 31:
641       SET_16_OBJECTS (out);
642       SET_11_OBJECTS (out);
643       SET_4_OBJECTS (out);
644       break;
645     }
646 
647   /* This loop governs the asmptoptic behaviour of this algorithm, for long
648      word copies.  */
649   count = value >> 5;
650   for (loop_var = 0; loop_var < count; loop_var++)
651     SET_32_OBJECTS (out);
652 }
653 
654 
655 /* Exposed interface.  */
656 
657 void *
658 __inhibit_loop_to_libcall
memset(void * s,int c,size_t n)659 memset (void *s, int c, size_t n)
660 {
661   void *result = s;
662 
663   /* None of the following handles setting zero bytes.  */
664   if (n != 0)
665     {
666       unsigned test = (unsigned) s | (unsigned) n;
667 
668       if (test & 1)
669 	__byte_memset (s, c, n);
670       else if (test & 2)
671 	{
672 	  short int sc = (short int) ((c << 8) + (char) c);
673 	  __short_int_memset (s, sc, n >> 1);
674 	}
675       else
676 	{
677 	  int ic = (c << 24) + ((char) c << 16) + ((char) c << 8) + (char) c;
678 	  __int_memset (s, ic, n >> 2);
679 	}
680     }
681 
682   return result;
683 }
684