1 /* memset for the Visium processor.
2 
3    Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
4    All rights reserved.
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are met:
8 
9      * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11      * Redistributions in binary form must reproduce the above copyright
12        notice, this list of conditions and the following disclaimer in the
13        documentation and/or other materials provided with the distribution.
14      * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15        the names of its contributors may be used to endorse or promote products
16        derived from this software without specific prior written permission.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28    THE POSSIBILITY OF SUCH DAMAGE.  */
29 
30 /* This file must be kept in sync with libgcc/config/visium/memset.c  */
31 
32 #include <stddef.h>
33 #include "memset.h"
34 #include "../../string/local.h"
35 
/* Straight-line store helpers.

   SET_<N>_OBJECTS (out) stores the value of a variable named `m0', which
   must be visible at the point of use, into the N consecutive elements
   out[0] .. out[N-1] and then advances OUT by N elements.  OUT must be a
   modifiable pointer lvalue; it is evaluated several times, so it must
   not have side effects.  The element type is whatever OUT points to.

   The argument is parenthesized in every use so that an lvalue such as
   `*cursor' expands to `(*cursor)[0]' rather than `*cursor [0]'.  */

#define SET_32_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out)[8] = m0;		\
  (out)[9] = m0;		\
  (out)[10] = m0;		\
  (out)[11] = m0;		\
  (out)[12] = m0;		\
  (out)[13] = m0;		\
  (out)[14] = m0;		\
  (out)[15] = m0;		\
  (out)[16] = m0;		\
  (out)[17] = m0;		\
  (out)[18] = m0;		\
  (out)[19] = m0;		\
  (out)[20] = m0;		\
  (out)[21] = m0;		\
  (out)[22] = m0;		\
  (out)[23] = m0;		\
  (out)[24] = m0;		\
  (out)[25] = m0;		\
  (out)[26] = m0;		\
  (out)[27] = m0;		\
  (out)[28] = m0;		\
  (out)[29] = m0;		\
  (out)[30] = m0;		\
  (out)[31] = m0;		\
  (out) += 32;			\
} while (0)

#define SET_16_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out)[8] = m0;		\
  (out)[9] = m0;		\
  (out)[10] = m0;		\
  (out)[11] = m0;		\
  (out)[12] = m0;		\
  (out)[13] = m0;		\
  (out)[14] = m0;		\
  (out)[15] = m0;		\
  (out) += 16;			\
} while (0)

#define SET_12_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out)[8] = m0;		\
  (out)[9] = m0;		\
  (out)[10] = m0;		\
  (out)[11] = m0;		\
  (out) += 12;			\
} while (0)

#define SET_11_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out)[8] = m0;		\
  (out)[9] = m0;		\
  (out)[10] = m0;		\
  (out) += 11;			\
} while (0)

#define SET_10_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out)[8] = m0;		\
  (out)[9] = m0;		\
  (out) += 10;			\
} while (0)

#define SET_9_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out)[8] = m0;		\
  (out) += 9;			\
} while (0)

#define SET_8_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out)[7] = m0;		\
  (out) += 8;			\
} while (0)

#define SET_7_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out)[6] = m0;		\
  (out) += 7;			\
} while (0)

#define SET_6_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out)[5] = m0;		\
  (out) += 6;			\
} while (0)

#define SET_5_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out)[4] = m0;		\
  (out) += 5;			\
} while (0)

#define SET_4_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out)[3] = m0;		\
  (out) += 4;			\
} while (0)

#define SET_3_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out)[2] = m0;		\
  (out) += 3;			\
} while (0)

#define SET_2_OBJECTS(out)	\
do {				\
  (out)[0] = m0;		\
  (out)[1] = m0;		\
  (out) += 2;			\
} while (0)

#define SET_1_OBJECT(out)	\
do {				\
  (out)[0] = m0;		\
  (out) += 1;			\
} while (0)
231 
232 static inline void
233 __inhibit_loop_to_libcall
__int_memset(void * __restrict s1,int val,size_t n)234 __int_memset (void *__restrict s1, int val, size_t n)
235 {
236   int value = n;
237   int loop_var;
238   int *out = s1;
239   int count;
240   int m0 = val;
241 
242   /* This code currently give a stall for any value with a 1->2 in the low 5
243      bits, i.e.  1,2, 33,34 ? not acceptable!  */
244   switch (value & 0x1f)
245     {
246     case 0:
247       break;
248     case 1:
249       SET_1_OBJECT (out);
250       break;
251     case 2:
252       SET_2_OBJECTS (out);
253       break;
254     case 3:
255       SET_3_OBJECTS (out);
256       break;
257     case 4:
258       SET_4_OBJECTS (out);
259       break;
260     case 5:
261       SET_5_OBJECTS (out);
262       break;
263     case 6:
264       SET_6_OBJECTS (out);
265       break;
266     case 7:
267       SET_7_OBJECTS (out);
268       break;
269     case 8:
270       SET_8_OBJECTS (out);
271       break;
272     case 9:
273       SET_9_OBJECTS (out);
274       break;
275     case 10:
276       SET_10_OBJECTS (out);
277       break;
278     case 11:
279       SET_11_OBJECTS (out);
280       break;
281     case 12:
282       SET_12_OBJECTS (out);
283       break;
284     case 13:
285       SET_9_OBJECTS (out);
286       SET_4_OBJECTS (out);
287       break;
288     case 14:
289       SET_12_OBJECTS (out);
290       SET_2_OBJECTS (out);
291       break;
292     case 15:
293       SET_11_OBJECTS (out);
294       SET_4_OBJECTS (out);
295       break;
296     case 16:
297       SET_16_OBJECTS (out);
298       break;
299     case 17:
300       SET_11_OBJECTS (out);
301       SET_6_OBJECTS (out);
302       break;
303     case 18:
304       SET_9_OBJECTS (out);
305       SET_9_OBJECTS (out);
306       break;
307     case 19:
308       SET_16_OBJECTS (out);
309       SET_3_OBJECTS (out);
310       break;
311     case 20:
312       SET_16_OBJECTS (out);
313       SET_4_OBJECTS (out);
314       break;
315     case 21:
316       SET_16_OBJECTS (out);
317       SET_5_OBJECTS (out);
318       break;
319     case 22:
320       SET_16_OBJECTS (out);
321       SET_6_OBJECTS (out);
322       break;
323     case 23:
324       SET_16_OBJECTS (out);
325       SET_7_OBJECTS (out);
326       break;
327     case 24:
328       SET_16_OBJECTS (out);
329       SET_8_OBJECTS (out);
330       break;
331     case 25:
332       SET_16_OBJECTS (out);
333       SET_9_OBJECTS (out);
334       break;
335     case 26:
336       SET_16_OBJECTS (out);
337       SET_10_OBJECTS (out);
338       break;
339     case 27:
340       SET_16_OBJECTS (out);
341       SET_11_OBJECTS (out);
342       break;
343     case 28:
344       SET_16_OBJECTS (out);
345       SET_8_OBJECTS (out);
346       SET_4_OBJECTS (out);
347       break;
348     case 29:
349       SET_16_OBJECTS (out);
350       SET_9_OBJECTS (out);
351       SET_4_OBJECTS (out);
352       break;
353     case 30:
354       SET_16_OBJECTS (out);
355       SET_12_OBJECTS (out);
356       SET_2_OBJECTS (out);
357       break;
358     case 31:
359       SET_16_OBJECTS (out);
360       SET_11_OBJECTS (out);
361       SET_4_OBJECTS (out);
362       break;
363     }
364 
365   /* This loop governs the asmptoptic behaviour of this algorithm, for long
366      word copies.  */
367   count = value >> 5;
368   for (loop_var = 0; loop_var < count; loop_var++)
369     SET_32_OBJECTS (out);
370 }
371 
372 static inline void
373 __inhibit_loop_to_libcall
__short_int_memset(void * __restrict s1,int val,size_t n)374 __short_int_memset (void *__restrict s1, int val, size_t n)
375 {
376   int value = n;
377   int loop_var;
378   int short *out = s1;
379   int count;
380   int m0 = val;
381 
382   /* This code currently give a stall for any value with a 1->2 in the low 5
383      bits, i.e.  1,2, 33,34 ? not acceptable!  */
384   switch (value & 0x1f)
385     {
386     case 0:
387       break;
388     case 1:
389       SET_1_OBJECT (out);
390       break;
391     case 2:
392       SET_2_OBJECTS (out);
393       break;
394     case 3:
395       SET_3_OBJECTS (out);
396       break;
397     case 4:
398       SET_4_OBJECTS (out);
399       break;
400     case 5:
401       SET_5_OBJECTS (out);
402       break;
403     case 6:
404       SET_6_OBJECTS (out);
405       break;
406     case 7:
407       SET_7_OBJECTS (out);
408       break;
409     case 8:
410       SET_8_OBJECTS (out);
411       break;
412     case 9:
413       SET_9_OBJECTS (out);
414       break;
415     case 10:
416       SET_10_OBJECTS (out);
417       break;
418     case 11:
419       SET_11_OBJECTS (out);
420       break;
421     case 12:
422       SET_12_OBJECTS (out);
423       break;
424     case 13:
425       SET_9_OBJECTS (out);
426       SET_4_OBJECTS (out);
427       break;
428     case 14:
429       SET_12_OBJECTS (out);
430       SET_2_OBJECTS (out);
431       break;
432     case 15:
433       SET_11_OBJECTS (out);
434       SET_4_OBJECTS (out);
435       break;
436     case 16:
437       SET_16_OBJECTS (out);
438       break;
439     case 17:
440       SET_11_OBJECTS (out);
441       SET_6_OBJECTS (out);
442       break;
443     case 18:
444       SET_9_OBJECTS (out);
445       SET_9_OBJECTS (out);
446       break;
447     case 19:
448       SET_16_OBJECTS (out);
449       SET_3_OBJECTS (out);
450       break;
451     case 20:
452       SET_16_OBJECTS (out);
453       SET_4_OBJECTS (out);
454       break;
455     case 21:
456       SET_16_OBJECTS (out);
457       SET_5_OBJECTS (out);
458       break;
459     case 22:
460       SET_16_OBJECTS (out);
461       SET_6_OBJECTS (out);
462       break;
463     case 23:
464       SET_16_OBJECTS (out);
465       SET_7_OBJECTS (out);
466       break;
467     case 24:
468       SET_16_OBJECTS (out);
469       SET_8_OBJECTS (out);
470       break;
471     case 25:
472       SET_16_OBJECTS (out);
473       SET_9_OBJECTS (out);
474       break;
475     case 26:
476       SET_16_OBJECTS (out);
477       SET_10_OBJECTS (out);
478       break;
479     case 27:
480       SET_16_OBJECTS (out);
481       SET_11_OBJECTS (out);
482       break;
483     case 28:
484       SET_16_OBJECTS (out);
485       SET_8_OBJECTS (out);
486       SET_4_OBJECTS (out);
487       break;
488     case 29:
489       SET_16_OBJECTS (out);
490       SET_9_OBJECTS (out);
491       SET_4_OBJECTS (out);
492       break;
493     case 30:
494       SET_16_OBJECTS (out);
495       SET_12_OBJECTS (out);
496       SET_2_OBJECTS (out);
497       break;
498     case 31:
499       SET_16_OBJECTS (out);
500       SET_11_OBJECTS (out);
501       SET_4_OBJECTS (out);
502       break;
503     }
504 
505   /* This loop governs the asmptoptic behaviour of this algorithm, for long
506      word copies.  */
507   count = value >> 5;
508   for (loop_var = 0; loop_var < count; loop_var++)
509     SET_32_OBJECTS (out);
510 }
511 
512 static inline void
513 __inhibit_loop_to_libcall
__byte_memset(void * __restrict s1,int val,size_t n)514 __byte_memset (void *__restrict s1, int val, size_t n)
515 {
516   int value = n;
517   int loop_var;
518   char *out = s1;
519   int count;
520   int m0 = val;
521 
522   /* This code currently give a stall for any value with a 1->2 in the low 5
523      bits, i.e.  1,2, 33,34 ? not acceptable!  */
524   switch (value & 0x1f)
525     {
526     case 0:
527       break;
528     case 1:
529       SET_1_OBJECT (out);
530       break;
531     case 2:
532       SET_2_OBJECTS (out);
533       break;
534     case 3:
535       SET_3_OBJECTS (out);
536       break;
537     case 4:
538       SET_4_OBJECTS (out);
539       break;
540     case 5:
541       SET_5_OBJECTS (out);
542       break;
543     case 6:
544       SET_6_OBJECTS (out);
545       break;
546     case 7:
547       SET_7_OBJECTS (out);
548       break;
549     case 8:
550       SET_8_OBJECTS (out);
551       break;
552     case 9:
553       SET_9_OBJECTS (out);
554       break;
555     case 10:
556       SET_10_OBJECTS (out);
557       break;
558     case 11:
559       SET_11_OBJECTS (out);
560       break;
561     case 12:
562       SET_12_OBJECTS (out);
563       break;
564     case 13:
565       SET_9_OBJECTS (out);
566       SET_4_OBJECTS (out);
567       break;
568     case 14:
569       SET_12_OBJECTS (out);
570       SET_2_OBJECTS (out);
571       break;
572     case 15:
573       SET_11_OBJECTS (out);
574       SET_4_OBJECTS (out);
575       break;
576     case 16:
577       SET_16_OBJECTS (out);
578       break;
579     case 17:
580       SET_11_OBJECTS (out);
581       SET_6_OBJECTS (out);
582       break;
583     case 18:
584       SET_9_OBJECTS (out);
585       SET_9_OBJECTS (out);
586       break;
587     case 19:
588       SET_16_OBJECTS (out);
589       SET_3_OBJECTS (out);
590       break;
591     case 20:
592       SET_16_OBJECTS (out);
593       SET_4_OBJECTS (out);
594       break;
595     case 21:
596       SET_16_OBJECTS (out);
597       SET_5_OBJECTS (out);
598       break;
599     case 22:
600       SET_16_OBJECTS (out);
601       SET_6_OBJECTS (out);
602       break;
603     case 23:
604       SET_16_OBJECTS (out);
605       SET_7_OBJECTS (out);
606       break;
607     case 24:
608       SET_16_OBJECTS (out);
609       SET_8_OBJECTS (out);
610       break;
611     case 25:
612       SET_16_OBJECTS (out);
613       SET_9_OBJECTS (out);
614       break;
615     case 26:
616       SET_16_OBJECTS (out);
617       SET_10_OBJECTS (out);
618       break;
619     case 27:
620       SET_16_OBJECTS (out);
621       SET_11_OBJECTS (out);
622       break;
623     case 28:
624       SET_16_OBJECTS (out);
625       SET_8_OBJECTS (out);
626       SET_4_OBJECTS (out);
627       break;
628     case 29:
629       SET_16_OBJECTS (out);
630       SET_9_OBJECTS (out);
631       SET_4_OBJECTS (out);
632       break;
633     case 30:
634       SET_16_OBJECTS (out);
635       SET_12_OBJECTS (out);
636       SET_2_OBJECTS (out);
637       break;
638     case 31:
639       SET_16_OBJECTS (out);
640       SET_11_OBJECTS (out);
641       SET_4_OBJECTS (out);
642       break;
643     }
644 
645   /* This loop governs the asmptoptic behaviour of this algorithm, for long
646      word copies.  */
647   count = value >> 5;
648   for (loop_var = 0; loop_var < count; loop_var++)
649     SET_32_OBJECTS (out);
650 }
651 
652 
653 /* Exposed interface.  */
654 
655 void *
656 __inhibit_loop_to_libcall
memset(void * s,int c,size_t n)657 memset (void *s, int c, size_t n)
658 {
659   void *result = s;
660 
661   /* None of the following handles setting zero bytes.  */
662   if (n != 0)
663     {
664       unsigned test = (unsigned) s | (unsigned) n;
665 
666       if (test & 1)
667 	__byte_memset (s, c, n);
668       else if (test & 2)
669 	{
670 	  short int sc = (short int) ((c << 8) + (char) c);
671 	  __short_int_memset (s, sc, n >> 1);
672 	}
673       else
674 	{
675 	  int ic = (c << 24) + ((char) c << 16) + ((char) c << 8) + (char) c;
676 	  __int_memset (s, ic, n >> 2);
677 	}
678     }
679 
680   return result;
681 }
682