1 /*
2 (C) Copyright 2001,2006,
3 International Business Machines Corporation,
4 Sony Computer Entertainment, Incorporated,
5 Toshiba Corporation,
6
7 All rights reserved.
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are met:
11
12 * Redistributions of source code must retain the above copyright notice,
13 this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 * Neither the names of the copyright holders nor the names of their
18 contributors may be used to endorse or promote products derived from this
19 software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
32 */
33 #include <spu_intrinsics.h>
34 #include <stddef.h>
35 #include "vec_literal.h"
36
37 /* Copy n bytes from memory area src to memory area dest.
38 * Copying is performed as if the n characters pointed to
39 * by src are first copied into a temporary array that does
40 * not overlap the src and dest arrays. Then the n characters
41 * of the temporary array are copied into the destination
42 * array. The memmove subroutine returns a pointer to dest.
43 */
44
memmove(void * __restrict__ dest,const void * __restrict__ src,size_t n)45 void * memmove(void * __restrict__ dest, const void * __restrict__ src, size_t n)
46 {
47 int adjust, delta;
48 unsigned int soffset1, soffset2, doffset1, doffset2;
49 vec_uchar16 *vSrc, *vDst;
50 vec_uchar16 sdata1, sdata2, sdata, ddata, shuffle;
51 vec_uchar16 mask, mask1, mask2, mask3, one = spu_splats((unsigned char)-1);
52
53 soffset1 = (unsigned int)(src) & 15;
54 doffset1 = (unsigned int)(dest) & 15;
55 doffset2 = ((unsigned int)(dest) + n) & 15;
56
57 /* Construct a series of masks used to data insert. The masks
58 * contains 0 bit when the destination word is unchanged, 1 when it
59 * must be replaced by source bits.
60 *
61 * mask1 = mask for leading unchanged bytes
62 * mask2 = mask for trailing unchange bytes
63 * mask3 = mask indicating the more than one qword is being changed.
64 */
65 mask = one;
66 mask1 = spu_rlmaskqwbyte(mask, -doffset1);
67 mask2 = spu_slqwbyte(mask, 16-doffset2);
68 mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
69
70 vDst = (vec_uchar16 *)(dest);
71
72 delta = (int)soffset1 - (int)doffset1;
73
74 /* The follow check only works if the SPU addresses are not
75 * wrapped. No provisions have been made to correct for this
76 * limitation.
77 */
78 if (((unsigned int)dest - (unsigned int)src) >= (unsigned int)n) {
79 /* Forward copy. Perform a memcpy.
80 *
81 * Handle any leading destination partial quadwords as
82 * well a very short copy (ie, such that the n characters
83 * all reside in a single (destination) quadword.
84 */
85 vSrc = (vec_uchar16 *)(src);
86 vDst = (vec_uchar16 *)(dest);
87
88 /* Handle any leading destination partial quadwords as
89 * well a very short copy (ie, such that the n characters
90 * all reside in a single (destination) quadword.
91 */
92 soffset1 = (unsigned int)(src) & 15;
93 doffset1 = (unsigned int)(dest) & 15;
94 doffset2 = ((unsigned int)(dest) + n) & 15;
95
96 /* Compute a shuffle pattern used to align the source string
97 * with the alignment of the destination string.
98 */
99
100 adjust = (int)spu_extract(spu_cmpgt(spu_promote(doffset1, 0), spu_promote(soffset1, 0)), 0);
101 delta = (int)soffset1 - (int)doffset1;
102 delta += adjust & 16;
103
104 shuffle = (vec_uchar16)spu_add((vec_uint4)spu_splats((unsigned char)delta),
105 VEC_LITERAL(vec_uint4, 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F));
106
107 vSrc += adjust;
108
109 sdata1 = *vSrc++;
110 sdata2 = *vSrc++;
111
112 ddata = *vDst;
113 sdata = spu_shuffle(sdata1, sdata2, shuffle);
114
115 /* Construct a series of masks used to data insert. The masks
116 * contain 0 when the destination word is unchanged, 1 when it
117 * must be replaced by source bytes.
118 *
119 * mask1 = mask for leading unchanged bytes
120 * mask2 = mask for trailing unchange bytes
121 * mask3 = mask indicating the more than one qword is being changed.
122 */
123 mask = one;
124 mask1 = spu_rlmaskqwbyte(mask, -doffset1);
125 mask2 = spu_slqwbyte(mask, 16-doffset2);
126 mask3 = (vec_uchar16)spu_cmpgt(spu_splats((unsigned int)(doffset1 + n)), 15);
127
128 *vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3)));
129
130 n += doffset1;
131
132 /* Handle complete destination quadwords
133 */
134 while (n > 31) {
135 sdata1 = sdata2;
136 sdata2 = *vSrc++;
137 *vDst++ = spu_shuffle(sdata1, sdata2, shuffle);
138 n -= 16;
139 }
140
141 /* Handle any trailing partial (destination) quadwords
142 */
143 mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((unsigned int)n), 16), mask2);
144 *vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask);
145
146 } else {
147 /* Backward copy.
148 *
149 * Handle any leading destination partial quadwords as
150 * well a very short copy (ie, such that the n characters
151 * all reside in a single (destination) quadword.
152 */
153 vSrc = (vec_uchar16 *)((unsigned int)src + n-1);
154 vDst = (vec_uchar16 *)((unsigned int)dest + n-1);
155
156 /* Handle any leading destination partial quadwords as
157 * well a very short copy (ie, such that the n characters
158 * all reside in a single (destination) quadword.
159 */
160 soffset1 = (unsigned int)(src) & 15;
161 soffset2 = (unsigned int)(vSrc) & 15;
162 doffset1 = (unsigned int)(dest) & 15;
163 doffset2 = (unsigned int)(vDst) & 15;
164
165 /* Compute a shuffle pattern used to align the source string
166 * with the alignment of the destination string.
167 */
168 adjust = (int)spu_extract(spu_cmpgt(spu_promote(soffset2, 0), spu_promote(doffset2, 0)), 0);
169 delta = (int)doffset2 - (int)soffset2;
170 delta += adjust & 16;
171
172 shuffle = (vec_uchar16)spu_sub(VEC_LITERAL(vec_uint4, 0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F),
173 (vec_uint4)spu_splats((unsigned char)delta));
174
175 vSrc -= adjust;
176
177 sdata2 = *vSrc--;
178 sdata1 = *vSrc--;
179
180 ddata = *vDst;
181 sdata = spu_shuffle(sdata1, sdata2, shuffle);
182
183 /* Construct a series of masks used to data insert. The masks
184 * contain 0 when the destination word is unchanged, 1 when it
185 * must be replaced by source bytes.
186 *
187 * mask1 = mask for leading unchanged bytes
188 * mask2 = mask for trailing unchange bytes
189 * mask3 = mask indicating the more than one qword is being changed.
190 */
191 mask = one;
192 mask1 = spu_rlmaskqwbyte(mask, -doffset1);
193 mask2 = spu_slqwbyte(mask, 15-doffset2);
194 mask3 = (vec_uchar16)spu_cmpgt(spu_splats((int)(doffset2 - n)), -2);
195
196 *vDst-- = spu_sel(ddata, sdata, spu_and(mask2, spu_orc(mask1, mask3)));
197
198 n -= doffset2 + 1;
199
200 /* Handle complete destination quadwords
201 */
202 while ((int)n > 15) {
203 sdata2 = sdata1;
204 sdata1 = *vSrc--;
205 *vDst-- = spu_shuffle(sdata1, sdata2, shuffle);
206 n -= 16;
207 }
208
209 /* Handle any trailing partial (destination) quadwords
210 */
211 mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats((int)n), 0), mask1);
212 *vDst = spu_sel(*vDst, spu_shuffle(*vSrc, sdata1, shuffle), mask);
213 }
214 return (dest);
215 }
216
217