1 /*
2  * Copyright (c) 2006-2010 Tensilica Inc. ALL RIGHTS RESERVED.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20  * THE SOFTWARE.
21  */
22 
23 #ifndef __C6X_COMPAT__H
24 #define __C6X_COMPAT__H
25 
26 /* Unimplemented functions _gmpy, _gmpy4, _xormpy, _lssub, _cmpy, _cmpyr,
27    _cmpyr1, _ddotpl2r, _ddotph2r */
28 
29 
/* Host type used by this header to carry the 40-bit quantities that
   orig_L40_set() sign-extends. */
typedef signed long long int C6X_COMPAT_LONG40;
31 
32 
/* Memory-access helpers: each one casts the address `a` to a pointer of
   the named width and dereferences it.  The plain forms give a writable
   lvalue, the _const forms a read-only one.  In this header the "a"
   (_amem*) variants expand to exactly the same code as the plain ones. */

/* 8 bytes viewed as a double */
#define _memd8(a)         (*(double *)(a))
#define _memd8_const(a)   (*(const double *)(a))

#define _amemd8(a)        (*(double *)(a))
#define _amemd8_const(a)  (*(const double *)(a))

/* 8 bytes viewed as an unsigned 64-bit integer */
#define _mem8(a)          (*(unsigned long long *)(a))
#define _mem8_const(a)    (*(const unsigned long long *)(a))

/* 4 bytes viewed as an unsigned int */
#define _mem4(a)          (*(unsigned *)(a))
#define _mem4_const(a)    (*(const unsigned *)(a))
#define _amem4_const(a)   (*(const unsigned *)(a))
45 
46 /*  NOTE: To emulate a C6X properly you should define global variables
47     for your Xtensa with these names.  Some of the emulation routines
48     will set these values.  */
49 
/* Status flags defined by the including program; the saturating
   routines in this header set _overflow to 1 when they clip a result. */
extern int _carry, _overflow;
52 
53 //   Utility routines
54 
55 
#define TESTBIT(x,n) (((x) >> (n)) & 1)

#define NSA_BITS 32

/*
 * Count leading bits of a 32-bit value.
 *
 * is_signed == 0: returns the number of leading zero bits of inp
 *                 (NSA_BITS when inp is 0).
 * is_signed != 0: returns the number of redundant sign bits, i.e. how far
 *                 inp can be shifted left without changing its sign
 *                 (NSA_BITS-1 for 0 and for -1).
 *
 * The work is done on an unsigned copy so that the one-bit left shift and
 * the bit tests never overflow or shift a negative signed value.
 */
static inline unsigned int norm_shift_amt_U_and_non_U(int is_signed, int inp) {
  unsigned int bits = (unsigned int) inp;
  int pos;

  if (is_signed) {
    /* Invert negative values so only leading zeros need counting. */
    if (TESTBIT(bits, NSA_BITS - 1))
      bits = ~bits;
    /* Shift up by one so the count below comes out one smaller. */
    bits = (bits & 1u) | (bits << 1);
    if (bits == 0)
      return NSA_BITS - 1;
  }
  if (bits == 0)
    return NSA_BITS;

  /* Scan from the top bit down to the first set bit. */
  for (pos = NSA_BITS - 1; pos >= 0; pos--) {
    if (TESTBIT(bits, pos))
      return (unsigned int) ((NSA_BITS - 1) - pos);
  }
  return NSA_BITS;
}
79 
80 
81 
/* Keep the low 40 bits of L40_var1 and sign-extend from bit 39 to the
   full 64-bit width. */
static inline long long
orig_L40_set(long long L40_var1) {
   const long long low40 = L40_var1 & 0x000000ffffffffffLL;

   if ((L40_var1 & 0x8000000000LL) == 0)
      return low40;

   /* Bit 39 is set: fill bits 40..63 with ones. */
   return low40 | 0xffffff0000000000LL;
}
93 
94 
95 
/*
 * Clamp t to the range of an n-bit two's-complement integer,
 * [-(2^(n-1)), 2^(n-1) - 1], without touching _overflow.
 * n is expected to be in 1..63.
 *
 * The lower bound is derived from the upper bound rather than by
 * left-shifting -1, which is undefined behaviour in C.
 */
static inline signed long long
util_saturate_n_no_state(signed long long t, int n)
{
  const signed long long maxv = (1LL << (n - 1)) - 1;
  const signed long long minv = -maxv - 1;

  if (t > maxv)
    return maxv;
  if (t < minv)
    return minv;
  return t;
}
109 
110 
111 static inline signed long long
util_saturate_n_sgn(signed long long t,int n)112 util_saturate_n_sgn(signed long long t, int n)
113 {
114   signed long long result;
115   signed long long maxv, minv;
116   maxv = (1LL << (n-1)) - 1;
117   minv = (-1LL << (n-1));
118   if (t > 0) {
119     result = maxv;
120     _overflow = 1;
121   } else if (t < 0) {
122     result = minv;
123     _overflow = 1;
124   } else {
125     result = 0;
126   }
127   return result;
128 }
129 
130 
131 
132 
/*
 * Well-behaved signed shift with saturation to n bits.
 *
 * shval > 0 shifts t right by shval; shval < 0 shifts t left by -shval
 * and saturates (via util_saturate_n_sgn, which sets _overflow) when the
 * result would not fit in n bits.  n should be <= 62.
 *
 * The left-shift path uses a negated power of two for the mask and a
 * multiplication for the shift, so no negative value is ever
 * left-shifted (undefined behaviour in C).
 */
static inline signed long long
util_shift_right_saturate_n(signed long long t, int shval, int n)
{
  signed long long mask;
  int left;

  if (t == 0 || shval == 0)
    return t;

  /* Shifting right by the full width leaves only the sign. */
  if (shval >= n)
    return (t < 0) ? -1 : 0;

  /* Shifting left by the full width always overflows (t != 0 here). */
  if (shval <= -n)
    return util_saturate_n_sgn(t, n);

  if (shval > 0)
    return t >> shval;

  /* Left shift by 1 .. n-1: check for saturation before shifting. */
  left = -shval;
  /* mask covers the bits that must all equal the sign bit of t. */
  mask = -(1LL << (n - left - 1));
  if (t > 0 && (mask & t) != 0)
    return util_saturate_n_sgn(t, n);
  if (t < 0 && (mask & t) != mask)
    return util_saturate_n_sgn(t, n);

  /* The checks above guarantee the product fits in n bits. */
  return t * (1LL << left);
}
169 
170 
171 /* Implemented c6x standard C compatibility functions (alphabetical
172    order) */
173 
174 
/*
 * Saturating 32-bit absolute value: the most negative int maps to
 * 0x7fffffff instead of overflowing.
 *
 * Written without abs() so the header does not depend on <stdlib.h>
 * having been included by the caller.
 */
static inline int _abs(int src1) {
	if ((unsigned) src1 == 0x80000000u)
		return 0x7fffffff;
	return (src1 < 0) ? -src1 : src1;
}
181 
182 
/*
 * Saturating absolute value of the two signed 16-bit halves of src1.
 * A half equal to 0x8000 becomes 0x7fff; each result stays in the half
 * it came from.
 *
 * The halves are extracted with shifts and masks rather than by storing
 * an int through a short array, which broke strict aliasing and could be
 * misaligned.  No abs() call, so <stdlib.h> is not needed.
 */
static inline int _abs2(int src1) {
	const unsigned int packed = (unsigned int) src1;
	const int hi = (short) (packed >> 16);
	const int lo = (short) (packed & 0xffffu);
	unsigned int rhi, rlo;

	if (hi == -32768)
		rhi = 0x7fffu;
	else
		rhi = (unsigned int) (hi < 0 ? -hi : hi);

	if (lo == -32768)
		rlo = 0x7fffu;
	else
		rlo = (unsigned int) (lo < 0 ? -lo : lo);

	return (int) ((rhi << 16) | rlo);
}
194 
195 
196 
197 
/*
 * Add the two 16-bit halves of src1 and src2 independently; each sum
 * wraps modulo 2^16 and no carry crosses from the low half to the high.
 *
 * Implemented with shifts and masks instead of writing an int through a
 * short array (strict-aliasing violation, possible misalignment).
 */
static inline int _add2(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	const unsigned int lo = (a + b) & 0xffffu;
	const unsigned int hi = ((a >> 16) + (b >> 16)) & 0xffffu;

	return (int) ((hi << 16) | lo);
}
208 
/*
 * Add the four bytes of src1 and src2 lane by lane; each sum wraps
 * modulo 256 and no carry crosses between lanes.
 *
 * Implemented with shifts and masks instead of writing an int through a
 * char array that is then reinterpreted as int (aliasing/alignment).
 */
static inline int _add4(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	unsigned int sum = 0;
	int shift;

	for (shift = 0; shift < 32; shift += 8) {
		/* Bits above the lane are discarded by the mask. */
		const unsigned int lane = ((a >> shift) + (b >> shift)) & 0xffu;
		sum |= lane << shift;
	}
	return (int) sum;
}
221 
222 
223 
/* Returns src1 + src2 in the upper 32 bits and src1 - src2 in the lower
   32 bits; both are computed modulo 2^32. */
static inline long long _addsub(unsigned int src1, unsigned int src2)
{
  const unsigned int sum = src1 + src2;
  const unsigned int diff = src1 - src2;
  unsigned long long packed = sum;

  packed <<= 32;
  packed |= diff;
  return packed;
}
234 
235 
/*
 * Per-halfword add and subtract.  The upper 32 bits of the result hold
 * the two 16-bit sums (src1 + src2), the lower 32 bits hold the two
 * 16-bit differences (src1 - src2); every lane wraps modulo 2^16.
 *
 * Lanes are extracted with shifts and masks instead of punning an int
 * through short arrays (strict-aliasing violation, possible
 * misalignment).
 */
static inline long long _addsub2(unsigned int src1, unsigned int src2)
{
  const unsigned int a_lo = src1 & 0xffffu, a_hi = src1 >> 16;
  const unsigned int b_lo = src2 & 0xffffu, b_hi = src2 >> 16;

  const unsigned int sums  = (((a_hi + b_hi) & 0xffffu) << 16)
                           |  ((a_lo + b_lo) & 0xffffu);
  const unsigned int diffs = (((a_hi - b_hi) & 0xffffu) << 16)
                           |  ((a_lo - b_lo) & 0xffffu);

  return (long long) (((unsigned long long) sums << 32) | diffs);
}
253 
254 
/*
 * Rounded average of the signed 16-bit halves: each lane is
 * (a + b + 1) >> 1, computed in int and truncated to 16 bits.
 *
 * The high lane is placed with an unsigned shift; the original
 * left-shifted a possibly negative int, which is undefined behaviour.
 */
static inline int _avg2(int src1, int src2) {
  const int lo = (((int) (short) src1 + (int) (short) src2 + 1) >> 1) & 0xFFFF;
  const int hi = ((src1 >> 16) + (src2 >> 16) + 1) >> 1;

  return (int) (((unsigned int) hi << 16) | (unsigned int) lo);
}
262 
263 
264 
/* Rounded average of the four unsigned bytes: each lane becomes
   (a + b + 1) >> 1. */
static inline unsigned int _avgu4(unsigned int src1, unsigned int src2) {
  unsigned int packed = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const unsigned int a = (src1 >> shift) & 0xFF;
    const unsigned int b = (src2 >> shift) & 0xFF;

    packed |= ((a + b + 1) >> 1) << shift;
  }
  return packed;
}
272 
273 
/* Number of set bits in the byte b. */
static inline int TEN_popc (unsigned char b)
{
  int ones = 0;

  while (b != 0) {
    ones += b & 0x1;
    b >>= 1;
  }
  return ones;
}
284 
/* Per-byte population count: each byte of the result holds the number
   of set bits in the corresponding byte of src1. */
static inline unsigned int _bitc4(unsigned int src1)
{
  unsigned int counts = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    unsigned int byte = (src1 >> shift) & 0xFF;
    unsigned int ones = 0;

    while (byte != 0) {
      ones += byte & 0x1;
      byte >>= 1;
    }
    counts |= ones << shift;
  }
  return counts;
}
293 
/* Reverse the order of the 32 bits of src (bit 0 swaps with bit 31,
   bit 1 with bit 30, and so on). */
static inline unsigned int _bitr(unsigned int src) {
	unsigned int reversed = 0;
	int remaining;

	/* Peel bits off the bottom of src and push them into reversed. */
	for (remaining = 32; remaining > 0; --remaining) {
		reversed = (reversed << 1) | (src & 1);
		src >>= 1;
	}
	return reversed;
}
302 
303 
/*
 * Clear bits csta..cstb (inclusive) of src2.  Both positions are reduced
 * to 0..31; if csta > cstb the value is returned unchanged.
 *
 * The mask is built from an unsigned 1 so that a 32-bit wide field
 * (csta = 0, cstb = 31) does not overflow a signed int.
 */
static inline unsigned int _clr(unsigned int src2,  int csta,  int cstb)
{
  unsigned int mask;

  csta &= 0x1f;
  cstb &= 0x1f;
  if (csta > cstb)
    return src2;

  /* (2^(width) - 1) << csta, where width = cstb - csta + 1. */
  mask = (((1u << (cstb - csta)) << 1) - 1u) << csta;
  return src2 & ~mask;
}
315 
/*
 * Clear a bit field of src2 whose bounds are packed in src1:
 * bits 9..5 of src1 give the first bit (csta), bits 4..0 the last (cstb).
 * If csta > cstb the value is returned unchanged.
 *
 * The mask is built from an unsigned 1 so that a full-width field does
 * not overflow a signed int.
 */
static inline unsigned int _clrr(unsigned int src2, int src1)
{
  const unsigned int csta = (src1 >> 5) & 0x1f;
  const unsigned int cstb = src1 & 0x1f;
  unsigned int mask;

  if (csta > cstb)
    return src2;

  mask = (((1u << (cstb - csta)) << 1) - 1u) << csta;
  return src2 & ~mask;
}
327 
328 
329 
330 
/*
 * Compare the 16-bit halves of src1 and src2 for equality.
 * Bit 0 of the result is set when the low halves match, bit 1 when the
 * high halves match.
 *
 * Halves are extracted with shifts, so the bit assignment no longer
 * depends on host byte order and no int is punned through a short array.
 */
static inline int _cmpeq2(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	const int lo_eq = (a & 0xffffu) == (b & 0xffffu);
	const int hi_eq = (a >> 16) == (b >> 16);

	return (hi_eq << 1) | lo_eq;
}
342 
/*
 * Compare the four bytes of src1 and src2 for equality.  Bit i of the
 * result (i = 0 for the least significant byte) is set when byte i of
 * both operands match.
 *
 * Bytes are extracted with shifts, so the bit assignment no longer
 * depends on host byte order and no int is punned through a char array.
 */
static inline int _cmpeq4(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	int result = 0;
	int lane;

	for (lane = 0; lane < 4; lane++) {
		const unsigned int shift = 8u * (unsigned int) lane;

		if (((a >> shift) & 0xffu) == ((b >> shift) & 0xffu))
			result |= 1 << lane;
	}
	return result;
}
356 
357 
/*
 * Signed greater-than on the 16-bit halves.  Bit 0 of the result is set
 * when the low half of src1 is greater than the low half of src2, bit 1
 * likewise for the high halves.
 *
 * Halves are extracted with shifts, so the bit assignment no longer
 * depends on host byte order and no int is punned through a short array.
 */
static inline int _cmpgt2(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	const short a_lo = (short) (a & 0xffffu), a_hi = (short) (a >> 16);
	const short b_lo = (short) (b & 0xffffu), b_hi = (short) (b >> 16);

	return ((a_hi > b_hi) << 1) | (a_lo > b_lo);
}
369 
370 
/* Unsigned greater-than on the four bytes: bit i of the result is set
   when byte i of src1 is greater than byte i of src2 (i = 0 is the least
   significant byte). */
static inline unsigned int _cmpgtu4(unsigned int src1, unsigned int src2) {
  unsigned int flags = 0;
  unsigned int lane;

  for (lane = 0; lane < 4; lane++) {
    const unsigned int a = (src1 >> (8 * lane)) & 0xFF;
    const unsigned int b = (src2 >> (8 * lane)) & 0xFF;

    if (a > b)
      flags |= 1u << lane;
  }
  return flags;
}
398 
399 
400 
401 
/*
 * Double dot product of the two signed 16-bit halves of src1 with the
 * four bytes of src2:
 *   low  32 bits = src1.lo * src2.byte0 + src1.hi * src2.byte1
 *   high 32 bits = src1.lo * src2.byte2 + src1.hi * src2.byte3
 *
 * Fixes two defects: the high half of src1 was masked with 0xfff0000
 * (losing its top four bits), and the two 32-bit results were combined
 * with a 16-bit shift, so they overlapped instead of filling the two
 * words of the 64-bit return value.
 *
 * NOTE(review): the bytes of src2 are treated as unsigned, as before;
 * confirm against the C6x instruction reference.
 */
static inline long long _ddotp4(unsigned int src1, unsigned int src2) {
  const int s1_lo = (short) (src1 & 0xffffu);
  const int s1_hi = (short) (src1 >> 16);

  const int b0 = (int) (src2 & 0xffu);
  const int b1 = (int) ((src2 >> 8) & 0xffu);
  const int b2 = (int) ((src2 >> 16) & 0xffu);
  const int b3 = (int) (src2 >> 24);

  const unsigned int res0 = (unsigned int) (s1_lo * b0 + s1_hi * b1);
  const unsigned int res1 = (unsigned int) (s1_lo * b2 + s1_hi * b3);

  return (long long) (((unsigned long long) res1 << 32) | res0);
}
417 
418 
/* Two dot products over signed 16-bit halves.  With o = upper word and
   e = lower word of src1_o_src1_e:
     high 32 bits = src2.lo * o.lo + src2.hi * o.hi
     low  32 bits = o.lo * src2.hi + e.hi * src2.lo  */
static inline long long _ddotph2(long long src1_o_src1_e, unsigned int src2)
{
  const unsigned int odd  = src1_o_src1_e >> 32;
  const unsigned int even = src1_o_src1_e & 0xFFFFFFFF;

  const short odd_lo  = odd & 0XFFFF;
  const short odd_hi  = odd >> 16;
  const short even_hi = even >> 16;
  const short s2_lo   = src2 & 0XFFFF;
  const short s2_hi   = src2 >> 16;

  const int upper = s2_lo * odd_lo + s2_hi * odd_hi;
  const int lower = odd_lo * s2_hi + even_hi * s2_lo;

  unsigned long long packed = upper;
  packed <<= 32;
  return packed | (unsigned int) lower;
}
435 
436 
/* Two dot products over signed 16-bit halves.  With o = upper word and
   e = lower word of src1_o_src1_e:
     high 32 bits = src2.lo * e.hi + src2.hi * o.lo
     low  32 bits = e.hi * src2.hi + e.lo * src2.lo  */
static inline long long _ddotpl2(long long src1_o_src1_e, unsigned int src2)
{
  const unsigned int odd  = src1_o_src1_e >> 32;
  const unsigned int even = src1_o_src1_e & 0xFFFFFFFF;

  const short odd_lo  = odd & 0XFFFF;
  const short even_lo = even & 0XFFFF;
  const short even_hi = even >> 16;
  const short s2_lo   = src2 & 0XFFFF;
  const short s2_hi   = src2 >> 16;

  const int upper = s2_lo * even_hi + s2_hi * odd_lo;
  const int lower = even_hi * s2_hi + even_lo * s2_lo;

  unsigned long long packed = upper;
  packed <<= 32;
  return packed | (unsigned int) lower;
}
452 
453 
/*
 * De-interleave the bits of src: the even-numbered bits (0, 2, 4, ...)
 * are gathered, in order, into the low 16 bits of the result and the
 * odd-numbered bits into the high 16 bits.
 *
 * The final pack is done in unsigned arithmetic; the original shifted a
 * promoted (signed) int left by 16, which overflows whenever bit 31 of
 * src is set.
 */
static inline unsigned int _deal(unsigned int src)
{
  unsigned int even_bits = 0;
  unsigned int odd_bits = 0;
  int pair;

  for (pair = 0; pair < 16; pair++) {
    even_bits |= ((src >> (2 * pair)) & 0x1u) << pair;
    odd_bits  |= ((src >> (2 * pair + 1)) & 0x1u) << pair;
  }
  return (odd_bits << 16) | even_bits;
}
468 
469 
/*
 * Build a 64-bit value with src1 in the upper 32 bits and src2 in the
 * lower 32 bits.
 *
 * The shift is done on an unsigned 64-bit value; shifting a signed
 * long long so that a one lands in bit 63 is undefined behaviour.
 */
static inline long long _dmv(unsigned int src1, unsigned int src2)
{
  return (long long) (((unsigned long long) src1 << 32) | src2);
}
474 
475 
/* Signed 16-bit dot product with negation: product of the high halves
   minus product of the low halves. */
static inline int _dotpn2(int src1, int src2) {
	const int hi_prod = (short int) (src1 >> 16) * (short int) (src2 >> 16);
	const int lo_prod = (short int) src1 * (short int) src2;

	return hi_prod - lo_prod;
}
483 
484 
/* Signed 16-bit dot product: product of the high halves plus product of
   the low halves. */
static inline int _dotp2(int src1, int src2) {
	const int hi_prod = (short int) (src1 >> 16) * (short int) (src2 >> 16);
	const int lo_prod = (short int) src1 * (short int) src2;

	return hi_prod + lo_prod;
}
492 
493 
494 
/* Dot product with negation of signed halves of src1 by unsigned halves
   of src2: (hi*hi - lo*lo + 2^15) >> 16. */
static inline int _dotpnrsu2(int src1, unsigned int src2)
{
  const short s1_lo = src1 & 0XFFFF;
  const short s1_hi = src1 >> 16;
  const unsigned short s2_lo = src2 & 0XFFFF;
  const unsigned short s2_hi = src2 >> 16;

  const long long hi_prod = (int) (s1_hi * s2_hi);
  const long long lo_prod = (int) (s1_lo * s2_lo);

  /* Round by adding half an LSB of the final result before shifting. */
  return (hi_prod - lo_prod + (1 << 15)) >> 16;
}
505 
506 
507 
/* Rounded dot product of signed halves of src1 by unsigned halves of
   src2: (lo*lo + hi*hi + 2^15) >> 16. */
static inline int _dotprsu2(int src1, unsigned int src2) {
  const short s1_lo = src1 & 0XFFFF;
  const short s1_hi = src1 >> 16;
  const unsigned short s2_lo = (src2 & 0XFFFF);
  const unsigned short s2_hi = (src2 >> 16);

  const long long lo_prod = (int) (s1_lo * s2_lo);
  const long long hi_prod = (int) (s1_hi * s2_hi);

  /* Round by adding half an LSB of the final result before shifting. */
  return (lo_prod + hi_prod + (1LL << 15)) >> 16;
}
517 
518 
519 
520 
521 
522 
523 
/* Dot product of the four signed bytes of src1 with the four unsigned
   bytes of src2. */
static inline int _dotpsu4(int src1, unsigned int src2) {
  unsigned int acc = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const signed char a = ((unsigned int) src1 >> shift) & 0xff;
    const unsigned int b = (src2 >> shift) & 0xff;

    /* a is converted to unsigned for the multiply; the wrap-around is
       undone when the total is converted back to int. */
    acc += a * b;
  }
  return acc;
}
539 
540 
/* Dot product of the four unsigned bytes of src1 with the four unsigned
   bytes of src2. */
static inline unsigned int _dotpu4(unsigned int src1, unsigned int src2) {
	unsigned total = 0;
	int shift;

	for (shift = 0; shift < 32; shift += 8) {
		const unsigned char a = (src1 >> shift) & 0xff;
		const unsigned char b = (src2 >> shift) & 0xff;

		total += a * b;
	}
	return total;
}
555 
556 
/*
 * Pack halfwords into a 64-bit value, from most to least significant:
 *   src1.hi : src2.hi : src1.lo : src2.lo
 *
 * Halves are extracted with shifts and masks, so the result no longer
 * depends on host byte order and no int is punned through a short array.
 */
static inline long long _dpack2(unsigned int src1, unsigned int src2){
  const unsigned long long s1_hi = src1 >> 16, s1_lo = src1 & 0xffffu;
  const unsigned long long s2_hi = src2 >> 16, s2_lo = src2 & 0xffffu;

  return (long long) ((s1_hi << 48) | (s2_hi << 32) | (s1_lo << 16) | s2_lo);
}
563 
564 
/*
 * Pack halfwords into a 64-bit value, from most to least significant:
 *   src2.lo : src1.hi : src1.lo : src2.hi
 *
 * Halves are extracted with shifts and masks, so the result no longer
 * depends on host byte order and no int is punned through a short array.
 */
static inline long long _dpackx2(unsigned int src1, unsigned int src2){
  const unsigned long long s1_hi = src1 >> 16, s1_lo = src1 & 0xffffu;
  const unsigned long long s2_hi = src2 >> 16, s2_lo = src2 & 0xffffu;

  return (long long) ((s2_lo << 48) | (s1_hi << 32) | (s1_lo << 16) | s2_hi);
}
571 
/*
 * Signed bit-field extract: shift src2 left by csta, then arithmetic
 * shift right by cstb.
 *
 * The left shift is done on an unsigned copy (shifting a signed value so
 * that bits leave or enter the sign position is undefined behaviour),
 * and both counts are reduced to 0..31 so an out-of-range count cannot
 * trigger an undefined shift.
 */
static inline int _ext(int src2, unsigned int csta, unsigned int cstb)
{
  const unsigned int aligned = (unsigned int) src2 << (csta & 0x1f);

  return (int) aligned >> (cstb & 0x1f);
}
576 
/*
 * Signed bit-field extract with the shift counts packed in src1:
 * bits 9..5 give the left-shift count, bits 4..0 the right-shift count.
 *
 * The left shift is done on an unsigned copy; left-shifting a signed
 * value so that bits leave or enter the sign position is undefined
 * behaviour.
 */
static inline int _extr(int src2, int src1)
{
  const unsigned int csta = (src1 >> 5) & 0x1f;
  const unsigned int cstb = src1 & 0x1f;
  const unsigned int aligned = (unsigned int) src2 << csta;

  return (int) aligned >> cstb;
}
583 
/* Unsigned bit-field extract: shift src2 left by csta, then logical
   shift right by cstb. */
static inline unsigned int _extu(unsigned int src2, unsigned int csta, unsigned int cstb)
{
  const unsigned int aligned = src2 << csta;

  return aligned >> cstb;
}
588 
/* Unsigned bit-field extract with the shift counts packed in src1:
   bits 9..5 give the left-shift count, bits 4..0 the right-shift count. */
static inline unsigned int _extur(unsigned int src2, int src1)
{
  const unsigned int left = (src1 >> 5) & 0x1f;
  const unsigned int right = src1 & 0x1f;
  const unsigned int aligned = src2 << left;

  return aligned >> right;
}
595 
596 
/*
 * Upper 32 bits of the object representation of a double, read as a
 * 64-bit integer.
 *
 * The bits are read through a union; the original stored a double
 * through a pointer to an unsigned long long object, which violates the
 * strict-aliasing rule.
 */
static inline unsigned long long _hi(double src) {
	union { double d; unsigned long long u; } pun;

	pun.d = src;
	return pun.u >> 32;
}
602 
/* Upper 32 bits of a 64-bit value. */
static inline unsigned int _hill (long long src)
{
  const long long upper = src >> 32;

  return (unsigned int) upper;
}
607 
608 
609 
/*
 * Build a double whose 64-bit object representation has `hi` in the
 * upper 32 bits and `lo` in the lower 32 bits.
 *
 * The bits are written through a union; the original stored an integer
 * through a pointer to a double object, which violates the
 * strict-aliasing rule.
 */
static inline double _itod(unsigned hi, unsigned lo) {
	union { double d; unsigned long long u; } pun;

	pun.u = ((unsigned long long) hi << 32) | (unsigned long long) lo;
	return pun.d;
}
616 
617 
/*
 * Build a 64-bit value with src2 in the upper 32 bits and src1 in the
 * lower 32 bits.
 *
 * The shift is done on an unsigned 64-bit value; shifting a signed
 * long long so that a one lands in bit 63 is undefined behaviour.
 */
static inline long long _itoll(unsigned int src2, unsigned int src1)
{
  return (long long) (((unsigned long long) src2 << 32) | src1);
}
622 
623 
/*
 * Saturating absolute value of a 40-bit quantity.  src2 is first
 * sign-extended from bit 39; the most negative 40-bit value saturates to
 * 2^39 - 1.
 *
 * Removes the original's `-1LL << 39` (left shift of a negative value)
 * and its lower-bound check, which could never fire on a non-negative
 * result.
 */
static inline C6X_COMPAT_LONG40 _labs(C6X_COMPAT_LONG40 src2)
{
  const long long maxv = (1LL << (40 - 1)) - 1;
  C6X_COMPAT_LONG40 lres = orig_L40_set(src2);

  if (lres < 0)
    lres = -lres;
  if (lres > maxv)
    lres = maxv;

  return lres;
}
636 
637 
_ldotp2(int src1,int src2)638 static inline C6X_COMPAT_LONG40 _ldotp2(int src1, int src2) {
639 return (C6X_COMPAT_LONG40) _dotp2(src1, src2);
640 }
641 
642 
/* Leftmost bit detection: counts, from the top of src2, how many bits
   precede the first bit equal to bit 0 of src1 (32 if there is none).
   Only bit 0 of src1 is used. */
static inline unsigned int _lmbd(unsigned int src1, unsigned int src2)
{
  /* All ones when looking for a 1, all zeros when looking for a 0. */
  const int select = ((int) (src1 << 31)) >> 31;
  /* After the XOR the sought bit value always appears as a 1. */
  const unsigned int probe = select ^ (~src2);

  return norm_shift_amt_U_and_non_U(0, probe);
}
647 
648 
/*
 * Number of redundant sign bits of a 40-bit quantity: how many times the
 * value, sign-extended from bit 39, can be shifted left before bit 39
 * and bit 38 differ.  Returns 39 for zero.
 *
 * Fixes: an input with bits set only above bit 39 is zero as a 40-bit
 * value, but the original returned 0 for it instead of 39.  The counting
 * loop now shifts an unsigned copy, since left-shifting a negative
 * signed value is undefined behaviour.
 */
static inline unsigned int _lnorm(C6X_COMPAT_LONG40 src2) {
  const long long val = orig_L40_set(src2);
  unsigned long long bits;
  unsigned int cnt = 0;

  if (val == 0)
    return 39;

  bits = (unsigned long long) val;
  /* Terminates: val != 0, so a bit differing from the sign eventually
     reaches position 38 (for all-ones, the zeros shifted in do). */
  while (((bits >> 39) & 1) == ((bits >> 38) & 1)) {
    bits <<= 1;
    cnt++;
  }
  return cnt;
}
669 
670 
/*
 * Lower 32 bits of the object representation of a double, read as a
 * 64-bit integer.
 *
 * Fixes: the original returned all 64 bits, so the upper word leaked
 * into the result whenever the caller kept it in a 64-bit variable.
 * The bits are also read through a union instead of a pointer cast that
 * violated the strict-aliasing rule.
 */
static inline unsigned long long _lo(double src) {
	union { double d; unsigned long long u; } pun;

	pun.d = src;
	return pun.u & 0xFFFFFFFFULL;
}
676 
677 
/* Lower 32 bits of a 64-bit value. */
static inline unsigned int _loll (long long src)
{
  const unsigned int lower = (unsigned int) src;

  return lower;
}
682 
683 
/*
 * Saturating add of a 32-bit int and a 40-bit quantity.  src2 is
 * sign-extended from bit 39, src1 is added, and the sum is clamped to
 * the signed 40-bit range; _overflow is set to 1 when clamping occurs.
 *
 * Compared with the original: the lower bound is no longer computed with
 * `-1LL << 39` (left shift of a negative value), and the trailing
 * split-and-reassemble of the already clamped result is dropped.  That
 * step rebuilt the same value and left-shifted a possibly negative
 * number on the way.
 */
static inline C6X_COMPAT_LONG40 _lsadd(int src1, C6X_COMPAT_LONG40 src2)
{
  const long long maxv = (1LL << (40 - 1)) - 1;
  const long long minv = -maxv - 1;
  long long res = src1 + orig_L40_set(src2);

  if (res > maxv) {
    res = maxv;
    _overflow = 1;
  } else if (res < minv) {
    res = minv;
    _overflow = 1;
  }

  return (C6X_COMPAT_LONG40) res;
}
713 
714 
715 
/*
 * Per-halfword signed maximum: each 16-bit half of the result is the
 * larger of the corresponding signed halves of src1 and src2.
 *
 * Halves are extracted with shifts and masks instead of punning an int
 * through short arrays (strict-aliasing violation, possible
 * misalignment).
 */
static inline int _max2 (int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	const short a_lo = (short) (a & 0xffffu), a_hi = (short) (a >> 16);
	const short b_lo = (short) (b & 0xffffu), b_hi = (short) (b >> 16);
	const unsigned int lo = (unsigned short) (a_lo > b_lo ? a_lo : b_lo);
	const unsigned int hi = (unsigned short) (a_hi > b_hi ? a_hi : b_hi);

	return (int) ((hi << 16) | lo);
}
726 
727 
728 
729 
730 
731 
/* Per-byte unsigned maximum: each byte of the result is the larger of
   the corresponding bytes of src1 and src2. */
static inline unsigned int _maxu4(unsigned int src1, unsigned int src2) {
  unsigned int packed = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const unsigned int a = (src1 >> shift) & 0xFF;
    const unsigned int b = (src2 >> shift) & 0xFF;

    packed |= ((a < b) ? b : a) << shift;
  }
  return packed;
}
762 
/*
 * Per-halfword signed minimum: each 16-bit half of the result is the
 * smaller of the corresponding signed halves of src1 and src2.
 *
 * Halves are extracted with shifts and masks instead of punning an int
 * through short arrays (strict-aliasing violation, possible
 * misalignment).
 */
static inline int _min2(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	const short a_lo = (short) (a & 0xffffu), a_hi = (short) (a >> 16);
	const short b_lo = (short) (b & 0xffffu), b_hi = (short) (b >> 16);
	const unsigned int lo = (unsigned short) (a_lo < b_lo ? a_lo : b_lo);
	const unsigned int hi = (unsigned short) (a_hi < b_hi ? a_hi : b_hi);

	return (int) ((hi << 16) | lo);
}
773 
774 
/* Per-byte unsigned minimum: each byte of the result is the smaller of
   the corresponding bytes of src1 and src2. */
static inline unsigned int _minu4(unsigned int src1, unsigned int src2) {
  unsigned int packed = 0;
  int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const unsigned int a = (src1 >> shift) & 0xFF;
    const unsigned int b = (src2 >> shift) & 0xFF;

    packed |= ((a > b) ? b : a) << shift;
  }
  return packed;
}
803 
804 
/* Signed low half of src1 times signed low half of src2. */
static inline int _mpy(int src1, int src2) {
  const short a = (short) src1;
  const short b = (short) src2;

  return a * b;
}
808 
809 
/* Signed high half of src1 times signed high half of src2. */
static inline int _mpyh(int src1, int src2) {
  const short a = (short) (src1 >> 16);
  const short b = (short) (src2 >> 16);

  return a * b;
}
813 
814 
/* Signed high half of src1 times the full 32-bit src2, as a 64-bit
   product. */
static inline long long _mpyhill (int src1,  int src2)
{
  const long long wide = src2;
  const short half = src1 >> 16;

  return wide * half;
}
820 
/* Signed high half of src1 times the full 32-bit src2, rounded by adding
   2^14 and then shifted right by 15. */
static inline int _mpyhir(int src1, int src2)
{
  const short half = src1 >> 16;
  const long long rounded = (long long) src2 * half + (1 << 14);

  return rounded >> 15;
}
828 
829 
/* Signed high half of src1 times signed low half of src2. */
static inline int _mpyhl(int src1, int src2) {
  const short a = (short) (src1 >> 16);
  const short b = (short) (src2);

  return a * b;
}
833 
/*
 * Unsigned high half of src1 times unsigned low half of src2.
 *
 * The operands are widened to unsigned int before multiplying.  As
 * unsigned short they were promoted to (signed) int, and 0xFFFF * 0xFFFF
 * overflows int, which is undefined behaviour.
 */
static inline unsigned int _mpyhlu(unsigned int src1, unsigned int src2) {
  const unsigned int a = src1 >> 16;
  const unsigned int b = src2 & 0xffffu;

  return a * b;
}
837 
/* Signed high half of src1 times unsigned low half of src2. */
static inline int _mpyhslu(int src1, unsigned int src2) {
  const short a = (short) (src1 >> 16);
  const unsigned short b = (unsigned short) src2;

  return a * b;
}
841 
842 
/* Signed high half of src1 times unsigned high half of src2. */
static inline int _mpyhsu(int src1, unsigned int src2) {
  const short a = (short) (src1 >> 16);
  const unsigned short b = (unsigned short) (src2 >> 16);

  return a * b;
}
846 
847 
/*
 * Unsigned high half of src1 times unsigned high half of src2.
 *
 * The operands are widened to unsigned int before multiplying.  As
 * unsigned short they were promoted to (signed) int, and 0xFFFF * 0xFFFF
 * overflows int, which is undefined behaviour.
 */
static inline unsigned int _mpyhu(unsigned int src1, unsigned int src2) {
  const unsigned int a = src1 >> 16;
  const unsigned int b = src2 >> 16;

  return a * b;
}
851 
852 
/* Unsigned high half of src1 times signed low half of src2. */
static inline int _mpyhuls(unsigned int src1, int src2) {
  const unsigned short a = (unsigned short) (src1 >> 16);
  const signed short b = (signed short) (src2);

  return a * b;
}
856 
857 
/* Unsigned high half of src1 times signed high half of src2. */
static inline int _mpyhus(unsigned int src1, int src2) {
  const unsigned short a = (unsigned short) (src1 >> 16);
  const short b = (short) (src2 >> 16);

  return a * b;
}
861 
862 
863 
/* Full 64-bit product of two signed 32-bit values. */
static inline long long _mpyidll (int src1, int src2)
{
  const long long wide = src1;

  return wide * src2;
}
868 
869 
/* Signed low half of src1 times signed high half of src2. */
static inline int _mpylh(int src1, int src2) {
  const signed short a = (signed short) (src1 & 0xffff);
  const signed short b = (signed short) (src2 >> 16);

  return a * b;
}
873 
/*
 * Unsigned low half of src1 times unsigned high half of src2.
 *
 * The operands are widened to unsigned int before multiplying.  As
 * unsigned short they were promoted to (signed) int, and 0xFFFF * 0xFFFF
 * overflows int, which is undefined behaviour.
 */
static inline unsigned int _mpylhu(unsigned int src1, unsigned int src2) {
  const unsigned int a = src1 & 0xffffu;
  const unsigned int b = src2 >> 16;

  return a * b;
}
877 
878 
/* Signed low half of src1 times the full 32-bit src2, as a 64-bit
   product. */
static inline long long _mpylill (int src1,  int src2)
{
  const long long wide = src2;
  const short half = (short) src1;

  return wide * half;
}
883 
884 
885 
/* Signed low half of src1 times the full 32-bit src2, rounded by adding
   2^14 and then shifted right by 15. */
static inline int _mpylir(int src1, int src2)
{
  const short half = src1;
  const long long rounded = (long long) src2 * half + (1 << 14);

  return rounded >> 15;
}
893 
894 
/* Signed low half of src1 times unsigned high half of src2. */
static inline int _mpylshu(int src1, unsigned int src2) {
  const short a = (short) src1;
  const unsigned short b = (unsigned short) (src2 >> 16);

  return a * b;
}
898 
899 
/* Unsigned low half of src1 times signed high half of src2. */
static inline int _mpyluhs(unsigned int src1, int src2) {
  const unsigned short a = (unsigned short) src1;
  const short b = (short) (src2 >> 16);

  return a * b;
}
903 
904 
905 
/* Signed low half of src1 times unsigned low half of src2. */
static inline int _mpysu(int src1, unsigned int src2) {
  const short a = (short) src1;
  const unsigned short b = (unsigned short) src2;

  return a * b;
}
909 
910 
911 
/* Four byte-wise products of the signed bytes of src1 with the unsigned
   bytes of src2.  Each product is truncated to 16 bits and stored in the
   16-bit lane matching its byte position (byte 0 in bits 15..0, byte 3
   in bits 63..48). */
static inline long long _mpysu4ll (int src1,  unsigned int src2) {
  unsigned long long packed = 0;
  int lane;

  /* Walk from the top byte down so each product can be shifted in. */
  for (lane = 3; lane >= 0; lane--) {
    const signed char a = ((unsigned int) src1 >> (8 * lane)) & 0xff;
    const unsigned short b = (src2 >> (8 * lane)) & 0xff;
    const unsigned short prod = a * b;

    packed = (packed << 16) | prod;
  }
  return packed;
}
934 
/*
 * Unsigned low half of src1 times unsigned low half of src2.
 *
 * The operands are widened to unsigned int before multiplying.  As
 * unsigned short they were promoted to (signed) int, and 0xFFFF * 0xFFFF
 * overflows int, which is undefined behaviour.
 */
static inline unsigned int _mpyu(unsigned int src1, unsigned int src2) {
	const unsigned int a = src1 & 0xffffu;
	const unsigned int b = src2 & 0xffffu;

	return a * b;
}
939 
/* Unsigned low half of src1 times signed low half of src2. */
static inline int _mpyus(unsigned int src1, int src2) {
  const unsigned short a = (unsigned short) src1;
  const short b = (short) src2;

  return a * b;
}
943 
/* Four byte-wise products of the unsigned bytes of src1 and src2.  Each
   16-bit product is stored in the 16-bit lane matching its byte position
   (byte 0 in bits 15..0, byte 3 in bits 63..48). */
static inline long long _mpyu4ll (unsigned int src1,  unsigned int src2) {
  unsigned long long packed = 0;
  int lane;

  /* Walk from the top byte down so each product can be shifted in. */
  for (lane = 3; lane >= 0; lane--) {
    const unsigned char a = (src1 >> (8 * lane)) & 0xff;
    const unsigned short b = (src2 >> (8 * lane)) & 0xff;
    const unsigned short prod = a * b;

    packed = (packed << 16) | prod;
  }
  return packed;
}
966 
967 
_mpy2ir(unsigned int src1,unsigned int src2)968 static inline long long _mpy2ir(unsigned int src1, unsigned int src2)
969 {
970   if ((src1 == 0x8000) && (src2 == 0x80000000)) {
971     _overflow = 1;
972     return 0;
973   }
974   else {
975     short ls1 = src1 & 0xffff;
976     short hs1 = src1 >> 16;
977     unsigned long long hi = (((long long) hs1) * (int) src2 + (1 << 14)) >> 15;
978     unsigned long long lo = ((((long long) ls1) * (int) src2 + (1 << 14)) >> 15) & 0xFFFFFFFF;
979     return (hi << 32) | lo;
980   }
981 }
982 
983 
/* Two signed 16x16 multiplies: the product of the high halves goes in
   the upper 32 bits of the result, the product of the low halves in the
   lower 32 bits. */
static inline long long _mpy2ll (int src1,  int src2) {
  const short a_lo = src1 & 0xffff;
  const short a_hi = src1 >> 16;
  const short b_lo = src2 & 0xffff;
  const short b_hi = src2 >> 16;

  const unsigned long long upper = a_hi * b_hi;
  const unsigned long long lower = (a_lo * b_lo) & 0xFFFFFFFF;

  return (upper << 32) | lower;
}
996 
997 
/*
 * Low 32 bits of the product of two 32-bit values.
 *
 * The multiply is done in unsigned arithmetic, which wraps modulo 2^32;
 * a signed int multiply that overflows is undefined behaviour.
 */
static inline int _mpy32(int src1, int src2)
{
  return (int) ((unsigned int) src1 * (unsigned int) src2);
}
1002 
1003 
/* Full 64-bit product of two signed 32-bit values. */
static inline long long _mpy32ll(int src1, int src2)
{
  const long long wide = src1;

  return wide * src2;
}
1008 
/*
 * 64-bit product of a signed 32-bit value (src1) and an unsigned 32-bit
 * value (src2).
 *
 * Fixes: src2 was cast to int before the multiply, so any value at or
 * above 2^31 was treated as negative.  It is now zero-extended.
 */
static inline long long _mpy32su(int src1, unsigned int src2)
{
  return (long long) src1 * (long long) src2;
}
1013 
/*
 * 64-bit product of two unsigned 32-bit values, returned as the bit
 * pattern of the unsigned product.
 *
 * Fixes: both operands were cast to int before the multiply, so values
 * at or above 2^31 were treated as negative.  They are now
 * zero-extended.
 */
static inline long long _mpy32u(unsigned int src1, unsigned int src2)
{
  return (long long) ((unsigned long long) src1 * (unsigned long long) src2);
}
1018 
/*
 * 64-bit product of an unsigned 32-bit value (src1) and a signed 32-bit
 * value (src2).
 *
 * Fixes: src1 was cast to int before the multiply, so any value at or
 * above 2^31 was treated as negative.  It is now zero-extended.
 */
static inline long long _mpy32us(unsigned int src1, int src2)
{
  return (long long) src1 * (long long) src2;
}
1023 
/* Returns its argument unchanged. */
static inline int _mvd (int src2)
{
  const int moved = src2;

  return moved;
}
1028 
1029 
/* Number of redundant sign bits of src2, obtained from the signed mode
   of norm_shift_amt_U_and_non_U(). */
static inline unsigned int _norm(int src2)
{
  const int signed_mode = 1;

  return norm_shift_amt_U_and_non_U(signed_mode, src2);
}
1034 
1035 
/*
 * Pack the low halfword of src1 into the upper half of the result and
 * the low halfword of src2 into the lower half.
 *
 * Implemented with shifts and masks, so the result no longer depends on
 * host byte order and no int is punned through a short array.
 */
static inline unsigned int _pack2 (unsigned int src1, unsigned int src2) {
	return (src1 << 16) | (src2 & 0x0000ffffu);
}
1046 
1047 
/* Pack the high halfword of src1 into the upper half of the result and
   the high halfword of src2 into the lower half. */
static inline int _packh2 (unsigned int src1, unsigned int src2) {
	const unsigned upper = src1 & 0xffff0000;
	const unsigned lower = src2 >> 16;

	return upper | lower;
}
1055 
/* Pack the upper byte of each halfword: result bytes, from most to least
   significant, are src1 byte 3, src1 byte 1, src2 byte 3, src2 byte 1. */
static inline unsigned int _packh4 (unsigned int src1, unsigned int src2) {
	unsigned packed;

	packed  = ((src1 >> 24) & 0xff) << 24;
	packed |= ((src1 >> 8) & 0xff) << 16;
	packed |= ((src2 >> 24) & 0xff) << 8;
	packed |= (src2 >> 8) & 0xff;
	return packed;
}
1064 
/*
 * _packhl2: combines the upper halfword of src1 with the lower halfword
 * of src2.
 *   result[31:16] = src1[31:16]
 *   result[15:0]  = src2[15:0]
 */
static inline unsigned int _packhl2 (unsigned int src1,  unsigned int src2) {
	return (src1 & 0xffff0000) | (src2 & 0x0000ffff);
}
1071 
/*
 * _packlh2: combines the lower halfword of src1 with the upper halfword
 * of src2.
 *   result[31:16] = src1[15:0]
 *   result[15:0]  = src2[31:16]
 */
static inline unsigned int _packlh2 (unsigned int src1,  unsigned int src2) {
	return (src1 << 16) | ((src2 >> 16) & 0xffff);
}
1078 
1079 
1080 
1081 
/*
 * _packl4: packs the low byte of each halfword of src1 and src2.
 *   result = { src1[23:16], src1[7:0], src2[23:16], src2[7:0] }
 */
static inline unsigned int _packl4 (unsigned int src1, unsigned int src2) {
	/* src1 bytes 2 and 0 move into result bytes 3 and 2. */
	unsigned int upper_pair = (((src1 >> 16) & 0xff) << 24) | ((src1 & 0xff) << 16);
	/* src2 bytes 2 and 0 move into result bytes 1 and 0. */
	unsigned int lower_pair = (((src2 >> 16) & 0xff) << 8) | (src2 & 0xff);
	return upper_pair | lower_pair;
}
1090 
1091 
1092 
1093 
/*
 * _rpack2: passes each operand (as a signed int) through
 * util_shift_right_saturate_n(x, -1, 32) and packs the upper halfwords of
 * the two results:
 *   result[31:16] = upper halfword of the processed src1
 *   result[15:0]  = upper halfword of the processed src2
 * NOTE(review): util_shift_right_saturate_n is defined elsewhere in this
 * header; a shift amount of -1 presumably means a saturating left shift by
 * one -- confirm against its definition.
 */
static inline unsigned int _rpack2 (unsigned int src1, unsigned int src2) {
  int first = util_shift_right_saturate_n ((int) src1, -1, 32);
  int second = util_shift_right_saturate_n ((int) src2, -1, 32);
  unsigned int upper = (unsigned int) (first & 0xffff0000);
  unsigned int lower = (unsigned int) ((second & 0xffff0000) >> 16);
  return upper | lower;
}
1101 
1102 
/*
 * _rotl: rotates src1 left by the amount held in the low five bits of
 * src2.
 *
 * The complementary right-shift count is masked to 0..31 so that a rotate
 * amount of 0 does not shift a 32-bit value by 32 bits, which is undefined
 * behavior in C.
 */
static inline unsigned int _rotl (unsigned int src1, unsigned int src2)
{
  const unsigned int count = src2 & 0x1f;
  return (src1 << count) | (src1 >> ((32 - count) & 0x1f));
}
1108 
1109 
_sadd(int src1,int src2)1110 static inline int _sadd(int src1, int src2) {
1111 signed long long res;
1112 signed long long maxv, minv;
1113 maxv = (1LL << (32-1)) - 1;
1114 minv = (-1LL << (32-1));
1115 res = (long long) src1 + (long long) src2;
1116 if (res > maxv) {
1117 	res = maxv;
1118 	_overflow = 1;
1119 	}
1120 else if (res < minv ) {
1121 	res = minv;
1122 	_overflow = 1;
1123 	}
1124 return (int) res;
1125 }
1126 
/*
 * _saddsub: returns src1 + src2 in the upper 32 bits and the saturated
 * signed difference src1 - src2 in the lower 32 bits.
 *
 * Only the subtraction saturates; the addition wraps modulo 2^32, as in
 * the original implementation.
 * NOTE: TI c6x does NOT set the overflow register even if results
 * saturate, so _overflow is left untouched.
 *
 * The addition is done on the unsigned operands so that a wrapping sum is
 * well defined (the previous int addition overflowed), and the lower bound
 * avoids left-shifting a negative value.
 */
static inline long long _saddsub(unsigned int src1, unsigned int src2) {
  const long long maxv = (1LL << (32-1)) - 1;
  const long long minv = -(1LL << (32-1));

  /* Wrapping 32-bit sum. */
  unsigned int radd = src1 + src2;

  /* Signed difference, clamped to the 32-bit range. */
  long long rsub = (long long) ((int) src1) - (long long) ((int) src2);
  if (rsub > maxv) {
    rsub = maxv;
  }
  else if (rsub < minv) {
    rsub = minv;
  }

  return (long long) ((((unsigned long long) radd) << 32) |
                      (((unsigned long long) rsub) & 0x00000000ffffffffULL));
}
1154 
1155 
1156 
/*
 * _saddsub2: per-halfword saturating add and subtract of two packed
 * signed 16-bit pairs.  The 64-bit result is laid out as
 *   [63:48] sat16(src1.hi + src2.hi)
 *   [47:32] sat16(src1.lo + src2.lo)
 *   [31:16] sat16(src1.hi - src2.hi)
 *   [15:0]  sat16(src1.lo - src2.lo)
 *
 * NOTE: TI c6x does NOT set the overflow register even if results
 * saturate, so _overflow is left untouched.
 *
 * Halfwords are extracted with masks and shifts instead of overlaying
 * short[2] arrays with int pointers (an aliasing/alignment violation whose
 * lane order depended on host endianness), and the lower bound avoids
 * left-shifting a negative value.
 */
static inline long long _saddsub2(unsigned int src1, unsigned int src2) {
  const int maxv = (1 << (16-1)) - 1;
  const int minv = -(1 << (16-1));
  int a[2], b[2], radd[2], rsub[2];
  int i;

  /* Sign-extend each 16-bit lane: index 0 is the low halfword. */
  a[0] = (int) ((src1 & 0xffffu) ^ 0x8000u) - 0x8000;
  a[1] = (int) (((src1 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000;
  b[0] = (int) ((src2 & 0xffffu) ^ 0x8000u) - 0x8000;
  b[1] = (int) (((src2 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000;

  for (i = 0; i < 2; i++) {
    radd[i] = a[i] + b[i];
    rsub[i] = a[i] - b[i];

    if (radd[i] > maxv)
      radd[i] = maxv;
    else if (radd[i] < minv)
      radd[i] = minv;

    if (rsub[i] > maxv)
      rsub[i] = maxv;
    else if (rsub[i] < minv)
      rsub[i] = minv;
  }

  return (long long) (((((unsigned long long) radd[1]) & 0xffffULL) << 48) |
                      ((((unsigned long long) radd[0]) & 0xffffULL) << 32) |
                      ((((unsigned long long) rsub[1]) & 0xffffULL) << 16) |
                      ((((unsigned long long) rsub[0]) & 0xffffULL)));
}
1226 
1227 
1228 
/*
 * _sadd2: per-halfword saturating addition of two packed signed 16-bit
 * pairs.
 *   result[31:16] = sat16(src1.hi + src2.hi)
 *   result[15:0]  = sat16(src1.lo + src2.lo)
 *
 * NOTE: TI c6x does NOT set the overflow register even if results
 * saturate, so _overflow is left untouched.
 *
 * Halfwords are extracted and repacked with unsigned masks and shifts.
 * This avoids the short[2]/int* overlay (aliasing/alignment violation with
 * endian-dependent lane order), left-shifting a negative value, and
 * shifting a lane into the sign bit of an int when packing.
 */
static inline  int _sadd2(int src1, int src2) {
  const int maxv = (1 << (16-1)) - 1;
  const int minv = -(1 << (16-1));
  const unsigned int u1 = (unsigned int) src1;
  const unsigned int u2 = (unsigned int) src2;
  int r[2];
  int i;

  /* Sign-extend each lane and add: index 0 is the low halfword. */
  r[0] = ((int) ((u1 & 0xffffu) ^ 0x8000u) - 0x8000)
       + ((int) ((u2 & 0xffffu) ^ 0x8000u) - 0x8000);
  r[1] = ((int) (((u1 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000)
       + ((int) (((u2 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000);

  for (i = 0; i < 2; i++) {
    if (r[i] > maxv)
      r[i] = maxv;
    else if (r[i] < minv)
      r[i] = minv;
  }

  return (int) ((((unsigned int) r[1] & 0xffffu) << 16) |
                ((unsigned int) r[0] & 0xffffu));
}
1266 
1267 
/*
 * _saddus2: per-halfword addition of unsigned 16-bit lanes in src1 and
 * signed 16-bit lanes in src2, each result clamped to 0..0xffff.
 *   result[31:16] = clamp(src1.hi + src2.hi)
 *   result[15:0]  = clamp(src1.lo + src2.lo)
 *
 * The lane arithmetic is done in int with explicit sign extension of the
 * src2 lanes, and the result is packed as unsigned so that a high lane
 * >= 0x8000 is not shifted into the sign bit of an int.
 */
static inline int _saddus2(unsigned int src1, int src2) {
  const unsigned int u2 = (unsigned int) src2;
  int res0, res1;

  res0 = (int) (src1 & 0xffffu)
       + ((int) ((u2 & 0xffffu) ^ 0x8000u) - 0x8000);
  res1 = (int) ((src1 >> 16) & 0xffffu)
       + ((int) (((u2 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000);

  if (res0 >= 0x10000)
    res0 = 0xffff;
  else if (res0 < 0)
    res0 = 0;

  if (res1 >= 0x10000)
    res1 = 0xffff;
  else if (res1 < 0)
    res1 = 0;

  return (int) (((unsigned int) res1 << 16) | (unsigned int) res0);
}
1291 
1292 
/*
 * _saddu4: per-byte unsigned addition of the four 8-bit lanes of src1 and
 * src2, each lane clamped to 0xff.
 */
static inline unsigned int _saddu4(unsigned int src1, unsigned int src2) {
  unsigned int packed = 0;
  int shift;

  /* Walk from the top byte down so lanes land in their original position. */
  for (shift = 24; shift >= 0; shift -= 8) {
    unsigned int lane_sum = ((src1 >> shift) & 0xff) + ((src2 >> shift) & 0xff);
    if (lane_sum > 0xff)
      lane_sum = 0xff;
    packed = (packed << 8) | lane_sum;
  }

  return packed;
}
1325 
1326 
1327 
/*
 * _sat: converts a C6X_COMPAT_LONG40 value to a saturated 32-bit int.
 *
 * The argument is split into 32-bit halves, reassembled, and passed
 * through orig_L40_set() before being clamped to the signed 32-bit range.
 * When clamping occurs the global _overflow flag is set to 1.
 * NOTE(review): orig_L40_set is defined elsewhere in this header;
 * presumably it normalizes the value to 40 bits -- confirm.
 *
 * The lower bound is written as -(1LL << 31) because left-shifting a
 * negative value (-1LL << 31) is undefined behavior in C.
 */
static inline int _sat(C6X_COMPAT_LONG40 src2)
{
  const long long maxv = (1LL << (32-1)) - 1;
  const long long minv = -(1LL << (32-1));

  int hi = (int)(src2 >> 32);
  int lo = (int)src2;
  long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32;
  temp = orig_L40_set(temp);

  if (temp > maxv) {
    temp = maxv;
    _overflow = 1;
  }
  else if (temp < minv) {
    temp = minv;
    _overflow = 1;
  }
  return (int) temp;
}
1348 
/*
 * _set: sets bits csta..cstb (inclusive) of src2 to 1.
 *
 * Both bit positions are reduced to their low five bits.  If csta > cstb
 * src2 is returned unchanged.
 *
 * The mask is built in unsigned arithmetic: with int constants a field
 * reaching 31 or 32 bits wide overflowed (1 << 31, INT_MIN - 1), which is
 * undefined behavior.  The two-step shift keeps the 32-bit-wide case
 * (csta = 0, cstb = 31) from shifting by 32.
 */
static inline unsigned int _set(unsigned int src2, unsigned int csta, unsigned int cstb)
{
  unsigned int mask;

  csta &= 0x1f;
  cstb &= 0x1f;
  if (csta > cstb)
    return src2;

  mask = (((1u << (cstb - csta)) << 1) - 1u) << csta;
  return src2 | mask;
}
1360 
/*
 * _setr: sets a bit field of src2 to 1, with the field bounds taken from
 * src1: csta = src1[9:5], cstb = src1[4:0].  Bits csta..cstb (inclusive)
 * are set; if csta > cstb src2 is returned unchanged.
 *
 * The mask is built in unsigned arithmetic: with int constants a field
 * reaching 31 or 32 bits wide overflowed, which is undefined behavior.
 * The field positions are extracted from the unsigned bit pattern of src1
 * so that a negative src1 is not right-shifted as a signed value.
 */
static inline unsigned int _setr(unsigned int src2, int src1)
{
  const unsigned int bounds = (unsigned int) src1;
  const unsigned int csta = (bounds >> 5) & 0x1f;
  const unsigned int cstb = bounds & 0x1f;
  unsigned int mask;

  if (csta > cstb)
    return src2;

  mask = (((1u << (cstb - csta)) << 1) - 1u) << csta;
  return src2 | mask;
}
1372 
1373 
/*
 * _shfl: interleaves the bits of the two halfwords of src2.
 *   result[2*i]     = src2[i]        (lower halfword -> even positions)
 *   result[2*i + 1] = src2[16 + i]   (upper halfword -> odd positions)
 * for i = 0..15.
 *
 * Each bit is placed directly with unsigned shifts; the previous form
 * shifted a promoted int by 31, which is undefined behavior when the bit
 * is set.
 */
static inline unsigned int _shfl (unsigned int src2)
{
  unsigned int result = 0;
  int bit;

  for (bit = 0; bit < 16; bit++) {
    result |= ((src2 >> bit) & 1u) << (2 * bit);
    result |= ((src2 >> (16 + bit)) & 1u) << (2 * bit + 1);
  }
  return result;
}
1390 
/*
 * _shfl3: three-way bit interleave producing a 48-bit result.
 *   result[3*i]     = src2[i]        (lower halfword of src2)
 *   result[3*i + 1] = src1[i]        (lower halfword of src1)
 *   result[3*i + 2] = src1[16 + i]   (upper halfword of src1)
 * for i = 0..15.  The upper halfword of src2 is not used.
 */
static inline long long _shfl3 (unsigned int src1, unsigned int src2)
{
  unsigned long long interleaved = 0;
  int bit;

  for (bit = 0; bit < 16; bit++) {
    unsigned long long low_bit = (src2 >> bit) & 0x1;
    unsigned long long mid_bit = (src1 >> bit) & 0x1;
    unsigned long long high_bit = (src1 >> (bit + 16)) & 0x1;

    interleaved |= low_bit << (3 * bit);
    interleaved |= mid_bit << (3 * bit + 1);
    interleaved |= high_bit << (3 * bit + 2);
  }
  return interleaved;
}
1411 
1412 
1413 
/*
 * _shlmb: shifts src2 left by one byte and fills the vacated low byte
 * with the most significant byte of src1.
 */
static inline unsigned int _shlmb (unsigned int src1, unsigned int src2)
{
  unsigned int merged_byte = src1 >> 24;
  unsigned int shifted = src2 << 8;
  return shifted | merged_byte;
}
1418 
/*
 * _shrmb: shifts src2 right by one byte and fills the vacated high byte
 * with the least significant byte of src1.
 */
static inline unsigned int _shrmb (unsigned int src1, unsigned int src2)
{
  unsigned int merged_byte = src1 << 24;
  unsigned int shifted = src2 >> 8;
  return shifted | merged_byte;
}
1423 
1424 
/*
 * _shru2: logical right shift of each unsigned 16-bit lane of src1 by the
 * amount in the low five bits of src2.
 *
 * The shift count is masked to 0..31 (as _shr2 does); an unmasked count of
 * 32 or more shifted a promoted int by at least its width, which is
 * undefined behavior.  Lanes are kept in unsigned int so that repacking a
 * high lane >= 0x8000 does not shift into the sign bit of an int.
 */
static inline unsigned int _shru2 (unsigned int src1, unsigned int src2) {
  const unsigned int count = src2 & 31;
  unsigned int high_lane = ((src1 >> 16) & 0xFFFFu) >> count;
  unsigned int low_lane = (src1 & 0xFFFFu) >> count;
  return (high_lane << 16) | low_lane;
}
1432 
1433 
/*
 * _shr2: arithmetic right shift of each signed 16-bit lane of src1 by the
 * amount in the low five bits of src2.
 *
 * Lanes are sign-extended and repacked with masks and shifts rather than
 * by overlaying short[2] arrays with int pointers, which violated strict
 * aliasing and could be misaligned.
 */
static inline int _shr2 (int src1, unsigned int src2) {
  const unsigned int bits = (unsigned int) src1;
  const unsigned int count = src2 & 31;
  int low_lane = (int) ((bits & 0xffffu) ^ 0x8000u) - 0x8000;
  int high_lane = (int) (((bits >> 16) & 0xffffu) ^ 0x8000u) - 0x8000;

  low_lane >>= count;
  high_lane >>= count;

  return (int) ((((unsigned int) high_lane & 0xffffu) << 16) |
                ((unsigned int) low_lane & 0xffffu));
}
1443 
1444 
_smpy(int src1,int src2)1445 static inline int _smpy (int src1, int src2) {
1446 unsigned long long result;
1447 result =  (((short) src1 * (short) src2) << 1);
1448 
1449 if ((result & 0xffffffff) == 0x80000000){
1450     result = 0x7fffffff;
1451     _overflow = 1;
1452   }
1453 return (int) (result);
1454 }
1455 
_smpyh(int src1,int src2)1456 static inline int _smpyh (int src1, int src2) {
1457 unsigned long long result;
1458 result =  ((short) (src1 >> 16) * (short) (src2 >> 16)) << 1;
1459 if ((result & 0xffffffff) == 0x80000000){
1460     result = 0x7fffffff;
1461     _overflow = 1;
1462   }
1463 return (int) (result);
1464 }
1465 
_smpyhl(int src1,int src2)1466 static inline int _smpyhl (int src1, int src2) {
1467 unsigned long long result;
1468 result = ((short) (src1 >> 16) * (short) (src2)) << 1;
1469 if ((result & 0xffffffff) == 0x80000000){
1470     result = 0x7fffffff;
1471     _overflow = 1;
1472   }
1473 return (int) (result);
1474 }
1475 
_smpylh(int src1,int src2)1476 static inline int _smpylh (int src1, int src2) {
1477 unsigned long long result;
1478 result = ((short) (src1) * (short) (src2 >> 16)) << 1;
1479 if ((result & 0xffffffff) == 0x80000000){
1480     result = 0x7fffffff;
1481     _overflow = 1;
1482   }
1483 return (int) (result);
1484 }
1485 
_smpy2ll(int src1,int src2)1486 static inline long long _smpy2ll (int src1,  int src2) {
1487   short ls1 = src1 & 0XFFFF;
1488   short hs1 = src1 >> 16;
1489   short ls2 = src2 & 0XFFFF;
1490   short hs2 = src2 >> 16;
1491 
1492   unsigned long long hi = (hs1 * hs2) << 1;
1493   unsigned long long lo = ((ls1 * ls2) << 1) & 0xFFFFFFFF;
1494   if ((hi & 0xffffffff) == 0x80000000){
1495     hi = 0x7fffffff;
1496     _overflow = 1;
1497   }
1498 
1499   if ((lo & 0xffffffff) == 0x80000000){
1500     lo = 0x7fffffff;
1501     _overflow = 1;
1502   }
1503 
1504   return (hi << 32) | lo;
1505 }
1506 
1507 
1508 
1509 
/*
 * _smpy32: signed 32 x 32 multiply; returns the upper 32 bits of the
 * doubled 64-bit product.
 *
 * (product << 1) >> 32 is computed as product >> 31, which yields the same
 * value without left-shifting a negative or overflowing long long (both
 * undefined behavior).
 * NOTE(review): as in the original, no saturation is applied and _overflow
 * is not touched; for src1 == src2 == INT_MIN the doubled product does not
 * fit and the result is the truncated bit pattern.  Confirm whether the
 * hardware saturates this case.
 */
static inline int _smpy32(int src1, int src2)
{
  long long product = (long long) src1 * src2;
  return (int) (product >> 31);
}
1517 
/*
 * TEN_satu8: clamps a signed 16-bit value to the unsigned 8-bit range
 * 0..0xff.
 */
static inline unsigned char TEN_satu8 (short src)
{
  if (src < 0)
    return 0;
  return (src > 0xff) ? 0xff : src;
}
1527 
/*
 * _spack2: passes src1 and src2 through util_saturate_n_no_state(x, 16)
 * and packs the two results as halfwords:
 *   result[31:16] = processed src1
 *   result[15:0]  = processed src2
 * NOTE(review): util_saturate_n_no_state is defined elsewhere in this
 * header; presumably it saturates to a signed 16-bit value without
 * touching the overflow state -- confirm against its definition.
 */
static inline int _spack2 (int src1, int src2) {
  short upper = (short) util_saturate_n_no_state(src1,16);
  short lower = (short) util_saturate_n_no_state(src2,16);
  unsigned int upper_bits = (unsigned int) upper << 16;
  unsigned int lower_bits = ((int) lower) & 0xFFFF;
  return upper_bits | lower_bits;
}
1533 
1534 
/*
 * _spacku4: clamps each signed 16-bit lane of src1 and src2 to 0..0xff
 * with TEN_satu8() and packs the four bytes:
 *   result = { src1.hi, src1.lo, src2.hi, src2.lo }
 */
static inline unsigned int _spacku4 (int src1, int src2) {
  short lane0 = src2;
  short lane1 = src2 >> 16;
  short lane2 = src1;
  short lane3 = src1 >> 16;
  unsigned int byte0 = TEN_satu8(lane0);
  unsigned int byte1 = TEN_satu8(lane1);
  unsigned int byte2 = TEN_satu8(lane2);
  unsigned int byte3 = TEN_satu8(lane3);

  return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
}
1548 
1549 
1550 
/*
 * _sshl: forwards src1 to util_shift_right_saturate_n() with a negated
 * shift amount taken from the low 15 bits of src2 and a width of 32.
 * NOTE(review): util_shift_right_saturate_n is defined elsewhere in this
 * header; a negative amount presumably means a saturating left shift --
 * confirm against its definition.
 */
static inline int _sshl (int src1, unsigned int src2) {
  short left_amount = (short)(src2 & 0x7FFF);
  int right_amount = -left_amount;
  return (int) util_shift_right_saturate_n(src1, right_amount, 32);
}
1555 
1556 
1557 
1558 
/*
 * _sshvl: clamps the shift amount src1 to -31..31 and forwards src2 to
 * util_shift_right_saturate_n() with the negated amount and a width of 32.
 * NOTE(review): util_shift_right_saturate_n is defined elsewhere in this
 * header; the negation presumably turns its right shift into a left shift
 * -- confirm against its definition.
 */
static inline int _sshvl (int src2, int src1) {
  short amount = (src1 > 31) ? 31 : ((src1 < -31) ? -31 : src1);

  return (int) util_shift_right_saturate_n(src2, -amount, 32);
}
1570 
1571 
1572 
1573 
1574 
/*
 * _sshvr: clamps the shift amount src1 to -31..31 and forwards src2 to
 * util_shift_right_saturate_n() with that amount and a width of 32.
 * NOTE(review): util_shift_right_saturate_n is defined elsewhere in this
 * header; see its definition for how negative amounts are handled.
 */
static inline int _sshvr (int src2, int src1) {
  short amount = (src1 > 31) ? 31 : ((src1 < -31) ? -31 : src1);

  return (int) util_shift_right_saturate_n(src2, amount, 32);
}
1585 
1586 
1587 
1588 
_ssub(int src1,int src2)1589 static inline int _ssub(int src1, int src2) {
1590 signed long long res;
1591 signed long long maxv, minv;
1592 maxv = (1LL << (32-1)) - 1;
1593 minv = (-1LL << (32-1));
1594 res = (long long) src1 - (long long) src2;
1595 if (res > maxv) {
1596 	res = maxv;
1597 	_overflow = 1;
1598 	}
1599 else if (res < minv ) {
1600 	res = minv;
1601 	_overflow = 1;
1602 	}
1603 return (int) res;
1604 }
1605 
/*
 * _ssub2: per-halfword saturating subtraction of two packed signed 16-bit
 * pairs.
 *   result[31:16] = sat16(src1.hi - src2.hi)
 *   result[15:0]  = sat16(src1.lo - src2.lo)
 *
 * NOTE: TI c6x does NOT set the overflow register even if results
 * saturate, so _overflow is left untouched.
 *
 * Halfwords are extracted and repacked with unsigned masks and shifts.
 * This avoids the short[2]/int* overlay (aliasing/alignment violation with
 * endian-dependent lane order), left-shifting a negative value, and
 * shifting a lane into the sign bit of an int when packing.
 */
static inline int _ssub2(int src1, int src2) {
  const int maxv = (1 << (16-1)) - 1;
  const int minv = -(1 << (16-1));
  const unsigned int u1 = (unsigned int) src1;
  const unsigned int u2 = (unsigned int) src2;
  int r[2];
  int i;

  /* Sign-extend each lane and subtract: index 0 is the low halfword. */
  r[0] = ((int) ((u1 & 0xffffu) ^ 0x8000u) - 0x8000)
       - ((int) ((u2 & 0xffffu) ^ 0x8000u) - 0x8000);
  r[1] = ((int) (((u1 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000)
       - ((int) (((u2 >> 16) & 0xffffu) ^ 0x8000u) - 0x8000);

  for (i = 0; i < 2; i++) {
    if (r[i] > maxv)
      r[i] = maxv;
    else if (r[i] < minv)
      r[i] = minv;
  }

  return (int) ((((unsigned int) r[1] & 0xffffu) << 16) |
                ((unsigned int) r[0] & 0xffffu));
}
1643 
1644 
/*
 * _subabs4: per-byte absolute difference of the four unsigned 8-bit lanes
 * of src1 and src2.
 *
 * Lanes are handled as unsigned values throughout; the previous int
 * packing shifted the top lane left by 24, which overflowed int (undefined
 * behavior) whenever that lane's difference was >= 128.
 */
static inline int _subabs4 (int src1, int src2) {
  const unsigned int a = (unsigned int) src1;
  const unsigned int b = (unsigned int) src2;
  unsigned int packed = 0;
  int shift;

  /* Walk from the top byte down so lanes land in their original position. */
  for (shift = 24; shift >= 0; shift -= 8) {
    unsigned int x = (a >> shift) & 0xffu;
    unsigned int y = (b >> shift) & 0xffu;
    unsigned int diff = (x > y) ? (x - y) : (y - x);
    packed = (packed << 8) | diff;
  }

  return (int) packed;
}
1676 
1677 
/*
 * _subc: conditional subtract-and-shift step.
 *   if src1 >= src2: returns ((src1 - src2) << 1) + 1
 *   otherwise:       returns src1 << 1
 */
static inline unsigned int _subc (unsigned int src1, unsigned int src2)
{
  unsigned int shifted = src1 << 1;

  if (src1 < src2)
    return shifted;

  return ((src1 - src2) << 1) + 1;
}
1685 
1686 
1687 
/*
 * _sub2: per-halfword subtraction of two packed 16-bit pairs.  Each lane
 * wraps modulo 2^16; there is no saturation and no borrow between lanes.
 *
 * Implemented with unsigned masks and shifts rather than by overlaying
 * short[2] arrays with int pointers, which violated strict aliasing and
 * could be misaligned.
 */
static inline int _sub2(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	unsigned int low_lane = ((a & 0xffffu) - (b & 0xffffu)) & 0xffffu;
	unsigned int high_lane = ((a >> 16) - (b >> 16)) & 0xffffu;
	return (int) ((high_lane << 16) | low_lane);
}
1698 
1699 
/*
 * _sub4: per-byte subtraction of the four 8-bit lanes of src1 and src2.
 * Each lane wraps modulo 2^8; there is no saturation and no borrow between
 * lanes.
 *
 * Implemented with unsigned masks and shifts rather than by overlaying
 * char[4] arrays with int pointers, which violated strict aliasing and
 * could be misaligned.
 */
static inline int _sub4(int src1, int src2) {
	const unsigned int a = (unsigned int) src1;
	const unsigned int b = (unsigned int) src2;
	unsigned int packed = 0;
	int shift;

	/* Walk from the top byte down so lanes land in their original position. */
	for (shift = 24; shift >= 0; shift -= 8) {
		unsigned int diff = (((a >> shift) & 0xffu) - ((b >> shift) & 0xffu)) & 0xffu;
		packed = (packed << 8) | diff;
	}
	return (int) packed;
}
1712 
1713 
/*
 * _swap4: swaps the two bytes inside each halfword of src1.
 *   { b3, b2, b1, b0 } -> { b2, b3, b0, b1 }
 *
 * Done with unsigned masks; the previous form shifted a promoted unsigned
 * char left by 24 in int, which is undefined behavior when that byte is
 * >= 0x80.
 */
static inline int _swap4 (unsigned int src1) {
	unsigned int swapped = ((src1 & 0x00ff00ffu) << 8) | ((src1 & 0xff00ff00u) >> 8);
	return (int) swapped;
}
1722 
/*
 * _unpkhu4: zero-extends the two upper bytes of src1 into halfwords.
 *   result[31:16] = src1[31:24]
 *   result[15:0]  = src1[23:16]
 */
static inline unsigned int _unpkhu4 (unsigned int src1) {
	unsigned int top_byte = src1 >> 24;
	unsigned int next_byte = (src1 >> 16) & 0xff;
	unsigned int widened = top_byte << 16;
	widened |= next_byte;
	return widened;
}
1728 
/*
 * _unpklu4: zero-extends the two lower bytes of src1 into halfwords.
 *   result[31:16] = src1[15:8]
 *   result[15:0]  = src1[7:0]
 */
static inline unsigned int _unpklu4 (unsigned int src1) {
	unsigned int widened = ((src1 >> 8) & 0xff) << 16;
	widened |= src1 & 0xff;
	return widened;
}
1734 
1735 
1736 
1737 
/*
 * _xpnd2: expands the two low bits of src1 into halfword masks.
 *   bit 0 set -> result[15:0]  = 0xffff
 *   bit 1 set -> result[31:16] = 0xffff
 * Higher bits of src1 are ignored.
 */
static inline unsigned int _xpnd2 (unsigned int src1) {
      unsigned int mask = 0;

      if (src1 & 0x1)
        mask |= 0x0000ffff;
      if (src1 & 0x2)
        mask |= 0xffff0000;
      return mask;
}
1743 
/*
 * _xpnd4: expands the four low bits of src1 into byte masks: bit i set
 * makes byte i of the result 0xff (i = 0..3).  Higher bits of src1 are
 * ignored.
 */
static inline unsigned int _xpnd4 (unsigned int src1) {
      unsigned int mask = 0;
      int lane;

      for (lane = 0; lane < 4; lane++) {
        if ((src1 >> lane) & 0x1)
          mask |= 0xffu << (8 * lane);
      }
      return mask;
}
1752 
1753 
1754 
//     End of the intrinsics implemented in alphabetical order
1756 
1757 
1758 #endif /* __C6X_COMPAT__H */
1759