1 /******************************************************************************
2  * @file     none.h
 * @brief    Intrinsics when no DSP extension available
4  * @version  V1.9.0
5  * @date     20. July 2020
6  ******************************************************************************/
7 /*
8  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
9  *
10  * SPDX-License-Identifier: Apache-2.0
11  *
12  * Licensed under the Apache License, Version 2.0 (the License); you may
13  * not use this file except in compliance with the License.
14  * You may obtain a copy of the License at
15  *
16  * www.apache.org/licenses/LICENSE-2.0
17  *
18  * Unless required by applicable law or agreed to in writing, software
19  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
20  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  * See the License for the specific language governing permissions and
22  * limitations under the License.
23  */
24 
25 /*
26 
The definitions in this file allow some versions of the CMSIS-DSP
functions to be reused when building for a core (M0 for instance) or
a host where the DSP extension is not available.

Ideally a pure C version would be used instead.
But such versions are not always available, or they use a restricted
set of intrinsics.
34 
35 */
36 
37 #ifndef _NONE_H_
38 #define _NONE_H_
39 
40 #include "arm_math_types.h"
41 
42 #ifdef   __cplusplus
43 extern "C"
44 {
45 #endif
46 
47 
48 
49 /*
50 
Normally these kinds of definitions live in a compiler file
in Core or Core_A.

The MSVC compiler is a special case, however. The goal is very specific
to CMSIS-DSP: only to allow this library to be used from other
systems such as Python or Matlab.

MSVC is not going to be used to cross-compile to ARM. So, having an MSVC
compiler file in Core or Core_A would not make sense.
60 
61 */
62 #if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
__CLZ(uint32_t data)63     __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
64     {
65       if (data == 0U) { return 32U; }
66 
67       uint32_t count = 0U;
68       uint32_t mask = 0x80000000U;
69 
70       while ((data & mask) == 0U)
71       {
72         count += 1U;
73         mask = mask >> 1U;
74       }
75       return count;
76     }
77 
__SSAT(int32_t val,uint32_t sat)78   __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
79   {
80     if ((sat >= 1U) && (sat <= 32U))
81     {
82       const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
83       const int32_t min = -1 - max ;
84       if (val > max)
85       {
86         return max;
87       }
88       else if (val < min)
89       {
90         return min;
91       }
92     }
93     return val;
94   }
95 
__USAT(int32_t val,uint32_t sat)96   __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
97   {
98     if (sat <= 31U)
99     {
100       const uint32_t max = ((1U << sat) - 1U);
101       if (val > (int32_t)max)
102       {
103         return max;
104       }
105       else if (val < 0)
106       {
107         return 0U;
108       }
109     }
110     return (uint32_t)val;
111   }
112 
113  /**
114   \brief   Rotate Right in unsigned value (32 bit)
115   \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
116   \param [in]    op1  Value to rotate
117   \param [in]    op2  Number of Bits to rotate
118   \return               Rotated value
119  */
__ROR(uint32_t op1,uint32_t op2)120 __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
121 {
122   op2 %= 32U;
123   if (op2 == 0U)
124   {
125     return op1;
126   }
127   return (op1 >> op2) | (op1 << (32U - op2));
128 }
129 
130 
131 #endif
132 
/**
   * @brief Clips Q63 to Q31 values.
   * @param[in] x  64-bit value to clip
   * @return x when it fits in 32 signed bits, otherwise 0x7FFFFFFF for a
   *         positive x or 0x80000000 for a negative x
   */
  __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
  q63_t x)
  {
    const q31_t upper_word = (q31_t) (x >> 32);
    const q31_t low_sign   = (q31_t) x >> 31;

    /* x fits in 32 bits only when its upper word is the sign extension of the lower word. */
    if (upper_word != low_sign)
    {
      /* x >> 63 is 0 or -1, so the XOR yields 0x7FFFFFFF or 0x80000000. */
      return ((0x7FFFFFFF ^ ((q31_t) (x >> 63))));
    }
    return (q31_t) x;
  }
142 
143   /**
144    * @brief Clips Q63 to Q15 values.
145    */
clip_q63_to_q15(q63_t x)146   __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
147   q63_t x)
148   {
149     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
150       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
151   }
152 
153   /**
154    * @brief Clips Q31 to Q7 values.
155    */
clip_q31_to_q7(q31_t x)156   __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
157   q31_t x)
158   {
159     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
160       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
161   }
162 
163   /**
164    * @brief Clips Q31 to Q15 values.
165    */
clip_q31_to_q15(q31_t x)166   __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
167   q31_t x)
168   {
169     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
170       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
171   }
172 
173   /**
174    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
175    */
mult32x64(q63_t x,q31_t y)176   __STATIC_FORCEINLINE q63_t mult32x64(
177   q63_t x,
178   q31_t y)
179   {
180     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
181             (((q63_t) (x >> 32)                * y)      )  );
182   }
183 
/*
 * 32x32 multiply helpers that keep the most significant 32 bits of the 64-bit
 * product. Each macro assigns its result to 'a'. The _R variants add
 * 0x80000000 before the shift so the kept word is rounded.
 * Every argument is parenthesised so that expressions such as 'p + q' may be
 * passed for x or y without the cast or the multiplication binding to only
 * part of them. 'a' is evaluated more than once in the accumulate forms.
 */

/* SMMLAR: a = (a * 2^32 + x * y + 2^31) >> 32 */
#define multAcc_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMLSR: a = (a * 2^32 - x * y + 2^31) >> 32 */
#define multSub_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMULR: a = (x * y + 2^31) >> 32 */
#define mult_32x32_keep32_R(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)

/* SMMLA: a += (x * y) >> 32 */
#define multAcc_32x32_keep32(a, x, y) \
    (a) += (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMLS: a -= (x * y) >> 32 */
#define multSub_32x32_keep32(a, x, y) \
    (a) -= (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMUL: a = (x * y) >> 32 */
#define mult_32x32_keep32(a, x, y) \
    (a) = (q31_t) (((q63_t) (x) * (y) ) >> 32)
207 
#ifndef ARM_MATH_DSP
  /**
   * @brief definition to pack two 16 bit values.
   *
   * __PKHBT(ARG1, ARG2, ARG3): bits [15:0] of ARG1 combined with bits [31:16]
   *                            of (ARG2 << ARG3).
   * __PKHTB(ARG1, ARG2, ARG3): bits [31:16] of ARG1 combined with bits [15:0]
   *                            of (ARG2 >> ARG3).
   *
   * Both yield an int32_t. ARG3 is parenthesised so that a shift count written
   * as an expression (e.g. 'n & 31') is applied as a whole. __PKHBT shifts in
   * unsigned arithmetic because left-shifting a negative signed value is
   * undefined; __PKHTB keeps the signed right shift of the original.
   */
  #define __PKHBT(ARG1, ARG2, ARG3) ( (int32_t)( ( (uint32_t)(ARG1)            & 0x0000FFFFU) | \
                                                 (((uint32_t)(ARG2) << (ARG3)) & 0xFFFF0000U) ) )
  #define __PKHTB(ARG1, ARG2, ARG3) ( ( (int32_t)(ARG1)            & (int32_t)0xFFFF0000) | \
                                      (((int32_t)(ARG2) >> (ARG3)) & (int32_t)0x0000FFFF)  )
#endif
217 
218    /**
219    * @brief definition to pack four 8 bit values.
220    */
221 #ifndef ARM_MATH_BIG_ENDIAN
222   #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
223                                   (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
224                                   (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
225                                   (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
226 #else
227   #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
228                                   (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
229                                   (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
230                                   (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
231 #endif
232 
233 
234 
235 
236 /*
237  * @brief C custom defined intrinsic functions
238  */
239 #if !defined (ARM_MATH_DSP)
240 
241 
242   /*
243    * @brief C custom defined QADD8
244    */
__QADD8(uint32_t x,uint32_t y)245   __STATIC_FORCEINLINE uint32_t __QADD8(
246   uint32_t x,
247   uint32_t y)
248   {
249     q31_t r, s, t, u;
250 
251     r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
252     s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
253     t = __SSAT(((((q31_t)x <<  8) >> 24) + (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
254     u = __SSAT(((((q31_t)x      ) >> 24) + (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
255 
256     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
257   }
258 
259 
260   /*
261    * @brief C custom defined QSUB8
262    */
__QSUB8(uint32_t x,uint32_t y)263   __STATIC_FORCEINLINE uint32_t __QSUB8(
264   uint32_t x,
265   uint32_t y)
266   {
267     q31_t r, s, t, u;
268 
269     r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
270     s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
271     t = __SSAT(((((q31_t)x <<  8) >> 24) - (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
272     u = __SSAT(((((q31_t)x      ) >> 24) - (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
273 
274     return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
275   }
276 
277 
278   /*
279    * @brief C custom defined QADD16
280    */
__QADD16(uint32_t x,uint32_t y)281   __STATIC_FORCEINLINE uint32_t __QADD16(
282   uint32_t x,
283   uint32_t y)
284   {
285 /*  q31_t r,     s;  without initialisation 'arm_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
286     q31_t r = 0, s = 0;
287 
288     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
289     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
290 
291     return ((uint32_t)((s << 16) | (r      )));
292   }
293 
294 
295   /*
296    * @brief C custom defined SHADD16
297    */
__SHADD16(uint32_t x,uint32_t y)298   __STATIC_FORCEINLINE uint32_t __SHADD16(
299   uint32_t x,
300   uint32_t y)
301   {
302     q31_t r, s;
303 
304     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
305     s = (((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
306 
307     return ((uint32_t)((s << 16) | (r      )));
308   }
309 
310 
311   /*
312    * @brief C custom defined QSUB16
313    */
__QSUB16(uint32_t x,uint32_t y)314   __STATIC_FORCEINLINE uint32_t __QSUB16(
315   uint32_t x,
316   uint32_t y)
317   {
318     q31_t r, s;
319 
320     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
321     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
322 
323     return ((uint32_t)((s << 16) | (r      )));
324   }
325 
326 
327   /*
328    * @brief C custom defined SHSUB16
329    */
__SHSUB16(uint32_t x,uint32_t y)330   __STATIC_FORCEINLINE uint32_t __SHSUB16(
331   uint32_t x,
332   uint32_t y)
333   {
334     q31_t r, s;
335 
336     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
337     s = (((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
338 
339     return ((uint32_t)((s << 16) | (r      )));
340   }
341 
342 
343   /*
344    * @brief C custom defined QASX
345    */
__QASX(uint32_t x,uint32_t y)346   __STATIC_FORCEINLINE uint32_t __QASX(
347   uint32_t x,
348   uint32_t y)
349   {
350     q31_t r, s;
351 
352     r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
353     s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
354 
355     return ((uint32_t)((s << 16) | (r      )));
356   }
357 
358 
359   /*
360    * @brief C custom defined SHASX
361    */
__SHASX(uint32_t x,uint32_t y)362   __STATIC_FORCEINLINE uint32_t __SHASX(
363   uint32_t x,
364   uint32_t y)
365   {
366     q31_t r, s;
367 
368     r = (((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
369     s = (((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
370 
371     return ((uint32_t)((s << 16) | (r      )));
372   }
373 
374 
375   /*
376    * @brief C custom defined QSAX
377    */
__QSAX(uint32_t x,uint32_t y)378   __STATIC_FORCEINLINE uint32_t __QSAX(
379   uint32_t x,
380   uint32_t y)
381   {
382     q31_t r, s;
383 
384     r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
385     s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
386 
387     return ((uint32_t)((s << 16) | (r      )));
388   }
389 
390 
391   /*
392    * @brief C custom defined SHSAX
393    */
__SHSAX(uint32_t x,uint32_t y)394   __STATIC_FORCEINLINE uint32_t __SHSAX(
395   uint32_t x,
396   uint32_t y)
397   {
398     q31_t r, s;
399 
400     r = (((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
401     s = (((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
402 
403     return ((uint32_t)((s << 16) | (r      )));
404   }
405 
406 
407   /*
408    * @brief C custom defined SMUSDX
409    */
__SMUSDX(uint32_t x,uint32_t y)410   __STATIC_FORCEINLINE uint32_t __SMUSDX(
411   uint32_t x,
412   uint32_t y)
413   {
414     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
415                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
416   }
417 
418   /*
419    * @brief C custom defined SMUADX
420    */
__SMUADX(uint32_t x,uint32_t y)421   __STATIC_FORCEINLINE uint32_t __SMUADX(
422   uint32_t x,
423   uint32_t y)
424   {
425     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
426                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
427   }
428 
429 
430   /*
431    * @brief C custom defined QADD
432    */
__QADD(int32_t x,int32_t y)433   __STATIC_FORCEINLINE int32_t __QADD(
434   int32_t x,
435   int32_t y)
436   {
437     return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
438   }
439 
440 
441   /*
442    * @brief C custom defined QSUB
443    */
__QSUB(int32_t x,int32_t y)444   __STATIC_FORCEINLINE int32_t __QSUB(
445   int32_t x,
446   int32_t y)
447   {
448     return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
449   }
450 
451 
452   /*
453    * @brief C custom defined SMLAD
454    */
__SMLAD(uint32_t x,uint32_t y,uint32_t sum)455   __STATIC_FORCEINLINE uint32_t __SMLAD(
456   uint32_t x,
457   uint32_t y,
458   uint32_t sum)
459   {
460     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
461                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
462                        ( ((q31_t)sum    )                                  )   ));
463   }
464 
465 
466   /*
467    * @brief C custom defined SMLADX
468    */
__SMLADX(uint32_t x,uint32_t y,uint32_t sum)469   __STATIC_FORCEINLINE uint32_t __SMLADX(
470   uint32_t x,
471   uint32_t y,
472   uint32_t sum)
473   {
474     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
475                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
476                        ( ((q31_t)sum    )                                  )   ));
477   }
478 
479 
480   /*
481    * @brief C custom defined SMLSDX
482    */
__SMLSDX(uint32_t x,uint32_t y,uint32_t sum)483   __STATIC_FORCEINLINE uint32_t __SMLSDX(
484   uint32_t x,
485   uint32_t y,
486   uint32_t sum)
487   {
488     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
489                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
490                        ( ((q31_t)sum    )                                  )   ));
491   }
492 
493 
494   /*
495    * @brief C custom defined SMLALD
496    */
__SMLALD(uint32_t x,uint32_t y,uint64_t sum)497   __STATIC_FORCEINLINE uint64_t __SMLALD(
498   uint32_t x,
499   uint32_t y,
500   uint64_t sum)
501   {
502 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
503     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
504                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
505                        ( ((q63_t)sum    )                                  )   ));
506   }
507 
508 
509   /*
510    * @brief C custom defined SMLALDX
511    */
__SMLALDX(uint32_t x,uint32_t y,uint64_t sum)512   __STATIC_FORCEINLINE uint64_t __SMLALDX(
513   uint32_t x,
514   uint32_t y,
515   uint64_t sum)
516   {
517 /*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
518     return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
519                        ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
520                        ( ((q63_t)sum    )                                  )   ));
521   }
522 
523 
524   /*
525    * @brief C custom defined SMUAD
526    */
__SMUAD(uint32_t x,uint32_t y)527   __STATIC_FORCEINLINE uint32_t __SMUAD(
528   uint32_t x,
529   uint32_t y)
530   {
531     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
532                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
533   }
534 
535 
536   /*
537    * @brief C custom defined SMUSD
538    */
__SMUSD(uint32_t x,uint32_t y)539   __STATIC_FORCEINLINE uint32_t __SMUSD(
540   uint32_t x,
541   uint32_t y)
542   {
543     return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
544                        ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
545   }
546 
547 
548   /*
549    * @brief C custom defined SXTB16
550    */
__SXTB16(uint32_t x)551   __STATIC_FORCEINLINE uint32_t __SXTB16(
552   uint32_t x)
553   {
554     return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
555                        ((((q31_t)x <<  8) >>  8) & (q31_t)0xFFFF0000)  ));
556   }
557 
558   /*
559    * @brief C custom defined SMMLA
560    */
__SMMLA(int32_t x,int32_t y,int32_t sum)561   __STATIC_FORCEINLINE int32_t __SMMLA(
562   int32_t x,
563   int32_t y,
564   int32_t sum)
565   {
566     return (sum + (int32_t) (((int64_t) x * y) >> 32));
567   }
568 
569 #endif /* !defined (ARM_MATH_DSP) */
570 
571 
572 #ifdef   __cplusplus
573 }
574 #endif
575 
#endif /* ifndef _NONE_H_ */
577