1 /******************************************************************************
2 * @file none.h
 * @brief Intrinsics when no DSP extension is available
4 * @version V1.9.0
5 * @date 20. July 2020
6 ******************************************************************************/
7 /*
8 * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
9 *
10 * SPDX-License-Identifier: Apache-2.0
11 *
12 * Licensed under the Apache License, Version 2.0 (the License); you may
13 * not use this file except in compliance with the License.
14 * You may obtain a copy of the License at
15 *
16 * www.apache.org/licenses/LICENSE-2.0
17 *
18 * Unless required by applicable law or agreed to in writing, software
19 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
20 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 * See the License for the specific language governing permissions and
22 * limitations under the License.
23 */
24
/*

The definitions in this file allow reusing some versions of the
CMSIS-DSP to build on a core (an M0, for instance) or on a host where
the DSP extension is not available.

Ideally, a pure C version should have been used instead.
But those are not always available, or they use a restricted set
of intrinsics.

*/
36
37 #ifndef _NONE_H_
38 #define _NONE_H_
39
40 #include "arm_math_types.h"
41
42 #ifdef __cplusplus
43 extern "C"
44 {
45 #endif
46
47
48
49 /*
50
51 Normally those kind of definitions are in a compiler file
52 in Core or Core_A.
53
54 But for MSVC compiler it is a bit special. The goal is very specific
55 to CMSIS-DSP and only to allow the use of this library from other
56 systems like Python or Matlab.
57
58 MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
59 compiler file in Core or Core_A would not make sense.
60
61 */
62 #if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
__CLZ(uint32_t data)63 __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
64 {
65 if (data == 0U) { return 32U; }
66
67 uint32_t count = 0U;
68 uint32_t mask = 0x80000000U;
69
70 while ((data & mask) == 0U)
71 {
72 count += 1U;
73 mask = mask >> 1U;
74 }
75 return count;
76 }
77
__SSAT(int32_t val,uint32_t sat)78 __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
79 {
80 if ((sat >= 1U) && (sat <= 32U))
81 {
82 const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
83 const int32_t min = -1 - max ;
84 if (val > max)
85 {
86 return max;
87 }
88 else if (val < min)
89 {
90 return min;
91 }
92 }
93 return val;
94 }
95
__USAT(int32_t val,uint32_t sat)96 __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
97 {
98 if (sat <= 31U)
99 {
100 const uint32_t max = ((1U << sat) - 1U);
101 if (val > (int32_t)max)
102 {
103 return max;
104 }
105 else if (val < 0)
106 {
107 return 0U;
108 }
109 }
110 return (uint32_t)val;
111 }
112
113 /**
114 \brief Rotate Right in unsigned value (32 bit)
115 \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
116 \param [in] op1 Value to rotate
117 \param [in] op2 Number of Bits to rotate
118 \return Rotated value
119 */
__ROR(uint32_t op1,uint32_t op2)120 __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
121 {
122 op2 %= 32U;
123 if (op2 == 0U)
124 {
125 return op1;
126 }
127 return (op1 >> op2) | (op1 << (32U - op2));
128 }
129
130
131 #endif
132
133 /**
134 * @brief Clips Q63 to Q31 values.
135 */
clip_q63_to_q31(q63_t x)136 __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
137 q63_t x)
138 {
139 return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
140 ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
141 }
142
143 /**
144 * @brief Clips Q63 to Q15 values.
145 */
clip_q63_to_q15(q63_t x)146 __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
147 q63_t x)
148 {
149 return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
150 ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
151 }
152
153 /**
154 * @brief Clips Q31 to Q7 values.
155 */
clip_q31_to_q7(q31_t x)156 __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
157 q31_t x)
158 {
159 return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
160 ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
161 }
162
163 /**
164 * @brief Clips Q31 to Q15 values.
165 */
clip_q31_to_q15(q31_t x)166 __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
167 q31_t x)
168 {
169 return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
170 ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
171 }
172
173 /**
174 * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
175 */
mult32x64(q63_t x,q31_t y)176 __STATIC_FORCEINLINE q63_t mult32x64(
177 q63_t x,
178 q31_t y)
179 {
180 return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
181 (((q63_t) (x >> 32) * y) ) );
182 }
183
/* NOTE: the macros below are function-like and may evaluate their
   arguments more than once — pass simple expressions only.
   The 0x80000000LL term implements round-to-nearest before keeping
   the top 32 bits of the 64-bit product. */

/* SMMLAR : a = a + round(((q63_t)x * y) >> 32) */
#define multAcc_32x32_keep32_R(a, x, y) \
    a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32)

/* SMMLSR : a = a - round(((q63_t)x * y) >> 32) */
#define multSub_32x32_keep32_R(a, x, y) \
    a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32)

/* SMMULR : a = round(((q63_t)x * y) >> 32) */
#define mult_32x32_keep32_R(a, x, y) \
    a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32)

/* SMMLA : accumulate the truncated top 32 bits of the product */
#define multAcc_32x32_keep32(a, x, y) \
    a += (q31_t) (((q63_t) x * y) >> 32)

/* SMMLS : subtract the truncated top 32 bits of the product */
#define multSub_32x32_keep32(a, x, y) \
    a -= (q31_t) (((q63_t) x * y) >> 32)

/* SMMUL : keep the truncated top 32 bits of the product */
#define mult_32x32_keep32(a, x, y) \
    a = (q31_t) (((q63_t) x * y ) >> 32)
207
#ifndef ARM_MATH_DSP
  /**
   * @brief definition to pack two 16 bit values.
   */
  /* __PKHBT: result[15:0] = ARG1[15:0], result[31:16] = (ARG2 << ARG3)[31:16].
     __PKHTB: result[31:16] = ARG1[31:16], result[15:0] = (ARG2 >> ARG3)[15:0].
     ARG3 is a shift amount applied to ARG2 before packing. */
  #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0x0000FFFF) | \
                                      (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)    )
  #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0xFFFF0000) | \
                                      (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)    )
#endif
217
/**
 * @brief definition to pack four 8 bit values.
 *
 * v0 maps to the lowest-addressed byte of the packed word: bits [7:0]
 * on little-endian targets, bits [31:24] when ARM_MATH_BIG_ENDIAN is set.
 */
#ifndef ARM_MATH_BIG_ENDIAN
  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
                                  (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
                                  (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
                                  (((int32_t)(v3) << 24) & (int32_t)0xFF000000)    )
#else
  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
                                  (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
                                  (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
                                  (((int32_t)(v0) << 24) & (int32_t)0xFF000000)    )
#endif
232
233
234
235
236 /*
237 * @brief C custom defined intrinsic functions
238 */
239 #if !defined (ARM_MATH_DSP)
240
241
242 /*
243 * @brief C custom defined QADD8
244 */
__QADD8(uint32_t x,uint32_t y)245 __STATIC_FORCEINLINE uint32_t __QADD8(
246 uint32_t x,
247 uint32_t y)
248 {
249 q31_t r, s, t, u;
250
251 r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
252 s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
253 t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
254 u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
255
256 return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r )));
257 }
258
259
260 /*
261 * @brief C custom defined QSUB8
262 */
__QSUB8(uint32_t x,uint32_t y)263 __STATIC_FORCEINLINE uint32_t __QSUB8(
264 uint32_t x,
265 uint32_t y)
266 {
267 q31_t r, s, t, u;
268
269 r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
270 s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
271 t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
272 u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
273
274 return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r )));
275 }
276
277
278 /*
279 * @brief C custom defined QADD16
280 */
__QADD16(uint32_t x,uint32_t y)281 __STATIC_FORCEINLINE uint32_t __QADD16(
282 uint32_t x,
283 uint32_t y)
284 {
285 /* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */
286 q31_t r = 0, s = 0;
287
288 r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
289 s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
290
291 return ((uint32_t)((s << 16) | (r )));
292 }
293
294
295 /*
296 * @brief C custom defined SHADD16
297 */
__SHADD16(uint32_t x,uint32_t y)298 __STATIC_FORCEINLINE uint32_t __SHADD16(
299 uint32_t x,
300 uint32_t y)
301 {
302 q31_t r, s;
303
304 r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
305 s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
306
307 return ((uint32_t)((s << 16) | (r )));
308 }
309
310
311 /*
312 * @brief C custom defined QSUB16
313 */
__QSUB16(uint32_t x,uint32_t y)314 __STATIC_FORCEINLINE uint32_t __QSUB16(
315 uint32_t x,
316 uint32_t y)
317 {
318 q31_t r, s;
319
320 r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
321 s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
322
323 return ((uint32_t)((s << 16) | (r )));
324 }
325
326
327 /*
328 * @brief C custom defined SHSUB16
329 */
__SHSUB16(uint32_t x,uint32_t y)330 __STATIC_FORCEINLINE uint32_t __SHSUB16(
331 uint32_t x,
332 uint32_t y)
333 {
334 q31_t r, s;
335
336 r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
337 s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
338
339 return ((uint32_t)((s << 16) | (r )));
340 }
341
342
343 /*
344 * @brief C custom defined QASX
345 */
__QASX(uint32_t x,uint32_t y)346 __STATIC_FORCEINLINE uint32_t __QASX(
347 uint32_t x,
348 uint32_t y)
349 {
350 q31_t r, s;
351
352 r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
353 s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
354
355 return ((uint32_t)((s << 16) | (r )));
356 }
357
358
359 /*
360 * @brief C custom defined SHASX
361 */
__SHASX(uint32_t x,uint32_t y)362 __STATIC_FORCEINLINE uint32_t __SHASX(
363 uint32_t x,
364 uint32_t y)
365 {
366 q31_t r, s;
367
368 r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
369 s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
370
371 return ((uint32_t)((s << 16) | (r )));
372 }
373
374
375 /*
376 * @brief C custom defined QSAX
377 */
__QSAX(uint32_t x,uint32_t y)378 __STATIC_FORCEINLINE uint32_t __QSAX(
379 uint32_t x,
380 uint32_t y)
381 {
382 q31_t r, s;
383
384 r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
385 s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
386
387 return ((uint32_t)((s << 16) | (r )));
388 }
389
390
391 /*
392 * @brief C custom defined SHSAX
393 */
__SHSAX(uint32_t x,uint32_t y)394 __STATIC_FORCEINLINE uint32_t __SHSAX(
395 uint32_t x,
396 uint32_t y)
397 {
398 q31_t r, s;
399
400 r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
401 s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
402
403 return ((uint32_t)((s << 16) | (r )));
404 }
405
406
407 /*
408 * @brief C custom defined SMUSDX
409 */
__SMUSDX(uint32_t x,uint32_t y)410 __STATIC_FORCEINLINE uint32_t __SMUSDX(
411 uint32_t x,
412 uint32_t y)
413 {
414 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
415 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
416 }
417
418 /*
419 * @brief C custom defined SMUADX
420 */
__SMUADX(uint32_t x,uint32_t y)421 __STATIC_FORCEINLINE uint32_t __SMUADX(
422 uint32_t x,
423 uint32_t y)
424 {
425 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
426 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
427 }
428
429
430 /*
431 * @brief C custom defined QADD
432 */
__QADD(int32_t x,int32_t y)433 __STATIC_FORCEINLINE int32_t __QADD(
434 int32_t x,
435 int32_t y)
436 {
437 return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
438 }
439
440
441 /*
442 * @brief C custom defined QSUB
443 */
__QSUB(int32_t x,int32_t y)444 __STATIC_FORCEINLINE int32_t __QSUB(
445 int32_t x,
446 int32_t y)
447 {
448 return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
449 }
450
451
452 /*
453 * @brief C custom defined SMLAD
454 */
__SMLAD(uint32_t x,uint32_t y,uint32_t sum)455 __STATIC_FORCEINLINE uint32_t __SMLAD(
456 uint32_t x,
457 uint32_t y,
458 uint32_t sum)
459 {
460 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
461 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) +
462 ( ((q31_t)sum ) ) ));
463 }
464
465
466 /*
467 * @brief C custom defined SMLADX
468 */
__SMLADX(uint32_t x,uint32_t y,uint32_t sum)469 __STATIC_FORCEINLINE uint32_t __SMLADX(
470 uint32_t x,
471 uint32_t y,
472 uint32_t sum)
473 {
474 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
475 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
476 ( ((q31_t)sum ) ) ));
477 }
478
479
480 /*
481 * @brief C custom defined SMLSDX
482 */
__SMLSDX(uint32_t x,uint32_t y,uint32_t sum)483 __STATIC_FORCEINLINE uint32_t __SMLSDX(
484 uint32_t x,
485 uint32_t y,
486 uint32_t sum)
487 {
488 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
489 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
490 ( ((q31_t)sum ) ) ));
491 }
492
493
494 /*
495 * @brief C custom defined SMLALD
496 */
__SMLALD(uint32_t x,uint32_t y,uint64_t sum)497 __STATIC_FORCEINLINE uint64_t __SMLALD(
498 uint32_t x,
499 uint32_t y,
500 uint64_t sum)
501 {
502 /* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
503 return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
504 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) +
505 ( ((q63_t)sum ) ) ));
506 }
507
508
509 /*
510 * @brief C custom defined SMLALDX
511 */
__SMLALDX(uint32_t x,uint32_t y,uint64_t sum)512 __STATIC_FORCEINLINE uint64_t __SMLALDX(
513 uint32_t x,
514 uint32_t y,
515 uint64_t sum)
516 {
517 /* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
518 return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
519 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
520 ( ((q63_t)sum ) ) ));
521 }
522
523
524 /*
525 * @brief C custom defined SMUAD
526 */
__SMUAD(uint32_t x,uint32_t y)527 __STATIC_FORCEINLINE uint32_t __SMUAD(
528 uint32_t x,
529 uint32_t y)
530 {
531 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
532 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) ));
533 }
534
535
536 /*
537 * @brief C custom defined SMUSD
538 */
__SMUSD(uint32_t x,uint32_t y)539 __STATIC_FORCEINLINE uint32_t __SMUSD(
540 uint32_t x,
541 uint32_t y)
542 {
543 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
544 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) ));
545 }
546
547
548 /*
549 * @brief C custom defined SXTB16
550 */
__SXTB16(uint32_t x)551 __STATIC_FORCEINLINE uint32_t __SXTB16(
552 uint32_t x)
553 {
554 return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
555 ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) ));
556 }
557
558 /*
559 * @brief C custom defined SMMLA
560 */
__SMMLA(int32_t x,int32_t y,int32_t sum)561 __STATIC_FORCEINLINE int32_t __SMMLA(
562 int32_t x,
563 int32_t y,
564 int32_t sum)
565 {
566 return (sum + (int32_t) (((int64_t) x * y) >> 32));
567 }
568
569 #endif /* !defined (ARM_MATH_DSP) */
570
571
572 #ifdef __cplusplus
573 }
574 #endif
575
#endif /* ifndef _NONE_H_ */
577