/*
 * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ----------------------------------------------------------------------
 * Project:     CMSIS NN Library
 * Title:       arm_nn_compiler.h
 * Description: Generic compiler header
 *
 * $Date:       31 January 2023
 * $Revision:   V.1.1.0
 *
 * Target :     Arm(R) M-Profile Architecture
 * -------------------------------------------------------------------- */

#ifndef ARM_NN_COMPILER_H
#define ARM_NN_COMPILER_H

// stdint.h is included so the uint8_t/uint32_t types used below are always declared
#include <stdint.h>

/**
 *
 * @brief Arm C-Language Extension (ACLE) Includes
 *
 */

#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)

#ifndef __ASM
#define __ASM __asm
#endif
#ifndef __INLINE
#define __INLINE __inline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static __inline
#endif
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE __attribute__((always_inline)) static __inline
#endif
#ifndef __RESTRICT
#define __RESTRICT __restrict
#endif

#elif defined(__ICCARM__)

#warning IAR support is not tested
#ifndef __ASM
#define __ASM __asm
#endif
#ifndef __INLINE
#define __INLINE inline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static inline
#endif
#ifndef __FORCEINLINE
#define __FORCEINLINE _Pragma("inline=forced")
#endif
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE __FORCEINLINE __STATIC_INLINE
#endif

#elif defined(_MSC_VER)

// Building for processors other than Arm Cortex-M is not tested or supported.
// Use this section to stub out any macros or intrinsics as needed.
#warning Unsupported compiler
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE static __forceinline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static __inline
#endif
#ifndef __ALIGNED
#define __ALIGNED(x) __declspec(align(x))
#endif

#elif defined(__GNUC__)

#ifndef __ASM
#define __ASM __asm
#endif
#ifndef __INLINE
#define __INLINE inline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static inline
#endif
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE __attribute__((always_inline)) static inline
#endif
#ifndef __RESTRICT
#define __RESTRICT __restrict
#endif

#else

#error Unsupported compiler. Add support as needed

#endif
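
/*
 * Illustrative use of the portability macros above (a sketch, not part of
 * this header's API): a translation-unit-local helper that should always be
 * inlined, regardless of the compiler in use. The helper name is hypothetical.
 *
 *     __STATIC_FORCEINLINE int32_t add_example(int32_t a, int32_t b)
 *     {
 *         return a + b;
 *     }
 */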

/**
 *
 * @brief Compiler specific diagnostic adjustment / fixes if applicable
 *
 */

// Note: __ARM_ARCH is used here with the M-profile architecture as the target.
#if defined(__GNUC__)
#if (__GNUC__ == 12 && (__GNUC_MINOR__ <= 2)) && defined(__ARM_ARCH)
// Workaround for 'Internal Compiler Error' on Arm GNU Toolchain release 12.2.x
// https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607963.html
#define ARM_GCC_12_2_ICE
#endif
#endif
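
/*
 * A hypothetical consumer of the flag above would guard the affected
 * optimised code and fall back to a portable path, e.g.:
 *
 *     #if defined(ARM_GCC_12_2_ICE)
 *         // Plain C fallback avoiding the construct that triggers the ICE
 *     #else
 *         // Optimised path
 *     #endif
 */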

#if ((__ARM_FEATURE_MVE & 3) == 3) || (__ARM_FEATURE_MVE & 1)
#include <arm_mve.h>
#endif

#if defined(__ARM_ARCH) || defined(__ARM_ACLE)
#include <arm_acle.h>
#endif

/**
 *
 * @brief ACLE and Intrinsics
 *
 */

// Note: Keep the __GNUC__ checks, used to detect GCC, at the end,
// as __GNUC__ is defined by non-GCC compilers as well

/* Common intrinsics for all architectures */
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) || defined(__ICCARM__)
#define CLZ __clz
#elif defined(__GNUC__)
/**
  \brief   Count leading zeros
  \details Counts the number of leading zeros of a data value.
  \param [in]  value  Value to count the leading zeros
  \return             number of leading zeros in value
 */
__STATIC_FORCEINLINE uint8_t CLZ(uint32_t value)
{
    /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
       __builtin_clz(0) is undefined behaviour, so handle this case specially.
       This guarantees Arm-compatible results if compiling on a non-Arm
       target, and ensures the compiler doesn't decide to activate any
       optimisations using the logic "value was passed to __builtin_clz, so it
       is non-zero".
       ARM GCC 7.3 and possibly earlier will optimise this test away, leaving a
       single CLZ instruction.
     */
    if (value == 0U)
    {
        return 32U;
    }
    return __builtin_clz(value);
}
#endif
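
/*
 * Example values (illustrative):
 *
 *     CLZ(0x80000000U) == 0
 *     CLZ(0x00000001U) == 31
 *     CLZ(0x00000000U) == 32   // well-defined here, unlike __builtin_clz(0)
 */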

// ACLE intrinsics under groups __ARM_FEATURE_QBIT, __ARM_FEATURE_DSP, __ARM_FEATURE_SAT and __ARM_FEATURE_SIMD32

// Note: Just __ARM_FEATURE_DSP is checked to collect all intrinsics from the above-mentioned groups

#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))

// Common intrinsics
#define SMLABB __smlabb
#define SMLATT __smlatt
#define QADD __qadd
#define QSUB8 __qsub8
#define QSUB16 __qsub16
#define SADD16 __sadd16

// Compiler specific variants of intrinsics. Create a new section or file for IAR if needed
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) || defined(__ICCARM__)

#define SMULBB __smulbb
#define SMULTT __smultt
#define ROR __ror
#define SXTB16 __sxtb16
#define SXTAB16 __sxtab16
#define SXTB16_RORn(ARG1, ARG2) SXTB16(ROR(ARG1, ARG2))
#define SXTAB16_RORn(ARG1, ARG2, ARG3) SXTAB16(ARG1, ROR(ARG2, ARG3))
#define SMLAD __smlad
// PKH<XY> translates into pkh<xy> on AC6
#define PKHBT(ARG1, ARG2, ARG3) \
    (((((uint32_t)(ARG1))) & 0x0000FFFFUL) | ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL))
#define PKHTB(ARG1, ARG2, ARG3) \
    (((((uint32_t)(ARG1))) & 0xFFFF0000UL) | ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL))
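
/*
 * Worked example (illustrative): PKHBT packs the bottom halfword of ARG1
 * with the top halfword of (ARG2 << ARG3); PKHTB packs the top halfword of
 * ARG1 with the bottom halfword of (ARG2 >> ARG3).
 *
 *     PKHBT(0xAAAABBBBUL, 0xCCCCDDDDUL, 16) == 0xDDDDBBBBUL
 *     PKHTB(0xAAAABBBBUL, 0xCCCCDDDDUL, 16) == 0xAAAACCCCUL
 */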

#elif defined(__GNUC__)

#define PKHBT(ARG1, ARG2, ARG3) \
    __extension__({ \
        uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
        __ASM("pkhbt %0, %1, %2, lsl %3" : "=r"(__RES) : "r"(__ARG1), "r"(__ARG2), "I"(ARG3)); \
        __RES; \
    })
#define PKHTB(ARG1, ARG2, ARG3) \
    __extension__({ \
        uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
        if (ARG3 == 0) \
            __ASM("pkhtb %0, %1, %2" : "=r"(__RES) : "r"(__ARG1), "r"(__ARG2)); \
        else \
            __ASM("pkhtb %0, %1, %2, asr %3" : "=r"(__RES) : "r"(__ARG1), "r"(__ARG2), "I"(ARG3)); \
        __RES; \
    })
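
/* Note: the "I" operand constraint requires the shift amount ARG3 to be a
   compile-time constant that is encodable as an immediate; a run-time
   variable shift will not compile with these macros. */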

__STATIC_FORCEINLINE uint32_t SXTAB16(uint32_t op1, uint32_t op2)
{
    uint32_t result;

    __ASM("sxtab16 %0, %1, %2" : "=r"(result) : "r"(op1), "r"(op2));
    return (result);
}

__STATIC_FORCEINLINE uint32_t SXTB16(uint32_t op1)
{
    uint32_t result;

    __ASM("sxtb16 %0, %1" : "=r"(result) : "r"(op1));
    return (result);
}
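
/*
 * Worked example (illustrative): SXTB16 sign-extends bytes 0 and 2 into the
 * two 16-bit lanes of the result; SXTAB16 additionally adds the
 * corresponding 16-bit lanes of its first operand.
 *
 *     SXTB16(0x00800070UL)                == 0xFF800070UL
 *     SXTAB16(0x00010001UL, 0x00800070UL) == 0xFF810071UL
 */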

// __smlad is defined by GCC, but results in a performance drop (tested on Arm GNU Toolchain versions 11.x and 12.x)
__STATIC_FORCEINLINE uint32_t SMLAD(uint32_t op1, uint32_t op2, uint32_t op3)
{
    uint32_t result;

    __ASM volatile("smlad %0, %1, %2, %3" : "=r"(result) : "r"(op1), "r"(op2), "r"(op3));
    return (result);
}
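
/*
 * Worked example (illustrative): SMLAD is a dual signed 16 x 16 multiply
 * with accumulate, i.e. (op1.lo * op2.lo) + (op1.hi * op2.hi) + op3.
 *
 *     SMLAD(0x00020003UL, 0x00040005UL, 10UL) == (3 * 5) + (2 * 4) + 10 == 33
 */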

__STATIC_FORCEINLINE uint32_t ROR(uint32_t op1, uint32_t op2)
{
    op2 %= 32U;
    if (op2 == 0U)
    {
        return op1;
    }
    return (op1 >> op2) | (op1 << (32U - op2));
}
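
/*
 * Worked example (illustrative): the rotate amount is reduced modulo 32,
 * so ROR(x, 32U) == x.
 *
 *     ROR(0x12345678UL, 8U) == 0x78123456UL
 */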

__STATIC_FORCEINLINE uint32_t SXTB16_RORn(uint32_t op1, uint32_t rotate)
{
    uint32_t result;
    if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U)))
    {
        __ASM volatile("sxtb16 %0, %1, ROR %2" : "=r"(result) : "r"(op1), "i"(rotate));
    }
    else
    {
        result = SXTB16(ROR(op1, rotate));
    }
    return result;
}

__STATIC_FORCEINLINE uint32_t SXTAB16_RORn(uint32_t op1, uint32_t op2, uint32_t rotate)
{
    uint32_t result;
    if (__builtin_constant_p(rotate) && ((rotate == 8U) || (rotate == 16U) || (rotate == 24U)))
    {
        __ASM volatile("sxtab16 %0, %1, %2, ROR %3" : "=r"(result) : "r"(op1), "r"(op2), "i"(rotate));
    }
    else
    {
        result = SXTAB16(op1, ROR(op2, rotate));
    }
    return result;
}
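
/*
 * Note: the sxtb16/sxtab16 encodings only support rotations of 0, 8, 16 and
 * 24 bits, so the rotate is folded into the instruction only when it is a
 * matching compile-time constant; any other amount takes the fallback path
 * through ROR plus the base intrinsic. For example (illustrative),
 * SXTB16_RORn(0x11223344UL, 8U) == SXTB16(0x44112233UL) == 0x00110033UL.
 */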

// Inline assembly routines for ACLE intrinsics that are not defined by the GCC toolchain
__STATIC_FORCEINLINE uint32_t SMULBB(uint32_t op1, uint32_t op2)
{
    uint32_t result;

    __ASM volatile("smulbb %0, %1, %2" : "=r"(result) : "r"(op1), "r"(op2));
    return (result);
}

__STATIC_FORCEINLINE uint32_t SMULTT(uint32_t op1, uint32_t op2)
{
    uint32_t result;

    __ASM volatile("smultt %0, %1, %2" : "=r"(result) : "r"(op1), "r"(op2));
    return (result);
}
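
/*
 * Worked example (illustrative): SMULBB multiplies the signed bottom
 * halfwords of its operands, SMULTT the signed top halfwords.
 *
 *     SMULBB(0x00020003UL, 0x00040005UL) == 3 * 5 == 15
 *     SMULTT(0x00020003UL, 0x00040005UL) == 2 * 4 == 8
 */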
#endif

#endif

#endif /* #ifndef ARM_NN_COMPILER_H */