1 /*
2 * Copyright (c) 2019 Facebook.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 /**
8 * @file
9 * @brief Inline implementation of functions declared in math_extras.h.
10 */
11
12 #ifndef ZEPHYR_INCLUDE_SYS_MATH_EXTRAS_H_
13 #error "please include <sys/math_extras.h> instead of this file"
14 #endif
15
16 #include <zephyr/toolchain.h>
17
18 /*
19 * Force the use of portable C code (no builtins) by defining
20 * PORTABLE_MISC_MATH_EXTRAS before including <misc/math_extras.h>.
21 * This is primarily for use by tests.
22 *
23 * We'll #undef use_builtin again at the end of the file.
24 */
25 #ifdef PORTABLE_MISC_MATH_EXTRAS
26 #define use_builtin(x) 0
27 #else
28 #define use_builtin(x) HAS_BUILTIN(x)
29 #endif
30
31 #if use_builtin(__builtin_add_overflow)
u16_add_overflow(uint16_t a,uint16_t b,uint16_t * result)32 static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
33 {
34 return __builtin_add_overflow(a, b, result);
35 }
36
u32_add_overflow(uint32_t a,uint32_t b,uint32_t * result)37 static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
38 {
39 return __builtin_add_overflow(a, b, result);
40 }
41
u64_add_overflow(uint64_t a,uint64_t b,uint64_t * result)42 static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
43 {
44 return __builtin_add_overflow(a, b, result);
45 }
46
size_add_overflow(size_t a,size_t b,size_t * result)47 static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
48 {
49 return __builtin_add_overflow(a, b, result);
50 }
51 #else /* !use_builtin(__builtin_add_overflow) */
u16_add_overflow(uint16_t a,uint16_t b,uint16_t * result)52 static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
53 {
54 uint16_t c = a + b;
55
56 *result = c;
57
58 return c < a;
59 }
60
u32_add_overflow(uint32_t a,uint32_t b,uint32_t * result)61 static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
62 {
63 uint32_t c = a + b;
64
65 *result = c;
66
67 return c < a;
68 }
69
u64_add_overflow(uint64_t a,uint64_t b,uint64_t * result)70 static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
71 {
72 uint64_t c = a + b;
73
74 *result = c;
75
76 return c < a;
77 }
78
size_add_overflow(size_t a,size_t b,size_t * result)79 static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
80 {
81 size_t c = a + b;
82
83 *result = c;
84
85 return c < a;
86 }
87 #endif /* use_builtin(__builtin_add_overflow) */
88
89 #if use_builtin(__builtin_mul_overflow)
u16_mul_overflow(uint16_t a,uint16_t b,uint16_t * result)90 static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
91 {
92 return __builtin_mul_overflow(a, b, result);
93 }
94
u32_mul_overflow(uint32_t a,uint32_t b,uint32_t * result)95 static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
96 {
97 return __builtin_mul_overflow(a, b, result);
98 }
99
u64_mul_overflow(uint64_t a,uint64_t b,uint64_t * result)100 static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
101 {
102 return __builtin_mul_overflow(a, b, result);
103 }
104
size_mul_overflow(size_t a,size_t b,size_t * result)105 static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
106 {
107 return __builtin_mul_overflow(a, b, result);
108 }
109 #else /* !use_builtin(__builtin_mul_overflow) */
u16_mul_overflow(uint16_t a,uint16_t b,uint16_t * result)110 static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
111 {
112 uint16_t c = a * b;
113
114 *result = c;
115
116 return a != 0 && (c / a) != b;
117 }
118
u32_mul_overflow(uint32_t a,uint32_t b,uint32_t * result)119 static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
120 {
121 uint32_t c = a * b;
122
123 *result = c;
124
125 return a != 0 && (c / a) != b;
126 }
127
u64_mul_overflow(uint64_t a,uint64_t b,uint64_t * result)128 static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
129 {
130 uint64_t c = a * b;
131
132 *result = c;
133
134 return a != 0 && (c / a) != b;
135 }
136
size_mul_overflow(size_t a,size_t b,size_t * result)137 static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
138 {
139 size_t c = a * b;
140
141 *result = c;
142
143 return a != 0 && (c / a) != b;
144 }
145 #endif /* use_builtin(__builtin_mul_overflow) */
146
147
148 /*
149 * The GCC builtins __builtin_clz(), __builtin_ctz(), and 64-bit
150 * variants are described by the GCC documentation as having undefined
151 * behavior when the argument is zero. See
152 * https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
153 *
154 * The undefined behavior applies to all architectures, regardless of
155 * the behavior of the instruction used to implement the builtin.
156 *
157 * We don't want to expose users of this API to the undefined behavior,
158 * so we use a conditional to explicitly provide the correct result when
159 * x=0.
160 *
161 * Most instruction set architectures have a CLZ instruction or similar
162 * that already computes the correct result for x=0. Both GCC and Clang
163 * know this and simply generate a CLZ instruction, optimizing away the
164 * conditional.
165 *
166 * For x86, and for compilers that fail to eliminate the conditional,
167 * there is often another opportunity for optimization since code using
168 * these functions tends to contain a zero check already. For example,
169 * from kernel/sched.c:
170 *
171 * struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
172 * {
173 * if (!pq->bitmask) {
174 * return NULL;
175 * }
176 *
177 * struct k_thread *thread = NULL;
178 * sys_dlist_t *l =
179 * &pq->queues[u32_count_trailing_zeros(pq->bitmask)];
180 *
181 * ...
182 *
183 * The compiler will often be able to eliminate the redundant x == 0
184 * check after inlining the call to u32_count_trailing_zeros().
185 */
186
187 #if use_builtin(__builtin_clz)
u32_count_leading_zeros(uint32_t x)188 static inline int u32_count_leading_zeros(uint32_t x)
189 {
190 return (x == 0) ? 32 : __builtin_clz(x);
191 }
192 #else /* !use_builtin(__builtin_clz) */
u32_count_leading_zeros(uint32_t x)193 static inline int u32_count_leading_zeros(uint32_t x)
194 {
195 int b;
196
197 for (b = 0; b < 32 && (x >> 31) == 0; b++) {
198 x <<= 1;
199 }
200
201 return b;
202 }
203 #endif /* use_builtin(__builtin_clz) */
204
205 #if use_builtin(__builtin_clzll)
u64_count_leading_zeros(uint64_t x)206 static inline int u64_count_leading_zeros(uint64_t x)
207 {
208 return (x == 0) ? 64 : __builtin_clzll(x);
209 }
210 #else /* !use_builtin(__builtin_clzll) */
u64_count_leading_zeros(uint64_t x)211 static inline int u64_count_leading_zeros(uint64_t x)
212 {
213 if (x == (uint32_t)x) {
214 return 32 + u32_count_leading_zeros((uint32_t)x);
215 } else {
216 return u32_count_leading_zeros(x >> 32);
217 }
218 }
219 #endif /* use_builtin(__builtin_clzll) */
220
221 #if use_builtin(__builtin_ctz)
u32_count_trailing_zeros(uint32_t x)222 static inline int u32_count_trailing_zeros(uint32_t x)
223 {
224 return (x == 0) ? 32 : __builtin_ctz(x);
225 }
226 #else /* !use_builtin(__builtin_ctz) */
u32_count_trailing_zeros(uint32_t x)227 static inline int u32_count_trailing_zeros(uint32_t x)
228 {
229 int b;
230
231 for (b = 0; b < 32 && (x & 1) == 0; b++) {
232 x >>= 1;
233 }
234
235 return b;
236 }
237 #endif /* use_builtin(__builtin_ctz) */
238
239 #if use_builtin(__builtin_ctzll)
u64_count_trailing_zeros(uint64_t x)240 static inline int u64_count_trailing_zeros(uint64_t x)
241 {
242 return (x == 0) ? 64 : __builtin_ctzll(x);
243 }
244 #else /* !use_builtin(__builtin_ctzll) */
u64_count_trailing_zeros(uint64_t x)245 static inline int u64_count_trailing_zeros(uint64_t x)
246 {
247 if ((uint32_t)x) {
248 return u32_count_trailing_zeros((uint32_t)x);
249 } else {
250 return 32 + u32_count_trailing_zeros(x >> 32);
251 }
252 }
253 #endif /* use_builtin(__builtin_ctzll) */
254
255 #undef use_builtin
256