/*
 * Copyright (c) 2019 Facebook.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Inline implementation of functions declared in math_extras.h.
 */

#ifndef ZEPHYR_INCLUDE_SYS_MATH_EXTRAS_H_
#error "please include <sys/math_extras.h> instead of this file"
#endif

#include <zephyr/toolchain.h>

/*
 * Force the use of portable C code (no builtins) by defining
 * PORTABLE_MISC_MATH_EXTRAS before including <sys/math_extras.h>.
 * This is primarily for use by tests.
 *
 * We'll #undef use_builtin again at the end of the file.
 */
#ifdef PORTABLE_MISC_MATH_EXTRAS
#define use_builtin(x) 0
#else
#define use_builtin(x) HAS_BUILTIN(x)
#endif
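
/*
 * Example (illustrative sketch, not part of this API): a test file
 * can force the portable implementations like so, provided the define
 * precedes the first include of the header:
 *
 *	#define PORTABLE_MISC_MATH_EXTRAS
 *	#include <sys/math_extras.h>
 *
 *	uint32_t sum;
 *	bool overflowed = u32_add_overflow(UINT32_MAX, 1, &sum);
 *	// overflowed == true, sum == 0
 */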

#if use_builtin(__builtin_add_overflow)
static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	return __builtin_add_overflow(a, b, result);
}
#else /* !use_builtin(__builtin_add_overflow) */
static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	uint16_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	uint32_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	uint64_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	size_t c = a + b;

	*result = c;

	return c < a;
}
#endif /* use_builtin(__builtin_add_overflow) */
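
/*
 * Why the portable check works: unsigned addition is modular, so when
 * a + b overflows, the truncated sum is strictly smaller than either
 * operand. With uint16_t, for example, 0xFFFF + 0x0002 wraps to
 * 0x0001, and 0x0001 < 0xFFFF reports the overflow. A typical caller
 * (illustrative sketch; the names are hypothetical):
 *
 *	size_t total;
 *
 *	if (size_add_overflow(header_len, payload_len, &total)) {
 *		return -EINVAL;
 *	}
 *	buf = k_malloc(total);
 */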

#if use_builtin(__builtin_mul_overflow)
static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}
#else /* !use_builtin(__builtin_mul_overflow) */
static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	uint16_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	uint32_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	uint64_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	size_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}
#endif /* use_builtin(__builtin_mul_overflow) */
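
/*
 * Why the portable check works: if a != 0 and the truncated product c
 * satisfies c / a != b, then high bits were lost in the multiply.
 * With uint16_t, for example, 300 * 300 = 90000 wraps to 24464, and
 * 24464 / 300 == 81 != 300, so the overflow is reported. When a == 0
 * the product cannot overflow, which the short-circuit handles.
 */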


/*
 * The GCC builtins __builtin_clz(), __builtin_ctz(), and 64-bit
 * variants are described by the GCC documentation as having undefined
 * behavior when the argument is zero. See
 * https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
 *
 * The undefined behavior applies to all architectures, regardless of
 * the behavior of the instruction used to implement the builtin.
 *
 * We don't want to expose users of this API to the undefined behavior,
 * so we use a conditional to explicitly provide the correct result when
 * x=0.
 *
 * Most instruction set architectures have a CLZ instruction or similar
 * that already computes the correct result for x=0. Both GCC and Clang
 * know this and simply generate a CLZ instruction, optimizing away the
 * conditional.
 *
 * For x86, and for compilers that fail to eliminate the conditional,
 * there is often another opportunity for optimization since code using
 * these functions tends to contain a zero check already. For example,
 * from kernel/sched.c:
 *
 *	struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
 *	{
 *		if (!pq->bitmask) {
 *			return NULL;
 *		}
 *
 *		struct k_thread *thread = NULL;
 *		sys_dlist_t *l =
 *			&pq->queues[u32_count_trailing_zeros(pq->bitmask)];
 *
 *		...
 *
 * The compiler will often be able to eliminate the redundant x == 0
 * check after inlining the call to u32_count_trailing_zeros().
 */

#if use_builtin(__builtin_clz)
static inline int u32_count_leading_zeros(uint32_t x)
{
	return (x == 0) ? 32 : __builtin_clz(x);
}
#else /* !use_builtin(__builtin_clz) */
static inline int u32_count_leading_zeros(uint32_t x)
{
	int b;

	for (b = 0; b < 32 && (x >> 31) == 0; b++) {
		x <<= 1;
	}

	return b;
}
#endif /* use_builtin(__builtin_clz) */
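
/*
 * A common use of the leading-zero count is finding the index of the
 * most significant set bit (illustrative sketch; find_msb() is a
 * hypothetical helper, not part of this API):
 *
 *	static inline int find_msb(uint32_t x)
 *	{
 *		return 31 - u32_count_leading_zeros(x);
 *	}
 *
 * Since u32_count_leading_zeros(0) is defined to return 32, this
 * yields -1 for x == 0, which callers may use as a sentinel.
 */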

#if use_builtin(__builtin_clzll)
static inline int u64_count_leading_zeros(uint64_t x)
{
	return (x == 0) ? 64 : __builtin_clzll(x);
}
#else /* !use_builtin(__builtin_clzll) */
static inline int u64_count_leading_zeros(uint64_t x)
{
	if (x == (uint32_t)x) {
		return 32 + u32_count_leading_zeros((uint32_t)x);
	} else {
		return u32_count_leading_zeros(x >> 32);
	}
}
#endif /* use_builtin(__builtin_clzll) */

#if use_builtin(__builtin_ctz)
static inline int u32_count_trailing_zeros(uint32_t x)
{
	return (x == 0) ? 32 : __builtin_ctz(x);
}
#else /* !use_builtin(__builtin_ctz) */
static inline int u32_count_trailing_zeros(uint32_t x)
{
	int b;

	for (b = 0; b < 32 && (x & 1) == 0; b++) {
		x >>= 1;
	}

	return b;
}
#endif /* use_builtin(__builtin_ctz) */

#if use_builtin(__builtin_ctzll)
static inline int u64_count_trailing_zeros(uint64_t x)
{
	return (x == 0) ? 64 : __builtin_ctzll(x);
}
#else /* !use_builtin(__builtin_ctzll) */
static inline int u64_count_trailing_zeros(uint64_t x)
{
	if ((uint32_t)x) {
		return u32_count_trailing_zeros((uint32_t)x);
	} else {
		return 32 + u32_count_trailing_zeros(x >> 32);
	}
}
#endif /* use_builtin(__builtin_ctzll) */
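
/*
 * The 64-bit fallbacks split the argument into 32-bit halves. For the
 * trailing-zero count: if the low half is nonzero the answer lies
 * entirely within it; otherwise the low 32 bits contribute 32 zeros
 * plus the trailing zeros of the high half. For example,
 * x = 0x0000000100000000 has a zero low half and a high half with no
 * trailing zeros, giving 32 + 0 = 32.
 */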

#undef use_builtin