/*
 * Copyright (c) 2019 Facebook.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Inline implementation of functions declared in math_extras.h.
 */

#ifndef ZEPHYR_INCLUDE_SYS_MATH_EXTRAS_H_
#error "please include <sys/math_extras.h> instead of this file"
#endif

#include <toolchain.h>

/*
 * Force the use of portable C code (no builtins) by defining
 * PORTABLE_MISC_MATH_EXTRAS before including <sys/math_extras.h>.
 * This is primarily for use by tests.
 *
 * We'll #undef use_builtin again at the end of the file.
 */
#ifdef PORTABLE_MISC_MATH_EXTRAS
#define use_builtin(x) 0
#else
#define use_builtin(x) HAS_BUILTIN(x)
#endif

#if use_builtin(__builtin_add_overflow)
static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	return __builtin_add_overflow(a, b, result);
}

static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	return __builtin_add_overflow(a, b, result);
}
#else /* !use_builtin(__builtin_add_overflow) */
/*
 * Portable fallback: unsigned addition wraps modulo 2^N, so the sum
 * overflowed exactly when the wrapped result is smaller than either
 * operand.
 */
static inline bool u16_add_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	uint16_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool u32_add_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	uint32_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool u64_add_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	uint64_t c = a + b;

	*result = c;

	return c < a;
}

static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	size_t c = a + b;

	*result = c;

	return c < a;
}
#endif /* use_builtin(__builtin_add_overflow) */

#if use_builtin(__builtin_mul_overflow)
static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}

static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	return __builtin_mul_overflow(a, b, result);
}
#else /* !use_builtin(__builtin_mul_overflow) */
/*
 * Portable fallback: the product wrapped exactly when dividing it by a
 * non-zero 'a' does not give back 'b'.
 */
static inline bool u16_mul_overflow(uint16_t a, uint16_t b, uint16_t *result)
{
	uint16_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool u32_mul_overflow(uint32_t a, uint32_t b, uint32_t *result)
{
	uint32_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *result)
{
	uint64_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}

static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	size_t c = a * b;

	*result = c;

	return a != 0 && (c / a) != b;
}
#endif /* use_builtin(__builtin_mul_overflow) */

/*
 * The GCC builtins __builtin_clz(), __builtin_ctz(), and 64-bit
 * variants are described by the GCC documentation as having undefined
 * behavior when the argument is zero. See
 * https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
 *
 * The undefined behavior applies to all architectures, regardless of
 * the behavior of the instruction used to implement the builtin.
 *
 * We don't want to expose users of this API to the undefined behavior,
 * so we use a conditional to explicitly provide the correct result when
 * x=0.
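 *
 * For example, with x = 0x00ff0000, u32_count_leading_zeros() returns 8
 * and u32_count_trailing_zeros() returns 16; with x = 0, each variant
 * returns the width of its operand type (32 or 64) rather than an
 * unpredictable value.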
 *
 * Most instruction set architectures have a CLZ instruction or similar
 * that already computes the correct result for x=0. Both GCC and Clang
 * know this and simply generate a CLZ instruction, optimizing away the
 * conditional.
 *
 * For x86, and for compilers that fail to eliminate the conditional,
 * there is often another opportunity for optimization since code using
 * these functions tends to contain a zero check already. For example,
 * from kernel/sched.c:
 *
 *	struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
 *	{
 *		if (!pq->bitmask) {
 *			return NULL;
 *		}
 *
 *		struct k_thread *thread = NULL;
 *		sys_dlist_t *l =
 *			&pq->queues[u32_count_trailing_zeros(pq->bitmask)];
 *
 *		...
 *
 * The compiler will often be able to eliminate the redundant x == 0
 * check after inlining the call to u32_count_trailing_zeros().
 */
#if use_builtin(__builtin_clz)
static inline int u32_count_leading_zeros(uint32_t x)
{
	return (x == 0) ? 32 : __builtin_clz(x);
}
#else /* !use_builtin(__builtin_clz) */
static inline int u32_count_leading_zeros(uint32_t x)
{
	int b;

	for (b = 0; b < 32 && (x >> 31) == 0; b++) {
		x <<= 1;
	}

	return b;
}
#endif /* use_builtin(__builtin_clz) */

#if use_builtin(__builtin_clzll)
static inline int u64_count_leading_zeros(uint64_t x)
{
	return (x == 0) ? 64 : __builtin_clzll(x);
}
#else /* !use_builtin(__builtin_clzll) */
static inline int u64_count_leading_zeros(uint64_t x)
{
	if (x == (uint32_t)x) {
		return 32 + u32_count_leading_zeros((uint32_t)x);
	} else {
		return u32_count_leading_zeros(x >> 32);
	}
}
#endif /* use_builtin(__builtin_clzll) */

#if use_builtin(__builtin_ctz)
static inline int u32_count_trailing_zeros(uint32_t x)
{
	return (x == 0) ? 32 : __builtin_ctz(x);
}
#else /* !use_builtin(__builtin_ctz) */
static inline int u32_count_trailing_zeros(uint32_t x)
{
	int b;

	for (b = 0; b < 32 && (x & 1) == 0; b++) {
		x >>= 1;
	}

	return b;
}
#endif /* use_builtin(__builtin_ctz) */

#if use_builtin(__builtin_ctzll)
static inline int u64_count_trailing_zeros(uint64_t x)
{
	return (x == 0) ? 64 : __builtin_ctzll(x);
}
#else /* !use_builtin(__builtin_ctzll) */
static inline int u64_count_trailing_zeros(uint64_t x)
{
	if ((uint32_t)x) {
		return u32_count_trailing_zeros((uint32_t)x);
	} else {
		return 32 + u32_count_trailing_zeros(x >> 32);
	}
}
#endif /* use_builtin(__builtin_ctzll) */

#undef use_builtin
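
/*
 * Illustrative usage (a minimal sketch, not part of this header; the
 * names count, elem_size, struct hdr and buf are hypothetical): the
 * overflow-checked helpers above are intended to guard a size
 * computation before the result is used, for example when sizing an
 * allocation, rather than checking a value that may already have
 * wrapped:
 *
 *	size_t buf_len;
 *	void *buf;
 *
 *	if (size_mul_overflow(count, elem_size, &buf_len) ||
 *	    size_add_overflow(buf_len, sizeof(struct hdr), &buf_len)) {
 *		return -EINVAL;
 *	}
 *
 *	buf = k_malloc(buf_len);
 */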