1 // SPDX-License-Identifier: BSD-3-Clause
2 //
3 // Copyright(c) 2021 Intel Corporation. All rights reserved.
4 //
5 // Author: Shriram Shastry <malladi.sastry@linux.intel.com>
6 //
7 //
8
9 #include <sof/math/sqrt.h>
10
11 #define SQRT_WRAP_SCHAR_BITS 0xFF
12
13 /*
14 * Square root
15 *
16 * Y = SQRTLOOKUP_INT16(U) computes the square root of
17 * U using lookup tables.
18 * Range of u is [0 to 65535]
19 * Range of y is [0 to 4]
20 * +------------------+-----------------+--------+--------+
21 * | u | y (returntype) | u | y |
22 * +----+-----+-------+----+----+-------+--------+--------+
23 * |WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
24 * +----+-----+-------+----+----+-------+--------+--------+
25 * | 16 | 12 | 0 | 16 | 12 | 1 | 4.12 | 4.12 |
26 * +------------------+-----------------+--------+--------+
27
28 * Arguments : uint16_t u
29 * Return Type : int32_t
30 */
sqrt_int16(uint16_t u)31 uint16_t sqrt_int16(uint16_t u)
32 {
33 static const int32_t iv1[193] = {
34 46341, 46702, 47059, 47415, 47767, 48117, 48465, 48809, 49152, 49492, 49830, 50166,
35 50499, 50830, 51159, 51486, 51811, 52134, 52454, 52773, 53090, 53405, 53719, 54030,
36 54340, 54647, 54954, 55258, 55561, 55862, 56162, 56459, 56756, 57051, 57344, 57636,
37 57926, 58215, 58503, 58789, 59073, 59357, 59639, 59919, 60199, 60477, 60753, 61029,
38 61303, 61576, 61848, 62119, 62388, 62657, 62924, 63190, 63455, 63719, 63982, 64243,
39 64504, 64763, 65022, 65279, 65536, 65792, 66046, 66300, 66552, 66804, 67054, 67304,
40 67553, 67801, 68048, 68294, 68539, 68784, 69027, 69270, 69511, 69752, 69992, 70232,
41 70470, 70708, 70945, 71181, 71416, 71651, 71885, 72118, 72350, 72581, 72812, 73042,
42 73271, 73500, 73728, 73955, 74182, 74408, 74633, 74857, 75081, 75304, 75527, 75748,
43 75969, 76190, 76410, 76629, 76848, 77066, 77283, 77500, 77716, 77932, 78147, 78361,
44 78575, 78788, 79001, 79213, 79424, 79635, 79846, 80056, 80265, 80474, 80682, 80890,
45 81097, 81303, 81509, 81715, 81920, 82125, 82329, 82532, 82735, 82938, 83140, 83341,
46 83542, 83743, 83943, 84143, 84342, 84540, 84739, 84936, 85134, 85331, 85527, 85723,
47 85918, 86113, 86308, 86502, 86696, 86889, 87082, 87275, 87467, 87658, 87849, 88040,
48 88231, 88420, 88610, 88799, 88988, 89176, 89364, 89552, 89739, 89926, 90112, 90298,
49 90484, 90669, 90854, 91038, 91222, 91406, 91589, 91772, 91955, 92137, 92319, 92501,
50 92682};
51 static const int8_t iv[256] = {
52 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
53 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
54 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
61 int32_t xfi_tmp;
62 int32_t y;
63 uint16_t v;
64 int32_t a_i;
65 int32_t l1_i;
66 int32_t l2_i;
67 int num_left_shifts;
68 int shift_factor;
69 int xfi;
70 unsigned int slice_temp;
71 int sign = 1;
72
73 if (!u)
74 return 0;
75 /* Normalize the input such that u = x * 2^n and 0.5 <= x < 2
76 * Normalize to the range [1, 2)
77 * normalizes the input U
78 * such that the output X is
79 * U = X*2^N
80 * 1 <= X < 2
81 * The output X is unsigned with one integer bit.
82 * The input U must be scalar and positive.
83 * The number of bits in a byte is assumed to be B=8.
84 * Reinterpret the input as an unsigned integer.
85 * Unroll the loop in generated code so there will be no branching.
86 * For each iteration, see how many leading zeros are in the high
87 * byte of V, and shift them out to the left. Continue with the
88 * shifted V for as many bytes as it has.
89 * The index is the high byte of the input plus 1 to make it a
90 * one-based index.
91 * Index into the number-of-leading-zeros lookup table. This lookup
92 * table takes in a byte and returns the number of leading zeros in the
93 * binary representation.
94 */
95
96 shift_factor = iv[u >> 8];
97 /* Left-shift out all the leading zeros in the high byte. */
98 v = u << shift_factor;
99 /* Update the total number of left-shifts */
100 num_left_shifts = shift_factor;
101 /* For each iteration, see how many leading zeros are in the high
102 * byte of V, and shift them out to the left. Continue with the
103 * shifted V for as many bytes as it has.
104 * The index is the high byte of the input plus 1 to make it a
105 * one-based index.
106 * Index into the number-of-leading-zeros lookup table. This lookup
107 * table takes in a byte and returns the number of leading zeros in the
108 * binary representation.
109 */
110 shift_factor = iv[v >> 8];
111 /* Left-shift out all the leading zeros in the high byte.
112 * Update the total number of left-shifts
113 */
114 num_left_shifts += shift_factor;
115 /* The input has been left-shifted so the most-significant-bit is a 1.
116 * Reinterpret the output as unsigned with one integer bit, so
117 * that 1 <= x < 2.
118 * Let Q = int(u). Then u = Q*2^(-u_fraction_length),
119 * and x = Q*2^num_left_shifts * 2^(1-word_length). Therefore,
120 * u = x*2^n, where n is defined as:
121 */
122 xfi_tmp = (3 - num_left_shifts) & 1;
123 v = (v << shift_factor) >> xfi_tmp;
124 /* Extract the high byte of x */
125 /* Convert the high byte into an index for SQRTLUT */
126 slice_temp = SQRT_WRAP_SCHAR_BITS & (v >> 8);
127 /* The upper byte was used for the index into SQRTLUT.
128 * The remainder, r, interpreted as a fraction, is used to
129 * linearly interpolate between points.
130 */
131 a_i = iv1[((v >> 8) - 63) - 1];
132 a_i <<= 8;
133 l1_i = iv1[SQRT_WRAP_SCHAR_BITS & ((slice_temp + 194) - 1)];
134 l2_i = iv1[(slice_temp - 63) - 1];
135 y = a_i + (v & SQRT_WRAP_SCHAR_BITS) * (l1_i - l2_i);
136 xfi = (((xfi_tmp - num_left_shifts) + 3) >> 1);
137 shift_factor = (((xfi_tmp - num_left_shifts) + 3) >> 1);
138 if (xfi != 0) {
139 if (xfi > 0)
140 y <<= (shift_factor >= 32) ? 0 : shift_factor;
141 else
142 y >>= -sign * xfi;
143 }
144 y = ((y >> 11) + 1) >> 1;
145
146 return y;
147 }
148