1 // SPDX-License-Identifier: BSD-3-Clause
2 //
3 // Copyright(c) 2021 Intel Corporation. All rights reserved.
4 //
5 // Author: Shriram Shastry <malladi.sastry@linux.intel.com>
6 //
7 //
8 
9 #include <sof/math/sqrt.h>
10 
11 #define SQRT_WRAP_SCHAR_BITS 0xFF
12 
13 /*
14  *  Square root
15  *
16  *  Y = SQRTLOOKUP_INT16(U) computes the square root of
17  *  U using lookup tables.
18  *  Range of u is [0 to 65535]
19  *  Range of y is [0 to 4]
20  * +------------------+-----------------+--------+--------+
21  * | u		      | y (returntype)	|   u	 |  y	  |
22  * +----+-----+-------+----+----+-------+--------+--------+
23  * |WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
24  * +----+-----+-------+----+----+-------+--------+--------+
25  * | 16 | 12  |	 0    | 16 | 12 |   1	| 4.12	 | 4.12	  |
26  * +------------------+-----------------+--------+--------+
27 
28  * Arguments	: uint16_t u
29  * Return Type	: int32_t
30  */
sqrt_int16(uint16_t u)31 uint16_t sqrt_int16(uint16_t u)
32 {
33 	static const int32_t iv1[193] = {
34 	    46341, 46702, 47059, 47415, 47767, 48117, 48465, 48809, 49152, 49492, 49830, 50166,
35 	    50499, 50830, 51159, 51486, 51811, 52134, 52454, 52773, 53090, 53405, 53719, 54030,
36 	    54340, 54647, 54954, 55258, 55561, 55862, 56162, 56459, 56756, 57051, 57344, 57636,
37 	    57926, 58215, 58503, 58789, 59073, 59357, 59639, 59919, 60199, 60477, 60753, 61029,
38 	    61303, 61576, 61848, 62119, 62388, 62657, 62924, 63190, 63455, 63719, 63982, 64243,
39 	    64504, 64763, 65022, 65279, 65536, 65792, 66046, 66300, 66552, 66804, 67054, 67304,
40 	    67553, 67801, 68048, 68294, 68539, 68784, 69027, 69270, 69511, 69752, 69992, 70232,
41 	    70470, 70708, 70945, 71181, 71416, 71651, 71885, 72118, 72350, 72581, 72812, 73042,
42 	    73271, 73500, 73728, 73955, 74182, 74408, 74633, 74857, 75081, 75304, 75527, 75748,
43 	    75969, 76190, 76410, 76629, 76848, 77066, 77283, 77500, 77716, 77932, 78147, 78361,
44 	    78575, 78788, 79001, 79213, 79424, 79635, 79846, 80056, 80265, 80474, 80682, 80890,
45 	    81097, 81303, 81509, 81715, 81920, 82125, 82329, 82532, 82735, 82938, 83140, 83341,
46 	    83542, 83743, 83943, 84143, 84342, 84540, 84739, 84936, 85134, 85331, 85527, 85723,
47 	    85918, 86113, 86308, 86502, 86696, 86889, 87082, 87275, 87467, 87658, 87849, 88040,
48 	    88231, 88420, 88610, 88799, 88988, 89176, 89364, 89552, 89739, 89926, 90112, 90298,
49 	    90484, 90669, 90854, 91038, 91222, 91406, 91589, 91772, 91955, 92137, 92319, 92501,
50 	    92682};
51 	static const int8_t iv[256] = {
52 	    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
53 	    3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
54 	    2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
61 	int32_t xfi_tmp;
62 	int32_t y;
63 	uint16_t v;
64 	int32_t a_i;
65 	int32_t l1_i;
66 	int32_t l2_i;
67 	int num_left_shifts;
68 	int shift_factor;
69 	int xfi;
70 	unsigned int slice_temp;
71 	int sign = 1;
72 
73 	if (!u)
74 		return 0;
75 	/* Normalize the input such that u = x * 2^n and 0.5 <= x < 2
76 	 * Normalize to the range [1, 2)
77 	 * normalizes the input U
78 	 * such that the output X is
79 	 * U = X*2^N
80 	 * 1 <= X < 2
81 	 * The output X is unsigned with one integer bit.
82 	 * The input U must be scalar and positive.
83 	 * The number of bits in a byte is assumed to be B=8.
84 	 * Reinterpret the input as an unsigned integer.
85 	 * Unroll the loop in generated code so there will be no branching.
86 	 * For each iteration, see how many leading zeros are in the high
87 	 * byte of V, and shift them out to the left. Continue with the
88 	 * shifted V for as many bytes as it has.
89 	 * The index is the high byte of the input plus 1 to make it a
90 	 * one-based index.
91 	 * Index into the number-of-leading-zeros lookup table.  This lookup
92 	 * table takes in a byte and returns the number of leading zeros in the
93 	 * binary representation.
94 	 */
95 
96 	shift_factor = iv[u >> 8];
97 	/*  Left-shift out all the leading zeros in the high byte. */
98 	v = u << shift_factor;
99 	/*  Update the total number of left-shifts */
100 	num_left_shifts = shift_factor;
101 	/* For each iteration, see how many leading zeros are in the high
102 	 * byte of V, and shift them out to the left. Continue with the
103 	 * shifted V for as many bytes as it has.
104 	 * The index is the high byte of the input plus 1 to make it a
105 	 * one-based index.
106 	 * Index into the number-of-leading-zeros lookup table.  This lookup
107 	 * table takes in a byte and returns the number of leading zeros in the
108 	 * binary representation.
109 	 */
110 	shift_factor = iv[v >> 8];
111 	/*  Left-shift out all the leading zeros in the high byte.
112 	 *  Update the total number of left-shifts
113 	 */
114 	num_left_shifts += shift_factor;
115 	/*  The input has been left-shifted so the most-significant-bit is a 1.
116 	 *  Reinterpret the output as unsigned with one integer bit, so
117 	 *  that 1 <= x < 2.
118 	 *  Let Q = int(u).  Then u = Q*2^(-u_fraction_length),
119 	 *  and x = Q*2^num_left_shifts * 2^(1-word_length).  Therefore,
120 	 *  u = x*2^n, where n is defined as:
121 	 */
122 	xfi_tmp = (3 - num_left_shifts) & 1;
123 	v = (v << shift_factor) >> xfi_tmp;
124 	/*  Extract the high byte of x */
125 	/*  Convert the high byte into an index for SQRTLUT */
126 	slice_temp = SQRT_WRAP_SCHAR_BITS & (v >> 8);
127 	/*  The upper byte was used for the index into SQRTLUT.
128 	 *  The remainder, r, interpreted as a fraction, is used to
129 	 *  linearly interpolate between points.
130 	 */
131 	a_i = iv1[((v >> 8) - 63) - 1];
132 	a_i <<= 8;
133 	l1_i = iv1[SQRT_WRAP_SCHAR_BITS & ((slice_temp + 194) - 1)];
134 	l2_i = iv1[(slice_temp - 63) - 1];
135 	y = a_i + (v & SQRT_WRAP_SCHAR_BITS) * (l1_i - l2_i);
136 	xfi = (((xfi_tmp - num_left_shifts) + 3) >> 1);
137 	shift_factor = (((xfi_tmp - num_left_shifts) + 3) >> 1);
138 	if (xfi != 0) {
139 		if (xfi > 0)
140 			y <<= (shift_factor >= 32) ? 0 : shift_factor;
141 		else
142 			y >>= -sign * xfi;
143 	}
144 	y = ((y >> 11) + 1) >> 1;
145 
146 	return y;
147 }
148