1 /** @file  wls_processing.c
2   *
3   * @brief This file contains processing functions to calculate CSI correction for WLS time stamps
4   *
5   * Copyright 2023-2024 NXP
6   *
7   * SPDX-License-Identifier: BSD-3-Clause
8   *
9   */
10 
11 /************************************************************************
12 * DFW processing functions to calculate CSI correction for WLS time stamps
13 ************************************************************************/
14 
15 #include <osa.h>
16 #if CONFIG_WLS_CSI_PROC
17 
18 // Standard includes.
19 #include <stdio.h>
20 #include <string.h>
21 //#include <stdlib.h>
22 
23 #include "wls_radix4Fft.h"
24 #include "wls_structure_defs.h"
25 #include "wls_processing.h"
26 #ifdef FFT_PARALLEL
27 #include "wls_processing_parallel.h"
28 #endif
29 #ifdef ARM_DS5
30 #include "wls_processing_Neon_Intrinsic.h"
31 #endif
32 #include "wls_param_defines.h"
33 
34 #ifdef FLOATING_POINT
35 #include <math.h>
36 #endif
37 
38 #define NUM_ATAN_IT 10
39 
40 //#define ONE_OVER_PI ((int)((1<<MPY_BIPT)/3.141592653589795f))
41 #define ONE_OVER_PI ((0x145F306D)>>(30-MPY_BIPT)) //0x145F306D=1.0/PI*(1<<30)
42 #define TABLE_SIZE_POW 6
43 #define TABLE_SIZE (1<<TABLE_SIZE_POW)
44 
45 const unsigned char sqrtLUT[TABLE_SIZE] = {31, 44, 54, 63, 71, 77, 84, 90, 95, 100, 105, 110, 114, 119, 123, 127, 131, 135, 138, 142, 146, 149, 152, 156, 159, 162, 165, 168, 171, 174, 177, 180, 183, 186, 188, 191, 194, 196, 199, 201, 204, 206, 209, 211, 214, 216, 218, 221, 223, 225, 228, 230, 232, 234, 236, 238, 241, 243, 245, 247, 249, 251, 253, 255};
46 
47 static const unsigned int pilotDummyArray[10] = {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0};
48 
49 // 80 MHz 128+[-125:-1 1:125]
50 // minus pilots on [-117, -100, -23, -6, 6, 23, 100, 117]
51 
52 // SC5 VHT format with Ng=1, needs to skip pilot tones
53 const unsigned char pilotToneIndexVHT80[SC5_VHT80_PILOTS] = { 25, 53, 89, 117, 139, 167, 203, 231 };
54 const unsigned char pilotToneIndexVHT40[SC5_VHT40_PILOTS] = { 11, 39, 53, 75, 89, 117 };
55 const unsigned char pilotToneIndexVHT20[SC5_VHT20_PILOTS] = { 11, 25, 39, 53 };
56 const unsigned char pilotToneIndexHE80[SC5_HE80_PILOTS] = { 11, 28, 105, 122, 134, 151, 228, 245 };
57 const unsigned char pilotToneIndexHE40[SC5_HE40_PILOTS] = { 11, 28, 38, 55, 73, 90, 100, 117 };
58 const unsigned char pilotToneIndexHE20[SC5_HE20_PILOTS] = { 3, 20, 44, 61 };
59 
60 const unsigned char pilotToneIndexHT20[SC5_HT20_PILOTS] = { 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 32, \
61 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,};
62 const unsigned char pilotToneIndexLEG20[SC5_HT20_PILOTS] = { 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 32, \
63 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, };
64 
65 #ifdef REMOVE_IIR
// Adjust a phase-correction value to the index format of the twiddle table:
// scale by REMOVE_IIR*MAX_FFT_SIZE, drop IIR_FORMAT_NP fractional bits and mask
// with MAX_FFT_SIZE-1 (a modulo, assuming MAX_FFT_SIZE is a power of two).
// The argument is parenthesized so expression arguments such as P(a-b) expand correctly.
#define P(x) ((((x)*REMOVE_IIR*MAX_FFT_SIZE)>>IIR_FORMAT_NP)&(MAX_FFT_SIZE-1))
67 #ifdef STA_20_ONLY
68 const short phiCorr64[64] = {P(-2560), P(-2049), P(-1731), P(-1527), P(-1376), P(-1254), P(-1151), P(-1061), P(-981), P(-910), P(-845), P(-785), P(-730), P(-679), P(-631), P(-586), P(-543), P(-502), P(-462), P(-424), P(-388), P(-352), P(-318), P(-284), P(-251), P(-218), P(-186), P(-155), P(-123), P(-92), P(-61), P(-31), P(0), P(31), P(61), P(92), P(123), P(155), P(186), P(218), P(251), P(284), P(318), P(352), P(388), P(424), P(462), P(502), P(543), P(586), P(631), P(679), P(730), P(785), P(845), P(910), P(981), P(1061), P(1151), P(1254), P(1376), P(1527), P(1731), P(2049)};
69 #else
70 const short phiCorr512[512] ={P(-2560), P(-2488), P(-2417), P(-2348), P(-2281), P(-2218), P(-2158), P(-2101), P(-2049), P(-1999), P(-1953), P(-1910), P(-1870), P(-1832), P(-1796), P(-1763), P(-1731), P(-1701), P(-1673), P(-1646), P(-1620), P(-1595), P(-1572), P(-1549), P(-1527), P(-1506), P(-1486), P(-1466), P(-1447), P(-1429), P(-1411), P(-1393), P(-1376), P(-1360), P(-1344), P(-1328), P(-1313), P(-1297), P(-1283), P(-1268), P(-1254), P(-1241), P(-1227), P(-1214), P(-1201), P(-1188), P(-1176), P(-1163), P(-1151), P(-1139), P(-1128), P(-1116), P(-1105), P(-1094), P(-1083), P(-1072), P(-1061), P(-1051), P(-1040), P(-1030), P(-1020), P(-1010), P(-1001), P(-991), P(-981), P(-972), P(-963), P(-954), P(-945), P(-936), P(-927), P(-918), P(-910), P(-901), P(-893), P(-885), P(-877), P(-869), P(-861), P(-853), P(-845), P(-837), P(-829), P(-822), P(-814), P(-807), P(-800), P(-792), P(-785), P(-778), P(-771), P(-764), P(-757), P(-750), P(-744), P(-737), P(-730), P(-724), P(-717), P(-711), P(-704), P(-698), P(-691), P(-685), P(-679), P(-673), P(-667), P(-661), P(-655), P(-649), P(-643), P(-637), P(-631), P(-625), P(-619), P(-614), P(-608), P(-602), P(-597), P(-591), P(-586), P(-580), P(-575), P(-569), P(-564), P(-558), P(-553), P(-548), P(-543), P(-537), P(-532), P(-527), P(-522), P(-517), P(-512), P(-507), P(-502), P(-497), P(-492), P(-487), P(-482), P(-477), P(-472), P(-467), P(-462), P(-457), P(-453), P(-448), P(-443), P(-438), P(-434), P(-429), P(-424), P(-420), P(-415), P(-410), P(-406), P(-401), P(-397), P(-392), P(-388), P(-383), P(-379), P(-374), P(-370), P(-365), P(-361), P(-357), P(-352), P(-348), P(-343), P(-339), P(-335), P(-330), P(-326), P(-322), P(-318), P(-313), P(-309), P(-305), P(-301), P(-296), P(-292), P(-288), P(-284), P(-280), P(-276), P(-271), P(-267), P(-263), P(-259), P(-255), P(-251), P(-247), P(-243), P(-238), P(-234), P(-230), P(-226), P(-222), P(-218), P(-214), P(-210), P(-206), P(-202), P(-198), P(-194), P(-190), P(-186), P(-182), P(-178), 
P(-174), P(-170), P(-166), P(-162), P(-159), P(-155), P(-151), P(-147), P(-143), P(-139), P(-135), P(-131), P(-127), P(-123), P(-119), P(-116), P(-112), P(-108), P(-104), P(-100), P(-96), P(-92), P(-88), P(-85), P(-81), P(-77), P(-73), P(-69), P(-65), P(-61), P(-58), P(-54), P(-50), P(-46), P(-42), P(-38), P(-35), P(-31), P(-27), P(-23), P(-19), P(-15), P(-11), P(-8), P(-4), P(0), P(4), P(8), P(11), P(15), P(19), P(23), P(27), P(31), P(35), P(38), P(42), P(46), P(50), P(54), P(58), P(61), P(65), P(69), P(73), P(77), P(81), P(85), P(88), P(92), P(96), P(100), P(104), P(108), P(112), P(116), P(119), P(123), P(127), P(131), P(135), P(139), P(143), P(147), P(151), P(155), P(159), P(162), P(166), P(170), P(174), P(178), P(182), P(186), P(190), P(194), P(198), P(202), P(206), P(210), P(214), P(218), P(222), P(226), P(230), P(234), P(238), P(243), P(247), P(251), P(255), P(259), P(263), P(267), P(271), P(276), P(280), P(284), P(288), P(292), P(296), P(301), P(305), P(309), P(313), P(318), P(322), P(326), P(330), P(335), P(339), P(343), P(348), P(352), P(357), P(361), P(365), P(370), P(374), P(379), P(383), P(388), P(392), P(397), P(401), P(406), P(410), P(415), P(420), P(424), P(429), P(434), P(438), P(443), P(448), P(453), P(457), P(462), P(467), P(472), P(477), P(482), P(487), P(492), P(497), P(502), P(507), P(512), P(517), P(522), P(527), P(532), P(537), P(543), P(548), P(553), P(558), P(564), P(569), P(575), P(580), P(586), P(591), P(597), P(602), P(608), P(614), P(619), P(625), P(631), P(637), P(643), P(649), P(655), P(661), P(667), P(673), P(679), P(685), P(691), P(698), P(704), P(711), P(717), P(724), P(730), P(737), P(744), P(750), P(757), P(764), P(771), P(778), P(785), P(792), P(800), P(807), P(814), P(822), P(829), P(837), P(845), P(853), P(861), P(869), P(877), P(885), P(893), P(901), P(910), P(918), P(927), P(936), P(945), P(954), P(963), P(972), P(981), P(991), P(1001), P(1010), P(1020), P(1030), P(1040), P(1051), P(1061), P(1072), P(1083), P(1094), P(1105), 
P(1116), P(1128), P(1139), P(1151), P(1163), P(1176), P(1188), P(1201), P(1214), P(1227), P(1241), P(1254), P(1268), P(1283), P(1297), P(1313), P(1328), P(1344), P(1360), P(1376), P(1393), P(1411), P(1429), P(1447), P(1466), P(1486), P(1506), P(1527), P(1549), P(1572), P(1595), P(1620), P(1646), P(1673), P(1701), P(1731), P(1763), P(1796), P(1832), P(1870), P(1910), P(1953), P(1999), P(2049), P(2101), P(2158), P(2218), P(2281), P(2348), P(2417), P(2488)};
71 const short phiCorrScBt512[512] ={P(-3584), P(-3243), P(-2979), P(-2789), P(-2648), P(-2534), P(-2440), P(-2358), P(-2286), P(-2220), P(-2161), P(-2106), P(-2056), P(-2009), P(-1965), P(-1924), P(-1886), P(-1850), P(-1815), P(-1783), P(-1752), P(-1722), P(-1694), P(-1667), P(-1642), P(-1617), P(-1593), P(-1570), P(-1548), P(-1526), P(-1506), P(-1485), P(-1466), P(-1447), P(-1429), P(-1411), P(-1393), P(-1376), P(-1360), P(-1343), P(-1328), P(-1312), P(-1297), P(-1282), P(-1268), P(-1253), P(-1240), P(-1226), P(-1213), P(-1199), P(-1187), P(-1174), P(-1161), P(-1149), P(-1137), P(-1125), P(-1114), P(-1102), P(-1091), P(-1080), P(-1069), P(-1059), P(-1048), P(-1038), P(-1027), P(-1017), P(-1007), P(-997), P(-988), P(-978), P(-969), P(-959), P(-950), P(-941), P(-932), P(-923), P(-915), P(-906), P(-898), P(-889), P(-881), P(-873), P(-864), P(-856), P(-848), P(-841), P(-833), P(-825), P(-817), P(-810), P(-802), P(-795), P(-788), P(-780), P(-773), P(-766), P(-759), P(-752), P(-745), P(-738), P(-732), P(-725), P(-718), P(-712), P(-705), P(-699), P(-692), P(-686), P(-679), P(-673), P(-667), P(-661), P(-655), P(-648), P(-642), P(-636), P(-630), P(-625), P(-619), P(-613), P(-607), P(-601), P(-596), P(-590), P(-584), P(-579), P(-573), P(-568), P(-562), P(-557), P(-551), P(-546), P(-540), P(-535), P(-530), P(-524), P(-519), P(-514), P(-509), P(-504), P(-498), P(-493), P(-488), P(-483), P(-478), P(-473), P(-468), P(-463), P(-458), P(-453), P(-449), P(-444), P(-439), P(-434), P(-429), P(-424), P(-420), P(-415), P(-410), P(-405), P(-401), P(-396), P(-391), P(-387), P(-382), P(-378), P(-373), P(-368), P(-364), P(-359), P(-355), P(-350), P(-346), P(-341), P(-337), P(-333), P(-328), P(-324), P(-319), P(-315), P(-311), P(-306), P(-302), P(-297), P(-293), P(-289), P(-284), P(-280), P(-276), P(-272), P(-267), P(-263), P(-259), P(-255), P(-250), P(-246), P(-242), P(-238), P(-234), P(-229), P(-225), P(-221), P(-217), P(-213), P(-209), P(-205), P(-200), P(-196), P(-192), P(-188), 
P(-184), P(-180), P(-176), P(-172), P(-168), P(-164), P(-159), P(-155), P(-151), P(-147), P(-143), P(-139), P(-135), P(-131), P(-127), P(-123), P(-119), P(-115), P(-111), P(-107), P(-103), P(-99), P(-95), P(-91), P(-87), P(-83), P(-79), P(-75), P(-71), P(-67), P(-63), P(-59), P(-55), P(-51), P(-47), P(-44), P(-40), P(-36), P(-32), P(-28), P(-24), P(-20), P(-16), P(-12), P(-8), P(-4), P(0), P(4), P(8), P(12), P(16), P(20), P(24), P(28), P(32), P(36), P(40), P(44), P(47), P(51), P(55), P(59), P(63), P(67), P(71), P(75), P(79), P(83), P(87), P(91), P(95), P(99), P(103), P(107), P(111), P(115), P(119), P(123), P(127), P(131), P(135), P(139), P(143), P(147), P(151), P(155), P(159), P(164), P(168), P(172), P(176), P(180), P(184), P(188), P(192), P(196), P(200), P(205), P(209), P(213), P(217), P(221), P(225), P(229), P(234), P(238), P(242), P(246), P(250), P(255), P(259), P(263), P(267), P(272), P(276), P(280), P(284), P(289), P(293), P(297), P(302), P(306), P(311), P(315), P(319), P(324), P(328), P(333), P(337), P(341), P(346), P(350), P(355), P(359), P(364), P(368), P(373), P(378), P(382), P(387), P(391), P(396), P(401), P(405), P(410), P(415), P(420), P(424), P(429), P(434), P(439), P(444), P(449), P(453), P(458), P(463), P(468), P(473), P(478), P(483), P(488), P(493), P(498), P(504), P(509), P(514), P(519), P(524), P(530), P(535), P(540), P(546), P(551), P(557), P(562), P(568), P(573), P(579), P(584), P(590), P(596), P(601), P(607), P(613), P(619), P(625), P(630), P(636), P(642), P(648), P(655), P(661), P(667), P(673), P(679), P(686), P(692), P(699), P(705), P(712), P(718), P(725), P(732), P(738), P(745), P(752), P(759), P(766), P(773), P(780), P(788), P(795), P(802), P(810), P(817), P(825), P(833), P(841), P(848), P(856), P(864), P(873), P(881), P(889), P(898), P(906), P(915), P(923), P(932), P(941), P(950), P(959), P(969), P(978), P(988), P(997), P(1007), P(1017), P(1027), P(1038), P(1048), P(1059), P(1069), P(1080), P(1091), P(1102), P(1114), P(1125), P(1137), 
P(1149), P(1161), P(1174), P(1187), P(1199), P(1213), P(1226), P(1240), P(1253), P(1268), P(1282), P(1297), P(1312), P(1328), P(1343), P(1360), P(1376), P(1393), P(1411), P(1429), P(1447), P(1466), P(1485), P(1506), P(1526), P(1548), P(1570), P(1593), P(1617), P(1642), P(1667), P(1694), P(1722), P(1752), P(1783), P(1815), P(1850), P(1886), P(1924), P(1965), P(2009), P(2056), P(2106), P(2161), P(2220), P(2286), P(2358), P(2440), P(2534), P(2648), P(2789), P(2979), P(3243)};
72 #endif
73 #endif
74 
75 // input is in 16p14
76 #define ONE_16P14 (1<<14)
myAsin(int x)77 int myAsin(int x){
78 
79 	int numerator, denominator, y, z;
80 
81 	if(x >= ONE_16P14){
82 		return (1<<(MPY_BIPT-2)); // PI/2
83 	}
84 	else if(-x >= ONE_16P14){
85 		return -(1<<(MPY_BIPT-2)); // -PI/2
86 	}
87 	else if(x==0){
88 		return 0;
89 	}
90 	else{
91 		y = ONE_16P14 -((x*x)>>14);
92 		denominator = mySqrt(y);
93 		numerator = (x<<7);
94 		z = numerator/denominator;
95 		return myAtan2(ONE_16P14, z);
96 	}
97 }
98 
99 
myAtan2(int valI,int valQ)100 int myAtan2(int valI, int valQ){
101 
102 	int x, y_old, y_new;
103 	short cosValInt, sinValInt;
104 	int idx, zerosI, zerosQ;
105 	int ii, quadrant = 0;
106 	unsigned int tempVal;
107 	unsigned int *cosTablePtr = (unsigned int*)radix4FftTwiddleArr;
108 
109 	if(!valI || !valQ){
110 		if(!valI && !valQ){ // undefined
111 			return 0;
112 		}
113 		else if(!valI){
114 			if(valQ>0) // PI/2
115 				return (1<<(MPY_BIPT-2));
116 			else // -PI/2
117 				return -(1<<(MPY_BIPT-2));
118 		}
119 		else{ // valQ == 0
120 			if(valI>0) // 0
121 				return 0;
122 			else // PI
123 				return (1<<(MPY_BIPT-1));
124 		}
125 	}
126 
127 	// determine quadrant, change sign
128 	if(valQ<0){
129 		valQ = -valQ;
130 		quadrant+=2;
131 	}
132 	if(valI<0){
133 		valI=-valI;
134 		quadrant++;
135 	}
136 	//x = 1.0f*valQ/valI;
137 	zerosQ = __clz(valQ);
138 	valQ<<=(zerosQ-1);
139 	ii = zerosQ-1;
140 	zerosI = __clz(valI);
141 	if(zerosI<16){ // get at least 16 bit precision
142 		valI>>=(16-zerosI);
143 		ii += (16-zerosI);
144 	}
145 	x = valQ/valI;
146 	if(MPY_BIPT>ii)
147 		x<<=(MPY_BIPT-ii);
148 	else
149 		x>>=(ii-MPY_BIPT);
150 
151 	// initialize
152 	if(x < 3<<(MPY_BIPT-2)) // 0.75
153 		y_old = (x*ONE_OVER_PI)>>(MPY_BIPT+1); //x/(2*PI);
154 	else if(x < 4<<MPY_BIPT){ // 4.0
155 		y_old = (x-(1<<MPY_BIPT))>>2;
156 		y_old += 3<<(MPY_BIPT-2); //(0.25f*(x-1)+0.75f)/(2*PI);
157 		y_old*=ONE_OVER_PI;
158 		y_old>>=MPY_BIPT+1;
159 	}
160 	else
161 		y_old = (15<<(MPY_BIPT-6)); // approx of (1.5f/(2*PI))<<MPY_BIPT
162 
163 	// Newton's method with NUM_ATAN_IT=10 fixed iterations
164 	for(ii=0;ii<NUM_ATAN_IT;ii++){
165 		idx = (y_old*MAX_FFT_SIZE)>>(MPY_BIPT-1);
166 #ifdef TWIDDLE_HALF_SIZE
167 		tempVal = cosTablePtr[idx&(MAX_FFT_SIZE/2-1)];
168 		if(idx&(MAX_FFT_SIZE/2)){ // [-pi,0)
169 			cosValInt = -(short) (tempVal&0xffff);
170 			sinValInt = -(short) (tempVal>>16);
171 		}
172 		else{ // [0, pi)
173 			cosValInt = (short) (tempVal&0xffff);
174 			sinValInt = (short) (tempVal>>16);
175 		}
176 #else
177 		tempVal = cosTablePtr[idx&(MAX_FFT_SIZE-1)]; // apply modulo
178 		cosValInt = (short) (tempVal&0xffff);
179 		sinValInt = (short) (tempVal>>16);
180 #endif
181 		y_new = (x*(cosValInt+(1<<TWIDDLE_BIPT)))>>TWIDDLE_BIPT;
182 		y_new -=sinValInt>>(TWIDDLE_BIPT-MPY_BIPT);
183 		y_new *=ONE_OVER_PI;
184 		y_new >>=(MPY_BIPT+2);
185 		y_new +=y_old;
186 
187 		if(y_new>(1<<(MPY_BIPT-2))-1)
188 			y_new = (1<<(MPY_BIPT-2))-8;
189 
190 		y_old = y_new;
191 	}
192 
193 	switch(quadrant){
194 		case 1:
195 			return -y_new+(1<<(MPY_BIPT-1));
196 		case 2:
197 			return -y_new;
198 		case 3:
199 			return y_new-(1<<(MPY_BIPT-1)); //-PI
200 		default:
201 			return y_new;
202 	}
203 }
204 
mySqrtLut(int x)205 unsigned int mySqrtLut(int x){
206 	unsigned int y = 0;
207 	int zerosX, shift, shiftBack, idx, offset;
208 	unsigned int y1;
209 
210 	if(x>0){ // return 0 for non-positive values
211 		zerosX = __clz(x);
212 		shift = 32-TABLE_SIZE_POW-zerosX;
213 		shiftBack = (shift+1)>>1;
214 		if(shiftBack>0){
215 			idx = x>>(2*shiftBack);
216 			offset = x-(idx<<(2*shiftBack));
217 			y = sqrtLUT[idx-1]+1;
218 			y *=(1<<(2*shiftBack))-offset;
219 			y1 = sqrtLUT[idx]+1;
220 			y1 *=offset;
221 			y+=y1;
222 			shiftBack -=(2*shiftBack)+MY_SQRT_BIPT-(TABLE_SIZE_POW>>1);
223 		}
224 		else{
225 			idx = x<<(-2*shiftBack);
226 			y = sqrtLUT[idx-1]+1;
227 			shiftBack -=MY_SQRT_BIPT-(TABLE_SIZE_POW>>1);
228 		}
229 		if(shiftBack>0){
230 			y<<=shiftBack;
231 		}
232 		else{
233 			y>>=(-shiftBack);
234 		}
235 	}
236 	return y;
237 }
238 
239 #ifndef ARM_DS5
/**
 * Integer square root of |x|.
 *
 * @param x  input value; its magnitude is used, so negative inputs are accepted.
 * @return   floor(sqrt(|x|)).
 *
 * Bit-by-bit (digit-by-digit) method. Changes from the previous version:
 *  - the comparison is ">=" so perfect squares are exact (mySqrt(1) == 1,
 *    mySqrt(4) == 2 rather than 0 and 1);
 *  - the magnitude is formed in unsigned arithmetic, so INT_MIN is well defined;
 *  - the starting bit is found by a short loop instead of __clz, which removes the
 *    out-of-range shift for a 32-bit magnitude and the reliance on __clz(0).
 */
unsigned int mySqrt(int x){
	unsigned int num = (x>0) ? (unsigned int)x : 0u-(unsigned int)x;
	unsigned int res = 0;
	unsigned int bit = 1u<<30; // highest power of four representable in 32 bits

	// start at the largest power of four that does not exceed num
	while(bit>num){
		bit>>=2;
	}

	while(bit){
		unsigned int temp = res+bit;
		res>>=1;
		if(num>=temp){
			num-=temp;
			res +=bit;
		}
		bit>>=2;
	}
	return res;
}
262 #endif
263 
// general function: unpacks complex subcarriers, demodulates pilots, and estimates phase-roll and power per subband
/**
 * @param dataPtrTmp        packed input samples, one 16-bit word each: low byte = signed I,
 *                          high byte = signed Q; advanced by dataPtrStride words per sample.
 * @param writePtr          output, interleaved I/Q shorts, two shorts per sample.
 * @param pilotVals         sign bits, LSB first (1 -> +1, 0 -> -1); only the portable C path
 *                          consumes them, the ARM assembly paths do not reference this argument.
 * @param phiCorrPtr        twiddle-table indices for the per-subcarrier rotation (REMOVE_IIR only).
 * @param numCsiScHlf       number of samples to process.
 * @param dataPtrStride     input stride in 16-bit words.
 * @param phiCorrPtrStride  stride through phiCorrPtr (REMOVE_IIR only).
 * @param myStack           running state, read on entry and updated on exit:
 *                          [0] previous sample packed as I (low 16 bits) | Q (high 16 bits),
 *                          [1] accumulated power I*I + Q*Q,
 *                          [2] running sum of lastI*I + lastQ*Q,
 *                          [3] running sum of lastI*Q - lastQ*I.
 *
 * Without REMOVE_IIR the output samples are the 8-bit inputs scaled by 256. With REMOVE_IIR
 * each sample is first rotated by the twiddle entry selected through phiCorrPtr.
 */
void processSubCarrier(unsigned short *dataPtrTmp, short *writePtr, unsigned int pilotVals, const short* phiCorrPtr, int numCsiScHlf, int dataPtrStride, int phiCorrPtrStride, unsigned int *myStack){
	int ii;

#if defined(ARM_GCC) || defined(ARM_DS5)
#ifdef ARM_DS5
    int reg6, reg7, reg8;
#endif
#ifndef REMOVE_IIR
	unsigned int tempVal = myStack[0];
	unsigned int powerEst = myStack[1];
	int runningSumI = myStack[2];
	int runningSumQ = myStack[3];
#endif
#else
	signed char tempI, tempQ, pilotSym;
	unsigned int tempVal;
	unsigned short utemp16;
	short lastValI, lastValQ;
	short tempValI = (myStack[0])&0xffff;
	short tempValQ = ((myStack[0])>>16)&0xffff;
	unsigned int powerEst = myStack[1];
	int runningSumI = myStack[2];
	int runningSumQ = myStack[3];
#ifdef REMOVE_IIR
	int idx;
	short cosPhi, sinPhi;
	int resI, resQ;
#endif
#endif

	for(ii=0;ii<numCsiScHlf;ii++){
#if defined ARM_DS5
#ifdef REMOVE_IIR
        __asm volatile
        {
            LDRH reg6, [dataPtrTmp], (2*dataPtrStride)
            SXTB reg7, reg6
            SXTB reg8, reg6, ROR #8
            LDR reg6, [myStack, #4]
            PKHBT reg7, reg7, reg8, LSL #16
            SMLAD reg6, reg7, reg7, reg6
            STR reg6, [myStack, #4]
            LDRH reg8, [phiCorrPtr], (2*phiCorrPtrStride)
            LDR reg6, [radix4FftTwiddleArr, reg8, LSL#2]
            SMUSD reg8, reg6, reg7
            SMUADX reg7, reg6, reg7
            ASR reg8, reg8, #7
            LDR reg6, [myStack]
            PKHBT reg8, reg8, reg7, LSL #9
            STR reg8, [myStack]
            SMUAD reg7, reg6, reg8
            STR reg8, [writePtr], #4
            SMUSDX reg8, reg6, reg8
            LDR reg6, [myStack, #8]
            ADD reg6, reg6, reg7, ASR #12
            STR reg6, [myStack, #8]
            LDR reg7, [myStack, #12]
            ADD reg7, reg7, reg8, ASR #12
            STR reg7, [myStack, #12]
        }
#else
        __asm volatile
        {
            LDRH reg6, [dataPtrTmp], (2*dataPtrStride)
            SXTB reg7, reg6
            SXTB reg8, reg6, ROR #8
            MOV reg6, tempVal
            PKHBT tempVal, reg7, reg8, LSL #16
            SMLAD powerEst, tempVal, tempVal, powerEst
            SMLAD runningSumI, reg6, tempVal, runningSumI
            SMLSDX runningSumQ, reg6, tempVal, runningSumQ
            LSL reg7, reg7, #8
            PKHBT reg7, reg7, reg8, LSL #24
            STR reg7, [writePtr], #4
        }
#endif
#elif defined ARM_GCC
#ifdef REMOVE_IIR
		asm volatile (
			"LDRH r6, [%[read]], %[strideDw] \n\t"	// +r6
			"SXTB r7, r6 \n\t"						// +r7, r6
			"SXTB r8, r6, ROR #8 \n\t"				// +r8, r7, -r6
			"LDR r6, [%[myStack], #4] \n\t"			// +r6, ,r7, r8; load powerEst
			"PKHBT r7, r7, r8, LSL #16 \n\t"		// r6, r7, -r8
			"SMLAD r6, r7, r7, r6 \n\t"				// r6, r7
			"STR r6, [%[myStack], #4] \n\t"			// r7, -r6; store powerEst
			"LDRH r8, [%[phiCorr]], %[phiCorrStrideHw] \n\t" // +r8, r7
			"LDR r6, [%[twiddleArr], r8, LSL#2] \n\t" // +r6, r7, -r8
			"SMUSD r8, r6, r7 \n\t"					// +r8, r6, r7
			"SMUADX r7, r6, r7 \n\t"				// r7, r8, -r6
			"ASR r8, r8, #7 \n\t"					// r7, r8
			"LDR r6, [%[myStack]] \n\t"				// +r6, r7, r8; load tempVal
			"PKHBT r8, r8, r7, LSL #9 \n\t"			// r6, r8, -r7
			"STR r8, [%[myStack]] \n\t"				// r6, r8 ; store new tempVal
			"SMUAD r7, r6, r8 \n\t"					// +r7, r6, r8
			"STR r8, [%[write]], #4 \n\t"			// r6, r7, r8
			"SMUSDX r8, r6, r8 \n\t"				// r7, r8, -r6
			"LDR r6, [%[myStack], #8] \n\t"			// +r6, r7, r8; load runningSumI
			"ADD r6, r6, r7, ASR #12 \n\t"			// r6, r8, -r7
			"STR r6, [%[myStack], #8] \n\t"			// r8, -r6; store runningSumI
			"LDR r7, [%[myStack], #12] \n\t"		// +r7, r8; load runningSumQ
			"ADD r7, r7, r8, ASR #12 \n\t"			// r7, -r8
			"STR r7, [%[myStack], #12] \n\t"		// -r8; store runningSumQ
			: [read]"+r"(dataPtrTmp), [write]"+r"(writePtr), [phiCorr]"+r"(phiCorrPtr)
			: [strideDw]"r"(2*dataPtrStride), [phiCorrStrideHw]"r"(2*phiCorrPtrStride), [twiddleArr]"r"(radix4FftTwiddleArr), [myStack]"r"(myStack)
			: "r6", "r7", "r8", "memory");
#else
		// "memory" clobber: the block loads through %[read] and stores through %[write],
		// neither of which is visible to the compiler as a memory operand
		asm volatile (
			"LDRH r6, [%[read]], %[strideDw] \n\t"
			"SXTB r7, r6 \n\t"
			"SXTB r8, r6, ROR #8 \n\t"
			"MOV r6, %[tempVal] \n\t"
			"PKHBT %[tempVal], r7, r8, LSL #16 \n\t"
			"SMLAD %[powerEst], %[tempVal], %[tempVal], %[powerEst] \n\t"
			"SMLAD %[sumI], r6, %[tempVal], %[sumI] \n\t"
			"SMLSDX %[sumQ], r6, %[tempVal], %[sumQ] \n\t"
			"LSL r7, r7, #8 \n\t"
			"PKHBT r7, r7, r8, LSL #24 \n\t"
			"STR r7, [%[write]], #4 \n\t"
			: [read]"+r"(dataPtrTmp), [write]"+r"(writePtr), [tempVal]"+r"(tempVal), [powerEst] "+r"(powerEst), [sumI]"+r"(runningSumI), [sumQ]"+r"(runningSumQ)
			: [strideDw]"r"(2*dataPtrStride)
			: "r6", "r7", "r8", "memory");
#endif
#elif !defined(ARM_DS5) && !defined(ARM_GCC) // no ARM
		// unpack one sample: low byte is I, high byte is Q (both signed 8 bit)
		utemp16 = *dataPtrTmp;
		dataPtrTmp+=dataPtrStride;
		tempI = utemp16&0xff;
		tempQ = (utemp16>>8);
		// next sign bit: 1 -> +1, 0 -> -1
		pilotSym = 2*(pilotVals&1)-1; pilotVals>>=1;
		lastValI = tempValI;
		lastValQ = tempValQ;
		tempValI = (short)tempI*pilotSym;
		tempValQ = (short)tempQ*pilotSym;
		powerEst += tempValI*tempValI + tempValQ*tempValQ;
#ifdef REMOVE_IIR
		// rotate by the twiddle entry {cos, sin} selected through phiCorrPtr
		idx = *phiCorrPtr; phiCorrPtr+=phiCorrPtrStride;
		cosPhi = radix4FftTwiddleArr[2*idx];
		sinPhi = radix4FftTwiddleArr[2*idx+1];
		resI = tempValI*cosPhi-tempValQ*sinPhi;
		resQ = tempValI*sinPhi+tempValQ*cosPhi;
		tempValI = (short)(resI>>(TWIDDLE_BIPT-8));
		tempValQ = (short)(resQ>>(TWIDDLE_BIPT-8));
		runningSumI += ((lastValI*tempValI) + (lastValQ*tempValQ))>>12;
		runningSumQ += ((lastValI*tempValQ) - (lastValQ*tempValI))>>12;
		writePtr[0] = tempValI;
		writePtr[1] = tempValQ;
#else
		runningSumI += ((lastValI*tempValI) + (lastValQ*tempValQ));
		runningSumQ += ((lastValI*tempValQ) - (lastValQ*tempValI));
		// scale by 2^8 with a multiply: left-shifting a negative value is undefined
		writePtr[0] = (short)(tempValI*256);
		writePtr[1] = (short)(tempValQ*256);
#endif
		writePtr+=2;
#endif
	}

	// write the running state back for the next call
#if defined(ARM_GCC) || defined(ARM_DS5)
#ifndef REMOVE_IIR
	myStack[0] = tempVal;
	myStack[1] = powerEst;
	myStack[2] = runningSumI;
	myStack[3] = runningSumQ;
#endif
#else
	tempVal = ((unsigned int)tempValQ)<<16;
	myStack[0] = (((unsigned int)tempValI)&0xffff) | tempVal;
	myStack[1] = powerEst;
	myStack[2] = runningSumI;
	myStack[3] = runningSumQ;
#endif
}
436 
437 
// separate function due to non-uniform spacing of subcarriers; keep zeros between subcarriers to avoid distortion
// FFT size is larger, but not significant due to 20 MHz case
/**
 * Same per-sample processing as processSubCarrier(), but the output is written with a
 * stride of Ng complex samples and the write position is pulled back once.
 *
 * @param dataPtrTmp        packed input samples (low byte signed I, high byte signed Q),
 *                          advanced by dataPtrStride 16-bit words per sample.
 * @param writePtr          output, interleaved I/Q shorts; advanced by 2*Ng shorts per sample.
 * @param pilotVals         sign bits, LSB first (1 -> +1, 0 -> -1); only the portable C path
 *                          applies them.
 * @param phiCorrPtr        twiddle-table indices for the rotation (REMOVE_IIR only).
 * @param numCsiScHlf       number of samples to process.
 * @param dataPtrStride     input stride in 16-bit words.
 * @param phiCorrPtrStride  stride through phiCorrPtr (REMOVE_IIR only).
 * @param Ng                output spacing in complex samples. NOTE(review): used as a divisor
 *                          under REMOVE_IIR, so callers presumably never pass 0 - confirm.
 * @param jumpIdx           after the sample with this loop index, writePtr steps back by one
 *                          complex sample and (REMOVE_IIR) phiCorrPtr by phiCorrPtrStride/Ng.
 * @param myStack           running state, read on entry and updated on exit:
 *                          [0] previous sample packed I (low 16) | Q (high 16), [1] power,
 *                          [2]/[3] running sums of the product with the previous sample.
 */
void processSubCarrier20MHz(unsigned short *dataPtrTmp, short *writePtr, unsigned int pilotVals, const short* phiCorrPtr, int numCsiScHlf, int dataPtrStride, int phiCorrPtrStride, unsigned char Ng, unsigned char jumpIdx, unsigned int *myStack){
	int ii;
#if defined(ARM_GCC) || defined(ARM_DS5)

#ifdef ARM_DS5
	int reg6, reg7, reg8;
#endif
#ifndef REMOVE_IIR
	unsigned int tempVal = myStack[0];
	unsigned int powerEst = myStack[1];
	int runningSumI = myStack[2];
	int runningSumQ = myStack[3];
#endif

#else // not ARM

	signed char tempI, tempQ, pilotSym;
	unsigned int tempVal;
	unsigned short utemp16;
	short lastValI, lastValQ;
	short tempValI = (myStack[0])&0xffff;
	short tempValQ = ((myStack[0])>>16)&0xffff;
	unsigned int powerEst = myStack[1];
	int runningSumI = myStack[2];
	int runningSumQ = myStack[3];
#ifdef REMOVE_IIR
	int idx;
	short cosPhi, sinPhi;
	int resI, resQ;
#endif

#endif

	for(ii=0;ii<numCsiScHlf;ii++){
#if defined(ARM_DS5)
#ifdef REMOVE_IIR
    __asm volatile
    {
        LDRH reg6, [dataPtrTmp], (2*dataPtrStride)
        SXTB reg7, reg6
        SXTB reg8, reg6, ROR #8
        LDR reg6, [myStack, #4]
        PKHBT reg7, reg7, reg8, LSL #16
        SMLAD reg6, reg7, reg7, reg6
        STR reg6, [myStack, #4]
        LDRH reg8, [phiCorrPtr], (2*phiCorrPtrStride)
        LDR reg6, [radix4FftTwiddleArr, reg8, LSL#2]
        SMUSD reg8, reg6, reg7
        SMUADX reg7, reg6, reg7
        ASR reg8, reg8, #7
        LDR reg6, [myStack]
        PKHBT reg8, reg8, reg7, LSL #9
        STR reg8, [myStack]
        SMUAD reg7, reg6, reg8
        STR reg8, [writePtr], (4*Ng)
        SMUSDX reg8, reg6, reg8
        LDR reg6, [myStack, #8]
        ADD reg6, reg6, reg7, ASR #12
        STR reg6, [myStack, #8]
        LDR reg7, [myStack, #12]
        ADD reg7, reg7, reg8, ASR #12
        STR reg7, [myStack, #12]
    }
#else
    __asm volatile
    {
        LDRH reg6, [dataPtrTmp], (2*dataPtrStride)
        SXTB reg7, reg6
        SXTB reg8, reg6, ROR #8
        MOV reg6, tempVal
        PKHBT tempVal, reg7, reg8, LSL #16
        SMLAD powerEst, tempVal, tempVal, powerEst
        SMLAD runningSumI, reg6, tempVal, runningSumI
        SMLSDX runningSumQ, reg6, tempVal, runningSumQ
        LSL reg7, reg7, #8
        PKHBT reg7, reg7, reg8, LSL #24
        STR reg7, [writePtr], (4*Ng)
    }
#endif

#elif defined(ARM_GCC)
#ifdef REMOVE_IIR
	// "memory" clobber: the block reads and writes myStack[] and stores through %[write]
	asm volatile (
		"LDRH r6, [%[read]], %[strideDw] \n\t"
		"SXTB r7, r6 \n\t"
		"SXTB r8, r6, ROR #8 \n\t"
		"LDR r6, [%[myStack], #4] \n\t"
		"PKHBT r7, r7, r8, LSL #16 \n\t"
		"SMLAD r6, r7, r7, r6 \n\t"
		"STR r6, [%[myStack], #4] \n\t"
		"LDRH r8, [%[phiCorr]], %[phiCorrStrideHw] \n\t"
		"LDR r6, [%[twiddleArr], r8, LSL#2] \n\t"
		"SMUSD r8, r6, r7 \n\t"
		"SMUADX r7, r6, r7 \n\t"
		"ASR r8, r8, #7 \n\t"
		"LDR r6, [%[myStack]] \n\t"
		"PKHBT r8, r8, r7, LSL #9 \n\t"
		"STR r8, [%[myStack]] \n\t"
		"SMUAD r7, r6, r8 \n\t"
		"STR r8, [%[write]], %[Ng] \n\t"
		"SMUSDX r8, r6, r8 \n\t"
		"LDR r6, [%[myStack], #8] \n\t"
		"ADD r6, r6, r7, ASR #12 \n\t"
		"STR r6, [%[myStack], #8] \n\t"
		"LDR r7, [%[myStack], #12] \n\t"
		"ADD r7, r7, r8, ASR #12 \n\t"
		"STR r7, [%[myStack], #12] \n\t"
		: [read]"+r"(dataPtrTmp), [write]"+r"(writePtr), [phiCorr]"+r"(phiCorrPtr)
		: [strideDw]"r"(2*dataPtrStride), [phiCorrStrideHw]"r"(2*phiCorrPtrStride), [twiddleArr]"r"(radix4FftTwiddleArr), [Ng]"r"(4*Ng), [myStack]"r"(myStack)
		: "r6", "r7", "r8", "memory");
#else
	// "memory" clobber: the block loads through %[read] and stores through %[write]
	asm volatile (
		"LDRH r6, [%[read]], %[strideDw] \n\t"
		"SXTB r7, r6 \n\t"
		"SXTB r8, r6, ROR #8 \n\t"
		"MOV r6, %[tempVal] \n\t"
		"PKHBT %[tempVal], r7, r8, LSL #16 \n\t"
		"SMLAD %[powerEst], %[tempVal], %[tempVal], %[powerEst] \n\t"
		"SMLAD %[sumI], r6, %[tempVal], %[sumI] \n\t"
		"SMLSDX %[sumQ], r6, %[tempVal], %[sumQ] \n\t"
		"LSL r7, r7, #8 \n\t"
		"PKHBT r7, r7, r8, LSL #24 \n\t"
		"STR r7, [%[write]], %[Ng] \n\t"
		: [read]"+r"(dataPtrTmp), [write]"+r"(writePtr), [pilots]"+r"(pilotVals), [tempVal]"+r"(tempVal), [powerEst] "+r"(powerEst), [sumI]"+r"(runningSumI), [sumQ]"+r"(runningSumQ)
		: [strideDw]"r"(2*dataPtrStride), [Ng]"r"(4*Ng)
		: "r6", "r7", "r8", "memory");
#endif
#elif !defined(ARM_GCC) && !defined(ARM_DS5)
		// unpack one sample: low byte is I, high byte is Q (both signed 8 bit)
		utemp16 = *dataPtrTmp;
		dataPtrTmp+=dataPtrStride;
		tempI = utemp16&0xff;
		tempQ = (utemp16>>8);
		// next sign bit: 1 -> +1, 0 -> -1
		pilotSym = 2*(pilotVals&1)-1; pilotVals>>=1;
		lastValI = tempValI;
		lastValQ = tempValQ;
		tempValI = (short)tempI*pilotSym;
		tempValQ = (short)tempQ*pilotSym;
		powerEst += tempValI*tempValI + tempValQ*tempValQ;
#ifdef REMOVE_IIR
		// rotate by the twiddle entry {cos, sin} selected through phiCorrPtr
		idx = *phiCorrPtr; phiCorrPtr+=phiCorrPtrStride;
		cosPhi = radix4FftTwiddleArr[2*idx];
		sinPhi = radix4FftTwiddleArr[2*idx+1];
		resI = tempValI*cosPhi-tempValQ*sinPhi;
		resQ = tempValI*sinPhi+tempValQ*cosPhi;
		tempValI = (short)(resI>>(TWIDDLE_BIPT-8));
		tempValQ = (short)(resQ>>(TWIDDLE_BIPT-8));
#else
		// scale by 2^8 with a multiply: left-shifting a negative value is undefined
		tempValI = (short)(tempValI*256);
		tempValQ = (short)(tempValQ*256);
#endif
		runningSumI += ((lastValI*tempValI) + (lastValQ*tempValQ))>>12;
		runningSumQ += ((lastValI*tempValQ) - (lastValQ*tempValI))>>12;
		writePtr[0] = tempValI;
		writePtr[1] = tempValQ;
		writePtr+=2*Ng;
#endif
		// non-uniform spacing: step back once after sample jumpIdx
		if(ii == jumpIdx){
			writePtr-=2;
#ifdef REMOVE_IIR
			phiCorrPtr-=phiCorrPtrStride/Ng;
#endif
		}
	}
	// write the running state back for the next call
#if defined(ARM_GCC) || defined(ARM_DS5)
#ifndef REMOVE_IIR
	myStack[0] = tempVal;
	myStack[1] = powerEst;
	myStack[2] = runningSumI;
	myStack[3] = runningSumQ;
#endif
#else // not ARM
	tempVal = ((unsigned int)tempValQ)<<16;
	myStack[0] = (((unsigned int)tempValI)&0xffff) | tempVal;
	myStack[1] = powerEst;
	myStack[2] = runningSumI;
	myStack[3] = runningSumQ;
#endif
}
618 
619 // function cycles through rx/tx combinations and then calls subfunction for processing
620 // for 40 MHz per 20 MHz subband, for 80/160 MHz per 40 MHz block
readHexDataDemodulateProcessVhtHeNg1(hal_pktinfo_t * pktinfo,hal_wls_processing_input_params_t * inputVals,unsigned int * dataPtr,int csiDataSize,unsigned int * fftInBfr,unsigned int * powerPerSubband,int * phaseRollPtr,int chNum)621 void readHexDataDemodulateProcessVhtHeNg1(hal_pktinfo_t *pktinfo, hal_wls_processing_input_params_t *inputVals, unsigned int* dataPtr, int csiDataSize, unsigned int *fftInBfr, unsigned int *powerPerSubband, int *phaseRollPtr, int chNum) {
622 
623 	unsigned short *dataPtrTmp;
624 	short *writePtr;
625 	const short *phiCorrPtrTemp = 0;
626 	int ii, jj, kk, bb;
627 	int pp, nRx, nTx;
628 	int dcZeros = 0;
629 	int dcZeros2 = 0;
630 	int numBands = 1;
631 	short scStart = 0;
632 	short scStop = 0;
633 	int scStart2 = 0;
634 	int scStop2 = 0;
635 	int numTonesIt, tonePos;
636 	int nRx0 = pktinfo->nRx + 1;
637 	int nTx0 = pktinfo->nTx + 1;
638 	//int NgDsfShift = pktinfo->NgDsfShift;
639 	int sigSubbands = pktinfo->sigBw;
640 	//int subbandSize = 1 << (6 - NgDsfShift);
641 	int bandwidth = 20 << sigSubbands;
642 	int ifftSizeOsf = 1 << (pktinfo->fftSize + 6);
643 	int dataPtrStride = nTx0*nRx0; // now in 2 byte steps
644 	const unsigned int *pilotValsPtr = pilotDummyArray;
645 	int phiCorrPtrStride = 0;
646 	unsigned int tempPow, myStack[4]; // includes tempVal|powerEst| runningSumI|runningSumQ
647 	unsigned char pilotToneIndexVHT80[SC5_VHT80_PILOTS] = { 25,53,89,117,139,167,203,231 };
648 	unsigned char pilotToneIndexVHT40[SC5_VHT40_PILOTS] = { 11,39,53,75,89,117 };
649 	unsigned char pilotToneIndexVHT20[SC5_VHT20_PILOTS] = { 11,25,39,53 };
650 	const unsigned char *pilotIdx;
651 	int numPilots, pilotSkip, dcSkip;
652 
653 #ifdef REMOVE_IIR
654 	int devSubbands = pktinfo->rxDevBw;
655 	const short *phiCorrPtr = 0;
656 	int phiCorrScale = 0;
657 	int channelOffset = 0;
658 	int deviceBandwidth = 20 << devSubbands;
659 	//if (deviceBandwidth > 80) { // 80+80, is really 80 MHz
660 	//	deviceBandwidth = 80;
661 	//	devSubbands = 2;
662 	//}
663 #ifdef STA_20_ONLY
664 	phiCorrPtr = phiCorr64;
665 #else
666 	phiCorrPtr = phiCorr512;
667 	if (pktinfo->rxDevBw) { // > 20 MHz
668 		phiCorrPtr = phiCorrScBt512;
669 	}
670 #endif
671 #endif
672 	nTx = (pktinfo->nTx<inputVals->nTx) ? (pktinfo->nTx + 1) : (inputVals->nTx + 1);
673 	if (MAX_TX<nTx) {
674 		nTx = MAX_TX;
675 	}
676 	pktinfo->nTx = nTx - 1;
677 	nRx = (pktinfo->nRx<inputVals->nRx) ? (pktinfo->nRx + 1) : (inputVals->nRx + 1);
678 	if (MAX_RX<nRx) {
679 		nRx = MAX_RX;
680 	}
681 	pktinfo->nRx = nRx - 1;
682 
683 #if defined(REMOVE_IIR) && !defined(STA_20_ONLY)
684 	if (bandwidth<deviceBandwidth) {	// figure out channel offset
685 		if (chNum<36) { // 2.4 GHZ
686 			channelOffset = 0; // control channel always on bottom
687 		}
688 		else if (chNum<149) { // 5 GHz
689 			channelOffset = ((chNum - 36) >> (2 + sigSubbands))&((1 << (devSubbands - sigSubbands)) - 1);
690 		}
691 		else {
692 			channelOffset = ((chNum - 149) >> (2 + sigSubbands))&((1 << (devSubbands - sigSubbands)) - 1);
693 		}
694 		// add offset
695 		phiCorrPtr += channelOffset*(1 << (9 + sigSubbands - devSubbands));
696 	}
697 	phiCorrScale = 3 - devSubbands; // since filter is in 160 MHz resolution
698 #endif
699 
700 	if (pktinfo->packetType == 3) { // VHT
701 		pilotSkip = 0;
702 		dcSkip = 0;
703 		if ((bandwidth == 80) || (bandwidth == 160)) {
704 			// 80 MHz 128+[-122:-2 2:122]
705 			// minus pilots on [103, 75, 39, 11, -103, -75, -39, -11]
706 			numPilots = SC5_VHT80_PILOTS;
707 			pilotIdx = pilotToneIndexVHT80;
708 			dcZeros = 3;
709 			scStart = 6;
710 			scStop = 127;
711 			scStart2 = 130;
712 			scStop2 = 251;
713 			if (bandwidth == 160) {
714 				numBands = 2;
715 				dcZeros2 = 11;
716 #ifndef COMPUTE_80P80_AS_160
717 				pktinfo->sigBw = pktinfo->sigBw - 1;
718 				pktinfo->nRx = 2 * nRx - 1;
719 #endif
720 			}
721 		}
722 		else if (bandwidth == 40) {
723 			// 40 MHz 64+[-58:-2 2:58]
724 			// minus pilots on [-53,-25,-11,11,25,53]
725 			numPilots = SC5_VHT40_PILOTS;
726 			pilotIdx = pilotToneIndexVHT40;
727 			dcZeros = 3;
728 			scStart = 6;
729 			scStop = 63;
730 			scStart2 = 66;
731 			scStop2 = 123;
732 		}
733 		else { //if(bandwidth == 20)
734 			   // 20 MHz 32+[-28:-1 1:28]
735 			   // minus pilots on [-21, -7, 7, 21]
736 			numPilots = SC5_VHT20_PILOTS;
737 			pilotIdx = pilotToneIndexVHT20;
738 			dcZeros = 1;
739 			scStart = 4;
740 			scStop = 32;
741 			scStart2 = 33;
742 			scStop2 = 61;
743 		}
744 	}
745 	else { // HE
746 		pilotSkip = 2;
747 		if ((bandwidth == 80) || (bandwidth == 160)) {
748 			// 80 MHz 128+[-125:-1 1:125]
749 			// minus pilots on [-117, -100, -23, -6, 6, 23, 100, 117]
750 			numPilots = SC5_HE80_PILOTS;
751 			pilotIdx = pilotToneIndexHE80;
752 			dcZeros = 1;
753 			dcSkip = 0;
754 			scStart = 3;
755 			scStop = 128;
756 			scStart2 = 129;
757 			scStop2 = 254;
758 			if (bandwidth == 160) {
759 				numBands = 2;
760 				dcZeros2 = 5;
761 #ifndef COMPUTE_80P80_AS_160
762 				pktinfo->sigBw = pktinfo->sigBw - 1;
763 				pktinfo->nRx = 2 * nRx - 1;
764 #endif
765 			}
766 		}
767 		else if (bandwidth == 40) {
768 			// 40 MHz 64+[-61:-1 1:61]
769 			// minus pilots on [-53,-36,-26,-9,9,26,36,53]
770 			numPilots = SC5_HE40_PILOTS;
771 			pilotIdx = pilotToneIndexHE40;
772 			dcZeros = 1;
773 			dcSkip = 0;
774 			scStart = 3;
775 			scStop = 64;
776 			scStart2 = 65;
777 			scStop2 = 126;
778 		}
779 		else { //if(bandwidth == 20)
780 			   // 20 MHz 32+[-31:0 1:31]
781 			   // minus pilots on [-29, -12, 12, 29]
782 			numPilots = SC5_HE20_PILOTS;
783 			pilotIdx = pilotToneIndexHE20;
784 			dcZeros = 0;
785 			dcSkip = 1;
786 			scStart = 1;
787 			scStop = 33;
788 			scStart2 = 33;
789 			scStop2 = 64;
790 		}
791 	}
792 #ifdef REMOVE_IIR
793 	phiCorrPtr += scStart << phiCorrScale;
794 	phiCorrPtrStride = 1 << phiCorrScale;
795 #endif
796 
797 	pp = 0;
798 	for (ii = 0; ii<nTx; ii++) {
799 		for (jj = 0; jj<nRx; jj++) {
800 			dataPtrTmp = ((unsigned short*)dataPtr) + (ii + jj*nTx0);
801 			memset(myStack, 0, 4 * 4);
802 #if defined(COMPUTE_80P80_AS_160) && defined(REMOVE_IIR)
803 			phiCorrPtrTemp = phiCorrPtr;
804 #endif
805 			for (bb = 0; bb < numBands; bb++) {
806 #ifdef COMPUTE_80P80_AS_160
807 				writePtr = (short*)(fftInBfr + ifftSizeOsf*(jj + ii*nRx) + bb * 256);
808 				//memset(fftInBfr + ifftSizeOsf*(jj + ii*nRx) + bb*256, 0, 4 * ifftSizeOsf);
809 #else
810 				writePtr = (short*)(fftInBfr + ifftSizeOsf*(jj + bb* nRx + ii*nRx*numBands));
811 				//memset(fftInBfr + ifftSizeOsf*(jj + bb* nRx + ii*nRx*numBands), 0, 4 * ifftSizeOsf);
812 #ifdef REMOVE_IIR
813 				phiCorrPtrTemp = phiCorrPtr;
814 #endif
815 #endif
816 				memset(writePtr, 0, 4 * ifftSizeOsf);
817 				writePtr += 2 * scStart;
818 				tonePos = scStart;
819 				tempPow = 0;
820 				// negative spectrum half
821 				for (kk = 0; kk < numPilots / 2; kk++) {
822 					numTonesIt = pilotIdx[kk] - tonePos;
823 					processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numTonesIt, dataPtrStride, phiCorrPtrStride, myStack);
824 					tonePos = pilotIdx[kk] + 1; // move write ptr
825 					dataPtrTmp += (numTonesIt + pilotSkip)*dataPtrStride; // move read ptr
826 #ifdef REMOVE_IIR
827 					phiCorrPtrTemp += (numTonesIt + 1)*phiCorrPtrStride;
828 #endif
829 					writePtr += 2 * (numTonesIt + 1);
830 					tempPow += myStack[1];
831 					myStack[0] = 0;
832 					myStack[1] = 0;
833 				}
834 				numTonesIt = scStop - tonePos;
835 				processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numTonesIt, dataPtrStride, phiCorrPtrStride, myStack);
836 				dataPtrTmp += (numTonesIt + dcSkip)*dataPtrStride;
837 #ifdef REMOVE_IIR
838 				phiCorrPtrTemp += (numTonesIt + dcZeros)*phiCorrPtrStride;
839 #endif
840 				tempPow += myStack[1];
841 				myStack[1] = 0;
842 
843 				powerPerSubband[pp++] = tempPow;
844 
845 				// positive spectrum half
846 #ifdef COMPUTE_80P80_AS_160
847 				writePtr = (short*)(fftInBfr + ifftSizeOsf*(jj + ii*nRx) + bb * 256 + scStart2);
848 #else
849 				writePtr = (short*)(fftInBfr + ifftSizeOsf*(jj + bb* nRx + ii*nRx*numBands) + scStart2);
850 #endif
851 				tonePos = scStart2;
852 				tempPow = 0;
853 				// negative spectrum half
854 				for (; kk < numPilots; kk++) {
855 					numTonesIt = pilotIdx[kk] - tonePos;
856 					processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numTonesIt, dataPtrStride, phiCorrPtrStride, myStack);
857 					tonePos = pilotIdx[kk] + 1;
858 					dataPtrTmp += (numTonesIt + pilotSkip)*dataPtrStride;
859 #ifdef REMOVE_IIR
860 					phiCorrPtrTemp += (numTonesIt + 1)*phiCorrPtrStride;
861 #endif
862 					writePtr += 2 * (numTonesIt + 1);
863 					tempPow += myStack[1];
864 					myStack[0] = 0;
865 					myStack[1] = 0;
866 				}
867 				numTonesIt = scStop2 - tonePos;
868 				processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numTonesIt, dataPtrStride, phiCorrPtrStride, myStack);
869 				dataPtrTmp += numTonesIt*dataPtrStride;
870 				tempPow += myStack[1];
871 				myStack[1] = 0;
872 
873 #ifdef REMOVE_IIR
874 				phiCorrPtrTemp += (numTonesIt + dcZeros2)*phiCorrPtrStride;
875 #endif
876 				powerPerSubband[pp++] = tempPow;
877 #ifndef COMPUTE_80P80_AS_160
878 				phaseRollPtr[(jj + bb* nRx + ii*nRx*numBands)] = myAtan2(myStack[2], myStack[3]);
879 				memset(myStack, 0, 4 * 4);
880 			}
881 #else
882 			}
883 			phaseRollPtr[(jj + ii*nRx)] = myAtan2(myStack[2], myStack[3]);
884 #endif
885 		}
886 	}
887 }
888 
/**
 * @brief Estimate the phase step across DC and store the decision in pktinfo->dcPhase.
 *
 * For every tx/rx combination the tones at index 256-dcEdge and 256+dcEdge of
 * the slot are read (dcEdge is 6 for packetType 3, otherwise 3), rotated with
 * a twiddle-table entry derived from the stored phase roll, and their
 * conjugate product is accumulated. myAtan2() of the accumulated sums is then
 * compared against +/-(1 << (MPY_BIPT - 3)):
 *   above the positive threshold -> dcPhase = 0x1,
 *   below the negative threshold -> dcPhase = 0x3,
 *   otherwise                    -> dcPhase = 0.
 *
 * @param pktinfo       packet descriptor; dcPhase is written
 * @param inputVals     not referenced by this function
 * @param fftInBfr      buffer holding packed I (low 16 bit) / Q (high 16 bit) words
 * @param phaseRollPtr  phase roll per tx/rx combination
 */
void detectPhaseJump(hal_pktinfo_t *pktinfo, hal_wls_processing_input_params_t *inputVals, unsigned int *fftInBfr,  int *phaseRollPtr) {

	const int numRx = pktinfo->nRx + 1;
	const int numTx = pktinfo->nTx + 1;
	const int slotLen = 1 << (pktinfo->fftSize + 6);
	const unsigned int *twiddleWords = (unsigned int*)radix4FftTwiddleArr;
	const int edgeOffset = (pktinfo->packetType == 3) ? 6 : 3; // VHT : HE
	const int threshold = 1 << (MPY_BIPT - 3); // annotated as pi/4 in the original code
	int accI = 0;
	int accQ = 0;
	int tx, rx, lane;
	int angle;

	pktinfo->dcPhase = 0;

	for (tx = 0; tx < numTx; tx++) {
		for (rx = 0; rx < numRx; rx += NUM_PARALLEL) {
			const int comboIdx = rx + tx * numRx;
			// NOTE(review): the offset is scaled by NUM_PARALLEL although rx already
			// advances in NUM_PARALLEL steps - confirm the layout for NUM_PARALLEL > 1.
			const unsigned int *slot = fftInBfr + NUM_PARALLEL * slotLen * comboIdx;

			for (lane = 0; lane < NUM_PARALLEL; lane++) {
				unsigned int lowerWord, upperWord, twiddle;
				short lowerI, lowerQ, upperI, upperQ;
				short cosRoll, sinRoll;
				int rotLowerI, rotLowerQ, rotUpperI, rotUpperQ;
				int tableIdx;

				// tone just below DC
				lowerWord = slot[NUM_PARALLEL * (256 - edgeOffset) + lane];
				lowerI = (lowerWord & 0xffff);
				lowerQ = (lowerWord >> 16);

				// tone just above DC
				upperWord = slot[NUM_PARALLEL * (256 + edgeOffset) + lane];
				upperI = (upperWord & 0xffff);
				upperQ = (upperWord >> 16);

				// twiddle entry for the phase roll over edgeOffset tones (Ng=1)
				tableIdx = (edgeOffset*phaseRollPtr[lane + comboIdx] *MAX_FFT_SIZE) >> MPY_BIPT;
				tableIdx &= (MAX_FFT_SIZE - 1); // apply modulo
				twiddle = twiddleWords[tableIdx];
				cosRoll = (twiddle & 0xffff);
				sinRoll = (twiddle >> 16);

				// lower tone: multiply by (cos + j*sin)
				rotLowerI = ((int)cosRoll*lowerI) - ((int)sinRoll*lowerQ);
				rotLowerQ = ((int)cosRoll*lowerQ) + ((int)sinRoll*lowerI);
				lowerI = (short)(rotLowerI >> TWIDDLE_BIPT);
				lowerQ = (short)(rotLowerQ >> TWIDDLE_BIPT);

				// upper tone: multiply by (cos - j*sin); negated for packetType 3
				if (pktinfo->packetType == 3) { // account for tone rotation
					rotUpperI = - ((int)sinRoll*upperQ) - ((int)cosRoll*upperI);
					rotUpperQ = ((int)sinRoll*upperI) - ((int)cosRoll*upperQ);
				}
				else {
					rotUpperI = ((int)cosRoll*upperI) + ((int)sinRoll*upperQ);
					rotUpperQ = ((int)cosRoll*upperQ) - ((int)sinRoll*upperI);
				}
				upperI = (short)(rotUpperI >> TWIDDLE_BIPT);
				upperQ = (short)(rotUpperQ >> TWIDDLE_BIPT);

				// accumulate conj(lower) * upper, halved
				accI += (lowerI*upperI + lowerQ*upperQ)/2;
				accQ += (lowerI*upperQ - lowerQ*upperI)/2;
			}
		}
	}

	angle = myAtan2(accI, accQ);

	if (angle > threshold) {
		pktinfo->dcPhase = 0x1;
	}
	else if (angle < -threshold) {
		pktinfo->dcPhase = 0x3;
	}
}
969 
/**
 * @brief Demodulate raw CSI into the IFFT input buffer for 20/40/80/160 MHz.
 *
 * For each processed tx/rx combination the function clears that combination's
 * slot in fftInBfr, runs processSubCarrier20MHz() (20 MHz) or
 * processSubCarrier() (other bandwidths) over the negative and then the
 * positive spectrum half, appends myStack[1] to powerPerSubband after each
 * processed section, and stores myAtan2(myStack[2], myStack[3]) in
 * phaseRollPtr. For 160 MHz every half is processed as two quarters, so four
 * power values are appended per combination instead of two.
 *
 * Side effect: pktinfo->nTx and pktinfo->nRx are overwritten with the clamped
 * stream counts (minus one).
 *
 * @param pktinfo          packet descriptor (nTx, nRx, Ng, NgDsfShift, sigBw,
 *                         fftSize, packetType and - under REMOVE_IIR - rxDevBw are read)
 * @param inputVals        supplies the nTx/nRx limits
 * @param dataPtr          raw CSI, accessed as 16-bit words
 * @param csiDataSize      not referenced by this function
 * @param fftInBfr         output, one slot of (1 << (fftSize + 6)) words per combination
 * @param powerPerSubband  output power values, appended in processing order
 * @param phaseRollPtr     output, one value per tx/rx combination
 * @param chNum            channel number; only used under REMOVE_IIR (without
 *                         STA_20_ONLY) to derive the correction-table offset
 */
void readHexDataDemodulateProcess(hal_pktinfo_t *pktinfo, hal_wls_processing_input_params_t *inputVals, unsigned int* dataPtr, int csiDataSize, unsigned int *fftInBfr, unsigned int *powerPerSubband, int *phaseRollPtr, int chNum){

	unsigned short *dataPtrTmp;
	short *writePtr;
	const short *phiCorrPtrTemp = 0;
#if (MAX_RX>1) || (MAX_TX>1)
	int ii, jj;
#endif
	int pp, nRx, nTx;
    int dcZeros = 0;
    int scStart = 0;
	int numCsiScHlf;
	// nRx0/nTx0: stream counts as reported by the packet, before clamping
	int nRx0 = pktinfo->nRx+1;
	int nTx0 = pktinfo->nTx+1;
	// Ng = 2 << pktinfo->Ng
	int NgShift = 1+pktinfo->Ng;
	int Ng = 1<<NgShift;
	int NgDsfShift = pktinfo->NgDsfShift;
	int sigSubbands = pktinfo->sigBw;
	int subbandSize = 1<<(6-NgDsfShift);
	int bandwidth = 20<<sigSubbands;
	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
	int dataPtrStride = nTx0*nRx0; // now in 2 byte steps
	const unsigned int *pilotValsPtr;
	int phiCorrPtrStride = 0;
	unsigned int myStack[4]; // includes tempVal|powerEst| runningSumI|runningSumQ
#ifdef REMOVE_IIR
	const short *phiCorrPtr = 0;
	int devSubbands = pktinfo->rxDevBw;
	int phiCorrScale = 0;
	int deviceBandwidth = 20<<devSubbands;
	int channelOffset = 0;
	// pick the correction table for this build / device bandwidth
#ifdef STA_20_ONLY
	phiCorrPtr = phiCorr64;
#else
	phiCorrPtr = phiCorr512;
	if(pktinfo->rxDevBw){ // > 20 MHz
		phiCorrPtr = phiCorrScBt512;
	}
#endif
#endif
	{
		// pilotDummyArray holds ten zero words; the per-bandwidth offsets added
		// below (at most 8, plus the [1] access) stay inside it
		pilotValsPtr = pilotDummyArray;
	}

	// process at most as many tx/rx streams as packet, caller and build allow,
	// and write the result back to pktinfo
	nTx = (pktinfo->nTx<inputVals->nTx)?(pktinfo->nTx+1):(inputVals->nTx+1);
	if(MAX_TX<nTx){
		nTx = MAX_TX;
    }
    pktinfo->nTx = nTx-1;
	nRx = (pktinfo->nRx<inputVals->nRx)?(pktinfo->nRx+1):(inputVals->nRx+1);
	if(MAX_RX<nRx){
		nRx = MAX_RX;
	}
	pktinfo->nRx = nRx-1;

#if defined(REMOVE_IIR) && !defined(STA_20_ONLY)
	if(bandwidth<deviceBandwidth){	// figure out channel offset
		if(chNum<36){ // 2.4 GHZ
			channelOffset = 0; // control channel always on bottom
		}
		else if(chNum<149){ // 5 GHz
			channelOffset = ((chNum-36)>>(2+sigSubbands))&((1<<(devSubbands-sigSubbands))-1);
		}
		else{
			channelOffset = ((chNum-149)>>(2+sigSubbands))&((1<<(devSubbands-sigSubbands))-1);
		}
		// add offset
		phiCorrPtr += channelOffset*(1<<(9+sigSubbands-devSubbands));
	}
	phiCorrScale = 3-devSubbands; // since filter is in 160 MHz resolution
#endif
	// per-bandwidth setup: tones per half (numCsiScHlf), pilot value offset,
	// gap around DC (dcZeros) and first written tone (scStart).
	// NOTE(review): numCsiScHlf stays unset if bandwidth is none of 20/40/80/160.
	if(bandwidth == 20){ // 20 MHz [1:Ng:27 28 N-28:Ng:N-2 N-1];
		numCsiScHlf = (14>>(NgShift-1))+1;
		pilotValsPtr += Ng-2;
		dcZeros = 1;
		scStart = 4;
#ifdef REMOVE_IIR
		phiCorrPtr += 4<<phiCorrScale;
#endif
	}
	else if(bandwidth == 40){ // 40 MHz [2:Ng:58 N-58:Ng:N-2];
		numCsiScHlf = 29>>(NgShift-1); // = dataPerChannel/2
		pilotValsPtr += Ng+2;
		dcZeros = 2-NgShift;
		scStart = ((subbandSize<<sigSubbands)/2)-(numCsiScHlf+dcZeros/2);
#ifdef REMOVE_IIR
		phiCorrPtr += 6<<phiCorrScale;
#endif
	}
	else if(bandwidth == 80){ // 80 MHz [2:Ng:122 N-122:Ng:N-2 ];
		numCsiScHlf = 31; // Ng =4 = dataPerChannel/2
		pilotValsPtr += 8;
		dcZeros = 0;
		scStart = ((subbandSize<<sigSubbands)/2)-(numCsiScHlf+dcZeros/2);
#ifdef REMOVE_IIR
		phiCorrPtr += 6<<phiCorrScale;
#endif
	}
	else if(bandwidth == 160){ // 160 MHz [6:Ng:250 N-250:Ng:N-6];
		numCsiScHlf = 62/2; // since processing a quarter at a time
		pilotValsPtr += 8;
		dcZeros = 2;
		scStart = ((subbandSize<<sigSubbands)/2)-(2*numCsiScHlf+dcZeros/2);
#ifdef REMOVE_IIR
		phiCorrPtr += 6<<phiCorrScale;
#endif
	}
#ifdef REMOVE_IIR
	phiCorrPtrStride = Ng<<phiCorrScale;
#endif

	pp = 0;
	// The loop headers exist only in multi-antenna builds; the single-antenna
	// build opens two plain blocks instead so the closing braces at the end of
	// the function match in both configurations.
#if (MAX_RX>1) || (MAX_TX>1)
	for(ii=0;ii<nTx;ii++){
		for(jj=0;jj<nRx;jj++){
			dataPtrTmp = ((unsigned short*)dataPtr)+(ii+jj*nTx0);
			memset(fftInBfr+ifftSizeOsf*(jj+ii*nRx),0,4*ifftSizeOsf);
			writePtr = (short*)(fftInBfr+ifftSizeOsf*(jj+ii*nRx)+scStart);
#else
	{
		{
			dataPtrTmp = ((unsigned short*)dataPtr);
			memset(fftInBfr,0,4*ifftSizeOsf);
			writePtr = (short*)(fftInBfr+scStart);
#endif
			// fresh accumulator for every tx/rx combination
			memset(myStack,0,4*4);

#ifdef REMOVE_IIR
			phiCorrPtrTemp = phiCorrPtr;
#endif

			// negative spectrum half
			if(bandwidth == 20){
				processSubCarrier20MHz(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numCsiScHlf, dataPtrStride, phiCorrPtrStride, Ng, numCsiScHlf-2, myStack);
			}
			else {
				if (bandwidth == 160) {
					// first quarter; advance all pointers to the second quarter
					processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numCsiScHlf, dataPtrStride, phiCorrPtrStride, myStack);
					dataPtrTmp += numCsiScHlf*dataPtrStride;
					writePtr += numCsiScHlf * 2;
#ifdef REMOVE_IIR
					phiCorrPtrTemp += numCsiScHlf*phiCorrPtrStride;
#endif
					powerPerSubband[pp++] = myStack[1];
					myStack[1] = 0;
				}
				processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numCsiScHlf, dataPtrStride, phiCorrPtrStride, myStack);
			}
			// myStack[1] is read out and cleared after each section
			powerPerSubband[pp++] = myStack[1];
			myStack[1] = 0;

			// positive spectrum half
			// step over the section just processed plus the dcZeros gap
			dataPtrTmp +=numCsiScHlf*dataPtrStride;
			writePtr +=(numCsiScHlf+dcZeros)*2;
#ifdef REMOVE_IIR
			phiCorrPtrTemp +=(numCsiScHlf+dcZeros)*phiCorrPtrStride;
#endif
			if(bandwidth == 20){
				// jump index is 0 for packetType 0 and numCsiScHlf-2 otherwise
				unsigned char jumpIdx = (pktinfo->packetType) ? (numCsiScHlf - 2) : 0;
				// extra write/correction pointer adjustment applied only at 20 MHz
				writePtr +=((numCsiScHlf-1)*(Ng-1)-1)*2;
#ifdef REMOVE_IIR
				phiCorrPtrTemp -= 2*phiCorrPtrStride-phiCorrPtrStride/Ng;
#endif
				processSubCarrier20MHz(dataPtrTmp, writePtr, pilotValsPtr[1], phiCorrPtrTemp, numCsiScHlf, dataPtrStride, phiCorrPtrStride, Ng, jumpIdx, myStack);
			}
			else {
				if(bandwidth == 160){
					// third quarter (uses pilotValsPtr[0]); advance to the fourth
					processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[0], phiCorrPtrTemp, numCsiScHlf, dataPtrStride, phiCorrPtrStride, myStack);
					dataPtrTmp +=numCsiScHlf*dataPtrStride;
					writePtr +=numCsiScHlf*2;
#ifdef REMOVE_IIR
					phiCorrPtrTemp +=numCsiScHlf*phiCorrPtrStride;
#endif
					powerPerSubband[pp++] = myStack[1];
					myStack[1] = 0;
				}
				processSubCarrier(dataPtrTmp, writePtr, pilotValsPtr[1], phiCorrPtrTemp, numCsiScHlf, dataPtrStride, phiCorrPtrStride, myStack);
			}
			powerPerSubband[pp++] = myStack[1];
			myStack[1] = 0;

			// phase roll from the sums accumulated in myStack[2] / myStack[3]
#if (MAX_RX>1) || (MAX_TX>1)
			phaseRollPtr[(jj+ii*nRx)] = myAtan2(myStack[2], myStack[3]);
#else
			*phaseRollPtr = myAtan2(myStack[2], myStack[3]);
#endif
		}
	}
}
1159 
1160 void calculateTotalPower(hal_pktinfo_t *pktinfo, unsigned int *powerPerSubband, unsigned int *totalpower){
1161 
1162 	int ii, jj, tt, qq;
1163 	unsigned int tempVal;
1164 	int nRx = pktinfo->nRx+1;
1165 	int nTx = pktinfo->nTx+1;
1166 	int numSubbands = 1<<(pktinfo->sigBw);
1167 	int numSubBlocks;
1168 	if(pktinfo->sigBw>1){ // for 80 and 160 MHz, 40 MHz subbands
1169 		numSubBlocks = numSubbands>>1;
1170 	}
1171 	else{ // 20 an 40 MHz
1172 		numSubBlocks = 2;
1173 	}
1174 
1175 	// determine scaling
1176 	for(ii=0;ii<nTx;ii++){
1177 		for(jj=0;jj<nRx;jj+= NUM_PARALLEL){
1178 			for (qq = 0; qq < NUM_PARALLEL; qq++) {
1179 				tempVal = 0;
1180 				for (tt = 0;tt < numSubBlocks;tt++) {
1181 					tempVal += powerPerSubband[qq + tt*NUM_PARALLEL + (jj + ii*nRx)*numSubBlocks];
1182 				}
1183 				totalpower[qq + jj + ii*nRx] = tempVal;
1184 			}
1185 		}
1186 	}
1187 }
1188 
1189 #ifndef STA_20_ONLY
1190 void processLegacyPackets(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, int bufferspacing, int *phaseRollPtr){
1191 
1192 	int ii, jj;
1193 	int nRx = pktinfo->nRx+1;
1194 	int nTx = pktinfo->nTx+1;
1195 
1196 	for(ii=0;ii<nTx;ii++){
1197 		for(jj=0;jj<nRx;jj+=NUM_PARALLEL){
1198 			if (pktinfo->sigBw == 0) { // 20 MHz cases
1199 #if defined(FFT_PARALLEL) && defined(ARM_DS5)
1200 				interpolateBandEdges20Intrinsic(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr + jj + ii*nRx);
1201 #elif defined(FFT_PARALLEL) && !defined(ARM_DS5)
1202 				interpolateBandEdges20Parallel(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr + jj + ii*nRx);
1203 #else
1204 				interpolateBandEdges20(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr[jj + ii*nRx]);
1205 #endif
1206 			}
1207 			else{
1208 				if (pktinfo->Ng) { // Ng=4 case
1209 #if defined(FFT_PARALLEL) && defined(ARM_DS5)
1210 					interpolateBandEdgesIntrinsic(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr + jj + ii*nRx);
1211 #elif defined(FFT_PARALLEL) && !defined(ARM_DS5)
1212 					interpolateBandEdgesParallel(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr + jj + ii*nRx);
1213 #else
1214 					interpolateBandEdges(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr[jj + ii*nRx]);
1215 #endif
1216 				}
1217 				else { // 40 MHz case
1218 #if defined(FFT_PARALLEL) && defined(ARM_DS5)
1219 					interpolateBandEdges40Intrinsic(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr + jj + ii*nRx);
1220 #elif defined(FFT_PARALLEL) && !defined(ARM_DS5)
1221 					interpolateBandEdges40Parallel(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr + jj + ii*nRx);
1222 #else
1223 					interpolateBandEdges40(pktinfo, fftInBuffer + bufferspacing*(jj + ii*nRx), phaseRollPtr[jj + ii*nRx]);
1224 #endif
1225 				}
1226 			}
1227 		}
1228 	}
1229 }
1230 #endif
1231 
1232 void ifftProcessing(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, unsigned int *fftOutBuffer, int bufferspacing){
1233 	int ii, jj;
1234 	int nRx = pktinfo->nRx+1;
1235 	int nTx = pktinfo->nTx+1;
1236 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
1237 	short *pSrc = (short*)fftInBuffer;
1238 #ifndef FFT_INPLACE
1239 	short *pDst = (short*)fftOutBuffer;
1240 #endif
1241 
1242 #ifdef FFT_INPLACE
1243 	for (ii = 0;ii<nTx;ii++) {
1244 		for (jj = 0;jj<nRx;jj += NUM_PARALLEL) {
1245 #if defined(FFT_PARALLEL) && defined(ARM_DS5)
1246 			radix2IfftIntrinsic(pSrc, ifftSizeOsf, radix4FftTwiddleArr, MAX_FFT_SIZE);
1247 #elif defined(FFT_PARALLEL) && !defined(ARM_DS5)
1248 			radix2IfftParallel(pSrc, ifftSizeOsf, radix4FftTwiddleArr, MAX_FFT_SIZE);
1249 #else
1250 			radix2Ifft(pSrc, ifftSizeOsf, radix4FftTwiddleArr, MAX_FFT_SIZE);
1251 #endif
1252 			pSrc += 2 * NUM_PARALLEL*bufferspacing;
1253 		}
1254 	}
1255 #else
1256 
1257 	for (ii = 0;ii<nTx;ii++) {
1258 		for (jj = 0;jj<nRx;jj += NUM_PARALLEL) {
1259 #if defined(FFT_PARALLEL) && defined(ARM_DS5)
1260 			radix4IfftIntrinsic(pSrc, pDst, ifftSizeOsf, radix4FftTwiddleArr, MAX_FFT_SIZE);
1261 #elif defined(FFT_PARALLEL) && !defined(ARM_DS5)
1262 			radix4IfftParallel(pSrc, pDst, ifftSizeOsf, radix4FftTwiddleArr, MAX_FFT_SIZE);
1263 #else
1264 			radix4Ifft(pSrc, pDst, ifftSizeOsf, radix4FftTwiddleArr, MAX_FFT_SIZE);
1265 #endif
1266 			pSrc += 2 * NUM_PARALLEL*bufferspacing;
1267 			pDst += 2 * NUM_PARALLEL*ifftSizeOsf; // can be larger for legacy packets, if ifftSizeOsf was reduced
1268 		}
1269 	}
1270 #endif
1271 }
1272 
1273 void interpolatePilots(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, int bufferspacing, int *phaseRollPtr, unsigned int *totalpower) {
1274 
1275 	int ii, jj, kk, bb;
1276 	int phaseRollNg, idx;
1277 	int numPilots, dcZeros;
1278 	const unsigned char *pilotIdx;
1279 	unsigned int * readPtr;
1280 	unsigned int tempValLd, tempValSt;
1281 	short tempI, tempQ;
1282 	int convertI, convertQ;
1283 	int resLeftI, resLeftQ, resRightI, resRightQ, resMidI, resMidQ;
1284 	short sinPhi1, cosPhi1, sinPhi2, cosPhi2, sinPhi3, cosPhi3;
1285 
1286 	unsigned int *cosTablePtr = (unsigned int*)radix4FftTwiddleArr;
1287 	int nRx = pktinfo->nRx + 1;
1288 	int nTx = pktinfo->nTx + 1;
1289 	int numSubbands = 1 << pktinfo->sigBw;
1290 	int bandwidth = 20 * numSubbands;
1291 	int fftSizeHalf = (32 * numSubbands) >> (pktinfo->NgDsfShift);
1292 	int fftSize = 2 * fftSizeHalf;
1293 	int numBands = 1;
1294 	int NgShift = 0;
1295 	int extraSc = 0;
1296 	const unsigned char *extraScIdx = NULL;
1297 	unsigned int powerScale = 1;
1298 
1299 	if (pktinfo->packetType == 3) { // VHT
1300 #ifndef STA_20_ONLY
1301 		if ((bandwidth == 80) || (bandwidth == 160)) {
1302 			// 80 MHz 128+[-122:-2 2:122]
1303 			// minus pilots on [103, 75, 39, 11, -103, -75, -39, -11]
1304 			numPilots = SC5_VHT80_PILOTS;
1305 			pilotIdx = pilotToneIndexVHT80;
1306 			dcZeros = 3;
1307 			if (bandwidth == 160) {
1308 				fftSizeHalf >>= 1;
1309 				fftSize >>= 1;
1310 				numBands = 2;
1311 			}
1312 		}
1313 		else if (bandwidth == 40) {
1314 			// 40 MHz 64+[-58:-2 2:58]
1315 			// minus pilots on [-53,-25,-11,11,25,53]
1316 			numPilots = SC5_VHT40_PILOTS;
1317 			pilotIdx = pilotToneIndexVHT40;
1318 			dcZeros = 3;
1319 		}
1320 		else
1321 #endif
1322 		{ //if(bandwidth == 20)
1323 			// 20 MHz 32+[-28:-1 1:28]
1324 			// minus pilots on [-21, -7, 7, 21]
1325 			numPilots = SC5_VHT20_PILOTS;
1326 			pilotIdx = pilotToneIndexVHT20;
1327 			dcZeros = 1;
1328 		}
1329 	}
1330 	else {
1331 		if (pktinfo->packetType == 0) { // Leg 20in20 only
1332 										// pilotToneIndexLEG20[SC5_HT20_PILOTS
1333 										// 20 MHz 32+[-28:2:-2 -1 1 2:2:28]
1334 			numPilots = SC5_HT20_PILOTS;
1335 			pilotIdx = pilotToneIndexLEG20;
1336 			dcZeros = 2; // not really DC
1337 			NgShift = 1; // Ng=2
1338 			extraSc = 0; //2; // change phase on two subcarriers
1339 			powerScale = 2;
1340 		}
1341 		else if (pktinfo->packetType == 1) {
1342 #ifndef STA_20_ONLY
1343 			if(pktinfo->sigBw==1){ //HT40
1344 				numPilots = 0;
1345 				pilotIdx = pilotToneIndexHT20; // not used
1346 				dcZeros = 1;
1347 				extraSc = 0; // not used
1348 			}
1349 			else
1350 #endif
1351 			{ // HT 20
1352 				// pilotToneIndexHT20[SC5_HT20_PILOTS
1353 				// 20 MHz 32+[-28:2:-2 -1 1:2:27 28]
1354 				// minus pilots on [-21, -7, 7, 21]
1355 				numPilots = SC5_HT20_PILOTS;
1356 				pilotIdx = pilotToneIndexHT20;
1357 				dcZeros = 0;
1358 				NgShift = 1; // Ng=2
1359 				extraSc = 2; // change phase on two subcarriers
1360 				extraScIdx = pilotToneIndexVHT20 + 2; // skip negative half
1361 				powerScale = 2;
1362 			}
1363 		}
1364 		else { // HE
1365 #ifndef STA_20_ONLY
1366 			if ((bandwidth == 80) || (bandwidth == 160)) {
1367 				// 80 MHz 128+[-125:-1 1:125]
1368 				// minus pilots on [-117, -100, -23, -6, 6, 23, 100, 117]
1369 				numPilots = SC5_HE80_PILOTS;
1370 				pilotIdx = pilotToneIndexHE80;
1371 				dcZeros = 1;
1372 				if (bandwidth == 160) {
1373 					fftSizeHalf >>= 1;
1374 					fftSize >>= 1;
1375 					numBands = 2;
1376 				}
1377 			}
1378 			else if (bandwidth == 40) {
1379 				// 40 MHz 64+[-61:-1 1:61]
1380 				// minus pilots on [-53,-36,-26,-9,9,26,36,53]
1381 				numPilots = SC5_HE40_PILOTS;
1382 				pilotIdx = pilotToneIndexHE40;
1383 				dcZeros = 1;
1384 			}
1385 			else
1386 #endif
1387 			{ //if(bandwidth == 20)
1388 				// 20 MHz 32+[-31:0 1:31]
1389 				// minus pilots on [-29, -12, 12, 29]
1390 				numPilots = SC5_HE20_PILOTS;
1391 				pilotIdx = pilotToneIndexHE20;
1392 				dcZeros = 0;
1393 			}
1394 		}
1395 	}
1396 
1397 	for (ii = 0; ii < nTx; ii++){
1398 		for (jj = 0; jj < nRx; jj++){
1399 
1400 			// account for extra non-zero subcarriers is substantial
1401 			totalpower[jj + ii*nRx] = powerScale*totalpower[jj + ii*nRx];
1402 
1403 			phaseRollNg = (phaseRollPtr[jj + ii*nRx])>> NgShift;
1404 
1405 			idx = (phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1406 			idx &= (MAX_FFT_SIZE - 1); // apply modulo
1407 			tempValLd = cosTablePtr[idx];
1408 			cosPhi1 = (tempValLd & 0xffff);
1409 			sinPhi1 = (tempValLd >> 16);
1410 			if (dcZeros > 1){
1411 				idx = (2 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1412 				idx &= (MAX_FFT_SIZE - 1); // apply modulo
1413 				tempValLd = cosTablePtr[idx];
1414 				cosPhi2 = (tempValLd & 0xffff);
1415 				sinPhi2 = (tempValLd >> 16);
1416 
1417 				idx = (3 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1418 				idx &= (MAX_FFT_SIZE - 1); // apply modulo
1419 				tempValLd = cosTablePtr[idx];
1420 				cosPhi3 = (tempValLd & 0xffff);
1421 				sinPhi3 = (tempValLd >> 16);
1422 			}
1423 
1424 			for (bb = 0; bb < numBands; bb++){
1425 				readPtr = fftInBuffer + bb*fftSize + bufferspacing*(jj + ii*nRx);
1426 
1427 				for (kk = 0; kk < extraSc; kk++) {
1428 					tempValLd = readPtr[extraScIdx[kk]];
1429 					tempI = (tempValLd & 0xffff);
1430 					tempQ = (tempValLd >> 16);
1431 
1432 					resRightI = (((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ));
1433 					resRightQ = (((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI));
1434 					convertI = resRightI >> TWIDDLE_BIPT;
1435 					convertQ = resRightQ >> TWIDDLE_BIPT;
1436 
1437 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1438 					readPtr[extraScIdx[kk]] = tempValSt;
1439 				}
1440 
1441 				if (dcZeros == 2) { // not really DC, just fix edge tones for Legacy 20 MHz
1442 					tempValLd = readPtr[pilotToneIndexLEG20[0] + 1]; // first subcarrier
1443 					tempI = (tempValLd & 0xffff);
1444 					tempQ = (tempValLd >> 16);
1445 
1446 					resRightI = (((int)cosPhi2*tempI) + ((int)sinPhi2*tempQ));
1447 					resRightQ = (((int)cosPhi2*tempQ) - ((int)sinPhi2*tempI));
1448 					convertI = resRightI >> TWIDDLE_BIPT;
1449 					convertQ = resRightQ >> TWIDDLE_BIPT;
1450 
1451 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1452 					readPtr[pilotToneIndexLEG20[0] - 1] = tempValSt;
1453 
1454 					tempValLd = readPtr[pilotToneIndexLEG20[SC5_HT20_PILOTS-1]-1]; // last subcarrier
1455 					tempI = (tempValLd & 0xffff);
1456 					tempQ = (tempValLd >> 16);
1457 
1458 					resRightI = (((int)cosPhi2*tempI) - ((int)sinPhi2*tempQ));
1459 					resRightQ = (((int)cosPhi2*tempQ) + ((int)sinPhi2*tempI));
1460 					convertI = resRightI >> TWIDDLE_BIPT;
1461 					convertQ = resRightQ >> TWIDDLE_BIPT;
1462 
1463 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1464 					readPtr[pilotToneIndexLEG20[SC5_HT20_PILOTS-1]+1] = tempValSt;
1465 				}
1466 
1467 				// interpolate pilots
1468 				for (kk = 0; kk < numPilots; kk++){
1469 					tempValLd = readPtr[pilotIdx[kk] + 1];
1470 					tempI = (tempValLd & 0xffff);
1471 					tempQ = (tempValLd >> 16);
1472 
1473 					resRightI = (((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ)) / 2;
1474 					resRightQ = (((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI)) / 2;
1475 
1476 					tempValLd = readPtr[pilotIdx[kk] - 1];
1477 					tempI = (tempValLd & 0xffff);
1478 					tempQ = (tempValLd >> 16);
1479 
1480 					resLeftI = (((int)cosPhi1*tempI) - ((int)sinPhi1*tempQ)) / 2;
1481 					resLeftQ = (((int)cosPhi1*tempQ) + ((int)sinPhi1*tempI)) / 2;
1482 
1483 					convertI = (resLeftI + resRightI) >> TWIDDLE_BIPT;
1484 					convertQ = (resLeftQ + resRightQ) >> TWIDDLE_BIPT;
1485 
1486 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1487 					readPtr[pilotIdx[kk]] = tempValSt;
1488 				}
1489 
1490 				// interpolate DC
1491 				if (dcZeros == 3){
1492 					tempValLd = readPtr[fftSizeHalf + 2];
1493 					tempI = (tempValLd & 0xffff);
1494 					tempQ = (tempValLd >> 16);
1495 
1496 					resRightI = (((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ)) / 2;
1497 					resRightQ = (((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI)) / 2;
1498 
1499 					resMidI = (((int)cosPhi2*tempI) + ((int)sinPhi2*tempQ)) / 2;
1500 					resMidQ = (((int)cosPhi2*tempQ) - ((int)sinPhi2*tempI)) / 2;
1501 
1502 					resLeftI = (((int)cosPhi3*tempI) + ((int)sinPhi3*tempQ)) / 2;
1503 					resLeftQ = (((int)cosPhi3*tempQ) - ((int)sinPhi3*tempI)) / 2;
1504 
1505 					tempValLd = readPtr[fftSizeHalf - 2];
1506 					tempI = (tempValLd & 0xffff);
1507 					tempQ = (tempValLd >> 16);
1508 
1509 					resRightI += (((int)cosPhi3*tempI) - ((int)sinPhi3*tempQ)) / 2;
1510 					resRightQ += (((int)cosPhi3*tempQ) + ((int)sinPhi3*tempI)) / 2;
1511 
1512 					resMidI += (((int)cosPhi2*tempI) - ((int)sinPhi2*tempQ)) / 2;
1513 					resMidQ += (((int)cosPhi2*tempQ) + ((int)sinPhi2*tempI)) / 2;
1514 
1515 					resLeftI += (((int)cosPhi1*tempI) - ((int)sinPhi1*tempQ)) / 2;
1516 					resLeftQ += (((int)cosPhi1*tempQ) + ((int)sinPhi1*tempI)) / 2;
1517 
1518 					convertI = resRightI >> TWIDDLE_BIPT;
1519 					convertQ = resRightQ >> TWIDDLE_BIPT;
1520 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1521 					readPtr[fftSizeHalf+1] = tempValSt;
1522 
1523 					convertI = resMidI >> TWIDDLE_BIPT;
1524 					convertQ = resMidQ >> TWIDDLE_BIPT;
1525 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1526 					readPtr[fftSizeHalf] = tempValSt;
1527 
1528 					convertI = resLeftI >> TWIDDLE_BIPT;
1529 					convertQ = resLeftQ >> TWIDDLE_BIPT;
1530 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1531 					readPtr[fftSizeHalf-1] = tempValSt;
1532 				}
1533 				else if (dcZeros == 1) { // dcZeros == 1
1534 					tempValLd = readPtr[fftSizeHalf + 1];
1535 					tempI = (tempValLd & 0xffff);
1536 					tempQ = (tempValLd >> 16);
1537 					resMidI = (((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ)) / 2;
1538 					resMidQ = (((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI)) / 2;
1539 
1540 					tempValLd = readPtr[fftSizeHalf - 1];
1541 					tempI = (tempValLd & 0xffff);
1542 					tempQ = (tempValLd >> 16);
1543 					resMidI += (((int)cosPhi1*tempI) - ((int)sinPhi1*tempQ)) / 2;
1544 					resMidQ += (((int)cosPhi1*tempQ) + ((int)sinPhi1*tempI)) / 2;
1545 
1546 					convertI = resMidI >> TWIDDLE_BIPT;
1547 					convertQ = resMidQ >> TWIDDLE_BIPT;
1548 					tempValSt = (convertI & 0xffff) | (convertQ << 16);
1549 					readPtr[fftSizeHalf] = tempValSt;
1550 				}
1551 			}
1552 		}
1553 	}
1554 
1555 }
1556 
1557 void interpolateBandEdges20(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, int phaseRollNg) {
1558 
1559 	unsigned int *cosTablePtr = (unsigned int*)radix4FftTwiddleArr;
1560 	short sinPhi1, cosPhi1, sinPhi2, cosPhi2;
1561 	short sinPhi0, cosPhi0;
1562 
1563 	int convertI, convertQ;
1564 	int resLeftI1, resLeftQ1, resRightI1, resRightQ1;
1565 	int resLeftI2, resLeftQ2, resRightI2, resRightQ2;
1566 	short tempI, tempQ;
1567 	unsigned int tempValLd, tempValSt;
1568 
1569 	int idx;
1570 
1571 	int psb = pktinfo->psb;
1572 	int NgDsfShift = pktinfo->NgDsfShift;
1573 	int subbandSize = 1 << (6 - NgDsfShift);
1574 
1575 	unsigned int *readPtr = fftInBuffer;
1576 
1577 	idx = (phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1578 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1579 	tempValLd = cosTablePtr[idx];
1580 	cosPhi1 = (tempValLd & 0xffff);
1581 	sinPhi1 = (tempValLd >> 16);
1582 
1583 	if (pktinfo->rxDevBw == 1) { // 40 MHz device bandwidth
1584 		idx = (2 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1585 		idx &= (MAX_FFT_SIZE - 1); // apply modulo
1586 		tempValLd = cosTablePtr[idx];
1587 		cosPhi2 = (tempValLd & 0xffff);
1588 		sinPhi2 = (tempValLd >> 16);
1589 
1590 		idx = (phaseRollNg*MAX_FFT_SIZE) >> (MPY_BIPT + 1); // divide by 2 for Ng=2
1591 		idx &= (MAX_FFT_SIZE - 1); // apply modulo
1592 		tempValLd = cosTablePtr[idx];
1593 		cosPhi0 = (tempValLd & 0xffff);
1594 		sinPhi0 = (tempValLd >> 16);
1595 
1596 		// interpolate edge for 20/40
1597 		if (psb == 0) { // left side, right edge
1598 			tempValLd = readPtr[subbandSize - 3];
1599 			tempI = (tempValLd & 0xffff);
1600 			tempQ = (tempValLd >> 16);
1601 
1602 			resLeftI1 = ((int)cosPhi2*tempI) - ((int)sinPhi2*tempQ);
1603 			resLeftQ1 = ((int)cosPhi2*tempQ) + ((int)sinPhi2*tempI);
1604 
1605 			resLeftI2 = ((int)cosPhi1*tempI) - ((int)sinPhi1*tempQ);
1606 			resLeftQ2 = ((int)cosPhi1*tempQ) + ((int)sinPhi1*tempI);
1607 
1608 			convertI = resLeftI1 >> TWIDDLE_BIPT;
1609 			convertQ = resLeftQ1 >> TWIDDLE_BIPT;
1610 			tempValSt = (convertI & 0xffff) | (convertQ << 16);
1611 			readPtr[subbandSize - 1] = tempValSt;
1612 
1613 			convertI = resLeftI2 >> TWIDDLE_BIPT;
1614 			convertQ = resLeftQ2 >> TWIDDLE_BIPT;
1615 			tempValSt = (convertI & 0xffff) | (convertQ << 16);
1616 			readPtr[subbandSize - 2] = tempValSt;
1617 
1618 			// change phase on sc -32 / 32
1619 			tempValLd = readPtr[subbandSize / 2];
1620 			tempI = (tempValLd & 0xffff);
1621 			tempQ = (tempValLd >> 16);
1622 
1623 			resLeftI2 = (((int)cosPhi0*tempI) - ((int)sinPhi0*tempQ));
1624 			resLeftQ2 = (((int)cosPhi0*tempQ) + ((int)sinPhi0*tempI));
1625 
1626 			convertI = resLeftI2 >> TWIDDLE_BIPT;
1627 			convertQ = resLeftQ2 >> TWIDDLE_BIPT;
1628 			tempValSt = (convertI & 0xffff) | (convertQ << 16);
1629 			readPtr[subbandSize / 2] = tempValSt;
1630 		}
1631 		else {	//if (psb == 1)  // right side, left edge
1632 			tempValLd = readPtr[subbandSize + 3];
1633 			tempI = (tempValLd & 0xffff);
1634 			tempQ = (tempValLd >> 16);
1635 
1636 			resRightI2 = ((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ);
1637 			resRightQ2 = ((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI);
1638 
1639 			resRightI1 = ((int)cosPhi2*tempI) + ((int)sinPhi2*tempQ);
1640 			resRightQ1 = ((int)cosPhi2*tempQ) - ((int)sinPhi2*tempI);
1641 
1642 			convertI = resRightI2 >> TWIDDLE_BIPT;
1643 			convertQ = resRightQ2 >> TWIDDLE_BIPT;
1644 			tempValSt = (convertI & 0xffff) | (convertQ << 16);
1645 			readPtr[subbandSize + 2] = tempValSt;
1646 
1647 			convertI = resRightI1 >> TWIDDLE_BIPT;
1648 			convertQ = resRightQ1 >> TWIDDLE_BIPT;
1649 			tempValSt = (convertI & 0xffff) | (convertQ << 16);
1650 			readPtr[subbandSize + 1] = tempValSt;
1651 
1652 			// change phase on sc -32 / 32
1653 			tempValLd = readPtr[3 * subbandSize / 2];
1654 			tempI = (tempValLd & 0xffff);
1655 			tempQ = (tempValLd >> 16);
1656 
1657 			resRightI1 = (((int)cosPhi0*tempI) - ((int)sinPhi0*tempQ));
1658 			resRightQ1 = (((int)cosPhi0*tempQ) + ((int)sinPhi0*tempI));
1659 
1660 			convertI = resRightI1 >> TWIDDLE_BIPT;
1661 			convertQ = resRightQ1 >> TWIDDLE_BIPT;
1662 			tempValSt = (convertI & 0xffff) | (convertQ << 16);
1663 			readPtr[3 * subbandSize / 2] = tempValSt;
1664 		}
1665 	}
1666 	else { // pktinfo->rxDevBw == 2)  // 80 MHz device bandwidth
1667 		tempValLd = readPtr[psb*subbandSize+1];
1668 		tempI = (tempValLd & 0xffff);
1669 		tempQ = (tempValLd >> 16);
1670 
1671 		resLeftI1 = ((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ);
1672 		resLeftQ1 = ((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI);
1673 
1674 		convertI = resLeftI1 >> TWIDDLE_BIPT;
1675 		convertQ = resLeftQ1 >> TWIDDLE_BIPT;
1676 		tempValSt = (convertI & 0xffff) | (convertQ << 16);
1677 		readPtr[psb*subbandSize] = tempValSt;
1678 
1679 		tempValLd = readPtr[(psb+1)*subbandSize - 2]; // right edge
1680 		tempI = (tempValLd & 0xffff);
1681 		tempQ = (tempValLd >> 16);
1682 
1683 		resRightI1 = (((int)cosPhi1*tempI) - ((int)sinPhi1*tempQ));
1684 		resRightQ1 = (((int)cosPhi1*tempQ) + ((int)sinPhi1*tempI));
1685 
1686 		convertI = resRightI1 >> TWIDDLE_BIPT;
1687 		convertQ = resRightQ1 >> TWIDDLE_BIPT;
1688 		tempValSt = (convertI & 0xffff) | (convertQ << 16);
1689 		readPtr[(psb + 1)*subbandSize - 1] = tempValSt;
1690 	}
1691 }
1692 
1693 void interpolateBandEdges40(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, int phaseRollNg) {
1694 
1695 	unsigned int *cosTablePtr = (unsigned int*)radix4FftTwiddleArr;
1696 	short sinPhi1, cosPhi1, sinPhi2, cosPhi2, sinPhi3, cosPhi3;
1697 	short sinPhi4, cosPhi4, sinPhi5, cosPhi5, sinPhi0, cosPhi0;
1698 
1699 	int convertI, convertQ;
1700 	int resLeftI1, resLeftQ1, resRightI1, resRightQ1, resMidI, resMidQ;
1701 	int resLeftI2, resLeftQ2, resRightI2, resRightQ2;
1702 	short tempI, tempQ;
1703 	unsigned int tempValLd, tempValSt;
1704 
1705 	int idx;
1706 
1707 	int NgDsfShift = pktinfo->NgDsfShift;
1708 	int subbandSize = 1 << (6 - NgDsfShift);
1709 
1710 	unsigned int *readPtr = fftInBuffer;
1711 
1712 	idx = (phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1713 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1714 	tempValLd = cosTablePtr[idx];
1715 	cosPhi1 = (tempValLd & 0xffff);
1716 	sinPhi1 = (tempValLd >> 16);
1717 
1718 	idx = (2 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1719 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1720 	tempValLd = cosTablePtr[idx];
1721 	cosPhi2 = (tempValLd & 0xffff);
1722 	sinPhi2 = (tempValLd >> 16);
1723 
1724 	idx = (3 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1725 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1726 	tempValLd = cosTablePtr[idx];
1727 	cosPhi3 = (tempValLd & 0xffff);
1728 	sinPhi3 = (tempValLd >> 16);
1729 
1730 	idx = (4 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1731 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1732 	tempValLd = cosTablePtr[idx];
1733 	cosPhi4 = (tempValLd & 0xffff);
1734 	sinPhi4 = (tempValLd >> 16);
1735 
1736 	idx = (5 * phaseRollNg*MAX_FFT_SIZE) >> MPY_BIPT;
1737 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1738 	tempValLd = cosTablePtr[idx];
1739 	cosPhi5 = (tempValLd & 0xffff);
1740 	sinPhi5 = (tempValLd >> 16);
1741 
1742 
1743 	tempValLd = readPtr[subbandSize + 3];
1744 	tempI = (tempValLd & 0xffff);
1745 	tempQ = (tempValLd >> 16);
1746 
1747 	resRightI2 = 5*(((int)cosPhi1*tempI) + ((int)sinPhi1*tempQ)) / 6;
1748 	resRightQ2 = 5*(((int)cosPhi1*tempQ) - ((int)sinPhi1*tempI)) / 6;
1749 
1750 	resRightI1 = 4*(((int)cosPhi2*tempI) + ((int)sinPhi2*tempQ)) / 6;
1751 	resRightQ1 = 4*(((int)cosPhi2*tempQ) - ((int)sinPhi2*tempI)) / 6;
1752 
1753 	resMidI = (((int)cosPhi3*tempI) + ((int)sinPhi3*tempQ)) / 2;
1754 	resMidQ = (((int)cosPhi3*tempQ) - ((int)sinPhi3*tempI)) / 2;
1755 
1756 	resLeftI1 = 2*(((int)cosPhi4*tempI) + ((int)sinPhi4*tempQ)) / 6;
1757 	resLeftQ1 = 2*(((int)cosPhi4*tempQ) - ((int)sinPhi4*tempI)) / 6;
1758 
1759 	resLeftI2 = (((int)cosPhi5*tempI) + ((int)sinPhi5*tempQ)) / 6;
1760 	resLeftQ2 = (((int)cosPhi5*tempQ) - ((int)sinPhi5*tempI)) / 6;
1761 
1762 	tempValLd = readPtr[subbandSize - 3];
1763 	tempI = (tempValLd & 0xffff);
1764 	tempQ = (tempValLd >> 16);
1765 
1766 	resRightI2 += (((int)cosPhi5*tempI) - ((int)sinPhi5*tempQ)) / 6;
1767 	resRightQ2 += (((int)cosPhi5*tempQ) + ((int)sinPhi5*tempI)) / 6;
1768 
1769 	resRightI1 += 2*(((int)cosPhi4*tempI) - ((int)sinPhi4*tempQ)) / 6;
1770 	resRightQ1 += 2*(((int)cosPhi4*tempQ) + ((int)sinPhi4*tempI)) / 6;
1771 
1772 	resMidI += (((int)cosPhi3*tempI) - ((int)sinPhi3*tempQ)) / 2;
1773 	resMidQ += (((int)cosPhi3*tempQ) + ((int)sinPhi3*tempI)) / 2;
1774 
1775 	resLeftI1 += 4*(((int)cosPhi2*tempI) - ((int)sinPhi2*tempQ)) / 6;
1776 	resLeftQ1 += 4*(((int)cosPhi2*tempQ) + ((int)sinPhi2*tempI)) / 6;
1777 
1778 	resLeftI2 += 5*(((int)cosPhi1*tempI) - ((int)sinPhi1*tempQ)) / 6;
1779 	resLeftQ2 += 5*(((int)cosPhi1*tempQ) + ((int)sinPhi1*tempI)) / 6;
1780 
1781 	convertI = resRightI2 >> TWIDDLE_BIPT;
1782 	convertQ = resRightQ2 >> TWIDDLE_BIPT;
1783 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1784 	readPtr[subbandSize + 2] = tempValSt;
1785 
1786 	convertI = resRightI1 >> TWIDDLE_BIPT;
1787 	convertQ = resRightQ1 >> TWIDDLE_BIPT;
1788 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1789 	readPtr[subbandSize + 1] = tempValSt;
1790 
1791 	convertI = resMidI >> TWIDDLE_BIPT;
1792 	convertQ = resMidQ >> TWIDDLE_BIPT;
1793 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1794 	readPtr[subbandSize] = tempValSt;
1795 
1796 	convertI = resLeftI1 >> TWIDDLE_BIPT;
1797 	convertQ = resLeftQ1 >> TWIDDLE_BIPT;
1798 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1799 	readPtr[subbandSize - 1] = tempValSt;
1800 
1801 	convertI = resLeftI2 >> TWIDDLE_BIPT;
1802 	convertQ = resLeftQ2 >> TWIDDLE_BIPT;
1803 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1804 	readPtr[subbandSize - 2] = tempValSt;
1805 
1806 	// change phase on sc -32 / 32
1807 	idx = (phaseRollNg*MAX_FFT_SIZE) >> (MPY_BIPT+1); // divide by 2 for Ng=2
1808 	idx &= (MAX_FFT_SIZE - 1); // apply modulo
1809 	tempValLd = cosTablePtr[idx];
1810 	cosPhi0 = (tempValLd & 0xffff);
1811 	sinPhi0 = (tempValLd >> 16);
1812 
1813 	tempValLd = readPtr[subbandSize/2];
1814 	tempI = (tempValLd & 0xffff);
1815 	tempQ = (tempValLd >> 16);
1816 
1817 	resLeftI2 = (((int)cosPhi0*tempI) - ((int)sinPhi0*tempQ));
1818 	resLeftQ2 = (((int)cosPhi0*tempQ) + ((int)sinPhi0*tempI));
1819 
1820 	convertI = resLeftI2 >> TWIDDLE_BIPT;
1821 	convertQ = resLeftQ2 >> TWIDDLE_BIPT;
1822 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1823 	readPtr[subbandSize/2] = tempValSt;
1824 
1825 	tempValLd = readPtr[3*subbandSize/2];
1826 	tempI = (tempValLd & 0xffff);
1827 	tempQ = (tempValLd >> 16);
1828 
1829 	resRightI1 = (((int)cosPhi0*tempI) - ((int)sinPhi0*tempQ));
1830 	resRightQ1 = (((int)cosPhi0*tempQ) + ((int)sinPhi0*tempI));
1831 
1832 	convertI = resRightI1 >> TWIDDLE_BIPT;
1833 	convertQ = resRightQ1 >> TWIDDLE_BIPT;
1834 	tempValSt = (convertI & 0xffff) | (convertQ << 16);
1835 	readPtr[3*subbandSize/2] = tempValSt;
1836 }
1837 
1838 void interpolateBandEdges(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, int phaseRollNg){
1839 
1840 	int ii;
1841 	int NgDsfShift = pktinfo->NgDsfShift;
1842 	int numSubbands = 1<<pktinfo->sigBw;
1843 	int subbandSize = 1<<(6-NgDsfShift);
1844 	unsigned int *readPtr = fftInBuffer+pktinfo->scOffset;
1845 
1846 	readPtr[0] = 0;
1847 	for(ii=1;ii<numSubbands;ii++){
1848 		interpolatePairValue(readPtr+ii*subbandSize-1, readPtr+ii*subbandSize, phaseRollNg);
1849 	}
1850 	readPtr[numSubbands*subbandSize-1] = 0;
1851 }
1852 
1853 // H[k] in fft-in buffer are complex conjugate due to replacing iFFT with FFT
1854 void interpolatePairValue(unsigned int *valLeft, unsigned int *valRight, int phaseRollNg){
1855 
1856 	unsigned int tempValLd, tempValSt;
1857 	short tempI, tempQ;
1858 	int convertI, convertQ;
1859 
1860 #ifdef FLOATING_POINT
1861 	float resLeftI, resLeftQ, resRightI, resRightQ;
1862 	float sinPhi1, cosPhi1, sinPhi2, cosPhi2;
1863 	float angle = PI*phaseRollNg/(1<<(12-1));
1864 	cosPhi1 = cosf(angle);
1865 	sinPhi1 = sinf(angle);
1866 	cosPhi2 = cosf(2*angle);
1867 	sinPhi2 = sinf(2*angle);
1868 
1869 #else
1870 	int idx;
1871 	unsigned int *cosTablePtr = (unsigned int*)radix4FftTwiddleArr;
1872 	int resLeftI, resLeftQ, resRightI, resRightQ;
1873 	short sinPhi1, cosPhi1, sinPhi2, cosPhi2;
1874 
1875 	idx = (phaseRollNg*MAX_FFT_SIZE)>>MPY_BIPT;
1876 	idx &=(MAX_FFT_SIZE-1); // apply modulo
1877 	tempValLd = cosTablePtr[idx];
1878 	cosPhi1 = (tempValLd&0xffff);
1879 	sinPhi1 = (tempValLd>>16);
1880 
1881 	idx = (phaseRollNg*MAX_FFT_SIZE)>>(MPY_BIPT-1);
1882 	idx &=(MAX_FFT_SIZE-1); // apply modulo
1883 	tempValLd = cosTablePtr[idx];
1884 	cosPhi2 = (tempValLd&0xffff);
1885 	sinPhi2 = (tempValLd>>16);
1886 #endif
1887 
1888 	// contribution by lltf_missing(2,:)+2
1889 	tempValLd = valRight[2];
1890 	tempI = (tempValLd&0xffff);
1891 	tempQ = (tempValLd>>16);
1892 
1893 	resRightI = (((int)cosPhi2*tempI)+((int)sinPhi2*tempQ))/4;
1894 	resRightQ = (((int)cosPhi2*tempQ)-((int)sinPhi2*tempI))/4;
1895 
1896 	// contribution by lltf_missing(2,:)+1
1897 	tempValLd = valRight[1];
1898 	tempI = (tempValLd&0xffff);
1899 	tempQ = (tempValLd>>16);
1900 
1901 	resRightI += (((int)cosPhi1*tempI)+((int)sinPhi1*tempQ))/2;
1902 	resRightQ += (((int)cosPhi1*tempQ)-((int)sinPhi1*tempI))/2;
1903 
1904 	resLeftI = (((int)cosPhi2*tempI)+((int)sinPhi2*tempQ))/4;
1905 	resLeftQ = (((int)cosPhi2*tempQ)-((int)sinPhi2*tempI))/4;
1906 
1907 	// contribution by lltf_missing(1,:)-1
1908 	tempValLd = valLeft[-1];
1909 	tempI = (tempValLd&0xffff);
1910 	tempQ = (tempValLd>>16);
1911 
1912 	resRightI += (((int)cosPhi2*tempI)-((int)sinPhi2*tempQ))/4;
1913 	resRightQ += (((int)cosPhi2*tempQ)+((int)sinPhi2*tempI))/4;
1914 
1915 	resLeftI += (((int)cosPhi1*tempI)-((int)sinPhi1*tempQ))/2;
1916 	resLeftQ += (((int)cosPhi1*tempQ)+((int)sinPhi1*tempI))/2;
1917 #ifdef FLOATING_POINT
1918 	convertI = (int)floorf(resRightI+0.5f);
1919 	convertQ = (int)floorf(resRightQ+0.5f);
1920 #else
1921 	convertI = resRightI>>TWIDDLE_BIPT;
1922 	convertQ = resRightQ>>TWIDDLE_BIPT;
1923 #endif
1924 	tempValSt = (convertI&0xffff) | (convertQ<<16);
1925 	valRight[0] = tempValSt;
1926 
1927 	// contribution by lltf_missing(1,:)-2
1928 	tempValLd = valLeft[-2];
1929 	tempI = (tempValLd&0xffff);
1930 	tempQ = (tempValLd>>16);
1931 
1932 	resLeftI += (((int)cosPhi2*tempI)-((int)sinPhi2*tempQ))/4;
1933 	resLeftQ += (((int)cosPhi2*tempQ)+((int)sinPhi2*tempI))/4;
1934 
1935 #ifdef FLOATING_POINT
1936 	convertI = (int)floorf(resLeftI+0.5f);
1937 	convertQ = (int)floorf(resLeftQ+0.5f);
1938 #else
1939 	convertI = resLeftI>>TWIDDLE_BIPT;
1940 	convertQ = resLeftQ>>TWIDDLE_BIPT;
1941 #endif
1942 	tempValSt = (convertI&0xffff) | (convertQ<<16);
1943 	valLeft[0] = tempValSt;
1944 }
1945 
1946 void findActiveSubbands(hal_pktinfo_t *pktinfo, unsigned int *powerPerSubband, unsigned int *totalpower, int chNum, int ftmSignalBW){
1947 
1948 	int ii, qq, rx, tx, subBandPer, numSubBlocks, fftSize;
1949 	int subBlock, offset, channelMask, flag=1;
1950 	int channelOffset = 0;
1951 
1952 	int sigBwTemp = pktinfo->sigBw; // initialized to devBw
1953 	int NgDsfShift = pktinfo->NgDsfShift;
1954 	int numSubbands = 1<<sigBwTemp;
1955 	int subbandSize = 1<<(6-NgDsfShift);
1956 	int nRx = pktinfo->nRx+1;
1957 	int nTx = pktinfo->nTx+1;
1958 	unsigned int topHalf, bottomHalf, topHalfTemp, bottomHalfTemp;
1959 
1960 	// figure out channel offset
1961 	if(chNum>35){ // 5 GHz
1962 		if(chNum<149){
1963 			channelOffset = ((chNum-36)>>2)&(numSubbands-1);
1964 		}
1965 		else{
1966 			channelOffset = ((chNum-149)>>2)&(numSubbands-1);
1967 		}
1968 	}
1969 	pktinfo->scOffset = channelOffset;
1970 
1971 	if(sigBwTemp>1){ // for 80 and 160 MHz, 40 MHz subbands
1972 		subBandPer = 2;
1973 		channelOffset>>=1;
1974 	}
1975 	else{
1976 		subBandPer = 1;
1977 	}
1978 
1979 	numSubBlocks = numSubbands/subBandPer; offset =0;
1980 	subBlock = numSubBlocks;
1981 
1982 	if (numSubBlocks > 1) {
1983 		do {
1984 			channelMask = subBlock - 1;
1985 			subBlock >>= 1;
1986 			bottomHalf = 0;	topHalf = 0; flag = 0;
1987 			for (tx = 0;tx < nTx;tx++) {
1988 				for (rx = 0;rx < nRx;rx += NUM_PARALLEL) {
1989 					for (qq = 0;qq < NUM_PARALLEL; qq++) {
1990 						bottomHalfTemp = 0; topHalfTemp = 0;
1991 						for (ii = 0;ii < subBlock;ii++) {
1992 							bottomHalfTemp += powerPerSubband[qq + (ii + offset*subBlock)*NUM_PARALLEL + (rx + tx*nRx)*numSubBlocks];
1993 							topHalfTemp += powerPerSubband[qq + (ii + (offset + 1)*subBlock)*NUM_PARALLEL + (rx + tx*nRx)*numSubBlocks];
1994 						}
1995 						totalpower[qq + rx + tx*nRx] = topHalfTemp + bottomHalfTemp;
1996 						bottomHalf += bottomHalfTemp;
1997 						topHalf += topHalfTemp;
1998 					}
1999 				}
2000 			}
2001 			bottomHalf /= subBlock*subbandSize*nTx*nRx;
2002 			topHalf /= subBlock*subbandSize*nTx*nRx;
2003 
2004 			if (((channelOffset &channelMask) >= subBlock) && ((bottomHalf < SUBBAND_DET_THRESH) || (ftmSignalBW < sigBwTemp))) {
2005 				flag = 1;
2006 				sigBwTemp--;
2007 				offset = 2 * offset + 2;
2008 				numSubbands >>= 1;
2009 				//pktinfo->scOffset += numSubbands*subbandSize;
2010 			}
2011 			else if ((topHalf < SUBBAND_DET_THRESH) || (ftmSignalBW < sigBwTemp)) {
2012 				flag = 1;
2013 				sigBwTemp--;
2014 				numSubbands >>= 1;
2015 			}
2016 		} while (flag && (subBlock > 1));
2017 	}
2018 	else {
2019 		// determine scaling
2020 		numSubBlocks = 2;
2021 		for (tx = 0;tx < nTx;tx++) {
2022 			for (rx = 0;rx < nRx;rx += NUM_PARALLEL) {
2023 				for (qq = 0; qq < NUM_PARALLEL; qq++) {
2024 					unsigned int tempVal = 0;
2025 					for (ii = 0;ii < numSubBlocks;ii++) {
2026 						tempVal += powerPerSubband[qq + ii*NUM_PARALLEL + (rx + tx*nRx)*numSubBlocks];
2027 					}
2028 					totalpower[qq + rx + tx*nRx] = tempVal;
2029 				}
2030 			}
2031 		}
2032 	}
2033 
2034 	if (sigBwTemp > ftmSignalBW)
2035 		sigBwTemp = ftmSignalBW;
2036 
2037 	channelOffset = (pktinfo->scOffset) >> sigBwTemp;
2038 	pktinfo->scOffset = subbandSize *(channelOffset << sigBwTemp);
2039 
2040 	// write back
2041 	if(sigBwTemp!=pktinfo->sigBw){
2042 		pktinfo->sigBw = sigBwTemp;
2043 		fftSize = pktinfo->sigBw +IFFT_OSF_SHIFT -pktinfo->NgDsfShift;
2044 		pktinfo->fftSize = (fftSize > MIN_IFFT_SIZE_SHIFT)? fftSize:MIN_IFFT_SIZE_SHIFT;
2045 	}
2046 }
2047 
2048 void zeroOutTones(hal_pktinfo_t *pktinfo, unsigned int *fftInBuffer, int bufferspacing){
2049 
2050 	int ii, jj, kk;
2051 	int nRx = pktinfo->nRx+1;
2052 	int nTx = pktinfo->nTx+1;
2053 	int scOffset = pktinfo->scOffset;
2054 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
2055 	int numSubbands = 1<<(pktinfo->sigBw);
2056 	int subbandSize = 1<<(6-pktinfo->NgDsfShift);
2057 
2058 	unsigned int *writePtr;
2059 
2060 	for(ii=0;ii<nTx;ii++){
2061 		for(jj=0;jj<nRx;jj+=NUM_PARALLEL){
2062 			writePtr = fftInBuffer+bufferspacing*(jj+ii*nRx);
2063 
2064 			for(kk=0; kk < NUM_PARALLEL*scOffset; kk++){
2065 				writePtr[kk]=0;
2066 			}
2067 			for(kk = NUM_PARALLEL*(scOffset+numSubbands*subbandSize); kk < NUM_PARALLEL*ifftSizeOsf ; kk++){
2068 				writePtr[kk]=0;
2069 			}
2070 		}
2071 	}
2072 }
2073 
2074 #ifndef STA_20_ONLY
2075 void removeToneRotation(hal_pktinfo_t *pktinfo, unsigned int *fftInBfr, int bufferspacing){
2076 
2077 	int ii, jj, kk;
2078 	short *writePtr;
2079 	int bandwidth = 20<<(pktinfo->sigBw);
2080 	int subbandSize = 1<<(6-pktinfo->NgDsfShift);
2081 	int nRx = pktinfo->nRx+1;
2082 	int nTx = pktinfo->nTx+1;
2083 #if defined(ARM_GCC) || defined(ARM_DS5)
2084 	unsigned int myConst = 0x10001;
2085 	unsigned int myZero = 0x0;
2086 #ifdef ARM_DS5
2087     int reg6, reg7;
2088 #endif
2089 #else
2090 	short tempI, tempQ;
2091 #endif
2092 
2093 	for(ii=0;ii<nTx;ii++){
2094 		for(jj=0;jj<nRx;jj++){
2095 			writePtr = (short*)(fftInBfr+bufferspacing*(jj+ii*nRx)+pktinfo->scOffset);
2096 			if(bandwidth==40){ // second 20 MHz multiply by 1i
2097 #if defined(ARM_GCC) || defined(ARM_DS5)
2098 				writePtr+=2*subbandSize;
2099 				for(kk=0;kk<subbandSize;kk++){
2100 #if defined ARM_DS5
2101                     __asm volatile
2102                     {
2103                         LDR reg6, [writePtr]
2104                         SSAX reg7, myZero, reg6
2105                         STR reg7, [writePtr], #4
2106                     }
2107 #elif defined ARM_GCC
2108 					asm volatile (
2109 						"LDR r6, [%[writePtr]] \n\t"
2110 						"SSAX r7, %[myZero], r6 \n\t"
2111 						"STR r7, [%[writePtr]], #4 \n\t"
2112 						: [writePtr]"+r"(writePtr)
2113 						: [myZero]"r"(myZero) : "r6", "r7");
2114 #endif
2115 				}
2116 #else
2117 				for(kk=subbandSize;kk<2*subbandSize;kk++){
2118 					tempI = writePtr[2*kk];
2119 					tempQ = writePtr[2*kk+1];
2120 					writePtr[2*kk] = tempQ;
2121 					writePtr[2*kk+1] = -tempI;
2122 				}
2123 #endif
2124 			}
2125 			else if( (bandwidth>=80) && (pktinfo->packetType<4) ){ // first 20 MHz multiply by -1
2126 #if defined(ARM_GCC) || defined(ARM_DS5)
2127 				for(kk=0;kk<subbandSize;kk++){
2128 #if defined ARM_DS5
2129                     __asm volatile
2130                     {
2131                         LDR reg6, [writePtr]
2132                         MVN reg7, reg6
2133                         SADD16 reg7, reg7, myConst
2134                         STR reg7, [writePtr], #4
2135                     }
2136 #elif defined ARM_GCC
2137 					asm volatile (
2138 						"LDR r6, [%[writePtr]] \n\t"
2139 						"MVN r7, r6 \n\t"
2140 						"SADD16 r7, r7, %[myConst] \n\t"
2141 						"STR r7, [%[writePtr]], #4 \n\t"
2142 						: [writePtr]"+r"(writePtr)
2143 						: [myConst]"r"(myConst) : "r6", "r7");
2144 #endif
2145 			}
2146 #else
2147 				for(kk=0;kk<subbandSize;kk++){
2148                     tempI = writePtr[2*kk];
2149                     tempQ = writePtr[2*kk+1];
2150                     writePtr[2*kk] = -tempI;
2151                     writePtr[2*kk+1] = -tempQ;
2152 				}
2153 #endif
2154 #ifdef SMAC_BFINFO
2155                 if(bandwidth==160){ // first and fifth 20 MHz multiply by -1
2156 #if defined(ARM_GCC) || defined(ARM_DS5)
2157                 	writePtr+=2*3*subbandSize;
2158                 	for(kk=0;kk<subbandSize;kk++){
2159 #if defined ARM_DS5
2160                         __asm volatile
2161                         {
2162                             LDR reg6, [writePtr]
2163                             MVN reg7, reg6
2164                             SADD16 reg7, reg7, myConst
2165                             STR reg7, [writePtr], #4
2166                         }
2167 #elif defined ARM_GCC
2168 						asm volatile (
2169 							"LDR r6, [%[writePtr]] \n\t"
2170 							"MVN r7, r6 \n\t"
2171 							"SADD16 r7, r7, %[myConst] \n\t"
2172 							"STR r7, [%[writePtr]], #4 \n\t"
2173 							: [writePtr]"+r"(writePtr)
2174 							: [myConst]"r"(myConst) : "r6", "r7");
2175 #endif
2176                 	}
2177 #else
2178 					if (pktinfo->dcPhase == 0x3) { // case where we multiply by pi/2
2179 						for (kk = 4 * subbandSize;kk < 5 * subbandSize;kk++) {
2180 							tempI = writePtr[2 * kk];
2181 							tempQ = writePtr[2 * kk + 1];
2182 							writePtr[2 * kk] = tempQ;
2183 							writePtr[2 * kk + 1] = -tempI;
2184 						}
2185 						for (kk = 5 * subbandSize;kk < 8 * subbandSize;kk++) {
2186 							tempI = writePtr[2 * kk];
2187 							tempQ = writePtr[2 * kk + 1];
2188 							writePtr[2 * kk] = -tempQ;
2189 							writePtr[2 * kk + 1] = tempI;
2190 						}
2191 					}
2192 					else if (pktinfo->dcPhase == 0x1) { // case where we multiply by -pi/2
2193 						for (kk = 4 * subbandSize;kk < 5 * subbandSize;kk++) {
2194 							tempI = writePtr[2 * kk];
2195 							tempQ = writePtr[2 * kk + 1];
2196 							writePtr[2 * kk] = -tempQ;
2197 							writePtr[2 * kk + 1] = tempI;
2198 						}
2199 						for (kk = 5 * subbandSize;kk < 8 * subbandSize;kk++) {
2200 							tempI = writePtr[2 * kk];
2201 							tempQ = writePtr[2 * kk + 1];
2202 							writePtr[2 * kk] = tempQ;
2203 							writePtr[2 * kk + 1] = -tempI;
2204 						}
2205 					}
2206 					else { // no extra rotation
2207 						for (kk = 4 * subbandSize;kk < 5 * subbandSize;kk++) {
2208 							tempI = writePtr[2 * kk];
2209 							tempQ = writePtr[2 * kk + 1];
2210 							writePtr[2 * kk] = -tempI;
2211 							writePtr[2 * kk + 1] = -tempQ;
2212 						}
2213 					}
2214 #endif
2215 				}
2216 #endif
2217 			}
2218 			else if (pktinfo->packetType == 4) { // only HE160
2219 				if (pktinfo->dcPhase == 0x3) { // case where we multiply by pi/2
2220 					for (kk = 4 * subbandSize;kk < 8 * subbandSize;kk++) {
2221 						tempI = writePtr[2 * kk];
2222 						tempQ = writePtr[2 * kk + 1];
2223 						writePtr[2 * kk] = -tempQ;
2224 						writePtr[2 * kk + 1] = tempI;
2225 					}
2226 				}
2227 				else if (pktinfo->dcPhase == 0x1) { // case where we multiply by -pi/2
2228 					for (kk = 4 * subbandSize;kk < 8 * subbandSize;kk++) {
2229 						tempI = writePtr[2 * kk];
2230 						tempQ = writePtr[2 * kk + 1];
2231 						writePtr[2 * kk] = tempQ;
2232 						writePtr[2 * kk + 1] = -tempI;
2233 					}
2234 				}
2235 			}
2236 		}
2237 	}
2238 
2239 }
2240 
2241 #endif // STA_20_ONLY
2242 
/**
 * Compute the power (I*I + Q*Q) of every packed sample and locate the strongest one.
 *
 * @param currentValPtr  input samples, I in bits 15..0 and Q in bits 31..16 (signed 16 bit each)
 * @param pdpOutBuffer   out: power of sample t is stored at index t
 * @param ifftSizeOsf    number of samples to process
 * @param maxVals        out: largest power found, 0 if no sample has non-zero power
 * @param maxIndeces     out: index of the first sample with that power, -1 if none was found
 */
void calcPdpAndMaxRx(unsigned int *currentValPtr, unsigned int *pdpOutBuffer, int ifftSizeOsf, unsigned int* maxVals, int *maxIndeces){
    unsigned int currentSum;   // name kept: the ARM_DS5 inline assembly refers to it
    unsigned int peakPower = 0;
    int peakTap = -1;
    int tap;
#ifdef ARM_DS5
    int reg6;
#endif

    for(tap=0; tap<ifftSizeOsf; ++tap){
#ifdef ARM_DS5
        __asm volatile
        {
            LDR reg6, [currentValPtr], #4
            MOV currentSum, #0
            SMLAD currentSum, reg6, reg6, currentSum
        }
#else
        const unsigned int packed = *currentValPtr++;
        const short re = packed&0xffff;
        const short im = (packed>>16)&0xffff;

        currentSum = (unsigned int)(re*re);
        currentSum += (unsigned int)(im*im);
#endif
        pdpOutBuffer[tap] = currentSum;

        // strict comparison: on equal power the earliest tap wins
        if(currentSum>peakPower){
            peakPower = currentSum;
            peakTap = tap;
        }
    }

    *maxVals = peakPower;
    *maxIndeces = peakTap;
}
2279 
2280 void calcPdpAndMaxRxParallel(unsigned int *currentValPtr, unsigned int *pdpOutBuffer, int ifftSizeOsf, unsigned int* maxVals, int *maxIndeces) {
2281 	short tempI, tempQ;
2282 	unsigned int tempVal;
2283 	int tt, pp, maxIdx[NUM_PARALLEL];
2284 	unsigned int currentSum, currentMax[NUM_PARALLEL];
2285 
2286 	for (pp = 0; pp < NUM_PARALLEL; pp++) {
2287 		currentMax[pp] = 0;
2288 		maxIdx[pp] = -1;
2289 	}
2290 
2291 	for (tt = 0;tt<ifftSizeOsf;tt++) {
2292 		for (pp = 0; pp < NUM_PARALLEL; pp++) {
2293 			tempVal = *currentValPtr++;
2294 			tempI = tempVal & 0xffff;
2295 			tempQ = (tempVal >> 16) & 0xffff;
2296 			tempVal = tempI*tempI;
2297 			currentSum = tempVal;
2298 			tempVal = tempQ*tempQ;
2299 			currentSum += tempVal;
2300 			pdpOutBuffer[NUM_PARALLEL*tt+pp] = currentSum;
2301 			if (currentSum > currentMax[pp]) {
2302 				currentMax[pp] = currentSum;
2303 				maxIdx[pp] = tt;
2304 			}
2305 		}
2306 	}
2307 	for (pp = 0; pp < NUM_PARALLEL; pp++) {
2308 		maxVals[pp] = currentMax[pp];
2309 		maxIndeces[pp] = maxIdx[pp];
2310 	}
2311 }
2312 
2313 void calcPdpAndFirstPathMin(hal_pktinfo_t *pktinfo, unsigned int *fftOutBuffer, unsigned int *pdpOutBuffer, unsigned int *totalpower, int *idxRes, unsigned int *valRes, int *firstPathDelay){
2314 
2315 	int ii, jj, pp, firstPathMin, maxIdx;
2316 	unsigned short *tdScaling = (unsigned short*)totalpower;//[MAX_RX*MAX_TX];
2317 	unsigned int *currentValPtr =fftOutBuffer;
2318 	unsigned int tempVal, maxVals[MAX_RX*MAX_TX];
2319 	int maxIndeces[MAX_RX*MAX_TX], firstPathRx[MAX_RX*MAX_TX];
2320 	int nRx = pktinfo->nRx+1;
2321 	int nTx = pktinfo->nTx+1;
2322 	//int NgDsfShift = pktinfo->NgDsfShift;
2323 	int numSubbands = 1<<pktinfo->sigBw;
2324 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
2325 	int NgShift, subbandSize;
2326 	if(pktinfo->packetType>2){ // Ng=1
2327 		NgShift = 0;
2328 	}
2329 	else
2330 	{
2331 		NgShift = pktinfo->Ng+1;
2332 	}
2333 	subbandSize = 1<<(6-NgShift);
2334 
2335 	// determine scaling
2336 	for(ii=0;ii<nTx;ii++){
2337 		for(jj=0;jj<nRx;jj++){
2338 			//1.0f/(nRx*nTx*(aoaSysParam->fftSize/Ng)*powerH[jj+ii*nRx]);
2339 			tempVal = totalpower[jj+ii*nRx];
2340 			if(tempVal){
2341 				tempVal *= nTx*nRx*(numSubbands*subbandSize);
2342 				tempVal >>=(31-16-10);
2343 				tdScaling[2*(jj+ii*nRx)] = (unsigned short)((ifftSizeOsf*ifftSizeOsf<<10)/tempVal); //(1<<31)
2344 				tdScaling[2*(jj+ii*nRx)+1] = 0; // use 4 byte
2345 			}
2346 			else{
2347 				tdScaling[jj+ii*nRx] =1;
2348 			}
2349 		}
2350 	}
2351 
2352 	for(ii=0;ii<nTx;ii++){
2353 		for(jj=0;jj<nRx;jj+=NUM_PARALLEL){
2354 			// calculate PDP for this Rx/Tx channel
2355 #if defined(FFT_PARALLEL) && defined(ARM_DS5)
2356 			calcPdpAndMaxRxIntrinsic(currentValPtr, pdpOutBuffer, ifftSizeOsf, maxVals + jj + ii*nRx, maxIndeces + jj + ii*nRx);
2357 #else
2358 			calcPdpAndMaxRxParallel(currentValPtr, pdpOutBuffer, ifftSizeOsf, maxVals + jj + ii*nRx, maxIndeces + jj + ii*nRx);
2359 #endif
2360 			currentValPtr += NUM_PARALLEL*ifftSizeOsf;
2361 
2362 			// find first path
2363 			for (pp = 0; pp < NUM_PARALLEL; pp++) {
2364 				firstPathRx[pp + jj + ii*nRx] = findFirstPath(pktinfo, pdpOutBuffer + pp, maxIndeces[pp + jj + ii*nRx], maxVals[pp + jj + ii*nRx], NUM_PARALLEL);
2365 			}
2366 		}
2367 	}
2368 
2369 	// choose min as first path and average for max peak
2370 	firstPathMin = (ifftSizeOsf<<TOA_FPATH_BIPT);
2371 	maxIdx = 0; tempVal = 0;
2372 	for(ii=0;ii<nTx;ii++){
2373 		for(jj=0;jj<nRx;jj++){
2374 			if(firstPathRx[jj+ii*nRx]<firstPathMin){
2375 				firstPathMin = firstPathRx[jj+ii*nRx];
2376 			}
2377 			if(maxVals[jj+ii*nRx]>tempVal){
2378 				tempVal = maxVals[jj+ii*nRx];
2379 				maxIdx = jj+ii*nRx;
2380 			}
2381 		}
2382 	}
2383 	*idxRes = maxIndeces[maxIdx];
2384 	*valRes = (tempVal*tdScaling[2*maxIdx])*nTx*nRx;
2385 	*firstPathDelay = firstPathMin;
2386 }
2387 
2388 
2389 #if (MAX_RX>1) || (MAX_TX>1)
2390 void calcPdpAndMax(hal_pktinfo_t *pktinfo, unsigned int *fftOutBuffer, unsigned int *pdpOutBuffer, unsigned int *totalpower, int *idxRes, unsigned int *valRes){
2391 
2392 	int ii, jj, tt, maxIdx = -1;
2393 	unsigned short *scalePtr, *tdScaling = (unsigned short*)totalpower;//[MAX_RX*MAX_TX];
2394 	unsigned int *ldPtr, *currentValPtr =fftOutBuffer;
2395 	unsigned int currentSum, tempVal, currentMax = 0;
2396 	int nRx = pktinfo->nRx+1;
2397 	int nTx = pktinfo->nTx+1;
2398 	int numSubbands = 1<<pktinfo->sigBw;
2399 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
2400 	int NgShift, subbandSize;
2401 #if defined(ARM_DS5)
2402     int reg6, reg7, reg8;
2403 #elif !defined(ARM_GCC)
2404 	short tempI, tempQ;
2405     unsigned short scaleVal;
2406 #endif
2407 
2408 	if(pktinfo->packetType>2){ // Ng=1
2409 		NgShift = 0;
2410 	}
2411 	else
2412 	{
2413 		NgShift = pktinfo->Ng+1;
2414 	}
2415 	subbandSize = 1<<(6-NgShift);
2416 
2417 	// determine scaling
2418 	for(ii=0;ii<nTx;ii++){
2419 		for(jj=0;jj<nRx;jj++){
2420 			//1.0f/(nRx*nTx*(aoaSysParam->fftSize/Ng)*powerH[jj+ii*nRx]);
2421 			tempVal = totalpower[jj+ii*nRx];
2422 			if(tempVal){
2423 				tempVal *= nTx*nRx*(numSubbands*subbandSize);
2424 				tempVal >>=(31-16-10);
2425 				tdScaling[2*(jj+ii*nRx)] = (unsigned short)((ifftSizeOsf*ifftSizeOsf<<10)/tempVal); //(1<<31)
2426 				tdScaling[2*(jj+ii*nRx)+1] = 0; // use 4 byte
2427 			}
2428 			else{
2429 				tdScaling[jj+ii*nRx] =1;
2430 			}
2431 		}
2432 	}
2433 
2434 	for(tt=0;tt<ifftSizeOsf;tt++){
2435 		currentSum = 0;
2436 		ldPtr = currentValPtr;
2437 		scalePtr = tdScaling;
2438 		for(ii=0;ii<nTx;ii++){
2439 			for(jj=0;jj<nRx;jj++){
2440 #if defined(ARM_DS5)
2441                 __asm volatile
2442                 {
2443                     LDR reg6, [ldPtr], ifftSizeOsf, LSL #2
2444                     MOV reg7, #0
2445                     SMLAD reg7, reg6, reg6, reg7
2446                     LDRH reg8, [scalePtr], #4
2447                     MLA currentSum, reg7, reg8, currentSum
2448                 }
2449 
2450 #elif defined(ARM_GCC)
2451 				asm volatile (
2452 					"LDR r6, [%[ldPtr]], %[ifftSizeOsf] \n\t"
2453 					"MOV r7, #0 \n\t"
2454 					"SMLAD r7, r6, r6, r7 \n\t"
2455 					"LDRH r8, [%[scalePtr]], #4 \n\t"
2456 					"MLA %[currentSum], r7, r8, %[currentSum] \n\t"
2457 					: [ldPtr]"+r"(ldPtr), [scalePtr]"+r"(scalePtr), [currentSum]"+r"(currentSum)
2458 					: [ifftSizeOsf]"r"(4*ifftSizeOsf)
2459 					: "r6", "r7", "r8");
2460 #elif !(defined(ARM_DS5)||defined(ARM_GCC))
2461 				tempVal = *ldPtr;
2462 				ldPtr+=ifftSizeOsf;
2463 				scaleVal = *scalePtr;
2464 				scalePtr +=2;
2465 				tempI = tempVal&0xffff;
2466 				tempQ = (tempVal>>16)&0xffff;
2467 				tempVal = tempI*tempI;
2468 				currentSum += tempVal*scaleVal;
2469 				tempVal = tempQ*tempQ;
2470 				currentSum += tempVal*scaleVal;
2471 #endif
2472 			}
2473 		}
2474 		pdpOutBuffer[tt]= currentSum;
2475 		currentValPtr++;
2476 
2477 		if(currentSum>currentMax){
2478 			currentMax =currentSum;
2479 			maxIdx = tt;
2480 		}
2481 	}
2482 	*idxRes = maxIdx;
2483 	*valRes = currentMax;
2484 }
2485 
2486 void calcPdpAndMaxParallel(hal_pktinfo_t *pktinfo, unsigned int *fftOutBuffer, unsigned int *pdpOutBuffer, unsigned int *totalpower, int *idxRes, unsigned int *valRes) {
2487 
2488 	int ii, jj, pp, tt, maxIdx = -1;
2489 	unsigned short *scalePtr, *tdScaling = (unsigned short*)totalpower;//[MAX_RX*MAX_TX];
2490 	unsigned int *ldPtr, *currentValPtr = fftOutBuffer;
2491 	unsigned int currentSum, tempVal, currentMax = 0;
2492 	int nRx = pktinfo->nRx + 1;
2493 	int nTx = pktinfo->nTx + 1;
2494 	int numSubbands = 1 << pktinfo->sigBw;
2495 	int ifftSizeOsf = 1 << (pktinfo->fftSize + 6);
2496 	int NgShift, subbandSize;
2497 	short tempI, tempQ;
2498 	unsigned short scaleVal;
2499 
2500 	if (pktinfo->packetType>2) { // Ng=1
2501 		NgShift = 0;
2502 	}
2503 	else
2504 	{
2505 		NgShift = pktinfo->Ng + 1;
2506 	}
2507 	subbandSize = 1 << (6 - NgShift);
2508 
2509 	// determine scaling
2510 	for (ii = 0;ii<nTx;ii++) {
2511 		for (jj = 0;jj<nRx;jj++) {
2512 			//1.0f/(nRx*nTx*(aoaSysParam->fftSize/Ng)*powerH[jj+ii*nRx]);
2513 			tempVal = totalpower[jj + ii*nRx];
2514 			if (tempVal) {
2515 				tempVal *= nTx*nRx*(numSubbands*subbandSize);
2516 				tempVal >>= (31 - 16 - 10);
2517 				if(ifftSizeOsf>1024)
2518 					tdScaling[2 * (jj + ii*nRx)] = (unsigned short)((ifftSizeOsf*ifftSizeOsf << 8) / (tempVal>>2)); //(1<<31)
2519 				else
2520 					tdScaling[2 * (jj + ii*nRx)] = (unsigned short)((ifftSizeOsf*ifftSizeOsf << 10) / tempVal ); //(1<<31)
2521 				tdScaling[2 * (jj + ii*nRx) + 1] = 0; // use 4 byte
2522 			}
2523 			else {
2524 				tdScaling[jj + ii*nRx] = 1;
2525 			}
2526 		}
2527 	}
2528 
2529 	for (tt = 0;tt<ifftSizeOsf;tt++) {
2530 		currentSum = 0;
2531 		ldPtr = currentValPtr;
2532 		scalePtr = tdScaling;
2533 		for (ii = 0;ii<nTx;ii++) {
2534 			for (jj = 0;jj<nRx;jj+= NUM_PARALLEL) {
2535 				for (pp = 0; pp < NUM_PARALLEL; pp++) {
2536 					tempVal = *ldPtr;
2537 					scaleVal = *scalePtr;
2538 					tempI = tempVal & 0xffff;
2539 					tempQ = (tempVal >> 16) & 0xffff;
2540 					tempVal = tempI*tempI;
2541 					currentSum += tempVal*scaleVal;
2542 					tempVal = tempQ*tempQ;
2543 					currentSum += tempVal*scaleVal;
2544 
2545 					ldPtr++;
2546 					scalePtr += 2;
2547 				}
2548 				ldPtr += NUM_PARALLEL*ifftSizeOsf-NUM_PARALLEL;
2549 			}
2550 		}
2551 		pdpOutBuffer[tt] = currentSum;
2552 		currentValPtr+= NUM_PARALLEL;
2553 
2554 		if (currentSum>currentMax) {
2555 			currentMax = currentSum;
2556 			maxIdx = tt;
2557 		}
2558 	}
2559 	*idxRes = maxIdx;
2560 	*valRes = currentMax;
2561 }
2562 
2563 #else
2564 
2565 void calcPdpAndMax(hal_pktinfo_t *pktinfo, unsigned int *fftOutBuffer, unsigned int *pdpOutBuffer, unsigned int *totalpower, int *idxRes, unsigned int *valRes){
2566 	int tt, maxIdx = -1;
2567 	unsigned short *tdScaling = (unsigned short*)totalpower;//[MAX_RX*MAX_TX];
2568 	unsigned int *currentValPtr =fftOutBuffer;
2569 	unsigned int currentSum, tempVal, currentMax = 0;
2570 	//int nRx = pktinfo->nRx+1;
2571 	//int nTx = pktinfo->nTx+1;
2572 	int numSubbands = 1<<pktinfo->sigBw;
2573 	int NgShift = pktinfo->Ng+1;
2574 	int subbandSize = 1<<(6-NgShift);
2575 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
2576 	short tempI, tempQ;
2577     unsigned short scaleVal;
2578 
2579 	// determine scaling
2580 	//1.0f/(nRx*nTx*(aoaSysParam->fftSize/Ng)*powerH[jj+ii*nRx]);
2581 	tempVal = *totalpower;
2582 	if(tempVal){
2583 		tempVal *= numSubbands*subbandSize;
2584 		tempVal >>=(31-16-10);
2585 		tdScaling[0] = (unsigned short)((ifftSizeOsf*ifftSizeOsf<<10)/tempVal); //(1<<31)
2586 		tdScaling[1] = 0; // use 4 byte
2587 	}
2588 	else{
2589 		*tdScaling =1;
2590 	}
2591 
2592 	scaleVal = *tdScaling;
2593 	for(tt=0;tt<ifftSizeOsf;tt++){
2594 		tempVal = *currentValPtr;
2595 		tempI = tempVal&0xffff;
2596 		tempQ = (tempVal>>16)&0xffff;
2597 		tempVal = tempI*tempI;
2598 		currentSum = tempVal*scaleVal;
2599 		tempVal = tempQ*tempQ;
2600 		currentSum += tempVal*scaleVal;
2601 		pdpOutBuffer[tt]= currentSum;
2602 		currentValPtr++;
2603 
2604 		if(currentSum>currentMax){
2605 			currentMax =currentSum;
2606 			maxIdx = tt;
2607 		}
2608 	}
2609 	*idxRes = maxIdx;
2610 	*valRes = currentMax;
2611 }
2612 #endif
2613 
2614 int findFirstPath(hal_pktinfo_t *pktinfo, unsigned int *pdpOutBuffer, int maxIdx, unsigned int maxVal, int stride){
2615 
2616 	int ii, firstLen, secondLen, mask;
2617 	unsigned int y0, ym1, yp1, denominator, peakThresh, tempVal;
2618 	int numerator, delta, interpDelay, firstPath;
2619 	//unsigned short denomShort;
2620 	unsigned int *readPtr;
2621 	int NgDsfShift = pktinfo->NgDsfShift;
2622 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
2623 
2624 	int totalLen, negLen, posLen;
2625 
2626 	totalLen = ifftSizeOsf;
2627 	totalLen /= (pktinfo->packetType>3)? (TD_FRAC_OF_FFT_HE >> NgDsfShift):(TD_FRAC_OF_FFT >> NgDsfShift);
2628 	negLen = ifftSizeOsf / (FFT_ADV_FRAC >> NgDsfShift); //
2629 	posLen = totalLen - negLen;
2630 
2631 	peakThresh = maxVal>>PEAK_THRESH_SHIFT;
2632 	if(peakThresh<PEAK_THRESH_MIN_ABS){
2633 		peakThresh = PEAK_THRESH_MIN_ABS;
2634 	}
2635 
2636 	if(maxIdx>posLen){ // max in negative portion of PDP, no need to process positive part
2637 		firstPath = maxIdx-ifftSizeOsf;
2638 		firstLen = negLen+firstPath+2;
2639 		secondLen = 0;
2640 	}
2641 	else{ // max in positive portion of PDP, start with negative part, positive next
2642 		firstPath = maxIdx;
2643 		firstLen = negLen;
2644 		secondLen = firstPath;
2645 	}
2646 
2647 	// negative part
2648 	readPtr = pdpOutBuffer+ stride*(ifftSizeOsf-negLen);
2649 	ym1 = 0;
2650 	y0 = maxVal;
2651 	yp1 = readPtr[0];
2652 	for(ii=1;ii<firstLen;ii++){ // find first path
2653 		ym1 = y0;
2654 		y0 = yp1;
2655 		yp1 = readPtr[stride*ii];
2656 
2657 		if((y0>ym1) && (y0>yp1)){ // local maximum
2658 			if(y0>peakThresh){
2659 				firstPath =ii-1-negLen;
2660 				secondLen = 0; // no need for positive part
2661 				break;
2662 			}
2663 		}
2664 	}
2665 
2666 	// positive part
2667 	readPtr = pdpOutBuffer;
2668 	for(ii=0;ii<secondLen;ii++){
2669 		ym1 = y0;
2670 		y0 = yp1;
2671 		yp1 = readPtr[stride*ii];
2672 
2673 		if((y0>ym1) && (y0>yp1)){ // local maximum
2674 			if(y0>peakThresh){
2675 				firstPath =ii-1;
2676 				break;
2677 			}
2678 		}
2679 	}
2680 
2681 	// interpolate peak
2682 	mask = ifftSizeOsf-1;
2683 	// shift to avoid overflow
2684 	ym1 = pdpOutBuffer[stride*((firstPath-1)&mask)]>>8;
2685 	y0 = pdpOutBuffer[stride*(firstPath&mask)]>>8;
2686 	yp1 = pdpOutBuffer[stride*((firstPath+1)&mask)]>>8;
2687 
2688 	denominator = 2*(2*y0-yp1-ym1);
2689 	numerator = (yp1-ym1);
2690 
2691 	tempVal = (numerator>0)? numerator: (-numerator);
2692 	ii = __clz(tempVal);
2693 	if(ii>TOA_FPATH_BIPT){
2694 		tempVal<<=TOA_FPATH_BIPT;
2695 		delta = tempVal / denominator;
2696 	}
2697 	else{
2698 		tempVal<<=(ii-1);
2699 		delta = tempVal / (denominator>>(TOA_FPATH_BIPT-ii+1));
2700 	}
2701 	delta = (numerator>0)? delta:-delta;
2702 
2703 	// format is 32.TOA_FPATH_BIPT
2704 	//delta /=denomShort;
2705 	//delta<<=TOA_FPATH_BIPT;
2706 	//delta /= denominator;
2707 	if((delta>(1<<(TOA_FPATH_BIPT-1))) || (-delta >(1<<(TOA_FPATH_BIPT-1)))){
2708 		delta = 0; // overflow in division
2709 	}
2710 	interpDelay = (firstPath<<TOA_FPATH_BIPT) +delta;
2711 
2712 	return interpDelay;
2713 }
2714 
2715 
2716 void dumpRawComplex(hal_pktinfo_t *pktinfo, unsigned int *fftBuffer, int peakIdx, unsigned int *destArray){
2717 
2718 	int ii, jj, offset;
2719 	int nRx = pktinfo->nRx+1;
2720 	int nTx = pktinfo->nTx+1;
2721 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
2722 	unsigned int *currentValPtr, tempVal;
2723 
2724 	offset = (peakIdx+(1<<7))>>8; // round back
2725 	if(offset<0){
2726 		offset+=ifftSizeOsf;
2727 	}
2728 	currentValPtr = fftBuffer+offset;
2729 
2730 	for(ii=0;ii<nTx;ii++){
2731 		for(jj=0;jj<nRx;jj++){
2732 			tempVal = currentValPtr[(jj+ii*nRx)*ifftSizeOsf];
2733 			destArray[jj+ii*nRx] = tempVal;
2734 		}
2735 	}
2736 }
2737 
2738 #endif  /* CONFIG_WLS_CSI_PROC */
2739