1 /** @file wls_subspace_processing.c
2   *
3   * @brief This file contains source code to calculate fine timing using sub-space methods (ESPRIT)
4   *
5   * Copyright 2023-2024 NXP
6   *
7   * SPDX-License-Identifier: BSD-3-Clause
8   *
9   */
10 
11 /************************************************************************
12 * DFW Source code to calculate fine timing using sub-space methods (ESPRIT)
13 ************************************************************************/
14 
15 #include <osa.h>
16 #if CONFIG_WLS_CSI_PROC
17 
18 // Standard includes.
19 #include <stdio.h>
20 //#include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23 // 4 sqrtf, 1 atan2f, 1 atanf, 2 floorf
24 #ifdef ARM_DS5
25 #include <arm_neon.h>
26 #endif
27 
28 // Specific includes.
29 #include "wls_subspace_processing.h"
30 #include "wls_param_defines.h"
31 #include "wls_QR_algorithm.h"
32 #include "wls_radix4Fft.h"
33 
34 #define FFT_INPUT_SIZE_MAX 32
35 #define FFT_INPUT_SIZE_SHORT_WINDOW 16
36 #define FFT_INPUT_SIZE_LONG_WINDOW 32
37 
38 #ifdef STA_20_ONLY
39 #define BUFFER_OFFSET (12*MAX_FFT_SIZE/2)
40 #else
41 #define BUFFER_OFFSET (3*MAX_FFT_SIZE/2)
42 #endif
43 #define SUBSP_DIM_L_MAX MAX_MAT_SIZE
44 //needs to be floor(0.75*SUB_DIM_L)
45 #define SIG_SUBSP_DIM_MAX 7 // 15
46 
47 #define MUSIC_THRESH_REL_5GHz 0.01f
48 #define MUSIC_THRESH_REL_2GHz 0.1f
49 #define MUSIC_THRESH_MIN 0.02f
50 #define MUSIC_THRESH_MAX 0.10f
51 
52 #define SUB_DET_THRESH_5G_80MHZ 0.1f
53 #define SUB_DET_THRESH_5G_40MHZ 0.08f
54 #define SUB_DET_THRESH_5G_20MHZ 0.04f // 0.1f
55 #define SUB_DET_THRESH_20MHZ 0.253f
56 #define SUB_DET_THRESH_Legacy 0.38f
57 #define SUB_DET_THRESH_REL_5G_80MHZ 0.01f
58 #define SUB_DET_THRESH_REL_5G_40MHZ 0.025f
59 #define SUB_DET_THRESH_REL_5G_20MHZ 0.005f
60 #define SUB_DET_THRESH_REL_2G 0.015f
61 #define SUB_DET_THRESH_REL_Legacy 0.25f
62 
63 #define SUB_DIM_L_6 6
64 #define SUB_DIM_L_10 10
65 #define SUB_DIM_L_12 12
66 #define SUB_DIM_L_16 16
67 #define SUB_DIM_L_20 20
68 #define SUB_DIM_L_22 22
69 
70 #define USE_TLS
71 #define FFT_TD_OSF_SHIFT 1
72 #define FFT_FD_EDGE 0
73 
calcCorMatrix(hal_pktinfo_t * pktinfo,unsigned int * fftOutBuffer,unsigned int * totalpower,int firstPathDelay,float * realCorrMatrix,unsigned int * scratchMemory,int subsp_dim_L,int fft_input_size,int FD_downsample,int * numObsPtr)74 void calcCorMatrix(hal_pktinfo_t *pktinfo, unsigned int *fftOutBuffer, unsigned int *totalpower, int firstPathDelay, float *realCorrMatrix,
75 	unsigned int *scratchMemory, int subsp_dim_L, int fft_input_size, int FD_downsample, int* numObsPtr)
76 {
77 
78 	int ii, jj, kk, ll, rowIdx, colIdx, qq;
79 	int loadOffset;
80 	float *fftTempBuffer = (float*)scratchMemory; //[2*(FFT_INPUT_SIZE_MAX<<FFT_TD_OSF_SHIFT)];
81 	float *strPtr, *strPtrTr, *ldPtr, *ldPtrTr;
82 	//float debugBuffer[FFT_INPUT_SIZE_MAX];
83 	float *corrMatrix = (float*)scratchMemory +2* (FFT_INPUT_SIZE_MAX << FFT_TD_OSF_SHIFT); // size: [SUBSP_DIM_L_MAX*2*SUBSP_DIM_L_MAX];
84 	unsigned int *loadPtr;
85 	unsigned int tempVal, loadOffsetU32, loadOffsetMask;
86 	short tempI, tempQ;
87 	float valI, valQ, valTempI, valTempQ, scale;
88 	float corrI, corrQ, sumI, sumQ;
89 	float *tempBuffer = (float*)scratchMemory + 2 * (FFT_INPUT_SIZE_MAX << FFT_TD_OSF_SHIFT) + 2 * SUBSP_DIM_L_MAX *  SUBSP_DIM_L_MAX; // [2*FFT_INPUT_SIZE_MAX];
90 	int storeIdx, loadIdx;
91 
92 	int nRx = pktinfo->nRx+1;
93 	int nTx = pktinfo->nTx+1;
94 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
95 
96 	int firstPathIndex = (firstPathDelay+(1<<(TOA_FPATH_BIPT-1)))>>TOA_FPATH_BIPT;
97 	int windowStart = firstPathIndex-(fft_input_size <<(IFFT_OSF_SHIFT-1)); // center window middle on first-path
98 
99 	int stepsize = 1<<(IFFT_OSF_SHIFT - FFT_TD_OSF_SHIFT);
100 	int numObs = fft_input_size - subsp_dim_L - 2*FFT_FD_EDGE;
101 
102 	memset((void*)corrMatrix, 0, 2*SUBSP_DIM_L_MAX*SUBSP_DIM_L_MAX*sizeof(float));
103 	loadOffsetMask = ifftSizeOsf*NUM_PARALLEL - 1;
104 
105 	for(ii=0;ii<nTx;ii++){
106 		for(jj=0;jj<nRx;jj+=NUM_PARALLEL){
107 
108 			for (qq = 0;qq < NUM_PARALLEL; qq++) {
109 				loadPtr = fftOutBuffer + (jj + ii*nRx)*ifftSizeOsf + qq;
110 				loadOffset = windowStart*NUM_PARALLEL;
111 				scale = SQRTF(1.0f*nTx*nRx*totalpower[(jj + qq) + ii*nRx] / (1 << 15)) / (1 << (8 + FFT_TD_OSF_SHIFT)); // 32.31 format
112 				strPtr = fftTempBuffer;
113 				for (kk = 0;kk < (fft_input_size << FFT_TD_OSF_SHIFT) * FD_downsample;kk++) {
114 					loadOffsetU32 = loadOffset &loadOffsetMask;
115 					tempVal = loadPtr[loadOffsetU32];
116 					loadOffset += stepsize*NUM_PARALLEL;
117 					tempI = tempVal & 0xffff;
118 					tempQ = (tempVal >> 16) & 0xffff;
119 
120 					valI = tempI*scale;
121 					*strPtr++ = valI;
122 					valQ = tempQ*scale;
123 					*strPtr++ = valQ;
124 				}
125 				radix2FftFlt(fftTempBuffer, (fft_input_size << FFT_TD_OSF_SHIFT) * FD_downsample, twiddleTableFlt, MAX_FFT_FLT);
126 				if (FD_downsample == 4) {
127 					for (ll = 1;ll < fft_input_size;ll++) {
128 						fftTempBuffer[2 * ll] = fftTempBuffer[2 * FD_downsample * ll];
129 						fftTempBuffer[2 * ll + 1] = fftTempBuffer[2 * FD_downsample * ll + 1];
130 					}
131 				}
132 				// diagonal
133 				// rowIdx = 0
134 				ldPtr = fftTempBuffer + 2*(FFT_FD_EDGE+1); // skip 0
135 				strPtr = corrMatrix;
136 				sumI = 0;
137 				storeIdx = 0;
138 				for (kk = 0;kk < numObs;kk++) {
139 					valI = *ldPtr++; // fftTempBuffer[2 * kk];
140 					valQ = *ldPtr++; // fftTempBuffer[2 * kk + 1];
141 					corrI = valI*valI + valQ*valQ;
142 					sumI += corrI;
143 					tempBuffer[storeIdx++] = corrI;
144 				}
145 				*strPtr += sumI; //corrMatrix[0]
146 				strPtr += 2 * (SUBSP_DIM_L_MAX + 1);
147 				loadIdx = 0;
148 				for (rowIdx = 1;rowIdx < subsp_dim_L;rowIdx++) {
149 					sumI -= tempBuffer[loadIdx++];
150 					valI = *ldPtr++; // fftTempBuffer[2 * (rowIdx-1 + kk)];
151 					valQ = *ldPtr++; // fftTempBuffer[2 * (rowIdx-1 + kk) + 1];
152 					corrI = valI*valI + valQ*valQ;
153 					sumI += corrI;
154 					tempBuffer[storeIdx++] = corrI;
155 					*strPtr += sumI; //corrMatrix[rowIdx * 2 * (SUBSP_DIM_L_MAX+1)] += sumI;
156 					strPtr += 2 * (SUBSP_DIM_L_MAX + 1);
157 				}
158 				// off-diagonals
159 				for (colIdx = 1; colIdx < subsp_dim_L; colIdx++) {
160 					// rowIdx = 0
161 					ldPtr = fftTempBuffer + 2 * (FFT_FD_EDGE + 1);
162 					ldPtrTr = fftTempBuffer + 2 * (FFT_FD_EDGE + 1) + 2 * colIdx;
163 					strPtr = corrMatrix + 2 * colIdx * SUBSP_DIM_L_MAX;
164 					sumI = 0;
165 					sumQ = 0;
166 					storeIdx = 0;
167 					for (kk = 0;kk < numObs;kk++) {
168 
169 						valI = *ldPtrTr++; //fftTempBuffer[2 * (colIdx + kk)];
170 						valQ = *ldPtrTr++; //fftTempBuffer[2 * (colIdx + kk) + 1];
171 
172 						valTempI = *ldPtr++; //fftTempBuffer[2 * kk];
173 						valTempQ = *ldPtr++; //fftTempBuffer[2 * kk + 1];
174 
175 						corrI = valI*valTempI + valQ*valTempQ;
176 						corrQ = valQ*valTempI - valI*valTempQ;
177 
178 						sumI += corrI;
179 						sumQ += corrQ;
180 
181 						tempBuffer[storeIdx++] = corrI;
182 						tempBuffer[storeIdx++] = corrQ;
183 					}
184 					*strPtr++ += sumI; //corrMatrix[2 * colIdx * SUBSP_DIM_L_MAX]
185 					*strPtr++  -= sumQ; //corrMatrix[2 * colIdx * SUBSP_DIM_L_MAX + 1]
186 					strPtr += 2 * SUBSP_DIM_L_MAX;
187 					loadIdx = 0;
188 					for (rowIdx = 1;rowIdx < subsp_dim_L - colIdx;rowIdx++) {
189 						sumI -= tempBuffer[loadIdx++];
190 						sumQ -= tempBuffer[loadIdx++];
191 
192 						valI = *ldPtrTr++; //fftTempBuffer[2 * (rowIdx - 1 + colIdx + kk)];
193 						valQ = *ldPtrTr++; //fftTempBuffer[2 * (rowIdx - 1 + colIdx + kk) + 1];
194 
195 						valTempI = *ldPtr++; //fftTempBuffer[2 * (rowIdx - 1 + kk)];
196 						valTempQ = *ldPtr++; //fftTempBuffer[2 * (rowIdx - 1 + kk) + 1];
197 
198 						corrI = valI*valTempI + valQ*valTempQ;
199 						corrQ = valQ*valTempI - valI*valTempQ;
200 
201 						sumI += corrI;
202 						sumQ += corrQ;
203 
204 						tempBuffer[storeIdx++] = corrI;
205 						tempBuffer[storeIdx++] = corrQ;
206 
207 						*strPtr++ += sumI; //corrMatrix[(rowIdx + colIdx) * 2 * SUBSP_DIM_L_MAX + 2 * rowIdx]
208 						*strPtr++ -= sumQ; //corrMatrix[(rowIdx + colIdx) * 2 * SUBSP_DIM_L_MAX + 2 * rowIdx + 1]
209 						strPtr += 2 * SUBSP_DIM_L_MAX;
210 					}
211 				}
212 			}
213 		}
214 	}
215 	for (rowIdx = 0;rowIdx < subsp_dim_L;rowIdx++) {
216 		ldPtr = corrMatrix + (rowIdx * 2 * SUBSP_DIM_L_MAX);
217 		strPtrTr = corrMatrix +(2 * rowIdx);
218 		for (colIdx = 0;colIdx < rowIdx;colIdx++) {
219 
220 			corrI = *ldPtr++; // use symmetry
221 			corrQ = *ldPtr++;
222 
223 			*strPtrTr++ = corrI;
224 			*strPtrTr = -corrQ; strPtrTr += 2* SUBSP_DIM_L_MAX -1;
225 		}
226 	}
227 
228 	// create real matrix
229 	for(rowIdx=0;rowIdx<subsp_dim_L/2;rowIdx++){
230 		ldPtr = corrMatrix +(rowIdx * 2 * SUBSP_DIM_L_MAX);
231 		ldPtrTr = corrMatrix + (((subsp_dim_L - 1) - rowIdx) * 2 * SUBSP_DIM_L_MAX + 2 * (subsp_dim_L - 1) + 1);
232 		strPtr = realCorrMatrix + (rowIdx*SUBSP_DIM_L_MAX);
233 		strPtrTr = realCorrMatrix + (rowIdx + subsp_dim_L / 2)*SUBSP_DIM_L_MAX;
234 		for(colIdx=0;colIdx<subsp_dim_L/2;colIdx++){
235 			valI = *ldPtr++; // R00
236 			valQ = *ldPtr++;
237 
238 			corrQ = *ldPtrTr--;
239 			corrI = *ldPtrTr--; // JR11J
240 			valTempI = valI + corrI;
241 			valTempQ = valQ - corrQ;
242 
243 		    // Ax = real(myR00+conj(myJR11J));
244 			// Bx = imag(myR00+conj(myJR11J));
245 			strPtr[subsp_dim_L / 2] = valTempQ; // Bx
246 			*strPtr++ = valTempI; // Ax
247 			strPtrTr[subsp_dim_L / 2] = valTempI; // Ax
248 			*strPtrTr++ = -valTempQ; // -Bx
249 		}
250 	}
251 	for(rowIdx=0;rowIdx<subsp_dim_L/2;rowIdx++){
252 		ldPtr = corrMatrix + (((subsp_dim_L - 1) - rowIdx) * 2 * SUBSP_DIM_L_MAX);
253 		ldPtrTr = corrMatrix + ((subsp_dim_L - 1) * 2 * SUBSP_DIM_L_MAX + 2 * rowIdx);
254 		strPtr = realCorrMatrix+ (rowIdx*SUBSP_DIM_L_MAX);
255 		strPtrTr = realCorrMatrix + (rowIdx + subsp_dim_L / 2)*SUBSP_DIM_L_MAX;
256 		for(colIdx=0;colIdx<subsp_dim_L/2;colIdx++){
257 			valI = *ldPtrTr++; // R01J
258 			valQ = *ldPtrTr; ldPtrTr -= 2 * SUBSP_DIM_L_MAX +1;
259 
260 			corrI = *ldPtr++; // R01J.'
261 			corrQ = *ldPtr++;
262 			valTempI = valI + corrI;
263 			valTempQ = valQ + corrQ;
264 			// Cx = real(myR01J+conj(myR01J'));
265 			// Dx = imag(myR01J+conj(myR01J'));
266 			strPtr[subsp_dim_L / 2] += valTempQ; // Dx
267 			*strPtr++ += valTempI; // Cx
268 			strPtrTr[subsp_dim_L/2] -= valTempI; // -Cx
269 			*strPtrTr++ += valTempQ; // Dx
270 		}
271 	}
272 	*numObsPtr = numObs;
273 
274 	// scale input by 1/((fft_input_size-SUBSP_DIM_L+1)*nRx*nTx) - no
275 }
276 
findSigSpace(float * inputBuff,float * eMatrix,float thresh,int matSizeN,hal_pktinfo_t * pktinfo,float sub_det_thresh_rel)277 int findSigSpace(float *inputBuff, float *eMatrix, float thresh, int matSizeN, hal_pktinfo_t *pktinfo, float sub_det_thresh_rel)
278 {
279 	int ii, jj;
280 	float *eigVals = inputBuff;
281 	float *Qmat = inputBuff+SUBSP_DIM_L_MAX;
282 	float *tempMatSum; // tempMatSum[SUBSP_DIM_L*SUBSP_DIM_L];
283 	float *tempMatDiff; // tempMatDiff[SUBSP_DIM_L*SUBSP_DIM_L];
284 	float max;
285 	int maxIdx;
286 	int sigSpaceDim = 0;
287 
288 	// find max for thresh
289 	max = 0;
290 	for (jj = 0; jj<matSizeN; jj++){
291 		if (eigVals[jj]>max){
292 			max = eigVals[jj];
293 		}
294 	}
295 
296 	if (sub_det_thresh_rel*max > thresh)
297 		thresh = sub_det_thresh_rel*max;
298 
299 	for(ii=0;ii<SIG_SUBSP_DIM_MAX;ii++){
300 		// sort
301 		max = 0;
302 		maxIdx = -1;
303 		for(jj=0;jj<matSizeN;jj++){
304 			if(eigVals[jj]>max){
305 				max = eigVals[jj];
306 				maxIdx = jj;
307 			}
308 		}
309 		if(max>thresh){
310 			eigVals[maxIdx] = 0;
311 			// copy eigVec
312 			for(jj=0;jj<matSizeN;jj++){
313 				eMatrix[jj+sigSpaceDim*matSizeN] = Qmat[jj+maxIdx*SUBSP_DIM_L_MAX];
314 			}
315 			sigSpaceDim++;
316 		}
317 	}
318 
319 	//if (sigSpaceDim > 1){
320 	tempMatSum = (float*) inputBuff; // tempMatSum[SUBSP_DIM_L*SUBSP_DIM_L];
321 	//tempMatDiff = (float*) inputBuff+SUBSP_DIM_L*SUBSP_DIM_L; // tempMatDiff[SUBSP_DIM_L*SUBSP_DIM_L]
322 	tempMatDiff = (float*) inputBuff+ SUBSP_DIM_L_MAX*SUBSP_DIM_L_MAX; // tempMatDiff[SUBSP_DIM_L*SUBSP_DIM_L]
323 	for (ii = 0; ii<sigSpaceDim; ii++){
324 		for(jj=1;jj<matSizeN;jj++){
325 			tempMatSum[jj-1+ii*matSizeN] = eMatrix[jj-1+ii*matSizeN]+eMatrix[jj+ii*matSizeN];
326 			tempMatDiff[jj-1+ii*matSizeN] = eMatrix[jj-1+ii*matSizeN]-eMatrix[jj+ii*matSizeN];
327 		}
328 		tempMatSum[jj-1+ii*matSizeN] = SQRTF(2)*eMatrix[matSizeN/2-1+ii*matSizeN];
329 		tempMatDiff[jj-1+ii*matSizeN] = -SQRTF(2)*eMatrix[matSizeN-1+ii*matSizeN];
330 	}
331 
332 	for(ii=0;ii<sigSpaceDim;ii++){
333 		for(jj=0;jj<matSizeN/2-1;jj++){
334 			// E_lower
335 			eMatrix[jj+ii*SUBSP_DIM_L_MAX] = tempMatSum[jj+ii*matSizeN];
336 			// E_upper
337 			eMatrix[jj+(ii+sigSpaceDim)*SUBSP_DIM_L_MAX] = -tempMatDiff[(jj+matSizeN/2)+ii*matSizeN];
338 		}
339 		// E_lower
340 		eMatrix[jj+ii*SUBSP_DIM_L_MAX] = tempMatSum[matSizeN-1+ii*matSizeN];
341 		// E_upper
342 		eMatrix[jj+(ii+sigSpaceDim)*SUBSP_DIM_L_MAX] = tempMatDiff[matSizeN-1+ii*matSizeN];
343 
344 		for(jj++;jj<matSizeN-1;jj++){
345 			// E_lower
346 			eMatrix[jj+ii*SUBSP_DIM_L_MAX] = tempMatSum[jj+ii*matSizeN];
347 			// E_upper
348 			eMatrix[jj+(ii+sigSpaceDim)*SUBSP_DIM_L_MAX] = tempMatDiff[(jj-matSizeN/2)+ii*matSizeN];
349 		}
350 		for (;jj < SUBSP_DIM_L_MAX;jj++) {
351 			eMatrix[jj + ii*SUBSP_DIM_L_MAX] = 0;
352 			eMatrix[jj + (ii + sigSpaceDim)*SUBSP_DIM_L_MAX] = 0;
353 		}
354 	}
355 	return sigSpaceDim;
356 }
357 
calcDimOnePhase(float * eMatrix,float * omega_hat_sort,int matSizeN)358 void calcDimOnePhase(float *eMatrix, float *omega_hat_sort, int matSizeN){
359 
360 	int ii;
361 	float sumI, sumQ, omega_hat_pos;
362 	//float* tempVecReal = eMatrix + SUBSP_DIM_L; // tempVecReal[SUBSP_DIM_L];
363 	//float* tempVecImag = eMatrix + 2*SUBSP_DIM_L; // tempVecImag[SUBSP_DIM_L];
364 	float* tempVecReal = eMatrix + SUBSP_DIM_L_MAX; // tempVecReal[SUBSP_DIM_L];
365 	float* tempVecImag = eMatrix + 2*SUBSP_DIM_L_MAX; // tempVecImag[SUBSP_DIM_L];
366 	float *V1_real, *V2_Real, *V1_imag, *V2_imag;
367 
368 	// copy real eigen vectors
369 	for (ii = 0; ii < matSizeN/2; ii++){
370 		tempVecReal[ii] = eMatrix[ii];
371 		tempVecReal[ii + matSizeN / 2] = eMatrix[matSizeN / 2 - 1 - ii];
372 		tempVecImag[ii] = eMatrix[ii + matSizeN / 2];
373 		tempVecImag[ii + matSizeN / 2] = -eMatrix[matSizeN -1 - ii];
374 	}
375 
376 	V1_real = tempVecReal;
377 	V2_Real = tempVecReal +1;
378 	V1_imag = tempVecImag;
379 	V2_imag = tempVecImag +1;
380 
381 	sumI = 0.0f;
382 	sumQ = 0.0f;
383 	for (ii = 0; ii < matSizeN-1; ii++){
384 		sumI += V1_real[ii] * V2_Real[ii] + V1_imag[ii] * V2_imag[ii];
385 		sumQ += V1_real[ii] * V2_imag[ii] - V1_imag[ii] * V2_Real[ii];
386 	}
387 
388 	omega_hat_pos = -1 * atan2f(sumQ, sumI);
389 	omega_hat_sort[0] = (omega_hat_pos>0) ? omega_hat_pos : (omega_hat_pos + 2 * PI);
390 }
391 
calc_QpTimesE(float * eMatrix,float * qMatrix,float * qPeMatrix,int matSizeN,int matSizeM)392 void calc_QpTimesE(float *eMatrix, float *qMatrix, float *qPeMatrix, int matSizeN, int matSizeM){
393 
394 	int ii, jj, kk;
395 	float sum;
396 
397 	for(ii=0;ii<matSizeM;ii++){
398 		for(jj=0;jj<matSizeN;jj++){
399 			sum = 0;
400 			for(kk=0;kk<matSizeN;kk++){
401 				sum += qMatrix[kk+jj*SUBSP_DIM_L_MAX]*eMatrix[kk+ii*SUBSP_DIM_L_MAX];
402 			}
403 			qPeMatrix[jj+ii*SUBSP_DIM_L_MAX] = sum;
404 		}
405 	}
406 }
407 
constructVmatrix(float * eigValPlusVec,float * vMatrix,int sigSpaceDim,int matSizeN)408 void constructVmatrix(float *eigValPlusVec, float *vMatrix, int sigSpaceDim, int matSizeN){
409 
410 	int ii, jj;
411 	float *eigVals = eigValPlusVec;
412 	float *Qmat = eigValPlusVec+SUBSP_DIM_L_MAX;
413 
414 	float min;
415 	int minIdx;
416 
417 	for(ii=0;ii<sigSpaceDim;ii++){
418 		// sort
419 		min = 1e3f;
420 		minIdx = -1;
421 		for(jj=0;jj<matSizeN;jj++){
422 			if(eigVals[jj]<min){
423 				min = eigVals[jj];
424 				minIdx = jj;
425 			}
426 		}
427 		eigVals[minIdx] = 1e3f;
428 		// copy eigVec
429 		for(jj=0;jj<sigSpaceDim;jj++){
430 			// V upper
431 			vMatrix[ii+jj*SUBSP_DIM_L_MAX] = -Qmat[jj+minIdx*SUBSP_DIM_L_MAX];
432 		}
433 		for(;jj<matSizeN;jj++){
434 			// V lower
435 			vMatrix[ii+jj*SUBSP_DIM_L_MAX] = Qmat[jj+minIdx*SUBSP_DIM_L_MAX];
436 		}
437 	}
438 }
439 
calcEtimesE(float * eMatrix,float * ePeMatrix,int matSizeN,int matSizeM)440 void calcEtimesE(float *eMatrix, float *ePeMatrix, int matSizeN, int matSizeM){
441 	int ii, jj, kk;
442 #ifdef ARM_DS5
443 	float32x2_t resVec0, resVec1, resVec2, resVec3, strVec0, strVec1;
444 	float32x2_t rowVec0, rowVec1, colVec0, colVec1;
445 	float32x2x2_t tempVec0; //, tempVec1;
446 
447 	for(ii=0;ii<matSizeM;ii+=2){
448 		for(jj=0;jj<=ii;jj+=2){
449 			resVec0 =  vdup_n_f32(0.0f);
450 			resVec1 =  vdup_n_f32(0.0f);
451 			resVec2 =  vdup_n_f32(0.0f);
452 			resVec3 =  vdup_n_f32(0.0f);
453 			for(kk=0;kk<matSizeN;kk+=2){
454 				rowVec0  = vld1_f32((float32_t const *) (eMatrix+(kk+jj*SUBSP_DIM_L_MAX)));
455 				rowVec1  = vld1_f32((float32_t const *) (eMatrix+(kk+(jj+1)*SUBSP_DIM_L_MAX)));
456 
457 				colVec0  = vld1_f32((float32_t const *) (eMatrix+(kk+ii*SUBSP_DIM_L_MAX)));
458 				colVec1  = vld1_f32((float32_t const *) (eMatrix+(kk+(ii+1)*SUBSP_DIM_L_MAX)));
459 
460 				resVec0 = vmla_f32(resVec0, rowVec0, colVec0);
461 				resVec1 = vmla_f32(resVec1, rowVec1, colVec0);
462 				resVec2 = vmla_f32(resVec2, rowVec0, colVec1);
463 				resVec3 = vmla_f32(resVec3, rowVec1, colVec1);
464 			}
465 			strVec0 = vpadd_f32(resVec0, resVec1);
466 			strVec1 = vpadd_f32(resVec2, resVec3);
467 
468 			vst1_f32((float32_t *) (ePeMatrix + jj+ii*SUBSP_DIM_L_MAX), strVec0); // VST1.32 {d0}, [r0]
469 			vst1_f32((float32_t *) (ePeMatrix + jj + (ii + 1)*SUBSP_DIM_L_MAX), strVec1);
470 			// use symmetry
471 			tempVec0 = vzip_f32(strVec0, strVec1); // VZIP.32 d0,d0
472 			vst1_f32((float32_t *) (ePeMatrix + ii +jj*SUBSP_DIM_L_MAX), tempVec0.val[0]); // VST1.32 {d0}, [r0]
473 			vst1_f32((float32_t *) (ePeMatrix + ii + (jj + 1)*SUBSP_DIM_L_MAX), tempVec0.val[1]);
474 		}
475 	}
476 #else
477 	float sum, sum1, sum2,sum3;
478 
479 	for(ii=0;ii<matSizeM;ii+=2){
480 		for(jj=0;jj<matSizeM;jj+=2){
481 			sum = 0;
482 			sum1 = 0;
483 			sum2 = 0;
484 			sum3 = 0;
485 			for(kk=0;kk<matSizeN;kk+=2){
486 				sum += eMatrix[kk+jj*SUBSP_DIM_L_MAX]*eMatrix[kk+ii*SUBSP_DIM_L_MAX];
487 				sum1 += eMatrix[kk + (jj+1)*SUBSP_DIM_L_MAX] * eMatrix[kk + ii*SUBSP_DIM_L_MAX];
488 				sum2 += eMatrix[kk + jj*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
489 				sum3 += eMatrix[kk + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
490 				sum += eMatrix[(kk+1) + jj*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + ii*SUBSP_DIM_L_MAX];
491 				sum1 += eMatrix[(kk + 1) + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + ii*SUBSP_DIM_L_MAX];
492 				sum2 += eMatrix[(kk + 1) + jj*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + (ii + 1)*SUBSP_DIM_L_MAX];
493 				sum3 += eMatrix[(kk + 1) + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + (ii + 1)*SUBSP_DIM_L_MAX];
494 			}
495 			//if (kk < matSizeN) { // odd matSizeN case
496 			//	sum += eMatrix[kk + jj*SUBSP_DIM_L_MAX] * eMatrix[kk + ii*SUBSP_DIM_L_MAX];
497 			//	sum1 += eMatrix[kk + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[kk + ii*SUBSP_DIM_L_MAX];
498 			//	sum2 += eMatrix[kk + jj*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
499 			//	sum3 += eMatrix[kk + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
500 			//}
501 			ePeMatrix[jj+ii*SUBSP_DIM_L_MAX] = sum;
502 			ePeMatrix[jj+1 + ii*SUBSP_DIM_L_MAX] = sum1;
503 			ePeMatrix[jj + (ii + 1)*SUBSP_DIM_L_MAX] = sum2;
504 			ePeMatrix[(jj + 1) + (ii + 1)*SUBSP_DIM_L_MAX] = sum3;
505 		}
506 	}
507 #endif
508 }
509 
calcPhase(float * psiHatMatrix,int sigSpaceDim,float * omega_hat_sort)510 int calcPhase(float *psiHatMatrix, int sigSpaceDim, float *omega_hat_sort){
511 
512 	int ii, jj;
513 	//float omega_hat[SUBSP_DIM_L];
514 	float omega_hat[SUBSP_DIM_L_MAX];
515 	float tempStr;
516 
517 	for(ii=0;ii<sigSpaceDim;ii++){
518 		omega_hat[ii] = -2*atanf(psiHatMatrix[ii]);
519 	}
520 	// sort
521 	for(ii=0;ii<sigSpaceDim;ii++){
522 		omega_hat_sort[ii] = (omega_hat[ii]>0)?omega_hat[ii]:(omega_hat[ii]+2*PI);
523 		for(jj=ii;jj>0;jj--){
524 			if(omega_hat_sort[jj]<omega_hat_sort[jj-1]){
525 				tempStr = omega_hat_sort[jj-1];
526 				omega_hat_sort[jj-1] = omega_hat_sort[jj];
527 				omega_hat_sort[jj] = tempStr;
528 			}
529 		}
530 	}
531 	// remove double eigen values (from complex conjugate pairs)
532 	jj = 0;
533 	for(ii=1;ii<sigSpaceDim;ii++){
534 		if(omega_hat_sort[ii]-omega_hat_sort[jj]>1e-2f){
535 			jj++;
536 			tempStr = omega_hat_sort[ii];
537 			omega_hat_sort[jj] = tempStr;
538 		}
539 	}
540 	sigSpaceDim -= (ii-1-jj);
541 
542 	return sigSpaceDim;
543 }
544 
calcDelayAmplitude(hal_pktinfo_t * pktinfo,unsigned int * fftOutBuffer,unsigned int * totalpower,int firstPathDelay,int sigSpaceDim,float * omega_hat_sort,float * tau_hat,float * aMatrix,int fft_input_size)545 void calcDelayAmplitude(hal_pktinfo_t *pktinfo,
546 						unsigned int *fftOutBuffer,
547 						unsigned int *totalpower,
548 						int firstPathDelay,
549 						int sigSpaceDim,
550 						float *omega_hat_sort,
551 						float *tau_hat,
552 						float *aMatrix,
553 						int fft_input_size)
554 {
555 	int ii, jj, kk, qq;
556 	int index;
557 	unsigned int *loadPtr;
558 	unsigned int tempLoad;
559 	short tempI, tempQ;
560 	float tempVal, scale, delta;
561 	float leftI, leftQ, rightI, rightQ;
562 	float leftSqr, rightSqr, valSqr;
563 	//float fracIndex[SUBSP_DIM_L];
564 	float fracIndex[SUBSP_DIM_L_MAX];
565 
566 	int nRx = pktinfo->nRx+1;
567 	int nTx = pktinfo->nTx+1;
568 	int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
569 	int signalBandwidth = 20<<(pktinfo->sigBw);
570 	int stepsize = 1<<IFFT_OSF_SHIFT;
571 
572 	int firstPathIndex = (firstPathDelay+(1<<(TOA_FPATH_BIPT-1)))>>TOA_FPATH_BIPT;
573 	int windowStart = firstPathIndex-(fft_input_size <<(IFFT_OSF_SHIFT-1)); // center window middle on first-path
574 
575 	unsigned int bitMask = NUM_PARALLEL *ifftSizeOsf - 1;
576 
577 	// map phase to delay
578 	for(kk=0;kk<sigSpaceDim;kk++){
579 		tempVal = omega_hat_sort[kk]/(2*PI);
580 		fracIndex[kk] = tempVal*(fft_input_size*stepsize);
581 		tempVal *= fft_input_size /(1.0f*signalBandwidth);
582 		//windowStart = (windowStart+2)&0xfffffffc;
583 		tempVal += windowStart /(1.0f*stepsize*signalBandwidth); // windowstat in oversampled samples
584 		tau_hat[kk] = tempVal;
585 	}
586 
587 	// project delays onto time domain data to estimate path amplitude
588 	for(ii=0;ii<nTx;ii++){
589 		for(jj=0;jj<nRx;jj+= NUM_PARALLEL){
590 			for (qq = 0;qq < NUM_PARALLEL;qq++) {
591 				loadPtr = fftOutBuffer + (jj + ii*nRx)*ifftSizeOsf + qq;
592 				scale = SQRTF(1.0f*nTx*nRx*totalpower[(jj + qq) + ii*nRx] / (1 << 15)) / (1 << 8); // 32.31 format
593 				//scale = SQRTF(1.0f/fixedpower[jj+ii*nRx]/(1<<15))/(1<<8); // 32.31 format
594 				//scale = SQRTF(1.0f/fixedpower[jj+ii*nRx]/(1<<0))/(1<<3); // 32.31 format
595 				//scale = 1;
596 				for (kk = 0;kk < sigSpaceDim;kk++) {
597 					index = (int)floorf(fracIndex[kk]);
598 					delta = fracIndex[kk] - index;
599 					index += windowStart;
600 
601 					// treatment for negative windowStart??
602 					// if(index<0) ...
603 
604 					tempLoad = loadPtr[(NUM_PARALLEL*index) &bitMask];
605 					tempI = tempLoad & 0xffff;
606 					tempQ = (tempLoad >> 16) & 0xffff;
607 					leftI = tempI*scale;
608 					leftQ = tempQ*scale;
609 					leftSqr = leftI*leftI + leftQ*leftQ;
610 
611 					tempLoad = loadPtr[(NUM_PARALLEL*(index + 1)) &bitMask];
612 					tempI = tempLoad & 0xffff;
613 					tempQ = (tempLoad >> 16) & 0xffff;
614 					rightI = tempI*scale;
615 					rightQ = tempQ*scale;
616 					rightSqr = rightI*rightI + rightQ*rightQ;
617 
618 					valSqr = (rightSqr - leftSqr)*delta + leftSqr;
619 
620 					aMatrix[kk + (jj + qq)*sigSpaceDim + ii*nRx*sigSpaceDim] = valSqr;
621 				}
622 			}
623 		}
624 	}
625 }
626 
determineFirstPath(hal_pktinfo_t * pktinfo,float * tau_hat,float * aMatrix,int sigSpaceDim,float music_thresh_rel)627 int determineFirstPath(hal_pktinfo_t *pktinfo,
628 					   float *tau_hat,
629 					   float *aMatrix,
630 					   int sigSpaceDim,
631 					   float music_thresh_rel)
632 {
633 	int ii, jj, kk;
634 	int nRx = pktinfo->nRx+1;
635 	int nTx = pktinfo->nTx+1;
636 	int firstPathIdx;
637 	int maxIdx = -1;
638 	int firstPathDelay = 0;
639 
640 	float detThresh;
641 	float maxVal = 0;
642 
643 	//float sumPower[SUBSP_DIM_L];
644 	float sumPower[SUBSP_DIM_L_MAX];
645 
646 	int signalBandwidth = 20<<(pktinfo->sigBw);
647 	int stepsize = 1<<IFFT_OSF_SHIFT;
648 
649 	for(kk=0;kk<sigSpaceDim;kk++){
650 		sumPower[kk]=0;
651 	}
652 
653 	for(ii=0;ii<nTx;ii++){
654 		for(jj=0;jj<nRx;jj++){
655 			for(kk=0;kk<sigSpaceDim;kk++){
656 				sumPower[kk]+= aMatrix[kk +jj*sigSpaceDim +ii*nRx*sigSpaceDim];
657 			}
658 		}
659 	}
660 
661 	// find max
662 	for(kk=0;kk<sigSpaceDim;kk++){
663 		sumPower[kk] /= (nTx*nRx);
664 		if(sumPower[kk]>maxVal){
665 			maxVal = sumPower[kk];
666 			maxIdx = kk;
667 		}
668 	}
669 
670 	detThresh = maxVal*music_thresh_rel;
671 	if(detThresh>MUSIC_THRESH_MAX){
672 		detThresh=MUSIC_THRESH_MAX;
673 	}
674 	else if(detThresh<MUSIC_THRESH_MIN){
675 		detThresh=MUSIC_THRESH_MIN;
676 	}
677 	//detThresh = maxVal/4;
678 	firstPathIdx = maxIdx;
679 	for(kk=maxIdx-1;kk>=0;kk--){
680 		if(sumPower[kk]>detThresh){
681 			firstPathIdx = kk;
682 		}
683 	}
684 	firstPathDelay = (int) floorf((tau_hat[firstPathIdx]*signalBandwidth*stepsize*(1<<TOA_FPATH_BIPT))+0.5f);
685 
686 	return firstPathDelay;
687 }
688 
calcSubspaceFineTiming(hal_pktinfo_t * pktinfo,unsigned int * fftOutBuffer,unsigned int * totalpower,int firstPathDelay,int * fineTimingRes,unsigned int * procBuffer,hal_wls_packet_params_t * packetparams)689 int calcSubspaceFineTiming(hal_pktinfo_t *pktinfo,		// structure with CSI buffer parameters
690 	unsigned int *fftOutBuffer,	// buffer holding time-domain CSI
691 	unsigned int *totalpower,	// array holding power per rx/tx channel
692 	int firstPathDelay,			// existing first path estimate
693 	int *fineTimingRes,			// result of algorithm
694 	unsigned int *procBuffer, 	// buffer for processing, size 2*BUFFER_OFFSET DW
695 	hal_wls_packet_params_t *packetparams // passing packetinfo
696 	)
697 {
698 	int nRx = pktinfo->nRx + 1;
699 	int nTx = pktinfo->nTx + 1;
700 	int subsp_dim_L = SUB_DIM_L_20;
701 	float sub_det_thresh, sub_det_thresh_rel, music_thresh_rel;
702 	int fft_input_size = FFT_INPUT_SIZE_LONG_WINDOW;
703 	float *realCorrMatrix = (float*)procBuffer; // realCorrMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
704 	float *eigValPlusVec = (float*)procBuffer + BUFFER_OFFSET; // eigValPlusVec[MAX_MAT_SIZE+MAX_MAT_SIZE*MAX_MAT_SIZE]
705 	float *eMatrix = (float*)procBuffer; // eMatrix[2*SUBSP_DIM_L*SUBSP_DIM_L]
706 #ifdef USE_TLS
707 	float *ePeMatrix = (float*)procBuffer + BUFFER_OFFSET; //ePeMatrix[4*(SIG_SUBSP_DIM_MAX-2)*(SIG_SUBSP_DIM_MAX-2)]
708 	float *eigValPlusVecEE = (float*)procBuffer; // eigValPlusVec[MAX_MAT_SIZE+MAX_MAT_SIZE*MAX_MAT_SIZE]
709 	float *vMatrix = (float*)procBuffer + BUFFER_OFFSET; // vMatrix[4*(SUBSP_DIM_L-2)*(SUBSP_DIM_L-2)]
710 	float *qMatrix = (float*)procBuffer; // qMatrix[MAX_MAT_SIZE*MAX_MAT_SIZE]
711 	float *qPvMatrix = (float*)procBuffer + BUFFER_OFFSET / 2; // qPvMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
712 	float *psiHatMatrix = (float*)procBuffer; // psiHatMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
713 #else
714 	float *qMatrix = (float*) procBuffer+BUFFER_OFFSET; // qMatrix[MAX_MAT_SIZE*MAX_MAT_SIZE]
715 	float *qPeMatrix = (float*) procBuffer+BUFFER_OFFSET*3/2; // qPeMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
716 	float *psiHatMatrix = (float*) procBuffer+BUFFER_OFFSET; // psiHatMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
717 #endif
718 	float *eigPsiHat = (float*)procBuffer + BUFFER_OFFSET / 2; // eigPsiHat[SUBSP_DIM_L]
719 	float *omega_hat_sort = (float*)procBuffer; // omega_hat_sort[SUBSP_DIM_L]
720 	float *tau_hat = (float*)procBuffer + BUFFER_OFFSET / 4; // tau_hat[SUBSP_DIM_L]
721 	float *aMatrix = (float*)procBuffer + BUFFER_OFFSET / 2; // aMatrix[MAX_TX*MAX_RX*2*SUBSP_DIM_L]
722 
723 	int numObs, sigSpaceDim, FD_downsample=1;
724 
725 	if ((pktinfo->packetType == 0) && (pktinfo->sigBw == 0) && (pktinfo->rxDevBw == 2)) {         //process legacy pkt
726 		subsp_dim_L = SUB_DIM_L_6;
727 		sub_det_thresh = SUB_DET_THRESH_Legacy;
728 		sub_det_thresh_rel = SUB_DET_THRESH_REL_Legacy;
729 	}
730 	else if (packetparams->chNum >= 36) {  //5G band
731 		if (pktinfo->sigBw == 2) {
732 			sub_det_thresh = SUB_DET_THRESH_5G_80MHZ;
733 			sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_80MHZ;
734 		}
735 		else if (pktinfo->sigBw == 1) {
736 			sub_det_thresh = SUB_DET_THRESH_5G_40MHZ;
737 			sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_40MHZ;
738 		}
739 		else if (pktinfo->sigBw == 0) {
740 #if 0 //larger auto-correlation matrix size
741 			sub_det_thresh = SUB_DET_THRESH_5G_20MHZ;
742 			sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_20MHZ;
743 #else
744 			fft_input_size = FFT_INPUT_SIZE_SHORT_WINDOW;
745 			subsp_dim_L = SUB_DIM_L_10;
746 			sub_det_thresh = SUB_DET_THRESH_5G_20MHZ;
747 			sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_20MHZ;
748 #endif
749 		}
750 		else if (pktinfo->packetType < 2) {
751 			subsp_dim_L = SUB_DIM_L_16;
752 			fft_input_size = FFT_INPUT_SIZE_LONG_WINDOW;
753 			sub_det_thresh = SUB_DET_THRESH_20MHZ;
754 			sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_20MHZ;
755 		}
756 		else {
757 			FD_downsample = 4;
758 			subsp_dim_L = SUB_DIM_L_10;
759 			sub_det_thresh = SUB_DET_THRESH_Legacy;
760 			sub_det_thresh_rel = 0.035f;
761 		}
762 	}
763 	else {								//2.4G band
764 		if (pktinfo->sigBw == 1) {
765 			subsp_dim_L = SUB_DIM_L_16;
766 		}
767 		else {
768 			subsp_dim_L = SUB_DIM_L_12;
769 		}
770 		sub_det_thresh = SUB_DET_THRESH_20MHZ;
771 		sub_det_thresh_rel = SUB_DET_THRESH_REL_2G;
772 		fft_input_size = FFT_INPUT_SIZE_LONG_WINDOW;
773 	}
774 
775 	// determine firstpath power threshold according to frequency, and pass to "determineFirstPath"
776 	music_thresh_rel = (packetparams->chNum < 36) ? MUSIC_THRESH_REL_2GHz : MUSIC_THRESH_REL_5GHz;
777 
778 	// in:	fftOutBuffer[], totalpower[]
779 	// out:	realCorrMatrix[SUBSP_DIM_L*SUBSP_DIM_L] {100 DW}
780 	calcCorMatrix(pktinfo, fftOutBuffer, totalpower, firstPathDelay, realCorrMatrix, procBuffer + BUFFER_OFFSET, subsp_dim_L, fft_input_size, FD_downsample, &numObs);
781 	//numObs = (fft_input_size - subsp_dim_L + 1) * 2 * nRx*nTx;
782 	sub_det_thresh *= (numObs * 2 * nRx*nTx); // account for CorMatrix scaling
783 
784 	// eigen(Shur) decomposition
785 	// in:	realCorrMatrix[]
786 	// out:	eigValPlusVec[]; [D:MAX_MAT_SIZE (eigen values)| Q:SUBSP_DIM_L*SUBSP_DIM_L (eigen vectors)] {10+100 DW +30DW scratch}
787 	QR_algorithm(realCorrMatrix, eigValPlusVec, subsp_dim_L, 0);
788 
789 	// find signal space Eig Vals and construct E matrix for ESPRIT
790 	// in:	eigValPlusVec[] - used also as scratch memory
791 	// out:	eMatrix[]; [E_lower: (SUBSP_DIM_L-1)*SIG_SUBSP_DIM_MAX| E_upper: (SUBSP_DIM_L-1)*SIG_SUBSP_DIM_MAX] {200 DW}
792 	sigSpaceDim = findSigSpace(eigValPlusVec, eMatrix, sub_det_thresh, subsp_dim_L, pktinfo, sub_det_thresh_rel);
793 
794 	if (sigSpaceDim < 2){
795 		if (sigSpaceDim == 0){ // error, no signal over threshold
796 			return -1;
797 		}
798 		//else{ // (sigSpaceDim == 1)
799 		//	// for single eigen value, use simpler code
800 		//	// in: eMatrix[]
801 		//	// out: omega_hat_sort[]
802 		//	calcDimOnePhase(eMatrix, omega_hat_sort, SUBSP_DIM_L);
803 
804 		//	// determine delays and amplitudes
805 		//	// in : fftOutBuffer[], totalpower[], omega_hat_sort[]
806 		//	// out: tau_hat[], aMatrix[]  {10+80 DW}
807 		//	calcDelayAmplitude(pktinfo, fftOutBuffer, totalpower, firstPathDelay, sigSpaceDim, omega_hat_sort, tau_hat, aMatrix);
808 
809 		//	// determine first path
810 		//	// in: tau_hat[], aMatrix[]
811 		//	// out: fineTimingRes
812 		//	*fineTimingRes = determineFirstPath(pktinfo, tau_hat, aMatrix, sigSpaceDim);
813 
814 		//	return 0;
815 		//}
816 	}
817 #ifdef USE_TLS // QR of E
818 	// calc E'*E
819 	// in: eMatrix[]
820 	// out: ePeMatrix[] {256 DW}
821 	calcEtimesE(eMatrix, ePeMatrix,(subsp_dim_L -1), 2*sigSpaceDim);
822 	// eigen(Shur) decomposition
823 	// in:	ePeMatrix[]
824 	// out:	eigValPlusVec[]; [D:2*SIG_SUBSP_DIM_MAX (eigen values)| Q:4*SIG_SUBSP_DIM_MAX^2 (eigen vectors)] {12+144 DW +30DW scratch}
825 	QR_algorithm(ePeMatrix, eigValPlusVecEE, 2*sigSpaceDim, 1);
826 
827 	// in: eigValPlusVec[]
828 	// out: vMatrix[]; [V_upper: SIG_SUBSP_DIM_MAX*SIG_SUBSP_DIM_MAX| V_lower: SIG_SUBSP_DIM_MAX*SIG_SUBSP_DIM_MAX] {256 DW}
829 	constructVmatrix(eigValPlusVecEE, vMatrix, sigSpaceDim, 2*sigSpaceDim);
830 
831 	//  Psi_hat = V_lower\V_upper;
832 	// V_lower = Q*R
833 	// in: vMatrix[] V_lower (updated)
834 	// out: qMatrix[SIG_SUBSP_DIM_MAX*SIG_SUBSP_DIM_MAX] {100 DW }
835 	QR_decomposition(vMatrix+sigSpaceDim*SUBSP_DIM_L_MAX, qMatrix, sigSpaceDim, sigSpaceDim);
836 	// calc Q'*V_upper
837 	// in: vMatrix[] V_upper, qMatrix[]
838 	// out: qPvMatrix[] {100 DW +30 DW scratch}
839 	calc_QpTimesE(vMatrix, qMatrix, qPvMatrix, sigSpaceDim, sigSpaceDim);
840 	// Psi_hat = (R^-1)(Q'*E_upper) solve with back substitution
841 	// in: qPvMatrix[], vMatrix[] V_lower (updated)
842 	// out: Psi_hat[]: (sigSpaceDim*sigSpaceDim) {100 DW}
843 	myBackSub(qPvMatrix, vMatrix+sigSpaceDim*SUBSP_DIM_L_MAX, psiHatMatrix, sigSpaceDim, sigSpaceDim);
844 
845 #else
846 	// LS Psi_hat = E_lower\E_upper;
847 	// E_lower = Q*R
848 	// in: eMatrix[] E_lower (updated)
849 	// out: qMatrix[] {100 DW}
850 	QR_decomposition(eMatrix, qMatrix, (subsp_dim_L -1), sigSpaceDim);
851 	// calc Q'*E_upper
852 	// in: eMatrix[] E_upper, qMatrix[]
853 	// out: qPeMatrix[] {100 DW}
854 	calc_QpTimesE(eMatrix+ SUBSP_DIM_L_MAX*sigSpaceDim, qMatrix, qPeMatrix, (subsp_dim_L -1), sigSpaceDim);
855 	// Psi_hat = (R^-1)(Q'*E_upper) solve with back substitution
856 	// in: qPeMatrix[], eMatrix[] E_lower (updated)
857 	// out: Psi_hat[]: (sigSpaceDim*sigSpaceDim) {100 DW}
858 	myBackSub(qPeMatrix, eMatrix, psiHatMatrix, (subsp_dim_L -1), sigSpaceDim);
859 
860 #endif
861 	// omega_hat = atan(real(eig(Psi_hat)))
862 	// in: psiHatMatrix[]
863 	// out: eigPsiHat[]  {10 DW  +30 DW scratch}
864 	unsym_QR_algorithm(psiHatMatrix, eigPsiHat, sigSpaceDim);
865 
866 	// determine phase
867 	// in: eigPsiHat[], sigSpaceDim
868 	// out: omega_hat_sort[], sigSpaceDim  {10 DW}
869 	sigSpaceDim = calcPhase(eigPsiHat, sigSpaceDim, omega_hat_sort);
870 
871 	// determine delays and amplitudes
872 	// in : fftOutBuffer[], totalpower[], omega_hat_sort[]
873 	// out: tau_hat[], aMatrix[]  {10+80 DW}
874 	calcDelayAmplitude(pktinfo, fftOutBuffer, totalpower, firstPathDelay, sigSpaceDim, omega_hat_sort, tau_hat, aMatrix, fft_input_size);
875 
876 	// determine first path
877 	// in: tau_hat[], aMatrix[]
878 	// out: fineTimingRes
879 	*fineTimingRes = determineFirstPath(pktinfo, tau_hat, aMatrix, sigSpaceDim, music_thresh_rel);
880 
881 	return 0;
882 }
883 #endif /* CONFIG_WLS_CSI_PROC */
884