1 /** @file wls_subspace_processing.c
2 *
3 * @brief This file contains source code to calculate fine timing using sub-space methods (ESPRIT)
4 *
5 * Copyright 2023-2024 NXP
6 *
7 * SPDX-License-Identifier: BSD-3-Clause
8 *
9 */
10
11 /************************************************************************
12 * DFW Source code to calculate fine timing using sub-space methods (ESPRIT)
13 ************************************************************************/
14
15 #include <osa.h>
16 #if CONFIG_WLS_CSI_PROC
17
18 // Standard includes.
19 #include <stdio.h>
20 //#include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23 // 4 sqrtf, 1 atan2f, 1 atanf, 2 floorf
24 #ifdef ARM_DS5
25 #include <arm_neon.h>
26 #endif
27
28 // Specific includes.
29 #include "wls_subspace_processing.h"
30 #include "wls_param_defines.h"
31 #include "wls_QR_algorithm.h"
32 #include "wls_radix4Fft.h"
33
34 #define FFT_INPUT_SIZE_MAX 32
35 #define FFT_INPUT_SIZE_SHORT_WINDOW 16
36 #define FFT_INPUT_SIZE_LONG_WINDOW 32
37
38 #ifdef STA_20_ONLY
39 #define BUFFER_OFFSET (12*MAX_FFT_SIZE/2)
40 #else
41 #define BUFFER_OFFSET (3*MAX_FFT_SIZE/2)
42 #endif
43 #define SUBSP_DIM_L_MAX MAX_MAT_SIZE
44 //needs to be floor(0.75*SUB_DIM_L)
45 #define SIG_SUBSP_DIM_MAX 7 // 15
46
47 #define MUSIC_THRESH_REL_5GHz 0.01f
48 #define MUSIC_THRESH_REL_2GHz 0.1f
49 #define MUSIC_THRESH_MIN 0.02f
50 #define MUSIC_THRESH_MAX 0.10f
51
52 #define SUB_DET_THRESH_5G_80MHZ 0.1f
53 #define SUB_DET_THRESH_5G_40MHZ 0.08f
54 #define SUB_DET_THRESH_5G_20MHZ 0.04f // 0.1f
55 #define SUB_DET_THRESH_20MHZ 0.253f
56 #define SUB_DET_THRESH_Legacy 0.38f
57 #define SUB_DET_THRESH_REL_5G_80MHZ 0.01f
58 #define SUB_DET_THRESH_REL_5G_40MHZ 0.025f
59 #define SUB_DET_THRESH_REL_5G_20MHZ 0.005f
60 #define SUB_DET_THRESH_REL_2G 0.015f
61 #define SUB_DET_THRESH_REL_Legacy 0.25f
62
63 #define SUB_DIM_L_6 6
64 #define SUB_DIM_L_10 10
65 #define SUB_DIM_L_12 12
66 #define SUB_DIM_L_16 16
67 #define SUB_DIM_L_20 20
68 #define SUB_DIM_L_22 22
69
70 #define USE_TLS
71 #define FFT_TD_OSF_SHIFT 1
72 #define FFT_FD_EDGE 0
73
calcCorMatrix(hal_pktinfo_t * pktinfo,unsigned int * fftOutBuffer,unsigned int * totalpower,int firstPathDelay,float * realCorrMatrix,unsigned int * scratchMemory,int subsp_dim_L,int fft_input_size,int FD_downsample,int * numObsPtr)74 void calcCorMatrix(hal_pktinfo_t *pktinfo, unsigned int *fftOutBuffer, unsigned int *totalpower, int firstPathDelay, float *realCorrMatrix,
75 unsigned int *scratchMemory, int subsp_dim_L, int fft_input_size, int FD_downsample, int* numObsPtr)
76 {
77
78 int ii, jj, kk, ll, rowIdx, colIdx, qq;
79 int loadOffset;
80 float *fftTempBuffer = (float*)scratchMemory; //[2*(FFT_INPUT_SIZE_MAX<<FFT_TD_OSF_SHIFT)];
81 float *strPtr, *strPtrTr, *ldPtr, *ldPtrTr;
82 //float debugBuffer[FFT_INPUT_SIZE_MAX];
83 float *corrMatrix = (float*)scratchMemory +2* (FFT_INPUT_SIZE_MAX << FFT_TD_OSF_SHIFT); // size: [SUBSP_DIM_L_MAX*2*SUBSP_DIM_L_MAX];
84 unsigned int *loadPtr;
85 unsigned int tempVal, loadOffsetU32, loadOffsetMask;
86 short tempI, tempQ;
87 float valI, valQ, valTempI, valTempQ, scale;
88 float corrI, corrQ, sumI, sumQ;
89 float *tempBuffer = (float*)scratchMemory + 2 * (FFT_INPUT_SIZE_MAX << FFT_TD_OSF_SHIFT) + 2 * SUBSP_DIM_L_MAX * SUBSP_DIM_L_MAX; // [2*FFT_INPUT_SIZE_MAX];
90 int storeIdx, loadIdx;
91
92 int nRx = pktinfo->nRx+1;
93 int nTx = pktinfo->nTx+1;
94 int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
95
96 int firstPathIndex = (firstPathDelay+(1<<(TOA_FPATH_BIPT-1)))>>TOA_FPATH_BIPT;
97 int windowStart = firstPathIndex-(fft_input_size <<(IFFT_OSF_SHIFT-1)); // center window middle on first-path
98
99 int stepsize = 1<<(IFFT_OSF_SHIFT - FFT_TD_OSF_SHIFT);
100 int numObs = fft_input_size - subsp_dim_L - 2*FFT_FD_EDGE;
101
102 memset((void*)corrMatrix, 0, 2*SUBSP_DIM_L_MAX*SUBSP_DIM_L_MAX*sizeof(float));
103 loadOffsetMask = ifftSizeOsf*NUM_PARALLEL - 1;
104
105 for(ii=0;ii<nTx;ii++){
106 for(jj=0;jj<nRx;jj+=NUM_PARALLEL){
107
108 for (qq = 0;qq < NUM_PARALLEL; qq++) {
109 loadPtr = fftOutBuffer + (jj + ii*nRx)*ifftSizeOsf + qq;
110 loadOffset = windowStart*NUM_PARALLEL;
111 scale = SQRTF(1.0f*nTx*nRx*totalpower[(jj + qq) + ii*nRx] / (1 << 15)) / (1 << (8 + FFT_TD_OSF_SHIFT)); // 32.31 format
112 strPtr = fftTempBuffer;
113 for (kk = 0;kk < (fft_input_size << FFT_TD_OSF_SHIFT) * FD_downsample;kk++) {
114 loadOffsetU32 = loadOffset &loadOffsetMask;
115 tempVal = loadPtr[loadOffsetU32];
116 loadOffset += stepsize*NUM_PARALLEL;
117 tempI = tempVal & 0xffff;
118 tempQ = (tempVal >> 16) & 0xffff;
119
120 valI = tempI*scale;
121 *strPtr++ = valI;
122 valQ = tempQ*scale;
123 *strPtr++ = valQ;
124 }
125 radix2FftFlt(fftTempBuffer, (fft_input_size << FFT_TD_OSF_SHIFT) * FD_downsample, twiddleTableFlt, MAX_FFT_FLT);
126 if (FD_downsample == 4) {
127 for (ll = 1;ll < fft_input_size;ll++) {
128 fftTempBuffer[2 * ll] = fftTempBuffer[2 * FD_downsample * ll];
129 fftTempBuffer[2 * ll + 1] = fftTempBuffer[2 * FD_downsample * ll + 1];
130 }
131 }
132 // diagonal
133 // rowIdx = 0
134 ldPtr = fftTempBuffer + 2*(FFT_FD_EDGE+1); // skip 0
135 strPtr = corrMatrix;
136 sumI = 0;
137 storeIdx = 0;
138 for (kk = 0;kk < numObs;kk++) {
139 valI = *ldPtr++; // fftTempBuffer[2 * kk];
140 valQ = *ldPtr++; // fftTempBuffer[2 * kk + 1];
141 corrI = valI*valI + valQ*valQ;
142 sumI += corrI;
143 tempBuffer[storeIdx++] = corrI;
144 }
145 *strPtr += sumI; //corrMatrix[0]
146 strPtr += 2 * (SUBSP_DIM_L_MAX + 1);
147 loadIdx = 0;
148 for (rowIdx = 1;rowIdx < subsp_dim_L;rowIdx++) {
149 sumI -= tempBuffer[loadIdx++];
150 valI = *ldPtr++; // fftTempBuffer[2 * (rowIdx-1 + kk)];
151 valQ = *ldPtr++; // fftTempBuffer[2 * (rowIdx-1 + kk) + 1];
152 corrI = valI*valI + valQ*valQ;
153 sumI += corrI;
154 tempBuffer[storeIdx++] = corrI;
155 *strPtr += sumI; //corrMatrix[rowIdx * 2 * (SUBSP_DIM_L_MAX+1)] += sumI;
156 strPtr += 2 * (SUBSP_DIM_L_MAX + 1);
157 }
158 // off-diagonals
159 for (colIdx = 1; colIdx < subsp_dim_L; colIdx++) {
160 // rowIdx = 0
161 ldPtr = fftTempBuffer + 2 * (FFT_FD_EDGE + 1);
162 ldPtrTr = fftTempBuffer + 2 * (FFT_FD_EDGE + 1) + 2 * colIdx;
163 strPtr = corrMatrix + 2 * colIdx * SUBSP_DIM_L_MAX;
164 sumI = 0;
165 sumQ = 0;
166 storeIdx = 0;
167 for (kk = 0;kk < numObs;kk++) {
168
169 valI = *ldPtrTr++; //fftTempBuffer[2 * (colIdx + kk)];
170 valQ = *ldPtrTr++; //fftTempBuffer[2 * (colIdx + kk) + 1];
171
172 valTempI = *ldPtr++; //fftTempBuffer[2 * kk];
173 valTempQ = *ldPtr++; //fftTempBuffer[2 * kk + 1];
174
175 corrI = valI*valTempI + valQ*valTempQ;
176 corrQ = valQ*valTempI - valI*valTempQ;
177
178 sumI += corrI;
179 sumQ += corrQ;
180
181 tempBuffer[storeIdx++] = corrI;
182 tempBuffer[storeIdx++] = corrQ;
183 }
184 *strPtr++ += sumI; //corrMatrix[2 * colIdx * SUBSP_DIM_L_MAX]
185 *strPtr++ -= sumQ; //corrMatrix[2 * colIdx * SUBSP_DIM_L_MAX + 1]
186 strPtr += 2 * SUBSP_DIM_L_MAX;
187 loadIdx = 0;
188 for (rowIdx = 1;rowIdx < subsp_dim_L - colIdx;rowIdx++) {
189 sumI -= tempBuffer[loadIdx++];
190 sumQ -= tempBuffer[loadIdx++];
191
192 valI = *ldPtrTr++; //fftTempBuffer[2 * (rowIdx - 1 + colIdx + kk)];
193 valQ = *ldPtrTr++; //fftTempBuffer[2 * (rowIdx - 1 + colIdx + kk) + 1];
194
195 valTempI = *ldPtr++; //fftTempBuffer[2 * (rowIdx - 1 + kk)];
196 valTempQ = *ldPtr++; //fftTempBuffer[2 * (rowIdx - 1 + kk) + 1];
197
198 corrI = valI*valTempI + valQ*valTempQ;
199 corrQ = valQ*valTempI - valI*valTempQ;
200
201 sumI += corrI;
202 sumQ += corrQ;
203
204 tempBuffer[storeIdx++] = corrI;
205 tempBuffer[storeIdx++] = corrQ;
206
207 *strPtr++ += sumI; //corrMatrix[(rowIdx + colIdx) * 2 * SUBSP_DIM_L_MAX + 2 * rowIdx]
208 *strPtr++ -= sumQ; //corrMatrix[(rowIdx + colIdx) * 2 * SUBSP_DIM_L_MAX + 2 * rowIdx + 1]
209 strPtr += 2 * SUBSP_DIM_L_MAX;
210 }
211 }
212 }
213 }
214 }
215 for (rowIdx = 0;rowIdx < subsp_dim_L;rowIdx++) {
216 ldPtr = corrMatrix + (rowIdx * 2 * SUBSP_DIM_L_MAX);
217 strPtrTr = corrMatrix +(2 * rowIdx);
218 for (colIdx = 0;colIdx < rowIdx;colIdx++) {
219
220 corrI = *ldPtr++; // use symmetry
221 corrQ = *ldPtr++;
222
223 *strPtrTr++ = corrI;
224 *strPtrTr = -corrQ; strPtrTr += 2* SUBSP_DIM_L_MAX -1;
225 }
226 }
227
228 // create real matrix
229 for(rowIdx=0;rowIdx<subsp_dim_L/2;rowIdx++){
230 ldPtr = corrMatrix +(rowIdx * 2 * SUBSP_DIM_L_MAX);
231 ldPtrTr = corrMatrix + (((subsp_dim_L - 1) - rowIdx) * 2 * SUBSP_DIM_L_MAX + 2 * (subsp_dim_L - 1) + 1);
232 strPtr = realCorrMatrix + (rowIdx*SUBSP_DIM_L_MAX);
233 strPtrTr = realCorrMatrix + (rowIdx + subsp_dim_L / 2)*SUBSP_DIM_L_MAX;
234 for(colIdx=0;colIdx<subsp_dim_L/2;colIdx++){
235 valI = *ldPtr++; // R00
236 valQ = *ldPtr++;
237
238 corrQ = *ldPtrTr--;
239 corrI = *ldPtrTr--; // JR11J
240 valTempI = valI + corrI;
241 valTempQ = valQ - corrQ;
242
243 // Ax = real(myR00+conj(myJR11J));
244 // Bx = imag(myR00+conj(myJR11J));
245 strPtr[subsp_dim_L / 2] = valTempQ; // Bx
246 *strPtr++ = valTempI; // Ax
247 strPtrTr[subsp_dim_L / 2] = valTempI; // Ax
248 *strPtrTr++ = -valTempQ; // -Bx
249 }
250 }
251 for(rowIdx=0;rowIdx<subsp_dim_L/2;rowIdx++){
252 ldPtr = corrMatrix + (((subsp_dim_L - 1) - rowIdx) * 2 * SUBSP_DIM_L_MAX);
253 ldPtrTr = corrMatrix + ((subsp_dim_L - 1) * 2 * SUBSP_DIM_L_MAX + 2 * rowIdx);
254 strPtr = realCorrMatrix+ (rowIdx*SUBSP_DIM_L_MAX);
255 strPtrTr = realCorrMatrix + (rowIdx + subsp_dim_L / 2)*SUBSP_DIM_L_MAX;
256 for(colIdx=0;colIdx<subsp_dim_L/2;colIdx++){
257 valI = *ldPtrTr++; // R01J
258 valQ = *ldPtrTr; ldPtrTr -= 2 * SUBSP_DIM_L_MAX +1;
259
260 corrI = *ldPtr++; // R01J.'
261 corrQ = *ldPtr++;
262 valTempI = valI + corrI;
263 valTempQ = valQ + corrQ;
264 // Cx = real(myR01J+conj(myR01J'));
265 // Dx = imag(myR01J+conj(myR01J'));
266 strPtr[subsp_dim_L / 2] += valTempQ; // Dx
267 *strPtr++ += valTempI; // Cx
268 strPtrTr[subsp_dim_L/2] -= valTempI; // -Cx
269 *strPtrTr++ += valTempQ; // Dx
270 }
271 }
272 *numObsPtr = numObs;
273
274 // scale input by 1/((fft_input_size-SUBSP_DIM_L+1)*nRx*nTx) - no
275 }
276
findSigSpace(float * inputBuff,float * eMatrix,float thresh,int matSizeN,hal_pktinfo_t * pktinfo,float sub_det_thresh_rel)277 int findSigSpace(float *inputBuff, float *eMatrix, float thresh, int matSizeN, hal_pktinfo_t *pktinfo, float sub_det_thresh_rel)
278 {
279 int ii, jj;
280 float *eigVals = inputBuff;
281 float *Qmat = inputBuff+SUBSP_DIM_L_MAX;
282 float *tempMatSum; // tempMatSum[SUBSP_DIM_L*SUBSP_DIM_L];
283 float *tempMatDiff; // tempMatDiff[SUBSP_DIM_L*SUBSP_DIM_L];
284 float max;
285 int maxIdx;
286 int sigSpaceDim = 0;
287
288 // find max for thresh
289 max = 0;
290 for (jj = 0; jj<matSizeN; jj++){
291 if (eigVals[jj]>max){
292 max = eigVals[jj];
293 }
294 }
295
296 if (sub_det_thresh_rel*max > thresh)
297 thresh = sub_det_thresh_rel*max;
298
299 for(ii=0;ii<SIG_SUBSP_DIM_MAX;ii++){
300 // sort
301 max = 0;
302 maxIdx = -1;
303 for(jj=0;jj<matSizeN;jj++){
304 if(eigVals[jj]>max){
305 max = eigVals[jj];
306 maxIdx = jj;
307 }
308 }
309 if(max>thresh){
310 eigVals[maxIdx] = 0;
311 // copy eigVec
312 for(jj=0;jj<matSizeN;jj++){
313 eMatrix[jj+sigSpaceDim*matSizeN] = Qmat[jj+maxIdx*SUBSP_DIM_L_MAX];
314 }
315 sigSpaceDim++;
316 }
317 }
318
319 //if (sigSpaceDim > 1){
320 tempMatSum = (float*) inputBuff; // tempMatSum[SUBSP_DIM_L*SUBSP_DIM_L];
321 //tempMatDiff = (float*) inputBuff+SUBSP_DIM_L*SUBSP_DIM_L; // tempMatDiff[SUBSP_DIM_L*SUBSP_DIM_L]
322 tempMatDiff = (float*) inputBuff+ SUBSP_DIM_L_MAX*SUBSP_DIM_L_MAX; // tempMatDiff[SUBSP_DIM_L*SUBSP_DIM_L]
323 for (ii = 0; ii<sigSpaceDim; ii++){
324 for(jj=1;jj<matSizeN;jj++){
325 tempMatSum[jj-1+ii*matSizeN] = eMatrix[jj-1+ii*matSizeN]+eMatrix[jj+ii*matSizeN];
326 tempMatDiff[jj-1+ii*matSizeN] = eMatrix[jj-1+ii*matSizeN]-eMatrix[jj+ii*matSizeN];
327 }
328 tempMatSum[jj-1+ii*matSizeN] = SQRTF(2)*eMatrix[matSizeN/2-1+ii*matSizeN];
329 tempMatDiff[jj-1+ii*matSizeN] = -SQRTF(2)*eMatrix[matSizeN-1+ii*matSizeN];
330 }
331
332 for(ii=0;ii<sigSpaceDim;ii++){
333 for(jj=0;jj<matSizeN/2-1;jj++){
334 // E_lower
335 eMatrix[jj+ii*SUBSP_DIM_L_MAX] = tempMatSum[jj+ii*matSizeN];
336 // E_upper
337 eMatrix[jj+(ii+sigSpaceDim)*SUBSP_DIM_L_MAX] = -tempMatDiff[(jj+matSizeN/2)+ii*matSizeN];
338 }
339 // E_lower
340 eMatrix[jj+ii*SUBSP_DIM_L_MAX] = tempMatSum[matSizeN-1+ii*matSizeN];
341 // E_upper
342 eMatrix[jj+(ii+sigSpaceDim)*SUBSP_DIM_L_MAX] = tempMatDiff[matSizeN-1+ii*matSizeN];
343
344 for(jj++;jj<matSizeN-1;jj++){
345 // E_lower
346 eMatrix[jj+ii*SUBSP_DIM_L_MAX] = tempMatSum[jj+ii*matSizeN];
347 // E_upper
348 eMatrix[jj+(ii+sigSpaceDim)*SUBSP_DIM_L_MAX] = tempMatDiff[(jj-matSizeN/2)+ii*matSizeN];
349 }
350 for (;jj < SUBSP_DIM_L_MAX;jj++) {
351 eMatrix[jj + ii*SUBSP_DIM_L_MAX] = 0;
352 eMatrix[jj + (ii + sigSpaceDim)*SUBSP_DIM_L_MAX] = 0;
353 }
354 }
355 return sigSpaceDim;
356 }
357
calcDimOnePhase(float * eMatrix,float * omega_hat_sort,int matSizeN)358 void calcDimOnePhase(float *eMatrix, float *omega_hat_sort, int matSizeN){
359
360 int ii;
361 float sumI, sumQ, omega_hat_pos;
362 //float* tempVecReal = eMatrix + SUBSP_DIM_L; // tempVecReal[SUBSP_DIM_L];
363 //float* tempVecImag = eMatrix + 2*SUBSP_DIM_L; // tempVecImag[SUBSP_DIM_L];
364 float* tempVecReal = eMatrix + SUBSP_DIM_L_MAX; // tempVecReal[SUBSP_DIM_L];
365 float* tempVecImag = eMatrix + 2*SUBSP_DIM_L_MAX; // tempVecImag[SUBSP_DIM_L];
366 float *V1_real, *V2_Real, *V1_imag, *V2_imag;
367
368 // copy real eigen vectors
369 for (ii = 0; ii < matSizeN/2; ii++){
370 tempVecReal[ii] = eMatrix[ii];
371 tempVecReal[ii + matSizeN / 2] = eMatrix[matSizeN / 2 - 1 - ii];
372 tempVecImag[ii] = eMatrix[ii + matSizeN / 2];
373 tempVecImag[ii + matSizeN / 2] = -eMatrix[matSizeN -1 - ii];
374 }
375
376 V1_real = tempVecReal;
377 V2_Real = tempVecReal +1;
378 V1_imag = tempVecImag;
379 V2_imag = tempVecImag +1;
380
381 sumI = 0.0f;
382 sumQ = 0.0f;
383 for (ii = 0; ii < matSizeN-1; ii++){
384 sumI += V1_real[ii] * V2_Real[ii] + V1_imag[ii] * V2_imag[ii];
385 sumQ += V1_real[ii] * V2_imag[ii] - V1_imag[ii] * V2_Real[ii];
386 }
387
388 omega_hat_pos = -1 * atan2f(sumQ, sumI);
389 omega_hat_sort[0] = (omega_hat_pos>0) ? omega_hat_pos : (omega_hat_pos + 2 * PI);
390 }
391
calc_QpTimesE(float * eMatrix,float * qMatrix,float * qPeMatrix,int matSizeN,int matSizeM)392 void calc_QpTimesE(float *eMatrix, float *qMatrix, float *qPeMatrix, int matSizeN, int matSizeM){
393
394 int ii, jj, kk;
395 float sum;
396
397 for(ii=0;ii<matSizeM;ii++){
398 for(jj=0;jj<matSizeN;jj++){
399 sum = 0;
400 for(kk=0;kk<matSizeN;kk++){
401 sum += qMatrix[kk+jj*SUBSP_DIM_L_MAX]*eMatrix[kk+ii*SUBSP_DIM_L_MAX];
402 }
403 qPeMatrix[jj+ii*SUBSP_DIM_L_MAX] = sum;
404 }
405 }
406 }
407
constructVmatrix(float * eigValPlusVec,float * vMatrix,int sigSpaceDim,int matSizeN)408 void constructVmatrix(float *eigValPlusVec, float *vMatrix, int sigSpaceDim, int matSizeN){
409
410 int ii, jj;
411 float *eigVals = eigValPlusVec;
412 float *Qmat = eigValPlusVec+SUBSP_DIM_L_MAX;
413
414 float min;
415 int minIdx;
416
417 for(ii=0;ii<sigSpaceDim;ii++){
418 // sort
419 min = 1e3f;
420 minIdx = -1;
421 for(jj=0;jj<matSizeN;jj++){
422 if(eigVals[jj]<min){
423 min = eigVals[jj];
424 minIdx = jj;
425 }
426 }
427 eigVals[minIdx] = 1e3f;
428 // copy eigVec
429 for(jj=0;jj<sigSpaceDim;jj++){
430 // V upper
431 vMatrix[ii+jj*SUBSP_DIM_L_MAX] = -Qmat[jj+minIdx*SUBSP_DIM_L_MAX];
432 }
433 for(;jj<matSizeN;jj++){
434 // V lower
435 vMatrix[ii+jj*SUBSP_DIM_L_MAX] = Qmat[jj+minIdx*SUBSP_DIM_L_MAX];
436 }
437 }
438 }
439
calcEtimesE(float * eMatrix,float * ePeMatrix,int matSizeN,int matSizeM)440 void calcEtimesE(float *eMatrix, float *ePeMatrix, int matSizeN, int matSizeM){
441 int ii, jj, kk;
442 #ifdef ARM_DS5
443 float32x2_t resVec0, resVec1, resVec2, resVec3, strVec0, strVec1;
444 float32x2_t rowVec0, rowVec1, colVec0, colVec1;
445 float32x2x2_t tempVec0; //, tempVec1;
446
447 for(ii=0;ii<matSizeM;ii+=2){
448 for(jj=0;jj<=ii;jj+=2){
449 resVec0 = vdup_n_f32(0.0f);
450 resVec1 = vdup_n_f32(0.0f);
451 resVec2 = vdup_n_f32(0.0f);
452 resVec3 = vdup_n_f32(0.0f);
453 for(kk=0;kk<matSizeN;kk+=2){
454 rowVec0 = vld1_f32((float32_t const *) (eMatrix+(kk+jj*SUBSP_DIM_L_MAX)));
455 rowVec1 = vld1_f32((float32_t const *) (eMatrix+(kk+(jj+1)*SUBSP_DIM_L_MAX)));
456
457 colVec0 = vld1_f32((float32_t const *) (eMatrix+(kk+ii*SUBSP_DIM_L_MAX)));
458 colVec1 = vld1_f32((float32_t const *) (eMatrix+(kk+(ii+1)*SUBSP_DIM_L_MAX)));
459
460 resVec0 = vmla_f32(resVec0, rowVec0, colVec0);
461 resVec1 = vmla_f32(resVec1, rowVec1, colVec0);
462 resVec2 = vmla_f32(resVec2, rowVec0, colVec1);
463 resVec3 = vmla_f32(resVec3, rowVec1, colVec1);
464 }
465 strVec0 = vpadd_f32(resVec0, resVec1);
466 strVec1 = vpadd_f32(resVec2, resVec3);
467
468 vst1_f32((float32_t *) (ePeMatrix + jj+ii*SUBSP_DIM_L_MAX), strVec0); // VST1.32 {d0}, [r0]
469 vst1_f32((float32_t *) (ePeMatrix + jj + (ii + 1)*SUBSP_DIM_L_MAX), strVec1);
470 // use symmetry
471 tempVec0 = vzip_f32(strVec0, strVec1); // VZIP.32 d0,d0
472 vst1_f32((float32_t *) (ePeMatrix + ii +jj*SUBSP_DIM_L_MAX), tempVec0.val[0]); // VST1.32 {d0}, [r0]
473 vst1_f32((float32_t *) (ePeMatrix + ii + (jj + 1)*SUBSP_DIM_L_MAX), tempVec0.val[1]);
474 }
475 }
476 #else
477 float sum, sum1, sum2,sum3;
478
479 for(ii=0;ii<matSizeM;ii+=2){
480 for(jj=0;jj<matSizeM;jj+=2){
481 sum = 0;
482 sum1 = 0;
483 sum2 = 0;
484 sum3 = 0;
485 for(kk=0;kk<matSizeN;kk+=2){
486 sum += eMatrix[kk+jj*SUBSP_DIM_L_MAX]*eMatrix[kk+ii*SUBSP_DIM_L_MAX];
487 sum1 += eMatrix[kk + (jj+1)*SUBSP_DIM_L_MAX] * eMatrix[kk + ii*SUBSP_DIM_L_MAX];
488 sum2 += eMatrix[kk + jj*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
489 sum3 += eMatrix[kk + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
490 sum += eMatrix[(kk+1) + jj*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + ii*SUBSP_DIM_L_MAX];
491 sum1 += eMatrix[(kk + 1) + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + ii*SUBSP_DIM_L_MAX];
492 sum2 += eMatrix[(kk + 1) + jj*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + (ii + 1)*SUBSP_DIM_L_MAX];
493 sum3 += eMatrix[(kk + 1) + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[(kk + 1) + (ii + 1)*SUBSP_DIM_L_MAX];
494 }
495 //if (kk < matSizeN) { // odd matSizeN case
496 // sum += eMatrix[kk + jj*SUBSP_DIM_L_MAX] * eMatrix[kk + ii*SUBSP_DIM_L_MAX];
497 // sum1 += eMatrix[kk + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[kk + ii*SUBSP_DIM_L_MAX];
498 // sum2 += eMatrix[kk + jj*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
499 // sum3 += eMatrix[kk + (jj + 1)*SUBSP_DIM_L_MAX] * eMatrix[kk + (ii + 1)*SUBSP_DIM_L_MAX];
500 //}
501 ePeMatrix[jj+ii*SUBSP_DIM_L_MAX] = sum;
502 ePeMatrix[jj+1 + ii*SUBSP_DIM_L_MAX] = sum1;
503 ePeMatrix[jj + (ii + 1)*SUBSP_DIM_L_MAX] = sum2;
504 ePeMatrix[(jj + 1) + (ii + 1)*SUBSP_DIM_L_MAX] = sum3;
505 }
506 }
507 #endif
508 }
509
calcPhase(float * psiHatMatrix,int sigSpaceDim,float * omega_hat_sort)510 int calcPhase(float *psiHatMatrix, int sigSpaceDim, float *omega_hat_sort){
511
512 int ii, jj;
513 //float omega_hat[SUBSP_DIM_L];
514 float omega_hat[SUBSP_DIM_L_MAX];
515 float tempStr;
516
517 for(ii=0;ii<sigSpaceDim;ii++){
518 omega_hat[ii] = -2*atanf(psiHatMatrix[ii]);
519 }
520 // sort
521 for(ii=0;ii<sigSpaceDim;ii++){
522 omega_hat_sort[ii] = (omega_hat[ii]>0)?omega_hat[ii]:(omega_hat[ii]+2*PI);
523 for(jj=ii;jj>0;jj--){
524 if(omega_hat_sort[jj]<omega_hat_sort[jj-1]){
525 tempStr = omega_hat_sort[jj-1];
526 omega_hat_sort[jj-1] = omega_hat_sort[jj];
527 omega_hat_sort[jj] = tempStr;
528 }
529 }
530 }
531 // remove double eigen values (from complex conjugate pairs)
532 jj = 0;
533 for(ii=1;ii<sigSpaceDim;ii++){
534 if(omega_hat_sort[ii]-omega_hat_sort[jj]>1e-2f){
535 jj++;
536 tempStr = omega_hat_sort[ii];
537 omega_hat_sort[jj] = tempStr;
538 }
539 }
540 sigSpaceDim -= (ii-1-jj);
541
542 return sigSpaceDim;
543 }
544
calcDelayAmplitude(hal_pktinfo_t * pktinfo,unsigned int * fftOutBuffer,unsigned int * totalpower,int firstPathDelay,int sigSpaceDim,float * omega_hat_sort,float * tau_hat,float * aMatrix,int fft_input_size)545 void calcDelayAmplitude(hal_pktinfo_t *pktinfo,
546 unsigned int *fftOutBuffer,
547 unsigned int *totalpower,
548 int firstPathDelay,
549 int sigSpaceDim,
550 float *omega_hat_sort,
551 float *tau_hat,
552 float *aMatrix,
553 int fft_input_size)
554 {
555 int ii, jj, kk, qq;
556 int index;
557 unsigned int *loadPtr;
558 unsigned int tempLoad;
559 short tempI, tempQ;
560 float tempVal, scale, delta;
561 float leftI, leftQ, rightI, rightQ;
562 float leftSqr, rightSqr, valSqr;
563 //float fracIndex[SUBSP_DIM_L];
564 float fracIndex[SUBSP_DIM_L_MAX];
565
566 int nRx = pktinfo->nRx+1;
567 int nTx = pktinfo->nTx+1;
568 int ifftSizeOsf = 1<<(pktinfo->fftSize+6);
569 int signalBandwidth = 20<<(pktinfo->sigBw);
570 int stepsize = 1<<IFFT_OSF_SHIFT;
571
572 int firstPathIndex = (firstPathDelay+(1<<(TOA_FPATH_BIPT-1)))>>TOA_FPATH_BIPT;
573 int windowStart = firstPathIndex-(fft_input_size <<(IFFT_OSF_SHIFT-1)); // center window middle on first-path
574
575 unsigned int bitMask = NUM_PARALLEL *ifftSizeOsf - 1;
576
577 // map phase to delay
578 for(kk=0;kk<sigSpaceDim;kk++){
579 tempVal = omega_hat_sort[kk]/(2*PI);
580 fracIndex[kk] = tempVal*(fft_input_size*stepsize);
581 tempVal *= fft_input_size /(1.0f*signalBandwidth);
582 //windowStart = (windowStart+2)&0xfffffffc;
583 tempVal += windowStart /(1.0f*stepsize*signalBandwidth); // windowstat in oversampled samples
584 tau_hat[kk] = tempVal;
585 }
586
587 // project delays onto time domain data to estimate path amplitude
588 for(ii=0;ii<nTx;ii++){
589 for(jj=0;jj<nRx;jj+= NUM_PARALLEL){
590 for (qq = 0;qq < NUM_PARALLEL;qq++) {
591 loadPtr = fftOutBuffer + (jj + ii*nRx)*ifftSizeOsf + qq;
592 scale = SQRTF(1.0f*nTx*nRx*totalpower[(jj + qq) + ii*nRx] / (1 << 15)) / (1 << 8); // 32.31 format
593 //scale = SQRTF(1.0f/fixedpower[jj+ii*nRx]/(1<<15))/(1<<8); // 32.31 format
594 //scale = SQRTF(1.0f/fixedpower[jj+ii*nRx]/(1<<0))/(1<<3); // 32.31 format
595 //scale = 1;
596 for (kk = 0;kk < sigSpaceDim;kk++) {
597 index = (int)floorf(fracIndex[kk]);
598 delta = fracIndex[kk] - index;
599 index += windowStart;
600
601 // treatment for negative windowStart??
602 // if(index<0) ...
603
604 tempLoad = loadPtr[(NUM_PARALLEL*index) &bitMask];
605 tempI = tempLoad & 0xffff;
606 tempQ = (tempLoad >> 16) & 0xffff;
607 leftI = tempI*scale;
608 leftQ = tempQ*scale;
609 leftSqr = leftI*leftI + leftQ*leftQ;
610
611 tempLoad = loadPtr[(NUM_PARALLEL*(index + 1)) &bitMask];
612 tempI = tempLoad & 0xffff;
613 tempQ = (tempLoad >> 16) & 0xffff;
614 rightI = tempI*scale;
615 rightQ = tempQ*scale;
616 rightSqr = rightI*rightI + rightQ*rightQ;
617
618 valSqr = (rightSqr - leftSqr)*delta + leftSqr;
619
620 aMatrix[kk + (jj + qq)*sigSpaceDim + ii*nRx*sigSpaceDim] = valSqr;
621 }
622 }
623 }
624 }
625 }
626
determineFirstPath(hal_pktinfo_t * pktinfo,float * tau_hat,float * aMatrix,int sigSpaceDim,float music_thresh_rel)627 int determineFirstPath(hal_pktinfo_t *pktinfo,
628 float *tau_hat,
629 float *aMatrix,
630 int sigSpaceDim,
631 float music_thresh_rel)
632 {
633 int ii, jj, kk;
634 int nRx = pktinfo->nRx+1;
635 int nTx = pktinfo->nTx+1;
636 int firstPathIdx;
637 int maxIdx = -1;
638 int firstPathDelay = 0;
639
640 float detThresh;
641 float maxVal = 0;
642
643 //float sumPower[SUBSP_DIM_L];
644 float sumPower[SUBSP_DIM_L_MAX];
645
646 int signalBandwidth = 20<<(pktinfo->sigBw);
647 int stepsize = 1<<IFFT_OSF_SHIFT;
648
649 for(kk=0;kk<sigSpaceDim;kk++){
650 sumPower[kk]=0;
651 }
652
653 for(ii=0;ii<nTx;ii++){
654 for(jj=0;jj<nRx;jj++){
655 for(kk=0;kk<sigSpaceDim;kk++){
656 sumPower[kk]+= aMatrix[kk +jj*sigSpaceDim +ii*nRx*sigSpaceDim];
657 }
658 }
659 }
660
661 // find max
662 for(kk=0;kk<sigSpaceDim;kk++){
663 sumPower[kk] /= (nTx*nRx);
664 if(sumPower[kk]>maxVal){
665 maxVal = sumPower[kk];
666 maxIdx = kk;
667 }
668 }
669
670 detThresh = maxVal*music_thresh_rel;
671 if(detThresh>MUSIC_THRESH_MAX){
672 detThresh=MUSIC_THRESH_MAX;
673 }
674 else if(detThresh<MUSIC_THRESH_MIN){
675 detThresh=MUSIC_THRESH_MIN;
676 }
677 //detThresh = maxVal/4;
678 firstPathIdx = maxIdx;
679 for(kk=maxIdx-1;kk>=0;kk--){
680 if(sumPower[kk]>detThresh){
681 firstPathIdx = kk;
682 }
683 }
684 firstPathDelay = (int) floorf((tau_hat[firstPathIdx]*signalBandwidth*stepsize*(1<<TOA_FPATH_BIPT))+0.5f);
685
686 return firstPathDelay;
687 }
688
calcSubspaceFineTiming(hal_pktinfo_t * pktinfo,unsigned int * fftOutBuffer,unsigned int * totalpower,int firstPathDelay,int * fineTimingRes,unsigned int * procBuffer,hal_wls_packet_params_t * packetparams)689 int calcSubspaceFineTiming(hal_pktinfo_t *pktinfo, // structure with CSI buffer parameters
690 unsigned int *fftOutBuffer, // buffer holding time-domain CSI
691 unsigned int *totalpower, // array holding power per rx/tx channel
692 int firstPathDelay, // existing first path estimate
693 int *fineTimingRes, // result of algorithm
694 unsigned int *procBuffer, // buffer for processing, size 2*BUFFER_OFFSET DW
695 hal_wls_packet_params_t *packetparams // passing packetinfo
696 )
697 {
698 int nRx = pktinfo->nRx + 1;
699 int nTx = pktinfo->nTx + 1;
700 int subsp_dim_L = SUB_DIM_L_20;
701 float sub_det_thresh, sub_det_thresh_rel, music_thresh_rel;
702 int fft_input_size = FFT_INPUT_SIZE_LONG_WINDOW;
703 float *realCorrMatrix = (float*)procBuffer; // realCorrMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
704 float *eigValPlusVec = (float*)procBuffer + BUFFER_OFFSET; // eigValPlusVec[MAX_MAT_SIZE+MAX_MAT_SIZE*MAX_MAT_SIZE]
705 float *eMatrix = (float*)procBuffer; // eMatrix[2*SUBSP_DIM_L*SUBSP_DIM_L]
706 #ifdef USE_TLS
707 float *ePeMatrix = (float*)procBuffer + BUFFER_OFFSET; //ePeMatrix[4*(SIG_SUBSP_DIM_MAX-2)*(SIG_SUBSP_DIM_MAX-2)]
708 float *eigValPlusVecEE = (float*)procBuffer; // eigValPlusVec[MAX_MAT_SIZE+MAX_MAT_SIZE*MAX_MAT_SIZE]
709 float *vMatrix = (float*)procBuffer + BUFFER_OFFSET; // vMatrix[4*(SUBSP_DIM_L-2)*(SUBSP_DIM_L-2)]
710 float *qMatrix = (float*)procBuffer; // qMatrix[MAX_MAT_SIZE*MAX_MAT_SIZE]
711 float *qPvMatrix = (float*)procBuffer + BUFFER_OFFSET / 2; // qPvMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
712 float *psiHatMatrix = (float*)procBuffer; // psiHatMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
713 #else
714 float *qMatrix = (float*) procBuffer+BUFFER_OFFSET; // qMatrix[MAX_MAT_SIZE*MAX_MAT_SIZE]
715 float *qPeMatrix = (float*) procBuffer+BUFFER_OFFSET*3/2; // qPeMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
716 float *psiHatMatrix = (float*) procBuffer+BUFFER_OFFSET; // psiHatMatrix[SUBSP_DIM_L*SUBSP_DIM_L]
717 #endif
718 float *eigPsiHat = (float*)procBuffer + BUFFER_OFFSET / 2; // eigPsiHat[SUBSP_DIM_L]
719 float *omega_hat_sort = (float*)procBuffer; // omega_hat_sort[SUBSP_DIM_L]
720 float *tau_hat = (float*)procBuffer + BUFFER_OFFSET / 4; // tau_hat[SUBSP_DIM_L]
721 float *aMatrix = (float*)procBuffer + BUFFER_OFFSET / 2; // aMatrix[MAX_TX*MAX_RX*2*SUBSP_DIM_L]
722
723 int numObs, sigSpaceDim, FD_downsample=1;
724
725 if ((pktinfo->packetType == 0) && (pktinfo->sigBw == 0) && (pktinfo->rxDevBw == 2)) { //process legacy pkt
726 subsp_dim_L = SUB_DIM_L_6;
727 sub_det_thresh = SUB_DET_THRESH_Legacy;
728 sub_det_thresh_rel = SUB_DET_THRESH_REL_Legacy;
729 }
730 else if (packetparams->chNum >= 36) { //5G band
731 if (pktinfo->sigBw == 2) {
732 sub_det_thresh = SUB_DET_THRESH_5G_80MHZ;
733 sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_80MHZ;
734 }
735 else if (pktinfo->sigBw == 1) {
736 sub_det_thresh = SUB_DET_THRESH_5G_40MHZ;
737 sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_40MHZ;
738 }
739 else if (pktinfo->sigBw == 0) {
740 #if 0 //larger auto-correlation matrix size
741 sub_det_thresh = SUB_DET_THRESH_5G_20MHZ;
742 sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_20MHZ;
743 #else
744 fft_input_size = FFT_INPUT_SIZE_SHORT_WINDOW;
745 subsp_dim_L = SUB_DIM_L_10;
746 sub_det_thresh = SUB_DET_THRESH_5G_20MHZ;
747 sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_20MHZ;
748 #endif
749 }
750 else if (pktinfo->packetType < 2) {
751 subsp_dim_L = SUB_DIM_L_16;
752 fft_input_size = FFT_INPUT_SIZE_LONG_WINDOW;
753 sub_det_thresh = SUB_DET_THRESH_20MHZ;
754 sub_det_thresh_rel = SUB_DET_THRESH_REL_5G_20MHZ;
755 }
756 else {
757 FD_downsample = 4;
758 subsp_dim_L = SUB_DIM_L_10;
759 sub_det_thresh = SUB_DET_THRESH_Legacy;
760 sub_det_thresh_rel = 0.035f;
761 }
762 }
763 else { //2.4G band
764 if (pktinfo->sigBw == 1) {
765 subsp_dim_L = SUB_DIM_L_16;
766 }
767 else {
768 subsp_dim_L = SUB_DIM_L_12;
769 }
770 sub_det_thresh = SUB_DET_THRESH_20MHZ;
771 sub_det_thresh_rel = SUB_DET_THRESH_REL_2G;
772 fft_input_size = FFT_INPUT_SIZE_LONG_WINDOW;
773 }
774
775 // determine firstpath power threshold according to frequency, and pass to "determineFirstPath"
776 music_thresh_rel = (packetparams->chNum < 36) ? MUSIC_THRESH_REL_2GHz : MUSIC_THRESH_REL_5GHz;
777
778 // in: fftOutBuffer[], totalpower[]
779 // out: realCorrMatrix[SUBSP_DIM_L*SUBSP_DIM_L] {100 DW}
780 calcCorMatrix(pktinfo, fftOutBuffer, totalpower, firstPathDelay, realCorrMatrix, procBuffer + BUFFER_OFFSET, subsp_dim_L, fft_input_size, FD_downsample, &numObs);
781 //numObs = (fft_input_size - subsp_dim_L + 1) * 2 * nRx*nTx;
782 sub_det_thresh *= (numObs * 2 * nRx*nTx); // account for CorMatrix scaling
783
784 // eigen(Shur) decomposition
785 // in: realCorrMatrix[]
786 // out: eigValPlusVec[]; [D:MAX_MAT_SIZE (eigen values)| Q:SUBSP_DIM_L*SUBSP_DIM_L (eigen vectors)] {10+100 DW +30DW scratch}
787 QR_algorithm(realCorrMatrix, eigValPlusVec, subsp_dim_L, 0);
788
789 // find signal space Eig Vals and construct E matrix for ESPRIT
790 // in: eigValPlusVec[] - used also as scratch memory
791 // out: eMatrix[]; [E_lower: (SUBSP_DIM_L-1)*SIG_SUBSP_DIM_MAX| E_upper: (SUBSP_DIM_L-1)*SIG_SUBSP_DIM_MAX] {200 DW}
792 sigSpaceDim = findSigSpace(eigValPlusVec, eMatrix, sub_det_thresh, subsp_dim_L, pktinfo, sub_det_thresh_rel);
793
794 if (sigSpaceDim < 2){
795 if (sigSpaceDim == 0){ // error, no signal over threshold
796 return -1;
797 }
798 //else{ // (sigSpaceDim == 1)
799 // // for single eigen value, use simpler code
800 // // in: eMatrix[]
801 // // out: omega_hat_sort[]
802 // calcDimOnePhase(eMatrix, omega_hat_sort, SUBSP_DIM_L);
803
804 // // determine delays and amplitudes
805 // // in : fftOutBuffer[], totalpower[], omega_hat_sort[]
806 // // out: tau_hat[], aMatrix[] {10+80 DW}
807 // calcDelayAmplitude(pktinfo, fftOutBuffer, totalpower, firstPathDelay, sigSpaceDim, omega_hat_sort, tau_hat, aMatrix);
808
809 // // determine first path
810 // // in: tau_hat[], aMatrix[]
811 // // out: fineTimingRes
812 // *fineTimingRes = determineFirstPath(pktinfo, tau_hat, aMatrix, sigSpaceDim);
813
814 // return 0;
815 //}
816 }
817 #ifdef USE_TLS // QR of E
818 // calc E'*E
819 // in: eMatrix[]
820 // out: ePeMatrix[] {256 DW}
821 calcEtimesE(eMatrix, ePeMatrix,(subsp_dim_L -1), 2*sigSpaceDim);
822 // eigen(Shur) decomposition
823 // in: ePeMatrix[]
824 // out: eigValPlusVec[]; [D:2*SIG_SUBSP_DIM_MAX (eigen values)| Q:4*SIG_SUBSP_DIM_MAX^2 (eigen vectors)] {12+144 DW +30DW scratch}
825 QR_algorithm(ePeMatrix, eigValPlusVecEE, 2*sigSpaceDim, 1);
826
827 // in: eigValPlusVec[]
828 // out: vMatrix[]; [V_upper: SIG_SUBSP_DIM_MAX*SIG_SUBSP_DIM_MAX| V_lower: SIG_SUBSP_DIM_MAX*SIG_SUBSP_DIM_MAX] {256 DW}
829 constructVmatrix(eigValPlusVecEE, vMatrix, sigSpaceDim, 2*sigSpaceDim);
830
831 // Psi_hat = V_lower\V_upper;
832 // V_lower = Q*R
833 // in: vMatrix[] V_lower (updated)
834 // out: qMatrix[SIG_SUBSP_DIM_MAX*SIG_SUBSP_DIM_MAX] {100 DW }
835 QR_decomposition(vMatrix+sigSpaceDim*SUBSP_DIM_L_MAX, qMatrix, sigSpaceDim, sigSpaceDim);
836 // calc Q'*V_upper
837 // in: vMatrix[] V_upper, qMatrix[]
838 // out: qPvMatrix[] {100 DW +30 DW scratch}
839 calc_QpTimesE(vMatrix, qMatrix, qPvMatrix, sigSpaceDim, sigSpaceDim);
840 // Psi_hat = (R^-1)(Q'*E_upper) solve with back substitution
841 // in: qPvMatrix[], vMatrix[] V_lower (updated)
842 // out: Psi_hat[]: (sigSpaceDim*sigSpaceDim) {100 DW}
843 myBackSub(qPvMatrix, vMatrix+sigSpaceDim*SUBSP_DIM_L_MAX, psiHatMatrix, sigSpaceDim, sigSpaceDim);
844
845 #else
846 // LS Psi_hat = E_lower\E_upper;
847 // E_lower = Q*R
848 // in: eMatrix[] E_lower (updated)
849 // out: qMatrix[] {100 DW}
850 QR_decomposition(eMatrix, qMatrix, (subsp_dim_L -1), sigSpaceDim);
851 // calc Q'*E_upper
852 // in: eMatrix[] E_upper, qMatrix[]
853 // out: qPeMatrix[] {100 DW}
854 calc_QpTimesE(eMatrix+ SUBSP_DIM_L_MAX*sigSpaceDim, qMatrix, qPeMatrix, (subsp_dim_L -1), sigSpaceDim);
855 // Psi_hat = (R^-1)(Q'*E_upper) solve with back substitution
856 // in: qPeMatrix[], eMatrix[] E_lower (updated)
857 // out: Psi_hat[]: (sigSpaceDim*sigSpaceDim) {100 DW}
858 myBackSub(qPeMatrix, eMatrix, psiHatMatrix, (subsp_dim_L -1), sigSpaceDim);
859
860 #endif
861 // omega_hat = atan(real(eig(Psi_hat)))
862 // in: psiHatMatrix[]
863 // out: eigPsiHat[] {10 DW +30 DW scratch}
864 unsym_QR_algorithm(psiHatMatrix, eigPsiHat, sigSpaceDim);
865
866 // determine phase
867 // in: eigPsiHat[], sigSpaceDim
868 // out: omega_hat_sort[], sigSpaceDim {10 DW}
869 sigSpaceDim = calcPhase(eigPsiHat, sigSpaceDim, omega_hat_sort);
870
871 // determine delays and amplitudes
872 // in : fftOutBuffer[], totalpower[], omega_hat_sort[]
873 // out: tau_hat[], aMatrix[] {10+80 DW}
874 calcDelayAmplitude(pktinfo, fftOutBuffer, totalpower, firstPathDelay, sigSpaceDim, omega_hat_sort, tau_hat, aMatrix, fft_input_size);
875
876 // determine first path
877 // in: tau_hat[], aMatrix[]
878 // out: fineTimingRes
879 *fineTimingRes = determineFirstPath(pktinfo, tau_hat, aMatrix, sigSpaceDim, music_thresh_rel);
880
881 return 0;
882 }
883 #endif /* CONFIG_WLS_CSI_PROC */
884