1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_quaternion_product_f32.c
4  * Description:  Floating-point quaternion product
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/quaternion_math_functions.h"
30 #include <math.h>
31 
32 /**
33   @ingroup groupQuaternionMath
34  */
35 
36 /**
37   @defgroup QuatProd Quaternion Product
38 
39   Compute the product of quaternions.
40  */
41 
42 /**
43   @ingroup QuatProd
44  */
45 
46 /**
47   @defgroup QuatProdVect Elementwise Quaternion Product
48 
49   Compute the elementwise product of quaternions.
50  */
51 
52 /**
53   @addtogroup QuatProdVect
54   @{
55  */
56 
57 /**
  @brief         Floating-point elementwise product of two arrays of quaternions.
59   @param[in]     qa                  first array of quaternions
60   @param[in]     qb                  second array of quaternions
61   @param[out]    qr                   elementwise product of quaternions
62   @param[in]     nbQuaternions       number of quaternions in the array
63   @return        none
64  */
65 
66 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
67 
68 #include "arm_helium_utils.h"
69 
/*
 * Helium (MVE) implementation.
 *
 * Each quaternion is stored as 4 consecutive float32_t values. One loop
 * iteration multiplies one quaternion a = qa[0..3] by one quaternion
 * b = qb[0..3] and writes the 4 result components to qr[0..3], then all
 * three pointers advance by 4 elements. nbQuaternions == 0 performs no
 * loads or stores.
 *
 * With a = [a1, a2, a3, a4] and b = [b1, b2, b3, b4], the four complex
 * multiply / multiply-accumulate steps below add up to:
 *
 *   r1 = a1*b1 - a2*b2 - a3*b3 - a4*b4
 *   r2 = a1*b2 + a2*b1 + a3*b4 - a4*b3
 *   r3 = a1*b3 - a2*b4 + a3*b1 + a4*b2
 *   r4 = a1*b4 + a2*b3 - a3*b2 + a4*b1
 */
void arm_quaternion_product_f32(const float32_t *qa,
    const float32_t *qb,
    float32_t *qr,
    uint32_t nbQuaternions)
{
    /*
     * Element offsets used by the gather loads, and the sign mask applied
     * to the reversed b vector. These tables are only ever read, so they
     * are declared const.
     */
    static const uint32_t patternA[4] = { 0, 1, 0, 1 };
    static const uint32_t patternB[4] = { 3, 2, 3, 2 };
    static const uint32_t patternC[4] = { 3, 2, 1, 0 };
    static const float32_t   signA[4] = { -1.0f, -1.0f, 1.0f, 1.0f };

    /* Loop-invariant vectors: loaded once, reused for every quaternion. */
    uint32x4_t vecA = vld1q_u32(patternA);
    uint32x4_t vecB = vld1q_u32(patternB);
    uint32x4_t vecC = vld1q_u32(patternC);
    f32x4_t vecSignA = vld1q_f32(signA);

    while (nbQuaternions > 0U)
    {
        f32x4_t vecTmpA, vecTmpB, vecAcc;

        /* [a1, a2, a1, a2] and [b1, b2, b3, b4] */
        vecTmpA = vldrwq_gather_shifted_offset_f32(qa, vecA);
        vecTmpB = vld1q(qb);
        /*
         * vcmul(r, [a1, a2, a1, a2], [b1, b2, b3, b4], 0)
         */
        vecAcc = vcmulq(vecTmpA, vecTmpB);
        /*
         * vcmla(r, [a1, a2, a1, a2], [b1, b2, b3, b4], 90)
         */
        vecAcc = vcmlaq_rot90(vecAcc, vecTmpA, vecTmpB);

        /* [a4, a3, a4, a3] and [b4, b3, b2, b1] */
        vecTmpA = vldrwq_gather_shifted_offset_f32(qa, vecB);
        vecTmpB = vldrwq_gather_shifted_offset_f32(qb, vecC);
        /*
         * build [-b4, -b3, b2, b1]
         */
        vecTmpB = vecTmpB * vecSignA;
        /*
         * vcmla(r, [a4, a3, a4, a3], [-b4, -b3, b2, b1], 270)
         */
        vecAcc = vcmlaq_rot270(vecAcc, vecTmpA, vecTmpB);
        /*
         * vcmla(r, [a4, a3, a4, a3], [-b4, -b3, b2, b1], 0)
         */
        vecAcc = vcmlaq(vecAcc, vecTmpA, vecTmpB);
        /*
         * store accumulator
         */
        vst1q_f32(qr, vecAcc);

        /* move to next quaternion */
        qa += 4;
        qb += 4;
        qr += 4;

        nbQuaternions--;
    }
}
127 
128 #else
129 
/*
 * Scalar fallback: walks the three arrays one quaternion (4 consecutive
 * float32_t values) at a time and delegates each pair to
 * arm_quaternion_product_single_f32(). nbQuaternions == 0 makes no call.
 */
void arm_quaternion_product_f32(const float32_t *qa,
    const float32_t *qb,
    float32_t *qr,
    uint32_t nbQuaternions)
{
   const float32_t *pSrcA = qa;
   const float32_t *pSrcB = qb;
   float32_t *pDst = qr;
   uint32_t remaining = nbQuaternions;

   while (remaining > 0U)
   {
     arm_quaternion_product_single_f32(pSrcA, pSrcB, pDst);

     /* step every cursor to the next 4-element quaternion */
     pSrcA += 4;
     pSrcB += 4;
     pDst += 4;

     remaining--;
   }
}
144 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
145 
146 /**
147   @} end of QuatProdVect group
148  */
149