1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_or_u16.c
4 * Description: uint16_t bitwise inclusive OR
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/basic_math_functions.h"
30
31 /**
32 @ingroup groupMath
33 */
34
35 /**
36 @defgroup Or Vector bitwise inclusive OR
37
38 Compute the logical bitwise OR.
39
40 There are separate functions for uint32_t, uint16_t, and uint8_t data types.
41 */
42
43 /**
44 @addtogroup Or
45 @{
46 */
47
/**
  @brief         Compute the bitwise inclusive OR of two uint16_t vectors.
  @param[in]     pSrcA      points to input vector A
  @param[in]     pSrcB      points to input vector B
  @param[out]    pDst       points to output vector
  @param[in]     blockSize  number of samples in each vector

  @par           Details
                   For every index n in [0, blockSize) the function stores
                   pDst[n] = pSrcA[n] | pSrcB[n].
                   When blockSize is 0 no element is read or written.
 */

void arm_or_u16(
    const uint16_t * pSrcA,
    const uint16_t * pSrcB,
          uint16_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecSrcA, vecSrcB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vorrq_u16(vecSrcA, vecSrcB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 7 remaining samples */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        /* Predicate that enables only the first blkCnt 16-bit lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Use predicated (zeroing) loads so that the lanes beyond the
           remaining samples are not fetched from memory; a full-width load
           here would read up to 7 elements past the end of the inputs. */
        vecSrcA = vld1q_z_u16(pSrcA, p0);
        vecSrcB = vld1q_z_u16(pSrcB, p0);

        /* Predicated store: only the first blkCnt lanes are written */
        vstrhq_p(pDst, vorrq_u16(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecA, vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u16(pSrcA);
        vecB = vld1q_u16(pSrcB);

        vst1q_u16(pDst, vorrq_u16(vecA, vecB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: the remaining samples are handled by the scalar loop below */
    blkCnt = blockSize & 7;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    /* Scalar loop: whole vector (generic build) or tail (Neon build) */
    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++)|(*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}
133
134 /**
135 @} end of Or group
136 */
137