1 /*
2 * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 /* ----------------------------------------------------------------------
20 * Project: CMSIS NN Library
21 * Title: arm_q7_to_q15_with_offset.c
22 * Description: Converts the elements of the Q7 vector to Q15 vector with an added offset
23 *
24 * $Date: 22 March 2023
25 * $Revision: V.2.2.0
26 *
27 * Target : Arm(R) M-Profile Architecture
28 *
29 * -------------------------------------------------------------------- */
30
31 #include "arm_nnsupportfunctions.h"
32
33 /**
34 * @ingroup groupSupport
35 */
36
37 /**
38 * @addtogroup supportConversion
39 * @{
40 */
41
/**
 * @brief Convert a Q7 (int8) vector to a Q15 (int16) vector, adding an offset to every element.
 *
 * @param[in]  src         Input vector holding @p block_size int8 elements.
 * @param[out] dst         Output vector receiving @p block_size int16 elements.
 * @param[in]  block_size  Number of elements to convert. Zero or negative values convert nothing.
 * @param[in]  offset      Value added to each widened element.
 *
 * @details Each output element is dst[i] = src[i] + offset. One of three variants is
 *          selected at build time:
 *            - ARM_MATH_MVEI: 8 elements per iteration using MVE intrinsics.
 *            - ARM_MATH_DSP:  4 elements per iteration using packed 16-bit DSP operations.
 *            - otherwise:     plain scalar loop over all elements.
 *          In the first two variants the scalar loop at the end of the function converts
 *          the elements left over after the vectorised part.
 *          The scalar path does not saturate; the sum is narrowed to int16_t on store.
 *          NOTE(review): the SIMD paths are expected to wrap the same way - confirm
 *          against the intrinsic documentation.
 */
void arm_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
{
    int32_t block_cnt;

#if defined(ARM_MATH_MVEI)

    int16x8_t source;
    const int16x8_t source_offset = vdupq_n_s16(offset);

    /* Number of full groups of 8 elements */
    block_cnt = block_size / 8;

    while (block_cnt > 0)
    {
        /* Widening load of 8 x s8 into 8 x s16 lanes, add the offset, store 8 x s16 */
        source = vldrbq_s16(src);
        source = vaddq_s16(source, source_offset);
        vstrhq_s16(dst, source);
        dst += 8;
        src += 8;
        block_cnt--;
    }

    /* Left over elements. The remainder operator is used instead of a bit mask so that
     * a negative block_size yields a non-positive count and the tail loop is skipped,
     * matching the behaviour of the other two variants. */
    block_cnt = block_size % 8;

#elif defined(ARM_MATH_DSP)
    /* Run the below code for cores that support SIMD instructions */
    int32_t in_q7x4;
    int32_t in_q15x2_1;
    int32_t in_q15x2_2;
    int32_t out_q15x2_1;
    int32_t out_q15x2_2;

    /* Loop unrolling: number of full groups of 4 elements. Division (rather than a
     * right shift) keeps the result well defined for a negative block_size. */
    block_cnt = block_size / 4;

    /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
     * The offset is replicated into both halfwords so one SXTAB16 adds it to two elements. */
    const int32_t offset_q15x2 = PKHBT(offset, offset, 16);
    while (block_cnt > 0)
    {
        /* Read four packed s8 values; the helper receives &src and is relied on to advance it */
        in_q7x4 = arm_nn_read_s8x4_ia(&src);

        /* Sign extend the s8 values to s16 and add the offset, two elements per operation:
         * the rotated input provides elements 1 and 3, the unrotated input elements 0 and 2 */
        in_q15x2_1 = SXTAB16(offset_q15x2, ROR(in_q7x4, 8));
        in_q15x2_2 = SXTAB16(offset_q15x2, in_q7x4);

        /* Re-pair the halfwords so that the results are written in source order:
         * out_q15x2_1 = {element 0, element 1}, out_q15x2_2 = {element 2, element 3} */
        out_q15x2_2 = PKHTB(in_q15x2_1, in_q15x2_2, 16);
        out_q15x2_1 = PKHBT(in_q15x2_2, in_q15x2_1, 16);

        arm_nn_write_q15x2_ia(&dst, out_q15x2_1);
        arm_nn_write_q15x2_ia(&dst, out_q15x2_2);

        block_cnt--;
    }
    /* Handle left over samples */
    block_cnt = block_size % 4;

#else
    /* Generic path for cores without the MVE or DSP extensions */
    /* Loop over block_size number of values */
    block_cnt = block_size;
#endif

    /* Scalar conversion of the remaining (or, in the generic build, all) elements */
    while (block_cnt > 0)
    {
        /* The addition is carried out in int; the cast makes the narrowing explicit */
        *dst++ = (int16_t)(*src++ + offset);

        /* Decrement the loop counter */
        block_cnt--;
    }
}
111
112 /**
113 * @} end of Doxygen group
114 */
115