1 /*
2  * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Licensed under the Apache License, Version 2.0 (the License); you may
7  * not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 /* ----------------------------------------------------------------------
20  * Project:      CMSIS NN Library
21  * Title:        arm_q7_to_q15_with_offset.c
22  * Description:  Converts the elements of the Q7 vector to Q15 vector with an added offset
23  *
24  * $Date:        22 March 2023
25  * $Revision:    V.2.2.0
26  *
27  * Target :  Arm(R) M-Profile Architecture
28  *
29  * -------------------------------------------------------------------- */
30 
31 #include "arm_nnsupportfunctions.h"
32 
33 /**
34  * @ingroup groupSupport
35  */
36 
37 /**
38  * @addtogroup supportConversion
39  * @{
40  */
41 
/**
 * @brief Convert an int8 vector to an int16 vector, adding a constant offset.
 *
 * Writes dst[i] = src[i] + offset for every i in [0, block_size).
 * Depending on the build configuration the bulk of the work is done eight
 * elements at a time (ARM_MATH_MVEI), four elements at a time (ARM_MATH_DSP)
 * or one element at a time; any remainder is always handled by the scalar
 * loop at the end of the function.
 *
 * @param[in]  src         Input vector; block_size elements are read.
 * @param[out] dst         Output vector; block_size elements are written.
 * @param[in]  block_size  Number of elements to convert. Values <= 0 convert nothing.
 * @param[in]  offset      Value added to each sign-extended input element.
 */
void arm_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
{
    int32_t block_cnt;

#if defined(ARM_MATH_MVEI)

    int16x8_t source;
    /* Offset replicated into all eight 16-bit lanes */
    const int16x8_t source_offset = vdupq_n_s16(offset);
    block_cnt = block_size / 8;

    while (block_cnt > 0)
    {
        /* Widening load: eight s8 values, each sign extended into a 16-bit lane */
        source = vldrbq_s16(src);
        source = vaddq_s16(source, source_offset);
        vstrhq_s16(dst, source);
        dst += 8;
        src += 8;
        block_cnt--;
    }

    /* Handle left over samples.
     * '%' is used instead of '& 0x7' so that a negative block_size yields a
     * non-positive count and the scalar loop below is skipped, matching the
     * behaviour of the other two code paths. For block_size >= 0 both forms
     * give the same result. */
    block_cnt = block_size % 8;

#elif defined(ARM_MATH_DSP)
    /* Run the below code for cores that support SIMD instructions  */
    int32_t in_q7x4;
    int32_t in_q15x2_1;
    int32_t in_q15x2_2;
    int32_t out_q15x2_1;
    int32_t out_q15x2_2;

    /*loop unrolling */
    block_cnt = block_size >> 2;

    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time. */
    /* Offset replicated into both 16-bit halves of a 32-bit word */
    const int32_t offset_q15x2 = PKHBT(offset, offset, 16);
    while (block_cnt > 0)
    {
        /* convert from s8 to s16 and then store the results in the destination buffer */
        /* Four s8 values packed in one word; src is advanced by the helper */
        in_q7x4 = arm_nn_read_s8x4_ia(&src);

        /* Extract and sign extend each of the four s8 values to s16, adding the offset:
         * in_q15x2_1 holds bytes 1 and 3 (rotated into the byte 0 / byte 2 positions),
         * in_q15x2_2 holds bytes 0 and 2. */
        in_q15x2_1 = SXTAB16(offset_q15x2, ROR(in_q7x4, 8));
        in_q15x2_2 = SXTAB16(offset_q15x2, in_q7x4);

        /* Re-interleave the halves so the outputs are back in source order:
         * out_q15x2_1 = {byte 0, byte 1}, out_q15x2_2 = {byte 2, byte 3}. */
        out_q15x2_2 = PKHTB(in_q15x2_1, in_q15x2_2, 16);
        out_q15x2_1 = PKHBT(in_q15x2_2, in_q15x2_1, 16);

        arm_nn_write_q15x2_ia(&dst, out_q15x2_1);
        arm_nn_write_q15x2_ia(&dst, out_q15x2_2);

        block_cnt--;
    }
    /* Handle left over samples */
    block_cnt = block_size % 0x4;

#else
    /* Run the below code for Cortex-M0 */
    /* Loop over block_size number of values */
    block_cnt = block_size;
#endif

    /* Scalar loop: the tail of the vectorised paths, or the whole input otherwise */
    while (block_cnt > 0)
    {
        *dst++ = (int16_t)*src++ + offset;

        /* Decrement the loop counter */
        block_cnt--;
    }
}
111 
112 /**
113  * @} end of Doxygen group
114  */
115