1 /*
2  * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Licensed under the Apache License, Version 2.0 (the License); you may
7  * not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 /* ----------------------------------------------------------------------
20  * Project:      CMSIS NN Library
21  * Title:        arm_q7_to_q15_reordered_with_offset.c
22  * Description:  Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. The re-ordering
23  *               is a signature of sign extension intrinsic(DSP extension).
24  *
25  * $Date:        May 29, 2020
26  * $Revision:    V.2.0.3
27  *
28  * Target Processor:  Cortex-M cores
29  *
30  * -------------------------------------------------------------------- */
31 
32 #include "arm_nnsupportfunctions.h"
33 
34 /**
35  * @ingroup groupSupport
36  */
37 
38 /**
39  * @addtogroup nndata_convert
40  * @{
41  */
42 
43 /**
44  * @brief Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset.
45  *
46  * @note  Refer header file for details.
47  *
48  */
49 
arm_q7_to_q15_reordered_with_offset(const q7_t * src,q15_t * dst,uint32_t block_size,q15_t offset)50 void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset)
51 {
52 
53 #if defined(ARM_MATH_DSP)
54     uint32_t block_cnt;
55     /* Run the below code for cores that support SIMD instructions  */
56     q31_t in_q7x4;
57     q31_t out_q15x2_1;
58     q31_t out_q15x2_2;
59 
60     /*loop unrolling */
61     block_cnt = block_size >> 2u;
62 
63     /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */
64     const q31_t offset_q15x2 = (q31_t)__PKHBT(offset, offset, 16);
65     while (block_cnt > 0u)
66     {
67         /* convert from q7 to q15 and then store the results in the destination buffer */
68         in_q7x4 = arm_nn_read_q7x4_ia(&src);
69 
70         /* Extract and sign extend each of the four q7 values to q15 */
71         out_q15x2_1 = __SXTAB16(offset_q15x2, __ROR((uint32_t)in_q7x4, 8));
72         out_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4);
73 
74         arm_nn_write_q15x2_ia(&dst, out_q15x2_2);
75         arm_nn_write_q15x2_ia(&dst, out_q15x2_1);
76 
77         block_cnt--;
78     }
79     /* Handle left over samples */
80     block_cnt = block_size % 0x4u;
81 
82     while (block_cnt > 0u)
83     {
84         *dst++ = (q15_t)*src++ + offset;
85 
86         /* Decrement the loop counter */
87         block_cnt--;
88     }
89 #else
90     (void)src;
91     (void)dst;
92     (void)block_size;
93     (void)offset;
94     /* Not available */
95 #endif
96 }
97 
98 /**
99  * @} end of nndata_convert group
100  */
101