1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_copy_f16.c
 * Description:  Copies the elements of a half-precision floating-point (f16) vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/support_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 
34 /**
35   @ingroup groupSupport
36  */
37 
38 
39 /**
40   @addtogroup copy
41   @{
42  */
43 
44 /**
45   @brief         Copies the elements of a f16 vector.
46   @param[in]     pSrc       points to input vector
47   @param[out]    pDst       points to output vector
48   @param[in]     blockSize  number of samples in each vector
49   @return        none
50  */
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52 
/*
 * Helium (MVE) variant: copies blockSize f16 samples from pSrc to pDst,
 * eight lanes per iteration, using a predicate so that a partial final
 * vector never touches elements beyond blockSize.
 */
void arm_copy_f16(
  const float16_t * pSrc,
  float16_t * pDst,
  uint32_t blockSize)
{
    mve_pred16_t activeLanes;

    /*
     * The body always executes at least once. With blockSize == 0 the
     * predicate built from the count has no active lanes, so that pass
     * neither loads nor stores anything.
     */
    for (;;)
    {
        /* Enable only the lanes that still correspond to pending samples */
        activeLanes = vctp16q(blockSize);

        /* Predicated load (inactive lanes zeroed) feeding a predicated store */
        vstrhq_p_f16(pDst, vldrhq_z_f16(pSrc, activeLanes), activeLanes);

        /* Step both pointers and the pending-sample count by one vector */
        pSrc += 8;
        pDst += 8;
        blockSize -= 8;

        /*
         * The count is tested as a signed value: once fewer than eight
         * samples were pending, the subtraction wraps and reads as negative.
         */
        if ((int32_t) blockSize <= 0)
        {
            break;
        }
    }
}
73 
74 #else
75 
/*
 * Scalar variant: copies blockSize f16 samples from pSrc to pDst, one
 * element at a time in ascending order. When ARM_MATH_LOOPUNROLL is
 * defined, the bulk of the work is done four samples per pass and the
 * trailing loop only handles the 0..3 leftover samples.
 */
void arm_copy_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t blockSize)
{
  uint32_t tailCnt;                              /* Samples handled by the final loop */

#if defined (ARM_MATH_LOOPUNROLL)

  uint32_t quadCnt;                              /* Groups of four samples */

  /* Unrolled section: four samples per pass, lowest index first */
  for (quadCnt = blockSize >> 2U; quadCnt > 0U; quadCnt--)
  {
    pDst[0] = pSrc[0];
    pDst[1] = pSrc[1];
    pDst[2] = pSrc[2];
    pDst[3] = pSrc[3];

    pSrc += 4;
    pDst += 4;
  }

  /* Whatever does not fill a complete group of four is left for the tail */
  tailCnt = blockSize & 0x3U;

#else

  /* No unrolling: the tail loop processes every sample */
  tailCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* C = A, one sample per pass */
  for (; tailCnt > 0U; tailCnt--)
  {
    *pDst++ = *pSrc++;
  }
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
124 
125 /**
  @} end of copy group
127  */
128 
129 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
130 
131