1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_copy_f16.c
 * Description:  Copies the elements of a half-precision floating-point (f16) vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/support_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 
34 /**
35   @ingroup groupSupport
36  */
37 
38 
39 /**
40   @addtogroup copy
41   @{
42  */
43 
44 /**
45   @brief         Copies the elements of a f16 vector.
46   @param[in]     pSrc       points to input vector
47   @param[out]    pDst       points to output vector
48   @param[in]     blockSize  number of samples in each vector
49  */
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
void arm_copy_f16(
  const float16_t * pSrc,
  float16_t * pDst,
  uint32_t blockSize)
{
    /*
     * MVE variant: every pass of the loop moves one vector of f16 lanes
     * under a predicate computed from the number of samples still pending.
     * The loop is a do/while, so its body executes at least once.
     */
    do
    {
        /* Predicate derived from the remaining sample count */
        mve_pred16_t tailPred = vctp16q(blockSize);

        /* Zeroing predicated load from pSrc feeds a predicated store to pDst */
        vstrhq_p_f16(pDst, vldrhq_z_f16(pSrc, tailPred), tailPred);

        /* Step both pointers and the pending count by 8 samples */
        pSrc      += 8;
        pDst      += 8;
        blockSize -= 8;
    }
    /*
     * blockSize is unsigned; it is viewed as a signed value here so that a
     * count that has stepped past zero terminates the loop.
     */
    while ((int32_t) blockSize > 0);
}
72 
73 #else
74 
void arm_copy_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t blockSize)
{
  uint32_t remaining;                            /* Samples handled by the one-at-a-time loop */

#if defined (ARM_MATH_LOOPUNROLL)

  uint32_t quads;                                /* Number of four-sample groups */

  /* Unrolled part: copy four consecutive samples per iteration, in order */
  for (quads = blockSize >> 2U; quads != 0U; quads--)
  {
    pDst[0] = pSrc[0];
    pDst[1] = pSrc[1];
    pDst[2] = pSrc[2];
    pDst[3] = pSrc[3];

    /* Move past the group that was just copied */
    pSrc += 4;
    pDst += 4;
  }

  /* Samples left over after the groups of four (blockSize modulo 4) */
  remaining = blockSize & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Copy the pending samples one at a time */
  for (; remaining != 0U; remaining--)
  {
    *pDst++ = *pSrc++;
  }
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
123 
124 /**
  @} end of copy group
126  */
127 
128 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
129 
130