/*
 * Copyright (c) 2021-2024, The TrustedFirmware-M Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 */

#include "cc3xx_dma.h"

#include "cc3xx_dev.h"
#include "cc3xx_engine_state.h"
#include "cc3xx_config.h"

#include <assert.h>
#include <string.h>

#include "fatal_error.h"
#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
#include "cmsis.h"
#endif

struct cc3xx_dma_state_t dma_state;

#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
static inline uint32_t round_down(uint32_t num, uint32_t boundary)
{
    return num - (num % boundary);
}

static inline uint32_t round_up(uint32_t num, uint32_t boundary)
{
    return (num + boundary - 1) - ((num + boundary - 1) % boundary);
}
#endif /* CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE */
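
/* Worked example (values illustrative): round_down(0x2001C, 32) == 0x20000
 * and round_up(0x2001C, 32) == 0x20020, so the cache maintenance calls
 * below always cover whole 32-byte cache lines around a buffer.
 */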

#ifdef CC3XX_CONFIG_DMA_REMAP_ENABLE
static cc3xx_dma_remap_region_t remap_regions[CC3XX_CONFIG_DMA_REMAP_REGION_AM] = {0};

void cc3xx_lowlevel_dma_remap_region_init(uint32_t remap_region_idx,
                                          cc3xx_dma_remap_region_t *region)
{
    memcpy(&remap_regions[remap_region_idx], region, sizeof(*region));
}

void cc3xx_lowlevel_dma_remap_region_clear(uint32_t remap_region_idx)
{
    memset(&remap_regions[remap_region_idx], 0, sizeof(cc3xx_dma_remap_region_t));
}

void cc3xx_lowlevel_dma_tcm_cpusel(uint32_t cpuid)
{
    dma_state.remap_cpusel = cpuid;
}

static uintptr_t remap_addr(uintptr_t addr)
{
    uint32_t idx;
    cc3xx_dma_remap_region_t *region;

    for (idx = 0; idx < CC3XX_CONFIG_DMA_REMAP_REGION_AM; idx++) {
        region = &remap_regions[idx];
        if (addr >= region->region_base
            && addr < region->region_base + region->region_size) {
            return (addr - region->region_base) + region->remap_base
                    + (region->remap_cpusel_offset * dma_state.remap_cpusel);
        }
    }

    return addr;
}
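
/* Illustrative example (all values hypothetical): with region_base = 0x0,
 * region_size = 0x8000, remap_base = 0x50000000 and
 * remap_cpusel_offset = 0x10000000, an access to TCM alias address 0x100
 * with remap_cpusel == 1 remaps to
 * 0x50000000 + 1 * 0x10000000 + 0x100 == 0x60000100, the address the DMA
 * must use to reach that CPU's TCM.
 */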

#else

static uintptr_t remap_addr(uintptr_t addr) {
    return addr;
}

#endif /* CC3XX_CONFIG_DMA_REMAP_ENABLE */

static void process_data(const void* buf, size_t length)
{
    uintptr_t remapped_buf;

    /* Enable the DMA clock */
    P_CC3XX->misc.dma_clk_enable = 0x1U;

    /* Mask a sensible set of the host interrupts */
    P_CC3XX->host_rgf.host_rgf_imr = 0x7F0U;

    /* Reset the AXI_ERROR and SYM_DMA_COMPLETED interrupts */
    P_CC3XX->host_rgf.host_rgf_icr |= 0xFF0U;

    /* remap the address, particularly for TCMs */
    remapped_buf = remap_addr((uintptr_t)buf);

    if (dma_state.block_buf_needs_output) {
        /* Set the data target */
        P_CC3XX->dout.dst_lli_word0 = dma_state.output_addr;
        /* And the length */
        P_CC3XX->dout.dst_lli_word1 = length;

#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
        /* This function only accepts 32-byte aligned addresses, so round
         * outwards to make sure the whole output buffer is invalidated */
        SCB_CleanInvalidateDCache_by_Addr((void *)round_down(dma_state.output_addr, 32),
                                          round_up(dma_state.output_addr + length, 32)
                                          - round_down(dma_state.output_addr, 32));
#endif /* CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE */

        dma_state.output_addr += length;
        dma_state.current_bytes_output += length;
    }

#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
    /* This function only accepts 32-byte aligned addresses, so round
     * outwards to make sure the whole input buffer is invalidated */
    SCB_CleanInvalidateDCache_by_Addr((void *)round_down(remapped_buf, 32),
                                      round_up(remapped_buf + length, 32)
                                      - round_down(remapped_buf, 32));
#endif /* CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE */

    /* Set the data source */
    P_CC3XX->din.src_lli_word0 = remapped_buf;
    /* Writing the length triggers the DMA */
    P_CC3XX->din.src_lli_word1 = length;

    /* Wait for the DMA to complete (the SYM_DMA_COMPLETED interrupt to be
     * asserted)
     */
    while (!(P_CC3XX->host_rgf.host_rgf_irr & 0x800U)) {
#ifdef CC3XX_CONFIG_DMA_WFI_WAIT_ENABLE
        __asm("WFI");
#endif /* CC3XX_CONFIG_DMA_WFI_WAIT_ENABLE */
    }

    /* Reset the SYM_DMA_COMPLETED interrupt */
    P_CC3XX->host_rgf.host_rgf_icr = 0x800U;

    /* Disable the DMA clock */
    P_CC3XX->misc.dma_clk_enable = 0x0U;
}

void cc3xx_lowlevel_dma_copy_data(void* dest, const void* src, size_t length)
{
    /* Set to PASSTHROUGH engine */
    cc3xx_lowlevel_set_engine(CC3XX_ENGINE_NONE);

    /* Set output target */
    cc3xx_lowlevel_dma_set_output(dest, length);

    /* This starts the copy */
    cc3xx_lowlevel_dma_buffered_input_data(src, length, true);
    cc3xx_lowlevel_dma_flush_buffer(false);
}
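
/* Usage sketch (illustrative): a DMA-driven copy equivalent to memcpy():
 *
 *     uint8_t src[64] = { ... };
 *     uint8_t dst[64];
 *     cc3xx_lowlevel_dma_copy_data(dst, src, sizeof(src));
 */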

cc3xx_err_t cc3xx_lowlevel_dma_buffered_input_data(const void* buf, size_t length,
                                                   bool write_output)
{
    size_t block_buf_size_free =
        dma_state.block_buf_size - dma_state.block_buf_size_in_use;
    size_t data_to_process_length = 0;
    size_t dma_input_length = 0;

    if (write_output) {
        if (length > dma_state.output_size) {
            FATAL_ERR(CC3XX_ERR_DMA_OUTPUT_BUFFER_TOO_SMALL);
            return CC3XX_ERR_DMA_OUTPUT_BUFFER_TOO_SMALL;
        }
        dma_state.output_size -= length;
    }

    /* The DMA block buf will hold a block (to allow GCM and hashing, which
     * both require a last-block special case, to work). First, fill this
     * block.
     */
    if (dma_state.block_buf_size_in_use != 0) {
        /* If the buffered data and the new data differ in whether they
         * should be written to the output, the block buffer must be flushed
         * before the new data is accepted.
         */
        if (dma_state.block_buf_needs_output != write_output) {
            cc3xx_lowlevel_dma_flush_buffer(false);
        } else {
            data_to_process_length =
                length < block_buf_size_free ? length : block_buf_size_free;
            memcpy(dma_state.block_buf + dma_state.block_buf_size_in_use, buf,
                   data_to_process_length);
            dma_state.block_buf_size_in_use += data_to_process_length;
            buf += data_to_process_length;
            length -= data_to_process_length;
        }
    }

    if (length == 0) {
        return CC3XX_ERR_SUCCESS;
    }

    dma_state.block_buf_needs_output = write_output;

    /* The block buf is now full, and we have remaining data. First dispatch
     * the block buf. If the buffer is empty, this is a no-op.
     */
    cc3xx_lowlevel_dma_flush_buffer(false);

    /* If we have any whole blocks left, flush them (but make sure at least
     * some data always remains to insert into the block buf).
     *
     * Each DMA transfer is capped below 0x10000 bytes; backing off by one
     * block size keeps the capped chunk a whole number of blocks.
     */
    data_to_process_length = ((length - 1) / dma_state.block_buf_size) * dma_state.block_buf_size;
    while (data_to_process_length > 0) {
        dma_input_length = data_to_process_length < 0x10000 ? data_to_process_length
                                                            : 0x10000 - dma_state.block_buf_size;
        process_data(buf, dma_input_length);
        data_to_process_length -= dma_input_length;
        length -= dma_input_length;
        buf += dma_input_length;
    }

    /* Write the remaining data into the block buffer. The previous flush means
     * the buffer is empty, and we have less than a block of input data left, so
     * this can't overflow.
     */
    memcpy(dma_state.block_buf, buf, length);
    dma_state.block_buf_size_in_use += length;

    return CC3XX_ERR_SUCCESS;
}
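
/* Worked example (illustrative): with block_buf_size == 16 and an empty
 * block buffer, a 40-byte input is handled as follows: nothing needs
 * merging, ((40 - 1) / 16) * 16 == 32 bytes are sent to the DMA directly,
 * and the final 8 bytes are kept in the block buffer so that a last-block
 * special case (GCM, hashing) still has data to operate on.
 */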

void cc3xx_lowlevel_dma_flush_buffer(bool zero_pad_first)
{
    if (dma_state.block_buf_size_in_use > 0) {
        if (zero_pad_first) {
            memset(dma_state.block_buf + dma_state.block_buf_size_in_use, 0,
                   sizeof(dma_state.block_buf) - dma_state.block_buf_size_in_use);
            dma_state.block_buf_size_in_use = dma_state.block_buf_size;
        }

        process_data(dma_state.block_buf, dma_state.block_buf_size_in_use);
        dma_state.block_buf_size_in_use = 0;
    }
}
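
/* Example (illustrative): with block_buf_size == 16 and 10 bytes buffered,
 * flushing with zero_pad_first == true zero-fills the rest of the block and
 * dispatches a full 16-byte block, while zero_pad_first == false dispatches
 * just the 10 buffered bytes. The buffer is empty afterwards either way.
 */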

void cc3xx_lowlevel_dma_set_buffer_size(size_t size)
{
    assert(size <= CC3XX_DMA_BLOCK_BUF_MAX_SIZE);

    dma_state.block_buf_size = size;
}

void cc3xx_lowlevel_dma_set_output(void* buf, size_t length)
{
    /* remap the address, particularly for TCMs */
    dma_state.output_addr = remap_addr((uintptr_t)buf);
    dma_state.output_size = length;
}

void cc3xx_lowlevel_dma_uninit(void)
{
    memset(&dma_state, 0, sizeof(dma_state));
}