/*
 * Copyright (c) 2021-2024, The TrustedFirmware-M Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 */

#include "cc3xx_dma.h"

#include "cc3xx_dev.h"
#include "cc3xx_engine_state.h"
#include "cc3xx_config.h"

#include <assert.h>
#include <string.h>

#include "fatal_error.h"
#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
#include "cmsis.h"
#endif

struct cc3xx_dma_state_t dma_state;

#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
static inline uint32_t round_down(uint32_t num, uint32_t boundary)
{
    return num - (num % boundary);
}

static inline uint32_t round_up(uint32_t num, uint32_t boundary)
{
    return (num + boundary - 1) - ((num + boundary - 1) % boundary);
}
#endif /* CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE */
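
/*
 * Worked example of the rounding helpers (illustrative address only): for a
 * buffer at 0x20001005 with 32-byte cache lines, round_down(0x20001005, 32)
 * yields 0x20001000 and round_up(0x20001005 + len, 32) rounds the end address
 * up to the next line boundary, so the SCB_CleanInvalidateDCache_by_Addr()
 * calls in process_data() cover every cache line the buffer touches.
 */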

#ifdef CC3XX_CONFIG_DMA_REMAP_ENABLE
static cc3xx_dma_remap_region_t remap_regions[CC3XX_CONFIG_DMA_REMAP_REGION_AM] = {0};

void cc3xx_lowlevel_dma_remap_region_init(uint32_t remap_region_idx,
                                          cc3xx_dma_remap_region_t *region)
{
    memcpy(&remap_regions[remap_region_idx], region, sizeof(*region));
}

void cc3xx_lowlevel_dma_remap_region_clear(uint32_t remap_region_idx)
{
    memset(&remap_regions[remap_region_idx], 0, sizeof(cc3xx_dma_remap_region_t));
}

void cc3xx_lowlevel_dma_tcm_cpusel(uint32_t cpuid)
{
    dma_state.remap_cpusel = cpuid;
}
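
/*
 * Usage sketch (illustrative only; all addresses and sizes here are
 * hypothetical): to make DMA accesses to a CPU-local ITCM alias go to its
 * system-bus mapping, with each core's TCM a fixed stride apart:
 *
 *     cc3xx_dma_remap_region_t itcm_remap = {
 *         .region_base = 0x00000000,         // CPU-local ITCM alias
 *         .region_size = 0x00008000,         // 32 KiB
 *         .remap_base = 0x0A000000,          // system-bus view of CPU 0 ITCM
 *         .remap_cpusel_offset = 0x01000000, // stride between per-CPU TCMs
 *     };
 *
 *     cc3xx_lowlevel_dma_remap_region_init(0, &itcm_remap);
 *     cc3xx_lowlevel_dma_tcm_cpusel(1); // DMA now targets CPU 1's ITCM
 */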

static uintptr_t remap_addr(uintptr_t addr)
{
    uint32_t idx;
    cc3xx_dma_remap_region_t *region;

    for (idx = 0; idx < CC3XX_CONFIG_DMA_REMAP_REGION_AM; idx++) {
        region = &remap_regions[idx];
        if (addr >= region->region_base
            && addr < region->region_base + region->region_size) {
            return (addr - region->region_base) + region->remap_base
                   + (region->remap_cpusel_offset * dma_state.remap_cpusel);
        }
    }

    return addr;
}

#else

static uintptr_t remap_addr(uintptr_t addr)
{
    return addr;
}

#endif /* CC3XX_CONFIG_DMA_REMAP_ENABLE */
static void process_data(const void* buf, size_t length)
{
    uintptr_t remapped_buf;

    /* Enable the DMA clock */
    P_CC3XX->misc.dma_clk_enable = 0x1U;

    /* Mask a sensible set of the host interrupts */
    P_CC3XX->host_rgf.host_rgf_imr = 0x7F0U;

    /* Reset the AXI_ERROR and SYM_DMA_COMPLETED interrupts */
    P_CC3XX->host_rgf.host_rgf_icr |= 0xFF0U;

    /* Remap the address, particularly for TCMs */
    remapped_buf = remap_addr((uintptr_t)buf);

    if (dma_state.block_buf_needs_output) {
        /* Set the data target */
        P_CC3XX->dout.dst_lli_word0 = dma_state.output_addr;
        /* And the length */
        P_CC3XX->dout.dst_lli_word1 = length;

#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
        /* SCB_CleanInvalidateDCache_by_Addr only accepts 32-byte aligned
         * addresses, so round the start down and the end up to cover the
         * whole output buffer */
        SCB_CleanInvalidateDCache_by_Addr((void *)round_down(dma_state.output_addr, 32),
                                          round_up(dma_state.output_addr + length, 32)
                                          - round_down(dma_state.output_addr, 32));
#endif /* CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE */

        dma_state.output_addr += length;
        dma_state.current_bytes_output += length;
    }

#ifdef CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE
    /* SCB_CleanInvalidateDCache_by_Addr only accepts 32-byte aligned
     * addresses, so round the start down and the end up to cover the
     * whole input buffer */
    SCB_CleanInvalidateDCache_by_Addr((void *)round_down(remapped_buf, 32),
                                      round_up(remapped_buf + length, 32)
                                      - round_down(remapped_buf, 32));
#endif /* CC3XX_CONFIG_DMA_CACHE_FLUSH_ENABLE */

    /* Set the data source */
    P_CC3XX->din.src_lli_word0 = remapped_buf;
    /* Writing the length triggers the DMA */
    P_CC3XX->din.src_lli_word1 = length;

    /* Wait for the DMA to complete (the SYM_DMA_COMPLETED interrupt to be
     * asserted)
     */
    while (!(P_CC3XX->host_rgf.host_rgf_irr & 0x800U)) {
#ifdef CC3XX_CONFIG_DMA_WFI_WAIT_ENABLE
        __asm("WFI");
#endif /* CC3XX_CONFIG_DMA_WFI_WAIT_ENABLE */
    }

    /* Reset the SYM_DMA_COMPLETED interrupt */
    P_CC3XX->host_rgf.host_rgf_icr = 0x800U;

    /* Disable the DMA clock */
    P_CC3XX->misc.dma_clk_enable = 0x0U;
}

void cc3xx_lowlevel_dma_copy_data(void* dest, const void* src, size_t length)
{
    /* Set to PASSTHROUGH engine */
    cc3xx_lowlevel_set_engine(CC3XX_ENGINE_NONE);

    /* Set output target */
    cc3xx_lowlevel_dma_set_output(dest, length);

    /* This starts the copy */
    cc3xx_lowlevel_dma_buffered_input_data(src, length, true);
    cc3xx_lowlevel_dma_flush_buffer(false);
}
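
/*
 * Usage sketch (illustrative only; the buffers are hypothetical): a plain
 * memory-to-memory copy through the PASSTHROUGH engine:
 *
 *     uint8_t src_buf[64];
 *     uint8_t dst_buf[64];
 *
 *     cc3xx_lowlevel_dma_copy_data(dst_buf, src_buf, sizeof(dst_buf));
 */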

cc3xx_err_t cc3xx_lowlevel_dma_buffered_input_data(const void* buf, size_t length,
                                                   bool write_output)
{
    size_t block_buf_size_free =
        dma_state.block_buf_size - dma_state.block_buf_size_in_use;
    size_t data_to_process_length = 0;
    size_t dma_input_length = 0;

    if (write_output) {
        if (length > dma_state.output_size) {
            FATAL_ERR(CC3XX_ERR_DMA_OUTPUT_BUFFER_TOO_SMALL);
            return CC3XX_ERR_DMA_OUTPUT_BUFFER_TOO_SMALL;
        }
        dma_state.output_size -= length;
    }

    /* The DMA block buf holds one block, allowing GCM and hashing, which
     * both require a last-block special case, to work. First, fill this
     * block.
     */
    if (dma_state.block_buf_size_in_use != 0) {
        /* If the block buffer needs output but the new data doesn't (or
         * vice versa), the block buffer must be flushed before the new
         * data is buffered
         */
        if (dma_state.block_buf_needs_output != write_output) {
            cc3xx_lowlevel_dma_flush_buffer(false);
        } else {
            data_to_process_length =
                length < block_buf_size_free ? length : block_buf_size_free;
            /* Copy only the amount that is both available and fits in the
             * block buffer */
            memcpy(dma_state.block_buf + dma_state.block_buf_size_in_use, buf,
                   data_to_process_length);
            dma_state.block_buf_size_in_use += data_to_process_length;
            buf += data_to_process_length;
            length -= data_to_process_length;
        }
    }

    if (length == 0) {
        return CC3XX_ERR_SUCCESS;
    }

    dma_state.block_buf_needs_output = write_output;

    /* The block buf is now full, and we have remaining data. First dispatch
     * the block buf. If the buffer is empty, this is a no-op.
     */
    cc3xx_lowlevel_dma_flush_buffer(false);

    /* If we have any whole blocks left, flush them (but make sure at least
     * some data always remains to be inserted into the block buf).
     */
    data_to_process_length = ((length - 1) / dma_state.block_buf_size)
                             * dma_state.block_buf_size;
    while (data_to_process_length > 0) {
        /* Individual transfers are capped below 0x10000 bytes, so split the
         * input into chunks, each kept to a whole number of blocks */
        dma_input_length = data_to_process_length < 0x10000 ? data_to_process_length
                                                            : 0x10000 - dma_state.block_buf_size;
        process_data(buf, dma_input_length);
        data_to_process_length -= dma_input_length;
        length -= dma_input_length;
        buf += dma_input_length;
    }

    /* Write the remaining data into the block buffer. The previous flush means
     * the buffer is empty, and we have less than a block of input data left, so
     * this can't overflow.
     */
    memcpy(dma_state.block_buf, buf, length);
    dma_state.block_buf_size_in_use += length;

    return CC3XX_ERR_SUCCESS;
}
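
/*
 * Worked example (illustrative numbers): with a 16-byte block buffer that
 * starts empty, feeding 100 bytes dispatches ((100 - 1) / 16) * 16 = 96
 * bytes to the DMA and leaves the final 4 bytes buffered, so a later call
 * (or cc3xx_lowlevel_dma_flush_buffer()) can apply any last-block special
 * casing required by modes such as GCM or hashing.
 */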

void cc3xx_lowlevel_dma_flush_buffer(bool zero_pad_first)
{
    if (dma_state.block_buf_size_in_use > 0) {
        if (zero_pad_first) {
            memset(dma_state.block_buf + dma_state.block_buf_size_in_use, 0,
                   sizeof(dma_state.block_buf) - dma_state.block_buf_size_in_use);
            dma_state.block_buf_size_in_use = dma_state.block_buf_size;
        }

        process_data(dma_state.block_buf, dma_state.block_buf_size_in_use);
        dma_state.block_buf_size_in_use = 0;
    }
}

void cc3xx_lowlevel_dma_set_buffer_size(size_t size)
{
    assert(size <= CC3XX_DMA_BLOCK_BUF_MAX_SIZE);
    dma_state.block_buf_size = size;
}
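
/*
 * Usage sketch (illustrative only): a caller would typically set the buffer
 * size to its algorithm's block size before streaming input, e.g. 64 bytes
 * for a SHA-256 block:
 *
 *     cc3xx_lowlevel_dma_set_buffer_size(64);
 */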

void cc3xx_lowlevel_dma_set_output(void* buf, size_t length)
{
    /* Remap the address, particularly for TCMs */
    dma_state.output_addr = remap_addr((uintptr_t)buf);
    dma_state.output_size = length;
}

void cc3xx_lowlevel_dma_uninit(void)
{
    memset(&dma_state, 0, sizeof(dma_state));
}