/*
 * SPDX-FileCopyrightText: 2018-2021 Espressif Systems (Shanghai) CO LTD
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_psram.h"
#include "esp_private/esp_psram_extram.h"
#include "esp32/rom/cache.h"
#include "sdkconfig.h"
#include "esp32/himem.h"
#include "soc/soc.h"
#include "esp_log.h"
#include "esp_check.h"
#include "../esp_psram_impl.h"

/*
So, why does the API look this way, and why is it so inflexible that it does not allow any mappings other than full
32K chunks? Most of it has to do with the fact that the cache works on *virtual* addresses. What this comes down to is
that while it is allowed to map a range of physical memory into the address space twice, there is no cache consistency
between the two regions.

This means that a write to region A may or may not show up, perhaps delayed, in region B, as it depends on
the time that the writeback to SPI RAM is done on A and the time before the corresponding cache line is invalidated
on B. Note that this goes for every 32-byte cache line: this implies that if a program writes to addresses X and Y
within A, the write to Y may show up before the write to X does.

It gets even worse when both A and B are written: theoretically, a write to a 32-byte cache line in A can be entirely
undone because of a write to a different address in B that happens to be in the same 32-byte cache line.

For these reasons, we do not allow double mappings at all. This, however, has other implications that make
supporting arbitrary ranges not really useful. Because of the lack of double mappings, applications will need to do
their own management of mapped regions, meaning they will normally map blocks in and out at a time anyway, as mapping
more freely sized regions would risk accidentally mapping two overlapping regions. As this is the case, to keep the
code simple, at the moment we just force these blocks to be equal to the 32K MMU page size. The API itself does allow
for more granular allocations, so if there's a pressing need for a more complex solution in the future, we can do this.

Note: In the future, we can expand on this API to do a memcpy() between SPI RAM and (internal) memory using the SPI1
peripheral. This needs support for SPI1 to be in the SPI driver, however.
*/
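
/*
A rough sketch of typical usage of this API (illustrative only, not compiled as part of this file; it assumes
ESP_HIMEM_BLKSZ from esp32/himem.h, which is the 32K block size used throughout this implementation, and uses
ESP_ERROR_CHECK for brevity):

    esp_himem_handle_t mem;
    esp_himem_rangehandle_t range;
    uint32_t *ptr;
    //Allocate four 32K blocks of physical himem and a one-block window of address space.
    ESP_ERROR_CHECK(esp_himem_alloc(4 * ESP_HIMEM_BLKSZ, &mem));
    ESP_ERROR_CHECK(esp_himem_alloc_map_range(ESP_HIMEM_BLKSZ, &range));
    //Map the second physical block into the window and use it.
    ESP_ERROR_CHECK(esp_himem_map(mem, range, 1 * ESP_HIMEM_BLKSZ, 0, ESP_HIMEM_BLKSZ, 0, (void **)&ptr));
    ptr[0] = 0x12345678;
    //Unmap before re-using the window for a different physical block, then clean up.
    ESP_ERROR_CHECK(esp_himem_unmap(range, ptr, ESP_HIMEM_BLKSZ));
    ESP_ERROR_CHECK(esp_himem_free_map_range(range));
    ESP_ERROR_CHECK(esp_himem_free(mem));
*/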

#if CONFIG_SPIRAM_BANKSWITCH_ENABLE
#define SPIRAM_BANKSWITCH_RESERVE CONFIG_SPIRAM_BANKSWITCH_RESERVE
#else
#define SPIRAM_BANKSWITCH_RESERVE 0
#endif

#define CACHE_BLOCKSIZE (32*1024)

//Start of the virtual address range reserved for himem use
#define VIRT_HIMEM_RANGE_START (SOC_EXTRAM_DATA_LOW+(128-SPIRAM_BANKSWITCH_RESERVE)*CACHE_BLOCKSIZE)
//Start MMU block reserved for himem use
#define VIRT_HIMEM_RANGE_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
//Start physical block
#define PHYS_HIMEM_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)

#define TAG "esp_himem"

// Metadata for a block of physical RAM
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
} ramblock_t;

//Metadata for a 32K memory address range
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
    unsigned int ram_block: 16;
} rangeblock_t;

static ramblock_t *s_ram_descriptor = NULL;
static rangeblock_t *s_range_descriptor = NULL;
static int s_ramblockcnt = 0;
static const int s_rangeblockcnt = SPIRAM_BANKSWITCH_RESERVE;

//Handle for a window of address space
typedef struct esp_himem_rangedata_t {
    int block_ct;
    int block_start;
} esp_himem_rangedata_t;

//Handle for a range of physical memory
typedef struct esp_himem_ramdata_t {
    int block_ct;
    uint16_t *block;
} esp_himem_ramdata_t;

static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED;

static inline int ramblock_idx_valid(int ramblock_idx)
{
    return (ramblock_idx >= 0 && ramblock_idx < s_ramblockcnt);
}

static inline int rangeblock_idx_valid(int rangeblock_idx)
{
    return (rangeblock_idx >= 0 && rangeblock_idx < s_rangeblockcnt);
}

//Program the external-RAM cache MMU of both CPUs (0 and 1) to map ct consecutive 32K virtual banks,
//starting at virt_bank, to the physical banks starting at phys_bank.
static void set_bank(int virt_bank, int phys_bank, int ct)
{
    int r __attribute__((unused));
    r = cache_sram_mmu_set( 0, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
    assert(r == 0);
    r = cache_sram_mmu_set( 1, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
    assert(r == 0);
}

esp_himem_get_phys_size(void)113 size_t esp_himem_get_phys_size(void)
114 {
115     int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
116     uint32_t psram_available_size = 0;
117     esp_psram_impl_get_available_size(&psram_available_size);
118     return psram_available_size - paddr_start;
119 }
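
/*
Worked example (illustrative numbers, not a guarantee for any particular module): with 8 MB of PSRAM reported by
esp_psram_impl_get_available_size() and CONFIG_SPIRAM_BANKSWITCH_RESERVE set to 8, paddr_start is
4096 KB - 8 * 32 KB = 3840 KB, so esp_himem_get_phys_size() returns 8192 KB - 3840 KB = 4352 KB of
bank-switchable physical memory.
*/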

size_t esp_himem_get_free_size(void)
{
    size_t ret = 0;
    for (int i = 0; i < s_ramblockcnt; i++) {
        if (!s_ram_descriptor[i].is_alloced) ret += CACHE_BLOCKSIZE;
    }
    return ret;
}

size_t esp_himem_reserved_area_size(void)
{
    return CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE;
}


void __attribute__((constructor)) esp_himem_init(void)
{
    if (SPIRAM_BANKSWITCH_RESERVE == 0) return;
    uint32_t maxram = 0;
    esp_psram_impl_get_available_size(&maxram);
    //catch double init
    ESP_RETURN_ON_FALSE(s_ram_descriptor == NULL,  , TAG, "already initialized"); //Looks weird; last arg is empty so it expands to 'return ;'
    ESP_RETURN_ON_FALSE(s_range_descriptor == NULL,  , TAG, "already initialized");
    //need to have some reserved banks
    ESP_RETURN_ON_FALSE(SPIRAM_BANKSWITCH_RESERVE != 0,  , TAG, "No banks reserved for himem");
    //Start and end of physical reserved memory. Note it starts slightly under
    //the 4MiB mark as the reserved banks can't have a unity mapping to be used by malloc
    //anymore; we treat them as himem instead.
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    int paddr_end = maxram;
    s_ramblockcnt = ((paddr_end - paddr_start) / CACHE_BLOCKSIZE);
    //Allocate data structures
    s_ram_descriptor = calloc(sizeof(ramblock_t), s_ramblockcnt);
    s_range_descriptor = calloc(sizeof(rangeblock_t), SPIRAM_BANKSWITCH_RESERVE);
    if (s_ram_descriptor == NULL || s_range_descriptor == NULL) {
        ESP_EARLY_LOGE(TAG, "Cannot allocate memory for meta info. Not initializing!");
        free(s_ram_descriptor);
        free(s_range_descriptor);
        return;
    }
    ESP_EARLY_LOGI(TAG, "Initialized. Using last %d 32KB address blocks for bank switching on %d KB of physical memory.",
                SPIRAM_BANKSWITCH_RESERVE, (paddr_end - paddr_start)/1024);
}


//Allocate count not-necessarily-consecutive physical RAM blocks; return their indices in blocks_out[].
//Returns true if the blocks could be allocated, false if not.
static bool allocate_blocks(int count, uint16_t *blocks_out)
{
    int n = 0;
    for (int i = 0; i < s_ramblockcnt && n != count; i++) {
        if (!s_ram_descriptor[i].is_alloced) {
            blocks_out[n] = i;
            n++;
        }
    }
    if (n == count) {
        //All blocks could be allocated. Mark as in use.
        for (int i = 0; i < count; i++) {
            s_ram_descriptor[blocks_out[i]].is_alloced = true;
            assert(s_ram_descriptor[blocks_out[i]].is_mapped == false);
        }
        return true;
    } else {
        //Error allocating blocks
        return false;
    }
}


esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out)
{
    if (size % CACHE_BLOCKSIZE != 0) {
        return ESP_ERR_INVALID_SIZE;
    }
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_ramdata_t *r = calloc(sizeof(esp_himem_ramdata_t), 1);
    if (!r) {
        goto nomem;
    }
    r->block = calloc(sizeof(uint16_t), blocks);
    if (!r->block) {
        goto nomem;
    }
    portENTER_CRITICAL(&spinlock);
    int ok = allocate_blocks(blocks, r->block);
    portEXIT_CRITICAL(&spinlock);
    if (!ok) {
        goto nomem;
    }
    r->block_ct = blocks;
    *handle_out = r;
    return ESP_OK;
nomem:
    if (r) {
        free(r->block);
    }
    free(r);
    return ESP_ERR_NO_MEM;
}

esp_err_t esp_himem_free(esp_himem_handle_t handle)
{
    //Check if any of the blocks is still mapped; fail if this is the case.
    for (int i = 0; i < handle->block_ct; i++) {
        assert(ramblock_idx_valid(handle->block[i]));
        ESP_RETURN_ON_FALSE(!s_ram_descriptor[handle->block[i]].is_mapped, ESP_ERR_INVALID_ARG, TAG, "block in range still mapped");
    }
    //Mark blocks as free
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_ram_descriptor[handle->block[i]].is_alloced = false;
    }
    portEXIT_CRITICAL(&spinlock);

    //Free handle
    free(handle->block);
    free(handle);
    return ESP_OK;
}


esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out)
{
    ESP_RETURN_ON_FALSE(s_ram_descriptor != NULL, ESP_ERR_INVALID_STATE, TAG, "Himem not available!");
    ESP_RETURN_ON_FALSE(size % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_SIZE, TAG, "requested size not aligned to blocksize");
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_rangedata_t *r = calloc(sizeof(esp_himem_rangedata_t), 1);
    if (!r) {
        return ESP_ERR_NO_MEM;
    }
    r->block_ct = blocks;
    r->block_start = -1;
    int start_free = 0;
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < s_rangeblockcnt; i++) {
        if (s_range_descriptor[i].is_alloced) {
            start_free = i + 1; //optimistically assume next block is free...
        } else if (i - start_free == blocks - 1) {
            //We found a span of blocks that's big enough to allocate the requested range in.
            r->block_start = start_free;
            break;
        }
    }

    if (r->block_start == -1) {
        //Couldn't find enough free blocks
        free(r);
        portEXIT_CRITICAL(&spinlock);
        return ESP_ERR_NO_MEM;
    }
    //Range is found. Mark the blocks as in use.
    for (int i = 0; i < blocks; i++) {
        s_range_descriptor[r->block_start + i].is_alloced = 1;
    }
    portEXIT_CRITICAL(&spinlock);
    //All done.
    *handle_out = r;
    return ESP_OK;
}

esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle)
{
    //Check if any of the blocks in the range have a mapping
    for (int i = 0; i < handle->block_ct; i++) {
        assert(rangeblock_idx_valid(handle->block_start + i));
        assert(s_range_descriptor[i + handle->block_start].is_alloced == 1); //should be, if handle is valid
        ESP_RETURN_ON_FALSE(!s_range_descriptor[i + handle->block_start].is_mapped, ESP_ERR_INVALID_ARG, TAG, "memory still mapped to range");
    }
    //We should be good to free this. Mark blocks as free.
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_range_descriptor[i + handle->block_start].is_alloced = 0;
    }
    portEXIT_CRITICAL(&spinlock);
    free(handle);
    return ESP_OK;
}


esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr)
{
    int ram_block = ram_offset / CACHE_BLOCKSIZE;
    int range_block = range_offset / CACHE_BLOCKSIZE;
    int blockcount = len / CACHE_BLOCKSIZE;
    ESP_RETURN_ON_FALSE(s_ram_descriptor != NULL, ESP_ERR_INVALID_STATE, TAG, "Himem not available!");
    //Offsets and length must be block-aligned
    ESP_RETURN_ON_FALSE(ram_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "ram offset not aligned to blocksize");
    ESP_RETURN_ON_FALSE(range_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "range not aligned to blocksize");
    ESP_RETURN_ON_FALSE(len % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "length not aligned to blocksize");
    //ram and range should be within allocated range
    ESP_RETURN_ON_FALSE(ram_block + blockcount <= handle->block_ct, ESP_ERR_INVALID_SIZE, TAG, "args not in range of phys ram handle");
    ESP_RETURN_ON_FALSE(range_block + blockcount <= range->block_ct, ESP_ERR_INVALID_SIZE, TAG, "args not in range of range handle");

    //Check if ram blocks aren't already mapped, and if memory range is unmapped
    for (int i = 0; i < blockcount; i++) {
        ESP_RETURN_ON_FALSE(!s_ram_descriptor[handle->block[i + ram_block]].is_mapped, ESP_ERR_INVALID_STATE, TAG, "ram already mapped");
        ESP_RETURN_ON_FALSE(!s_range_descriptor[range->block_start + i + range_block].is_mapped, ESP_ERR_INVALID_STATE, TAG, "range already mapped");
    }

    //Map and mark as mapped
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        assert(ramblock_idx_valid(handle->block[i + ram_block]));
        s_ram_descriptor[handle->block[i + ram_block]].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].ram_block = handle->block[i + ram_block];
    }
    portEXIT_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        set_bank(VIRT_HIMEM_RANGE_BLOCKSTART + range->block_start + i + range_block, handle->block[i + ram_block] + PHYS_HIMEM_BLOCKSTART, 1);
    }

    //Set out pointer
    *out_ptr = (void *)(VIRT_HIMEM_RANGE_START + (range->block_start + range_block) * CACHE_BLOCKSIZE);
    return ESP_OK;
}

esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len)
{
    //Note: doesn't actually unmap, just clears cache and marks blocks as unmapped.
    //Future optimization: could actually lazy-unmap here: essentially, do nothing and only clear the cache when we re-use
    //the block for a different physical address.
    int range_offset = (uint32_t)ptr - VIRT_HIMEM_RANGE_START;
    int range_block = (range_offset / CACHE_BLOCKSIZE) - range->block_start;
    int blockcount = len / CACHE_BLOCKSIZE;
    ESP_RETURN_ON_FALSE(range_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "range offset not block-aligned");
    ESP_RETURN_ON_FALSE(len % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "map length not block-aligned");
    ESP_RETURN_ON_FALSE(range_block + blockcount <= range->block_ct, ESP_ERR_INVALID_ARG, TAG, "range out of bounds for handle");

    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        int ramblock = s_range_descriptor[range->block_start + i + range_block].ram_block;
        assert(ramblock_idx_valid(ramblock));
        s_ram_descriptor[ramblock].is_mapped = 0;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 0;
    }
    esp_psram_extram_writeback_cache();
    portEXIT_CRITICAL(&spinlock);
    return ESP_OK;
}