/*
 * SPDX-FileCopyrightText: 2018-2021 Espressif Systems (Shanghai) CO LTD
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <assert.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp32/spiram.h"
#include "esp32/rom/cache.h"
#include "sdkconfig.h"
#include "esp32/himem.h"
#include "soc/soc.h"
#include "esp_log.h"
#include "esp_check.h"

/*
So, why does the API look this way, and why is it so inflexible that it does not allow any mappings other than full
32K chunks? Most of it has to do with the fact that the cache works on *virtual* addresses. What this comes down to
is that while it's allowed to map a range of physical memory into the address space twice, there's no cache
consistency between the two regions.

This means that a write to region A may or may not show up, perhaps delayed, in region B, as it depends on
the time that the writeback to SPI RAM is done on A and the time before the corresponding cache line is invalidated
on B. Note that this goes for every 32-byte cache line: this implies that if a program writes to address X and Y within
A, the write to Y may show up before the write to X does.

It gets even worse when both A and B are written: theoretically, a write to a 32-byte cache line in A can be entirely
undone because of a write to a different address in B that happens to be in the same 32-byte cache line.

For these reasons, we do not allow double mappings at all. This, however, has other implications that make
supporting arbitrary ranges not really useful. Because of the lack of double mappings, applications will need to do
their own management of mapped regions, meaning they will normally map blocks in and out at a time anyway, as mapping
more arbitrarily-sized regions would risk accidentally mapping two overlapping regions. As this is the case,
to keep the code simple, at the moment we just force these blocks to be equal to the 32K MMU page size. The API
itself does allow for more granular allocations, so if there's a pressing need for a more complex solution in the
future, we can do this.

Note: In the future, we can expand on this API to do a memcpy() between SPI RAM and (internal) memory using the SPI1
peripheral. This needs support for SPI1 to be in the SPI driver, however.
*/
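
/*
For reference, a minimal usage sketch (illustrative only, not compiled here; error handling and the <string.h>
include for memset are omitted). ESP_HIMEM_BLKSZ is the public 32K block-size macro from esp32/himem.h. The flow
is: reserve physical blocks, reserve an address-space window, bank-switch the blocks into the window, use them,
then unmap so the window can be reused:

    esp_himem_handle_t mem;         //handle for a set of physical 32K blocks
    esp_himem_rangehandle_t range;  //handle for a 32K window in the virtual address space
    void *ptr;
    ESP_ERROR_CHECK(esp_himem_alloc(ESP_HIMEM_BLKSZ, &mem));
    ESP_ERROR_CHECK(esp_himem_alloc_map_range(ESP_HIMEM_BLKSZ, &range));
    ESP_ERROR_CHECK(esp_himem_map(mem, range, 0, 0, ESP_HIMEM_BLKSZ, 0, &ptr));
    memset(ptr, 0xAA, ESP_HIMEM_BLKSZ);  //use the window like normal memory
    ESP_ERROR_CHECK(esp_himem_unmap(range, ptr, ESP_HIMEM_BLKSZ));
    ESP_ERROR_CHECK(esp_himem_free_map_range(range));
    ESP_ERROR_CHECK(esp_himem_free(mem));
*/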

#if CONFIG_SPIRAM_BANKSWITCH_ENABLE
#define SPIRAM_BANKSWITCH_RESERVE CONFIG_SPIRAM_BANKSWITCH_RESERVE
#else
#define SPIRAM_BANKSWITCH_RESERVE 0
#endif

#define CACHE_BLOCKSIZE (32*1024)

//Start of the virtual address range reserved for himem use
#define VIRT_HIMEM_RANGE_START (SOC_EXTRAM_DATA_LOW+(128-SPIRAM_BANKSWITCH_RESERVE)*CACHE_BLOCKSIZE)
//Start MMU block reserved for himem use
#define VIRT_HIMEM_RANGE_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
//Start physical block
#define PHYS_HIMEM_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
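
/*
Worked example: on the ESP32, SOC_EXTRAM_DATA_LOW is 0x3F800000 and the cacheable external RAM window is 128
blocks of 32K (4MiB). With e.g. CONFIG_SPIRAM_BANKSWITCH_RESERVE = 8, the last 8 blocks are reserved, so the
himem window starts at MMU block 120, i.e. virtual address 0x3F800000 + 120*32K = 0x3FBC0000, and the
bank-switched physical memory starts at physical block 120 (120*32K = 3840K, just under the 4MiB mark).
*/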

#define TAG "esp_himem"

// Metadata for a block of physical RAM
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
} ramblock_t;

//Metadata for a 32K memory address range
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
    unsigned int ram_block: 16;
} rangeblock_t;

static ramblock_t *s_ram_descriptor = NULL;
static rangeblock_t *s_range_descriptor = NULL;
static int s_ramblockcnt = 0;
static const int s_rangeblockcnt = SPIRAM_BANKSWITCH_RESERVE;

//Handle for a window of address space
typedef struct esp_himem_rangedata_t {
    int block_ct;
    int block_start;
} esp_himem_rangedata_t;

//Handle for a range of physical memory
typedef struct esp_himem_ramdata_t {
    int block_ct;
    uint16_t *block;
} esp_himem_ramdata_t;

static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED;

static inline int ramblock_idx_valid(int ramblock_idx)
{
    return (ramblock_idx >= 0 && ramblock_idx < s_ramblockcnt);
}

static inline int rangeblock_idx_valid(int rangeblock_idx)
{
    return (rangeblock_idx >= 0 && rangeblock_idx < s_rangeblockcnt);
}

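//Point 'ct' consecutive 32K virtual banks, starting at 'virt_bank', at the physical SPI RAM banks starting at
//'phys_bank'. The MMU is programmed identically for both CPUs (first argument to cache_sram_mmu_set), so both
//cores see the same mapping.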
static void set_bank(int virt_bank, int phys_bank, int ct)
{
    int r __attribute__((unused));
    r = cache_sram_mmu_set( 0, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
    assert(r == 0);
    r = cache_sram_mmu_set( 1, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
    assert(r == 0);
}

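//Size of the physical SPI RAM pool that is only reachable through himem: everything above the 4MiB that can be
//mapped permanently, plus the reserved banks that were carved out of that 4MiB for bank switching.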
size_t esp_himem_get_phys_size(void)
{
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    return esp_spiram_get_size() - paddr_start;
}

size_t esp_himem_get_free_size(void)
{
    size_t ret = 0;
    for (int i = 0; i < s_ramblockcnt; i++) {
        if (!s_ram_descriptor[i].is_alloced) ret += CACHE_BLOCKSIZE;
    }
    return ret;
}

size_t esp_himem_reserved_area_size(void)
{
    return CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE;
}


void __attribute__((constructor)) esp_himem_init(void)
{
    if (SPIRAM_BANKSWITCH_RESERVE == 0) return;
    int maxram = esp_spiram_get_size();
    //catch double init
    ESP_RETURN_ON_FALSE(s_ram_descriptor == NULL,  , TAG, "already initialized"); //Looks weird; last arg is empty so it expands to 'return ;'
    ESP_RETURN_ON_FALSE(s_range_descriptor == NULL,  , TAG, "already initialized");
    //need to have some reserved banks
    ESP_RETURN_ON_FALSE(SPIRAM_BANKSWITCH_RESERVE != 0,  , TAG, "No banks reserved for himem");
    //Start and end of physical reserved memory. Note it starts slightly under
    //the 4MiB mark as the reserved banks can't have a unity mapping to be used by malloc
    //anymore; we treat them as himem instead.
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    int paddr_end = maxram;
    s_ramblockcnt = ((paddr_end - paddr_start) / CACHE_BLOCKSIZE);
    //Allocate data structures
    s_ram_descriptor = calloc(s_ramblockcnt, sizeof(ramblock_t));
    s_range_descriptor = calloc(SPIRAM_BANKSWITCH_RESERVE, sizeof(rangeblock_t));
    if (s_ram_descriptor == NULL || s_range_descriptor == NULL) {
        ESP_EARLY_LOGE(TAG, "Cannot allocate memory for meta info. Not initializing!");
        free(s_ram_descriptor);
        free(s_range_descriptor);
        //Reset the pointers so the 'himem available' checks elsewhere keep failing
        s_ram_descriptor = NULL;
        s_range_descriptor = NULL;
        return;
    }
    ESP_EARLY_LOGI(TAG, "Initialized. Using last %d 32KB address blocks for bank switching on %d KB of physical memory.",
                SPIRAM_BANKSWITCH_RESERVE, (paddr_end - paddr_start)/1024);
}


//Allocate 'count' not-necessarily-consecutive physical RAM blocks and return their numbers in blocks_out[].
//Returns true if the blocks could be allocated, false if not.
static bool allocate_blocks(int count, uint16_t *blocks_out)
{
    int n = 0;
    for (int i = 0; i < s_ramblockcnt && n != count; i++) {
        if (!s_ram_descriptor[i].is_alloced) {
            blocks_out[n] = i;
            n++;
        }
    }
    if (n == count) {
        //All blocks could be allocated. Mark as in use.
        for (int i = 0; i < count; i++) {
            s_ram_descriptor[blocks_out[i]].is_alloced = true;
            assert(s_ram_descriptor[blocks_out[i]].is_mapped == false);
        }
        return true;
    } else {
        //Not enough free blocks
        return false;
    }
}


esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out)
{
    if (size % CACHE_BLOCKSIZE != 0) {
        return ESP_ERR_INVALID_SIZE;
    }
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_ramdata_t *r = calloc(1, sizeof(esp_himem_ramdata_t));
    if (!r) {
        goto nomem;
    }
    r->block = calloc(blocks, sizeof(uint16_t));
    if (!r->block) {
        goto nomem;
    }
    portENTER_CRITICAL(&spinlock);
    bool ok = allocate_blocks(blocks, r->block);
    portEXIT_CRITICAL(&spinlock);
    if (!ok) {
        goto nomem;
    }
    r->block_ct = blocks;
    *handle_out = r;
    return ESP_OK;
nomem:
    if (r) {
        free(r->block);
    }
    free(r);
    return ESP_ERR_NO_MEM;
}

esp_err_t esp_himem_free(esp_himem_handle_t handle)
{
    //Check if any of the blocks is still mapped; fail if this is the case.
    for (int i = 0; i < handle->block_ct; i++) {
        assert(ramblock_idx_valid(handle->block[i]));
        ESP_RETURN_ON_FALSE(!s_ram_descriptor[handle->block[i]].is_mapped, ESP_ERR_INVALID_ARG, TAG, "block in range still mapped");
    }
    //Mark blocks as free
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_ram_descriptor[handle->block[i]].is_alloced = false;
    }
    portEXIT_CRITICAL(&spinlock);

    //Free handle
    free(handle->block);
    free(handle);
    return ESP_OK;
}


esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out)
{
    ESP_RETURN_ON_FALSE(s_ram_descriptor != NULL, ESP_ERR_INVALID_STATE, TAG, "Himem not available!");
    ESP_RETURN_ON_FALSE(size % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_SIZE, TAG, "requested size not aligned to blocksize");
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_rangedata_t *r = calloc(1, sizeof(esp_himem_rangedata_t));
    if (!r) {
        return ESP_ERR_NO_MEM;
    }
    r->block_ct = blocks;
    r->block_start = -1;
    int start_free = 0;
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < s_rangeblockcnt; i++) {
        if (s_range_descriptor[i].is_alloced) {
            start_free = i + 1; //optimistically assume next block is free...
        } else if (i - start_free == blocks - 1) {
            //We found a span of blocks that's big enough to allocate the requested range in.
            r->block_start = start_free;
            break;
        }
    }

    if (r->block_start == -1) {
        //Couldn't find enough free blocks
        free(r);
        portEXIT_CRITICAL(&spinlock);
        return ESP_ERR_NO_MEM;
    }
    //Range is found. Mark the blocks as in use.
    for (int i = 0; i < blocks; i++) {
        s_range_descriptor[r->block_start + i].is_alloced = 1;
    }
    portEXIT_CRITICAL(&spinlock);
    //All done.
    *handle_out = r;
    return ESP_OK;
}

esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle)
{
    //Check if any of the blocks in the range have a mapping
    for (int i = 0; i < handle->block_ct; i++) {
        assert(rangeblock_idx_valid(handle->block_start + i));
        assert(s_range_descriptor[i + handle->block_start].is_alloced == 1); //should be, if handle is valid
        ESP_RETURN_ON_FALSE(!s_range_descriptor[i + handle->block_start].is_mapped, ESP_ERR_INVALID_ARG, TAG, "memory still mapped to range");
    }
    //We should be good to free this. Mark blocks as free.
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_range_descriptor[i + handle->block_start].is_alloced = 0;
    }
    portEXIT_CRITICAL(&spinlock);
    free(handle);
    return ESP_OK;
}


esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr)
{
    int ram_block = ram_offset / CACHE_BLOCKSIZE;
    int range_block = range_offset / CACHE_BLOCKSIZE;
    int blockcount = len / CACHE_BLOCKSIZE;
    ESP_RETURN_ON_FALSE(s_ram_descriptor != NULL, ESP_ERR_INVALID_STATE, TAG, "Himem not available!");
    //Offsets and length must be block-aligned
    ESP_RETURN_ON_FALSE(ram_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "ram offset not aligned to blocksize");
    ESP_RETURN_ON_FALSE(range_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "range not aligned to blocksize");
    ESP_RETURN_ON_FALSE(len % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "length not aligned to blocksize");
    //ram and range should be within allocated range
    ESP_RETURN_ON_FALSE(ram_block + blockcount <= handle->block_ct, ESP_ERR_INVALID_SIZE, TAG, "args not in range of phys ram handle");
    ESP_RETURN_ON_FALSE(range_block + blockcount <= range->block_ct, ESP_ERR_INVALID_SIZE, TAG, "args not in range of range handle");

    //Check that the ram blocks aren't already mapped and that the memory range is unmapped
    for (int i = 0; i < blockcount; i++) {
        ESP_RETURN_ON_FALSE(!s_ram_descriptor[handle->block[i + ram_block]].is_mapped, ESP_ERR_INVALID_STATE, TAG, "ram already mapped");
        ESP_RETURN_ON_FALSE(!s_range_descriptor[range->block_start + i + range_block].is_mapped, ESP_ERR_INVALID_STATE, TAG, "range already mapped");
    }

    //Map and mark as mapped
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        assert(ramblock_idx_valid(handle->block[i + ram_block]));
        s_ram_descriptor[handle->block[i + ram_block]].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].ram_block = handle->block[i + ram_block];
    }
    portEXIT_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        set_bank(VIRT_HIMEM_RANGE_BLOCKSTART + range->block_start + i + range_block, handle->block[i + ram_block] + PHYS_HIMEM_BLOCKSTART, 1);
    }

    //Set out pointer. Note that range_offset is a byte offset into the window (already checked to be
    //block-aligned), while block_start counts 32K blocks, so only the latter is scaled by CACHE_BLOCKSIZE.
    *out_ptr = (void *)(VIRT_HIMEM_RANGE_START + (range->block_start * CACHE_BLOCKSIZE) + range_offset);
    return ESP_OK;
}

esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len)
{
    //Note: doesn't actually unmap, just clears cache and marks blocks as unmapped.
    //Future optimization: could actually lazy-unmap here: essentially, do nothing and only clear the cache when we re-use
    //the block for a different physical address.
    int range_offset = (uint32_t)ptr - VIRT_HIMEM_RANGE_START;
    int range_block = (range_offset / CACHE_BLOCKSIZE) - range->block_start;
    int blockcount = len / CACHE_BLOCKSIZE;
    ESP_RETURN_ON_FALSE(range_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "range offset not block-aligned");
    ESP_RETURN_ON_FALSE(len % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "map length not block-aligned");
    ESP_RETURN_ON_FALSE(range_block + blockcount <= range->block_ct, ESP_ERR_INVALID_ARG, TAG, "range out of bounds for handle");

    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        int ramblock = s_range_descriptor[range->block_start + i + range_block].ram_block;
        assert(ramblock_idx_valid(ramblock));
        s_ram_descriptor[ramblock].is_mapped = 0;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 0;
    }
    esp_spiram_writeback_cache();
    portEXIT_CRITICAL(&spinlock);
    return ESP_OK;
}