/**
 * @file lv_gpu_stm32_dma2d.c
 *
 */

/*********************
 *      INCLUDES
 *********************/
#include "lv_gpu_stm32_dma2d.h"
#include "../../core/lv_refr.h"

#if LV_USE_GPU_STM32_DMA2D

/*********************
 *      DEFINES
 *********************/
#if LV_COLOR_16_SWAP
    // Note: DMA2D red/blue swap (RBS) works for all color modes
    #define RBS_BIT 1U
#else
    #define RBS_BIT 0U
#endif

#define CACHE_ROW_SIZE 32U // cache row size in bytes

// For code/implementation discussion refer to https://github.com/lvgl/lvgl/issues/3714#issuecomment-1365187036
// astyle --options=lvgl/scripts/code-format.cfg --ignore-exclude-errors lvgl/src/draw/stm32_dma2d/*.c lvgl/src/draw/stm32_dma2d/*.h

#if LV_COLOR_DEPTH == 16
    const dma2d_color_format_t LvglColorFormat = RGB565;
#elif LV_COLOR_DEPTH == 32
    const dma2d_color_format_t LvglColorFormat = ARGB8888;
#else
    #error "Cannot use DMA2D with LV_COLOR_DEPTH other than 16 or 32"
#endif

static bool isDma2dInProgess = false; // indicates whether a DMA2D transfer *initiated here* is in progress

/**
 * Turn on the peripheral and set the output color mode. This only needs to be done once.
 */
void lv_draw_stm32_dma2d_init(void)
{
    // Enable DMA2D clock
#if defined(STM32F4) || defined(STM32F7)
    RCC->AHB1ENR |= RCC_AHB1ENR_DMA2DEN; // enable DMA2D
#elif defined(STM32H7)
    RCC->AHB3ENR |= RCC_AHB3ENR_DMA2DEN;
#else
# warning "LVGL can't enable the clock of DMA2D"
#endif

    // Wait for hardware access to complete
    __asm volatile("DSB\n");

    // Delay after setting peripheral clock
    volatile uint32_t temp = RCC->AHB1ENR;
    LV_UNUSED(temp);

    // AHB master timer configuration
    DMA2D->AMTCR = 0; // AHB bus guaranteed dead time disabled
#if defined(LV_STM32_DMA2D_TEST)
    _lv_gpu_stm32_dwt_init(); // init µs timer
#endif
}
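
/*
 * Usage sketch (illustrative only, not part of this driver): with LV_USE_GPU_STM32_DMA2D enabled,
 * lv_disp_drv_init() normally selects this draw context automatically and lv_init() enables the
 * peripheral. The snippet below shows how the pieces fit together when wiring it up explicitly in
 * application code; `my_flush_cb`, `buf1` and MY_DISP_HOR_RES are hypothetical names.
 *
 *    static lv_disp_draw_buf_t draw_buf;
 *    static lv_disp_drv_t disp_drv;
 *    // 32-byte aligned draw buffer, since this driver cleans/invalidates its cache lines
 *    __attribute__((aligned(32))) static lv_color_t buf1[MY_DISP_HOR_RES * 40];
 *
 *    void my_display_init(void)
 *    {
 *        lv_draw_stm32_dma2d_init(); // enable the DMA2D peripheral once (lv_init() may already do this)
 *        lv_disp_draw_buf_init(&draw_buf, buf1, NULL, MY_DISP_HOR_RES * 40);
 *        lv_disp_drv_init(&disp_drv);
 *        disp_drv.draw_buf = &draw_buf;
 *        disp_drv.flush_cb = my_flush_cb;
 *        disp_drv.draw_ctx_init = lv_draw_stm32_dma2d_ctx_init;     // use the DMA2D draw context
 *        disp_drv.draw_ctx_deinit = lv_draw_stm32_dma2d_ctx_deinit;
 *        disp_drv.draw_ctx_size = sizeof(lv_draw_stm32_dma2d_ctx_t);
 *        lv_disp_drv_register(&disp_drv);
 *    }
 */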

void lv_draw_stm32_dma2d_ctx_init(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx)
{
    lv_draw_sw_init_ctx(drv, draw_ctx);

    lv_draw_stm32_dma2d_ctx_t * dma2d_draw_ctx = (lv_draw_sw_ctx_t *)draw_ctx;

    dma2d_draw_ctx->blend = lv_draw_stm32_dma2d_blend;
    dma2d_draw_ctx->base_draw.draw_img_decoded = lv_draw_stm32_dma2d_img_decoded;
    //dma2d_draw_ctx->base_draw.draw_img = lv_draw_stm32_dma2d_img;
    // Note: currently it does not make sense to use lv_gpu_stm32_dma2d_wait_cb() since waiting starts right after the dma2d transfer
    //dma2d_draw_ctx->base_draw.wait_for_finish = lv_gpu_stm32_dma2d_wait_cb;
    dma2d_draw_ctx->base_draw.buffer_copy = lv_draw_stm32_dma2d_buffer_copy;
}

void lv_draw_stm32_dma2d_ctx_deinit(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx)
{
    LV_UNUSED(drv);
    LV_UNUSED(draw_ctx);
}

static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc)
{
    if(dsc->blend_mode != LV_BLEND_MODE_NORMAL) {
        lv_draw_sw_blend_basic(draw_ctx, dsc);
        return;
    }
    // Note: x1 must be zero. Otherwise, there is no way to correctly calculate dest_stride.
    //LV_ASSERT_MSG(draw_ctx->buf_area->x1 == 0); // critical?
    // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated.
    //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
    //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
    //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?

    if(dsc->src_buf) {
        // For performance reasons, both source buffer start address and buffer size *should* be 32-byte aligned since source buffer cache is being cleaned.
        //uint32_t srcBufferLength = lv_area_get_size(dsc->blend_area) * sizeof(lv_color_t);
        //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
        //LV_ASSERT_MSG((uint32_t)dsc->src_buf % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
    }

    lv_area_t draw_area;
    if(!_lv_area_intersect(&draw_area, dsc->blend_area, draw_ctx->clip_area)) return;
    // + draw_ctx->buf_area has the entire draw buffer location
    // + draw_ctx->clip_area has the current draw buffer location
    // + dsc->blend_area has the location of the area intended to be painted - image etc.
    // + draw_area has the area actually being painted
    // All coordinates are relative to the screen.
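    // Illustrative example (hypothetical numbers): refreshing the lower half of a 240x320 screen,
    // buf_area = clip_area = (0,160)-(239,319); an image with blend_area = (100,100)-(199,199) then
    // yields draw_area = (100,160)-(199,199), which the branches below translate by
    // (-buf_area->x1, -buf_area->y1) to (100,0)-(199,39) before indexing into draw_ctx->buf.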

    const lv_opa_t * mask = dsc->mask_buf;

    if(dsc->mask_buf && dsc->mask_res == LV_DRAW_MASK_RES_TRANSP) return;
    else if(dsc->mask_res == LV_DRAW_MASK_RES_FULL_COVER) mask = NULL;

    lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
    if(mask != NULL) {
        // For performance reasons, both mask buffer start address and buffer size *should* be 32-byte aligned since mask buffer cache is being cleaned.
        //uint32_t srcBufferLength = lv_area_get_size(dsc->mask_area) * sizeof(lv_opa_t);
        //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
        //LV_ASSERT_MSG((uint32_t)mask % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)

        lv_coord_t mask_stride = lv_area_get_width(dsc->mask_area);
        lv_point_t mask_offset = lv_area_get_offset(dsc->mask_area, &draw_area); // mask offset in relation to draw_area

        if(dsc->src_buf == NULL) {  // 93.5%
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
            _lv_draw_stm32_dma2d_blend_paint(draw_ctx->buf, dest_stride, &draw_area, mask, mask_stride, &mask_offset, dsc->color,
                                             dsc->opa);
        }
        else {   // 0.2%
            // note: (x)RGB dsc->src_buf does not carry alpha channel bytes,
            // alpha channel bytes are carried in dsc->mask_buf
#if LV_COLOR_DEPTH == 32
            lv_coord_t src_stride = lv_area_get_width(dsc->blend_area);
            lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area
            lv_coord_t draw_width = lv_area_get_width(&draw_area);
            lv_coord_t draw_height = lv_area_get_height(&draw_area);

            // merge mask alpha bytes with src RGB bytes
            // TODO: optimize by reading 4 or 8 mask bytes at a time
            mask += (mask_stride * mask_offset.y) + mask_offset.x;
            lv_color_t * src_buf = (lv_color_t *)dsc->src_buf;
            src_buf += (src_stride * src_offset.y) + src_offset.x;
            uint16_t mask_buffer_offset = mask_stride - draw_width;
            uint16_t src_buffer_offset = src_stride - draw_width;
            while(draw_height > 0) {
                draw_height--;
                for(uint16_t x = 0; x < draw_width; x++) {
                    (*src_buf).ch.alpha = *mask;
                    src_buf++;
                    mask++;
                }
                mask += mask_buffer_offset;
                src_buf += src_buffer_offset;
            }

            lv_area_move(&draw_area, -draw_ctx->buf_area->x1,
                         -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area
            _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset, dsc->opa,
                                           ARGB8888, false);
#else
            // Note: 16-bit bitmap hardware blending with mask and background is possible, but requires a temp 24 or 32-bit buffer to combine bitmap with mask first.

            lv_draw_sw_blend_basic(draw_ctx, dsc); // (e.g. Shop Items)
            // clean cache after software drawing - this does not help since this is not the only place where buffer is written without dma2d
            // lv_coord_t draw_width = lv_area_get_width(&draw_area);
            // lv_coord_t draw_height = lv_area_get_height(&draw_area);
            // uint32_t dest_address = (uint32_t)(draw_ctx->buf + (dest_stride * draw_area.y1) + draw_area.x1);
            // _lv_gpu_stm32_dma2d_clean_cache(dest_address, dest_stride - draw_width, draw_width, draw_height, sizeof(lv_color_t));
#endif
        }
    }
    else {
        if(dsc->src_buf == NULL) {  // 6.1%
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1,
                         -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area
            _lv_draw_stm32_dma2d_blend_fill(draw_ctx->buf, dest_stride, &draw_area, dsc->color, dsc->opa);
        }
        else {   // 0.2%
            lv_coord_t src_stride = lv_area_get_width(dsc->blend_area);
            lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1,
                         -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area
            _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset, dsc->opa,
                                           LvglColorFormat, true);
        }
    }
}

// Does dest_area = intersect(draw_ctx->clip_area, src_area) ?
// See: https://github.com/lvgl/lvgl/issues/3714#issuecomment-1331710788
static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, void * dest_buf, lv_coord_t dest_stride,
                                            const lv_area_t * dest_area, void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area)
{
    // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated.
    //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
    //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
    //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?
    // FIXME:
    // 1. Both src_buf and dest_buf pixel size *must* be known to use DMA2D.
    // 2. Verify both buffers start addresses and lengths are 32-byte (cache row size) aligned.
    LV_UNUSED(draw_ctx);
    lv_point_t src_offset = lv_area_get_offset(src_area, dest_area);
    // FIXME: use lv_area_move(dest_area, -dest_area->x1, -dest_area->y1) here ?
    // TODO: It is assumed that dest_buf and src_buf buffers are of lv_color_t type. Verify it, this assumption may be incorrect.
    _lv_draw_stm32_dma2d_blend_map((const lv_color_t *)dest_buf, dest_stride, dest_area, (const lv_color_t *)src_buf,
                                   src_stride, &src_offset, 0xff, LvglColorFormat, true);
    // TODO: Investigate if output buffer cache needs to be invalidated. It depends on what the destination buffer is and how it is used next - by dma2d or not.
    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // TODO: is this line needed here?
}

static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                            const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format)
{
    if(draw_ctx->draw_img_decoded == NULL) return;
    lv_area_t draw_area;
    lv_area_copy(&draw_area, draw_ctx->clip_area);

    bool mask_any = lv_draw_mask_is_any(&draw_area);
    bool transform = img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE;
    const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(color_format);
    const bool ignoreBitmapAlpha = (color_format == LV_IMG_CF_RGBX8888);

    if(!mask_any && !transform && bitmapColorFormat != UNSUPPORTED && img_dsc->recolor_opa == LV_OPA_TRANSP) {
        // simple bitmap blending, optionally with supported color format conversion - handled directly by dma2d
        lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
        lv_coord_t src_stride = lv_area_get_width(coords);
        lv_point_t src_offset = lv_area_get_offset(coords, &draw_area); // source image offset in relation to draw_area
        lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
        _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, src_buf, src_stride, &src_offset,
                                       img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha);
    }
    else {
        // all more complex cases which require additional image transformations
        lv_draw_sw_img_decoded(draw_ctx, img_dsc, coords, src_buf, color_format);
    }
}

static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2)
{
    lv_point_t offset = {.x = area2->x1 - area1->x1, .y = area2->y1 - area1->y1};
    return offset;
}
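// Worked example (hypothetical values): if area1 = (10,20)-(99,79) is a source bitmap placed on the
// screen and area2 = (30,20)-(99,79) is the clipped draw area, the returned offset is {.x = 20, .y = 0},
// i.e. the first 20 source columns are skipped when the bitmap is fed to DMA2D.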

static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format)
{
    switch(color_format) {
        case LV_IMG_CF_RGBA8888:
            // note: LV_IMG_CF_RGBA8888 is actually ARGB8888
            return ARGB8888;
        case LV_IMG_CF_RGBX8888:
            // note: LV_IMG_CF_RGBX8888 is actually XRGB8888
            return ARGB8888;
        case LV_IMG_CF_RGB565:
            return RGB565;
        case LV_IMG_CF_TRUE_COLOR:
            return LvglColorFormat;
        case LV_IMG_CF_TRUE_COLOR_ALPHA:
#if LV_COLOR_DEPTH == 16
            // bitmap color format is 24b ARGB8565 - dma2d unsupported
            return UNSUPPORTED;
#elif LV_COLOR_DEPTH == 32
            return ARGB8888;
#else
            // unknown bitmap color format
            return UNSUPPORTED;
#endif
        default:
            return UNSUPPORTED;
    }
}

static lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                        const lv_area_t * src_area,
                                        const void * src)
{
    //if(lv_img_src_get_type(src) != LV_IMG_SRC_VARIABLE) return LV_RES_INV;
    return LV_RES_INV;
    if(img_dsc->opa <= LV_OPA_MIN) return LV_RES_OK;
    const lv_img_dsc_t * img = src;
    const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(img->header.cf);
    const bool ignoreBitmapAlpha = (img->header.cf == LV_IMG_CF_RGBX8888);

    if(bitmapColorFormat == UNSUPPORTED || img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE) {
        return LV_RES_INV; // sorry, dma2d can't handle this
    }

    // FIXME: handle dsc.pivot, dsc.recolor, dsc.blend_mode
    // FIXME: src pixel size *must* be known to use DMA2D
    // FIXME: If image is drawn by SW, then output cache needs to be cleaned next. Currently it is not possible.
    // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated.
    //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
    //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
    //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?

    // For performance reasons, both source buffer start address and buffer size *should* be 32-byte aligned since source buffer cache is being cleaned.
    //uint32_t srcBufferLength = lv_area_get_size(src_area) * sizeof(lv_color_t); // TODO: verify src pixel size = sizeof(lv_color_t)
    //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
    //LV_ASSERT_MSG((uint32_t)src % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)

    lv_area_t draw_area;
    if(!_lv_area_intersect(&draw_area, src_area, draw_ctx->clip_area)) return LV_RES_OK;

    lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
    lv_point_t src_offset = lv_area_get_offset(src_area, &draw_area); // source image offset in relation to draw_area
    lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
    _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, img->data, img->header.w,
                                   &src_offset, img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha);
    return LV_RES_OK;
}

static void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx)
{
    lv_disp_t * disp = _lv_refr_get_disp_refreshing();
    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(disp->driver);
    lv_draw_sw_wait_for_finish(draw_ctx);
}

/**********************
 *   STATIC FUNCTIONS
 **********************/

/**
 * @brief Fills draw_area with the specified color.
 * @param color color to be painted, note: alpha is ignored
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                           const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    if(opa >= LV_OPA_MAX) {
        DMA2D->CR = 0x3UL << DMA2D_CR_MODE_Pos; // Register-to-memory (no FG nor BG, only output stage active)

        DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
        DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
        DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
        DMA2D->OOR = dest_stride - draw_width;  // out buffer offset
        // Note: unlike FGCOLR and BGCOLR, OCOLR bits must match DMA2D_OUTPUT_COLOR, alpha can be specified
#if RBS_BIT
        // swap red/blue bits
        DMA2D->OCOLR = (color.ch.blue << 11) | (color.ch.green_l << 5 | color.ch.green_h << 8) | (color.ch.red);
#else
        DMA2D->OCOLR = color.full;
#endif
    }
    else {
        DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending)

        DMA2D->FGPFCCR = A8;
        DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
        // Alpha Mode 1: Replace original foreground image alpha channel value by FGPFCCR.ALPHA
        DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos);
        //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);

        // Note: in Alpha Mode 1 FGMAR and FGOR are not used to supply foreground A8 bytes,
        // those bytes are replaced by constant ALPHA defined in FGPFCCR
        DMA2D->FGMAR = (uint32_t)dest_buf;
        DMA2D->FGOR = dest_stride;
        DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff; // swap FGCOLR R/B bits if FGPFCCR.RBS (RBS_BIT) bit is set

        DMA2D->BGPFCCR = LvglColorFormat;
#if defined(DMA2D_BGPFCCR_RBS_Pos)
        DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
#endif
        DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
        DMA2D->BGOR = dest_stride - draw_width;
        DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
        _lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));

        DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
        DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
        DMA2D->OMAR = DMA2D->BGMAR;
        DMA2D->OOR = DMA2D->BGOR;
        DMA2D->OCOLR = 0;
    }
    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}
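
/*
 * Illustrative call (hypothetical values, assuming a 240-pixel-wide, 32-byte aligned draw buffer
 * `buf`): fill the 10x10 square whose top-left corner is at (16, 8) of the buffer with opaque red.
 *
 *    lv_area_t area = {.x1 = 16, .y1 = 8, .x2 = 25, .y2 = 17};
 *    _lv_draw_stm32_dma2d_blend_fill(buf, 240, &area, lv_color_hex(0xff0000), LV_OPA_COVER);
 *
 * With opa == LV_OPA_COVER the register-to-memory mode above is used; any lower opacity takes the
 * memory-to-memory blending branch, which re-reads the destination as background.
 */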

/**
 * @brief Draws the src (foreground) map onto the dst (background) map.
 * @param src_offset src offset in relation to dst, useful when src is larger than draw_area
 * @param opa constant opacity to be applied
 * @param src_color_format source bitmap color format
 * @param ignore_src_alpha if TRUE, the bitmap src alpha channel is ignored
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                          const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa,
                                                          dma2d_color_format_t src_color_format, bool ignore_src_alpha)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    if(opa <= LV_OPA_MIN || src_color_format == UNSUPPORTED) return;
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);
    bool bitmapHasOpacity = !ignore_src_alpha && (src_color_format == ARGB8888 || src_color_format == ARGB1555 ||
                                                  src_color_format == ARGB4444);

    if(opa >= LV_OPA_MAX) opa = 0xff;

    uint8_t srcBpp; // source bytes per pixel
    switch(src_color_format) {
        case ARGB8888:
            srcBpp = 4;
            break;
        case RGB888:
            srcBpp = 3;
            break;
        case RGB565:
        case ARGB1555:
        case ARGB4444:
            srcBpp = 2;
            break;
        default:
            LV_LOG_ERROR("unsupported color format");
            return;
    }

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    DMA2D->FGPFCCR = src_color_format;

    if(opa == 0xff && !bitmapHasOpacity) {
        // no need to blend
        if(src_color_format == LvglColorFormat) {
            // no need to convert pixel format (PFC) either
            DMA2D->CR = 0x0UL;
        }
        else {
            DMA2D->CR = 0x1UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with PFC (FG fetch only with FG PFC active)
        }
        // Alpha Mode 0: No modification of the foreground image alpha channel value
    }
    else {
        // blend
        DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending)
        DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
        if(bitmapHasOpacity) {
            // Alpha Mode 2: Replace original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with original alpha channel value
            DMA2D->FGPFCCR |= (0x2UL << DMA2D_FGPFCCR_AM_Pos);
        }
        else {
            // Alpha Mode 1: Replace original foreground image alpha channel value by FGPFCCR.ALPHA
            DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos);
        }
    }
#if defined(DMA2D_FGPFCCR_RBS_Pos)
    DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
#endif
    DMA2D->FGMAR = ((uint32_t)src_buf) + srcBpp * ((src_stride * src_offset->y) + src_offset->x);
    DMA2D->FGOR = src_stride - draw_width;
    DMA2D->FGCOLR = 0;  // used in A4 and A8 modes only
    _lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, srcBpp);

    DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
    DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
    DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
    DMA2D->OOR = dest_stride - draw_width;
    DMA2D->OCOLR = 0;

    if(opa != 0xff || bitmapHasOpacity) {
        // use background (BG*) registers
        DMA2D->BGPFCCR = LvglColorFormat;
#if defined(DMA2D_BGPFCCR_RBS_Pos)
        DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
#endif
        DMA2D->BGMAR = DMA2D->OMAR;
        DMA2D->BGOR = DMA2D->OOR;
        DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
        _lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));
    }

    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

/**
 * @brief Paints a solid color through an alpha mask, with an additional constant opacity. Useful e.g. for painting anti-aliased fonts.
 * @param mask_offset mask offset in relation to dst, useful when the mask is larger than draw_area
 * @param color color to paint, note: alpha is ignored
 * @param opa constant opacity to be applied
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                            const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset,
                                                            lv_color_t color, lv_opa_t opa)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos;  // Memory-to-memory with blending (FG and BG fetch with PFC and blending)

    DMA2D->FGPFCCR = A8;
    if(opa < LV_OPA_MAX) {
        DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
        DMA2D->FGPFCCR |= (0x2UL <<
                           DMA2D_FGPFCCR_AM_Pos); // Alpha Mode 2: Replace original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with original alpha channel value
    }
    //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
    DMA2D->FGMAR = (uint32_t)(mask_buf + (mask_stride * mask_offset->y) + mask_offset->x);
    DMA2D->FGOR = mask_stride - draw_width;
    DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff;  // swap FGCOLR R/B bits if FGPFCCR.RBS (RBS_BIT) bit is set
    _lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_opa_t));

    DMA2D->BGPFCCR = LvglColorFormat;
#if defined(DMA2D_BGPFCCR_RBS_Pos)
    DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
#endif
    DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
    DMA2D->BGOR = dest_stride - draw_width;
    DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
    _lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));

    DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
    DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
    DMA2D->OMAR = DMA2D->BGMAR;
    DMA2D->OOR = DMA2D->BGOR;
    DMA2D->OCOLR = 0;
    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

/**
 * @brief Copies the src (foreground) map to the dst (background) map.
 * @param src_offset src offset in relation to dst, useful when src is larger than draw_area
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                            const lv_area_t * draw_area, const lv_color_t * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    DMA2D->CR = 0x0UL; // Memory-to-memory (FG fetch only)

    DMA2D->FGPFCCR = LvglColorFormat;
#if defined(DMA2D_FGPFCCR_RBS_Pos)
    DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
#endif
    DMA2D->FGMAR = (uint32_t)(src_buf + (src_stride * src_offset->y) + src_offset->x);
    DMA2D->FGOR = src_stride - draw_width;
    DMA2D->FGCOLR = 0;  // used in A4 and A8 modes only
    _lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_color_t));

    // Note: BG* registers do not need to be set up since BG is not used

    DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
    DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
    DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
    DMA2D->OOR = dest_stride - draw_width;
    DMA2D->OCOLR = 0;

    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished");
    isDma2dInProgess = true;
    DMA2D->IFCR = 0x3FU; // trigger ISR flags reset
    // Note: cleaning the output buffer cache is needed only when the buffer may be misaligned or an adjacent area may have been drawn in sw-fashion, e.g. using lv_draw_sw_blend_basic()
#if LV_COLOR_DEPTH == 16
    _lv_gpu_stm32_dma2d_clean_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos,
                                    (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t));
#endif
    DMA2D->CR |= DMA2D_CR_START;
    // Note: for some reason the mask buffer gets damaged during the transfer if waiting is postponed
    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // FIXME: this line should not be needed here, but it is
}

LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv)
{
    if(disp_drv && disp_drv->wait_cb) {
        while((DMA2D->CR & DMA2D_CR_START) != 0U) {
            disp_drv->wait_cb(disp_drv);
        }
    }
    else {
        while((DMA2D->CR & DMA2D_CR_START) != 0U);
    }

    __IO uint32_t isrFlags = DMA2D->ISR;

    if(isrFlags & DMA2D_ISR_CEIF) {
        LV_LOG_ERROR("DMA2D config error");
    }

    if(isrFlags & DMA2D_ISR_TEIF) {
        LV_LOG_ERROR("DMA2D transfer error");
    }

    DMA2D->IFCR = 0x3FU; // trigger ISR flags reset

    if(isDma2dInProgess) {
        // invalidate output buffer cached memory ONLY after DMA2D transfer
        //_lv_gpu_stm32_dma2d_invalidate_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos, (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t));
        isDma2dInProgess = false;
    }
}

LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_invalidate_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                                                lv_coord_t height, uint8_t pixel_size)
{
#if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled
    uint16_t stride = pixel_size * (width + offset); // in bytes
    uint16_t ll = pixel_size * width; // line length in bytes
    uint32_t n = 0; // address of the next cache row after the last invalidated row
    lv_coord_t h = 0;

    __DSB();

    while(h < height) {
        uint32_t a = address + (h * stride);
        uint32_t e = a + ll; // end address, address of the first byte after the current line
        a &= ~(CACHE_ROW_SIZE - 1U);
        if(a < n) a = n;  // prevent the previous last cache row from being invalidated again

        while(a < e) {
            SCB->DCIMVAC = a;
            a += CACHE_ROW_SIZE;
        }

        n = a;
        h++;
    }

    __DSB();
    __ISB();
#endif
}

LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_clean_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                                           lv_coord_t height, uint8_t pixel_size)
{
#if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled
    uint16_t stride = pixel_size * (width + offset); // in bytes
    uint16_t ll = pixel_size * width; // line length in bytes
    uint32_t n = 0; // address of the next cache row after the last cleaned row
    lv_coord_t h = 0;
    __DSB();

    while(h < height) {
        uint32_t a = address + (h * stride);
        uint32_t e = a + ll; // end address, address of the first byte after the current line
        a &= ~(CACHE_ROW_SIZE - 1U);
        if(a < n) a = n;  // prevent the previous last cache row from being cleaned again

        while(a < e) {
            SCB->DCCMVAC = a;
            a += CACHE_ROW_SIZE;
        }

        n = a;
        h++;
    }

    __DSB();
    __ISB();
#endif
}
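
/*
 * Worked example for the cache maintenance loops above (hypothetical numbers): cleaning a
 * 10-pixel-wide RGB565 line (2 bytes/pixel) starting at address 0x20000013 covers the byte range
 * 0x20000013..0x20000026. The start address is rounded down to the 32-byte row boundary
 * 0x20000000, so the cache rows at 0x20000000 and 0x20000020 are cleaned; `n` then remembers
 * 0x20000040 so an overlapping next line does not touch the same rows twice.
 */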

// initialize the µs timer
LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void)
{
    // disable TRC
    CoreDebug->DEMCR &= ~CoreDebug_DEMCR_TRCENA_Msk;
    // enable TRC
    CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;

#if defined(__CORTEX_M) && (__CORTEX_M == 7U)
    DWT->LAR = 0xC5ACCE55;
#endif
    // disable clock cycle counter
    DWT->CTRL &= ~DWT_CTRL_CYCCNTENA_Msk;
    // enable clock cycle counter
    DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;

    // reset the clock cycle counter value
    DWT->CYCCNT = 0;

    // 3 NO OPERATION instructions
    __ASM volatile("NOP");
    __ASM volatile("NOP");
    __ASM volatile("NOP");

    // check if the clock cycle counter has started
    if(DWT->CYCCNT) {
        return true; // clock cycle counter started
    }
    else {
        return false; // clock cycle counter not started
    }
}

// get elapsed µs since reset
LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void)
{
    uint32_t us = (DWT->CYCCNT * 1000000) / HAL_RCC_GetHCLKFreq();
    return us;
}
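
/*
 * Illustrative use of the DWT helpers above when profiling with LV_STM32_DMA2D_TEST defined
 * (the buffer `buf` and the 240x40 area are hypothetical):
 *
 *    _lv_gpu_stm32_dwt_reset();
 *    lv_area_t area = {.x1 = 0, .y1 = 0, .x2 = 239, .y2 = 39};
 *    _lv_draw_stm32_dma2d_blend_fill(buf, 240, &area, lv_color_black(), LV_OPA_COVER);
 *    uint32_t fill_us = _lv_gpu_stm32_dwt_get_us(); // transfer is already awaited inside _lv_gpu_stm32_dma2d_start_dma_transfer()
 */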

// reset the µs timer
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void)
{
    DWT->CYCCNT = 0;
}

#endif /*LV_USE_GPU_STM32_DMA2D*/