1 /**
2  * @file lv_gpu_stm32_dma2d.c
3  *
4  */
5 
6 /*********************
7  *      INCLUDES
8  *********************/
9 #include "lv_gpu_stm32_dma2d.h"
10 #include "../../core/lv_refr.h"
11 
12 #if LV_USE_GPU_STM32_DMA2D
13 
14 /*********************
15  *      DEFINES
16  *********************/
17 #if LV_COLOR_16_SWAP
18     // Note: DMA2D red/blue swap (RBS) works for all color modes
19     #define RBS_BIT 1U
20 #else
21     #define RBS_BIT 0U
22 #endif
23 
24 #define CACHE_ROW_SIZE 32U // cache row size in Bytes
25 
26 // For code/implementation discussion refer to https://github.com/lvgl/lvgl/issues/3714#issuecomment-1365187036
27 // astyle --options=lvgl/scripts/code-format.cfg --ignore-exclude-errors lvgl/src/draw/stm32_dma2d/*.c lvgl/src/draw/stm32_dma2d/*.h
28 
29 #if LV_COLOR_DEPTH == 16
30     const dma2d_color_format_t LvglColorFormat = RGB565;
31 #elif LV_COLOR_DEPTH == 32
32     const dma2d_color_format_t LvglColorFormat = ARGB8888;
33 #else
34     #error "Cannot use DMA2D with LV_COLOR_DEPTH other than 16 or 32"
35 #endif
36 
37 #if defined (LV_STM32_DMA2D_USE_M7_CACHE)
38     // Cortex-M7 DCache present
39     #define __lv_gpu_stm32_dma2d_clean_cache(address, offset, width, height, pixel_size) _lv_gpu_stm32_dma2d_clean_cache(address, offset, width, height, pixel_size)
40     #define __lv_gpu_stm32_dma2d_invalidate_cache(address, offset, width, height, pixel_size) _lv_gpu_stm32_dma2d_invalidate_cache(address, offset, width, height, pixel_size)
41 #else
42     #define __lv_gpu_stm32_dma2d_clean_cache(address, offset, width, height, pixel_size)
43     #define __lv_gpu_stm32_dma2d_invalidate_cache(address, offset, width, height, pixel_size)
44 #endif
45 
46 /**********************
47  *  STATIC PROTOTYPES
48  **********************/
49 static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc);
50 static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx,
51                                             void * dest_buf, lv_coord_t dest_stride, const lv_area_t * dest_area,
52                                             void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area);
53 static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
54                                             const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format);
55 static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format);
56 static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2);
57 
58 lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
59                                  const lv_area_t * src_area, const void * src);
60 LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dst_buf, lv_coord_t dst_stride,
61                                                            const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa);
62 LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride,
63                                                           const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa,
64                                                           dma2d_color_format_t src_color_format, bool ignore_src_alpha);
65 LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dst_buf, lv_coord_t dst_stride,
66                                                             const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset,
67                                                             lv_color_t color, lv_opa_t opa);
68 LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv);
69 LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void);
70 
71 #if defined(LV_STM32_DMA2D_TEST)
72     LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void);
73     LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void);
74     LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void);
75 #endif
76 
77 static bool isDma2dInProgess = false; // indicates whether DMA2D transfer *initiated here* is in progress
78 
79 /**
80  * Turn on the peripheral and set output color mode, this only needs to be done once
81  */
lv_draw_stm32_dma2d_init(void)82 void lv_draw_stm32_dma2d_init(void)
83 {
84     // Enable DMA2D clock
85 #if defined(STM32F4) || defined(STM32F7) || defined(STM32U5)
86     RCC->AHB1ENR |= RCC_AHB1ENR_DMA2DEN; // enable DMA2D
87     // wait for hardware access to complete
88     __asm volatile("DSB\n");
89     volatile uint32_t temp = RCC->AHB1ENR;
90     LV_UNUSED(temp);
91 #elif defined(STM32H7)
92     RCC->AHB3ENR |= RCC_AHB3ENR_DMA2DEN;
93     // wait for hardware access to complete
94     __asm volatile("DSB\n");
95     volatile uint32_t temp = RCC->AHB3ENR;
96     LV_UNUSED(temp);
97 #else
98 # warning "LVGL can't enable the clock of DMA2D"
99 #endif
100     // AHB master timer configuration
101     DMA2D->AMTCR = 0; // AHB bus guaranteed dead time disabled
102 #if defined(LV_STM32_DMA2D_TEST)
103     _lv_gpu_stm32_dwt_init(); // init µs timer
104 #endif
105 }
106 
/**
 * Initialise a draw context so that blending, decoded-image drawing and buffer copies go through
 * the DMA2D implementations of this file.
 * The context is first set up by lv_draw_sw_init_ctx(), then three callbacks are overridden.
 * @param drv       display driver, forwarded to lv_draw_sw_init_ctx()
 * @param draw_ctx  draw context to initialise
 */
void lv_draw_stm32_dma2d_ctx_init(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx)
{
    lv_draw_sw_init_ctx(drv, draw_ctx);

    // cast to the very type the pointer is declared with
    lv_draw_stm32_dma2d_ctx_t * dma2d_draw_ctx = (lv_draw_stm32_dma2d_ctx_t *)draw_ctx;

    dma2d_draw_ctx->blend = lv_draw_stm32_dma2d_blend;
    dma2d_draw_ctx->base_draw.draw_img_decoded = lv_draw_stm32_dma2d_img_decoded;
    // base_draw.draw_img is not overridden: lv_draw_stm32_dma2d_img is not hooked up here
    dma2d_draw_ctx->base_draw.buffer_copy = lv_draw_stm32_dma2d_buffer_copy;
}
118 
/**
 * Counterpart of lv_draw_stm32_dma2d_ctx_init(). Does nothing; both arguments are ignored.
 * @param drv       unused
 * @param draw_ctx  unused
 */
void lv_draw_stm32_dma2d_ctx_deinit(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx)
{
    LV_UNUSED(draw_ctx);
    LV_UNUSED(drv);
}
124 
/**
 * Blend callback of the DMA2D draw context.
 * Dispatches to one of the DMA2D primitives depending on whether a mask and/or a source bitmap is given:
 *  - no mask, no source : _lv_draw_stm32_dma2d_blend_fill
 *  - no mask, source    : _lv_draw_stm32_dma2d_blend_map
 *  - mask, no source    : _lv_draw_stm32_dma2d_blend_paint
 *  - mask and source    : _lv_draw_stm32_dma2d_blend_map when LV_COLOR_DEPTH == 32, software blending otherwise
 * Blend modes other than LV_BLEND_MODE_NORMAL are always blended in software.
 *
 * Author's notes (not enforced by asserts):
 *  - draw_ctx->buf_area->x1 must be zero, otherwise dest_stride cannot be calculated correctly
 *  - the draw buffer start address and size must be 32-byte aligned since its cache is being invalidated
 *  - source and mask buffers should be 32-byte aligned as well (performance only) since their cache is cleaned
 *
 * @param draw_ctx  draw context (destination buffer, buffer area, clip area)
 * @param dsc       blend descriptor
 */
static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc)
{
    if(dsc->blend_mode != LV_BLEND_MODE_NORMAL) {
        lv_draw_sw_blend_basic(draw_ctx, dsc);
        return;
    }

    // Areas, all relative to the screen:
    // + draw_ctx->buf_area  - location of the entire draw buffer
    // + draw_ctx->clip_area - current draw buffer location
    // + dsc->blend_area     - area intended to be painted (image etc.)
    // + draw_area           - area actually being painted
    lv_area_t draw_area;
    if(!_lv_area_intersect(&draw_area, dsc->blend_area, draw_ctx->clip_area)) return;

    // a fully transparent mask result means there is nothing to draw
    if(dsc->mask_buf && dsc->mask_res == LV_DRAW_MASK_RES_TRANSP) return;

    // a fully covering mask result is the same as having no mask
    const lv_opa_t * mask = (dsc->mask_res == LV_DRAW_MASK_RES_FULL_COVER) ? NULL : dsc->mask_buf;

    const bool has_src = (dsc->src_buf != NULL);
    lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);

    if(mask == NULL) {
        if(has_src) {   // annotated "0.2%" by the original author
            lv_coord_t src_stride = lv_area_get_width(dsc->blend_area);
            // source image offset in relation to draw_area, taken before draw_area is translated
            lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area);
            // translate the screen draw area to the origin of the buffer area
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
            _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset,
                                           dsc->opa, LvglColorFormat, true);
        }
        else {          // annotated "6.1%" by the original author
            // translate the screen draw area to the origin of the buffer area
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
            _lv_draw_stm32_dma2d_blend_fill(draw_ctx->buf, dest_stride, &draw_area, dsc->color, dsc->opa);
        }
        return;
    }

    lv_coord_t mask_stride = lv_area_get_width(dsc->mask_area);
    // mask offset in relation to draw_area, taken before draw_area is translated
    lv_point_t mask_offset = lv_area_get_offset(dsc->mask_area, &draw_area);

    if(!has_src) {      // annotated "93.5%" by the original author
        lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
        _lv_draw_stm32_dma2d_blend_paint(draw_ctx->buf, dest_stride, &draw_area, mask, mask_stride, &mask_offset,
                                         dsc->color, dsc->opa);
        return;
    }

    // mask and source bitmap together - annotated "0.2%" by the original author
    // note: (x)RGB dsc->src_buf does not carry alpha channel bytes, those are carried in dsc->mask_buf
#if LV_COLOR_DEPTH == 32
    lv_coord_t src_stride = lv_area_get_width(dsc->blend_area);
    lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area
    lv_coord_t draw_width = lv_area_get_width(&draw_area);
    lv_coord_t draw_height = lv_area_get_height(&draw_area);

    // Merge the mask bytes into the alpha channel of the source pixels, in place
    // (the const qualifier of dsc->src_buf is cast away for that).
    // TODO: optimize by reading 4 or 8 mask bytes at a time
    mask += (mask_stride * mask_offset.y) + mask_offset.x;
    lv_color_t * src_px = (lv_color_t *)dsc->src_buf;
    src_px += (src_stride * src_offset.y) + src_offset.x;
    uint16_t mask_row_gap = mask_stride - draw_width;   // mask bytes to skip at the end of each row
    uint16_t src_row_gap = src_stride - draw_width;     // source pixels to skip at the end of each row
    for(lv_coord_t rows_left = draw_height; rows_left > 0; rows_left--) {
        for(uint16_t x = 0; x < draw_width; x++) {
            src_px->ch.alpha = *mask;
            src_px++;
            mask++;
        }
        mask += mask_row_gap;
        src_px += src_row_gap;
    }

    // translate the screen draw area to the origin of the buffer area
    lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
    _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset,
                                   dsc->opa, ARGB8888, false);
#else
    // Note: 16-bit bitmap hardware blending with mask and background is possible, but requires a temp
    // 24 or 32-bit buffer to combine bitmap with mask first. Blend in software instead (e.g. Shop Items).
    // Cleaning the cache afterwards does not help, since this is not the only place where the buffer
    // is written without dma2d.
    lv_draw_sw_blend_basic(draw_ctx, dsc);
#endif
}
232 
233 // Does dest_area = intersect(draw_ctx->clip_area, src_area) ?
234 // See: https://github.com/lvgl/lvgl/issues/3714#issuecomment-1331710788
lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx,void * dest_buf,lv_coord_t dest_stride,const lv_area_t * dest_area,void * src_buf,lv_coord_t src_stride,const lv_area_t * src_area)235 static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, void * dest_buf, lv_coord_t dest_stride,
236                                             const lv_area_t * dest_area, void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area)
237 {
238     // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated.
239     //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
240     //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
241     //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?
242     // FIXME:
243     // 1. Both src_buf and dest_buf pixel size *must* be known to use DMA2D.
244     // 2. Verify both buffers start addresses and lengths are 32-byte (cache row size) aligned.
245     LV_UNUSED(draw_ctx);
246     lv_point_t src_offset = lv_area_get_offset(src_area, dest_area);
247     // FIXME: use lv_area_move(dest_area, -dest_area->x1, -dest_area->y1) here ?
248     // TODO: It is assumed that dest_buf and src_buf buffers are of lv_color_t type. Verify it, this assumption may be incorrect.
249     _lv_draw_stm32_dma2d_blend_map((const lv_color_t *)dest_buf, dest_stride, dest_area, (const lv_color_t *)src_buf,
250                                    src_stride, &src_offset, 0xff, LvglColorFormat, true);
251     // TODO: Investigate if output buffer cache needs to be invalidated. It depends on what the destination buffer is and how it is used next - by dma2d or not.
252     _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // TODO: is this line needed here?
253 }
254 
/**
 * draw_img_decoded callback: draws an already decoded bitmap.
 * DMA2D is used only for the simple case (no draw mask, no rotation/zoom, no recolor, and a source
 * color format DMA2D can fetch); everything else is delegated to lv_draw_sw_img_decoded().
 * @param draw_ctx      draw context (destination buffer, buffer area, clip area)
 * @param img_dsc       image draw descriptor (angle, zoom, recolor_opa and opa are read)
 * @param coords        screen area of the bitmap; its width is used as the source stride
 * @param src_buf       decoded pixel data
 * @param color_format  LVGL color format of src_buf
 */
static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                            const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format)
{
    if(draw_ctx->draw_img_decoded == NULL) return;

    // the whole clip area is taken as the area to draw
    lv_area_t draw_area;
    lv_area_copy(&draw_area, draw_ctx->clip_area);

    const bool masked = lv_draw_mask_is_any(&draw_area);
    const bool transformed = (img_dsc->angle != 0) || (img_dsc->zoom != LV_IMG_ZOOM_NONE);
    const bool recolored = (img_dsc->recolor_opa != LV_OPA_TRANSP);
    const dma2d_color_format_t src_format = lv_color_format_to_dma2d_color_format(color_format);

    if(masked || transformed || recolored || src_format == UNSUPPORTED) {
        // all more complex cases which require additional image transformations
        lv_draw_sw_img_decoded(draw_ctx, img_dsc, coords, src_buf, color_format);
        return;
    }

    // simple bitmap blending, optionally with supported color format conversion - handled directly by dma2d
    const bool skip_src_alpha = (color_format == LV_IMG_CF_RGBX8888);
    lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
    lv_coord_t src_stride = lv_area_get_width(coords);
    // source image offset in relation to draw_area, taken before draw_area is translated
    lv_point_t src_offset = lv_area_get_offset(coords, &draw_area);
    // translate the screen draw area to the origin of the buffer area
    lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
    _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, src_buf, src_stride, &src_offset,
                                   img_dsc->opa, src_format, skip_src_alpha);
}
282 
lv_area_get_offset(const lv_area_t * area1,const lv_area_t * area2)283 static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2)
284 {
285     lv_point_t offset = {x: area2->x1 - area1->x1, y: area2->y1 - area1->y1};
286     return offset;
287 }
288 
lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format)289 static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format)
290 {
291     switch(color_format) {
292         case LV_IMG_CF_RGBA8888:
293             // note: LV_IMG_CF_RGBA8888 is actually ARGB8888
294             return ARGB8888;
295         case LV_IMG_CF_RGBX8888:
296             // note: LV_IMG_CF_RGBX8888 is actually XRGB8888
297             return ARGB8888;
298         case LV_IMG_CF_RGB565:
299             return RGB565;
300         case LV_IMG_CF_TRUE_COLOR:
301             return LvglColorFormat;
302         case LV_IMG_CF_TRUE_COLOR_ALPHA:
303 #if LV_COLOR_DEPTH == 16
304             // bitmap color format is 24b ARGB8565 - dma2d unsupported
305             return UNSUPPORTED;
306 #elif LV_COLOR_DEPTH == 32
307             return ARGB8888;
308 #else
309             // unknown bitmap color format
310             return UNSUPPORTED;
311 #endif
312         default:
313             return UNSUPPORTED;
314     }
315 }
316 
lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx,const lv_draw_img_dsc_t * img_dsc,const lv_area_t * src_area,const void * src)317 lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
318                                  const lv_area_t * src_area, const void * src)
319 {
320     //if(lv_img_src_get_type(src) != LV_IMG_SRC_VARIABLE) return LV_RES_INV;
321     return LV_RES_INV;
322     if(img_dsc->opa <= LV_OPA_MIN) return LV_RES_OK;
323     const lv_img_dsc_t * img = src;
324     const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(img->header.cf);
325     const bool ignoreBitmapAlpha = (img->header.cf == LV_IMG_CF_RGBX8888);
326 
327     if(bitmapColorFormat == UNSUPPORTED || img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE) {
328         return LV_RES_INV; // sorry, dma2d can handle this
329     }
330 
331     // FIXME: handle dsc.pivot, dsc.recolor, dsc.blend_mode
332     // FIXME: src pixel size *must* be known to use DMA2D
333     // FIXME: If image is drawn by SW, then output cache needs to be cleaned next. Currently it is not possible.
334     // Both draw buffer start address and buffer size *must* be 32-byte aligned since draw buffer cache is being invalidated.
335     //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
336     //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
337     //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?
338 
339     // For performance reasons, both source buffer start address and buffer size *should* be 32-byte aligned since source buffer cache is being cleaned.
340     //uint32_t srcBufferLength = lv_area_get_size(src_area) * sizeof(lv_color_t); // TODO: verify src pixel size = sizeof(lv_color_t)
341     //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
342     //LV_ASSERT_MSG((uint32_t)src % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
343 
344     lv_area_t draw_area;
345     if(!_lv_area_intersect(&draw_area, src_area, draw_ctx->clip_area)) return LV_RES_OK;
346 
347     lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
348     lv_point_t src_offset = lv_area_get_offset(src_area, &draw_area); // source image offset in relation to draw_area
349     lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
350     _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, img->data, img->header.w,
351                                    &src_offset, img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha);
352     return LV_RES_OK;
353 }
354 
355 /**********************
356  *   STATIC FUNCTIONS
357  **********************/
358 
359 /**
360  * @brief Fills draw_area with specified color.
361  * @param color color to be painted, note: alpha is ignored
362  */
_lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dest_buf,lv_coord_t dest_stride,const lv_area_t * draw_area,lv_color_t color,lv_opa_t opa)363 LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dest_buf, lv_coord_t dest_stride,
364                                                            const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa)
365 {
366     LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
367     lv_coord_t draw_width = lv_area_get_width(draw_area);
368     lv_coord_t draw_height = lv_area_get_height(draw_area);
369 
370     _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);
371 
372     if(opa >= LV_OPA_MAX) {
373         DMA2D->CR = 0x3UL << DMA2D_CR_MODE_Pos; // Register-to-memory (no FG nor BG, only output stage active)
374 
375         DMA2D->OPFCCR = LvglColorFormat;
376 #if defined(DMA2D_OPFCCR_RBS_Pos)
377         DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
378 #endif
379         DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
380         DMA2D->OOR = dest_stride - draw_width;  // out buffer offset
381         // Note: unlike FGCOLR and BGCOLR, OCOLR bits must match DMA2D_OUTPUT_COLOR, alpha can be specified
382 #if RBS_BIT
383         // swap red/blue bits
384         DMA2D->OCOLR = (color.ch.blue << 11) | (color.ch.green_l << 5 | color.ch.green_h << 8) | (color.ch.red);
385 #else
386         DMA2D->OCOLR = color.full;
387 #endif
388     }
389     else {
390         DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending)
391 
392         DMA2D->FGPFCCR = A8;
393         DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
394         // Alpha Mode 1: Replace original foreground image alpha channel value by FGPFCCR.ALPHA
395         DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos);
396         //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
397 
398         // Note: in Alpha Mode 1 FGMAR and FGOR are not used to supply foreground A8 bytes,
399         // those bytes are replaced by constant ALPHA defined in FGPFCCR
400         DMA2D->FGMAR = (uint32_t)dest_buf;
401         DMA2D->FGOR = dest_stride;
402         DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff; // swap FGCOLR R/B bits if FGPFCCR.RBS (RBS_BIT) bit is set
403 
404         DMA2D->BGPFCCR = LvglColorFormat;
405 #if defined(DMA2D_BGPFCCR_RBS_Pos)
406         DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
407 #endif
408         DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
409         DMA2D->BGOR = dest_stride - draw_width;
410         DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
411         __lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));
412 
413         DMA2D->OPFCCR = LvglColorFormat;
414 #if defined(DMA2D_OPFCCR_RBS_Pos)
415         DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
416 #endif
417         DMA2D->OMAR = DMA2D->BGMAR;
418         DMA2D->OOR = DMA2D->BGOR;
419         DMA2D->OCOLR = 0;
420     }
421     // PL - pixel per lines (14 bit), NL - number of lines (16 bit)
422     DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);
423 
424     _lv_gpu_stm32_dma2d_start_dma_transfer();
425 }
426 
427 /**
428  * @brief Draws src (foreground) map on dst (background) map.
429  * @param src_offset src offset in relation to dst, useful when src is larger than draw_area
430  * @param opa constant opacity to be applied
431  * @param bitmapColorCode bitmap color type
432  * @param ignoreAlpha if TRUE, bitmap src alpha channel is ignored
433  */
_lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf,lv_coord_t dest_stride,const lv_area_t * draw_area,const void * src_buf,lv_coord_t src_stride,const lv_point_t * src_offset,lv_opa_t opa,dma2d_color_format_t src_color_format,bool ignore_src_alpha)434 LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride,
435                                                           const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa,
436                                                           dma2d_color_format_t src_color_format, bool ignore_src_alpha)
437 {
438     LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
439     if(opa <= LV_OPA_MIN || src_color_format == UNSUPPORTED) return;
440     lv_coord_t draw_width = lv_area_get_width(draw_area);
441     lv_coord_t draw_height = lv_area_get_height(draw_area);
442     bool bitmapHasOpacity = !ignore_src_alpha && (src_color_format == ARGB8888 || src_color_format == ARGB1555 ||
443                                                   src_color_format == ARGB4444);
444 
445     if(opa >= LV_OPA_MAX) opa = 0xff;
446 
447     uint8_t srcBpp; // source bytes per pixel
448     switch(src_color_format) {
449         case ARGB8888:
450             srcBpp = 4;
451             break;
452         case RGB888:
453             srcBpp = 3;
454             break;
455         case RGB565:
456         case ARGB1555:
457         case ARGB4444:
458             srcBpp = 2;
459             break;
460         default:
461             LV_LOG_ERROR("unsupported color format");
462             return;
463     }
464 
465     _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);
466 
467     DMA2D->FGPFCCR = src_color_format;
468 
469     if(opa == 0xff && !bitmapHasOpacity) {
470         // no need to blend
471         if(src_color_format == LvglColorFormat) {
472             // no need to convert pixel format (PFC) either
473             DMA2D->CR = 0x0UL;
474         }
475         else {
476             DMA2D->CR = 0x1UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with PFC (FG fetch only with FG PFC active)
477         }
478         // Alpha Mode 0: No modification of the foreground image alpha channel value
479     }
480     else {
481         // blend
482         DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending)
483         DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
484         if(bitmapHasOpacity) {
485             // Alpha Mode 2: Replace original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with original alpha channel value
486             DMA2D->FGPFCCR |= (0x2UL << DMA2D_FGPFCCR_AM_Pos);
487         }
488         else {
489             // Alpha Mode 1: Replace original foreground image alpha channel value by FGPFCCR.ALPHA
490             DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos);
491         }
492     }
493 #if defined(DMA2D_FGPFCCR_RBS_Pos)
494     DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
495 #endif
496     DMA2D->FGMAR = ((uint32_t)src_buf) + srcBpp * ((src_stride * src_offset->y) + src_offset->x);
497     DMA2D->FGOR = src_stride - draw_width;
498     DMA2D->FGCOLR = 0;  // used in A4 and A8 modes only
499     __lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, srcBpp);
500 
501     DMA2D->OPFCCR = LvglColorFormat;
502 #if defined(DMA2D_OPFCCR_RBS_Pos)
503     DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
504 #endif
505     DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
506     DMA2D->OOR = dest_stride - draw_width;
507     DMA2D->OCOLR = 0;
508 
509     if(opa != 0xff || bitmapHasOpacity) {
510         // use background (BG*) registers
511         DMA2D->BGPFCCR = LvglColorFormat;
512 #if defined(DMA2D_BGPFCCR_RBS_Pos)
513         DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
514 #endif
515         DMA2D->BGMAR = DMA2D->OMAR;
516         DMA2D->BGOR = DMA2D->OOR;
517         DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
518         __lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));
519     }
520 
521     // PL - pixel per lines (14 bit), NL - number of lines (16 bit)
522     DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);
523 
524     _lv_gpu_stm32_dma2d_start_dma_transfer();
525 }
526 
527 /**
528  * @brief Paints solid color with alpha mask with additional constant opacity. Useful e.g. for painting anti-aliased fonts.
529  * @param src_offset src offset in relation to dst, useful when src (alpha mask) is larger than draw_area
530  * @param color color to paint, note: alpha is ignored
531  * @param opa constant opacity to be applied
532  */
_lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dest_buf,lv_coord_t dest_stride,const lv_area_t * draw_area,const lv_opa_t * mask_buf,lv_coord_t mask_stride,const lv_point_t * mask_offset,lv_color_t color,lv_opa_t opa)533 LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dest_buf, lv_coord_t dest_stride,
534                                                             const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset,
535                                                             lv_color_t color, lv_opa_t opa)
536 {
537     LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
538     lv_coord_t draw_width = lv_area_get_width(draw_area);
539     lv_coord_t draw_height = lv_area_get_height(draw_area);
540 
541     _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);
542 
543     DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos;  // Memory-to-memory with blending (FG and BG fetch with PFC and blending)
544 
545     DMA2D->FGPFCCR = A8;
546     if(opa < LV_OPA_MAX) {
547         DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
548         DMA2D->FGPFCCR |= (0x2UL <<
549                            DMA2D_FGPFCCR_AM_Pos); // Alpha Mode: Replace original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with original alpha channel value
550     }
551     //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
552     DMA2D->FGMAR = (uint32_t)(mask_buf + (mask_stride * mask_offset->y) + mask_offset->x);
553     DMA2D->FGOR = mask_stride - draw_width;
554     DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff;  // swap FGCOLR R/B bits if FGPFCCR.RBS (RBS_BIT) bit is set
555     __lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_opa_t));
556 
557     DMA2D->BGPFCCR = LvglColorFormat;
558 #if defined(DMA2D_BGPFCCR_RBS_Pos)
559     DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
560 #endif
561     DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
562     DMA2D->BGOR = dest_stride - draw_width;
563     DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
564     __lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));
565 
566     DMA2D->OPFCCR = LvglColorFormat;
567 #if defined(DMA2D_OPFCCR_RBS_Pos)
568     DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
569 #endif
570     DMA2D->OMAR = DMA2D->BGMAR;
571     DMA2D->OOR = DMA2D->BGOR;
572     DMA2D->OCOLR = 0;
573     // PL - pixel per lines (14 bit), NL - number of lines (16 bit)
574     DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);
575 
576     _lv_gpu_stm32_dma2d_start_dma_transfer();
577 }
578 
579 /**
580  * @brief Copies src (foreground) map to the dst (background) map.
581  * @param src_offset src offset in relation to dst, useful when src is larger than draw_area
582  */
_lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf,lv_coord_t dest_stride,const lv_area_t * draw_area,const lv_color_t * src_buf,lv_coord_t src_stride,const lv_point_t * src_offset)583 void _lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf, lv_coord_t dest_stride,
584                                       const lv_area_t * draw_area, const lv_color_t * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset)
585 {
586     LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
587     lv_coord_t draw_width = lv_area_get_width(draw_area);
588     lv_coord_t draw_height = lv_area_get_height(draw_area);
589 
590     _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);
591 
592     DMA2D->CR = 0x0UL; // Memory-to-memory (FG fetch only)
593 
594     DMA2D->FGPFCCR = LvglColorFormat;
595 #if defined(DMA2D_FGPFCCR_RBS_Pos)
596     DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
597 #endif
598     DMA2D->FGMAR = (uint32_t)(src_buf + (src_stride * src_offset->y) + src_offset->x);
599     DMA2D->FGOR = src_stride - draw_width;
600     DMA2D->FGCOLR = 0;  // used in A4 and A8 modes only
601     __lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_color_t));
602 
603     // Note BG* registers do not need to be set up since BG is not used
604 
605     DMA2D->OPFCCR = LvglColorFormat;
606 #if defined(DMA2D_OPFCCR_RBS_Pos)
607     DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
608 #endif
609     DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
610     DMA2D->OOR = dest_stride - draw_width;
611     DMA2D->OCOLR = 0;
612 
613     // PL - pixel per lines (14 bit), NL - number of lines (16 bit)
614     DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);
615 
616     _lv_gpu_stm32_dma2d_start_dma_transfer();
617 }
618 
_lv_gpu_stm32_dma2d_start_dma_transfer(void)619 LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void)
620 {
621     LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished");
622     isDma2dInProgess = true;
623     DMA2D->IFCR = 0x3FU; // trigger ISR flags reset
624     // Note: cleaning output buffer cache is needed only when buffer may be misaligned or adjacent area may have been drawn in sw-fashion, e.g. using lv_draw_sw_blend_basic()
625 #if LV_COLOR_DEPTH == 16
626     __lv_gpu_stm32_dma2d_clean_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos,
627                                      (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t));
628 #endif
629     DMA2D->CR |= DMA2D_CR_START;
630     // Note: for some reason mask buffer gets damaged during transfer if waiting is postponed
631     _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // FIXME: this line should not be needed here, but it is
632 }
633 
_lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv)634 LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv)
635 {
636     if(disp_drv && disp_drv->wait_cb) {
637         while((DMA2D->CR & DMA2D_CR_START) != 0U) {
638             disp_drv->wait_cb(disp_drv);
639         }
640     }
641     else {
642         while((DMA2D->CR & DMA2D_CR_START) != 0U);
643     }
644 
645     __IO uint32_t isrFlags = DMA2D->ISR;
646 
647     if(isrFlags & DMA2D_ISR_CEIF) {
648         LV_LOG_ERROR("DMA2D config error");
649     }
650 
651     if(isrFlags & DMA2D_ISR_TEIF) {
652         LV_LOG_ERROR("DMA2D transfer error");
653     }
654 
655     DMA2D->IFCR = 0x3FU; // trigger ISR flags reset
656 
657     if(isDma2dInProgess) {
658         // invalidate output buffer cached memory ONLY after DMA2D transfer
659         //__lv_gpu_stm32_dma2d_invalidate_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos, (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t));
660         isDma2dInProgess = false;
661     }
662 }
663 
664 #if defined (LV_STM32_DMA2D_USE_M7_CACHE)
665 // Cortex-M7 DCache present
/**
 * @brief Invalidates the data cache rows that cover a rectangular memory area.
 *        Returns immediately when the L1 data cache is disabled.
 * @param address address of the first byte of the first line
 * @param offset gap between the end of one line and the start of the next one, in pixels
 * @param width line width, in pixels
 * @param height number of lines
 * @param pixel_size size of a single pixel, in bytes
 */
void _lv_gpu_stm32_dma2d_invalidate_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                          lv_coord_t height, uint8_t pixel_size)
{
    if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled

    // Byte counts are kept in 32 bits: pixel_size * (width + offset) can exceed 65535
    // (e.g. 4-byte pixels with a stride of 16384 pixels or more) and must not be truncated
    const uint32_t stride = (uint32_t)pixel_size * (uint32_t)(width + offset); // in bytes
    const uint32_t ll = (uint32_t)pixel_size * (uint32_t)width; // line length in bytes
    uint32_t n = 0; // address of the next cache row after the last invalidated row

    __DSB();

    for(lv_coord_t h = 0; h < height; h++) {
        uint32_t a = address + ((uint32_t)h * stride);
        uint32_t e = a + ll; // end address, address of the first byte after the current line
        a &= ~(CACHE_ROW_SIZE - 1U); // round down to the start of the cache row
        if(a < n) a = n;  // prevent the previous last cache row from being invalidated again

        while(a < e) {
            SCB->DCIMVAC = a;
            a += CACHE_ROW_SIZE;
        }

        n = a;
    }

    __DSB();
    __ISB();
}
695 
/**
 * @brief Cleans the data cache rows that cover a rectangular memory area.
 *        Returns immediately when the L1 data cache is disabled.
 * @param address address of the first byte of the first line
 * @param offset gap between the end of one line and the start of the next one, in pixels
 * @param width line width, in pixels
 * @param height number of lines
 * @param pixel_size size of a single pixel, in bytes
 */
void _lv_gpu_stm32_dma2d_clean_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                     lv_coord_t height, uint8_t pixel_size)
{
    if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled

    // Byte counts are kept in 32 bits: pixel_size * (width + offset) can exceed 65535
    // (e.g. 4-byte pixels with a stride of 16384 pixels or more) and must not be truncated
    const uint32_t stride = (uint32_t)pixel_size * (uint32_t)(width + offset); // in bytes
    const uint32_t ll = (uint32_t)pixel_size * (uint32_t)width; // line length in bytes
    uint32_t n = 0; // address of the next cache row after the last cleaned row

    __DSB();

    for(lv_coord_t h = 0; h < height; h++) {
        uint32_t a = address + ((uint32_t)h * stride);
        uint32_t e = a + ll; // end address, address of the first byte after the current line
        a &= ~(CACHE_ROW_SIZE - 1U); // round down to the start of the cache row
        if(a < n) a = n;  // prevent the previous last cache row from being cleaned again

        while(a < e) {
            SCB->DCCMVAC = a;
            a += CACHE_ROW_SIZE;
        }

        n = a;
    }

    __DSB();
    __ISB();
}
724 #endif // LV_STM32_DMA2D_USE_M7_CACHE
725 
726 #if defined(LV_STM32_DMA2D_TEST)
727 // initialize µs timer
_lv_gpu_stm32_dwt_init(void)728 LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void)
729 {
730     // disable TRC
731     CoreDebug->DEMCR &= ~CoreDebug_DEMCR_TRCENA_Msk;
732     // enable TRC
733     CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
734 
735 #if defined(__CORTEX_M) && (__CORTEX_M == 7U)
736     DWT->LAR = 0xC5ACCE55;
737 #endif
738     // disable clock cycle counter
739     DWT->CTRL &= ~DWT_CTRL_CYCCNTENA_Msk;
740     // enable  clock cycle counter
741     DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
742 
743     // reset the clock cycle counter value
744     DWT->CYCCNT = 0;
745 
746     // 3 NO OPERATION instructions
747     __ASM volatile("NOP");
748     __ASM volatile("NOP");
749     __ASM volatile("NOP");
750 
751     // check if clock cycle counter has started
752     if(DWT->CYCCNT) {
753         return true; // clock cycle counter started
754     }
755     else {
756         return false; // clock cycle counter not started
757     }
758 }
759 
760 // get elapsed µs since reset
_lv_gpu_stm32_dwt_get_us(void)761 LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void)
762 {
763     uint32_t us = (DWT->CYCCNT * 1000000) / HAL_RCC_GetHCLKFreq();
764     return us;
765 }
766 
767 // reset µs timer
_lv_gpu_stm32_dwt_reset(void)768 LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void)
769 {
770     DWT->CYCCNT = 0;
771 }
772 #endif // LV_STM32_DMA2D_TEST
773 #endif // LV_USE_GPU_STM32_DMA2D
774