/**
 * @file lv_gpu_stm32_dma2d.c
 *
 */

/*********************
 *      INCLUDES
 *********************/
#include "lv_gpu_stm32_dma2d.h"
#include "../../core/lv_refr.h"

#if LV_USE_GPU_STM32_DMA2D

/*********************
 *      DEFINES
 *********************/
#if LV_COLOR_16_SWAP
    // Note: DMA2D red/blue swap (RBS) works for all color modes
    #define RBS_BIT 1U
#else
    #define RBS_BIT 0U
#endif

#define CACHE_ROW_SIZE 32U // cache row size in bytes

// For code/implementation discussion refer to https://github.com/lvgl/lvgl/issues/3714#issuecomment-1365187036
// astyle --options=lvgl/scripts/code-format.cfg --ignore-exclude-errors lvgl/src/draw/stm32_dma2d/*.c lvgl/src/draw/stm32_dma2d/*.h

#if LV_COLOR_DEPTH == 16
    const dma2d_color_format_t LvglColorFormat = RGB565;
#elif LV_COLOR_DEPTH == 32
    const dma2d_color_format_t LvglColorFormat = ARGB8888;
#else
    #error "Cannot use DMA2D with LV_COLOR_DEPTH other than 16 or 32"
#endif

#if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
    #define LV_STM32_DMA2D_USE_M7_CACHE
#endif

#if defined (LV_STM32_DMA2D_USE_M7_CACHE)
    // Cortex-M7 DCache present
    #define __lv_gpu_stm32_dma2d_clean_cache(address, offset, width, height, pixel_size) _lv_gpu_stm32_dma2d_clean_cache(address, offset, width, height, pixel_size)
    #define __lv_gpu_stm32_dma2d_invalidate_cache(address, offset, width, height, pixel_size) _lv_gpu_stm32_dma2d_invalidate_cache(address, offset, width, height, pixel_size)
#else
    #define __lv_gpu_stm32_dma2d_clean_cache(address, offset, width, height, pixel_size)
    #define __lv_gpu_stm32_dma2d_invalidate_cache(address, offset, width, height, pixel_size)
#endif
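
// Background on the cache maintenance used throughout this file: on Cortex-M7 the data
// cache must be kept coherent with DMA2D by hand. Buffers the CPU has written and DMA2D
// is about to *read* (source/foreground/background) are cleaned before the transfer
// starts, and a buffer DMA2D has *written* (output) has to be invalidated before the CPU
// reads it again. The helpers at the end of this file walk the affected lines one
// 32-byte cache row at a time; on cores without a DCache the macros above compile to nothing.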

/**********************
 *  STATIC PROTOTYPES
 **********************/
static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc);
static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx,
                                            void * dest_buf, lv_coord_t dest_stride, const lv_area_t * dest_area,
                                            void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area);
static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                            const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format);
static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format);
static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2);

LV_STM32_DMA2D_STATIC void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx);
LV_STM32_DMA2D_STATIC lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                                       const lv_area_t * src_area, const void * src);
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dst_buf, lv_coord_t dst_stride,
                                                           const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa);
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                          const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa,
                                                          dma2d_color_format_t src_color_format, bool ignore_src_alpha);
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dst_buf, lv_coord_t dst_stride,
                                                            const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset,
                                                            lv_color_t color, lv_opa_t opa);
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                            const lv_area_t * draw_area, const lv_color_t * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset);
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv);
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void);

#if defined (LV_STM32_DMA2D_USE_M7_CACHE)
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_invalidate_cache(uint32_t address, lv_coord_t offset,
                                                                lv_coord_t width, lv_coord_t height, uint8_t pixel_size);
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_clean_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                                           lv_coord_t height, uint8_t pixel_size);
#endif

#if defined(LV_STM32_DMA2D_TEST)
    LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void);
    LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void);
    LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void);
#endif

static bool isDma2dInProgess = false; // indicates whether a DMA2D transfer *initiated here* is in progress

/**
 * Turn on the peripheral and set the output color mode; this only needs to be done once
 */
void lv_draw_stm32_dma2d_init(void)
{
    // Enable the DMA2D clock
#if defined(STM32F4) || defined(STM32F7) || defined(STM32U5)
    RCC->AHB1ENR |= RCC_AHB1ENR_DMA2DEN; // enable DMA2D
    // wait for the hardware access to complete
    __asm volatile("DSB\n");
    volatile uint32_t temp = RCC->AHB1ENR;
    LV_UNUSED(temp);
#elif defined(STM32H7)
    RCC->AHB3ENR |= RCC_AHB3ENR_DMA2DEN;
    // wait for the hardware access to complete
    __asm volatile("DSB\n");
    volatile uint32_t temp = RCC->AHB3ENR;
    LV_UNUSED(temp);
#else
# warning "LVGL can't enable the clock of DMA2D"
#endif
    // AHB master timer configuration
    DMA2D->AMTCR = 0; // AHB bus guaranteed dead time disabled
#if defined(LV_STM32_DMA2D_TEST)
    _lv_gpu_stm32_dwt_init(); // init µs timer
#endif
}
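
// A minimal sketch of how these entry points are wired into a display driver (assuming
// the LVGL v8 lv_disp_drv_t API; lv_disp_drv_init() normally does this automatically
// when LV_USE_GPU_STM32_DMA2D is enabled, so a manual setup is rarely needed):
//
//   static lv_disp_drv_t disp_drv;
//   lv_disp_drv_init(&disp_drv);
//   lv_draw_stm32_dma2d_init();                           // enable the peripheral once
//   disp_drv.draw_ctx_init   = lv_draw_stm32_dma2d_ctx_init;
//   disp_drv.draw_ctx_deinit = lv_draw_stm32_dma2d_ctx_deinit;
//   disp_drv.draw_ctx_size   = sizeof(lv_draw_stm32_dma2d_ctx_t);
//   lv_disp_drv_register(&disp_drv);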

void lv_draw_stm32_dma2d_ctx_init(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx)
{
    lv_draw_sw_init_ctx(drv, draw_ctx);

    lv_draw_stm32_dma2d_ctx_t * dma2d_draw_ctx = (lv_draw_sw_ctx_t *)draw_ctx;

    dma2d_draw_ctx->blend = lv_draw_stm32_dma2d_blend;
    dma2d_draw_ctx->base_draw.draw_img_decoded = lv_draw_stm32_dma2d_img_decoded;
    //dma2d_draw_ctx->base_draw.draw_img = lv_draw_stm32_dma2d_img;
    // Note: currently it does not make sense to use lv_gpu_stm32_dma2d_wait_cb() since waiting starts right after the dma2d transfer
    //dma2d_draw_ctx->base_draw.wait_for_finish = lv_gpu_stm32_dma2d_wait_cb;
    dma2d_draw_ctx->base_draw.buffer_copy = lv_draw_stm32_dma2d_buffer_copy;
}

void lv_draw_stm32_dma2d_ctx_deinit(lv_disp_drv_t * drv, lv_draw_ctx_t * draw_ctx)
{
    LV_UNUSED(drv);
    LV_UNUSED(draw_ctx);
}

static void lv_draw_stm32_dma2d_blend(lv_draw_ctx_t * draw_ctx, const lv_draw_sw_blend_dsc_t * dsc)
{
    if(dsc->blend_mode != LV_BLEND_MODE_NORMAL) {
        lv_draw_sw_blend_basic(draw_ctx, dsc);
        return;
    }
    // Note: x1 must be zero. Otherwise, there is no way to correctly calculate dest_stride.
    //LV_ASSERT_MSG(draw_ctx->buf_area->x1 == 0); // critical?
    // Both the draw buffer start address and the buffer size *must* be 32-byte aligned since the draw buffer cache is being invalidated.
    //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
    //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
    //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?

    if(dsc->src_buf) {
        // For performance reasons, both the source buffer start address and the buffer size *should* be 32-byte aligned since the source buffer cache is being cleaned.
        //uint32_t srcBufferLength = lv_area_get_size(dsc->blend_area) * sizeof(lv_color_t);
        //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
        //LV_ASSERT_MSG((uint32_t)dsc->src_buf % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
    }

    lv_area_t draw_area;
    if(!_lv_area_intersect(&draw_area, dsc->blend_area, draw_ctx->clip_area)) return;
    // + draw_ctx->buf_area has the entire draw buffer location
    // + draw_ctx->clip_area has the current draw buffer location
    // + dsc->blend_area has the location of the area intended to be painted - image etc.
    // + draw_area has the area actually being painted
    // All coordinates are relative to the screen.

    const lv_opa_t * mask = dsc->mask_buf;

    if(dsc->mask_buf && dsc->mask_res == LV_DRAW_MASK_RES_TRANSP) return;
    else if(dsc->mask_res == LV_DRAW_MASK_RES_FULL_COVER) mask = NULL;

    lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
    if(mask != NULL) {
        // For performance reasons, both the mask buffer start address and the buffer size *should* be 32-byte aligned since the mask buffer cache is being cleaned.
        //uint32_t srcBufferLength = lv_area_get_size(dsc->mask_area) * sizeof(lv_opa_t);
        //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
        //LV_ASSERT_MSG((uint32_t)mask % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)

        lv_coord_t mask_stride = lv_area_get_width(dsc->mask_area);
        lv_point_t mask_offset = lv_area_get_offset(dsc->mask_area, &draw_area); // mask offset in relation to draw_area

        if(dsc->src_buf == NULL) {  // 93.5%
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
            _lv_draw_stm32_dma2d_blend_paint(draw_ctx->buf, dest_stride, &draw_area, mask, mask_stride, &mask_offset, dsc->color,
                                             dsc->opa);
        }
        else {   // 0.2%
            // note: an (x)RGB dsc->src_buf does not carry alpha channel bytes;
            // the alpha channel bytes are carried in dsc->mask_buf
#if LV_COLOR_DEPTH == 32
            lv_coord_t src_stride = lv_area_get_width(dsc->blend_area);
            lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area
            lv_coord_t draw_width = lv_area_get_width(&draw_area);
            lv_coord_t draw_height = lv_area_get_height(&draw_area);

            // merge mask alpha bytes with src RGB bytes
            // TODO: optimize by reading 4 or 8 mask bytes at a time
            mask += (mask_stride * mask_offset.y) + mask_offset.x;
            lv_color_t * src_buf = (lv_color_t *)dsc->src_buf;
            src_buf += (src_stride * src_offset.y) + src_offset.x;
            uint16_t mask_buffer_offset = mask_stride - draw_width;
            uint16_t src_buffer_offset = src_stride - draw_width;
            while(draw_height > 0) {
                draw_height--;
                for(uint16_t x = 0; x < draw_width; x++) {
                    (*src_buf).ch.alpha = *mask;
                    src_buf++;
                    mask++;
                }
                mask += mask_buffer_offset;
                src_buf += src_buffer_offset;
            }

            lv_area_move(&draw_area, -draw_ctx->buf_area->x1,
                         -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area
            _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset, dsc->opa,
                                           ARGB8888, false);
#else
            // Note: 16-bit bitmap hardware blending with mask and background is possible, but requires a temp 24 or 32-bit buffer to combine the bitmap with the mask first.

            lv_draw_sw_blend_basic(draw_ctx, dsc); // (e.g. Shop Items)
            // clean cache after software drawing - this does not help since this is not the only place where the buffer is written without dma2d
            // lv_coord_t draw_width = lv_area_get_width(&draw_area);
            // lv_coord_t draw_height = lv_area_get_height(&draw_area);
            // uint32_t dest_address = (uint32_t)(draw_ctx->buf + (dest_stride * draw_area.y1) + draw_area.x1);
            // __lv_gpu_stm32_dma2d_clean_cache(dest_address, dest_stride - draw_width, draw_width, draw_height, sizeof(lv_color_t));
#endif
        }
    }
    else {
        if(dsc->src_buf == NULL) {  // 6.1%
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1,
                         -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area
            _lv_draw_stm32_dma2d_blend_fill(draw_ctx->buf, dest_stride, &draw_area, dsc->color, dsc->opa);
        }
        else {   // 0.2%
            lv_coord_t src_stride = lv_area_get_width(dsc->blend_area);
            lv_point_t src_offset = lv_area_get_offset(dsc->blend_area, &draw_area); // source image offset in relation to draw_area
            lv_area_move(&draw_area, -draw_ctx->buf_area->x1,
                         -draw_ctx->buf_area->y1); // translate the screen draw area to the origin of the buffer area
            _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, dsc->src_buf, src_stride, &src_offset, dsc->opa,
                                           LvglColorFormat, true);
        }
    }
}

// Does dest_area = intersect(draw_ctx->clip_area, src_area) ?
// See: https://github.com/lvgl/lvgl/issues/3714#issuecomment-1331710788
static void lv_draw_stm32_dma2d_buffer_copy(lv_draw_ctx_t * draw_ctx, void * dest_buf, lv_coord_t dest_stride,
                                            const lv_area_t * dest_area, void * src_buf, lv_coord_t src_stride, const lv_area_t * src_area)
{
    // Both the draw buffer start address and the buffer size *must* be 32-byte aligned since the draw buffer cache is being invalidated.
    //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
    //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
    //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?
    // FIXME:
    // 1. Both src_buf and dest_buf pixel sizes *must* be known to use DMA2D.
    // 2. Verify that both buffer start addresses and lengths are 32-byte (cache row size) aligned.
    LV_UNUSED(draw_ctx);
    lv_point_t src_offset = lv_area_get_offset(src_area, dest_area);
    // FIXME: use lv_area_move(dest_area, -dest_area->x1, -dest_area->y1) here ?
    // TODO: It is assumed that dest_buf and src_buf buffers are of lv_color_t type. Verify it, this assumption may be incorrect.
    _lv_draw_stm32_dma2d_blend_map((const lv_color_t *)dest_buf, dest_stride, dest_area, (const lv_color_t *)src_buf,
                                   src_stride, &src_offset, 0xff, LvglColorFormat, true);
    // TODO: Investigate if the output buffer cache needs to be invalidated. It depends on what the destination buffer is and how it is used next - by dma2d or not.
    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // TODO: is this line needed here?
}

static void lv_draw_stm32_dma2d_img_decoded(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                            const lv_area_t * coords, const uint8_t * src_buf, lv_img_cf_t color_format)
{
    if(draw_ctx->draw_img_decoded == NULL) return;
    lv_area_t draw_area;
    lv_area_copy(&draw_area, draw_ctx->clip_area);

    bool mask_any = lv_draw_mask_is_any(&draw_area);
    bool transform = img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE;
    const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(color_format);
    const bool ignoreBitmapAlpha = (color_format == LV_IMG_CF_RGBX8888);

    if(!mask_any && !transform && bitmapColorFormat != UNSUPPORTED && img_dsc->recolor_opa == LV_OPA_TRANSP) {
        // simple bitmap blending, optionally with a supported color format conversion - handled directly by dma2d
        lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
        lv_coord_t src_stride = lv_area_get_width(coords);
        lv_point_t src_offset = lv_area_get_offset(coords, &draw_area); // source image offset in relation to draw_area
        lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
        _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, src_buf, src_stride, &src_offset,
                                       img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha);
    }
    else {
        // all more complex cases which require additional image transformations
        lv_draw_sw_img_decoded(draw_ctx, img_dsc, coords, src_buf, color_format);
    }
}

static lv_point_t lv_area_get_offset(const lv_area_t * area1, const lv_area_t * area2)
{
    lv_point_t offset = {.x = area2->x1 - area1->x1, .y = area2->y1 - area1->y1};
    return offset;
}
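
// Example: if area1 spans (10;20)..(100;80) and area2 spans (30;40)..(60;70), the
// returned offset is {20, 20} - area2's top-left corner expressed relative to area1's.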

static dma2d_color_format_t lv_color_format_to_dma2d_color_format(lv_img_cf_t color_format)
{
    switch(color_format) {
        case LV_IMG_CF_RGBA8888:
            // note: LV_IMG_CF_RGBA8888 is actually ARGB8888
            return ARGB8888;
        case LV_IMG_CF_RGBX8888:
            // note: LV_IMG_CF_RGBX8888 is actually XRGB8888
            return ARGB8888;
        case LV_IMG_CF_RGB565:
            return RGB565;
        case LV_IMG_CF_TRUE_COLOR:
            return LvglColorFormat;
        case LV_IMG_CF_TRUE_COLOR_ALPHA:
#if LV_COLOR_DEPTH == 16
            // bitmap color format is 24b ARGB8565 - dma2d unsupported
            return UNSUPPORTED;
#elif LV_COLOR_DEPTH == 32
            return ARGB8888;
#else
            // unknown bitmap color format
            return UNSUPPORTED;
#endif
        default:
            return UNSUPPORTED;
    }
}

LV_STM32_DMA2D_STATIC lv_res_t lv_draw_stm32_dma2d_img(lv_draw_ctx_t * draw_ctx, const lv_draw_img_dsc_t * img_dsc,
                                                       const lv_area_t * src_area, const void * src)
{
    //if(lv_img_src_get_type(src) != LV_IMG_SRC_VARIABLE) return LV_RES_INV;
    return LV_RES_INV; // Note: this draw callback is currently disabled, so the code below is unreachable
    if(img_dsc->opa <= LV_OPA_MIN) return LV_RES_OK;
    const lv_img_dsc_t * img = src;
    const dma2d_color_format_t bitmapColorFormat = lv_color_format_to_dma2d_color_format(img->header.cf);
    const bool ignoreBitmapAlpha = (img->header.cf == LV_IMG_CF_RGBX8888);

    if(bitmapColorFormat == UNSUPPORTED || img_dsc->angle != 0 || img_dsc->zoom != LV_IMG_ZOOM_NONE) {
        return LV_RES_INV; // sorry, dma2d can't handle this
    }

    // FIXME: handle dsc.pivot, dsc.recolor, dsc.blend_mode
    // FIXME: src pixel size *must* be known to use DMA2D
    // FIXME: If the image is drawn by SW, then the output cache needs to be cleaned next. Currently it is not possible.
    // Both the draw buffer start address and the buffer size *must* be 32-byte aligned since the draw buffer cache is being invalidated.
    //uint32_t drawBufferLength = lv_area_get_size(draw_ctx->buf_area) * sizeof(lv_color_t);
    //LV_ASSERT_MSG(drawBufferLength % CACHE_ROW_SIZE == 0); // critical, but this is not the way to test it
    //LV_ASSERT_MSG((uint32_t)draw_ctx->buf % CACHE_ROW_SIZE == 0, "draw_ctx.buf is not 32B aligned"); // critical?

    // For performance reasons, both the source buffer start address and the buffer size *should* be 32-byte aligned since the source buffer cache is being cleaned.
    //uint32_t srcBufferLength = lv_area_get_size(src_area) * sizeof(lv_color_t); // TODO: verify src pixel size = sizeof(lv_color_t)
    //LV_ASSERT_MSG(srcBufferLength % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)
    //LV_ASSERT_MSG((uint32_t)src % CACHE_ROW_SIZE == 0); // FIXME: assert fails (performance, non-critical)

    lv_area_t draw_area;
    if(!_lv_area_intersect(&draw_area, src_area, draw_ctx->clip_area)) return LV_RES_OK;

    lv_coord_t dest_stride = lv_area_get_width(draw_ctx->buf_area);
    lv_point_t src_offset = lv_area_get_offset(src_area, &draw_area); // source image offset in relation to draw_area
    lv_area_move(&draw_area, -draw_ctx->buf_area->x1, -draw_ctx->buf_area->y1);
    _lv_draw_stm32_dma2d_blend_map(draw_ctx->buf, dest_stride, &draw_area, img->data, img->header.w,
                                   &src_offset, img_dsc->opa, bitmapColorFormat, ignoreBitmapAlpha);
    return LV_RES_OK;
}

LV_STM32_DMA2D_STATIC void lv_gpu_stm32_dma2d_wait_cb(lv_draw_ctx_t * draw_ctx)
{
    lv_disp_t * disp = _lv_refr_get_disp_refreshing();
    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(disp->driver);
    lv_draw_sw_wait_for_finish(draw_ctx);
}

/**********************
 *   STATIC FUNCTIONS
 **********************/

/**
 * @brief Fills draw_area with the specified color.
 * @param color color to be painted, note: alpha is ignored
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_fill(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                           const lv_area_t * draw_area, lv_color_t color, lv_opa_t opa)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    if(opa >= LV_OPA_MAX) {
        DMA2D->CR = 0x3UL << DMA2D_CR_MODE_Pos; // Register-to-memory (no FG nor BG, only output stage active)

        DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
        DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
        DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
        DMA2D->OOR = dest_stride - draw_width;  // out buffer offset
        // Note: unlike FGCOLR and BGCOLR, the OCOLR bits must match the DMA2D output color format; alpha can be specified
#if RBS_BIT
        // swap red/blue bits
        DMA2D->OCOLR = (color.ch.blue << 11) | (color.ch.green_l << 5 | color.ch.green_h << 8) | (color.ch.red);
#else
        DMA2D->OCOLR = color.full;
#endif
    }
    else {
        DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending)

        DMA2D->FGPFCCR = A8;
        DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
        // Alpha Mode 1: Replace the original foreground image alpha channel value by FGPFCCR.ALPHA
        DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos);
        //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);

        // Note: in Alpha Mode 1 FGMAR and FGOR are not used to supply the foreground A8 bytes,
        // those bytes are replaced by the constant ALPHA defined in FGPFCCR
        DMA2D->FGMAR = (uint32_t)dest_buf;
        DMA2D->FGOR = dest_stride;
        DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff; // swap FGCOLR R/B bits if the FGPFCCR.RBS (RBS_BIT) bit is set

        DMA2D->BGPFCCR = LvglColorFormat;
#if defined(DMA2D_BGPFCCR_RBS_Pos)
        DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
#endif
        DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
        DMA2D->BGOR = dest_stride - draw_width;
        DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
        __lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));

        DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
        DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
        DMA2D->OMAR = DMA2D->BGMAR;
        DMA2D->OOR = DMA2D->BGOR;
        DMA2D->OCOLR = 0;
    }
    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

/**
 * @brief Draws src (foreground) map on dst (background) map.
 * @param src_offset src offset in relation to dst, useful when src is larger than draw_area
 * @param opa constant opacity to be applied
 * @param src_color_format source bitmap color format
 * @param ignore_src_alpha if true, the src alpha channel is ignored
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_map(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                          const lv_area_t * draw_area, const void * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset, lv_opa_t opa,
                                                          dma2d_color_format_t src_color_format, bool ignore_src_alpha)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    if(opa <= LV_OPA_MIN || src_color_format == UNSUPPORTED) return;
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);
    bool bitmapHasOpacity = !ignore_src_alpha && (src_color_format == ARGB8888 || src_color_format == ARGB1555 ||
                                                  src_color_format == ARGB4444);

    if(opa >= LV_OPA_MAX) opa = 0xff;

    uint8_t srcBpp; // source bytes per pixel
    switch(src_color_format) {
        case ARGB8888:
            srcBpp = 4;
            break;
        case RGB888:
            srcBpp = 3;
            break;
        case RGB565:
        case ARGB1555:
        case ARGB4444:
            srcBpp = 2;
            break;
        default:
            LV_LOG_ERROR("unsupported color format");
            return;
    }

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    DMA2D->FGPFCCR = src_color_format;

    if(opa == 0xff && !bitmapHasOpacity) {
        // no need to blend
        if(src_color_format == LvglColorFormat) {
            // no need to convert the pixel format (PFC) either
            DMA2D->CR = 0x0UL;
        }
        else {
            DMA2D->CR = 0x1UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with PFC (FG fetch only with FG PFC active)
        }
        // Alpha Mode 0: No modification of the foreground image alpha channel value
    }
    else {
        // blend
        DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos; // Memory-to-memory with blending (FG and BG fetch with PFC and blending)
        DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
        if(bitmapHasOpacity) {
            // Alpha Mode 2: Replace the original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with the original alpha channel value
            DMA2D->FGPFCCR |= (0x2UL << DMA2D_FGPFCCR_AM_Pos);
        }
        else {
            // Alpha Mode 1: Replace the original foreground image alpha channel value by FGPFCCR.ALPHA
            DMA2D->FGPFCCR |= (0x1UL << DMA2D_FGPFCCR_AM_Pos);
        }
    }
#if defined(DMA2D_FGPFCCR_RBS_Pos)
    DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
#endif
    DMA2D->FGMAR = ((uint32_t)src_buf) + srcBpp * ((src_stride * src_offset->y) + src_offset->x);
    DMA2D->FGOR = src_stride - draw_width;
    DMA2D->FGCOLR = 0;  // used in A4 and A8 modes only
    __lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, srcBpp);

    DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
    DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
    DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
    DMA2D->OOR = dest_stride - draw_width;
    DMA2D->OCOLR = 0;

    if(opa != 0xff || bitmapHasOpacity) {
        // use background (BG*) registers
        DMA2D->BGPFCCR = LvglColorFormat;
#if defined(DMA2D_BGPFCCR_RBS_Pos)
        DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
#endif
        DMA2D->BGMAR = DMA2D->OMAR;
        DMA2D->BGOR = DMA2D->OOR;
        DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
        __lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));
    }

    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

/**
 * @brief Paints a solid color through an alpha mask with an additional constant opacity. Useful e.g. for painting anti-aliased fonts.
 * @param mask_offset mask offset in relation to dest, useful when the mask is larger than draw_area
 * @param color color to paint, note: alpha is ignored
 * @param opa constant opacity to be applied
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_blend_paint(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                            const lv_area_t * draw_area, const lv_opa_t * mask_buf, lv_coord_t mask_stride, const lv_point_t * mask_offset,
                                                            lv_color_t color, lv_opa_t opa)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    DMA2D->CR = 0x2UL << DMA2D_CR_MODE_Pos;  // Memory-to-memory with blending (FG and BG fetch with PFC and blending)

    DMA2D->FGPFCCR = A8;
    if(opa < LV_OPA_MAX) {
        DMA2D->FGPFCCR |= (opa << DMA2D_FGPFCCR_ALPHA_Pos);
        DMA2D->FGPFCCR |= (0x2UL <<
                           DMA2D_FGPFCCR_AM_Pos); // Alpha Mode 2: Replace the original foreground image alpha channel value by FGPFCCR.ALPHA multiplied with the original alpha channel value
    }
    //DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
    DMA2D->FGMAR = (uint32_t)(mask_buf + (mask_stride * mask_offset->y) + mask_offset->x);
    DMA2D->FGOR = mask_stride - draw_width;
    DMA2D->FGCOLR = lv_color_to32(color) & 0x00ffffff;  // swap FGCOLR R/B bits if the FGPFCCR.RBS (RBS_BIT) bit is set
    __lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_opa_t));

    DMA2D->BGPFCCR = LvglColorFormat;
#if defined(DMA2D_BGPFCCR_RBS_Pos)
    DMA2D->BGPFCCR |= (RBS_BIT << DMA2D_BGPFCCR_RBS_Pos);
#endif
    DMA2D->BGMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
    DMA2D->BGOR = dest_stride - draw_width;
    DMA2D->BGCOLR = 0;  // used in A4 and A8 modes only
    __lv_gpu_stm32_dma2d_clean_cache(DMA2D->BGMAR, DMA2D->BGOR, draw_width, draw_height, sizeof(lv_color_t));

    DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
    DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
    DMA2D->OMAR = DMA2D->BGMAR;
    DMA2D->OOR = DMA2D->BGOR;
    DMA2D->OCOLR = 0;
    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

/**
 * @brief Copies src (foreground) map to the dst (background) map.
 * @param src_offset src offset in relation to dst, useful when src is larger than draw_area
 */
LV_STM32_DMA2D_STATIC void _lv_draw_stm32_dma2d_copy_buffer(const lv_color_t * dest_buf, lv_coord_t dest_stride,
                                                            const lv_area_t * draw_area, const lv_color_t * src_buf, lv_coord_t src_stride, const lv_point_t * src_offset)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished"); // critical
    lv_coord_t draw_width = lv_area_get_width(draw_area);
    lv_coord_t draw_height = lv_area_get_height(draw_area);

    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL);

    DMA2D->CR = 0x0UL; // Memory-to-memory (FG fetch only)

    DMA2D->FGPFCCR = LvglColorFormat;
#if defined(DMA2D_FGPFCCR_RBS_Pos)
    DMA2D->FGPFCCR |= (RBS_BIT << DMA2D_FGPFCCR_RBS_Pos);
#endif
    DMA2D->FGMAR = (uint32_t)(src_buf + (src_stride * src_offset->y) + src_offset->x);
    DMA2D->FGOR = src_stride - draw_width;
    DMA2D->FGCOLR = 0;  // used in A4 and A8 modes only
    __lv_gpu_stm32_dma2d_clean_cache(DMA2D->FGMAR, DMA2D->FGOR, draw_width, draw_height, sizeof(lv_color_t));

    // Note: BG* registers do not need to be set up since BG is not used

    DMA2D->OPFCCR = LvglColorFormat;
#if defined(DMA2D_OPFCCR_RBS_Pos)
    DMA2D->OPFCCR |= (RBS_BIT << DMA2D_OPFCCR_RBS_Pos);
#endif
    DMA2D->OMAR = (uint32_t)(dest_buf + (dest_stride * draw_area->y1) + draw_area->x1);
    DMA2D->OOR = dest_stride - draw_width;
    DMA2D->OCOLR = 0;

    // PL - pixels per line (14 bit), NL - number of lines (16 bit)
    DMA2D->NLR = (draw_width << DMA2D_NLR_PL_Pos) | (draw_height << DMA2D_NLR_NL_Pos);

    _lv_gpu_stm32_dma2d_start_dma_transfer();
}

LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_start_dma_transfer(void)
{
    LV_ASSERT_MSG(!isDma2dInProgess, "dma2d transfer has not finished");
    isDma2dInProgess = true;
    DMA2D->IFCR = 0x3FU; // trigger ISR flags reset
    // Note: cleaning output buffer cache is needed only when buffer may be misaligned or adjacent area may have been drawn in sw-fashion, e.g. using lv_draw_sw_blend_basic()
#if LV_COLOR_DEPTH == 16
    __lv_gpu_stm32_dma2d_clean_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos,
                                     (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t));
#endif
    DMA2D->CR |= DMA2D_CR_START;
    // Note: for some reason mask buffer gets damaged during transfer if waiting is postponed
    _lv_gpu_stm32_dma2d_await_dma_transfer_finish(NULL); // FIXME: this line should not be needed here, but it is
}

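// Note: while a transfer is running the code below simply polls the DMA2D_CR START bit.
// If the display driver provides a wait_cb, it is called inside the polling loop so the
// application can yield (e.g. to an RTOS) instead of pure busy-waiting.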
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_await_dma_transfer_finish(lv_disp_drv_t * disp_drv)
{
    if(disp_drv && disp_drv->wait_cb) {
        while((DMA2D->CR & DMA2D_CR_START) != 0U) {
            disp_drv->wait_cb(disp_drv);
        }
    }
    else {
        while((DMA2D->CR & DMA2D_CR_START) != 0U);
    }

    __IO uint32_t isrFlags = DMA2D->ISR;

    if(isrFlags & DMA2D_ISR_CEIF) {
        LV_LOG_ERROR("DMA2D config error");
    }

    if(isrFlags & DMA2D_ISR_TEIF) {
        LV_LOG_ERROR("DMA2D transfer error");
    }

    DMA2D->IFCR = 0x3FU; // trigger ISR flags reset

    if(isDma2dInProgess) {
        // invalidate output buffer cached memory ONLY after DMA2D transfer
        //__lv_gpu_stm32_dma2d_invalidate_cache(DMA2D->OMAR, DMA2D->OOR, (DMA2D->NLR & DMA2D_NLR_PL_Msk) >> DMA2D_NLR_PL_Pos, (DMA2D->NLR & DMA2D_NLR_NL_Msk) >> DMA2D_NLR_NL_Pos, sizeof(lv_color_t));
        isDma2dInProgess = false;
    }
}

#if defined (LV_STM32_DMA2D_USE_M7_CACHE)
// Cortex-M7 DCache present
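// The helpers below maintain the cache line by line: each bitmap line is expanded to
// whole 32-byte cache rows, and a row already handled for the previous line is skipped.
// For example, a 70-byte line starting at 0x20000013 touches the three rows at
// 0x20000000, 0x20000020 and 0x20000040 (96 bytes in total).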
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_invalidate_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                                                lv_coord_t height, uint8_t pixel_size)
{
    if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled
    uint16_t stride = pixel_size * (width + offset); // in bytes
    uint16_t ll = pixel_size * width; // line length in bytes
    uint32_t n = 0; // address of the next cache row after the last invalidated row
    lv_coord_t h = 0;

    __DSB();

    while(h < height) {
        uint32_t a = address + (h * stride);
        uint32_t e = a + ll; // end address, address of the first byte after the current line
        a &= ~(CACHE_ROW_SIZE - 1U);
        if(a < n) a = n;  // prevent the previous last cache row from being invalidated again

        while(a < e) {
            SCB->DCIMVAC = a;
            a += CACHE_ROW_SIZE;
        }

        n = a;
        h++;
    }

    __DSB();
    __ISB();
}

LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dma2d_clean_cache(uint32_t address, lv_coord_t offset, lv_coord_t width,
                                                           lv_coord_t height, uint8_t pixel_size)
{
    if(((SCB->CCR) & SCB_CCR_DC_Msk) == 0) return; // L1 data cache is disabled
    uint16_t stride = pixel_size * (width + offset); // in bytes
    uint16_t ll = pixel_size * width; // line length in bytes
    uint32_t n = 0; // address of the next cache row after the last cleaned row
    lv_coord_t h = 0;
    __DSB();

    while(h < height) {
        uint32_t a = address + (h * stride);
        uint32_t e = a + ll; // end address, address of the first byte after the current line
        a &= ~(CACHE_ROW_SIZE - 1U);
        if(a < n) a = n;  // prevent the previous last cache row from being cleaned again

        while(a < e) {
            SCB->DCCMVAC = a;
            a += CACHE_ROW_SIZE;
        }

        n = a;
        h++;
    }

    __DSB();
    __ISB();
}
#endif // LV_STM32_DMA2D_USE_M7_CACHE

#if defined(LV_STM32_DMA2D_TEST)
// initialize µs timer
LV_STM32_DMA2D_STATIC bool _lv_gpu_stm32_dwt_init(void)
{
    // disable TRC
    CoreDebug->DEMCR &= ~CoreDebug_DEMCR_TRCENA_Msk;
    // enable TRC
    CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;

#if defined(__CORTEX_M) && (__CORTEX_M == 7U)
    DWT->LAR = 0xC5ACCE55; // unlock DWT registers on Cortex-M7
#endif
    // disable clock cycle counter
    DWT->CTRL &= ~DWT_CTRL_CYCCNTENA_Msk;
    // enable clock cycle counter
    DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;

    // reset the clock cycle counter value
    DWT->CYCCNT = 0;

    // 3 NO OPERATION instructions
    __ASM volatile("NOP");
    __ASM volatile("NOP");
    __ASM volatile("NOP");

    // check if the clock cycle counter has started
    if(DWT->CYCCNT) {
        return true; // clock cycle counter started
    }
    else {
        return false; // clock cycle counter not started
    }
}

// get elapsed µs since reset
LV_STM32_DMA2D_STATIC uint32_t _lv_gpu_stm32_dwt_get_us(void)
{
    // use a 64-bit intermediate so the multiplication does not overflow for longer intervals
    uint32_t us = (uint32_t)(((uint64_t)DWT->CYCCNT * 1000000U) / HAL_RCC_GetHCLKFreq());
    return us;
}

// reset µs timer
LV_STM32_DMA2D_STATIC void _lv_gpu_stm32_dwt_reset(void)
{
    DWT->CYCCNT = 0;
}
#endif // LV_STM32_DMA2D_TEST
#endif // LV_USE_GPU_STM32_DMA2D