1 /**
2  * @file gifdec_mve.h
3  *
4  */
5 
6 #ifndef GIFDEC_MVE_H
7 #define GIFDEC_MVE_H
8 
9 #ifdef __cplusplus
10 extern "C" {
11 #endif
12 
13 /*********************
14  *      INCLUDES
15  *********************/
16 #include <stdint.h>
17 #include "../../misc/lv_color.h"
18 
19 /*********************
20  *      DEFINES
21  *********************/
22 
/* Expands to a call to _gifdec_fill_bg_mve() with all arguments forwarded unchanged.
 * NOTE(review): presumably the GIF decoder picks this up as its background-fill implementation
 * when this header is included - confirm at the use site. */
#define GIFDEC_FILL_BG(dst, w, h, stride, color, opa) \
    _gifdec_fill_bg_mve(dst, w, h, stride, color, opa)
25 
/* Expands to a call to _gifdec_render_frame_mve() with all arguments forwarded unchanged.
 * NOTE(review): presumably the GIF decoder picks this up as its frame-render implementation
 * when this header is included - confirm at the use site. */
#define GIFDEC_RENDER_FRAME(dst, w, h, stride, frame, pattern, tindex) \
    _gifdec_render_frame_mve(dst, w, h, stride, frame, pattern, tindex)
28 
29 /**********************
30  *      MACROS
31  **********************/
32 
33 /**********************
34  *      TYPEDEFS
35  **********************/
36 
37 /**********************
38  * GLOBAL PROTOTYPES
39  **********************/
40 
_gifdec_fill_bg_mve(uint8_t * dst,uint16_t w,uint16_t h,uint16_t stride,uint8_t * color,uint8_t opa)41 static inline void _gifdec_fill_bg_mve(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride, uint8_t * color,
42                                        uint8_t opa)
43 {
44     lv_color32_t c = lv_color32_make(*(color + 0), *(color + 1), *(color + 2), opa);
45     uint32_t color_32 = *(uint32_t *)&c;
46 
47     __asm volatile(
48         ".p2align 2                                             \n"
49         "vdup.32             q0, %[src]                         \n"
50         "3:                                                     \n"
51         "mov                 r0, %[dst]                         \n"
52 
53         "wlstp.32            lr, %[w], 1f                       \n"
54         "2:                                                     \n"
55 
56         "vstrw.32            q0, [r0], #16                      \n"
57         "letp                lr, 2b                             \n"
58         "1:                                                     \n"
59         "add                 %[dst], %[iTargetStride]           \n"
60         "subs                %[h], #1                           \n"
61         "bne                 3b                                 \n"
62         : [dst] "+r"(dst),
63         [h] "+r"(h)
64         : [src] "r"(color_32),
65         [w] "r"(w),
66         [iTargetStride] "r"(stride * sizeof(uint32_t))
67         : "r0", "q0", "memory", "r14", "cc");
68 }
69 
/**
 * Convert a frame of 8-bit color indexes into 32-bit pixels using Arm MVE instructions.
 * For every pixel the index byte is read from `frame`, three bytes are fetched from `pattern`
 * at offsets index * 3, index * 3 + 1 and index * 3 + 2, and a 4-byte pixel with the top byte
 * set to 255 is stored to `dst`. Pixels whose index equals `tindex` are not written.
 * Returns immediately when `w` or `h` is 0.
 * @param dst       destination buffer, 4 bytes are written per rendered pixel
 * @param w         number of pixels processed in each row
 * @param h         number of rows to process
 * @param stride    per-row step: `frame` advances by `stride` bytes and `dst` by `stride` * 4 bytes
 * @param frame     source indexes, one byte per pixel
 * @param pattern   color table with 3 bytes per entry
 *                  (red, green, blue order according to the comments in the code)
 * @param tindex    index value that is skipped (left untouched in `dst`)
 */
static inline void _gifdec_render_frame_mve(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride, uint8_t * frame,
                                            uint8_t * pattern, uint16_t tindex)
{
    /* The row loop is a do-while ("subs" + "bne"), so an empty area must be rejected up front */
    if(w == 0 || h == 0) {
        return;
    }

    /* NOTE(review): `w` and `h` are 16-bit C values but "wlstp" and "subs" use the whole 32-bit
     * register; this relies on the compiler providing them zero-extended - confirm. */
    __asm volatile(
        "vmov.u16       q3, #255                                \n"
        "vshl.u16       q3, q3, #8                              \n" /* q3 = 0xFF00 in every lane: alpha 255 in the high byte */

        "mov            r0, #2                                  \n"
        "vidup.u16      q6, r0, #4                              \n" /* [2, 6, 10, 14, 18, 22, 26, 30]: byte offsets of the upper halfword of 8 pixels */
        "mov            r0, #0                                  \n"
        "vidup.u16      q7, r0, #4                              \n" /* [0, 4, 8, 12, 16, 20, 24, 28]: byte offsets of the lower halfword of 8 pixels */

        "3:                                                     \n" /* row loop */
        "mov            r1, %[dst]                              \n" /* r1 = write cursor inside the row */
        "mov            r2, %[frame]                            \n" /* r2 = read cursor inside the row */

        "wlstp.16       lr, %[w], 1f                            \n" /* loop over w 16-bit elements, up to 8 per iteration */
        "2:                                                     \n"

        "mov            r0, #3                                  \n"
        "vldrb.u16      q4, [r2], #8                            \n" /* load 8 index bytes widened to 16 bit, advance 8 bytes */
        "vmul.u16       q5, q4, r0                              \n" /* q5 = index * 3 = byte offset of the entry in pattern */

        "mov            r0, #1                                  \n"
        "vldrb.u16      q2, [%[pattern], q5]                    \n" /* gather 8 pixel r (entry byte 0) */

        "vadd.u16       q5, q5, r0                              \n"
        "vldrb.u16      q1, [%[pattern], q5]                    \n" /* gather 8 pixel g (entry byte 1) */

        "vadd.u16       q5, q5, r0                              \n"
        "vldrb.u16      q0, [%[pattern], q5]                    \n" /* gather 8 pixel b (entry byte 2) */

        "vshl.u16       q1, q1, #8                              \n" /* move g to the high byte */

        "vorr.u16       q0, q0, q1                              \n" /* q0 = (g << 8) | b for 8 pixels */
        "vorr.u16       q1, q2, q3                              \n" /* q1 = (255 << 8) | r for 8 pixels */

        "vcmp.i16       ne, q4, %[tindex]                       \n" /* select lanes whose index differs from tindex */
        "vpstt                                                  \n" /* predicate the next two stores on that comparison */
        "vstrht.16      q0, [r1, q7]                            \n" /* scatter gb to the lower halfword of each pixel */
        "vstrht.16      q1, [r1, q6]                            \n" /* scatter ar to the upper halfword of each pixel */
        "add            r1, r1, #32                             \n" /* 8 pixels * 4 bytes */

        "letp           lr, 2b                                  \n"

        "1:                                                     \n"
        "mov            r0, %[stride], LSL #2                   \n" /* r0 = stride * 4 */
        "add            %[dst], r0                              \n" /* next destination row */
        "add            %[frame], %[stride]                     \n" /* next source row */
        "subs           %[h], #1                                \n"
        "bne            3b                                      \n"

        : [dst] "+r"(dst),
        [frame] "+r"(frame),
        [h] "+r"(h)
        : [pattern] "r"(pattern),
        [w] "r"(w),
        [stride] "r"(stride),
        [tindex] "r"(tindex)
        : "r0", "r1", "r2", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "memory", "r14", "cc");
}
135 
136 #ifdef __cplusplus
137 } /*extern "C"*/
138 #endif
139 
140 #endif /*GIFDEC_MVE_H*/
141