/**
 * @file gifdec_mve.h
 *
 */

#ifndef GIFDEC_MVE_H
#define GIFDEC_MVE_H

#ifdef __cplusplus
extern "C" {
#endif

/*********************
 *      INCLUDES
 *********************/
#include <stdint.h>
#include "../../misc/lv_color.h"

/*********************
 *      DEFINES
 *********************/
#define GIFDEC_FILL_BG(dst, w, h, stride, color, opa) \
    _gifdec_fill_bg_mve(dst, w, h, stride, color, opa)

#define GIFDEC_RENDER_FRAME(dst, w, h, stride, frame, pattern, tindex) \
    _gifdec_render_frame_mve(dst, w, h, stride, frame, pattern, tindex)

/**********************
 *      MACROS
 **********************/

/**********************
 *      TYPEDEFS
 **********************/

/**********************
 * GLOBAL PROTOTYPES
 **********************/

/**
 * Fill a w x h region of an ARGB8888 buffer with a solid color.
 * Each row is written with a tail-predicated MVE loop (`wlstp`/`letp`),
 * storing 4 pixels per `vstrw.32`. `stride` is given in pixels.
 */
static inline void _gifdec_fill_bg_mve(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride,
                                       uint8_t * color, uint8_t opa)
{
    if(w == 0 || h == 0) {
        return;
    }

    lv_color32_t c = lv_color32_make(*(color + 0), *(color + 1), *(color + 2), opa);
    uint32_t color_32 = *(uint32_t *)&c;

    __asm volatile(
        ".p2align 2                         \n"
        "vdup.32 q0, %[src]                 \n" /* broadcast the ARGB8888 color to all 4 lanes */
        "3:                                 \n" /* per-row loop */
        "mov r0, %[dst]                     \n"
        "wlstp.32 lr, %[w], 1f              \n" /* tail-predicated loop over w pixels */
        "2:                                 \n"
        "vstrw.32 q0, [r0], #16             \n" /* store 4 pixels (16 bytes) */
        "letp lr, 2b                        \n"
        "1:                                 \n"
        "add %[dst], %[iTargetStride]       \n" /* advance dst by one row (stride * 4 bytes) */
        "subs %[h], #1                      \n"
        "bne 3b                             \n"
        : [dst] "+r"(dst), [h] "+r"(h)
        : [src] "r"(color_32), [w] "r"(w),
          [iTargetStride] "r"(stride * sizeof(uint32_t))
        : "r0", "q0", "memory", "r14", "cc");
}

/**
 * Render one 8-bit indexed frame into an ARGB8888 buffer.
 * Per iteration, 8 palette indices are widened to u16, gather-loaded from the
 * RGB palette (`pattern`, 3 bytes per entry), repacked as b|g<<8 and r|255<<8
 * halfwords, and scatter-stored with `vstrht.16`. The `vcmp`/`vpstt` predicate
 * skips pixels whose index equals the transparent index `tindex`.
 * `stride` is given in pixels.
 */
static inline void _gifdec_render_frame_mve(uint8_t * dst, uint16_t w, uint16_t h, uint16_t stride,
                                            uint8_t * frame, uint8_t * pattern, uint16_t tindex)
{
    if(w == 0 || h == 0) {
        return;
    }

    __asm volatile(
        "vmov.u16 q3, #255                  \n"
        "vshl.u16 q3, q3, #8                \n" /* alpha 0xFF shifted into the high byte */
        "mov r0, #2                         \n"
        "vidup.u16 q6, r0, #4               \n" /* byte offsets [2, 6, 10, 14, 18, 22, 26, 30] for the ar halfwords */
        "mov r0, #0                         \n"
        "vidup.u16 q7, r0, #4               \n" /* byte offsets [0, 4, 8, 12, 16, 20, 24, 28] for the gb halfwords */
        "3:                                 \n" /* per-row loop */
        "mov r1, %[dst]                     \n"
        "mov r2, %[frame]                   \n"
        "wlstp.16 lr, %[w], 1f              \n" /* tail-predicated loop over w pixels */
        "2:                                 \n"
        "mov r0, #3                         \n"
        "vldrb.u16 q4, [r2], #8             \n" /* load 8 palette indices */
        "vmul.u16 q5, q4, r0                \n" /* index * 3 = byte offset into the RGB palette */
        "mov r0, #1                         \n"
        "vldrb.u16 q2, [%[pattern], q5]     \n" /* gather 8 pixels' r */
        "vadd.u16 q5, q5, r0                \n"
        "vldrb.u16 q1, [%[pattern], q5]     \n" /* gather 8 pixels' g */
        "vadd.u16 q5, q5, r0                \n"
        "vldrb.u16 q0, [%[pattern], q5]     \n" /* gather 8 pixels' b */
        "vshl.u16 q1, q1, #8                \n" /* left shift g by 8 */
        "vorr.u16 q0, q0, q1                \n" /* pack 8 gb halfwords */
        "vorr.u16 q1, q2, q3                \n" /* pack 8 ar halfwords */
        "vcmp.i16 ne, q4, %[tindex]         \n" /* predicate: index != transparent index */
        "vpstt                              \n"
        "vstrht.16 q0, [r1, q7]             \n" /* scatter gb to the even halfwords */
        "vstrht.16 q1, [r1, q6]             \n" /* scatter ar to the odd halfwords */
        "add r1, r1, #32                    \n" /* advance dst by 8 pixels (32 bytes) */
        "letp lr, 2b                        \n"
        "1:                                 \n"
        "mov r0, %[stride], LSL #2          \n" /* dst row stride in bytes */
        "add %[dst], r0                     \n"
        "add %[frame], %[stride]            \n" /* frame row stride: 1 byte per pixel */
        "subs %[h], #1                      \n"
        "bne 3b                             \n"
        : [dst] "+r"(dst), [frame] "+r"(frame), [h] "+r"(h)
        : [pattern] "r"(pattern), [w] "r"(w), [stride] "r"(stride), [tindex] "r"(tindex)
        : "r0", "r1", "r2", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
          "memory", "r14", "cc");
}

#ifdef __cplusplus
} /*extern "C"*/
#endif

#endif /*GIFDEC_MVE_H*/
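/*
 * Usage sketch (illustrative only, not part of this header): a GIF decoder
 * might clear the canvas once, then blit each decoded frame over it. The
 * names `canvas`, `indexed_frame`, `palette` and `transparent_idx` are
 * hypothetical; `stride` is in pixels and the palette holds 3 bytes (RGB)
 * per entry.
 *
 *     uint8_t bg_rgb[3] = {0x00, 0x00, 0x00};
 *     GIFDEC_FILL_BG(canvas, width, height, stride, bg_rgb, 0xFF);
 *     GIFDEC_RENDER_FRAME(canvas, width, height, stride,
 *                         indexed_frame, palette, transparent_idx);
 */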