/**
 * @file lv_blend_neon.S
 *
 * NEON (ARMv7-A, GNU as unified syntax) blend kernels.
 *
 * Every exported routine is one expansion of the `blender` macro. The
 * routine receives in r0 a pointer to a descriptor that `init` reads at
 * fixed byte offsets (0: opa, 4: dst address, 8: width, 12: height,
 * 16: dst stride, 20: src address, 24: src stride, 28: mask address,
 * 32: mask stride). NOTE(review): the matching C structure is presumably
 * declared in lv_blend_neon.h - confirm the offsets against it.
 *
 * The bpp arguments used by the macros are format selectors:
 *   0 = single color, 8 = alpha mask, 16 = RGB565, 24 = RGB888,
 *   31 = XRGB8888, 32 = ARGB8888.
 */

#ifndef __ASSEMBLY__
#define __ASSEMBLY__
#endif

#include "lv_blend_neon.h"

#if LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON

.text
.fpu neon
.arch armv7a
.syntax unified
.p2align 2

@ d0 ~ d3  : src B,G,R,A
@ d4 ~ d7  : dst B,G,R,A
@ q8       : src RGB565 raw
@ q9       : dst RGB565 raw
@ q10 ~ q12: pre-multiplied src
@ d26~29   : temp
@ d30      : mask
@ d31      : opa

@ r0/r1 double as scratch lane masks once the descriptor pointer in r0
@ has been consumed by init.
FG_MASK     .req r0
BG_MASK     .req r1
DST_ADDR    .req r2
DST_W       .req r3
DST_H       .req r4
DST_STRIDE  .req r5
SRC_ADDR    .req r6
SRC_STRIDE  .req r7
MASK_ADDR   .req r8
MASK_STRIDE .req r9
W           .req r10
H           .req r11
S_8888_L    .qn  q0
S_8888_H    .qn  q1
D_8888_L    .qn  q2
D_8888_H    .qn  q3
    S_B     .dn  d0
    S_G     .dn  d1
    S_R     .dn  d2
    S_A     .dn  d3
    D_B     .dn  d4
    D_G     .dn  d5
    D_R     .dn  d6
    D_A     .dn  d7
S_565       .qn  q8
D_565       .qn  q9
    S_565_L .dn  d16
    S_565_H .dn  d17
    D_565_L .dn  d18
    D_565_H .dn  d19
PREMULT_B   .qn  q10
PREMULT_G   .qn  q11
PREMULT_R   .qn  q12
TMP_Q0      .qn  q13
    TMP_D0  .dn  d26
    TMP_D1  .dn  d27
TMP_Q1      .qn  q14
    TMP_D2  .dn  d28
    TMP_D3  .dn  d29
    M_A     .dn  d30
    OPA     .dn  d31

@ convert reg, bpp, intlv
@   Converts the 8 pixels held in the S or D register set between the
@   planar B/G/R/A layout used for arithmetic and the memory layout
@   selected by bpp.
@   reg   : S or D (prefix of the register aliases above)
@   bpp   : format selector (>= 31, 24 or 16; other values emit nothing)
@   intlv : 1 = planar -> memory layout, 0 = memory layout -> planar
@   The 16 bpp variants clobber TMP_Q0 and TMP_Q1.
.macro convert reg, bpp, intlv
.if \bpp >= 31
    .if \intlv
        vzip.8          \reg\()_B, \reg\()_R            @ BRBRBRBR GGGGGGGG BRBRBRBR AAAAAAAA
        vzip.8          \reg\()_G, \reg\()_A            @ BRBRBRBR GAGAGAGA BRBRBRBR GAGAGAGA
        vzip.8          \reg\()_R, \reg\()_A            @ BRBRBRBR GAGAGAGA BGRABGRA BGRABGRA
        vzip.8          \reg\()_B, \reg\()_G            @ BGRABGRA BGRABGRA BGRABGRA BGRABGRA
    .else
        vuzp.8          \reg\()_B, \reg\()_G            @ BRBRBRBR GAGAGAGA BGRABGRA BGRABGRA
        vuzp.8          \reg\()_R, \reg\()_A            @ BRBRBRBR GAGAGAGA BRBRBRBR GAGAGAGA
        vuzp.8          \reg\()_G, \reg\()_A            @ BRBRBRBR GGGGGGGG BRBRBRBR AAAAAAAA
        vuzp.8          \reg\()_B, \reg\()_R            @ BBBBBBBB GGGGGGGG RRRRRRRR AAAAAAAA
    .endif
.elseif \bpp == 24
    .if \intlv  @ for init only (same B,G,R for all channel)
        vzip.8          \reg\()_B, \reg\()_G            @ BGBGBGBG BGBGBGBG RRRRRRRR
        vzip.16         \reg\()_B, \reg\()_R            @ BGRRBGRR BGBGBGBG BGRRBGRR
        vsli.64         \reg\()_8888_L, \reg\()_8888_L, #24 @ BGRBGRRB BGBBGBGB
        vsli.64         \reg\()_B, \reg\()_G, #48       @ BGRBGRBG
        vsri.64         \reg\()_R, \reg\()_B, #8        @ GRBGRBGR
        vsri.64         \reg\()_G, \reg\()_R, #8        @ RBGRBGRB
    .endif
.elseif \bpp == 16
    .if \intlv
        vshll.u8        \reg\()_565, \reg\()_R, #8      @ RRRrrRRR 00000000
        vshll.u8        TMP_Q0, \reg\()_G, #8           @ GGGgggGG 00000000
        vshll.u8        TMP_Q1, \reg\()_B, #8           @ BBBbbBBB 00000000
        vsri.16         \reg\()_565, TMP_Q0, #5         @ RRRrrGGG gggGG000
        vsri.16         \reg\()_565, TMP_Q1, #11        @ RRRrrGGG gggBBBbb
    .else
        vshr.u8         TMP_Q0, \reg\()_565, #3         @ 000RRRrr 000gggBB
        vshrn.i16       \reg\()_G, \reg\()_565, #5      @ rrGGGggg
        vshrn.i16       \reg\()_R, TMP_Q0, #5           @ RRRrr000
        vshl.i8         \reg\()_G, \reg\()_G, #2        @ GGGggg00
        vshl.i16        TMP_Q1, \reg\()_565, #3         @ rrGGGggg BBBbb000
        vsri.8          \reg\()_R, \reg\()_R, #5        @ RRRrrRRR
        vmovn.i16       \reg\()_B, TMP_Q1               @ BBBbb000
        vsri.8          \reg\()_G, \reg\()_G, #6        @ GGGgggGG
        vsri.8          \reg\()_B, \reg\()_B, #5        @ BBBbbBBB
    .endif
.endif
.endm

@ ldst op, bpp, len, mem, reg, cvt, wb
@   Loads or stores len pixels between memory and the S / D / M register set.
@   op  : ld or st
@   bpp : format selector of the memory side (>= 31, 24, 16, 8 or 0)
@   len : pixel count, 1..8 (bpp 0 only accepts 8)
@   mem : SRC, DST or MASK (prefix of the address register alias)
@   reg : S, D or M (prefix of the data register aliases)
@   cvt : 1 = convert between memory layout and planar B/G/R/A,
@         0 = move the raw memory layout
@   wb  : pass ! to leave the address register advanced past the data;
@         when blank the address register is unchanged on exit (partial
@         transfers advance it internally and subtract the byte count again).
@   With bpp 0 the macro emits code only when wb is blank.
@   A converting load of a format without stored alpha (8 < bpp < 32) also
@   sets the alpha plane of reg to 0xFF.
.macro ldst op, bpp, len, mem, reg, cvt, wb
.if \bpp >= 31
    .if \len == 8
        .if \cvt
            v\op\()4.8      {\reg\()_B, \reg\()_G, \reg\()_R, \reg\()_A}, [\mem\()_ADDR]\wb
        .else
            v\op\()1.32     {\reg\()_8888_L, \reg\()_8888_H}, [\mem\()_ADDR]\wb
        .endif
    .else
        .ifc \op,st
            .if \cvt
                convert     \reg, \bpp, 1
            .endif
        .endif
        .if \len == 7
            v\op\()1.32     {\reg\()_8888_L}, [\mem\()_ADDR]!
            v\op\()1.32     {\reg\()_R}, [\mem\()_ADDR]!
            v\op\()1.32     {\reg\()_A[0]}, [\mem\()_ADDR]!
        .elseif \len == 6
            v\op\()1.32     {\reg\()_8888_L}, [\mem\()_ADDR]!
            v\op\()1.32     {\reg\()_R}, [\mem\()_ADDR]!
        .elseif \len == 5
            v\op\()1.32     {\reg\()_8888_L}, [\mem\()_ADDR]!
            v\op\()1.32     {\reg\()_R[0]}, [\mem\()_ADDR]!
        .elseif \len == 4
            v\op\()1.32     {\reg\()_8888_L}, [\mem\()_ADDR]\wb
        .elseif \len == 3
            v\op\()1.32     {\reg\()_B}, [\mem\()_ADDR]!
            v\op\()1.32     {\reg\()_G[0]}, [\mem\()_ADDR]!
        .elseif \len == 2
            v\op\()1.32     {\reg\()_B}, [\mem\()_ADDR]\wb
        .elseif \len == 1
            v\op\()1.32     {\reg\()_B[0]}, [\mem\()_ADDR]\wb
        .else
            .error "[32bpp]len should be 1~8"
        .endif
        .ifc \op,ld
            .if \cvt
                convert     \reg, \bpp, 0
            .endif
        .endif
        .ifb \wb
            @ lengths 4, 2 and 1 used a single access without write-back
            .if (\len != 4) && (\len != 2) && (\len != 1)
                sub         \mem\()_ADDR, #4*\len
            .endif
        .endif
    .endif
.elseif \bpp == 24
    .if \len == 8
        .if \cvt
            v\op\()3.8      {\reg\()_B, \reg\()_G, \reg\()_R}, [\mem\()_ADDR]\wb
        .else
            v\op\()1.8      {\reg\()_B, \reg\()_G, \reg\()_R}, [\mem\()_ADDR]\wb
        .endif
    .elseif (\len < 8) && (\len > 0)
        .if \cvt
            @ one 3-byte pixel per lane
            v\op\()3.8      {\reg\()_B[0], \reg\()_G[0], \reg\()_R[0]}, [\mem\()_ADDR]!
            .if \len > 1
                v\op\()3.8  {\reg\()_B[1], \reg\()_G[1], \reg\()_R[1]}, [\mem\()_ADDR]!
            .endif
            .if \len > 2
                v\op\()3.8  {\reg\()_B[2], \reg\()_G[2], \reg\()_R[2]}, [\mem\()_ADDR]!
            .endif
            .if \len > 3
                v\op\()3.8  {\reg\()_B[3], \reg\()_G[3], \reg\()_R[3]}, [\mem\()_ADDR]!
            .endif
            .if \len > 4
                v\op\()3.8  {\reg\()_B[4], \reg\()_G[4], \reg\()_R[4]}, [\mem\()_ADDR]!
            .endif
            .if \len > 5
                v\op\()3.8  {\reg\()_B[5], \reg\()_G[5], \reg\()_R[5]}, [\mem\()_ADDR]!
            .endif
            .if \len > 6
                v\op\()3.8  {\reg\()_B[6], \reg\()_G[6], \reg\()_R[6]}, [\mem\()_ADDR]!
            .endif
            .ifb \wb
                sub         \mem\()_ADDR, #3*\len
            .endif
        .else
            @ raw move of 3*len bytes, split into the widest accesses available
            .if \len == 7
                v\op\()1.32 {\reg\()_8888_L}, [\mem\()_ADDR]!
                v\op\()1.32 {\reg\()_R[0]}, [\mem\()_ADDR]!
                v\op\()1.8  {\reg\()_R[4]}, [\mem\()_ADDR]!
            .elseif \len == 6
                v\op\()1.32 {\reg\()_8888_L}, [\mem\()_ADDR]!
                v\op\()1.16 {\reg\()_R[0]}, [\mem\()_ADDR]!
            .elseif \len == 5
                v\op\()1.32 {\reg\()_B}, [\mem\()_ADDR]!
                v\op\()1.32 {\reg\()_G[0]}, [\mem\()_ADDR]!
                v\op\()1.16 {\reg\()_G[2]}, [\mem\()_ADDR]!
                v\op\()1.8  {\reg\()_G[6]}, [\mem\()_ADDR]!
            .elseif \len == 4
                v\op\()1.32 {\reg\()_B}, [\mem\()_ADDR]!
                v\op\()1.32 {\reg\()_G[0]}, [\mem\()_ADDR]!
            .elseif \len == 3
                v\op\()1.32 {\reg\()_B}, [\mem\()_ADDR]!
                v\op\()1.8  {\reg\()_G[0]}, [\mem\()_ADDR]!
            .elseif \len == 2
                v\op\()1.32 {\reg\()_B[0]}, [\mem\()_ADDR]!
                v\op\()1.16 {\reg\()_B[2]}, [\mem\()_ADDR]!
            .elseif \len == 1
                v\op\()1.16 {\reg\()_B[0]}, [\mem\()_ADDR]!
                v\op\()1.8  {\reg\()_B[2]}, [\mem\()_ADDR]!
            .endif
            .ifb \wb
                sub         \mem\()_ADDR, #3*\len
            .endif
        .endif
    .else
        .error "[24bpp]len should be 1~8"
    .endif
.elseif \bpp == 16
    .ifc \op,st
        .if \cvt
            convert     \reg, \bpp, 1
        .endif
    .endif
    .if \len == 8
        v\op\()1.16     {\reg\()_565}, [\mem\()_ADDR]\wb
    .elseif \len == 7
        v\op\()1.16     {\reg\()_565_L}, [\mem\()_ADDR]!
        v\op\()1.32     {\reg\()_565_H[0]}, [\mem\()_ADDR]!
        v\op\()1.16     {\reg\()_565_H[2]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #14
        .endif
    .elseif \len == 6
        v\op\()1.16     {\reg\()_565_L}, [\mem\()_ADDR]!
        v\op\()1.32     {\reg\()_565_H[0]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #12
        .endif
    .elseif \len == 5
        v\op\()1.16     {\reg\()_565_L}, [\mem\()_ADDR]!
        v\op\()1.16     {\reg\()_565_H[0]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #10
        .endif
    .elseif \len == 4
        v\op\()1.16     {\reg\()_565_L}, [\mem\()_ADDR]\wb
    .elseif \len == 3
        v\op\()1.32     {\reg\()_565_L[0]}, [\mem\()_ADDR]!
        v\op\()1.16     {\reg\()_565_L[2]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #6
        .endif
    .elseif \len == 2
        v\op\()1.32     {\reg\()_565_L[0]}, [\mem\()_ADDR]\wb
    .elseif \len == 1
        v\op\()1.16     {\reg\()_565_L[0]}, [\mem\()_ADDR]\wb
    .else
        .error "[16bpp]len should be 1~8"
    .endif
    .ifc \op,ld
        .if \cvt
            convert     \reg, \bpp, 0
        .endif
    .endif
.elseif \bpp == 8
    .if \len == 8
        v\op\()1.8      {\reg\()_A}, [\mem\()_ADDR]\wb
    .elseif \len == 7
        v\op\()1.32     {\reg\()_A[0]}, [\mem\()_ADDR]!
        v\op\()1.16     {\reg\()_A[2]}, [\mem\()_ADDR]!
        v\op\()1.8      {\reg\()_A[6]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #7
        .endif
    .elseif \len == 6
        v\op\()1.32     {\reg\()_A[0]}, [\mem\()_ADDR]!
        v\op\()1.16     {\reg\()_A[2]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #6
        .endif
    .elseif \len == 5
        v\op\()1.32     {\reg\()_A[0]}, [\mem\()_ADDR]!
        v\op\()1.8      {\reg\()_A[4]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #5
        .endif
    .elseif \len == 4
        v\op\()1.32     {\reg\()_A[0]}, [\mem\()_ADDR]\wb
    .elseif \len == 3
        v\op\()1.16     {\reg\()_A[0]}, [\mem\()_ADDR]!
        v\op\()1.8      {\reg\()_A[2]}, [\mem\()_ADDR]!
        .ifb \wb
            sub         \mem\()_ADDR, #3
        .endif
    .elseif \len == 2
        v\op\()1.16     {\reg\()_A[0]}, [\mem\()_ADDR]\wb
    .elseif \len == 1
        v\op\()1.8      {\reg\()_A[0]}, [\mem\()_ADDR]\wb
    .else
        .error "[8bpp]len should be 1~8"
    .endif
.elseif \bpp == 0
    .ifb \wb
        .if \len == 8
            @ replicate the single B,G,R triple into every lane
            v\op\()3.8      {\reg\()_B[], \reg\()_G[], \reg\()_R[]}, [\mem\()_ADDR]
        .else
            .error "[color]len should be 8"
        .endif
    .endif
.endif
.ifc \op,ld
    .if \cvt && (\bpp > 8) && (\bpp < 32)
        vmov.u8         \reg\()_A, #0xFF
    .endif
.endif
.endm

@ premult alpha
@   PREMULT_B/G/R = S_B/G/R * alpha as widened 16-bit products.
@   alpha : D register holding one 8-bit factor per pixel
.macro premult alpha
    vmull.u8        PREMULT_B, S_B, \alpha
    vmull.u8        PREMULT_G, S_G, \alpha
    vmull.u8        PREMULT_R, S_R, \alpha
.endm

@ init src_bpp, dst_bpp, mask, opa
@   Reads the descriptor pointed to by r0 into the working registers and
@   turns each stride into the distance from the end of one row to the start
@   of the next (stride minus the bytes consumed per row).
@   For a single color source (src_bpp 0) the color is loaded once here,
@   either into S (when it will be blended) or into D, already converted to
@   the destination layout (when it is only copied).
@   On exit r0 and r1 no longer hold arguments: FG_MASK = BG_MASK = all ones.
.macro init src_bpp, dst_bpp, mask, opa
    ldr             DST_ADDR, [r0, #4]
    ldr             DST_W, [r0, #8]
    ldr             DST_H, [r0, #12]
    ldr             DST_STRIDE, [r0, #16]
    ldr             SRC_ADDR, [r0, #20]
.if \src_bpp > 0
    ldr             SRC_STRIDE, [r0, #24]
.endif
.if \mask
    ldr             MASK_ADDR, [r0, #28]
    ldr             MASK_STRIDE, [r0, #32]
    sub             MASK_STRIDE, MASK_STRIDE, DST_W
.endif
.if \opa
    vld1.8          {OPA[]}, [r0]                   @ byte at offset 0, replicated to all lanes
.else
    vmov.u8         OPA, #0xFF
.endif

    vmvn            D_A, OPA
.if \dst_bpp == 16
    sub             DST_STRIDE, DST_STRIDE, DST_W, lsl #1
.elseif \dst_bpp == 24
    sub             DST_STRIDE, DST_STRIDE, DST_W
    sub             DST_STRIDE, DST_STRIDE, DST_W, lsl #1
.elseif \dst_bpp >= 31
    sub             DST_STRIDE, DST_STRIDE, DST_W, lsl #2
.endif
.if \src_bpp == 0
    .if \mask || \opa
        ldst            ld, \src_bpp, 8, SRC, S, 1
        vmov.u8         S_A, #0xFF
        premult         OPA
    .else
        ldst            ld, \src_bpp, 8, SRC, D, 1
        vmov.u8         D_A, #0xFF
        convert         D, \dst_bpp, 1
    .endif
.else
.if \src_bpp == 16
    sub             SRC_STRIDE, SRC_STRIDE, DST_W, lsl #1
.elseif \src_bpp == 24
    sub             SRC_STRIDE, SRC_STRIDE, DST_W
    sub             SRC_STRIDE, SRC_STRIDE, DST_W, lsl #1
.elseif \src_bpp >= 31
    sub             SRC_STRIDE, SRC_STRIDE, DST_W, lsl #2
.endif
.endif
    mvn             FG_MASK, #0
    mvn             BG_MASK, #0
.endm

@ calc_alpha len
@   Alpha handling for a destination that stores alpha (used by process when
@   dst_bpp is 32).
@   input: M_A = 255 - fg.alpha, S_A = fg.alpha, D_A = bg.alpha
@   Produces the result alpha in D_A, the premultiplied source in PREMULT_x,
@   the destination weight in M_A, and per-lane selection bits in
@   FG_MASK / BG_MASK that the blend macro consumes.
@   len : pixel count; lanes 4..7 are only computed when len > 4.
@   Clobbers TMP_Q0, TMP_Q1 and, when len > 4, S_565 and D_565.
@   Uses local label 99 as its early-out target.
.macro calc_alpha len
    vmov.u8         TMP_D0, #0xFD
    vmvn            D_A, D_A
    vcge.u8         TMP_D1, S_A, TMP_D0             @ if (fg.alpha >= LV_OPA_MAX
    vcge.u8         TMP_D2, D_A, TMP_D0             @     || bg.alpha <= LV_OPA_MIN)
    vorr            TMP_D2, TMP_D1
    vcge.u8         TMP_D3, M_A, TMP_D0             @ elseif (fg.alpha <= LV_OPA_MIN)
    vmvn            TMP_Q1, TMP_Q1
    vshrn.i16       TMP_D0, TMP_Q1, #4
    vmov            FG_MASK, BG_MASK, TMP_D0
    cbz             FG_MASK, 99f                    @ return fg;
    vmull.u8        TMP_Q0, M_A, D_A                @ D_A = 255 - LV_OPA_MIX2(255 - fg.alpha, 255 - bg.alpha)
    vqrshrn.u16     M_A, TMP_Q0, #8
    vbif            M_A, D_A, TMP_D3                @ insert original D_A when fg.alpha <= LV_OPA_MIN
    vmvn            D_A, M_A
    cbz             BG_MASK, 99f                    @ return bg;
    vmov.u8         TMP_D2, #0xFF
    vmovl.u8        TMP_Q0, D_A
    .if \len > 4
        vmovl.u16       S_565, TMP_D1
    .endif
    vmovl.u16       TMP_Q0, TMP_D0
    vmull.u8        TMP_Q1, S_A, TMP_D2
    vcvt.f32.u32    TMP_Q0, TMP_Q0
    .if \len > 4
        vmovl.u16       D_565, TMP_D3
        vcvt.f32.u32    S_565, S_565
    .endif
    vmovl.u16       TMP_Q1, TMP_D2
    vrecpe.f32      TMP_Q0, TMP_Q0
    vcvt.f32.u32    TMP_Q1, TMP_Q1
    .if \len > 4
        vcvt.f32.u32    D_565, D_565
        vrecpe.f32      S_565, S_565
    .endif
    vmul.f32        TMP_Q0, TMP_Q0, TMP_Q1
    .if \len > 4
        vmul.f32        S_565, S_565, D_565
    .endif
    vcvt.u32.f32    TMP_Q0, TMP_Q0
    .if \len > 4
        vcvt.u32.f32    S_565, S_565
    .endif
    vmovn.u32       TMP_D0, TMP_Q0
    .if \len > 4
        vmovn.u32       TMP_D1, S_565
    .endif
    vmovn.u16       TMP_D0, TMP_Q0
    premult         TMP_D0
    vmvn            M_A, TMP_D0
99:
.endm

@ blend mode, dst_bpp
@   D_B/G/R = (PREMULT_B/G/R + D_B/G/R * M_A) >> 8, rounded and saturated.
@   mode    : only normal is supported
@   dst_bpp : when 32, the FG_MASK / BG_MASK bits left by calc_alpha select,
@             per lane, between the mixed value, the untouched destination
@             and the plain source (including its alpha).
@   Clobbers PREMULT_x; with dst_bpp 32 also TMP_Q0, S_565, D_565_L,
@   FG_MASK and BG_MASK. Uses local labels 96..99.
.macro blend mode, dst_bpp
.if \dst_bpp == 32
    vmov            TMP_D0, FG_MASK, BG_MASK
    vmovl.s8        TMP_Q0, TMP_D0
    vsli.8          TMP_Q0, TMP_Q0, #4              @ TMP_D0 / TMP_D1 = per-lane fg / bg byte masks
    cbz             FG_MASK, 98f
.endif
.ifc \mode,normal
.if \dst_bpp == 32
    cbz             BG_MASK, 97f
    mvns            BG_MASK, BG_MASK
    beq             96f                             @ every lane is mixed: no backup needed
    vmov            S_565_L, D_B                    @ back up the destination for lanes kept as is
    vmov            S_565_H, D_G
    vmov            D_565_L, D_R
.endif
96:
    vmlal.u8        PREMULT_B, D_B, M_A
    vmlal.u8        PREMULT_G, D_G, M_A
    vmlal.u8        PREMULT_R, D_R, M_A
    vqrshrn.u16     D_B, PREMULT_B, #8
    vqrshrn.u16     D_G, PREMULT_G, #8
    vqrshrn.u16     D_R, PREMULT_R, #8
.if \dst_bpp == 32
    beq             97f                             @ flags still come from the mvns above
    vbif            D_B, S_565_L, TMP_D1
    vbif            D_G, S_565_H, TMP_D1
    vbif            D_R, D_565_L, TMP_D1
97:
    mvns            FG_MASK, FG_MASK
    beq             99f
.endif
.else
    .error "blend mode is unsupported"
.endif
.if \dst_bpp == 32
98:
    vbif            D_B, S_B, TMP_D0
    vbif            D_G, S_G, TMP_D0
    vbif            D_R, S_R, TMP_D0
    vbif            D_A, S_A, TMP_D0
99:
.endif
.endm

@ process len, src_bpp, dst_bpp, mask, opa, mode
@   Handles len pixels (1..8) of the current row and advances the source,
@   mask and destination addresses past them.
@   Three code shapes are generated:
@     - source without alpha, no mask, no opa : plain copy / format conversion
@     - source without alpha, mask and/or opa : blend with mask*opa as alpha
@     - source with alpha (src_bpp 32)        : blend with src alpha (*mask) (*opa)
.macro process len, src_bpp, dst_bpp, mask, opa, mode
.if (\src_bpp < 32) && (\mask == 0) && (\opa == 0)
@ no blend
    .if \src_bpp == 0 || \src_bpp == \dst_bpp
        ldst            ld, \src_bpp, \len, SRC, D, 0, !
        ldst            st, \dst_bpp, \len, DST, D, 0, !
    .else
        ldst            ld, \src_bpp, \len, SRC, D, 1, !
        ldst            st, \dst_bpp, \len, DST, D, 1, !
    .endif
.elseif \src_bpp < 32
@ no src_a
    .if \src_bpp > 0
        ldst            ld, \src_bpp, \len, SRC, S, 1, !
    .endif
    ldst            ld, \dst_bpp, \len, DST, D, 1
    .if \mask
        ldst            ld, 8, \len, MASK, S, 1, !
        .if \opa
            vmull.u8        TMP_Q0, S_A, OPA
            vqrshrn.u16     S_A, TMP_Q0, #8
        .endif
        vmvn            M_A, S_A
        .if \dst_bpp < 32
            premult         S_A
        .else
            calc_alpha      \len
        .endif
    .else
        vmvn            M_A, OPA
        .if \dst_bpp < 32
            premult         OPA
        .else
            vmov            S_A, OPA
            calc_alpha      \len
        .endif
    .endif
    blend           \mode, \dst_bpp
    ldst            st, \dst_bpp, \len, DST, D, 1, !
.else
@ src_a (+\mask) (+\opa)
    ldst            ld, \src_bpp, \len, SRC, S, 1, !
    ldst            ld, \dst_bpp, \len, DST, D, 1
    .if \mask == 0
        .if \opa
            vmull.u8        TMP_Q0, S_A, OPA
            vqrshrn.u16     S_A, TMP_Q0, #8
        .endif
    .else
        ldst            ld, 8, \len, MASK, M, 1, !
        vmull.u8        TMP_Q0, S_A, M_A
        vqrshrn.u16     S_A, TMP_Q0, #8
        .if \opa
            vmull.u8        TMP_Q0, S_A, OPA
            vqrshrn.u16     S_A, TMP_Q0, #8
        .endif
    .endif
    vmvn            M_A, S_A
    .if \dst_bpp < 32
        premult         S_A
    .else
        calc_alpha      \len
    .endif
    blend           \mode, \dst_bpp
    ldst            st, \dst_bpp, \len, DST, D, 1, !
.endif
.endm

@ tail src_bpp, dst_bpp, mask, opa, mode
@   Processes the last (DST_W & 7) pixels of a row by branching on bits 2..0
@   of DST_W to the matching fixed-length expansion of process.
@   Must only be entered with (DST_W & 7) != 0: a value of zero would fall
@   through to the one-pixel case.
@   Uses local labels 0..6.
.macro tail src_bpp, dst_bpp, mask, opa, mode
    tst             DST_W, #4
    beq             3f
    tst             DST_W, #2
    beq             5f
    tst             DST_W, #1
    beq             6f
    process         7, \src_bpp, \dst_bpp, \mask, \opa, \mode
    b               0f
6:
    process         6, \src_bpp, \dst_bpp, \mask, \opa, \mode
    b               0f
5:
    tst             DST_W, #1
    beq             4f
    process         5, \src_bpp, \dst_bpp, \mask, \opa, \mode
    b               0f
4:
    process         4, \src_bpp, \dst_bpp, \mask, \opa, \mode
    b               0f
3:
    tst             DST_W, #2
    beq             1f
    tst             DST_W, #1
    beq             2f
    process         3, \src_bpp, \dst_bpp, \mask, \opa, \mode
    b               0f
2:
    process         2, \src_bpp, \dst_bpp, \mask, \opa, \mode
    b               0f
1:
    process         1, \src_bpp, \dst_bpp, \mask, \opa, \mode
0:
.endm

@ next src_bpp, mask
@   Moves the row pointers to the start of the next row by adding the
@   end-of-row gaps computed in init.
.macro next src_bpp, mask
    add             DST_ADDR, DST_ADDR, DST_STRIDE
.if \src_bpp
    add             SRC_ADDR, SRC_ADDR, SRC_STRIDE
.endif
.if \mask
    add             MASK_ADDR, MASK_ADDR, MASK_STRIDE
.endif
.endm

@ enter / exit
@   Function prologue and epilogue: save r4-r11 and lr, restore r4-r11 and
@   return by popping the saved lr into pc.
.macro enter
    push            {r4-r11, lr}
.endm

.macro exit
    pop             {r4-r11, pc}
.endm

@ preload mem, bpp
@   Issues a pld hint one row width (DST_W pixels of bpp) past the current
@   address of mem (SRC or DST). Clobbers W when bpp is 24.
.macro preload mem, bpp
.if \bpp >= 31
    pld             [\mem\()_ADDR, DST_W, lsl #2]
.elseif \bpp == 24
    add             W, DST_W, DST_W, lsl #1
    pld             [\mem\()_ADDR, W]
.elseif \bpp == 16
    pld             [\mem\()_ADDR, DST_W, lsl #1]
.elseif \bpp == 8
    pld             [\mem\()_ADDR, DST_W]
.endif
.endm

@ blender src_bpp, dst_bpp, mask, opa, mode
@   Complete function body: prologue, descriptor decoding, row loop and
@   epilogue.
@   Rows of 8 or more pixels run the 8-pixel loop (label 9) followed by an
@   optional tail; narrower rows use the tail-only loop (label 7).
@   An area with zero height or zero width returns without touching memory.
@   Local labels: 7, 8, 9 for the loops, 10 for the common exit. Labels
@   0..6 and 96..99 belong to the nested tail / blend / calc_alpha macros.
.macro blender src_bpp, dst_bpp, mask, opa, mode
    enter
    init            \src_bpp, \dst_bpp, \mask, \opa
    movs            H, DST_H
    beq             10f                             @ no rows
    cmp             DST_W, #0
    beq             10f                             @ empty rows: tail would still emit one pixel
    preload         SRC, \src_bpp
.if \mask || \opa || (\src_bpp == 32)
    preload         DST, \dst_bpp
.endif
    subs            W, DST_W, #8
    blt             7f
9:
    process         8, \src_bpp, \dst_bpp, \mask, \opa, \mode
    subs            W, W, #8
    bge             9b
    tst             DST_W, #7
    beq             8f
    tail            \src_bpp, \dst_bpp, \mask, \opa, \mode
8:
    next            \src_bpp, \mask
    preload         SRC, \src_bpp
.if \mask || \opa || (\src_bpp == 32)
    preload         DST, \dst_bpp
.endif
    sub             W, DST_W, #8
    subs            H, H, #1
    bgt             9b
10:
    exit
7:
    tail            \src_bpp, \dst_bpp, \mask, \opa, \mode
    next            \src_bpp, \mask
    subs            H, H, #1
    bgt             7b
    exit
.endm

@ export name, src_bpp, dst_bpp, mask, opa, mode
@   Emits one global, hidden-visibility Thumb function called name whose
@   body is the matching blender expansion.
.macro export name, src_bpp, dst_bpp, mask, opa, mode
.thumb_func
.func \name
.global \name
.hidden \name
\name\():
    blender         \src_bpp, \dst_bpp, \mask, \opa, \mode
.endfunc
.endm

@ export_set src, dst, src_bpp, dst_bpp, mode
@   Emits the four variants (plain, with opa, with mask, mask + opa) for one
@   source / destination format pair. Color sources omit the blend mode from
@   the symbol name.
.macro export_set src, dst, src_bpp, dst_bpp, mode
.ifc \src,color
    export          _lv_\src\()_blend_to_\dst\()_neon, \src_bpp, \dst_bpp, 0, 0, \mode
    export          _lv_\src\()_blend_to_\dst\()_with_opa_neon, \src_bpp, \dst_bpp, 0, 1, \mode
    export          _lv_\src\()_blend_to_\dst\()_with_mask_neon, \src_bpp, \dst_bpp, 1, 0, \mode
    export          _lv_\src\()_blend_to_\dst\()_mix_mask_opa_neon, \src_bpp, \dst_bpp, 1, 1, \mode
.else
    export          _lv_\src\()_blend_\mode\()_to_\dst\()_neon, \src_bpp, \dst_bpp, 0, 0, \mode
    export          _lv_\src\()_blend_\mode\()_to_\dst\()_with_opa_neon, \src_bpp, \dst_bpp, 0, 1, \mode
    export          _lv_\src\()_blend_\mode\()_to_\dst\()_with_mask_neon, \src_bpp, \dst_bpp, 1, 0, \mode
    export          _lv_\src\()_blend_\mode\()_to_\dst\()_mix_mask_opa_neon, \src_bpp, \dst_bpp, 1, 1, \mode
.endif
.endm

export_set color, rgb565, 0, 16, normal
export_set rgb565, rgb565, 16, 16, normal
export_set rgb888, rgb565, 24, 16, normal
export_set xrgb8888, rgb565, 31, 16, normal
export_set argb8888, rgb565, 32, 16, normal
export_set color, rgb888, 0, 24, normal
export_set rgb565, rgb888, 16, 24, normal
export_set rgb888, rgb888, 24, 24, normal
export_set xrgb8888, rgb888, 31, 24, normal
export_set argb8888, rgb888, 32, 24, normal
export_set color, xrgb8888, 0, 31, normal
export_set rgb565, xrgb8888, 16, 31, normal
export_set rgb888, xrgb8888, 24, 31, normal
export_set xrgb8888, xrgb8888, 31, 31, normal
export_set argb8888, xrgb8888, 32, 31, normal
export_set color, argb8888, 0, 32, normal
export_set rgb565, argb8888, 16, 32, normal
export_set rgb888, argb8888, 24, 32, normal
export_set xrgb8888, argb8888, 31, 32, normal
export_set argb8888, argb8888, 32, 32, normal

#endif /*LV_USE_DRAW_SW_ASM == LV_DRAW_SW_ASM_NEON*/