OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBlitRow_opts_arm_neon.h" | 8 #include "SkBlitRow_opts_arm_neon.h" |
9 | 9 |
10 #include "SkBlitMask.h" | 10 #include "SkBlitMask.h" |
(...skipping 347 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
358 | 358 |
359 "21: \n\t" | 359 "21: \n\t" |
360 : [count] "+r" (count) | 360 : [count] "+r" (count) |
361 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (s
rc) | 361 : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (s
rc) |
362 : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6"
,"d7", | 362 : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6"
,"d7", |
363 "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25
","d26","d27","d28","d29", | 363 "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25
","d26","d27","d28","d29", |
364 "d30","d31" | 364 "d30","d31" |
365 ); | 365 ); |
366 } | 366 } |
367 } | 367 } |
368 #endif | 368 |
| 369 #else // #ifdef SK_CPU_ARM32 |
| 370 |
| 371 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, |
| 372 const SkPMColor* SK_RESTRICT src, int count, |
| 373 U8CPU alpha, int /*x*/, int /*y*/) { |
| 374 SkASSERT(255 == alpha); |
| 375 |
| 376 if (count >= 16) { |
| 377 asm ( |
| 378 "movi v4.8h, #0x80 \t\n" |
| 379 |
| 380 "1: \t\n" |
| 381 "sub %[count], %[count], #16 \t\n" |
| 382 "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n" |
| 383 "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n" |
| 384 "prfm pldl1keep, [%[src],#512] \t\n" |
| 385 "prfm pldl1keep, [%[dst],#256] \t\n" |
| 386 "ushr v20.8h, v17.8h, #5 \t\n" |
| 387 "ushr v31.8h, v16.8h, #5 \t\n" |
| 388 "xtn v6.8b, v31.8h \t\n" |
| 389 "xtn2 v6.16b, v20.8h \t\n" |
| 390 "ushr v20.8h, v17.8h, #11 \t\n" |
| 391 "shl v19.16b, v6.16b, #2 \t\n" |
| 392 "ushr v31.8h, v16.8h, #11 \t\n" |
| 393 "xtn v22.8b, v31.8h \t\n" |
| 394 "xtn2 v22.16b, v20.8h \t\n" |
| 395 "shl v18.16b, v22.16b, #3 \t\n" |
| 396 "mvn v3.16b, v3.16b \t\n" |
| 397 "xtn v16.8b, v16.8h \t\n" |
| 398 "mov v7.16b, v4.16b \t\n" |
| 399 "xtn2 v16.16b, v17.8h \t\n" |
| 400 "umlal v7.8h, v3.8b, v19.8b \t\n" |
| 401 "shl v16.16b, v16.16b, #3 \t\n" |
| 402 "mov v22.16b, v4.16b \t\n" |
| 403 "ushr v24.8h, v7.8h, #6 \t\n" |
| 404 "umlal v22.8h, v3.8b, v18.8b \t\n" |
| 405 "ushr v20.8h, v22.8h, #5 \t\n" |
| 406 "addhn v20.8b, v22.8h, v20.8h \t\n" |
| 407 "cmp %[count], #16 \t\n" |
| 408 "mov v6.16b, v4.16b \t\n" |
| 409 "mov v5.16b, v4.16b \t\n" |
| 410 "umlal v6.8h, v3.8b, v16.8b \t\n" |
| 411 "umlal2 v5.8h, v3.16b, v19.16b \t\n" |
| 412 "mov v17.16b, v4.16b \t\n" |
| 413 "ushr v19.8h, v6.8h, #5 \t\n" |
| 414 "umlal2 v17.8h, v3.16b, v18.16b \t\n" |
| 415 "addhn v7.8b, v7.8h, v24.8h \t\n" |
| 416 "ushr v18.8h, v5.8h, #6 \t\n" |
| 417 "ushr v21.8h, v17.8h, #5 \t\n" |
| 418 "addhn2 v7.16b, v5.8h, v18.8h \t\n" |
| 419 "addhn2 v20.16b, v17.8h, v21.8h \t\n" |
| 420 "mov v22.16b, v4.16b \t\n" |
| 421 "addhn v6.8b, v6.8h, v19.8h \t\n" |
| 422 "umlal2 v22.8h, v3.16b, v16.16b \t\n" |
| 423 "ushr v5.8h, v22.8h, #5 \t\n" |
| 424 "addhn2 v6.16b, v22.8h, v5.8h \t\n" |
| 425 "uqadd v7.16b, v1.16b, v7.16b \t\n" |
| 426 "uqadd v20.16b, v2.16b, v20.16b \t\n" |
| 427 "uqadd v6.16b, v0.16b, v6.16b \t\n" |
| 428 "shll v22.8h, v20.8b, #8 \t\n" |
| 429 "shll v5.8h, v7.8b, #8 \t\n" |
| 430 "sri v22.8h, v5.8h, #5 \t\n" |
| 431 "shll v17.8h, v6.8b, #8 \t\n" |
| 432 "shll2 v23.8h, v20.16b, #8 \t\n" |
| 433 "shll2 v7.8h, v7.16b, #8 \t\n" |
| 434 "sri v22.8h, v17.8h, #11 \t\n" |
| 435 "sri v23.8h, v7.8h, #5 \t\n" |
| 436 "shll2 v6.8h, v6.16b, #8 \t\n" |
| 437 "st1 {v22.8h}, [%[dst]], #16 \t\n" |
| 438 "sri v23.8h, v6.8h, #11 \t\n" |
| 439 "st1 {v23.8h}, [%[dst]], #16 \t\n" |
| 440 "b.ge 1b \t\n" |
| 441 : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count) |
| 442 :: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", |
| 443 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", |
| 444 "v31" |
| 445 ); |
| 446 } |
| 447 // Leftovers |
| 448 if (count > 0) { |
| 449 do { |
| 450 SkPMColor c = *src++; |
| 451 SkPMColorAssert(c); |
| 452 if (c) { |
| 453 *dst = SkSrcOver32To16(c, *dst); |
| 454 } |
| 455 dst += 1; |
| 456 } while (--count != 0); |
| 457 } |
| 458 } |
| 459 #endif // #ifdef SK_CPU_ARM32 |
369 | 460 |
370 static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { | 461 static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { |
371 prod += vdupq_n_u16(128); | 462 prod += vdupq_n_u16(128); |
372 prod += vshrq_n_u16(prod, 8); | 463 prod += vshrq_n_u16(prod, 8); |
373 return vshrq_n_u16(prod, 8); | 464 return vshrq_n_u16(prod, 8); |
374 } | 465 } |
375 | 466 |
376 void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, | 467 void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, |
377 const SkPMColor* SK_RESTRICT src, int count, | 468 const SkPMColor* SK_RESTRICT src, int count, |
378 U8CPU alpha, int /*x*/, int /*y*/) { | 469 U8CPU alpha, int /*x*/, int /*y*/) { |
(...skipping 1166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1545 count--; | 1636 count--; |
1546 } | 1637 } |
1547 } | 1638 } |
1548 | 1639 |
1549 /////////////////////////////////////////////////////////////////////////////// | 1640 /////////////////////////////////////////////////////////////////////////////// |
1550 | 1641 |
1551 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { | 1642 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { |
1552 // no dither | 1643 // no dither |
1553 S32_D565_Opaque_neon, | 1644 S32_D565_Opaque_neon, |
1554 S32_D565_Blend_neon, | 1645 S32_D565_Blend_neon, |
1555 #ifdef SK_CPU_ARM32 | |
1556 S32A_D565_Opaque_neon, | 1646 S32A_D565_Opaque_neon, |
1557 #else | |
1558 NULL, | |
1559 #endif | |
1560 S32A_D565_Blend_neon, | 1647 S32A_D565_Blend_neon, |
1561 | 1648 |
1562 // dither | 1649 // dither |
1563 S32_D565_Opaque_Dither_neon, | 1650 S32_D565_Opaque_Dither_neon, |
1564 S32_D565_Blend_Dither_neon, | 1651 S32_D565_Blend_Dither_neon, |
1565 S32A_D565_Opaque_Dither_neon, | 1652 S32A_D565_Opaque_Dither_neon, |
1566 NULL, // S32A_D565_Blend_Dither | 1653 NULL, // S32A_D565_Blend_Dither |
1567 }; | 1654 }; |
1568 | 1655 |
1569 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { | 1656 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { |
(...skipping 13 matching lines...) Expand all Loading... |
1583 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, | 1670 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, |
1584 #else | 1671 #else |
1585 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, | 1672 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, |
1586 #endif | 1673 #endif |
1587 #ifdef SK_CPU_ARM32 | 1674 #ifdef SK_CPU_ARM32 |
1588 S32A_Blend_BlitRow32_neon // S32A_Blend | 1675 S32A_Blend_BlitRow32_neon // S32A_Blend |
1589 #else | 1676 #else |
1590 NULL | 1677 NULL |
1591 #endif | 1678 #endif |
1592 }; | 1679 }; |
OLD | NEW |