Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #include "SkXfermode.h" | 1 #include "SkXfermode.h" |
| 2 #include "SkXfermode_proccoeff.h" | 2 #include "SkXfermode_proccoeff.h" |
| 3 #include "SkColorPriv.h" | 3 #include "SkColorPriv.h" |
| 4 | 4 |
| 5 #include <arm_neon.h> | 5 #include <arm_neon.h> |
| 6 #include "SkColor_opts_neon.h" | 6 #include "SkColor_opts_neon.h" |
| 7 #include "SkXfermode_opts_arm_neon.h" | 7 #include "SkXfermode_opts_arm_neon.h" |
| 8 | 8 |
| 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) | 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) |
| 10 | 10 |
| (...skipping 562 matching lines...) | |
| 573 SkASSERT(dst && src && count >= 0); | 573 SkASSERT(dst && src && count >= 0); |
| 574 | 574 |
| 575 SkXfermodeProc proc = this->getProc(); | 575 SkXfermodeProc proc = this->getProc(); |
| 576 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); | 576 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); |
| 577 | 577 |
| 578 if (NULL == aa) { | 578 if (NULL == aa) { |
| 579 // Unrolled NEON code | 579 // Unrolled NEON code |
| 580 while (count >= 8) { | 580 while (count >= 8) { |
| 581 uint8x8x4_t vsrc, vdst, vres; | 581 uint8x8x4_t vsrc, vdst, vres; |
| 582 | 582 |
| 583 #if (__GNUC__ == 4) && (__GNUC_MINOR__ > 6) | 583 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) |
| 584 asm volatile ( | 584 asm volatile ( |
| 585 "vld4.u8 %h[vsrc], [%[src]]! \t\n" | 585 "vld4.u8 %h[vsrc], [%[src]]! \t\n" |
| 586 "vld4.u8 %h[vdst], [%[dst]] \t\n" | 586 "vld4.u8 %h[vdst], [%[dst]] \t\n" |
| 587 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) | 587 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) |
| 588 : [dst] "r" (dst) | 588 : [dst] "r" (dst) |
| 589 : | 589 : |
| 590 ); | 590 ); |
| 591 #else | 591 #else |
| 592 register uint8x8_t d0 asm("d0"); | 592 register uint8x8_t d0 asm("d0"); |
| 593 register uint8x8_t d1 asm("d1"); | 593 register uint8x8_t d1 asm("d1"); |
| (...skipping 38 matching lines...) | |
| 632 SkPMColor C = proc(src[i], dstC); | 632 SkPMColor C = proc(src[i], dstC); |
| 633 if (a != 0xFF) { | 633 if (a != 0xFF) { |
| 634 C = SkFourByteInterp(C, dstC, a); | 634 C = SkFourByteInterp(C, dstC, a); |
| 635 } | 635 } |
| 636 dst[i] = C; | 636 dst[i] = C; |
| 637 } | 637 } |
| 638 } | 638 } |
| 639 } | 639 } |
| 640 } | 640 } |
| 641 | 641 |
| 642 void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst, | |
| 643 const SkPMColor* SK_RESTRICT src, int count , | |
| 644 const SkAlpha* SK_RESTRICT aa) const { | |
| 645 SkASSERT(dst && src && count >= 0); | |
| 646 | |
| 647 SkXfermodeProc proc = this->getProc(); | |
| 648 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); | |
|
mtklein
2013/11/06 16:04:17
SkASSERT procSIMD != NULL?
kevin.petit.not.used.account
2013/11/06 16:50:05
Why not but it isn't possible to reach this code w
mtklein
2013/11/06 17:18:41
Yeah, the intention is just documentation.
kevin.petit.not.used.account
2013/11/07 11:26:20
Done.
| |
| 649 | |
| 650 if (NULL == aa) { | |
| 651 while(count >= 8) { | |
| 652 uint16x8_t vdst, vres16; | |
| 653 uint8x8x4_t vdst32, vsrc, vres; | |
| 654 | |
| 655 vdst = vld1q_u16(dst); | |
| 656 | |
| 657 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) | |
|
mtklein
2013/11/06 16:04:17
Is Clang any better at generating code from intrin
kevin.petit.not.used.account
2013/11/06 16:50:05
To be honest I haven't done a detailed analysis of
mtklein
2013/11/06 17:18:41
Ah, this may be a coincidence then. I was indeed
kevin.petit.not.used.account
2013/11/07 11:26:20
Yes, that's a typical pattern. Could you point me
| |
| 658 asm volatile ( | |
| 659 "vld4.u8 %h[vsrc], [%[src]]! \t\n" | |
| 660 : [vsrc] "=w" (vsrc), [src] "+&r" (src) | |
| 661 : : | |
| 662 ); | |
| 663 #else | |
| 664 register uint8x8_t d0 asm("d0"); | |
| 665 register uint8x8_t d1 asm("d1"); | |
| 666 register uint8x8_t d2 asm("d2"); | |
| 667 register uint8x8_t d3 asm("d3"); | |
| 668 | |
| 669 asm volatile ( | |
| 670 "vld4.u8 {d0-d3},[%[src]]!;" | |
| 671 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), | |
| 672 [src] "+&r" (src) | |
| 673 : : | |
| 674 ); | |
| 675 vsrc.val[0] = d0; | |
| 676 vsrc.val[1] = d1; | |
| 677 vsrc.val[2] = d2; | |
| 678 vsrc.val[3] = d3; | |
| 679 #endif | |
| 680 | |
| 681 vdst32 = SkPixel16ToPixel32_neon8(vdst); | |
| 682 vres = procSIMD(vsrc, vdst32); | |
| 683 vres16 = SkPixel32ToPixel16_neon8(vres); | |
| 684 | |
| 685 vst1q_u16(dst, vres16); | |
| 686 | |
| 687 count -= 8; | |
| 688 dst += 8; | |
| 689 } | |
| 690 for (int i = 0; i < count; i++) { | |
| 691 SkPMColor dstC = SkPixel16ToPixel32(dst[i]); | |
| 692 dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC)); | |
| 693 } | |
| 694 } else { | |
| 695 for (int i = count - 1; i >= 0; --i) { | |
| 696 unsigned a = aa[i]; | |
| 697 if (0 != a) { | |
| 698 SkPMColor dstC = SkPixel16ToPixel32(dst[i]); | |
| 699 SkPMColor C = proc(src[i], dstC); | |
| 700 if (0xFF != a) { | |
| 701 C = SkFourByteInterp(C, dstC, a); | |
| 702 } | |
| 703 dst[i] = SkPixel32ToPixel16_ToU16(C); | |
| 704 } | |
| 705 } | |
| 706 } | |
| 707 } | |
| 708 | |
| 642 #ifdef SK_DEVELOPER | 709 #ifdef SK_DEVELOPER |
| 643 void SkNEONProcCoeffXfermode::toString(SkString* str) const { | 710 void SkNEONProcCoeffXfermode::toString(SkString* str) const { |
| 644 this->INHERITED::toString(str); | 711 this->INHERITED::toString(str); |
| 645 } | 712 } |
| 646 #endif | 713 #endif |
| 647 | 714 |
| 648 //////////////////////////////////////////////////////////////////////////////// | 715 //////////////////////////////////////////////////////////////////////////////// |
| 649 | 716 |
| 650 SkXfermodeProcSIMD gNEONXfermodeProcs[] = { | 717 SkXfermodeProcSIMD gNEONXfermodeProcs[] = { |
| 651 NULL, // kClear_Mode | 718 NULL, // kClear_Mode |
| (...skipping 37 matching lines...) | |
| 689 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, | 756 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, |
| 690 SkXfermode::Mode mode) { | 757 SkXfermode::Mode mode) { |
| 691 | 758 |
| 692 void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); | 759 void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); |
| 693 | 760 |
| 694 if (procSIMD != NULL) { | 761 if (procSIMD != NULL) { |
| 695 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); | 762 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); |
| 696 } | 763 } |
| 697 return NULL; | 764 return NULL; |
| 698 } | 765 } |
| OLD | NEW |