OLD | NEW |
---|---|
1 #include "SkXfermode.h" | 1 #include "SkXfermode.h" |
2 #include "SkXfermode_proccoeff.h" | 2 #include "SkXfermode_proccoeff.h" |
3 #include "SkColorPriv.h" | 3 #include "SkColorPriv.h" |
4 | 4 |
5 #include <arm_neon.h> | 5 #include <arm_neon.h> |
6 #include "SkColor_opts_neon.h" | 6 #include "SkColor_opts_neon.h" |
7 #include "SkXfermode_opts_arm_neon.h" | 7 #include "SkXfermode_opts_arm_neon.h" |
8 | 8 |
9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) | 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) |
10 | 10 |
(...skipping 562 matching lines...) | |
573 SkASSERT(dst && src && count >= 0); | 573 SkASSERT(dst && src && count >= 0); |
574 | 574 |
575 SkXfermodeProc proc = this->getProc(); | 575 SkXfermodeProc proc = this->getProc(); |
576 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); | 576 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); |
577 | 577 |
578 if (NULL == aa) { | 578 if (NULL == aa) { |
579 // Unrolled NEON code | 579 // Unrolled NEON code |
580 while (count >= 8) { | 580 while (count >= 8) { |
581 uint8x8x4_t vsrc, vdst, vres; | 581 uint8x8x4_t vsrc, vdst, vres; |
582 | 582 |
583 #if (__GNUC__ == 4) && (__GNUC_MINOR__ > 6) | 583 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) |
584 asm volatile ( | 584 asm volatile ( |
585 "vld4.u8 %h[vsrc], [%[src]]! \t\n" | 585 "vld4.u8 %h[vsrc], [%[src]]! \t\n" |
586 "vld4.u8 %h[vdst], [%[dst]] \t\n" | 586 "vld4.u8 %h[vdst], [%[dst]] \t\n" |
587 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) | 587 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) |
588 : [dst] "r" (dst) | 588 : [dst] "r" (dst) |
589 : | 589 : |
590 ); | 590 ); |
591 #else | 591 #else |
592 register uint8x8_t d0 asm("d0"); | 592 register uint8x8_t d0 asm("d0"); |
593 register uint8x8_t d1 asm("d1"); | 593 register uint8x8_t d1 asm("d1"); |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
632 SkPMColor C = proc(src[i], dstC); | 632 SkPMColor C = proc(src[i], dstC); |
633 if (a != 0xFF) { | 633 if (a != 0xFF) { |
634 C = SkFourByteInterp(C, dstC, a); | 634 C = SkFourByteInterp(C, dstC, a); |
635 } | 635 } |
636 dst[i] = C; | 636 dst[i] = C; |
637 } | 637 } |
638 } | 638 } |
639 } | 639 } |
640 } | 640 } |
641 | 641 |
// Applies this xfermode to `count` source pixels (`src`, SkPMColor) and writes the | |
// results into the 16-bit destination `dst`, optionally gated by per-pixel coverage `aa`. | |
// - aa == NULL: 8 pixels per iteration go through the NEON proc (procSIMD); any | |
//   remaining pixels go through the scalar proc. | |
// - aa != NULL: every pixel goes through the scalar proc; pixels with zero coverage | |
//   are skipped and pixels with coverage other than 0xFF go through SkFourByteInterp. | |
642 void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst, | |
643 const SkPMColor* SK_RESTRICT src, int count , | |
644 const SkAlpha* SK_RESTRICT aa) const { | |
645 SkASSERT(dst && src && count >= 0); | |
646 | |
// proc is the scalar per-pixel routine; procSIMD is the stored fProcSIMD pointer cast | |
// back to its function type, and is called below on 8 pixels at a time. | |
647 SkXfermodeProc proc = this->getProc(); | |
648 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); | |
mtklein
2013/11/06 16:04:17
SkASSERT procSIMD != NULL?
kevin.petit.not.used.account
2013/11/06 16:50:05
Why not but it isn't possible to reach this code w
mtklein
2013/11/06 17:18:41
Yeah, the intention is just documentation.
kevin.petit.not.used.account
2013/11/07 11:26:20
Done.
| |
649 | |
// No coverage array: vectorized main loop followed by a scalar tail. | |
650 if (NULL == aa) { | |
651 while(count >= 8) { | |
652 uint16x8_t vdst, vres16; | |
653 uint8x8x4_t vdst32, vsrc, vres; | |
654 | |
// Load 8 16-bit destination pixels. | |
655 vdst = vld1q_u16(dst); | |
656 | |
// Load 8 source pixels with vld4.u8, which de-interleaves them into four 8-lane | |
// byte vectors. The '!' writeback on [src] advances src past the loaded pixels. | |
// NOTE(review): %h presumably expands to the d-register list backing vsrc on | |
// GCC newer than 4.6 -- confirm against the GCC ARM operand-modifier docs. | |
657 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) | |
mtklein
2013/11/06 16:04:17
Is Clang any better at generating code from intrin
kevin.petit.not.used.account
2013/11/06 16:50:05
To be honest I haven't done a detailed analysis of
mtklein
2013/11/06 17:18:41
Ah, this may be a coincidence then. I was indeed
kevin.petit.not.used.account
2013/11/07 11:26:20
Yes, that's a typical pattern. Could you point me
| |
658 asm volatile ( | |
659 "vld4.u8 %h[vsrc], [%[src]]! \t\n" | |
660 : [vsrc] "=w" (vsrc), [src] "+&r" (src) | |
661 : : | |
662 ); | |
663 #else | |
// Fallback path: pin four locals to d0-d3, load into those registers explicitly, | |
// then copy them into the four channels of vsrc. | |
664 register uint8x8_t d0 asm("d0"); | |
665 register uint8x8_t d1 asm("d1"); | |
666 register uint8x8_t d2 asm("d2"); | |
667 register uint8x8_t d3 asm("d3"); | |
668 | |
669 asm volatile ( | |
670 "vld4.u8 {d0-d3},[%[src]]!;" | |
671 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), | |
672 [src] "+&r" (src) | |
673 : : | |
674 ); | |
675 vsrc.val[0] = d0; | |
676 vsrc.val[1] = d1; | |
677 vsrc.val[2] = d2; | |
678 vsrc.val[3] = d3; | |
679 #endif | |
680 | |
// Convert dst to the 32-bit vector form, run the SIMD proc, convert the result | |
// back to 16-bit and store it over the same 8 destination pixels. | |
681 vdst32 = SkPixel16ToPixel32_neon8(vdst); | |
682 vres = procSIMD(vsrc, vdst32); | |
683 vres16 = SkPixel32ToPixel16_neon8(vres); | |
684 | |
685 vst1q_u16(dst, vres16); | |
686 | |
// src was already advanced by the asm writeback; only count and dst are updated here. | |
687 count -= 8; | |
688 dst += 8; | |
689 } | |
// Scalar tail: the remaining (fewer than 8) pixels go through proc one at a time. | |
690 for (int i = 0; i < count; i++) { | |
691 SkPMColor dstC = SkPixel16ToPixel32(dst[i]); | |
692 dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC)); | |
693 } | |
694 } else { | |
// Coverage path: scalar only, walking from the last pixel down to the first. | |
695 for (int i = count - 1; i >= 0; --i) { | |
696 unsigned a = aa[i]; | |
// Zero coverage leaves dst[i] untouched. | |
697 if (0 != a) { | |
698 SkPMColor dstC = SkPixel16ToPixel32(dst[i]); | |
699 SkPMColor C = proc(src[i], dstC); | |
// Partial coverage: combine the proc result with the original dst via SkFourByteInterp. | |
700 if (0xFF != a) { | |
701 C = SkFourByteInterp(C, dstC, a); | |
702 } | |
703 dst[i] = SkPixel32ToPixel16_ToU16(C); | |
704 } | |
705 } | |
706 } | |
707 } | |
708 | |
// Compiled only when SK_DEVELOPER is defined: forwards to INHERITED::toString(str). | // Compiled only when SK_DEVELOPER is defined: forwards to INHERITED::toString(str). |
642 #ifdef SK_DEVELOPER | 709 #ifdef SK_DEVELOPER |
643 void SkNEONProcCoeffXfermode::toString(SkString* str) const { | 710 void SkNEONProcCoeffXfermode::toString(SkString* str) const { |
644 this->INHERITED::toString(str); | 711 this->INHERITED::toString(str); |
645 } | 712 } |
646 #endif | 713 #endif |
647 | 714 |
648 //////////////////////////////////////////////////////////////////////////////// | 715 //////////////////////////////////////////////////////////////////////////////// |
649 | 716 |
650 SkXfermodeProcSIMD gNEONXfermodeProcs[] = { | 717 SkXfermodeProcSIMD gNEONXfermodeProcs[] = { |
651 NULL, // kClear_Mode | 718 NULL, // kClear_Mode |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
// Looks up the NEON proc for `mode` in gNEONXfermodeProcs. If the entry is non-NULL, | // Looks up the NEON proc for `mode` in gNEONXfermodeProcs. If the entry is non-NULL, |
// returns a newly allocated SkNEONProcCoeffXfermode built from (rec, mode, procSIMD); | // returns a newly allocated SkNEONProcCoeffXfermode built from (rec, mode, procSIMD); |
// otherwise returns NULL. | // otherwise returns NULL. |
// NOTE(review): callers presumably fall back to a generic xfermode on NULL -- confirm. | // NOTE(review): callers presumably fall back to a generic xfermode on NULL -- confirm. |
689 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, | 756 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, |
690 SkXfermode::Mode mode) { | 757 SkXfermode::Mode mode) { |
691 | 758 |
// The proc table holds function pointers; the pointer is passed along as void*. | // The proc table holds function pointers; the pointer is passed along as void*. |
692 void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); | 759 void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); |
693 | 760 |
694 if (procSIMD != NULL) { | 761 if (procSIMD != NULL) { |
695 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); | 762 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); |
696 } | 763 } |
697 return NULL; | 764 return NULL; |
698 } | 765 } |
OLD | NEW |