OLD | NEW |
---|---|
1 #include "SkXfermode.h" | 1 #include "SkXfermode.h" |
2 #include "SkXfermode_proccoeff.h" | 2 #include "SkXfermode_proccoeff.h" |
3 #include "SkColorPriv.h" | 3 #include "SkColorPriv.h" |
4 | 4 |
5 #include <arm_neon.h> | 5 #include <arm_neon.h> |
6 #include "SkColor_opts_neon.h" | 6 #include "SkColor_opts_neon.h" |
7 #include "SkXfermode_opts_arm_neon.h" | 7 #include "SkXfermode_opts_arm_neon.h" |
8 | 8 |
9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) | 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) |
10 | 10 |
(...skipping 730 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
741 | 741 |
742 typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst); | 742 typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst); |
743 | 743 |
744 extern SkXfermodeProcSIMD gNEONXfermodeProcs[]; | 744 extern SkXfermodeProcSIMD gNEONXfermodeProcs[]; |
745 | 745 |
746 SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer) | 746 SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer) |
747 : INHERITED(buffer) { | 747 : INHERITED(buffer) { |
748 fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]); | 748 fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]); |
749 } | 749 } |
750 | 750 |
751 void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], | 751 void SkNEONProcCoeffXfermode::xfer32(SkPMColor* SK_RESTRICT dst, |
752 int count, const SkAlpha aa[]) const { | 752 const SkPMColor* SK_RESTRICT src, int count , |
753 const SkAlpha* SK_RESTRICT aa) const { | |
753 SkASSERT(dst && src && count >= 0); | 754 SkASSERT(dst && src && count >= 0); |
754 | 755 |
755 SkXfermodeProc proc = this->getProc(); | 756 SkXfermodeProc proc = this->getProc(); |
756 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); | 757 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); |
757 SkASSERT(procSIMD != NULL); | 758 SkASSERT(procSIMD != NULL); |
758 | 759 |
759 if (NULL == aa) { | 760 if (NULL == aa) { |
760 // Unrolled NEON code | 761 // Unrolled NEON code |
761 while (count >= 8) { | 762 while (count >= 8) { |
mtklein
2014/06/30 13:44:35
Do you think you could tack on some note like this
kevin.petit
2014/06/30 14:04:39
Done.
| |
763 | |
764 #ifdef SK_CPU_ARM32 | |
762 uint8x8x4_t vsrc, vdst, vres; | 765 uint8x8x4_t vsrc, vdst, vres; |
763 | |
764 #ifdef SK_CPU_ARM64 | |
765 vsrc = vld4_u8((uint8_t*)src); | |
766 vdst = vld4_u8((uint8_t*)dst); | |
767 #else | |
768 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) | 766 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) |
769 asm volatile ( | 767 asm volatile ( |
770 "vld4.u8 %h[vsrc], [%[src]]! \t\n" | 768 "vld4.u8 %h[vsrc], [%[src]]! \t\n" |
771 "vld4.u8 %h[vdst], [%[dst]] \t\n" | 769 "vld4.u8 %h[vdst], [%[dst]] \t\n" |
772 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) | 770 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) |
773 : [dst] "r" (dst) | 771 : [dst] "r" (dst) |
774 : | 772 : |
775 ); | 773 ); |
776 #else | 774 #else |
777 register uint8x8_t d0 asm("d0"); | 775 register uint8x8_t d0 asm("d0"); |
(...skipping 12 matching lines...) Expand all Loading... | |
790 "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7), | 788 "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7), |
791 [src] "+&r" (src) | 789 [src] "+&r" (src) |
792 : [dst] "r" (dst) | 790 : [dst] "r" (dst) |
793 : | 791 : |
794 ); | 792 ); |
795 vsrc.val[0] = d0; vdst.val[0] = d4; | 793 vsrc.val[0] = d0; vdst.val[0] = d4; |
796 vsrc.val[1] = d1; vdst.val[1] = d5; | 794 vsrc.val[1] = d1; vdst.val[1] = d5; |
797 vsrc.val[2] = d2; vdst.val[2] = d6; | 795 vsrc.val[2] = d2; vdst.val[2] = d6; |
798 vsrc.val[3] = d3; vdst.val[3] = d7; | 796 vsrc.val[3] = d3; vdst.val[3] = d7; |
799 #endif | 797 #endif |
800 #endif // #ifdef SK_CPU_ARM64 | |
801 | 798 |
802 vres = procSIMD(vsrc, vdst); | 799 vres = procSIMD(vsrc, vdst); |
803 | 800 |
804 vst4_u8((uint8_t*)dst, vres); | 801 vst4_u8((uint8_t*)dst, vres); |
805 | 802 |
803 dst += 8; | |
804 | |
805 #else // #ifdef SK_CPU_ARM32 | |
806 | |
807 asm volatile ( | |
mtklein
2014/06/30 13:28:03
Can you take me through this and remind my why it
kevin.petit
2014/06/30 13:38:05
Correct.
| |
808 "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" | |
809 "ld4 {v4.8b - v7.8b}, [%[dst]] \t\n" | |
810 "blr %[proc] \t\n" | |
811 "st4 {v0.8b - v3.8b}, [%[dst]], #32 \t\n" | |
812 : [src] "+&r" (src), [dst] "+&r" (dst) | |
813 : [proc] "r" (procSIMD) | |
814 : "cc", "memory", | |
815 /* We don't know what proc is going to clobber so we must | |
816 * add everything that is not callee-saved. | |
817 */ | |
818 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", | |
819 "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", | |
820 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", | |
821 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", | |
822 "v27", "v28", "v29", "v30", "v31" | |
823 ); | |
824 | |
825 #endif // #ifdef SK_CPU_ARM32 | |
826 | |
806 count -= 8; | 827 count -= 8; |
807 dst += 8; | |
808 #ifdef SK_CPU_ARM64 | |
809 src += 8; | |
810 #endif | |
811 } | 828 } |
812 // Leftovers | 829 // Leftovers |
813 for (int i = 0; i < count; i++) { | 830 for (int i = 0; i < count; i++) { |
814 dst[i] = proc(src[i], dst[i]); | 831 dst[i] = proc(src[i], dst[i]); |
815 } | 832 } |
816 } else { | 833 } else { |
817 for (int i = count - 1; i >= 0; --i) { | 834 for (int i = count - 1; i >= 0; --i) { |
818 unsigned a = aa[i]; | 835 unsigned a = aa[i]; |
819 if (0 != a) { | 836 if (0 != a) { |
820 SkPMColor dstC = dst[i]; | 837 SkPMColor dstC = dst[i]; |
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
996 | 1013 |
997 if (procSIMD != NULL) { | 1014 if (procSIMD != NULL) { |
998 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); | 1015 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); |
999 } | 1016 } |
1000 return NULL; | 1017 return NULL; |
1001 } | 1018 } |
1002 | 1019 |
1003 SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) { | 1020 SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) { |
1004 return gNEONXfermodeProcs1[mode]; | 1021 return gNEONXfermodeProcs1[mode]; |
1005 } | 1022 } |
OLD | NEW |