Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #include "SkXfermode.h" | 1 #include "SkXfermode.h" |
| 2 #include "SkXfermode_proccoeff.h" | 2 #include "SkXfermode_proccoeff.h" |
| 3 #include "SkColorPriv.h" | 3 #include "SkColorPriv.h" |
| 4 | 4 |
| 5 #include <arm_neon.h> | 5 #include <arm_neon.h> |
| 6 #include "SkColor_opts_neon.h" | 6 #include "SkColor_opts_neon.h" |
| 7 #include "SkXfermode_opts_arm_neon.h" | 7 #include "SkXfermode_opts_arm_neon.h" |
| 8 | 8 |
| 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) | 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) |
| 10 | 10 |
| (...skipping 730 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 741 | 741 |
| 742 typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst); | 742 typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst); |
| 743 | 743 |
| 744 extern SkXfermodeProcSIMD gNEONXfermodeProcs[]; | 744 extern SkXfermodeProcSIMD gNEONXfermodeProcs[]; |
| 745 | 745 |
| 746 SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer) | 746 SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer) |
| 747 : INHERITED(buffer) { | 747 : INHERITED(buffer) { |
| 748 fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]); | 748 fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]); |
| 749 } | 749 } |
| 750 | 750 |
| 751 void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], | 751 void SkNEONProcCoeffXfermode::xfer32(SkPMColor* SK_RESTRICT dst, |
| 752 int count, const SkAlpha aa[]) const { | 752 const SkPMColor* SK_RESTRICT src, int count , |
| 753 const SkAlpha* SK_RESTRICT aa) const { | |
| 753 SkASSERT(dst && src && count >= 0); | 754 SkASSERT(dst && src && count >= 0); |
| 754 | 755 |
| 755 SkXfermodeProc proc = this->getProc(); | 756 SkXfermodeProc proc = this->getProc(); |
| 756 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); | 757 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); |
| 757 SkASSERT(procSIMD != NULL); | 758 SkASSERT(procSIMD != NULL); |
| 758 | 759 |
| 759 if (NULL == aa) { | 760 if (NULL == aa) { |
| 760 // Unrolled NEON code | 761 // Unrolled NEON code |
| 761 while (count >= 8) { | 762 while (count >= 8) { |
|
mtklein
2014/06/30 13:44:35
Do you think you could tack on some note like this
kevin.petit
2014/06/30 14:04:39
Done.
| |
| 763 | |
| 764 #ifdef SK_CPU_ARM32 | |
| 762 uint8x8x4_t vsrc, vdst, vres; | 765 uint8x8x4_t vsrc, vdst, vres; |
| 763 | |
| 764 #ifdef SK_CPU_ARM64 | |
| 765 vsrc = vld4_u8((uint8_t*)src); | |
| 766 vdst = vld4_u8((uint8_t*)dst); | |
| 767 #else | |
| 768 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) | 766 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) |
| 769 asm volatile ( | 767 asm volatile ( |
| 770 "vld4.u8 %h[vsrc], [%[src]]! \t\n" | 768 "vld4.u8 %h[vsrc], [%[src]]! \t\n" |
| 771 "vld4.u8 %h[vdst], [%[dst]] \t\n" | 769 "vld4.u8 %h[vdst], [%[dst]] \t\n" |
| 772 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) | 770 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) |
| 773 : [dst] "r" (dst) | 771 : [dst] "r" (dst) |
| 774 : | 772 : |
| 775 ); | 773 ); |
| 776 #else | 774 #else |
| 777 register uint8x8_t d0 asm("d0"); | 775 register uint8x8_t d0 asm("d0"); |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 790 "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7), | 788 "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7), |
| 791 [src] "+&r" (src) | 789 [src] "+&r" (src) |
| 792 : [dst] "r" (dst) | 790 : [dst] "r" (dst) |
| 793 : | 791 : |
| 794 ); | 792 ); |
| 795 vsrc.val[0] = d0; vdst.val[0] = d4; | 793 vsrc.val[0] = d0; vdst.val[0] = d4; |
| 796 vsrc.val[1] = d1; vdst.val[1] = d5; | 794 vsrc.val[1] = d1; vdst.val[1] = d5; |
| 797 vsrc.val[2] = d2; vdst.val[2] = d6; | 795 vsrc.val[2] = d2; vdst.val[2] = d6; |
| 798 vsrc.val[3] = d3; vdst.val[3] = d7; | 796 vsrc.val[3] = d3; vdst.val[3] = d7; |
| 799 #endif | 797 #endif |
| 800 #endif // #ifdef SK_CPU_ARM64 | |
| 801 | 798 |
| 802 vres = procSIMD(vsrc, vdst); | 799 vres = procSIMD(vsrc, vdst); |
| 803 | 800 |
| 804 vst4_u8((uint8_t*)dst, vres); | 801 vst4_u8((uint8_t*)dst, vres); |
| 805 | 802 |
| 803 dst += 8; | |
| 804 | |
| 805 #else // #ifdef SK_CPU_ARM32 | |
| 806 | |
| 807 asm volatile ( | |
|
mtklein
2014/06/30 13:28:03
Can you take me through this and remind my why it
kevin.petit
2014/06/30 13:38:05
Correct.
| |
| 808 "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" | |
| 809 "ld4 {v4.8b - v7.8b}, [%[dst]] \t\n" | |
| 810 "blr %[proc] \t\n" | |
| 811 "st4 {v0.8b - v3.8b}, [%[dst]], #32 \t\n" | |
| 812 : [src] "+&r" (src), [dst] "+&r" (dst) | |
| 813 : [proc] "r" (procSIMD) | |
| 814 : "cc", "memory", | |
| 815 /* We don't know what proc is going to clobber so we must | |
| 816 * add everything that is not callee-saved. | |
| 817 */ | |
| 818 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", | |
| 819 "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", | |
| 820 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", | |
| 821 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", | |
| 822 "v27", "v28", "v29", "v30", "v31" | |
| 823 ); | |
| 824 | |
| 825 #endif // #ifdef SK_CPU_ARM32 | |
| 826 | |
| 806 count -= 8; | 827 count -= 8; |
| 807 dst += 8; | |
| 808 #ifdef SK_CPU_ARM64 | |
| 809 src += 8; | |
| 810 #endif | |
| 811 } | 828 } |
| 812 // Leftovers | 829 // Leftovers |
| 813 for (int i = 0; i < count; i++) { | 830 for (int i = 0; i < count; i++) { |
| 814 dst[i] = proc(src[i], dst[i]); | 831 dst[i] = proc(src[i], dst[i]); |
| 815 } | 832 } |
| 816 } else { | 833 } else { |
| 817 for (int i = count - 1; i >= 0; --i) { | 834 for (int i = count - 1; i >= 0; --i) { |
| 818 unsigned a = aa[i]; | 835 unsigned a = aa[i]; |
| 819 if (0 != a) { | 836 if (0 != a) { |
| 820 SkPMColor dstC = dst[i]; | 837 SkPMColor dstC = dst[i]; |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 996 | 1013 |
| 997 if (procSIMD != NULL) { | 1014 if (procSIMD != NULL) { |
| 998 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); | 1015 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); |
| 999 } | 1016 } |
| 1000 return NULL; | 1017 return NULL; |
| 1001 } | 1018 } |
| 1002 | 1019 |
| 1003 SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) { | 1020 SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) { |
| 1004 return gNEONXfermodeProcs1[mode]; | 1021 return gNEONXfermodeProcs1[mode]; |
| 1005 } | 1022 } |
| OLD | NEW |