Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Side by Side Diff: src/opts/SkXfermode_opts_arm_neon.cpp

Issue 350343002: ARM Skia NEON patches - 41 - arm64: SkXfermode::xfer32 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #include "SkXfermode.h" 1 #include "SkXfermode.h"
2 #include "SkXfermode_proccoeff.h" 2 #include "SkXfermode_proccoeff.h"
3 #include "SkColorPriv.h" 3 #include "SkColorPriv.h"
4 4
5 #include <arm_neon.h> 5 #include <arm_neon.h>
6 #include "SkColor_opts_neon.h" 6 #include "SkColor_opts_neon.h"
7 #include "SkXfermode_opts_arm_neon.h" 7 #include "SkXfermode_opts_arm_neon.h"
8 8
9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b)
10 10
(...skipping 730 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 741
742 typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst); 742 typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst);
743 743
744 extern SkXfermodeProcSIMD gNEONXfermodeProcs[]; 744 extern SkXfermodeProcSIMD gNEONXfermodeProcs[];
745 745
746 SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer) 746 SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer)
747 : INHERITED(buffer) { 747 : INHERITED(buffer) {
748 fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]); 748 fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]);
749 } 749 }
750 750
751 void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], 751 void SkNEONProcCoeffXfermode::xfer32(SkPMColor* SK_RESTRICT dst,
752 int count, const SkAlpha aa[]) const { 752 const SkPMColor* SK_RESTRICT src, int count ,
753 const SkAlpha* SK_RESTRICT aa) const {
753 SkASSERT(dst && src && count >= 0); 754 SkASSERT(dst && src && count >= 0);
754 755
755 SkXfermodeProc proc = this->getProc(); 756 SkXfermodeProc proc = this->getProc();
756 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD ); 757 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD );
757 SkASSERT(procSIMD != NULL); 758 SkASSERT(procSIMD != NULL);
758 759
759 if (NULL == aa) { 760 if (NULL == aa) {
760 // Unrolled NEON code 761 // Unrolled NEON code
761 while (count >= 8) { 762 while (count >= 8) {
mtklein 2014/06/30 13:44:35 Do you think you could tack on some note like this
kevin.petit 2014/06/30 14:04:39 Done.
763
764 #ifdef SK_CPU_ARM32
762 uint8x8x4_t vsrc, vdst, vres; 765 uint8x8x4_t vsrc, vdst, vres;
763
764 #ifdef SK_CPU_ARM64
765 vsrc = vld4_u8((uint8_t*)src);
766 vdst = vld4_u8((uint8_t*)dst);
767 #else
768 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) 766 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
769 asm volatile ( 767 asm volatile (
770 "vld4.u8 %h[vsrc], [%[src]]! \t\n" 768 "vld4.u8 %h[vsrc], [%[src]]! \t\n"
771 "vld4.u8 %h[vdst], [%[dst]] \t\n" 769 "vld4.u8 %h[vdst], [%[dst]] \t\n"
772 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src) 770 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src)
773 : [dst] "r" (dst) 771 : [dst] "r" (dst)
774 : 772 :
775 ); 773 );
776 #else 774 #else
777 register uint8x8_t d0 asm("d0"); 775 register uint8x8_t d0 asm("d0");
(...skipping 12 matching lines...) Expand all
790 "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7), 788 "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7),
791 [src] "+&r" (src) 789 [src] "+&r" (src)
792 : [dst] "r" (dst) 790 : [dst] "r" (dst)
793 : 791 :
794 ); 792 );
795 vsrc.val[0] = d0; vdst.val[0] = d4; 793 vsrc.val[0] = d0; vdst.val[0] = d4;
796 vsrc.val[1] = d1; vdst.val[1] = d5; 794 vsrc.val[1] = d1; vdst.val[1] = d5;
797 vsrc.val[2] = d2; vdst.val[2] = d6; 795 vsrc.val[2] = d2; vdst.val[2] = d6;
798 vsrc.val[3] = d3; vdst.val[3] = d7; 796 vsrc.val[3] = d3; vdst.val[3] = d7;
799 #endif 797 #endif
800 #endif // #ifdef SK_CPU_ARM64
801 798
802 vres = procSIMD(vsrc, vdst); 799 vres = procSIMD(vsrc, vdst);
803 800
804 vst4_u8((uint8_t*)dst, vres); 801 vst4_u8((uint8_t*)dst, vres);
805 802
803 dst += 8;
804
805 #else // #ifdef SK_CPU_ARM32
806
807 asm volatile (
mtklein 2014/06/30 13:28:03 Can you take me through this and remind my why it
kevin.petit 2014/06/30 13:38:05 Correct.
808 "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n"
809 "ld4 {v4.8b - v7.8b}, [%[dst]] \t\n"
810 "blr %[proc] \t\n"
811 "st4 {v0.8b - v3.8b}, [%[dst]], #32 \t\n"
812 : [src] "+&r" (src), [dst] "+&r" (dst)
813 : [proc] "r" (procSIMD)
814 : "cc", "memory",
815 /* We don't know what proc is going to clobber so we must
816 * add everything that is not callee-saved.
817 */
818 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
819 "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18",
820 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
821 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
822 "v27", "v28", "v29", "v30", "v31"
823 );
824
825 #endif // #ifdef SK_CPU_ARM32
826
806 count -= 8; 827 count -= 8;
807 dst += 8;
808 #ifdef SK_CPU_ARM64
809 src += 8;
810 #endif
811 } 828 }
812 // Leftovers 829 // Leftovers
813 for (int i = 0; i < count; i++) { 830 for (int i = 0; i < count; i++) {
814 dst[i] = proc(src[i], dst[i]); 831 dst[i] = proc(src[i], dst[i]);
815 } 832 }
816 } else { 833 } else {
817 for (int i = count - 1; i >= 0; --i) { 834 for (int i = count - 1; i >= 0; --i) {
818 unsigned a = aa[i]; 835 unsigned a = aa[i];
819 if (0 != a) { 836 if (0 != a) {
820 SkPMColor dstC = dst[i]; 837 SkPMColor dstC = dst[i];
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
996 1013
997 if (procSIMD != NULL) { 1014 if (procSIMD != NULL) {
998 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); 1015 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD));
999 } 1016 }
1000 return NULL; 1017 return NULL;
1001 } 1018 }
1002 1019
1003 SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) { 1020 SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) {
1004 return gNEONXfermodeProcs1[mode]; 1021 return gNEONXfermodeProcs1[mode];
1005 } 1022 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698