Index: src/opts/SkXfermode_opts_arm_neon.cpp
diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp
index 70e92af66bc548f9f42c0ecc70b8f8513794d096..88c179d9e827601fd72002d4dbb84167e4c84ecb 100644
--- a/src/opts/SkXfermode_opts_arm_neon.cpp
+++ b/src/opts/SkXfermode_opts_arm_neon.cpp
|
@@ -748,8 +748,9 @@ SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer)
     fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]);
 }
 
-void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
-                                     int count, const SkAlpha aa[]) const {
+void SkNEONProcCoeffXfermode::xfer32(SkPMColor* SK_RESTRICT dst,
+                                     const SkPMColor* SK_RESTRICT src, int count,
+                                     const SkAlpha* SK_RESTRICT aa) const {
     SkASSERT(dst && src && count >= 0);
 
     SkXfermodeProc proc = this->getProc();
|
@@ -758,13 +759,16 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
 
     if (NULL == aa) {
         // Unrolled NEON code
+        // We'd like to just do this (modulo a few casts):
+        //    vst4_u8(dst, procSIMD(vld4_u8(src), vld4_u8(dst)));
+        //    src += 8;
+        //    dst += 8;
+        // but that tends to generate miserable code. Here are a bunch of faster
+        // workarounds for different architectures and compilers.
         while (count >= 8) {
-            uint8x8x4_t vsrc, vdst, vres;
 
-#ifdef SK_CPU_ARM64
-            vsrc = vld4_u8((uint8_t*)src);
-            vdst = vld4_u8((uint8_t*)dst);
-#else
+#ifdef SK_CPU_ARM32
+            uint8x8x4_t vsrc, vdst, vres;
 #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
             asm volatile (
                 "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
|
@@ -797,17 +801,36 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
             vsrc.val[2] = d2; vdst.val[2] = d6;
             vsrc.val[3] = d3; vdst.val[3] = d7;
 #endif
-#endif // #ifdef SK_CPU_ARM64
 
             vres = procSIMD(vsrc, vdst);
 
             vst4_u8((uint8_t*)dst, vres);
 
-            count -= 8;
             dst += 8;
-#ifdef SK_CPU_ARM64
-            src += 8;
-#endif
+
+#else // #ifdef SK_CPU_ARM32
+
+            asm volatile (
+                "ld4    {v0.8b - v3.8b}, [%[src]], #32 \t\n"
+                "ld4    {v4.8b - v7.8b}, [%[dst]]      \t\n"
+                "blr    %[proc]                        \t\n"
+                "st4    {v0.8b - v3.8b}, [%[dst]], #32 \t\n"
+                : [src] "+&r" (src), [dst] "+&r" (dst)
+                : [proc] "r" (procSIMD)
+                : "cc", "memory",
+                  /* We don't know what proc is going to clobber so we must
+                   * add everything that is not callee-saved.
+                   */
+                  "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
+                  "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18",
+                  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+                  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
+                  "v27", "v28", "v29", "v30", "v31"
+            );
+
+#endif // #ifdef SK_CPU_ARM32
+
+            count -= 8;
         }
         // Leftovers
         for (int i = 0; i < count; i++) {
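
For reference, the form the new comment in the second hunk says the code "would like" to take is a plain vld4/vst4 intrinsics loop: de-interleave 8 source and 8 destination pixels into one 8-byte register per channel, hand them to the per-mode NEON proc, and re-interleave the result back into dst. A minimal sketch of that loop is below; the names XferProcSIMD and xfer32_intrinsics are illustrative only (the proc signature is inferred from the "vres = procSIMD(vsrc, vdst)" context line in the patch), and this is exactly the version the patch avoids because, as the comment notes, compilers tend to generate poor code for it.

    #include <arm_neon.h>
    #include <stdint.h>

    // Illustrative stand-in for the xfermode proc the patch calls procSIMD:
    // it blends 8 de-interleaved source pixels into 8 destination pixels,
    // one uint8x8_t lane per colour channel.
    typedef uint8x8x4_t (*XferProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst);

    // Hypothetical helper showing the "plain intrinsics" shape of the main loop.
    static void xfer32_intrinsics(uint32_t* dst, const uint32_t* src, int count,
                                  XferProcSIMD procSIMD) {
        while (count >= 8) {
            // De-interleave 8 RGBA pixels into 4 registers of 8 bytes each.
            uint8x8x4_t vsrc = vld4_u8(reinterpret_cast<const uint8_t*>(src));
            uint8x8x4_t vdst = vld4_u8(reinterpret_cast<const uint8_t*>(dst));

            // Blend, then re-interleave the result back into dst.
            vst4_u8(reinterpret_cast<uint8_t*>(dst), procSIMD(vsrc, vdst));

            src += 8;
            dst += 8;
            count -= 8;
        }
        // The real code follows this loop with a scalar pass over the
        // remaining 0-7 pixels (the "Leftovers" loop in the patch).
    }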
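The AArch64 path calls the xfermode proc through a function pointer with blr from inside the asm block, which is why the clobber list is so long: the callee is unknown at compile time, so every register the AAPCS64 procedure call standard lets a callee overwrite (x0-x18, v0-v7, v16-v31) has to be declared clobbered, and only the callee-saved registers may be left out. A minimal, self-contained sketch of that pattern follows; UnknownFn and call_indirect are hypothetical names, and the sketch also lists x30 because blr writes the return address there.

    #include <stdint.h>

    // Hypothetical function-pointer type for the sketch.
    typedef uint64_t (*UnknownFn)(uint64_t);

    // Sketch only: call an unknown function pointer from AArch64 extended asm
    // (GCC/Clang). Everything a conforming callee may clobber under AAPCS64 is
    // listed; x0 is not listed because it is the in/out operand below.
    static uint64_t call_indirect(UnknownFn fn, uint64_t arg) {
        register uint64_t result asm("x0") = arg;  // argument and result both use x0
        asm volatile (
            "blr %[fn]"
            : "+r" (result)
            : [fn] "r" (fn)
            : "cc", "memory", "x30",
              "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
              "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18",
              "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
              "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
              "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
        );
        return result;
    }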
|