Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Unified Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 346843003: ARM Skia NEON patches - 40 - arm64: S32A_D565_Opaque (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkBlitRow_opts_arm_neon.cpp
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
index 01a6a2aa74593e0b25c7b90367bb9c3041790b60..a8a0535baacf59b30f5b6ac032f0dabb45a046ba 100644
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
@@ -365,7 +365,98 @@ void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
);
}
}
-#endif
+
+#else // #ifdef SK_CPU_ARM32
+
+void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
+ const SkPMColor* SK_RESTRICT src, int count,
+ U8CPU alpha, int /*x*/, int /*y*/) {
+ SkASSERT(255 == alpha);
+
+ if (count >= 16) {
+ asm (
+ "movi v4.8h, #0x80 \t\n"
+
+ "1: \t\n"
+ "sub %[count], %[count], #16 \t\n"
+ "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n"
+ "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n"
+ "prfm pldl1keep, [%[src],#512] \t\n"
+ "prfm pldl1keep, [%[dst],#256] \t\n"
+ "ushr v20.8h, v17.8h, #5 \t\n"
+ "ushr v31.8h, v16.8h, #5 \t\n"
+ "xtn v6.8b, v31.8h \t\n"
+ "xtn2 v6.16b, v20.8h \t\n"
+ "ushr v20.8h, v17.8h, #11 \t\n"
+ "shl v19.16b, v6.16b, #2 \t\n"
+ "ushr v31.8h, v16.8h, #11 \t\n"
+ "xtn v22.8b, v31.8h \t\n"
+ "xtn2 v22.16b, v20.8h \t\n"
+ "shl v18.16b, v22.16b, #3 \t\n"
+ "mvn v3.16b, v3.16b \t\n"
+ "xtn v16.8b, v16.8h \t\n"
+ "mov v7.16b, v4.16b \t\n"
+ "xtn2 v16.16b, v17.8h \t\n"
+ "umlal v7.8h, v3.8b, v19.8b \t\n"
+ "shl v16.16b, v16.16b, #3 \t\n"
+ "mov v22.16b, v4.16b \t\n"
+ "ushr v24.8h, v7.8h, #6 \t\n"
+ "umlal v22.8h, v3.8b, v18.8b \t\n"
+ "ushr v20.8h, v22.8h, #5 \t\n"
+ "addhn v20.8b, v22.8h, v20.8h \t\n"
+ "cmp %[count], #16 \t\n"
+ "mov v6.16b, v4.16b \t\n"
+ "mov v5.16b, v4.16b \t\n"
+ "umlal v6.8h, v3.8b, v16.8b \t\n"
+ "umlal2 v5.8h, v3.16b, v19.16b \t\n"
+ "mov v17.16b, v4.16b \t\n"
+ "ushr v19.8h, v6.8h, #5 \t\n"
+ "umlal2 v17.8h, v3.16b, v18.16b \t\n"
+ "addhn v7.8b, v7.8h, v24.8h \t\n"
+ "ushr v18.8h, v5.8h, #6 \t\n"
+ "ushr v21.8h, v17.8h, #5 \t\n"
+ "addhn2 v7.16b, v5.8h, v18.8h \t\n"
+ "addhn2 v20.16b, v17.8h, v21.8h \t\n"
+ "mov v22.16b, v4.16b \t\n"
+ "addhn v6.8b, v6.8h, v19.8h \t\n"
+ "umlal2 v22.8h, v3.16b, v16.16b \t\n"
+ "ushr v5.8h, v22.8h, #5 \t\n"
+ "addhn2 v6.16b, v22.8h, v5.8h \t\n"
+ "uqadd v7.16b, v1.16b, v7.16b \t\n"
+ "uqadd v20.16b, v2.16b, v20.16b \t\n"
+ "uqadd v6.16b, v0.16b, v6.16b \t\n"
+ "shll v22.8h, v20.8b, #8 \t\n"
+ "shll v5.8h, v7.8b, #8 \t\n"
+ "sri v22.8h, v5.8h, #5 \t\n"
+ "shll v17.8h, v6.8b, #8 \t\n"
+ "shll2 v23.8h, v20.16b, #8 \t\n"
+ "shll2 v7.8h, v7.16b, #8 \t\n"
+ "sri v22.8h, v17.8h, #11 \t\n"
+ "sri v23.8h, v7.8h, #5 \t\n"
+ "shll2 v6.8h, v6.16b, #8 \t\n"
+ "st1 {v22.8h}, [%[dst]], #16 \t\n"
+ "sri v23.8h, v6.8h, #11 \t\n"
+ "st1 {v23.8h}, [%[dst]], #16 \t\n"
+ "b.ge 1b \t\n"
+ : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count)
+ :: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
+ "v31"
+ );
+ }
+ // Leftovers
+ if (count > 0) {
+ do {
+ SkPMColor c = *src++;
+ SkPMColorAssert(c);
+ if (c) {
+ *dst = SkSrcOver32To16(c, *dst);
+ }
+ dst += 1;
+ } while (--count != 0);
+ }
+}
+#endif // #ifdef SK_CPU_ARM32
static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) {
prod += vdupq_n_u16(128);
@@ -1552,11 +1643,7 @@ const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {
// no dither
S32_D565_Opaque_neon,
S32_D565_Blend_neon,
-#ifdef SK_CPU_ARM32
S32A_D565_Opaque_neon,
-#else
- NULL,
-#endif
S32A_D565_Blend_neon,
// dither
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698