Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(540)

Unified Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 22269003: ARM Skia NEON patches - 23 - S32_D565_Opaque_Dither cleanup/bugfix/speed (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/opts/SkBlitRow_opts_arm_neon.cpp
diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
index 7868108378bbab635ac8fa99d4b8fb458db775de..39606fc6677cd620e0ef7354ca910dd1b7cc8962 100644
--- a/src/opts/SkBlitRow_opts_arm_neon.cpp
+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
@@ -1033,13 +1033,6 @@ void S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst,
///////////////////////////////////////////////////////////////////////////////
-/* 2009/10/27: RBE says "a work in progress"; debugging says ok;
- * speedup untested, but ARM version is 26 insns/iteration and
- * this NEON version is 21 insns/iteration-of-8 (2.62insns/element)
- * which is 10x the native version; that's pure instruction counts,
- * not accounting for any instruction or memory latencies.
- */
-
#undef DEBUG_S32_OPAQUE_DITHER
void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
@@ -1065,11 +1058,17 @@ void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
register uint8x8_t d2 asm("d2");
register uint8x8_t d3 asm("d3");
- asm ("vld4.8 {d0-d3},[%4] /* r=%P0 g=%P1 b=%P2 a=%P3 */"
- : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3)
- : "r" (src)
- );
- sr = d0; sg = d1; sb = d2;
+ asm (
+ "vld4.8 {d0-d3},[%[src]]! /* r=%P0 g=%P1 b=%P2 a=%P3 */"
+ : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)
+ :
+ );
+ sg = d1;
+#if SK_PMCOLOR_BYTE_ORDER(B,G,R,A)
+ sr = d2; sb = d0;
+#elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A)
+ sr = d0; sb = d2;
+#endif
}
/* XXX: if we want to prefetch, hide it in the above asm()
* using the gcc __builtin_prefetch(), the prefetch will
@@ -1087,13 +1086,13 @@ void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
/* sg = sg - (sg>>6) + d>>1; similar logic for overflows */
sg = vsub_u8(sg, vshr_n_u8(sg, 6));
- dg = vaddl_u8(sg, vshr_n_u8(d,1));
+ dg = vaddl_u8(sg, vshr_n_u8(d, 1));
/* XXX: check that the "d>>1" here is hoisted */
/* pack high bits of each into 565 format (rgb, b is lsb) */
dst8 = vshrq_n_u16(db, 3);
dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5);
- dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr,3), 11);
+ dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11);
/* store it */
vst1q_u16(dst, dst8);
@@ -1104,7 +1103,7 @@ void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
int i, myx = x, myy = y;
DITHER_565_SCAN(myy);
for (i=0;i<UNROLL;i++) {
- SkPMColor c = src[i];
+ SkPMColor c = src[i-8];
mtklein 2013/09/12 20:07:36 Can you tack on something like // The '!' in th...
kevin.petit.not.used.account 2013/09/13 12:08:27 Done.
unsigned dither = DITHER_VALUE(myx);
uint16_t val = SkDitherRGB32To565(c, dither);
if (val != dst[i]) {
@@ -1117,7 +1116,6 @@ void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
#endif
dst += UNROLL;
- src += UNROLL;
mtklein 2013/09/12 20:07:36 Maybe even duplicate the same note here about why...
kevin.petit.not.used.account 2013/09/13 12:08:27 Done.
count -= UNROLL;
x += UNROLL; /* probably superfluous */
}
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698