src/opts/SkBlitRow_opts_arm_neon.cpp - Issue 181523002: ARM Skia NEON patches - 22 - S32_D565_Blend

Unified Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 181523002: ARM Skia NEON patches - 22 - S32_D565_Blend (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Rebase. Created 6 years, 8 months ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/opts/SkBlitRow_opts_arm_neon.cpp

diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp

index 67b42c9e267113f64029542bd74009a5e72544af..950e4f71d124e6c6682082913f5252924bc52e4b 100644

--- a/src/opts/SkBlitRow_opts_arm_neon.cpp

+++ b/src/opts/SkBlitRow_opts_arm_neon.cpp

@@ -52,6 +52,90 @@ void S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,

};

}

+void S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst,

+ const SkPMColor* SK_RESTRICT src, int count,

+ U8CPU alpha, int /*x*/, int /*y*/) {

+ SkASSERT(255 > alpha);

+ uint16x8_t vmask_blue, vscale;

+ // prepare constants

+ vscale = vdupq_n_u16(SkAlpha255To256(alpha));

+ vmask_blue = vmovq_n_u16(0x1F);

+ while (count >= 8) {

+ uint16x8_t vdst, vdst_r, vdst_g, vdst_b;

+ uint16x8_t vres_r, vres_g, vres_b;

+ uint8x8_t vsrc_r, vsrc_g, vsrc_b;

+ // Load src

+ {

+ register uint8x8_t d0 asm("d0");

+ register uint8x8_t d1 asm("d1");

+ register uint8x8_t d2 asm("d2");

+ register uint8x8_t d3 asm("d3");

+ asm (

+ "vld4.8 {d0-d3},[%[src]]!"

+ : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)

+ :

+ );

+ vsrc_g = d1;

+#if SK_PMCOLOR_BYTE_ORDER(B,G,R,A)

+ vsrc_r = d2; vsrc_b = d0;

+#elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A)

+ vsrc_r = d0; vsrc_b = d2;

+#endif

+ }

+ // Load and unpack dst

+ vdst = vld1q_u16(dst);

+ vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes

+ vdst_b = vandq_u16(vdst, vmask_blue); // extract blue

+ vdst_r = vshrq_n_u16(vdst, 6+5); // extract red

+ vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green

+ // Shift src to 565

+ vsrc_r = vshr_n_u8(vsrc_r, 3); // shift red to 565 range

+ vsrc_g = vshr_n_u8(vsrc_g, 2); // shift green to 565 range

+ vsrc_b = vshr_n_u8(vsrc_b, 3); // shift blue to 565 range

+ // Scale src - dst

+ vres_r = vmovl_u8(vsrc_r) - vdst_r;

+ vres_g = vmovl_u8(vsrc_g) - vdst_g;

+ vres_b = vmovl_u8(vsrc_b) - vdst_b;

+ vres_r = vshrq_n_u16(vres_r * vscale, 8);

+ vres_g = vshrq_n_u16(vres_g * vscale, 8);

+ vres_b = vshrq_n_u16(vres_b * vscale, 8);

+ vres_r += vdst_r;

+ vres_g += vdst_g;

+ vres_b += vdst_b;

+ // Combine

+ vres_b = vsliq_n_u16(vres_b, vres_g, 5); // insert green into blue

+ vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue

+ // Store

+ vst1q_u16(dst, vres_b);

+ dst += 8;

+ count -= 8;

+ }

+ if (count > 0) {

+ int scale = SkAlpha255To256(alpha);

+ do {

+ SkPMColor c = *src++;

+ SkPMColorAssert(c);

+ uint16_t d = *dst;

+ *dst++ = SkPackRGB16(

+ SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale),

+ SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale),

+ SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale));

+ } while (--count != 0);

+ }

void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,

const SkPMColor* SK_RESTRICT src, int count,

U8CPU alpha, int /*x*/, int /*y*/) {

@@ -1385,11 +1469,8 @@ void Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count,

const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {

// no dither

- // NOTE: For the S32_D565_Blend function below, we don't have a special

- // version that assumes that each source pixel is opaque. But our

- // S32A is still faster than the default, so use it.

S32_D565_Opaque_neon,

- S32A_D565_Blend_neon, // really S32_D565_Blend

+ S32_D565_Blend_neon,

S32A_D565_Opaque_neon,

S32A_D565_Blend_neon,

« no previous file with comments | « expectations/gm/ignored-tests.txt ('k') | no next file » | no next file with comments »