Index: src/opts/SkXfermode_opts_arm_neon.cpp |
diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp |
index 7435dd44de102af82ac3af028fb57da1be8ac7a4..6a79b737263a19a4438d82449cc2c73a9229ae24 100644 |
--- a/src/opts/SkXfermode_opts_arm_neon.cpp |
+++ b/src/opts/SkXfermode_opts_arm_neon.cpp |
@@ -93,6 +93,133 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val |
} |
//////////////////////////////////////////////////////////////////////////////// |
+// 1 pixel modeprocs |
+//////////////////////////////////////////////////////////////////////////////// |
+ |
+// kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc] |
+SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst) { |
+ unsigned sa = SkGetPackedA32(src); |
+ unsigned da = SkGetPackedA32(dst); |
+ unsigned isa = 255 - sa; |
+ |
+ uint8x8_t vda, visa, vsrc, vdst; |
+ |
+ vda = vdup_n_u8(da); |
+ visa = vdup_n_u8(isa); |
+ |
+ uint16x8_t vsrc_wide, vdst_wide; |
+ vsrc_wide = vmull_u8(vda, vreinterpret_u8_u32(vdup_n_u32(src))); |
+ vdst_wide = vmull_u8(visa, vreinterpret_u8_u32(vdup_n_u32(dst))); |
+ |
+ vsrc_wide += vdupq_n_u16(128); |
+ vsrc_wide += vshrq_n_u16(vsrc_wide, 8); |
+ |
+ vdst_wide += vdupq_n_u16(128); |
+ vdst_wide += vshrq_n_u16(vdst_wide, 8); |
+ |
+ vsrc = vshrn_n_u16(vsrc_wide, 8); |
+ vdst = vshrn_n_u16(vdst_wide, 8); |
+ |
+ vsrc += vdst; |
+ vsrc = vset_lane_u8(da, vsrc, 3); |
+ |
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0); |
+} |
+ |
+// kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)] |
+SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst) { |
+ unsigned sa = SkGetPackedA32(src); |
+ unsigned da = SkGetPackedA32(dst); |
+ unsigned ida = 255 - da; |
+ |
+ uint8x8_t vsa, vida, vsrc, vdst; |
+ |
+ vsa = vdup_n_u8(sa); |
+ vida = vdup_n_u8(ida); |
+ |
+ uint16x8_t vsrc_wide, vdst_wide; |
+ vsrc_wide = vmull_u8(vida, vreinterpret_u8_u32(vdup_n_u32(src))); |
+ vdst_wide = vmull_u8(vsa, vreinterpret_u8_u32(vdup_n_u32(dst))); |
+ |
+ vsrc_wide += vdupq_n_u16(128); |
+ vsrc_wide += vshrq_n_u16(vsrc_wide, 8); |
+ |
+ vdst_wide += vdupq_n_u16(128); |
+ vdst_wide += vshrq_n_u16(vdst_wide, 8); |
+ |
+ vsrc = vshrn_n_u16(vsrc_wide, 8); |
+ vdst = vshrn_n_u16(vdst_wide, 8); |
+ |
+ vsrc += vdst; |
+ vsrc = vset_lane_u8(sa, vsrc, 3); |
+ |
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0); |
+} |
+ |
+// kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc] |
+SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst) { |
+ unsigned sa = SkGetPackedA32(src); |
+ unsigned da = SkGetPackedA32(dst); |
+ unsigned ret_alpha = sa + da - (SkAlphaMulAlpha(sa, da) << 1); |
+ unsigned isa = 255 - sa; |
+ unsigned ida = 255 - da; |
+ |
+ uint8x8_t vsrc, vdst, visa, vida; |
+ uint16x8_t vsrc_wide, vdst_wide; |
+ |
+ visa = vdup_n_u8(isa); |
+ vida = vdup_n_u8(ida); |
+ vsrc = vreinterpret_u8_u32(vdup_n_u32(src)); |
+ vdst = vreinterpret_u8_u32(vdup_n_u32(dst)); |
+ |
+ vsrc_wide = vmull_u8(vsrc, vida); |
+ vdst_wide = vmull_u8(vdst, visa); |
+ |
+ vsrc_wide += vdupq_n_u16(128); |
+ vsrc_wide += vshrq_n_u16(vsrc_wide, 8); |
+ |
+ vdst_wide += vdupq_n_u16(128); |
+ vdst_wide += vshrq_n_u16(vdst_wide, 8); |
+ |
+ vsrc = vshrn_n_u16(vsrc_wide, 8); |
+ vdst = vshrn_n_u16(vdst_wide, 8); |
+ |
+ vsrc += vdst; |
+ |
+ vsrc = vset_lane_u8(ret_alpha, vsrc, 3); |
+ |
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0); |
+} |
+ |
+// kPlus_Mode |
+SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst) { |
+ uint8x8_t vsrc, vdst; |
+ vsrc = vreinterpret_u8_u32(vdup_n_u32(src)); |
+ vdst = vreinterpret_u8_u32(vdup_n_u32(dst)); |
+ vsrc = vqadd_u8(vsrc, vdst); |
+ |
+ return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0); |
+} |
+ |
+// kModulate_Mode |
+SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst) { |
+ uint8x8_t vsrc, vdst, vres; |
+ uint16x8_t vres_wide; |
+ |
+ vsrc = vreinterpret_u8_u32(vdup_n_u32(src)); |
+ vdst = vreinterpret_u8_u32(vdup_n_u32(dst)); |
+ |
+ vres_wide = vmull_u8(vsrc, vdst); |
+ |
+ vres_wide += vdupq_n_u16(128); |
+ vres_wide += vshrq_n_u16(vres_wide, 8); |
+ |
+ vres = vshrn_n_u16(vres_wide, 8); |
+ |
+ return vget_lane_u32(vreinterpret_u32_u8(vres), 0); |
+} |
+ |
+//////////////////////////////////////////////////////////////////////////////// |
// 8 pixels modeprocs |
//////////////////////////////////////////////////////////////////////////////// |
@@ -755,6 +882,45 @@ SK_COMPILE_ASSERT( |
mode_count_arm |
); |
+SkXfermodeProc gNEONXfermodeProcs1[] = { |
+ NULL, // kClear_Mode |
+ NULL, // kSrc_Mode |
+ NULL, // kDst_Mode |
+ NULL, // kSrcOver_Mode |
+ NULL, // kDstOver_Mode |
+ NULL, // kSrcIn_Mode |
+ NULL, // kDstIn_Mode |
+ NULL, // kSrcOut_Mode |
+ NULL, // kDstOut_Mode |
+ srcatop_modeproc_neon, |
+ dstatop_modeproc_neon, |
+ xor_modeproc_neon, |
+ plus_modeproc_neon, |
+ modulate_modeproc_neon, |
+ NULL, // kScreen_Mode |
+ |
+ NULL, // kOverlay_Mode |
+ NULL, // kDarken_Mode |
+ NULL, // kLighten_Mode |
+ NULL, // kColorDodge_Mode |
+ NULL, // kColorBurn_Mode |
+ NULL, // kHardLight_Mode |
+ NULL, // kSoftLight_Mode |
+ NULL, // kDifference_Mode |
+ NULL, // kExclusion_Mode |
+ NULL, // kMultiply_Mode |
+ |
+ NULL, // kHue_Mode |
+ NULL, // kSaturation_Mode |
+ NULL, // kColor_Mode |
+ NULL, // kLuminosity_Mode |
+}; |
+ |
+SK_COMPILE_ASSERT( |
+ SK_ARRAY_COUNT(gNEONXfermodeProcs1) == SkXfermode::kLastMode + 1, |
+ mode1_count_arm |
+); |
+ |
SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, |
SkXfermode::Mode mode) { |
@@ -765,3 +931,7 @@ SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, |
} |
return NULL; |
} |
+ |
+SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) { |
+ return gNEONXfermodeProcs1[mode]; |
+} |