Index: src/core/SkBlitRow_D32.cpp |
diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp |
index 509eeeb1a060bc5f5a1aadf3a87492a943c14929..ac01e427bfb760a8fc3d6d77244604fb988ba921 100644 |
--- a/src/core/SkBlitRow_D32.cpp |
+++ b/src/core/SkBlitRow_D32.cpp |
@@ -140,27 +140,37 @@ SkBlitRow::Proc32 SkBlitRow::ColorProcFactory() { |
return proc; |
} |
+#define SK_SUPPORT_LEGACY_COLOR32_MATHx |
+ |
+// Color32 blends one constant color over count pixels: dst[i] = color + src[i] * (255 - SkGetPackedA32(color)) / 255, |
+// approximated per 8-bit channel. Alpha 0 copies src to dst and alpha 255 fills dst with color. The general case |
+// and its SIMD specializations use the blend_256_round_alt algorithm from tests/BlendTest.cpp. It's not quite |
+// perfect, but it's never wrong in the interesting edge cases, and it's quite a bit faster than blend_perfect. |
+// blend_256_round_alt is our currently blessed algorithm. Please use it or an analogous one. |
void SkBlitRow::Color32(SkPMColor* SK_RESTRICT dst, |
const SkPMColor* SK_RESTRICT src, |
int count, SkPMColor color) { |
- if (count > 0) { |
- if (0 == color) { |
- if (src != dst) { |
- memcpy(dst, src, count * sizeof(SkPMColor)); |
- } |
- return; |
- } |
- unsigned colorA = SkGetPackedA32(color); |
- if (255 == colorA) { |
- sk_memset32(dst, color, count); |
- } else { |
- unsigned scale = 256 - SkAlpha255To256(colorA); |
- do { |
- *dst = color + SkAlphaMulQ(*src, scale); |
- src += 1; |
- dst += 1; |
- } while (--count); |
- } |
+ switch (SkGetPackedA32(color)) { |
+ case 0: memmove(dst, src, count * sizeof(SkPMColor)); return; |
+ case 255: sk_memset32(dst, color, count); return; |
+ } |
+ |
+ unsigned invA = 255 - SkGetPackedA32(color); |
+#ifdef SK_SUPPORT_LEGACY_COLOR32_MATH // blend_256_plus1_trunc, busted (no rounding term); the #define above names ..._MATHx, a different macro, so it does not enable this branch by itself |
+ unsigned round = 0; |
+#else // blend_256_round_alt, good: invA gains +1 when it is >= 128, and round adds 128 to each 16-bit lane before the >> 8 |
+ invA += invA >> 7; |
+ unsigned round = (128 << 16) + (128 << 0); |
+#endif |
+ |
+ while (count --> 0) { |
+ // Our math is 16-bit (channel * invA + round stays below 65536), so we can do a little bit of SIMD in 32-bit registers: mask keeps bytes 0 and 2, i.e. two channels per multiply. |
+ const uint32_t mask = 0x00FF00FF; |
+ uint32_t rb = (((*src >> 0) & mask) * invA + round) >> 8, // _r_b: bytes 0 and 2 of *src, scaled; >> 8 moves each result to the low byte of its 16-bit lane |
+ ag = (((*src >> 8) & mask) * invA + round) >> 0; // a_g_: bytes 1 and 3 of *src, scaled; result is left in the high byte of each 16-bit lane |
+ *dst = color + ((rb & mask) | (ag & ~mask)); |
+ src++; |
+ dst++; |
} |
} |