| Index: src/core/SkBlitRow_D32.cpp
|
| diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp
|
| index 509eeeb1a060bc5f5a1aadf3a87492a943c14929..ac01e427bfb760a8fc3d6d77244604fb988ba921 100644
|
| --- a/src/core/SkBlitRow_D32.cpp
|
| +++ b/src/core/SkBlitRow_D32.cpp
|
| @@ -140,27 +140,37 @@ SkBlitRow::Proc32 SkBlitRow::ColorProcFactory() {
|
| return proc;
|
| }
|
|
|
| +#define SK_SUPPORT_LEGACY_COLOR32_MATHx  // the trailing 'x' defines a different name, so (unless it is defined elsewhere) SK_SUPPORT_LEGACY_COLOR32_MATH stays undefined and the #ifdef below takes its #else branch; drop the 'x' to select the legacy math
|
| +
|
| +// Color32 and its SIMD specializations use the blend_256_round_alt algorithm
|
| +// from tests/BlendTest.cpp. It's not quite perfect, but it's never wrong in the
|
| +// interesting edge cases, and it's quite a bit faster than blend_perfect.
|
| +//
|
| +// blend_256_round_alt is our currently blessed algorithm. Please use it or an analogous one.
|
| void SkBlitRow::Color32(SkPMColor* SK_RESTRICT dst,
|
| const SkPMColor* SK_RESTRICT src,
|
| int count, SkPMColor color) {
|
| - if (count > 0) {
|
| - if (0 == color) {
|
| - if (src != dst) {
|
| - memcpy(dst, src, count * sizeof(SkPMColor));
|
| - }
|
| - return;
|
| - }
|
| - unsigned colorA = SkGetPackedA32(color);
|
| - if (255 == colorA) {
|
| - sk_memset32(dst, color, count);
|
| - } else {
|
| - unsigned scale = 256 - SkAlpha255To256(colorA);
|
| - do {
|
| - *dst = color + SkAlphaMulQ(*src, scale);
|
| - src += 1;
|
| - dst += 1;
|
| - } while (--count);
|
| - }
|
| + switch (SkGetPackedA32(color)) {  // fast paths keyed on the packed alpha of color
|
| + case 0: memmove(dst, src, count * sizeof(SkPMColor)); return;  // alpha 0: dst receives src as-is; memmove permits src/dst overlap
|
| + case 255: sk_memset32(dst, color, count); return;  // alpha 255: dst is filled with color, src is not read
|
| + }  // NOTE(review): the removed `count > 0` guard no longer protects the two calls above - confirm callers never pass a negative count
|
| +
|
| + unsigned invA = 255 - SkGetPackedA32(color);  // presumably 1..254 here (8-bit alpha), since 0 and 255 returned above
|
| +#ifdef SK_SUPPORT_LEGACY_COLOR32_MATH // blend_256_plus1_trunc, busted
|
| + unsigned round = 0;  // no bias added before the shift, so the result truncates
|
| +#else // blend_256_round_alt, good
|
| + invA += invA >> 7;  // adds 1 exactly when invA >= 128
|
| + unsigned round = (128 << 16) + (128 << 0);  // +128 (half of 256) in each 16-bit lane so the 8-bit shift rounds
|
| +#endif
|
| +
|
| + while (count --> 0) {  // i.e. (count--) > 0: one pass per pixel, no passes when count <= 0
|
| + // Our math is 16-bit, so we can do a little bit of SIMD in 32-bit registers.
|
| + const uint32_t mask = 0x00FF00FF;  // selects the low byte of each 16-bit half
|
| + uint32_t rb = (((*src >> 0) & mask) * invA + round) >> 8, // _r_b
|
| + ag = (((*src >> 8) & mask) * invA + round) >> 0; // a_g_
|
| + *dst = color + ((rb & mask) | (ag & ~mask));  // re-interleave the four scaled bytes, then add color
|
| + src++;
|
| + dst++;
|
| }
|
| }
|
|
|
|
|