| Index: src/effects/SkColorMatrixFilter.cpp
|
| diff --git a/src/effects/SkColorMatrixFilter.cpp b/src/effects/SkColorMatrixFilter.cpp
|
| index 9cd904fd63cdc915452d26a3e108af9fc96ddf0c..3bdda61187082036cc43d820bea8d992d916461d 100644
|
| --- a/src/effects/SkColorMatrixFilter.cpp
|
| +++ b/src/effects/SkColorMatrixFilter.cpp
|
| @@ -1,18 +1,40 @@
|
| -
|
| /*
|
| * Copyright 2011 Google Inc.
|
| *
|
| * Use of this source code is governed by a BSD-style license that can be
|
| * found in the LICENSE file.
|
| */
|
| +
|
| #include "SkColorMatrixFilter.h"
|
| #include "SkColorMatrix.h"
|
| #include "SkColorPriv.h"
|
| +#include "SkPMFloat.h"
|
| #include "SkReadBuffer.h"
|
| #include "SkWriteBuffer.h"
|
| #include "SkUnPreMultiply.h"
|
| #include "SkString.h"
|
|
|
| +#define SK_SUPPORT_LEGACY_INT_COLORMATRIX
|
| +
|
| +#define SK_PMORDER_INDEX_A (SK_A32_SHIFT / 8)
|
| +#define SK_PMORDER_INDEX_R (SK_R32_SHIFT / 8)
|
| +#define SK_PMORDER_INDEX_G (SK_G32_SHIFT / 8)
|
| +#define SK_PMORDER_INDEX_B (SK_B32_SHIFT / 8)
|
| + // Rearranges src (4 rows of 5 floats: R, G, B, A) into dst as 5 groups of 4 floats, one group per source column, with lanes placed per SK_PMORDER_INDEX_*.
|
| +static void transpose_to_pmorder(float dst[20], const float src[20]) {
|
| + const float* srcR = src + 0;   // row 0: the 5 coefficients that produce R
|
| + const float* srcG = src + 5;   // row 1: the 5 coefficients that produce G
|
| + const float* srcB = src + 10;  // row 2: the 5 coefficients that produce B
|
| + const float* srcA = src + 15;  // row 3: the 5 coefficients that produce A
|
| + // Each pass writes one source column as a 4-float group; 5 passes (i = 0, 4, ..., 16) fill all 20 slots.
|
| + for (int i = 0; i < 20; i += 4) {
|
| + dst[i + SK_PMORDER_INDEX_A] = *srcA++;  // lane position comes from SK_A32_SHIFT / 8
|
| + dst[i + SK_PMORDER_INDEX_R] = *srcR++;  // lane position comes from SK_R32_SHIFT / 8
|
| + dst[i + SK_PMORDER_INDEX_G] = *srcG++;  // lane position comes from SK_G32_SHIFT / 8
|
| + dst[i + SK_PMORDER_INDEX_B] = *srcB++;  // lane position comes from SK_B32_SHIFT / 8
|
| + }
|
| +}
|
| +
|
| static int32_t rowmul4(const int32_t array[], unsigned r, unsigned g,
|
| unsigned b, unsigned a) {
|
| return array[0] * r + array[1] * g + array[2] * b + array[3] * a + array[4];
|
| @@ -120,6 +142,8 @@ static void Add16(const SkColorMatrixFilter::State& state,
|
| // src is [20] but some compilers won't accept __restrict__ on anything
|
| // but an raw pointer or reference
|
| void SkColorMatrixFilter::initState(const SkScalar* SK_RESTRICT src) {
|
| + transpose_to_pmorder(fTranspose, src);
|
| +
|
| int32_t* array = fState.fArray;
|
| SkFixed max = 0;
|
| for (int i = 0; i < 20; i++) {
|
| @@ -217,12 +241,41 @@ uint32_t SkColorMatrixFilter::getFlags() const {
|
| return this->INHERITED::getFlags() | fFlags;
|
| }
|
|
|
| -void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count,
|
| - SkPMColor dst[]) const {
|
| - Proc proc = fProc;
|
| - const State& state = fState;
|
| - int32_t result[4];
|
| +/**
|
| + * Need inv255 = 1 / 255 as a constant, so when we premul a SkPMFloat, we can do this
|
| + *
|
| + * new_red = old_red * alpha * inv255
|
| + *
|
| + * instead of (much slower)
|
| + *
|
| + * new_red = old_red * alpha / 255
|
| + *
|
| + * However, 1.0f/255 comes to (in hex) 0x3B808081, which is slightly bigger than the "actual"
|
| + * value of 0x3B808080(repeat 80)... This slightly too-big value can cause us to compute
|
| + * new_red > alpha, which is a problem (for valid premul). To fix this, we use a
|
| + * hand-computed value of 0x3B808080, 1 ULP smaller. This keeps our colors valid.
|
| + */
|
| +static const float gInv255 = 0.0039215683f; // (1.0f / 255) - ULP == SkBits2Float(0x3B808080)
|
| + // Multiplies x by (alpha * gInv255) in the first three Sk4f constructor slots and by 1 in the fourth; alpha is read through SkPMFloat(x).a().
|
| +static Sk4f premul(const Sk4f& x) {
|
| + float scale = SkPMFloat(x).a() * gInv255;  // multiply by the rounded-down 1/255 instead of dividing by 255
|
| + Sk4f pm = x * Sk4f(scale, scale, scale, 1);  // NOTE(review): presumably assumes alpha is the last lane (SK_PMORDER_INDEX_A == 3) - confirm for other pixel orders
|
| + // Debug builds only: check that the scaled value still passes SkPMFloat::isValid().
|
| +#ifdef SK_DEBUG
|
| + SkPMFloat pmf(pm);
|
| + SkASSERT(pmf.isValid());
|
| +#endif
|
| +
|
| + return pm;
|
| +}
|
| + // Inverse of premul: multiplies pm by (255 / alpha) in the first three Sk4f constructor slots and by 1 in the fourth. Divides by pm.a(), so alpha must be non-zero; filterSpan skips src_c == 0 before calling this.
|
| +static Sk4f unpremul(const SkPMFloat& pm) {
|
| + float scale = 255 / pm.a(); // candidate for fast/approx invert?
|
| + return Sk4f(pm) * Sk4f(scale, scale, scale, 1);  // NOTE(review): same lane-order assumption as premul (alpha in the fourth slot) - confirm
|
| +}
|
|
|
| +void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
|
| + Proc proc = fProc;
|
| if (NULL == proc) {
|
| if (src != dst) {
|
| memcpy(dst, src, count * sizeof(SkPMColor));
|
| @@ -230,36 +283,82 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count,
|
| return;
|
| }
|
|
|
| - const SkUnPreMultiply::Scale* table = SkUnPreMultiply::GetScaleTable();
|
| +#ifdef SK_SUPPORT_LEGACY_INT_COLORMATRIX
|
| + const bool use_floats = false;
|
| +#else
|
| + const bool use_floats = true;
|
| +#endif
|
| +
|
| + if (use_floats) {
|
| + const Sk4f c0 = Sk4f::Load(fTranspose + 0);
|
| + const Sk4f c1 = Sk4f::Load(fTranspose + 4);
|
| + const Sk4f c2 = Sk4f::Load(fTranspose + 8);
|
| + const Sk4f c3 = Sk4f::Load(fTranspose + 12);
|
| + const Sk4f c4 = Sk4f::Load(fTranspose + 16); // translates
|
|
|
| - for (int i = 0; i < count; i++) {
|
| - SkPMColor c = src[i];
|
| + SkPMColor matrix_translate_pmcolor = SkPMFloat(premul(c4)).clamped();
|
|
|
| - unsigned r = SkGetPackedR32(c);
|
| - unsigned g = SkGetPackedG32(c);
|
| - unsigned b = SkGetPackedB32(c);
|
| - unsigned a = SkGetPackedA32(c);
|
| + for (int i = 0; i < count; i++) {
|
| + const SkPMColor src_c = src[i];
|
| + if (0 == src_c) {
|
| + dst[i] = matrix_translate_pmcolor;
|
| + continue;
|
| + }
|
|
|
| - // need our components to be un-premultiplied
|
| - if (255 != a) {
|
| - SkUnPreMultiply::Scale scale = table[a];
|
| - r = SkUnPreMultiply::ApplyScale(scale, r);
|
| - g = SkUnPreMultiply::ApplyScale(scale, g);
|
| - b = SkUnPreMultiply::ApplyScale(scale, b);
|
| + SkPMFloat srcf(src_c);
|
|
|
| - SkASSERT(r <= 255);
|
| - SkASSERT(g <= 255);
|
| - SkASSERT(b <= 255);
|
| + if (0xFF != SkGetPackedA32(src_c)) {
|
| + srcf = unpremul(srcf);
|
| + }
|
| +
|
| + Sk4f r4 = Sk4f(srcf.r());
|
| + Sk4f g4 = Sk4f(srcf.g());
|
| + Sk4f b4 = Sk4f(srcf.b());
|
| + Sk4f a4 = Sk4f(srcf.a());
|
| +
|
| + // apply matrix
|
| + Sk4f dst4 = c0 * r4 + c1 * g4 + c2 * b4 + c3 * a4 + c4;
|
| +
|
| + // pin before re-premul (convention for color-matrix???)
|
| + dst4 = Sk4f::Max(Sk4f(0), Sk4f::Min(Sk4f(255), dst4));
|
| +
|
| + // re-premul and write
|
| + dst[i] = SkPMFloat(premul(dst4)).get();
|
| }
|
| + } else {
|
| + const State& state = fState;
|
| + int32_t result[4];
|
| + const SkUnPreMultiply::Scale* table = SkUnPreMultiply::GetScaleTable();
|
| +
|
| + for (int i = 0; i < count; i++) {
|
| + SkPMColor c = src[i];
|
| +
|
| + unsigned r = SkGetPackedR32(c);
|
| + unsigned g = SkGetPackedG32(c);
|
| + unsigned b = SkGetPackedB32(c);
|
| + unsigned a = SkGetPackedA32(c);
|
| +
|
| + // need our components to be un-premultiplied
|
| + if (255 != a) {
|
| + SkUnPreMultiply::Scale scale = table[a];
|
| + r = SkUnPreMultiply::ApplyScale(scale, r);
|
| + g = SkUnPreMultiply::ApplyScale(scale, g);
|
| + b = SkUnPreMultiply::ApplyScale(scale, b);
|
| +
|
| + SkASSERT(r <= 255);
|
| + SkASSERT(g <= 255);
|
| + SkASSERT(b <= 255);
|
| + }
|
|
|
| - proc(state, r, g, b, a, result);
|
| + proc(state, r, g, b, a, result);
|
|
|
| - r = pin(result[0], SK_R32_MASK);
|
| - g = pin(result[1], SK_G32_MASK);
|
| - b = pin(result[2], SK_B32_MASK);
|
| - a = pin(result[3], SK_A32_MASK);
|
| - // re-prepremultiply if needed
|
| - dst[i] = SkPremultiplyARGBInline(a, r, g, b);
|
| + r = pin(result[0], SK_R32_MASK);
|
| + g = pin(result[1], SK_G32_MASK);
|
| + b = pin(result[2], SK_B32_MASK);
|
| + a = pin(result[3], SK_A32_MASK);
|
| + // re-premultiply if needed
|
| + dst[i] = SkPremultiplyARGBInline(a, r, g, b);
|
| + }
|
| }
|
| }
|
|
|
|
|