Index: src/core/SkXfermode4f.cpp |
diff --git a/src/core/SkXfermode4f.cpp b/src/core/SkXfermode4f.cpp |
index 0485a5e6edd4c0c65f884eae88b27beb8c9cd637..583e54535512b1ad3aa2f91511841ddd64593c16 100644 |
--- a/src/core/SkXfermode4f.cpp |
+++ b/src/core/SkXfermode4f.cpp |
@@ -35,6 +35,10 @@ template <DstType D> uint32_t store_dst(const Sk4f& x4) { |
return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4); |
} |
+static Sk4f linear_unit_to_srgb_255(const Sk4f& l4) { /* Passes l4 through linear_to_srgb(), then scales the result by 255 and adds 0.5. */ |
+ return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f); /* NOTE(review): the +0.5 presumably pre-biases for rounding when the caller converts to bytes (Sk4f_ToBytes / to_4b) -- confirm. */ |
+} |
+ |
/////////////////////////////////////////////////////////////////////////////////////////////////// |
static Sk4f scale_255_round(const SkPM4f& pm4) { |
@@ -257,7 +261,7 @@ template <DstType D> void srcover_n(const SkXfermode::PM4fState& state, uint32_t |
template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[], |
mtklein
2016/02/02 18:10:08
At this point I think it'd be clearer to split src
mtklein
2016/02/02 18:10:46
(otherwise lgtm)
|
const SkPM4f& src, int count, const SkAlpha aa[]) { |
Sk4f s4 = Sk4f::Load(src.fVec); |
- Sk4f scale = Sk4f(1 - get_alpha(s4)); |
+ Sk4f dst_scale = Sk4f(1 - get_alpha(s4)); |
if (aa) { |
for (int i = 0; i < count; ++i) { |
@@ -271,15 +275,50 @@ template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t |
s4 = scale_by_coverage(s4, a); |
r4 = s4 + d4 * Sk4f(1 - get_alpha(s4)); |
} else { |
- r4 = s4 + d4 * scale; |
+ r4 = s4 + d4 * dst_scale; |
} |
dst[i] = store_dst<D>(r4); |
} |
} else { |
- for (int i = 0; i < count; ++i) { |
- Sk4f d4 = load_dst<D>(dst[i]); |
- Sk4f r4 = s4 + d4 * scale; |
- dst[i] = store_dst<D>(r4); |
+ if (D == kLinear_Dst) { |
+ // Do the blend math in 255-bias, since the dst bytes are already linear |
+ s4 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding |
+ while (count >= 4) { |
+ Sk4f d0 = to_4f(dst[0]); |
+ Sk4f d1 = to_4f(dst[1]); |
+ Sk4f d2 = to_4f(dst[2]); |
+ Sk4f d3 = to_4f(dst[3]); |
+ Sk4f_ToBytes((uint8_t*)dst, |
+ s4 + d0 * dst_scale, |
+ s4 + d1 * dst_scale, |
+ s4 + d2 * dst_scale, |
+ s4 + d3 * dst_scale); |
+ dst += 4; |
+ count -= 4; |
+ } |
+ for (int i = 0; i < count; ++i) { |
+ Sk4f d4 = to_4f(dst[i]); |
+ dst[i] = to_4b(s4 + d4 * dst_scale); |
+ } |
+ } else { |
+ // Do the blend math in unit-bias, since we have to convert to/from dst sRGB |
+ while (count >= 4) { |
+ Sk4f d0 = load_dst<D>(dst[0]); |
+ Sk4f d1 = load_dst<D>(dst[1]); |
+ Sk4f d2 = load_dst<D>(dst[2]); |
+ Sk4f d3 = load_dst<D>(dst[3]); |
+ Sk4f_ToBytes((uint8_t*)dst, |
+ linear_unit_to_srgb_255(s4 + d0 * dst_scale), |
+ linear_unit_to_srgb_255(s4 + d1 * dst_scale), |
+ linear_unit_to_srgb_255(s4 + d2 * dst_scale), |
+ linear_unit_to_srgb_255(s4 + d3 * dst_scale)); |
+ dst += 4; |
+ count -= 4; |
+ } |
+ for (int i = 0; i < count; ++i) { |
+ Sk4f d4 = load_dst<D>(dst[i]); |
+ dst[i] = to_4b(linear_unit_to_srgb_255(s4 + d4 * dst_scale)); |
+ } |
} |
} |
} |