Chromium Code Reviews| Index: src/opts/SkColorXform_opts.h |
| diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h |
| index da0c9010f23c7d58480f14240e3e2bb87f06b1aa..9680ce1e33dd97137e99a64d3fb9dc0582b235af 100644 |
| --- a/src/opts/SkColorXform_opts.h |
| +++ b/src/opts/SkColorXform_opts.h |
| @@ -167,13 +167,41 @@ static __m128 linear_to_2dot2(__m128 x) { |
| return _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(scale, _mm_rcp_ps(x2)), x32), _mm_rcp_ps(x64)); |
| } |
| +// Per-lane select: yields a where mask is set and b where it is clear. |
| +// mask is expected to hold all-ones or all-zeros per lane, as produced by a compare such as |
| +// _mm_cmplt_ps; the bitwise fallback below is only a true select under that assumption. |
| +static __m128 if_then_else(__m128 mask, __m128 a, __m128 b) { |
| +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
| + return _mm_blendv_ps(b, a, mask); |
| +#else |
| + // (mask & a) | (~mask & b). _mm_andnot_ps complements its FIRST operand, so mask goes first. |
| + return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); |
|
mtklein
2016/06/17 17:19:40
and(mask, a) or andnot(mask, b)
msarett
2016/06/17 20:10:11
Let's just use Sk4f haha :).
|
| +#endif |
| +} |
| + |
| +// Approximates the sRGB gamma curve for four lanes at once and returns values already scaled |
| +// to the 0-255 range (stated accuracy: within 1 when scaled to 8-bit pixels). |
| +// For 0.00000f <= x < 0.00349f, 12.92 * x |
| +// For 0.00349f <= x <= 1.00000f, 0.679 * x^(1/2) + 0.423 * x^(1/4) - 0.101 |
| +// The crossover 0.00349f was selected as a point where both pieces produce the same pixel |
| +// value when rounded. |
| +static __m128 linear_to_srgb(__m128 x) { |
| + // Linear segment, chosen for lanes below the crossover. |
| + const __m128 linearPart = _mm_mul_ps(x, _mm_set1_ps(12.92f * 255.0f)); |
| + const __m128 useLinear = _mm_cmplt_ps(x, _mm_set1_ps(0.00349f)); |
| + |
| + // Estimate-based roots: rcp(rsqrt(x)) ~ x^(1/2) and rsqrt(rsqrt(x)) ~ x^(1/4). |
| + const __m128 invRoot = _mm_rsqrt_ps(x); |
| + const __m128 root2 = _mm_rcp_ps(invRoot); |
| + const __m128 root4 = _mm_rsqrt_ps(invRoot); |
| + |
| + // Curved segment; every coefficient is pre-multiplied by 255. |
| + const __m128 offset = _mm_set1_ps(-0.101115084998961f * 255.0f); |
| + const __m128 root2Term = _mm_mul_ps(root2, _mm_set1_ps(+0.678513029959381f * 255.0f)); |
| + const __m128 root4Term = _mm_mul_ps(root4, _mm_set1_ps(+0.422602055039580f * 255.0f)); |
| + const __m128 curvedPart = _mm_add_ps(_mm_add_ps(offset, root2Term), root4Term); |
| + |
| + return if_then_else(useLinear, linearPart, curvedPart); |
| +} |
| + |
| static __m128 clamp_0_to_255(__m128 x) { |
| // Operand order matters: max(NaN, 0) = 0 while max(0, NaN) = NaN, so x is passed first to |
| // make NaN lanes clamp to zero before the upper bound is applied. |
| const __m128 lower = _mm_setzero_ps(); |
| const __m128 upper = _mm_set1_ps(255.0f); |
| const __m128 nonNegative = _mm_max_ps(x, lower); |
| return _mm_min_ps(nonNegative, upper); |
| } |
| -template <const float (&linear_from_curve)[256]> |
| +template <const float (&linear_from_curve)[256], __m128 (*linear_to_curve)(__m128)> |
| static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| const float matrix[16]) { |
| // Load transformation matrix. |
| @@ -228,9 +256,9 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| dstBlues = _mm_add_ps(dstBlues, _mm_mul_ps(blues, bZ)); |
| // Convert to dst gamma. |
| - dstReds = linear_to_2dot2(dstReds); |
| - dstGreens = linear_to_2dot2(dstGreens); |
| - dstBlues = linear_to_2dot2(dstBlues); |
| + dstReds = linear_to_curve(dstReds); |
| + dstGreens = linear_to_curve(dstGreens); |
| + dstBlues = linear_to_curve(dstBlues); |
| // Clamp floats. |
| dstReds = clamp_0_to_255(dstReds); |
| @@ -261,7 +289,7 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| dstPixel = _mm_add_ps(dstPixel, _mm_mul_ps(b, rZgZbZ)); |
| // Convert to dst gamma. |
| - dstPixel = linear_to_2dot2(dstPixel); |
| + dstPixel = linear_to_curve(dstPixel); |
| // Clamp floats to 0-255 range. |
| dstPixel = clamp_0_to_255(dstPixel); |
| @@ -280,6 +308,18 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| #else |
| +// Scalar fallback: applies the inverse of a 2.2 gamma to v and scales the result by 255. |
| +// The result is not clamped here. |
| +static float linear_to_2dot2(float v) { |
| + const float invGamma = 1.0f / 2.2f; |
| + const float encoded = powf(v, invGamma); |
| + return encoded * 255.0f; |
| +} |
| + |
| +// Scalar fallback: encodes a linear value with the sRGB transfer function and scales it by 255. |
| +// Below 0.0031308 the curve is the linear segment 12.92 * v; at or above it, the power segment |
| +// 1.055 * v^(1/2.4) - 0.055. The result is not clamped here. |
| +// Returns float so it can be passed as the float(*)(float) linear_to_curve template argument. |
| +static float linear_to_srgb(float v) { |
| + if (v < 0.0031308f) { |
| + return (12.92f * v) * 255.0f; |
| + } else { |
| + return ((1.055f * powf(v, (1.0f / 2.4f))) - 0.055f) * 255.0f; |
| + } |
| +} |
| + |
| static uint8_t clamp_float_to_byte(float v) { |
| // The ordering of the logic is a little strange here in order |
| // to make sure we convert NaNs to 0. |
| @@ -292,7 +332,7 @@ static uint8_t clamp_float_to_byte(float v) { |
| } |
| } |
| -template <const float (&linear_from_curve)[256]> |
| +template <const float (&linear_from_curve)[256], float(*linear_to_curve)(float)> |
| static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| const float matrix[16]) { |
| while (len-- > 0) { |
| @@ -313,9 +353,9 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| // Convert to dst gamma. |
| // Note: pow is really, really slow. We will suffer when SSE2 is not supported. |
| - dstFloats[0] = powf(dstFloats[0], (1/2.2f)) * 255.0f; |
| - dstFloats[1] = powf(dstFloats[1], (1/2.2f)) * 255.0f; |
| - dstFloats[2] = powf(dstFloats[2], (1/2.2f)) * 255.0f; |
| + dstFloats[0] = linear_to_curve(dstFloats[0]); |
| + dstFloats[1] = linear_to_curve(dstFloats[1]); |
| + dstFloats[2] = linear_to_curve(dstFloats[2]); |
| *dst = (0xFF << 24) | |
| (clamp_float_to_byte(dstFloats[2]) << 16) | |
| @@ -331,12 +371,22 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| // Instantiates color_xform_RGB1 with the linear_from_srgb source table and, after this change, |
| // the linear_to_2dot2 destination curve passed explicitly as the second template argument. |
| static void color_xform_RGB1_srgb_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
| const float matrix[16]) { |
| - color_xform_RGB1<linear_from_srgb>(dst, src, len, matrix); |
| + color_xform_RGB1<linear_from_srgb, linear_to_2dot2>(dst, src, len, matrix); |
| } |
| // Instantiates color_xform_RGB1 with the linear_from_2dot2 source table and, after this change, |
| // the linear_to_2dot2 destination curve passed explicitly as the second template argument. |
| static void color_xform_RGB1_2dot2_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
| const float matrix[16]) { |
| - color_xform_RGB1<linear_from_2dot2>(dst, src, len, matrix); |
| + color_xform_RGB1<linear_from_2dot2, linear_to_2dot2>(dst, src, len, matrix); |
| +} |
| + |
| +// New entry point: instantiates color_xform_RGB1 with the linear_from_srgb source table and |
| +// the linear_to_srgb destination curve. |
| +static void color_xform_RGB1_srgb_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
| + const float matrix[16]) { |
| + color_xform_RGB1<linear_from_srgb, linear_to_srgb>(dst, src, len, matrix); |
| +} |
| + |
| +// New entry point: instantiates color_xform_RGB1 with the linear_from_2dot2 source table and |
| +// the linear_to_srgb destination curve. |
| +static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
| + const float matrix[16]) { |
| + color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix); |
| } |
| } |