Index: src/opts/SkColorXform_opts.h |
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h |
index da0c9010f23c7d58480f14240e3e2bb87f06b1aa..9680ce1e33dd97137e99a64d3fb9dc0582b235af 100644 |
--- a/src/opts/SkColorXform_opts.h |
+++ b/src/opts/SkColorXform_opts.h |
@@ -167,13 +167,41 @@ static __m128 linear_to_2dot2(__m128 x) { |
return _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(scale, _mm_rcp_ps(x2)), x32), _mm_rcp_ps(x64)); |
} |
+// Per-lane select: returns the lane from a where mask is set and the lane from b otherwise. |
+// mask is expected to hold all-ones or all-zeros per lane, as produced by the SSE comparison |
+// intrinsics, so that both implementations below agree. |
+static __m128 if_then_else(__m128 mask, __m128 a, __m128 b) { |
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
+    return _mm_blendv_ps(b, a, mask); |
+#else |
+    // _mm_andnot_ps(p, q) computes (~p) & q, so the mask has to be the first operand. |
+    return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); |
mtklein
2016/06/17 17:19:40
and(mask, a) or andnot(mask, b)
msarett
2016/06/17 20:10:11
Let's just use Sk4f haha :).
|
+#endif |
+} |
+ |
+// Approximates the sRGB gamma curve; the result is already scaled by 255 and is within 1 of |
+// the exact curve when expressed as an 8-bit pixel value: |
+//   0.00000f <= x <  0.00349f:  12.92 * x |
+//   0.00349f <= x <= 1.00000f:  0.679 * x^(1/2) + 0.423 * x^(1/4) - 0.101 |
+// The crossover was chosen as a point where both pieces round to the same pixel value. |
+static __m128 linear_to_srgb(__m128 x) { |
+    // Estimate x^(-1/2) once, then derive x^(1/2) and x^(1/4) from that estimate. |
+    const __m128 invRoot    = _mm_rsqrt_ps(x); |
+    const __m128 squareRoot = _mm_rcp_ps(invRoot); |
+    const __m128 fourthRoot = _mm_rsqrt_ps(invRoot); |
+ |
+    // Upper segment, with the 255 scale folded into each coefficient. |
+    const __m128 bias     = _mm_set1_ps(-0.101115084998961f * 255.0f); |
+    const __m128 sqrtTerm = _mm_mul_ps(squareRoot, _mm_set1_ps(+0.678513029959381f * 255.0f)); |
+    const __m128 ftrtTerm = _mm_mul_ps(fourthRoot, _mm_set1_ps(+0.422602055039580f * 255.0f)); |
+    const __m128 upper    = _mm_add_ps(_mm_add_ps(bias, sqrtTerm), ftrtTerm); |
+ |
+    // Lower segment: a straight line through the origin. |
+    const __m128 lower = _mm_mul_ps(x, _mm_set1_ps(12.92f * 255.0f)); |
+ |
+    // if_then_else chooses between the two segments using the x < 0.00349 mask. |
+    const __m128 belowCrossover = _mm_cmplt_ps(x, _mm_set1_ps(0.00349f)); |
+    return if_then_else(belowCrossover, lower, upper); |
+} |
+ |
static __m128 clamp_0_to_255(__m128 x) { |
    // Operand order matters for NaN handling: max(NaN, 0) = 0 while max(0, NaN) = NaN, |
    // so x is passed first to make NaN lanes clamp to zero. |
    const __m128 nonNegative = _mm_max_ps(x, _mm_setzero_ps()); |
    return _mm_min_ps(nonNegative, _mm_set1_ps(255.0f)); |
} |
-template <const float (&linear_from_curve)[256]> |
+template <const float (&linear_from_curve)[256], __m128 (*linear_to_curve)(__m128)> |
static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
const float matrix[16]) { |
// Load transformation matrix. |
@@ -228,9 +256,9 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
dstBlues = _mm_add_ps(dstBlues, _mm_mul_ps(blues, bZ)); |
// Convert to dst gamma. |
- dstReds = linear_to_2dot2(dstReds); |
- dstGreens = linear_to_2dot2(dstGreens); |
- dstBlues = linear_to_2dot2(dstBlues); |
+ dstReds = linear_to_curve(dstReds); |
+ dstGreens = linear_to_curve(dstGreens); |
+ dstBlues = linear_to_curve(dstBlues); |
// Clamp floats. |
dstReds = clamp_0_to_255(dstReds); |
@@ -261,7 +289,7 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
dstPixel = _mm_add_ps(dstPixel, _mm_mul_ps(b, rZgZbZ)); |
// Convert to dst gamma. |
- dstPixel = linear_to_2dot2(dstPixel); |
+ dstPixel = linear_to_curve(dstPixel); |
// Clamp floats to 0-255 range. |
dstPixel = clamp_0_to_255(dstPixel); |
@@ -280,6 +308,18 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
#else |
+// Raises v to the power 1/2.2 and scales the result by 255. |
+static float linear_to_2dot2(float v) { |
+    const float exponent = 1.0f / 2.2f; |
+    return 255.0f * powf(v, exponent); |
+} |
+ |
+// Applies the piecewise sRGB curve to v and scales the result by 255: |
+//   v <  0.0031308:  12.92 * v |
+//   v >= 0.0031308:  1.055 * v^(1/2.4) - 0.055 |
+// Returns float so that it can be used as the float(*)(float) linear_to_curve template argument. |
+static float linear_to_srgb(float v) { |
+    if (v < 0.0031308f) { |
+        return (12.92f * v) * 255.0f; |
+    } else { |
+        return ((1.055f * powf(v, (1.0f / 2.4f))) - 0.055f) * 255.0f; |
+    } |
+} |
+ |
static uint8_t clamp_float_to_byte(float v) { |
// The ordering of the logic is a little strange here in order |
// to make sure we convert NaNs to 0. |
@@ -292,7 +332,7 @@ static uint8_t clamp_float_to_byte(float v) { |
} |
} |
-template <const float (&linear_from_curve)[256]> |
+template <const float (&linear_from_curve)[256], float(*linear_to_curve)(float)> |
static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
const float matrix[16]) { |
while (len-- > 0) { |
@@ -313,9 +353,9 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
// Convert to dst gamma. |
// Note: pow is really, really slow. We will suffer when SSE2 is not supported. |
- dstFloats[0] = powf(dstFloats[0], (1/2.2f)) * 255.0f; |
- dstFloats[1] = powf(dstFloats[1], (1/2.2f)) * 255.0f; |
- dstFloats[2] = powf(dstFloats[2], (1/2.2f)) * 255.0f; |
+ dstFloats[0] = linear_to_curve(dstFloats[0]); |
+ dstFloats[1] = linear_to_curve(dstFloats[1]); |
+ dstFloats[2] = linear_to_curve(dstFloats[2]); |
*dst = (0xFF << 24) | |
(clamp_float_to_byte(dstFloats[2]) << 16) | |
@@ -331,12 +371,22 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
+// Forwards dst, src, len and matrix unchanged to color_xform_RGB1 instantiated with the |
+// linear_from_srgb table and the linear_to_2dot2 function. |
static void color_xform_RGB1_srgb_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
const float matrix[16]) { |
- color_xform_RGB1<linear_from_srgb>(dst, src, len, matrix); |
+ color_xform_RGB1<linear_from_srgb, linear_to_2dot2>(dst, src, len, matrix); |
} |
+// Forwards dst, src, len and matrix unchanged to color_xform_RGB1 instantiated with the |
+// linear_from_2dot2 table and the linear_to_2dot2 function. |
static void color_xform_RGB1_2dot2_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
const float matrix[16]) { |
- color_xform_RGB1<linear_from_2dot2>(dst, src, len, matrix); |
+ color_xform_RGB1<linear_from_2dot2, linear_to_2dot2>(dst, src, len, matrix); |
+} |
+ |
+// Forwards dst, src, len and matrix unchanged to color_xform_RGB1 instantiated with the |
+// linear_from_srgb table and the linear_to_srgb function. |
+static void color_xform_RGB1_srgb_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
+ const float matrix[16]) { |
+ color_xform_RGB1<linear_from_srgb, linear_to_srgb>(dst, src, len, matrix); |
+} |
+ |
+// Forwards dst, src, len and matrix unchanged to color_xform_RGB1 instantiated with the |
+// linear_from_2dot2 table and the linear_to_srgb function. |
+static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
+ const float matrix[16]) { |
+ color_xform_RGB1<linear_from_2dot2, linear_to_srgb>(dst, src, len, matrix); |
} |
} |