Chromium Code Reviews | Index: src/opts/SkColorXform_opts.h |
| diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h |
| index 2c14c802831e51950b799054ee73e67b5021a3a0..dad9e847287cca7f18126ec8333c59bfea781798 100644 |
| --- a/src/opts/SkColorXform_opts.h |
| +++ b/src/opts/SkColorXform_opts.h |
| @@ -191,26 +191,47 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| rYgYbY = Sk4f::Load(matrix + 4), |
| rZgZbZ = Sk4f::Load(matrix + 8); |
| - while (len >= 4) { |
| - // Convert to linear. The look-up table has perfect accuracy. |
| - auto reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF], |
| - linear_from_curve[(src[1] >> 0) & 0xFF], |
| - linear_from_curve[(src[2] >> 0) & 0xFF], |
| - linear_from_curve[(src[3] >> 0) & 0xFF]}; |
| - auto greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF], |
| - linear_from_curve[(src[1] >> 8) & 0xFF], |
| - linear_from_curve[(src[2] >> 8) & 0xFF], |
| - linear_from_curve[(src[3] >> 8) & 0xFF]}; |
| - auto blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF], |
| - linear_from_curve[(src[1] >> 16) & 0xFF], |
| - linear_from_curve[(src[2] >> 16) & 0xFF], |
| - linear_from_curve[(src[3] >> 16) & 0xFF]}; |
| + // Load linear floats. Do this once outside the loop. |
| + Sk4f reds, greens, blues; |
| + if (len >= 4) { |
| + reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF], |
|
mtklein
2016/06/22 14:46:54
Let's make this a lambda to share here and in the loop below. [end of comment truncated in the source page; "loop below" is a reconstruction]
msarett
2016/06/22 18:34:55
Nice! I like how this looks.
|
| + linear_from_curve[(src[1] >> 0) & 0xFF], |
| + linear_from_curve[(src[2] >> 0) & 0xFF], |
| + linear_from_curve[(src[3] >> 0) & 0xFF]}; |
| + greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF], |
| + linear_from_curve[(src[1] >> 8) & 0xFF], |
| + linear_from_curve[(src[2] >> 8) & 0xFF], |
| + linear_from_curve[(src[3] >> 8) & 0xFF]}; |
| + blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF], |
| + linear_from_curve[(src[1] >> 16) & 0xFF], |
| + linear_from_curve[(src[2] >> 16) & 0xFF], |
| + linear_from_curve[(src[3] >> 16) & 0xFF]}; |
| + |
| + src += 4; |
| + len -= 4; |
| + } |
| + while (len >= 4) { |
| // Apply the transformation matrix to dst gamut. |
| auto dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues, |
| dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues, |
| dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues; |
| + // Load floats for the next iteration. This can happen in parallel with |
| + // the math intensive linear_to_curve conversion. |
| + reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF], |
| + linear_from_curve[(src[1] >> 0) & 0xFF], |
| + linear_from_curve[(src[2] >> 0) & 0xFF], |
| + linear_from_curve[(src[3] >> 0) & 0xFF]}; |
| + greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF], |
| + linear_from_curve[(src[1] >> 8) & 0xFF], |
| + linear_from_curve[(src[2] >> 8) & 0xFF], |
| + linear_from_curve[(src[3] >> 8) & 0xFF]}; |
| + blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF], |
| + linear_from_curve[(src[1] >> 16) & 0xFF], |
| + linear_from_curve[(src[2] >> 16) & 0xFF], |
| + linear_from_curve[(src[3] >> 16) & 0xFF]}; |
| + |
| // Convert to dst gamma. |
| dstReds = linear_to_curve(dstReds); |
| dstGreens = linear_to_curve(dstGreens); |
| @@ -233,6 +254,26 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, |
| len -= 4; |
| } |
| + // Complete the final set of four pixels. |
| + auto dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues, |
| + dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues, |
| + dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues; |
| + |
| + dstReds = linear_to_curve(dstReds); |
| + dstGreens = linear_to_curve(dstGreens); |
| + dstBlues = linear_to_curve(dstBlues); |
| + |
| + dstReds = clamp_0_to_255(dstReds); |
| + dstGreens = clamp_0_to_255(dstGreens); |
| + dstBlues = clamp_0_to_255(dstBlues); |
| + |
| + auto rgba = (Sk4i{(int)0xFF000000} ) |
| + | (SkNx_cast<int>(dstReds) ) |
| + | (SkNx_cast<int>(dstGreens) << 8) |
| + | (SkNx_cast<int>(dstBlues) << 16); |
| + rgba.store(dst); |
| + dst += 4; |
| + |
| while (len > 0) { |
| // Splat r,g,b across a register each. |
| auto r = Sk4f{linear_from_curve[(*src >> 0) & 0xFF]}, |