Index: src/opts/SkColorXform_opts.h |
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h |
index b3da55c1fdd758d2b3b7d74ea41b050b5918eb18..d7e3b0842a753116aafbbba82eaa3b70260f67a5 100644 |
--- a/src/opts/SkColorXform_opts.h |
+++ b/src/opts/SkColorXform_opts.h |
@@ -41,8 +41,8 @@ enum DstGamma { |
kLinear_DstGamma, |
}; |
-template <DstGamma kDstGamma, bool kSwapRB> |
-static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
+template <DstGamma kDstGamma, bool kPremul, bool kSwapRB> |
mtklein
2016/07/29 18:48:57
Something to think about: bool kPremul -> an enum {Premul, Unpremul, …}
msarett
2016/07/29 20:07:51
Agreed, that would be better, particularly for F16
|
+static void color_xform_RGBA(void* dst, const uint32_t* src, int len, |
const float* const srcTables[3], const float matrix[16], |
const uint8_t* const dstTables[3]) { |
int kRShift = 0; |
@@ -60,8 +60,9 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
rTgTbT = Sk4f::Load(matrix + 12); |
if (len >= 4) { |
- Sk4f reds, greens, blues; |
- auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { |
+ Sk4i iAlphas; |
+ Sk4f reds, greens, blues, alphas; |
+ auto load_next_4 = [&reds, &greens, &blues, &iAlphas, &alphas, &src, &len, &srcTables] { |
reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], |
srcTables[0][(src[1] >> 0) & 0xFF], |
srcTables[0][(src[2] >> 0) & 0xFF], |
@@ -74,20 +75,35 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
srcTables[2][(src[1] >> 16) & 0xFF], |
srcTables[2][(src[2] >> 16) & 0xFF], |
srcTables[2][(src[3] >> 16) & 0xFF]}; |
+ |
+ Sk4u uAlphas = Sk4u::Load(src) >> 24; |
+ iAlphas = Sk4i::Load(&uAlphas); |
mtklein
2016/07/29 18:48:57
// We'll need iAlphas again later.
msarett
2016/07/29 20:07:51
Done.
|
+ alphas = (1.0f / 255.0f) * SkNx_cast<float>(iAlphas); |
+ |
src += 4; |
len -= 4; |
}; |
- Sk4f dstReds, dstGreens, dstBlues; |
- auto transform_4 = [&reds, &greens, &blues, &dstReds, &dstGreens, &dstBlues, &rXgXbX, |
- &rYgYbY, &rZgZbZ, &rTgTbT] { |
+ Sk4i dstIAlphas; |
mtklein
2016/07/29 18:48:57
Do we really need both dstIAlphas and iAlphas? Can we…
msarett
2016/07/29 20:07:51
Yes, we need it because we interleave loads and stores…
|
+ Sk4f dstReds, dstGreens, dstBlues, dstAlphas; |
+ auto transform_4 = [&reds, &greens, &blues, &alphas, &iAlphas, &dstReds, &dstGreens, |
+ &dstBlues, &dstAlphas, &dstIAlphas, &rXgXbX, &rYgYbY, &rZgZbZ, &rTgTbT] |
+ { |
dstReds = rXgXbX[0]*reds + rYgYbY[0]*greens + rZgZbZ[0]*blues + rTgTbT[0]; |
dstGreens = rXgXbX[1]*reds + rYgYbY[1]*greens + rZgZbZ[1]*blues + rTgTbT[1]; |
dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues + rTgTbT[2]; |
+ dstAlphas = alphas; |
+ dstIAlphas = iAlphas; |
+ |
+ if (kPremul) { |
+ dstReds = alphas * dstReds; |
+ dstGreens = alphas * dstGreens; |
+ dstBlues = alphas * dstBlues; |
+ } |
}; |
- auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables, kRShift, kGShift, |
- kBShift, kAShift] { |
+ auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dstIAlphas, &dstAlphas, &dst, &dstTables, |
+ kRShift, kGShift, kBShift, kAShift] { |
if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { |
Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? |
sk_linear_to_srgb_needs_trunc : linear_to_2dot2; |
@@ -105,7 +121,7 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
auto rgba = (float_to_int(dstReds) << kRShift) |
| (float_to_int(dstGreens) << kGShift) |
| (float_to_int(dstBlues) << kBShift) |
- | (Sk4i{0xFF} << kAShift); |
+ | (dstIAlphas << kAShift); |
rgba.store((uint32_t*) dst); |
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); |
@@ -122,26 +138,26 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
dst32[0] = dstTables[0][indicesReds [0]] << kRShift |
| dstTables[1][indicesGreens[0]] << kGShift |
| dstTables[2][indicesBlues [0]] << kBShift |
- | 0xFF << kAShift; |
+ | dstIAlphas[0] << kAShift; |
dst32[1] = dstTables[0][indicesReds [1]] << kRShift |
| dstTables[1][indicesGreens[1]] << kGShift |
| dstTables[2][indicesBlues [1]] << kBShift |
- | 0xFF << kAShift; |
+ | dstIAlphas[1] << kAShift; |
dst32[2] = dstTables[0][indicesReds [2]] << kRShift |
| dstTables[1][indicesGreens[2]] << kGShift |
| dstTables[2][indicesBlues [2]] << kBShift |
- | 0xFF << kAShift; |
+ | dstIAlphas[2] << kAShift; |
dst32[3] = dstTables[0][indicesReds [3]] << kRShift |
| dstTables[1][indicesGreens[3]] << kGShift |
| dstTables[2][indicesBlues [3]] << kBShift |
- | 0xFF << kAShift; |
+ | dstIAlphas[3] << kAShift; |
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t)); |
} else { |
Sk4h_store4(dst, SkFloatToHalf_finite(dstReds), |
SkFloatToHalf_finite(dstGreens), |
SkFloatToHalf_finite(dstBlues), |
- SK_Half1); |
+ SkFloatToHalf_finite(dstAlphas)); |
dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); |
} |
}; |
@@ -162,7 +178,8 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
// Splat r,g,b across a register each. |
auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, |
g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, |
- b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; |
+ b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}, |
+ a = (1.0f / 255.0f) * Sk4f(*src >> 24); |
auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b + rTgTbT; |
@@ -172,16 +189,21 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
Sk4i (*float_to_int)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ? |
Sk4f_trunc : Sk4f_round; |
+ if (kPremul) { |
+ dstPixel = a * dstPixel; |
+ } |
+ |
dstPixel = sk_clamp_0_255(linear_to_curve(dstPixel)); |
uint32_t rgba; |
SkNx_cast<uint8_t>(float_to_int(dstPixel)).store(&rgba); |
- rgba |= 0xFF000000; |
+ uint32_t* dst32 = (uint32_t*) dst; |
+ *dst32 = (*src & 0xFF000000) | (rgba & 0x00FFFFFF); |
+ |
if (kSwapRB) { |
- *((uint32_t*) dst) = SkSwizzle_RB(rgba); |
- } else { |
- *((uint32_t*) dst) = rgba; |
+ *dst32 = SkSwizzle_RB(*dst32); |
} |
+ |
dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); |
} else if (kTable_DstGamma == kDstGamma) { |
Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f); |
@@ -191,14 +213,12 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
*((uint32_t*) dst) = dstTables[0][indices[0]] << kRShift |
| dstTables[1][indices[1]] << kGShift |
| dstTables[2][indices[2]] << kBShift |
- | 0xFF << kAShift; |
+ | (*src & 0xFF000000); |
dst = SkTAddOffset<void>(dst, sizeof(uint32_t)); |
} else { |
- uint64_t rgba; |
- SkFloatToHalf_finite(dstPixel).store(&rgba); |
- rgba |= static_cast<uint64_t>(SK_Half1) << 48; |
- *((uint64_t*) dst) = rgba; |
+ dstPixel = Sk4f(dstPixel[0], dstPixel[1], dstPixel[2], a[3]); |
+ SkFloatToHalf_finite(dstPixel).store((uint64_t*) dst); |
dst = SkTAddOffset<void>(dst, sizeof(uint64_t)); |
} |
@@ -207,44 +227,88 @@ static void color_xform_RGB1(void* dst, const uint32_t* src, int len, |
} |
} |
-static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_2dot2(uint32_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], const float matrix[16]) { |
- color_xform_RGB1<k2Dot2_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr); |
+ color_xform_RGBA<k2Dot2_DstGamma, false, false>(dst, src, len, srcTables, matrix, nullptr); |
} |
-static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_srgb(uint32_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], const float matrix[16]) { |
- color_xform_RGB1<kSRGB_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr); |
+ color_xform_RGBA<kSRGB_DstGamma, false, false>(dst, src, len, srcTables, matrix, nullptr); |
} |
-static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_table(uint32_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], const float matrix[16], |
const uint8_t* const dstTables[3]) { |
- color_xform_RGB1<kTable_DstGamma, false>(dst, src, len, srcTables, matrix, dstTables); |
+ color_xform_RGBA<kTable_DstGamma, false, false>(dst, src, len, srcTables, matrix, dstTables); |
} |
-static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_linear(uint64_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], const float matrix[16]) { |
- color_xform_RGB1<kLinear_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr); |
+ color_xform_RGBA<kLinear_DstGamma, false, false>(dst, src, len, srcTables, matrix, nullptr); |
+} |
+ |
+static void color_xform_RGBA_to_2dot2_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16]) { |
+ color_xform_RGBA<k2Dot2_DstGamma, false, true>(dst, src, len, srcTables, matrix, nullptr); |
+} |
+ |
+static void color_xform_RGBA_to_srgb_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16]) { |
+ color_xform_RGBA<kSRGB_DstGamma, false, true>(dst, src, len, srcTables, matrix, nullptr); |
+} |
+ |
+static void color_xform_RGBA_to_table_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16], |
+ const uint8_t* const dstTables[3]) { |
+ color_xform_RGBA<kTable_DstGamma, false, true>(dst, src, len, srcTables, matrix, dstTables); |
} |
-static void color_xform_RGB1_to_2dot2_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_2dot2_premul(uint32_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], |
const float matrix[16]) { |
- color_xform_RGB1<k2Dot2_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr); |
+ color_xform_RGBA<k2Dot2_DstGamma, true, false>(dst, src, len, srcTables, matrix, nullptr); |
} |
-static void color_xform_RGB1_to_srgb_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_srgb_premul(uint32_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], |
const float matrix[16]) { |
- color_xform_RGB1<kSRGB_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr); |
+ color_xform_RGBA<kSRGB_DstGamma, true, false>(dst, src, len, srcTables, matrix, nullptr); |
} |
-static void color_xform_RGB1_to_table_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+static void color_xform_RGBA_to_table_premul(uint32_t* dst, const uint32_t* src, int len, |
const float* const srcTables[3], |
const float matrix[16], |
const uint8_t* const dstTables[3]) { |
- color_xform_RGB1<kTable_DstGamma, true>(dst, src, len, srcTables, matrix, dstTables); |
+ color_xform_RGBA<kTable_DstGamma, true, false>(dst, src, len, srcTables, matrix, dstTables); |
+} |
+ |
+static void color_xform_RGBA_to_linear_premul(uint64_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16]) { |
+ color_xform_RGBA<kLinear_DstGamma, true, false>(dst, src, len, srcTables, matrix, nullptr); |
+} |
+ |
+static void color_xform_RGBA_to_2dot2_premul_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16]) { |
+ color_xform_RGBA<k2Dot2_DstGamma, true, true>(dst, src, len, srcTables, matrix, nullptr); |
+} |
+ |
+static void color_xform_RGBA_to_srgb_premul_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16]) { |
+ color_xform_RGBA<kSRGB_DstGamma, true, true>(dst, src, len, srcTables, matrix, nullptr); |
+} |
+ |
+static void color_xform_RGBA_to_table_premul_swaprb(uint32_t* dst, const uint32_t* src, int len, |
+ const float* const srcTables[3], |
+ const float matrix[16], |
+ const uint8_t* const dstTables[3]) { |
+ color_xform_RGBA<kTable_DstGamma, true, true>(dst, src, len, srcTables, matrix, dstTables); |
} |
} // namespace SK_OPTS_NS |