Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1272)

Unified Diff: src/opts/SkRasterPipeline_opts.h

Issue 2449243003: Initial implementation of a SkColorSpace_A2B xform (Closed)
Patch Set: updated implementation to use SkRasterPipeline Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/opts/SkRasterPipeline_opts.h
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 5c5418be1a44d6d6ece160e288467828f10a75a5..db9a5bf290f270106a0178be146440179ef8ba31 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -9,7 +9,9 @@
#define SkRasterPipeline_opts_DEFINED
#include "SkColorPriv.h"
+#include "SkColorSpace_Base.h"
#include "SkHalf.h"
+#include "SkMatrix44.h"
#include "SkPM4f.h"
#include "SkPM4fPriv.h"
#include "SkRasterPipeline.h"
@@ -134,6 +136,59 @@ SI void SK_VECTORCALL next(TailStage* st, size_t x, size_t tail,
static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
const SkNf& d, const SkNf& da)
+
+#define GAMMA_STAGE(name) /* Generates stages name##_r, name##_g, name##_b, name##_a around one kernel. */ \
msarett1 2016/11/09 00:01:05 Instead of this, I think I would prefer 6 normal s
mtklein_C 2016/11/09 11:04:36 I think you mean 3 normal stages? Each STAGE invo
raftias 2016/11/10 21:36:06 I did this with fn_1_r/g/b. If we add in specific
+ static SK_ALWAYS_INLINE SkNf name##_kernel(void* ctx, SkNf& s); /* forward declaration; defined after the macro */ \
+ SI void SK_VECTORCALL name##_r(BodyStage* st, size_t x, /* red, BodyStage overload */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ r = name##_kernel(st->ctx, r); /* only r is replaced; every other value is forwarded as-is */ \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_r(TailStage* st, size_t x, size_t tail, /* red, TailStage overload (also forwards tail) */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ r = name##_kernel(st->ctx, r); \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_g(BodyStage* st, size_t x, /* green, BodyStage overload */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ g = name##_kernel(st->ctx, g); \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_g(TailStage* st, size_t x, size_t tail, /* green, TailStage overload */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ g = name##_kernel(st->ctx, g); \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_b(BodyStage* st, size_t x, /* blue, BodyStage overload */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ b = name##_kernel(st->ctx, b); \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_b(TailStage* st, size_t x, size_t tail, /* blue, TailStage overload */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ b = name##_kernel(st->ctx, b); \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_a(BodyStage* st, size_t x, /* alpha, BodyStage overload */ \
msarett1 2016/11/09 00:01:05 All we need to do with "a" is load it and store.
raftias 2016/11/10 21:36:07 It was indeed for CMYK/etc support.
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ a = name##_kernel(st->ctx, a); \
+ next(st, x, r,g,b,a, dr,dg,db,da); \
+ } \
+ SI void SK_VECTORCALL name##_a(TailStage* st, size_t x, size_t tail, /* alpha, TailStage overload */ \
+ SkNf r, SkNf g, SkNf b, SkNf a, \
+ SkNf dr, SkNf dg, SkNf db, SkNf da) { \
+ a = name##_kernel(st->ctx, a); \
+ next(st, x,tail, r,g,b,a, dr,dg,db,da); \
+ } \
+ static SK_ALWAYS_INLINE SkNf name##_kernel(void* ctx, SkNf& s) /* no trailing backslash: the { ... } after GAMMA_STAGE(x) becomes this kernel's body */
+
SI SkNf inv(const SkNf& x) { return 1.0f - x; }
SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
@@ -431,6 +486,59 @@ STAGE(store_srgb, false) {
| SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), (int*)ptr);
}
+STAGE(load_s_linear_rgba, true) {
msarett1 2016/11/09 00:01:05 nit: Follow style conventions from above Use whit
mtklein_C 2016/11/09 11:04:36 Let's call these _8888. That's our common shortha
+    // Loads 32-bit pixels starting at index x of the buffer that ctx points to and
+    // unpacks them into r,g,b,a, each byte scaled by 1/255.  Red is taken from
+    // bits 0-7, green from 8-15, blue from 16-23 and alpha from 24-31.
+    const uint32_t* src = *(const uint32_t**)ctx + x;
+    const auto px = load<kIsTail>(tail, src);
+
+    // Reinterpret the already-isolated byte lanes as ints, then scale.
+    auto unorm8 = [](const SkNx<N, uint32_t>& v) {
+        return (1/255.0f)*SkNx_cast<float>(SkNi::Load(&v));
+    };
+
+    r = unorm8((px >>  0) & 0xFF);
+    g = unorm8((px >>  8) & 0xFF);
+    b = unorm8((px >> 16) & 0xFF);
+    a = unorm8(px >> 24);
+}
+
+STAGE(load_s_linear_bgra, true) {
mtklein_C 2016/11/09 11:04:36 How about we write everything in terms of rgba, an
raftias 2016/11/10 21:36:07 I that before (with that exact name, even), then t
msarett1 2016/11/11 14:36:51 Let's defer to Mike on this one. lgtm, as is.
+    // Loads 32-bit pixels starting at index x of the buffer that ctx points to and
+    // unpacks them into r,g,b,a, each byte scaled by 1/255.  Blue is taken from
+    // bits 0-7, green from 8-15, red from 16-23 and alpha from 24-31.
+    const uint32_t* src = *(const uint32_t**)ctx + x;
+    const auto px = load<kIsTail>(tail, src);
+
+    // Reinterpret the already-isolated byte lanes as ints, then scale.
+    auto unorm8 = [](const SkNx<N, uint32_t>& v) {
+        return (1/255.0f)*SkNx_cast<float>(SkNi::Load(&v));
+    };
+
+    r = unorm8((px >> 16) & 0xFF);
+    g = unorm8((px >>  8) & 0xFF);
+    b = unorm8((px >>  0) & 0xFF);
+    a = unorm8((px >> 24));
+}
+
+// Clamp colors into [0,1] premul (e.g. just before storing back to memory).
raftias 2016/11/08 21:19:58 I noticed when I pulled before uploading that this
msarett1 2016/11/09 00:01:05 I believe the idea is to not waste time clamping w
mtklein_C 2016/11/09 11:04:36 This has now been split into two stages, clamp_0 a
+SI void clamp_01_premul(SkNf& r, SkNf& g, SkNf& b, SkNf& a) {
+    // Alpha is pinned to [0,1] first because it is the upper bound used for
+    // the three colour channels below.
+    a = SkNf::Min(SkNf::Max(a, 0.0f), 1.0f);
+
+    // Each colour channel is pinned to [0, a].
+    r = SkNf::Min(SkNf::Max(r, 0.0f), a);
+    g = SkNf::Min(SkNf::Max(g, 0.0f), a);
+    b = SkNf::Min(SkNf::Max(b, 0.0f), a);
+}
+
+STAGE(store_linear_rgba, false) {
+    // Clamps r,g,b,a with clamp_01_premul, converts each to a byte and writes the
+    // packed 32-bit pixels (red in bits 0-7, alpha in bits 24-31) at index x of
+    // the buffer that ctx points to.
+    clamp_01_premul(r,g,b,a);
+
+    // Scale to 0..255; the + 0.5f makes the int conversion round to nearest.
+    auto to_byte = [](const SkNf& v) { return SkNx_cast<int>(255.0f * v + 0.5f); };
msarett1 2016/11/09 00:01:05 I don't think you need the "+ 0.5f" terms. I thin
mtklein_C 2016/11/09 11:04:36 No, we're doing that to round to the nearest byte
+
+    auto dst = *(uint32_t**)ctx + x;
+    const auto packed = (to_byte(r) <<  0)
+                      | (to_byte(g) <<  8)
+                      | (to_byte(b) << 16)
+                      | (to_byte(a) << 24);
+    store<kIsTail>(tail, packed, (int*)dst);
+}
+
+STAGE(store_linear_bgra, false) {
+    // Clamps r,g,b,a with clamp_01_premul, converts each to a byte and writes the
+    // packed 32-bit pixels (blue in bits 0-7, red in bits 16-23, alpha in bits
+    // 24-31) at index x of the buffer that ctx points to.
+    clamp_01_premul(r,g,b,a);
+
+    // Scale to 0..255; the + 0.5f makes the int conversion round to nearest.
+    auto to_byte = [](const SkNf& v) { return SkNx_cast<int>(255.0f * v + 0.5f); };
+
+    auto dst = *(uint32_t**)ctx + x;
+    const auto packed = (to_byte(r) << 16)
+                      | (to_byte(g) <<  8)
+                      | (to_byte(b) <<  0)
+                      | (to_byte(a) << 24);
+    store<kIsTail>(tail, packed, (int*)dst);
+}
+
RGBA_XFERMODE(clear) { return 0.0f; }
//RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it.
RGBA_XFERMODE(dst) { return d; }
@@ -490,6 +598,19 @@ STAGE(luminance_to_alpha, true) {
r = g = b = 0;
}
+STAGE(matrix_4x4, true) {
+    // Replaces (r,g,b,a) with the product of the SkMatrix44 that ctx points to
+    // and the column vector (r,g,b,a):
+    //     out[row] = m(row,0)*r + m(row,1)*g + m(row,2)*b + m(row,3)*a
+    //
+    // The four results are accumulated in locals rather than in dr/dg/db/da:
+    // those hold the destination colour, which later stages may still read,
+    // so this stage must not use them as scratch space.
+    const SkMatrix44& mat = *(const SkMatrix44*)ctx;
mtklein_C 2016/11/09 11:04:36 I'd like matrix_4x4 and matrix_4x5 to look and beh
raftias 2016/11/10 21:36:06 They were just different since I had written it an
+    auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
+    const SkNf R = fma(mat.get(0, 0),r, fma(mat.get(0, 1),g, fma(mat.get(0, 2),b, mat.get(0, 3)*a)));
msarett1 2016/11/09 00:01:05 No need for "*a". Actually I think we don't want
mtklein_C 2016/11/09 11:04:36 If we don't *a here, we can't really call this sta
raftias 2016/11/10 21:36:06 It's 3x4 now.
+    const SkNf G = fma(mat.get(1, 0),r, fma(mat.get(1, 1),g, fma(mat.get(1, 2),b, mat.get(1, 3)*a)));
+    const SkNf B = fma(mat.get(2, 0),r, fma(mat.get(2, 1),g, fma(mat.get(2, 2),b, mat.get(2, 3)*a)));
msarett1 2016/11/09 00:01:05 Mike, is it ok that we're destructive to dr, dg, d
mtklein_C 2016/11/09 11:04:36 The pedantic answer is that that depends what you'
raftias 2016/11/10 21:36:06 I'll remove these and put them in temporaries. I j
+    const SkNf A = fma(mat.get(3, 0),r, fma(mat.get(3, 1),g, fma(mat.get(3, 2),b, mat.get(3, 3)*a)));
+
+    // Every row reads the original r,g,b,a, so assign only after all four are done.
+    r = R;
+    g = G;
+    b = B;
+    a = A;
+}
+
STAGE(matrix_4x5, true) {
auto m = (const float*)ctx;
@@ -504,6 +625,169 @@ STAGE(matrix_4x5, true) {
a = A;
}
+static inline Sk4f powNf(const Sk4f& x, float exp) {
mtklein_C 2016/11/09 11:04:36 Generally this file writes static inline as SI. I
raftias 2016/11/10 21:36:06 Acknowledged.
+    // Lane-wise powf: each of the four lanes of x is raised to the power exp.
+    float lanes[4];
+    for (int i = 0; i < 4; ++i) {
+        lanes[i] = ::powf(x[i], exp);
+    }
+    return Sk4f::Load(lanes);
+}
+
+static inline Sk8f powNf(const Sk8f& x, float exp) {
+    // Lane-wise powf: each of the eight lanes of x is raised to the power exp.
+    float lanes[8];
+    for (int i = 0; i < 8; ++i) {
+        lanes[i] = ::powf(x[i], exp);
+    }
+    return Sk8f::Load(lanes);
+}
+
+GAMMA_STAGE(param_gamma) {
+    // Applies the parametric transfer function that ctx points to, lane by lane:
+    //     s <  fD :  fE*s + fF             (linear segment)
+    //     s >= fD :  (fA*s + fB)^fG + fC   (power segment)
+    // The comparison is strict so that s == fD takes the power segment, as the
+    // parametric curve definition puts the breakpoint itself on that side.
+    const SkColorSpaceTransferFn& gamma = *(const SkColorSpaceTransferFn*)ctx;
+    return (s < gamma.fD).thenElse(gamma.fE * s + gamma.fF,
msarett1 2016/11/09 00:01:05 nit: < instead of <=
raftias 2016/11/10 21:36:06 Done.
+                                   powNf(s * gamma.fA + gamma.fB, gamma.fG) + gamma.fC);
+}
+
+static constexpr float kGammaTableSize = 1024;
+
+GAMMA_STAGE(table_gamma) {
+    // Maps each lane of s through a lookup table of kGammaTableSize floats that
+    // ctx points to: s is scaled to a table position, pinned to the valid index
+    // range, rounded with lrintf and replaced by the table entry found there.
+    const float* table = (const float*)ctx;
mtklein_C 2016/11/09 11:04:36 This name makes it seem like we're going to be usi
raftias 2016/11/10 21:36:06 Acknowledged.
+    constexpr float kLast = kGammaTableSize - 1;
+    s = SkNf::Min(SkNf::Max(kLast * s, 0.f), kLast);
mtklein_C 2016/11/09 11:04:36 If we're not going to source the 1024 constant fro
raftias 2016/11/10 21:36:06 ApplyTable stores it now.
+
+    float looked_up[N];
+    for (int lane = 0; lane != N; ++lane) {
+        looked_up[lane] = table[lrintf(s[lane])];
+    }
+    return SkNf::Load(looked_up);
+}
+
+static inline void interp_3d_clut(float dst[3], float src[3], const SkColorLookUpTable* colorLUT) { /*
+ * Fills dst[0..2] with the three output components obtained by interpolating
+ * colorLUT->table() at the position selected by src[0..2].
+ *
+ * Each src component is scaled by (fGridPoints[k] - 1) to get a fractional grid
+ * index; its floor picks the low corner of the enclosing cell and the remainder
+ * is the interpolation weight along that axis.  src is read in full before dst
+ * is first written, so both may point at the same array.
+ *
+ * NOTE(review): nothing here clamps src.  This looks like it assumes every
+ * component is already in [0,1] and every grid dimension has at least two
+ * points; otherwise the table reads would fall outside the grid -- confirm
+ * at the call sites.
+ */
msarett1 2016/11/09 00:01:05 This maybe does not need to belong in this file.
mtklein_C 2016/11/09 11:04:36 Why don't we make this a normal, separately-compil
raftias 2016/11/10 21:36:06 Done.
+ // Call the src components x, y, and z.
+ uint8_t maxX = colorLUT->fGridPoints[0] - 1; // highest grid index along x
+ uint8_t maxY = colorLUT->fGridPoints[1] - 1; // highest grid index along y
+ uint8_t maxZ = colorLUT->fGridPoints[2] - 1; // highest grid index along z
+
+ // An approximate index into each of the three dimensions of the table.
+ float x = src[0] * maxX;
+ float y = src[1] * maxY;
+ float z = src[2] * maxZ;
+
+ // This gives us the low index for our interpolation.
+ int ix = sk_float_floor2int(x);
+ int iy = sk_float_floor2int(y);
+ int iz = sk_float_floor2int(z);
+
+ // Make sure the low index is not also the max index.
+ ix = (maxX == ix) ? ix - 1 : ix; // keeps ix + 1 inside the grid; diffX then comes out as 1
+ iy = (maxY == iy) ? iy - 1 : iy;
+ iz = (maxZ == iz) ? iz - 1 : iz;
+
+ // Weighting factors for the interpolation.
+ float diffX = x - ix;
+ float diffY = y - iy;
+ float diffZ = z - iz;
+
+ // Constants to help us navigate the 3D table.
+ // Ex: Assume x = a, y = b, z = c.
+ // table[a * n001 + b * n010 + c * n100] logically equals table[a][b][c].
+ const int n000 = 0;
+ const int n001 = 3 * colorLUT->fGridPoints[1] * colorLUT->fGridPoints[2]; // stride of one step in x
+ const int n010 = 3 * colorLUT->fGridPoints[2]; // stride of one step in y
+ const int n011 = n001 + n010;
+ const int n100 = 3; // stride of one step in z: the three output floats of one entry
+ const int n101 = n100 + n001;
+ const int n110 = n100 + n010;
+ const int n111 = n110 + n001;
+
+ // Base ptr into the table.
+ const float* ptr = &(colorLUT->table()[ix*n001 + iy*n010 + iz*n100]); // first output component at the low corner
+
+ // The code below performs a tetrahedral interpolation for each of the three
+ // dst components. Once the tetrahedron containing the interpolation point is
+ // identified, the interpolation is a weighted sum of grid values at the
+ // vertices of the tetrahedron. The claim is that tetrahedral interpolation
+ // provides a more accurate color conversion.
+ // blogs.mathworks.com/steve/2006/11/24/tetrahedral-interpolation-for-colorspace-conversion/
+ //
+ // I have one test image, and visually I can't tell the difference between
+ // tetrahedral and trilinear interpolation. In terms of computation, the
+ // tetrahedral code requires more branches but less computation. The
+ // SampleICC library provides an option for the client to choose either
+ // tetrahedral or trilinear.
+ for (int i = 0; i < 3; i++) { // one pass per output component; the branch taken is the same on every pass
+ if (diffZ < diffY) {
+ if (diffZ < diffX) {
+ dst[i] = (ptr[n000] + diffZ * (ptr[n110] - ptr[n010]) +
+ diffY * (ptr[n010] - ptr[n000]) +
+ diffX * (ptr[n111] - ptr[n110]));
+ } else if (diffY < diffX) {
+ dst[i] = (ptr[n000] + diffZ * (ptr[n111] - ptr[n011]) +
+ diffY * (ptr[n011] - ptr[n001]) +
+ diffX * (ptr[n001] - ptr[n000]));
+ } else {
+ dst[i] = (ptr[n000] + diffZ * (ptr[n111] - ptr[n011]) +
+ diffY * (ptr[n010] - ptr[n000]) +
+ diffX * (ptr[n011] - ptr[n010]));
+ }
+ } else {
+ if (diffZ < diffX) {
+ dst[i] = (ptr[n000] + diffZ * (ptr[n101] - ptr[n001]) +
+ diffY * (ptr[n111] - ptr[n101]) +
+ diffX * (ptr[n001] - ptr[n000]));
+ } else if (diffY < diffX) {
+ dst[i] = (ptr[n000] + diffZ * (ptr[n100] - ptr[n000]) +
+ diffY * (ptr[n111] - ptr[n101]) +
+ diffX * (ptr[n101] - ptr[n100]));
+ } else {
+ dst[i] = (ptr[n000] + diffZ * (ptr[n100] - ptr[n000]) +
+ diffY * (ptr[n110] - ptr[n100]) +
+ diffX * (ptr[n111] - ptr[n110]));
+ }
+ }
+
+ // Increment the table ptr in order to handle the next component.
+ // Note that this is how the table is designed: all of the nXXX
+ // variables are multiples of 3 because there are 3 output
+ // components.
+ ptr++;
+ }
+}
+
+STAGE(clut, true) {
mtklein_C 2016/11/09 11:04:36 how about color_lookup_table?
raftias 2016/11/10 21:36:06 Done.
+    // Runs every lane's (r,g,b) triple through interp_3d_clut using the
+    // SkColorLookUpTable that ctx points to, and writes the interpolated triple
+    // back to r,g,b.  The lookup is scalar, so lanes are handled one at a time.
+    const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;
mtklein_C 2016/11/09 11:04:36 Side note: it's going to drive me nuts that we cap
raftias 2016/11/10 21:36:06 I didn't name it, but my guess is that it's becaus
msarett1 2016/11/11 14:36:51 I don't feel strongly about the name. Feel free t
+    alignas(alignof(SkNf)) float result[3][N];
mtklein_C 2016/11/09 11:04:36 Let's drop the alignment business. SkNf::Load() s
raftias 2016/11/10 21:36:06 Done.
+    for (int lane = 0; lane < N; ++lane) {
+        float in[3] = { r[lane], g[lane], b[lane] };
+        float out[3];
+        interp_3d_clut(out, in, colorLUT);
+
+        // Transpose back: one row of result per channel, one column per lane.
+        for (int ch = 0; ch < 3; ++ch) {
+            result[ch][lane] = out[ch];
+        }
+    }
+    r = SkNf::Load(result[0]);
+    g = SkNf::Load(result[1]);
+    b = SkNf::Load(result[2]);
+}
+
+STAGE(labtoxyz, true) {
raftias 2016/11/08 21:19:58 I think this can be expressed as a matrix_4x4 foll
mtklein_C 2016/11/09 11:04:36 I think this is clearer as its own stage. It's pr
raftias 2016/11/10 21:36:06 Done.
+    // Converts Lab, carried in r,g,b, to XYZ, written back to r,g,b.
+    // r is scaled by 100 to give L; g and b are scaled by 255 and offset by
+    // -128 to give a and b.
+    const auto lab_l = r * 100.f;
+    const auto lab_a = g * 255.f - 128.f;
+    const auto lab_b = b * 255.f - 128.f;
+
+    // Intermediate values, one per output axis, before the cube/linear step.
+    const auto fy = (lab_l + 16.f) * (1.f/116.f);
+    const auto fx = lab_a * (1.f/500.f) + fy;
mtklein_C 2016/11/09 11:04:36 One .f is plenty to get these solidly as float con
raftias 2016/11/10 21:36:06 Acknowledged.
+    const auto fz = fy - (lab_b * (1.f/200.f));
+
+    // Piecewise step: use t^3 when it exceeds 0.008856, else the linear form.
+    auto cube_or_linear = [](const SkNf& t) {
+        const SkNf cubed = t*t*t;
mtklein_C 2016/11/09 11:04:36 At a glance it looks like cubed must be accumulati
raftias 2016/11/10 21:36:06 Done.
+        return (cubed > 0.008856f).thenElse(cubed, (t - (16.f/116.f)) * (1.f/7.787f));
+    };
+
+    // adjust to D50 illuminant
+    r = cube_or_linear(fx) * 0.96422f;
+    g = cube_or_linear(fy) * 1.00000f;
+    b = cube_or_linear(fz) * 0.82521f;
+}
+
template <typename Fn>
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
switch (st) {

Powered by Google App Engine
This is Rietveld 408576698