Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: src/opts/SkRasterPipeline_opts.h

Issue 2449243003: Initial implementation of a SkColorSpace_A2B xform (Closed)
Patch Set: updated implementation to use SkRasterPipeline Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkRasterPipeline_opts_DEFINED 8 #ifndef SkRasterPipeline_opts_DEFINED
9 #define SkRasterPipeline_opts_DEFINED 9 #define SkRasterPipeline_opts_DEFINED
10 10
11 #include "SkColorPriv.h" 11 #include "SkColorPriv.h"
12 #include "SkColorSpace_Base.h"
12 #include "SkHalf.h" 13 #include "SkHalf.h"
14 #include "SkMatrix44.h"
13 #include "SkPM4f.h" 15 #include "SkPM4f.h"
14 #include "SkPM4fPriv.h" 16 #include "SkPM4fPriv.h"
15 #include "SkRasterPipeline.h" 17 #include "SkRasterPipeline.h"
16 #include "SkSRGB.h" 18 #include "SkSRGB.h"
17 #include "SkUtils.h" 19 #include "SkUtils.h"
18 #include <utility> 20 #include <utility>
19 21
20 namespace { 22 namespace {
21 23
22 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 24 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 SkNf dr, SkNf dg, SkNf db, SkNf da) { \ 129 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
128 r = name##_kernel(r,a,dr,da); \ 130 r = name##_kernel(r,a,dr,da); \
129 g = name##_kernel(g,a,dg,da); \ 131 g = name##_kernel(g,a,dg,da); \
130 b = name##_kernel(b,a,db,da); \ 132 b = name##_kernel(b,a,db,da); \
131 a = a + (da * (1.0f-a)); \ 133 a = a + (da * (1.0f-a)); \
132 next(st, x,tail, r,g,b,a, dr,dg,db,da); \ 134 next(st, x,tail, r,g,b,a, dr,dg,db,da); \
133 } \ 135 } \
134 static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \ 136 static SK_ALWAYS_INLINE SkNf name##_kernel(const SkNf& s, const SkNf& sa, \
135 const SkNf& d, const SkNf& da) 137 const SkNf& d, const SkNf& da)
136 138
139
140 #define GAMMA_STAGE(name) \
msarett1 2016/11/09 00:01:05 Instead of this, I think I would prefer 6 normal s
mtklein_C 2016/11/09 11:04:36 I think you mean 3 normal stages? Each STAGE invo
raftias 2016/11/10 21:36:06 I did this with fn_1_r/g/b. If we add in specific
141 static SK_ALWAYS_INLINE SkNf name##_kernel(void* ctx, SkNf& s); \
142 SI void SK_VECTORCALL name##_r(BodyStage* st, size_t x, \
143 SkNf r, SkNf g, SkNf b, SkNf a, \
144 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
145 r = name##_kernel(st->ctx, r); \
146 next(st, x, r,g,b,a, dr,dg,db,da); \
147 } \
148 SI void SK_VECTORCALL name##_r(TailStage* st, size_t x, size_t tail, \
149 SkNf r, SkNf g, SkNf b, SkNf a, \
150 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
151 r = name##_kernel(st->ctx, r); \
152 next(st, x,tail, r,g,b,a, dr,dg,db,da); \
153 } \
154 SI void SK_VECTORCALL name##_g(BodyStage* st, size_t x, \
155 SkNf r, SkNf g, SkNf b, SkNf a, \
156 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
157 g = name##_kernel(st->ctx, g); \
158 next(st, x, r,g,b,a, dr,dg,db,da); \
159 } \
160 SI void SK_VECTORCALL name##_g(TailStage* st, size_t x, size_t tail, \
161 SkNf r, SkNf g, SkNf b, SkNf a, \
162 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
163 g = name##_kernel(st->ctx, g); \
164 next(st, x,tail, r,g,b,a, dr,dg,db,da); \
165 } \
166 SI void SK_VECTORCALL name##_b(BodyStage* st, size_t x, \
167 SkNf r, SkNf g, SkNf b, SkNf a, \
168 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
169 b = name##_kernel(st->ctx, b); \
170 next(st, x, r,g,b,a, dr,dg,db,da); \
171 } \
172 SI void SK_VECTORCALL name##_b(TailStage* st, size_t x, size_t tail, \
173 SkNf r, SkNf g, SkNf b, SkNf a, \
174 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
175 b = name##_kernel(st->ctx, b); \
176 next(st, x,tail, r,g,b,a, dr,dg,db,da); \
177 } \
178 SI void SK_VECTORCALL name##_a(BodyStage* st, size_t x, \
msarett1 2016/11/09 00:01:05 All we need to do with "a" is load it and store.
raftias 2016/11/10 21:36:07 It was indeed for CMYK/etc support.
179 SkNf r, SkNf g, SkNf b, SkNf a, \
180 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
181 a = name##_kernel(st->ctx, a); \
182 next(st, x, r,g,b,a, dr,dg,db,da); \
183 } \
184 SI void SK_VECTORCALL name##_a(TailStage* st, size_t x, size_t tail, \
185 SkNf r, SkNf g, SkNf b, SkNf a, \
186 SkNf dr, SkNf dg, SkNf db, SkNf da) { \
187 a = name##_kernel(st->ctx, a); \
188 next(st, x,tail, r,g,b,a, dr,dg,db,da); \
189 } \
190 static SK_ALWAYS_INLINE SkNf name##_kernel(void* ctx, SkNf& s)
191
137 SI SkNf inv(const SkNf& x) { return 1.0f - x; } 192 SI SkNf inv(const SkNf& x) { return 1.0f - x; }
138 193
139 SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) { 194 SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) {
140 return SkNx_fma(to-from, cov, from); 195 return SkNx_fma(to-from, cov, from);
141 } 196 }
142 197
143 template <bool kIsTail, typename T> 198 template <bool kIsTail, typename T>
144 SI SkNx<N,T> load(size_t tail, const T* src) { 199 SI SkNx<N,T> load(size_t tail, const T* src) {
145 SkASSERT(kIsTail == (tail > 0)); 200 SkASSERT(kIsTail == (tail > 0));
146 // TODO: maskload for 32- and 64-bit T 201 // TODO: maskload for 32- and 64-bit T
(...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after
424 } 479 }
425 480
426 STAGE(store_srgb, false) { 481 STAGE(store_srgb, false) {
427 auto ptr = *(uint32_t**)ctx + x; 482 auto ptr = *(uint32_t**)ctx + x;
428 store<kIsTail>(tail, ( sk_linear_to_srgb(r) << SK_R32_SHIFT 483 store<kIsTail>(tail, ( sk_linear_to_srgb(r) << SK_R32_SHIFT
429 | sk_linear_to_srgb(g) << SK_G32_SHIFT 484 | sk_linear_to_srgb(g) << SK_G32_SHIFT
430 | sk_linear_to_srgb(b) << SK_B32_SHIFT 485 | sk_linear_to_srgb(b) << SK_B32_SHIFT
431 | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), ( int*)ptr); 486 | SkNx_cast<int>(0.5f + 255.0f * a) << SK_A32_SHIFT), ( int*)ptr);
432 } 487 }
433 488
489 STAGE(load_s_linear_rgba, true) {
msarett1 2016/11/09 00:01:05 nit: Follow style conventions from above Use whit
mtklein_C 2016/11/09 11:04:36 Let's call these _8888. That's our common shortha
490 auto ptr = *(const uint32_t**)ctx + x;
491
492 auto px = load<kIsTail>(tail, ptr);
493 auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
494 r = (1/255.0f)*SkNx_cast<float>(to_int((px >> 0) & 0xFF));
495 g = (1/255.0f)*SkNx_cast<float>(to_int((px >> 8) & 0xFF));
496 b = (1/255.0f)*SkNx_cast<float>(to_int((px >> 16) & 0xFF));
497 a = (1/255.0f)*SkNx_cast<float>(to_int(px >> 24));
498 }
499
500 STAGE(load_s_linear_bgra, true) {
mtklein_C 2016/11/09 11:04:36 How about we write everything in terms of rgba, an
raftias 2016/11/10 21:36:07 I had that before (with that exact name, even), then t
msarett1 2016/11/11 14:36:51 Let's defer to Mike on this one. lgtm, as is.
501 auto ptr = *(const uint32_t**)ctx + x;
502
503 auto px = load<kIsTail>(tail, ptr);
504 auto to_int = [](const SkNx<N, uint32_t>& v) { return SkNi::Load(&v); };
505 r = (1/255.0f)*SkNx_cast<float>(to_int((px >> 16) & 0xFF));
506 g = (1/255.0f)*SkNx_cast<float>(to_int((px >> 8) & 0xFF));
507 b = (1/255.0f)*SkNx_cast<float>(to_int((px >> 0) & 0xFF));
508 a = (1/255.0f)*SkNx_cast<float>(to_int((px >> 24)));
509 }
510
511 // Clamp colors into [0,1] premul (e.g. just before storing back to memory).
raftias 2016/11/08 21:19:58 I noticed when I pulled before uploading that this
msarett1 2016/11/09 00:01:05 I believe the idea is to not waste time clamping w
mtklein_C 2016/11/09 11:04:36 This has now been split into two stages, clamp_0 a
512 SI void clamp_01_premul(SkNf& r, SkNf& g, SkNf& b, SkNf& a) {
513 a = SkNf::Max(a, 0.0f);
514 r = SkNf::Max(r, 0.0f);
515 g = SkNf::Max(g, 0.0f);
516 b = SkNf::Max(b, 0.0f);
517
518 a = SkNf::Min(a, 1.0f);
519 r = SkNf::Min(r, a);
520 g = SkNf::Min(g, a);
521 b = SkNf::Min(b, a);
522 }
523
524 STAGE(store_linear_rgba, false) {
525 clamp_01_premul(r,g,b,a);
526 auto ptr = *(uint32_t**)ctx + x;
527 store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) << 0
msarett1 2016/11/09 00:01:05 I don't think you need the "+ 0.5f" terms. I thin
mtklein_C 2016/11/09 11:04:36 No, we're doing that to round to the nearest byte
528 | SkNx_cast<int>(255.0f * g + 0.5f) << 8
529 | SkNx_cast<int>(255.0f * b + 0.5f) << 16
530 | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr) ;
531 }
532
533 STAGE(store_linear_bgra, false) {
534 clamp_01_premul(r,g,b,a);
535 auto ptr = *(uint32_t**)ctx + x;
536 store<kIsTail>(tail, ( SkNx_cast<int>(255.0f * r + 0.5f) << 16
537 | SkNx_cast<int>(255.0f * g + 0.5f) << 8
538 | SkNx_cast<int>(255.0f * b + 0.5f) << 0
539 | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr) ;
540 }
541
434 RGBA_XFERMODE(clear) { return 0.0f; } 542 RGBA_XFERMODE(clear) { return 0.0f; }
435 //RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it. 543 //RGBA_XFERMODE(src) { return s; } // This would be a no-op stage, so we just omit it.
436 RGBA_XFERMODE(dst) { return d; } 544 RGBA_XFERMODE(dst) { return d; }
437 545
438 RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); } 546 RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); }
439 RGBA_XFERMODE(srcin) { return s * da; } 547 RGBA_XFERMODE(srcin) { return s * da; }
440 RGBA_XFERMODE(srcout) { return s * inv(da); } 548 RGBA_XFERMODE(srcout) { return s * inv(da); }
441 RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); } 549 RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); }
442 RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); } 550 RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); }
443 RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); } 551 RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); }
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
483 liteDst = m.rsqrt().invert() - m, // Used in case 3. 591 liteDst = m.rsqrt().invert() - m, // Used in case 3.
484 liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3? 592 liteSrc = d*sa + da*(s2 - sa) * (4.0f*d <= da).thenElse(darkDst, liteDst); // 2 or 3?
485 return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)? 593 return s*inv(da) + d*inv(sa) + (s2 <= sa).thenElse(darkSrc, liteSrc); // 1 or (2 or 3)?
486 } 594 }
487 595
488 STAGE(luminance_to_alpha, true) { 596 STAGE(luminance_to_alpha, true) {
489 a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b; 597 a = SK_LUM_COEFF_R*r + SK_LUM_COEFF_G*g + SK_LUM_COEFF_B*b;
490 r = g = b = 0; 598 r = g = b = 0;
491 } 599 }
492 600
601 STAGE(matrix_4x4, true) {
602 const SkMatrix44& mat = *(const SkMatrix44*)ctx;
mtklein_C 2016/11/09 11:04:36 I'd like matrix_4x4 and matrix_4x5 to look and beh
raftias 2016/11/10 21:36:06 They were just different since I had written it an
603 auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
604 dr = fma(mat.get(0, 0),r, fma(mat.get(0, 1),g, fma(mat.get(0, 2),b, mat.get(0, 3)*a)));
msarett1 2016/11/09 00:01:05 No need for "*a". Actually I think we don't want
mtklein_C 2016/11/09 11:04:36 If we don't *a here, we can't really call this sta
raftias 2016/11/10 21:36:06 It's 3x4 now.
605 dg = fma(mat.get(1, 0),r, fma(mat.get(1, 1),g, fma(mat.get(1, 2),b, mat.get(1, 3)*a)));
606 db = fma(mat.get(2, 0),r, fma(mat.get(2, 1),g, fma(mat.get(2, 2),b, mat.get(2, 3)*a)));
msarett1 2016/11/09 00:01:05 Mike, is it ok that we're destructive to dr, dg, d
mtklein_C 2016/11/09 11:04:36 The pedantic answer is that that depends what you'
raftias 2016/11/10 21:36:06 I'll remove these and put them in temporaries. I j
607 da = fma(mat.get(3, 0),r, fma(mat.get(3, 1),g, fma(mat.get(3, 2),b, mat.get(3, 3)*a)));
608 r = dr;
609 g = dg;
610 b = db;
611 a = da;
612 }
613
493 STAGE(matrix_4x5, true) { 614 STAGE(matrix_4x5, true) {
494 auto m = (const float*)ctx; 615 auto m = (const float*)ctx;
495 616
496 auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }; 617 auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); };
497 auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))), 618 auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))),
498 G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))), 619 G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))),
499 B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))), 620 B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))),
500 A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19])))); 621 A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19]))));
501 r = R; 622 r = R;
502 g = G; 623 g = G;
503 b = B; 624 b = B;
504 a = A; 625 a = A;
505 } 626 }
506 627
628 static inline Sk4f powNf(const Sk4f& x, float exp) {
mtklein_C 2016/11/09 11:04:36 Generally this file writes static inline as SI. I
raftias 2016/11/10 21:36:06 Acknowledged.
629 return Sk4f{::powf(x[0], exp), ::powf(x[1], exp), ::powf(x[2], exp), ::powf(x[3], exp)};
630 }
631
632 static inline Sk8f powNf(const Sk8f& x, float exp) {
633 return Sk8f{::powf(x[0], exp), ::powf(x[1], exp), ::powf(x[2], exp), ::powf(x[3], exp),
634 ::powf(x[4], exp), ::powf(x[5], exp), ::powf(x[6], exp), ::powf(x[7], exp)};
635 }
636
637 GAMMA_STAGE(param_gamma) {
638 const SkColorSpaceTransferFn& gamma = *(const SkColorSpaceTransferFn*)ctx;
639 return (s <= gamma.fD).thenElse(gamma.fE * s + gamma.fF,
msarett1 2016/11/09 00:01:05 nit: < instead of <=
raftias 2016/11/10 21:36:06 Done.
640 powNf(s * gamma.fA + gamma.fB, gamma.fG) + gamma.fC);
641 }
642
643 static constexpr float kGammaTableSize = 1024;
644
645 GAMMA_STAGE(table_gamma) {
646 constexpr float maxIndex = kGammaTableSize - 1;
647 const float* gammaTables = (const float*)ctx;
mtklein_C 2016/11/09 11:04:36 This name makes it seem like we're going to be usi
raftias 2016/11/10 21:36:06 Acknowledged.
648 s = SkNf::Min(SkNf::Max(maxIndex * s, 0.f), maxIndex);
mtklein_C 2016/11/09 11:04:36 If we're not going to source the 1024 constant fro
raftias 2016/11/10 21:36:06 ApplyTable stores it now.
649 float result[N];
650 for (int i = 0; i < N; ++i) {
651 result[i] = gammaTables[lrintf(s[i])];
652 }
653 return SkNf::Load(result);
654 }
655
656 static inline void interp_3d_clut(float dst[3], float src[3], const SkColorLookUpTable* colorLUT) {
msarett1 2016/11/09 00:01:05 This maybe does not need to belong in this file.
mtklein_C 2016/11/09 11:04:36 Why don't we make this a normal, separately-compil
raftias 2016/11/10 21:36:06 Done.
657 // Call the src components x, y, and z.
658 uint8_t maxX = colorLUT->fGridPoints[0] - 1;
659 uint8_t maxY = colorLUT->fGridPoints[1] - 1;
660 uint8_t maxZ = colorLUT->fGridPoints[2] - 1;
661
662 // An approximate index into each of the three dimensions of the table.
663 float x = src[0] * maxX;
664 float y = src[1] * maxY;
665 float z = src[2] * maxZ;
666
667 // This gives us the low index for our interpolation.
668 int ix = sk_float_floor2int(x);
669 int iy = sk_float_floor2int(y);
670 int iz = sk_float_floor2int(z);
671
672 // Make sure the low index is not also the max index.
673 ix = (maxX == ix) ? ix - 1 : ix;
674 iy = (maxY == iy) ? iy - 1 : iy;
675 iz = (maxZ == iz) ? iz - 1 : iz;
676
677 // Weighting factors for the interpolation.
678 float diffX = x - ix;
679 float diffY = y - iy;
680 float diffZ = z - iz;
681
682 // Constants to help us navigate the 3D table.
683 // Ex: Assume x = a, y = b, z = c.
684 // table[a * n001 + b * n010 + c * n100] logically equals table[a][b][c].
685 const int n000 = 0;
686 const int n001 = 3 * colorLUT->fGridPoints[1] * colorLUT->fGridPoints[2];
687 const int n010 = 3 * colorLUT->fGridPoints[2];
688 const int n011 = n001 + n010;
689 const int n100 = 3;
690 const int n101 = n100 + n001;
691 const int n110 = n100 + n010;
692 const int n111 = n110 + n001;
693
694 // Base ptr into the table.
695 const float* ptr = &(colorLUT->table()[ix*n001 + iy*n010 + iz*n100]);
696
697 // The code below performs a tetrahedral interpolation for each of the three
698 // dst components. Once the tetrahedron containing the interpolation point is
699 // identified, the interpolation is a weighted sum of grid values at the
700 // vertices of the tetrahedron. The claim is that tetrahedral interpolation
701 // provides a more accurate color conversion.
702 // blogs.mathworks.com/steve/2006/11/24/tetrahedral-interpolation-for-colorspace-conversion/
703 //
704 // I have one test image, and visually I can't tell the difference between
705 // tetrahedral and trilinear interpolation. In terms of computation, the
706 // tetrahedral code requires more branches but less computation. The
707 // SampleICC library provides an option for the client to choose either
708 // tetrahedral or trilinear.
709 for (int i = 0; i < 3; i++) {
710 if (diffZ < diffY) {
711 if (diffZ < diffX) {
712 dst[i] = (ptr[n000] + diffZ * (ptr[n110] - ptr[n010]) +
713 diffY * (ptr[n010] - ptr[n000]) +
714 diffX * (ptr[n111] - ptr[n110]));
715 } else if (diffY < diffX) {
716 dst[i] = (ptr[n000] + diffZ * (ptr[n111] - ptr[n011]) +
717 diffY * (ptr[n011] - ptr[n001]) +
718 diffX * (ptr[n001] - ptr[n000]));
719 } else {
720 dst[i] = (ptr[n000] + diffZ * (ptr[n111] - ptr[n011]) +
721 diffY * (ptr[n010] - ptr[n000]) +
722 diffX * (ptr[n011] - ptr[n010]));
723 }
724 } else {
725 if (diffZ < diffX) {
726 dst[i] = (ptr[n000] + diffZ * (ptr[n101] - ptr[n001]) +
727 diffY * (ptr[n111] - ptr[n101]) +
728 diffX * (ptr[n001] - ptr[n000]));
729 } else if (diffY < diffX) {
730 dst[i] = (ptr[n000] + diffZ * (ptr[n100] - ptr[n000]) +
731 diffY * (ptr[n111] - ptr[n101]) +
732 diffX * (ptr[n101] - ptr[n100]));
733 } else {
734 dst[i] = (ptr[n000] + diffZ * (ptr[n100] - ptr[n000]) +
735 diffY * (ptr[n110] - ptr[n100]) +
736 diffX * (ptr[n111] - ptr[n110]));
737 }
738 }
739
740 // Increment the table ptr in order to handle the next component.
741 // Note that this is how the table is designed: all of nXXX
742 // variables are multiples of 3 because there are 3 output
743 // components.
744 ptr++;
745 }
746 }
747
748 STAGE(clut, true) {
mtklein_C 2016/11/09 11:04:36 how about color_lookup_table?
raftias 2016/11/10 21:36:06 Done.
749 const SkColorLookUpTable* colorLUT = (const SkColorLookUpTable*)ctx;
mtklein_C 2016/11/09 11:04:36 Side note: it's going to drive me nuts that we cap
raftias 2016/11/10 21:36:06 I didn't name it, but my guess is that it's becaus
msarett1 2016/11/11 14:36:51 I don't feel strongly about the name. Feel free t
750 float rgb[3];
751 alignas(alignof(SkNf)) float result[3][N];
mtklein_C 2016/11/09 11:04:36 Let's drop the alignment business. SkNf::Load() s
raftias 2016/11/10 21:36:06 Done.
752 for (int i = 0; i < N; ++i) {
753 rgb[0] = r[i];
754 rgb[1] = g[i];
755 rgb[2] = b[i];
756 interp_3d_clut(rgb, rgb, colorLUT);
757 result[0][i] = rgb[0];
758 result[1][i] = rgb[1];
759 result[2][i] = rgb[2];
760 }
761 r = SkNf::Load(result[0]);
762 g = SkNf::Load(result[1]);
763 b = SkNf::Load(result[2]);
764 }
765
766 STAGE(labtoxyz, true) {
raftias 2016/11/08 21:19:58 I think this can be expressed as a matrix_4x4 foll
mtklein_C 2016/11/09 11:04:36 I think this is clearer as its own stage. It's pr
raftias 2016/11/10 21:36:06 Done.
767 const auto lab_l = r * 100.f;
768 const auto lab_a = g * 255.f - 128.f;
769 const auto lab_b = b * 255.f - 128.f;
770 auto Y = (lab_l + 16.f) * (1.f/116.f);
771 auto X = lab_a * (1.f/500.f) + Y;
mtklein_C 2016/11/09 11:04:36 One .f is plenty to get these solidly as float con
raftias 2016/11/10 21:36:06 Acknowledged.
772 auto Z = Y - (lab_b * (1.f/200.f));
773
774 auto cubed = X*X*X;
775 X = (cubed > 0.008856f).thenElse(cubed, (X - (16.f/116.f)) * (1.f/7.787f));
776 cubed = Y*Y*Y;
mtklein_C 2016/11/09 11:04:36 At a glance it looks like cubed must be accumulati
raftias 2016/11/10 21:36:06 Done.
777 Y = (cubed > 0.008856f).thenElse(cubed, (Y - (16.f/116.f)) * (1.f/7.787f));
778 cubed = Z*Z*Z;
779 Z = (cubed > 0.008856f).thenElse(cubed, (Z - (16.f/116.f)) * (1.f/7.787f));
780
781 // adjust to D50 illuminant
782 X *= 0.96422f;
783 Y *= 1.00000f;
784 Z *= 0.82521f;
785
786 r = X;
787 g = Y;
788 b = Z;
789 }
790
507 template <typename Fn> 791 template <typename Fn>
508 SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) { 792 SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
509 switch (st) { 793 switch (st) {
510 #define M(stage) case SkRasterPipeline::stage: return stage; 794 #define M(stage) case SkRasterPipeline::stage: return stage;
511 SK_RASTER_PIPELINE_STAGES(M) 795 SK_RASTER_PIPELINE_STAGES(M)
512 #undef M 796 #undef M
513 } 797 }
514 SkASSERT(false); 798 SkASSERT(false);
515 return just_return; 799 return just_return;
516 } 800 }
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
597 } 881 }
598 882
599 } // namespace SK_OPTS_NS 883 } // namespace SK_OPTS_NS
600 884
601 #undef SI 885 #undef SI
602 #undef STAGE 886 #undef STAGE
603 #undef RGBA_XFERMODE 887 #undef RGBA_XFERMODE
604 #undef RGB_XFERMODE 888 #undef RGB_XFERMODE
605 889
606 #endif//SkRasterPipeline_opts_DEFINED 890 #endif//SkRasterPipeline_opts_DEFINED
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698