Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: src/core/SkColorSpaceXformOpts.h

Issue 2206403003: Optimize color xforms when src and dst are matching (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Response to comments Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkColorSpaceXform.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkColorSpaceXformOpts_DEFINED
9 #define SkColorSpaceXformOpts_DEFINED
10
11 #include "SkNx.h"
12 #include "SkColorPriv.h"
13 #include "SkHalf.h"
14 #include "SkSRGB.h"
15 #include "SkTemplates.h"
16
// Tells the 8-bit store routines whether the red and blue channels should
// trade byte positions when a destination pixel is packed.
enum SwapRB {
    kNo_SwapRB  = 0,  // red is shifted by 0 bits, blue by 16
    kYes_SwapRB = 1,  // red is shifted by 16 bits, blue by 0
};
21
22 static inline void load_matrix(const float matrix[16],
23 Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& r TgTbT) {
24 rXgXbX = Sk4f::Load(matrix + 0);
25 rYgYbY = Sk4f::Load(matrix + 4);
26 rZgZbZ = Sk4f::Load(matrix + 8);
27 rTgTbT = Sk4f::Load(matrix + 12);
28 }
29
30 static inline void load_rgb_from_tables(const uint32_t* src,
31 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,
32 const float* const srcTables[3]) {
33 r = { srcTables[0][(src[0] >> 0) & 0xFF],
34 srcTables[0][(src[1] >> 0) & 0xFF],
35 srcTables[0][(src[2] >> 0) & 0xFF],
36 srcTables[0][(src[3] >> 0) & 0xFF], };
37 g = { srcTables[1][(src[0] >> 8) & 0xFF],
38 srcTables[1][(src[1] >> 8) & 0xFF],
39 srcTables[1][(src[2] >> 8) & 0xFF],
40 srcTables[1][(src[3] >> 8) & 0xFF], };
41 b = { srcTables[2][(src[0] >> 16) & 0xFF],
42 srcTables[2][(src[1] >> 16) & 0xFF],
43 srcTables[2][(src[2] >> 16) & 0xFF],
44 srcTables[2][(src[3] >> 16) & 0xFF], };
45 a = 0.0f; // Don't let the compiler complain that |a| is uninitialized.
46 }
47
48 static inline void load_rgba_from_tables(const uint32_t* src,
49 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,
50 const float* const srcTables[3]) {
51 r = { srcTables[0][(src[0] >> 0) & 0xFF],
52 srcTables[0][(src[1] >> 0) & 0xFF],
53 srcTables[0][(src[2] >> 0) & 0xFF],
54 srcTables[0][(src[3] >> 0) & 0xFF], };
55 g = { srcTables[1][(src[0] >> 8) & 0xFF],
56 srcTables[1][(src[1] >> 8) & 0xFF],
57 srcTables[1][(src[2] >> 8) & 0xFF],
58 srcTables[1][(src[3] >> 8) & 0xFF], };
59 b = { srcTables[2][(src[0] >> 16) & 0xFF],
60 srcTables[2][(src[1] >> 16) & 0xFF],
61 srcTables[2][(src[2] >> 16) & 0xFF],
62 srcTables[2][(src[3] >> 16) & 0xFF], };
63 a = (1.0f / 255.0f) * SkNx_cast<float>(Sk4u::Load(src) >> 24);
64 }
65
66 static inline void load_rgb_from_tables_1(const uint32_t* src,
67 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&,
68 const float* const srcTables[3]) {
69 // Splat r,g,b across a register each.
70 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]);
71 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]);
72 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]);
73 }
74
75 static inline void load_rgba_from_tables_1(const uint32_t* src,
76 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,
77 const float* const srcTables[3]) {
78 // Splat r,g,b across a register each.
79 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]);
80 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]);
81 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]);
82 a = (1.0f / 255.0f) * Sk4f(*src >> 24);
83 }
84
85 static inline void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b, const Sk4f& a,
86 const Sk4f& rXgXbX, const Sk4f& rYgYbY, const Sk4f& rZgZbZ,
87 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) {
88 dr = rXgXbX[0]*r + rYgYbY[0]*g + rZgZbZ[0]*b;
89 dg = rXgXbX[1]*r + rYgYbY[1]*g + rZgZbZ[1]*b;
90 db = rXgXbX[2]*r + rYgYbY[2]*g + rZgZbZ[2]*b;
91 da = a;
92 }
93
94 static inline void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b ,
95 const Sk4f& rXgXbX, const Sk4f& rYgYbY, con st Sk4f& rZgZbZ,
96 Sk4f& rgba) {
97 rgba = rXgXbX*r + rYgYbY*g + rZgZbZ*b;
98 }
99
100 static inline void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f& db) {
101 dr = dr + rTgTbT[0];
102 dg = dg + rTgTbT[1];
103 db = db + rTgTbT[2];
104 }
105
106 static inline void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) {
107 rgba = rgba + rTgTbT;
108 }
109
110 static inline void premultiply(Sk4f& dr, Sk4f& dg, Sk4f& db, const Sk4f& da) {
111 dr = da * dr;
112 dg = da * dg;
113 db = da * db;
114 }
115
116 static inline void premultiply_1(const Sk4f& a, Sk4f& rgba) {
117 rgba = a * rgba;
118 }
119
120 static inline void store_srgb(void* dst, const uint32_t* src,
121 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&,
122 const uint8_t* const[3], SwapRB kSwapRB) {
123 int kRShift = 0;
124 int kGShift = 8;
125 int kBShift = 16;
126 if (kYes_SwapRB == kSwapRB) {
127 kBShift = 0;
128 kRShift = 16;
129 }
130
131 dr = sk_linear_to_srgb_needs_trunc(dr);
132 dg = sk_linear_to_srgb_needs_trunc(dg);
133 db = sk_linear_to_srgb_needs_trunc(db);
134
135 dr = sk_clamp_0_255(dr);
136 dg = sk_clamp_0_255(dg);
137 db = sk_clamp_0_255(db);
138
139 Sk4i da = Sk4i::Load(src) & 0xFF000000;
140
141 Sk4i rgba = (SkNx_cast<int>(dr) << kRShift)
142 | (SkNx_cast<int>(dg) << kGShift)
143 | (SkNx_cast<int>(db) << kBShift)
144 | (da );
145 rgba.store(dst);
146 }
147
148 static inline void store_srgb_1(void* dst, const uint32_t* src,
149 Sk4f& rgba, const Sk4f&,
150 const uint8_t* const[3], SwapRB kSwapRB) {
151 rgba = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(rgba));
152
153 uint32_t tmp;
154 SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&tmp);
155 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF);
156 if (kYes_SwapRB == kSwapRB) {
157 tmp = SkSwizzle_RB(tmp);
158 }
159
160 *(uint32_t*)dst = tmp;
161 }
162
163 static inline Sk4f linear_to_2dot2(const Sk4f& x) {
164 // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
165 auto x2 = x.rsqrt(), // x^(-1/2)
166 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32)
167 x64 = x32.rsqrt(); // x^(+1/64)
168
169 // 29 = 32 - 2 - 1
170 return 255.0f * x2.invert() * x32 * x64.invert();
171 }
172
173 static inline void store_2dot2(void* dst, const uint32_t* src,
174 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&,
175 const uint8_t* const[3], SwapRB kSwapRB) {
176 int kRShift = 0;
177 int kGShift = 8;
178 int kBShift = 16;
179 if (kYes_SwapRB == kSwapRB) {
180 kBShift = 0;
181 kRShift = 16;
182 }
183
184 dr = linear_to_2dot2(dr);
185 dg = linear_to_2dot2(dg);
186 db = linear_to_2dot2(db);
187
188 dr = sk_clamp_0_255(dr);
189 dg = sk_clamp_0_255(dg);
190 db = sk_clamp_0_255(db);
191
192 Sk4i da = Sk4i::Load(src) & 0xFF000000;
193
194 Sk4i rgba = (Sk4f_round(dr) << kRShift)
195 | (Sk4f_round(dg) << kGShift)
196 | (Sk4f_round(db) << kBShift)
197 | (da );
198 rgba.store(dst);
199 }
200
201 static inline void store_2dot2_1(void* dst, const uint32_t* src,
202 Sk4f& rgba, const Sk4f&,
203 const uint8_t* const[3], SwapRB kSwapRB) {
204 rgba = sk_clamp_0_255(linear_to_2dot2(rgba));
205
206 uint32_t tmp;
207 SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp);
208 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF);
209 if (kYes_SwapRB == kSwapRB) {
210 tmp = SkSwizzle_RB(tmp);
211 }
212
213 *(uint32_t*)dst = tmp;
214 }
215
216 static inline void store_f16(void* dst, const uint32_t* src,
217 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da,
218 const uint8_t* const[3], SwapRB) {
219 Sk4h_store4(dst, SkFloatToHalf_finite(dr),
220 SkFloatToHalf_finite(dg),
221 SkFloatToHalf_finite(db),
222 SkFloatToHalf_finite(da));
223 }
224
225 static inline void store_f16_1(void* dst, const uint32_t* src,
226 Sk4f& rgba, const Sk4f& a,
227 const uint8_t* const[3], SwapRB kSwapRB) {
228 rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]);
229 SkFloatToHalf_finite(rgba).store((uint64_t*) dst);
230 }
231
232 static inline void store_f16_opaque(void* dst, const uint32_t* src,
233 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da,
234 const uint8_t* const[3], SwapRB) {
235 Sk4h_store4(dst, SkFloatToHalf_finite(dr),
236 SkFloatToHalf_finite(dg),
237 SkFloatToHalf_finite(db),
238 SK_Half1);
239 }
240
241 static inline void store_f16_1_opaque(void* dst, const uint32_t* src,
242 Sk4f& rgba, const Sk4f& a,
243 const uint8_t* const[3], SwapRB kSwapRB) {
244 uint64_t tmp;
245 SkFloatToHalf_finite(rgba).store(&tmp);
246 tmp |= static_cast<uint64_t>(SK_Half1) << 48;
247 *((uint64_t*) dst) = tmp;
248 }
249
250 static inline void store_generic(void* dst, const uint32_t* src,
251 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&,
252 const uint8_t* const dstTables[3], SwapRB kSwap RB) {
253 int kRShift = 0;
254 int kGShift = 8;
255 int kBShift = 16;
256 if (kYes_SwapRB == kSwapRB) {
257 kBShift = 0;
258 kRShift = 16;
259 }
260
261 dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f);
262 dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f);
263 db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f);
264
265 Sk4i ir = Sk4f_round(dr);
266 Sk4i ig = Sk4f_round(dg);
267 Sk4i ib = Sk4f_round(db);
268
269 Sk4i da = Sk4i::Load(src) & 0xFF000000;
270
271 uint32_t* dst32 = (uint32_t*) dst;
272 dst32[0] = dstTables[0][ir[0]] << kRShift
273 | dstTables[1][ig[0]] << kGShift
274 | dstTables[2][ib[0]] << kBShift
275 | da[0];
276 dst32[1] = dstTables[0][ir[1]] << kRShift
277 | dstTables[1][ig[1]] << kGShift
278 | dstTables[2][ib[1]] << kBShift
279 | da[1];
280 dst32[2] = dstTables[0][ir[2]] << kRShift
281 | dstTables[1][ig[2]] << kGShift
282 | dstTables[2][ib[2]] << kBShift
283 | da[2];
284 dst32[3] = dstTables[0][ir[3]] << kRShift
285 | dstTables[1][ig[3]] << kGShift
286 | dstTables[2][ib[3]] << kBShift
287 | da[3];
288 }
289
290 static inline void store_generic_1(void* dst, const uint32_t* src,
291 Sk4f& rgba, const Sk4f&,
292 const uint8_t* const dstTables[3], SwapRB kSw apRB) {
293 rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f);
294
295 Sk4i indices = Sk4f_round(rgba);
296
297 *((uint32_t*) dst) = dstTables[0][indices[0]] << 0
298 | dstTables[1][indices[1]] << 8
299 | dstTables[2][indices[2]] << 16
300 | (*src & 0xFF000000);
301 }
302
303 template <SkColorSpace::GammaNamed kDstGamma, SkAlphaType kAlphaType, SwapRB kSw apRB>
304 static void color_xform_RGBA(void* dst, const uint32_t* src, int len,
305 const float* const srcTables[3], const float matrix [16],
306 const uint8_t* const dstTables[3]) {
307 decltype(store_srgb )* store;
308 decltype(store_srgb_1 )* store_1;
309 decltype(load_rgb_from_tables )* load;
310 decltype(load_rgb_from_tables_1)* load_1;
311 size_t sizeOfDstPixel;
312 switch (kDstGamma) {
313 case SkColorSpace::kSRGB_GammaNamed:
314 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table s :
315 load_rgb_from_tables ;
316 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table s_1 :
317 load_rgb_from_tables _1;
318 store = store_srgb;
319 store_1 = store_srgb_1;
320 sizeOfDstPixel = 4;
321 break;
322 case SkColorSpace::k2Dot2Curve_GammaNamed:
323 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table s :
324 load_rgb_from_tables ;
325 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table s_1 :
326 load_rgb_from_tables _1;
327 store = store_2dot2;
328 store_1 = store_2dot2_1;
329 sizeOfDstPixel = 4;
330 break;
331 case SkColorSpace::kLinear_GammaNamed:
332 load = load_rgba_from_tables;
333 load_1 = load_rgba_from_tables_1;
334 store = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_opaque :
335 store_f16;
336 store_1 = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_1_opaque :
337 store_f16_1;
338 sizeOfDstPixel = 8;
339 break;
340 case SkColorSpace::kNonStandard_GammaNamed:
341 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table s :
342 load_rgb_from_tables ;
343 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table s_1 :
344 load_rgb_from_tables _1;
345 store = store_generic;
346 store_1 = store_generic_1;
347 sizeOfDstPixel = 4;
348 break;
349 }
350
351 Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT;
352 load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT);
353
354 if (len >= 4) {
355 // Naively this would be a loop of load-transform-store, but we found it faster to
356 // move the N+1th load ahead of the Nth store. We don't bother doing th is for N<4.
357 Sk4f r, g, b, a;
358 load(src, r, g, b, a, srcTables);
359 src += 4;
360 len -= 4;
361
362 Sk4f dr, dg, db, da;
363 while (len >= 4) {
364 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
365 translate_gamut(rTgTbT, dr, dg, db);
366
367 if (kPremul_SkAlphaType == kAlphaType) {
368 premultiply(dr, dg, db, da);
369 }
370
371 load(src, r, g, b, a, srcTables);
372
373 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB);
374 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
375 src += 4;
376 len -= 4;
377 }
378
379 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da);
380 translate_gamut(rTgTbT, dr, dg, db);
381
382 if (kPremul_SkAlphaType == kAlphaType) {
383 premultiply(dr, dg, db, da);
384 }
385
386 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB);
387 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel);
388 }
389
390 while (len > 0) {
391 Sk4f r, g, b, a;
392 load_1(src, r, g, b, a, srcTables);
393
394 Sk4f rgba;
395 transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba);
396 translate_gamut_1(rTgTbT, rgba);
397
398 store_1(dst, src, rgba, a, dstTables, kSwapRB);
399
400 src += 1;
401 len -= 1;
402 dst = SkTAddOffset<void>(dst, sizeOfDstPixel);
403 }
404 }
405
406 #endif // SkColorSpaceXformOpts_DEFINED
OLDNEW
« no previous file with comments | « src/core/SkColorSpaceXform.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698