OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2016 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 #ifndef SkColorSpaceXformOpts_DEFINED | |
9 #define SkColorSpaceXformOpts_DEFINED | |
10 | |
11 #include "SkNx.h" | |
12 #include "SkColorPriv.h" | |
13 #include "SkHalf.h" | |
14 #include "SkSRGB.h" | |
15 #include "SkTemplates.h" | |
16 | |
// Selects whether the store routines exchange the red and blue channel
// positions when packing a destination pixel.
enum SwapRB {
    kNo_SwapRB  = 0,  // Keep red in the low byte and blue in the third byte.
    kYes_SwapRB = 1,  // Put blue in the low byte and red in the third byte.
};
21 | |
22 static inline void load_matrix(const float matrix[16], | |
23 Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& r
TgTbT) { | |
24 rXgXbX = Sk4f::Load(matrix + 0); | |
25 rYgYbY = Sk4f::Load(matrix + 4); | |
26 rZgZbZ = Sk4f::Load(matrix + 8); | |
27 rTgTbT = Sk4f::Load(matrix + 12); | |
28 } | |
29 | |
30 static inline void load_rgb_from_tables(const uint32_t* src, | |
31 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, | |
32 const float* const srcTables[3]) { | |
33 r = { srcTables[0][(src[0] >> 0) & 0xFF], | |
34 srcTables[0][(src[1] >> 0) & 0xFF], | |
35 srcTables[0][(src[2] >> 0) & 0xFF], | |
36 srcTables[0][(src[3] >> 0) & 0xFF], }; | |
37 g = { srcTables[1][(src[0] >> 8) & 0xFF], | |
38 srcTables[1][(src[1] >> 8) & 0xFF], | |
39 srcTables[1][(src[2] >> 8) & 0xFF], | |
40 srcTables[1][(src[3] >> 8) & 0xFF], }; | |
41 b = { srcTables[2][(src[0] >> 16) & 0xFF], | |
42 srcTables[2][(src[1] >> 16) & 0xFF], | |
43 srcTables[2][(src[2] >> 16) & 0xFF], | |
44 srcTables[2][(src[3] >> 16) & 0xFF], }; | |
45 a = 0.0f; // Don't let the compiler complain that |a| is uninitialized. | |
46 } | |
47 | |
48 static inline void load_rgba_from_tables(const uint32_t* src, | |
49 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, | |
50 const float* const srcTables[3]) { | |
51 r = { srcTables[0][(src[0] >> 0) & 0xFF], | |
52 srcTables[0][(src[1] >> 0) & 0xFF], | |
53 srcTables[0][(src[2] >> 0) & 0xFF], | |
54 srcTables[0][(src[3] >> 0) & 0xFF], }; | |
55 g = { srcTables[1][(src[0] >> 8) & 0xFF], | |
56 srcTables[1][(src[1] >> 8) & 0xFF], | |
57 srcTables[1][(src[2] >> 8) & 0xFF], | |
58 srcTables[1][(src[3] >> 8) & 0xFF], }; | |
59 b = { srcTables[2][(src[0] >> 16) & 0xFF], | |
60 srcTables[2][(src[1] >> 16) & 0xFF], | |
61 srcTables[2][(src[2] >> 16) & 0xFF], | |
62 srcTables[2][(src[3] >> 16) & 0xFF], }; | |
63 a = (1.0f / 255.0f) * SkNx_cast<float>(Sk4u::Load(src) >> 24); | |
64 } | |
65 | |
66 static inline void load_rgb_from_tables_1(const uint32_t* src, | |
67 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&, | |
68 const float* const srcTables[3]) { | |
69 // Splat r,g,b across a register each. | |
70 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); | |
71 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); | |
72 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); | |
73 } | |
74 | |
75 static inline void load_rgba_from_tables_1(const uint32_t* src, | |
76 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, | |
77 const float* const srcTables[3]) { | |
78 // Splat r,g,b across a register each. | |
79 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); | |
80 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); | |
81 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); | |
82 a = (1.0f / 255.0f) * Sk4f(*src >> 24); | |
83 } | |
84 | |
85 static inline void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b,
const Sk4f& a, | |
86 const Sk4f& rXgXbX, const Sk4f& rYgYbY, const
Sk4f& rZgZbZ, | |
87 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) { | |
88 dr = rXgXbX[0]*r + rYgYbY[0]*g + rZgZbZ[0]*b; | |
89 dg = rXgXbX[1]*r + rYgYbY[1]*g + rZgZbZ[1]*b; | |
90 db = rXgXbX[2]*r + rYgYbY[2]*g + rZgZbZ[2]*b; | |
91 da = a; | |
92 } | |
93 | |
94 static inline void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b
, | |
95 const Sk4f& rXgXbX, const Sk4f& rYgYbY, con
st Sk4f& rZgZbZ, | |
96 Sk4f& rgba) { | |
97 rgba = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | |
98 } | |
99 | |
100 static inline void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f&
db) { | |
101 dr = dr + rTgTbT[0]; | |
102 dg = dg + rTgTbT[1]; | |
103 db = db + rTgTbT[2]; | |
104 } | |
105 | |
106 static inline void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) { | |
107 rgba = rgba + rTgTbT; | |
108 } | |
109 | |
110 static inline void premultiply(Sk4f& dr, Sk4f& dg, Sk4f& db, const Sk4f& da) { | |
111 dr = da * dr; | |
112 dg = da * dg; | |
113 db = da * db; | |
114 } | |
115 | |
116 static inline void premultiply_1(const Sk4f& a, Sk4f& rgba) { | |
117 rgba = a * rgba; | |
118 } | |
119 | |
120 static inline void store_srgb(void* dst, const uint32_t* src, | |
121 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, | |
122 const uint8_t* const[3], SwapRB kSwapRB) { | |
123 int kRShift = 0; | |
124 int kGShift = 8; | |
125 int kBShift = 16; | |
126 if (kYes_SwapRB == kSwapRB) { | |
127 kBShift = 0; | |
128 kRShift = 16; | |
129 } | |
130 | |
131 dr = sk_linear_to_srgb_needs_trunc(dr); | |
132 dg = sk_linear_to_srgb_needs_trunc(dg); | |
133 db = sk_linear_to_srgb_needs_trunc(db); | |
134 | |
135 dr = sk_clamp_0_255(dr); | |
136 dg = sk_clamp_0_255(dg); | |
137 db = sk_clamp_0_255(db); | |
138 | |
139 Sk4i da = Sk4i::Load(src) & 0xFF000000; | |
140 | |
141 Sk4i rgba = (SkNx_cast<int>(dr) << kRShift) | |
142 | (SkNx_cast<int>(dg) << kGShift) | |
143 | (SkNx_cast<int>(db) << kBShift) | |
144 | (da ); | |
145 rgba.store(dst); | |
146 } | |
147 | |
148 static inline void store_srgb_1(void* dst, const uint32_t* src, | |
149 Sk4f& rgba, const Sk4f&, | |
150 const uint8_t* const[3], SwapRB kSwapRB) { | |
151 rgba = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(rgba)); | |
152 | |
153 uint32_t tmp; | |
154 SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&tmp); | |
155 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); | |
156 if (kYes_SwapRB == kSwapRB) { | |
157 tmp = SkSwizzle_RB(tmp); | |
158 } | |
159 | |
160 *(uint32_t*)dst = tmp; | |
161 } | |
162 | |
163 static inline Sk4f linear_to_2dot2(const Sk4f& x) { | |
164 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). | |
165 auto x2 = x.rsqrt(), // x^(-1/2) | |
166 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) | |
167 x64 = x32.rsqrt(); // x^(+1/64) | |
168 | |
169 // 29 = 32 - 2 - 1 | |
170 return 255.0f * x2.invert() * x32 * x64.invert(); | |
171 } | |
172 | |
173 static inline void store_2dot2(void* dst, const uint32_t* src, | |
174 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, | |
175 const uint8_t* const[3], SwapRB kSwapRB) { | |
176 int kRShift = 0; | |
177 int kGShift = 8; | |
178 int kBShift = 16; | |
179 if (kYes_SwapRB == kSwapRB) { | |
180 kBShift = 0; | |
181 kRShift = 16; | |
182 } | |
183 | |
184 dr = linear_to_2dot2(dr); | |
185 dg = linear_to_2dot2(dg); | |
186 db = linear_to_2dot2(db); | |
187 | |
188 dr = sk_clamp_0_255(dr); | |
189 dg = sk_clamp_0_255(dg); | |
190 db = sk_clamp_0_255(db); | |
191 | |
192 Sk4i da = Sk4i::Load(src) & 0xFF000000; | |
193 | |
194 Sk4i rgba = (Sk4f_round(dr) << kRShift) | |
195 | (Sk4f_round(dg) << kGShift) | |
196 | (Sk4f_round(db) << kBShift) | |
197 | (da ); | |
198 rgba.store(dst); | |
199 } | |
200 | |
201 static inline void store_2dot2_1(void* dst, const uint32_t* src, | |
202 Sk4f& rgba, const Sk4f&, | |
203 const uint8_t* const[3], SwapRB kSwapRB) { | |
204 rgba = sk_clamp_0_255(linear_to_2dot2(rgba)); | |
205 | |
206 uint32_t tmp; | |
207 SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp); | |
208 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); | |
209 if (kYes_SwapRB == kSwapRB) { | |
210 tmp = SkSwizzle_RB(tmp); | |
211 } | |
212 | |
213 *(uint32_t*)dst = tmp; | |
214 } | |
215 | |
216 static inline void store_f16(void* dst, const uint32_t* src, | |
217 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, | |
218 const uint8_t* const[3], SwapRB) { | |
219 Sk4h_store4(dst, SkFloatToHalf_finite(dr), | |
220 SkFloatToHalf_finite(dg), | |
221 SkFloatToHalf_finite(db), | |
222 SkFloatToHalf_finite(da)); | |
223 } | |
224 | |
225 static inline void store_f16_1(void* dst, const uint32_t* src, | |
226 Sk4f& rgba, const Sk4f& a, | |
227 const uint8_t* const[3], SwapRB kSwapRB) { | |
228 rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]); | |
229 SkFloatToHalf_finite(rgba).store((uint64_t*) dst); | |
230 } | |
231 | |
232 static inline void store_f16_opaque(void* dst, const uint32_t* src, | |
233 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, | |
234 const uint8_t* const[3], SwapRB) { | |
235 Sk4h_store4(dst, SkFloatToHalf_finite(dr), | |
236 SkFloatToHalf_finite(dg), | |
237 SkFloatToHalf_finite(db), | |
238 SK_Half1); | |
239 } | |
240 | |
241 static inline void store_f16_1_opaque(void* dst, const uint32_t* src, | |
242 Sk4f& rgba, const Sk4f& a, | |
243 const uint8_t* const[3], SwapRB kSwapRB) { | |
244 uint64_t tmp; | |
245 SkFloatToHalf_finite(rgba).store(&tmp); | |
246 tmp |= static_cast<uint64_t>(SK_Half1) << 48; | |
247 *((uint64_t*) dst) = tmp; | |
248 } | |
249 | |
250 static inline void store_generic(void* dst, const uint32_t* src, | |
251 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, | |
252 const uint8_t* const dstTables[3], SwapRB kSwap
RB) { | |
253 int kRShift = 0; | |
254 int kGShift = 8; | |
255 int kBShift = 16; | |
256 if (kYes_SwapRB == kSwapRB) { | |
257 kBShift = 0; | |
258 kRShift = 16; | |
259 } | |
260 | |
261 dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f); | |
262 dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f); | |
263 db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f); | |
264 | |
265 Sk4i ir = Sk4f_round(dr); | |
266 Sk4i ig = Sk4f_round(dg); | |
267 Sk4i ib = Sk4f_round(db); | |
268 | |
269 Sk4i da = Sk4i::Load(src) & 0xFF000000; | |
270 | |
271 uint32_t* dst32 = (uint32_t*) dst; | |
272 dst32[0] = dstTables[0][ir[0]] << kRShift | |
273 | dstTables[1][ig[0]] << kGShift | |
274 | dstTables[2][ib[0]] << kBShift | |
275 | da[0]; | |
276 dst32[1] = dstTables[0][ir[1]] << kRShift | |
277 | dstTables[1][ig[1]] << kGShift | |
278 | dstTables[2][ib[1]] << kBShift | |
279 | da[1]; | |
280 dst32[2] = dstTables[0][ir[2]] << kRShift | |
281 | dstTables[1][ig[2]] << kGShift | |
282 | dstTables[2][ib[2]] << kBShift | |
283 | da[2]; | |
284 dst32[3] = dstTables[0][ir[3]] << kRShift | |
285 | dstTables[1][ig[3]] << kGShift | |
286 | dstTables[2][ib[3]] << kBShift | |
287 | da[3]; | |
288 } | |
289 | |
290 static inline void store_generic_1(void* dst, const uint32_t* src, | |
291 Sk4f& rgba, const Sk4f&, | |
292 const uint8_t* const dstTables[3], SwapRB kSw
apRB) { | |
293 rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f); | |
294 | |
295 Sk4i indices = Sk4f_round(rgba); | |
296 | |
297 *((uint32_t*) dst) = dstTables[0][indices[0]] << 0 | |
298 | dstTables[1][indices[1]] << 8 | |
299 | dstTables[2][indices[2]] << 16 | |
300 | (*src & 0xFF000000); | |
301 } | |
302 | |
303 template <SkColorSpace::GammaNamed kDstGamma, SkAlphaType kAlphaType, SwapRB kSw
apRB> | |
304 static void color_xform_RGBA(void* dst, const uint32_t* src, int len, | |
305 const float* const srcTables[3], const float matrix
[16], | |
306 const uint8_t* const dstTables[3]) { | |
307 decltype(store_srgb )* store; | |
308 decltype(store_srgb_1 )* store_1; | |
309 decltype(load_rgb_from_tables )* load; | |
310 decltype(load_rgb_from_tables_1)* load_1; | |
311 size_t sizeOfDstPixel; | |
312 switch (kDstGamma) { | |
313 case SkColorSpace::kSRGB_GammaNamed: | |
314 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s : | |
315 load_rgb_from_tables
; | |
316 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s_1 : | |
317 load_rgb_from_tables
_1; | |
318 store = store_srgb; | |
319 store_1 = store_srgb_1; | |
320 sizeOfDstPixel = 4; | |
321 break; | |
322 case SkColorSpace::k2Dot2Curve_GammaNamed: | |
323 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s : | |
324 load_rgb_from_tables
; | |
325 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s_1 : | |
326 load_rgb_from_tables
_1; | |
327 store = store_2dot2; | |
328 store_1 = store_2dot2_1; | |
329 sizeOfDstPixel = 4; | |
330 break; | |
331 case SkColorSpace::kLinear_GammaNamed: | |
332 load = load_rgba_from_tables; | |
333 load_1 = load_rgba_from_tables_1; | |
334 store = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_opaque : | |
335 store_f16; | |
336 store_1 = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_1_opaque : | |
337 store_f16_1; | |
338 sizeOfDstPixel = 8; | |
339 break; | |
340 case SkColorSpace::kNonStandard_GammaNamed: | |
341 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s : | |
342 load_rgb_from_tables
; | |
343 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s_1 : | |
344 load_rgb_from_tables
_1; | |
345 store = store_generic; | |
346 store_1 = store_generic_1; | |
347 sizeOfDstPixel = 4; | |
348 break; | |
349 } | |
350 | |
351 Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; | |
352 load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); | |
353 | |
354 if (len >= 4) { | |
355 // Naively this would be a loop of load-transform-store, but we found it
faster to | |
356 // move the N+1th load ahead of the Nth store. We don't bother doing th
is for N<4. | |
357 Sk4f r, g, b, a; | |
358 load(src, r, g, b, a, srcTables); | |
359 src += 4; | |
360 len -= 4; | |
361 | |
362 Sk4f dr, dg, db, da; | |
363 while (len >= 4) { | |
364 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); | |
365 translate_gamut(rTgTbT, dr, dg, db); | |
366 | |
367 if (kPremul_SkAlphaType == kAlphaType) { | |
368 premultiply(dr, dg, db, da); | |
369 } | |
370 | |
371 load(src, r, g, b, a, srcTables); | |
372 | |
373 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); | |
374 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); | |
375 src += 4; | |
376 len -= 4; | |
377 } | |
378 | |
379 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); | |
380 translate_gamut(rTgTbT, dr, dg, db); | |
381 | |
382 if (kPremul_SkAlphaType == kAlphaType) { | |
383 premultiply(dr, dg, db, da); | |
384 } | |
385 | |
386 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); | |
387 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); | |
388 } | |
389 | |
390 while (len > 0) { | |
391 Sk4f r, g, b, a; | |
392 load_1(src, r, g, b, a, srcTables); | |
393 | |
394 Sk4f rgba; | |
395 transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); | |
396 translate_gamut_1(rTgTbT, rgba); | |
397 | |
398 store_1(dst, src, rgba, a, dstTables, kSwapRB); | |
399 | |
400 src += 1; | |
401 len -= 1; | |
402 dst = SkTAddOffset<void>(dst, sizeOfDstPixel); | |
403 } | |
404 } | |
405 | |
406 #endif // SkColorSpaceXformOpts_DEFINED | |
OLD | NEW |