| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2016 Google Inc. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license that can be | |
| 5 * found in the LICENSE file. | |
| 6 */ | |
| 7 | |
| 8 #ifndef SkColorSpaceXformOpts_DEFINED | |
| 9 #define SkColorSpaceXformOpts_DEFINED | |
| 10 | |
| 11 #include "SkNx.h" | |
| 12 #include "SkColorPriv.h" | |
| 13 #include "SkHalf.h" | |
| 14 #include "SkSRGB.h" | |
| 15 #include "SkTemplates.h" | |
| 16 | |
// Selects whether the store routines exchange the red and blue channels when writing pixels.
enum SwapRB {
    kNo_SwapRB  = 0,
    kYes_SwapRB = 1,
};
| 21 | |
| 22 static inline void load_matrix(const float matrix[16], | |
| 23 Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& r
TgTbT) { | |
| 24 rXgXbX = Sk4f::Load(matrix + 0); | |
| 25 rYgYbY = Sk4f::Load(matrix + 4); | |
| 26 rZgZbZ = Sk4f::Load(matrix + 8); | |
| 27 rTgTbT = Sk4f::Load(matrix + 12); | |
| 28 } | |
| 29 | |
| 30 static inline void load_rgb_from_tables(const uint32_t* src, | |
| 31 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, | |
| 32 const float* const srcTables[3]) { | |
| 33 r = { srcTables[0][(src[0] >> 0) & 0xFF], | |
| 34 srcTables[0][(src[1] >> 0) & 0xFF], | |
| 35 srcTables[0][(src[2] >> 0) & 0xFF], | |
| 36 srcTables[0][(src[3] >> 0) & 0xFF], }; | |
| 37 g = { srcTables[1][(src[0] >> 8) & 0xFF], | |
| 38 srcTables[1][(src[1] >> 8) & 0xFF], | |
| 39 srcTables[1][(src[2] >> 8) & 0xFF], | |
| 40 srcTables[1][(src[3] >> 8) & 0xFF], }; | |
| 41 b = { srcTables[2][(src[0] >> 16) & 0xFF], | |
| 42 srcTables[2][(src[1] >> 16) & 0xFF], | |
| 43 srcTables[2][(src[2] >> 16) & 0xFF], | |
| 44 srcTables[2][(src[3] >> 16) & 0xFF], }; | |
| 45 a = 0.0f; // Don't let the compiler complain that |a| is uninitialized. | |
| 46 } | |
| 47 | |
| 48 static inline void load_rgba_from_tables(const uint32_t* src, | |
| 49 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, | |
| 50 const float* const srcTables[3]) { | |
| 51 r = { srcTables[0][(src[0] >> 0) & 0xFF], | |
| 52 srcTables[0][(src[1] >> 0) & 0xFF], | |
| 53 srcTables[0][(src[2] >> 0) & 0xFF], | |
| 54 srcTables[0][(src[3] >> 0) & 0xFF], }; | |
| 55 g = { srcTables[1][(src[0] >> 8) & 0xFF], | |
| 56 srcTables[1][(src[1] >> 8) & 0xFF], | |
| 57 srcTables[1][(src[2] >> 8) & 0xFF], | |
| 58 srcTables[1][(src[3] >> 8) & 0xFF], }; | |
| 59 b = { srcTables[2][(src[0] >> 16) & 0xFF], | |
| 60 srcTables[2][(src[1] >> 16) & 0xFF], | |
| 61 srcTables[2][(src[2] >> 16) & 0xFF], | |
| 62 srcTables[2][(src[3] >> 16) & 0xFF], }; | |
| 63 a = (1.0f / 255.0f) * SkNx_cast<float>(Sk4u::Load(src) >> 24); | |
| 64 } | |
| 65 | |
| 66 static inline void load_rgb_from_tables_1(const uint32_t* src, | |
| 67 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&, | |
| 68 const float* const srcTables[3]) { | |
| 69 // Splat r,g,b across a register each. | |
| 70 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); | |
| 71 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); | |
| 72 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); | |
| 73 } | |
| 74 | |
| 75 static inline void load_rgba_from_tables_1(const uint32_t* src, | |
| 76 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, | |
| 77 const float* const srcTables[3]) { | |
| 78 // Splat r,g,b across a register each. | |
| 79 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); | |
| 80 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); | |
| 81 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); | |
| 82 a = (1.0f / 255.0f) * Sk4f(*src >> 24); | |
| 83 } | |
| 84 | |
| 85 static inline void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b,
const Sk4f& a, | |
| 86 const Sk4f& rXgXbX, const Sk4f& rYgYbY, const
Sk4f& rZgZbZ, | |
| 87 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) { | |
| 88 dr = rXgXbX[0]*r + rYgYbY[0]*g + rZgZbZ[0]*b; | |
| 89 dg = rXgXbX[1]*r + rYgYbY[1]*g + rZgZbZ[1]*b; | |
| 90 db = rXgXbX[2]*r + rYgYbY[2]*g + rZgZbZ[2]*b; | |
| 91 da = a; | |
| 92 } | |
| 93 | |
| 94 static inline void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b
, | |
| 95 const Sk4f& rXgXbX, const Sk4f& rYgYbY, con
st Sk4f& rZgZbZ, | |
| 96 Sk4f& rgba) { | |
| 97 rgba = rXgXbX*r + rYgYbY*g + rZgZbZ*b; | |
| 98 } | |
| 99 | |
| 100 static inline void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f&
db) { | |
| 101 dr = dr + rTgTbT[0]; | |
| 102 dg = dg + rTgTbT[1]; | |
| 103 db = db + rTgTbT[2]; | |
| 104 } | |
| 105 | |
| 106 static inline void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) { | |
| 107 rgba = rgba + rTgTbT; | |
| 108 } | |
| 109 | |
| 110 static inline void premultiply(Sk4f& dr, Sk4f& dg, Sk4f& db, const Sk4f& da) { | |
| 111 dr = da * dr; | |
| 112 dg = da * dg; | |
| 113 db = da * db; | |
| 114 } | |
| 115 | |
| 116 static inline void premultiply_1(const Sk4f& a, Sk4f& rgba) { | |
| 117 rgba = a * rgba; | |
| 118 } | |
| 119 | |
| 120 static inline void store_srgb(void* dst, const uint32_t* src, | |
| 121 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, | |
| 122 const uint8_t* const[3], SwapRB kSwapRB) { | |
| 123 int kRShift = 0; | |
| 124 int kGShift = 8; | |
| 125 int kBShift = 16; | |
| 126 if (kYes_SwapRB == kSwapRB) { | |
| 127 kBShift = 0; | |
| 128 kRShift = 16; | |
| 129 } | |
| 130 | |
| 131 dr = sk_linear_to_srgb_needs_trunc(dr); | |
| 132 dg = sk_linear_to_srgb_needs_trunc(dg); | |
| 133 db = sk_linear_to_srgb_needs_trunc(db); | |
| 134 | |
| 135 dr = sk_clamp_0_255(dr); | |
| 136 dg = sk_clamp_0_255(dg); | |
| 137 db = sk_clamp_0_255(db); | |
| 138 | |
| 139 Sk4i da = Sk4i::Load(src) & 0xFF000000; | |
| 140 | |
| 141 Sk4i rgba = (SkNx_cast<int>(dr) << kRShift) | |
| 142 | (SkNx_cast<int>(dg) << kGShift) | |
| 143 | (SkNx_cast<int>(db) << kBShift) | |
| 144 | (da ); | |
| 145 rgba.store(dst); | |
| 146 } | |
| 147 | |
| 148 static inline void store_srgb_1(void* dst, const uint32_t* src, | |
| 149 Sk4f& rgba, const Sk4f&, | |
| 150 const uint8_t* const[3], SwapRB kSwapRB) { | |
| 151 rgba = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(rgba)); | |
| 152 | |
| 153 uint32_t tmp; | |
| 154 SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&tmp); | |
| 155 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); | |
| 156 if (kYes_SwapRB == kSwapRB) { | |
| 157 tmp = SkSwizzle_RB(tmp); | |
| 158 } | |
| 159 | |
| 160 *(uint32_t*)dst = tmp; | |
| 161 } | |
| 162 | |
| 163 static inline Sk4f linear_to_2dot2(const Sk4f& x) { | |
| 164 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). | |
| 165 auto x2 = x.rsqrt(), // x^(-1/2) | |
| 166 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) | |
| 167 x64 = x32.rsqrt(); // x^(+1/64) | |
| 168 | |
| 169 // 29 = 32 - 2 - 1 | |
| 170 return 255.0f * x2.invert() * x32 * x64.invert(); | |
| 171 } | |
| 172 | |
| 173 static inline void store_2dot2(void* dst, const uint32_t* src, | |
| 174 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, | |
| 175 const uint8_t* const[3], SwapRB kSwapRB) { | |
| 176 int kRShift = 0; | |
| 177 int kGShift = 8; | |
| 178 int kBShift = 16; | |
| 179 if (kYes_SwapRB == kSwapRB) { | |
| 180 kBShift = 0; | |
| 181 kRShift = 16; | |
| 182 } | |
| 183 | |
| 184 dr = linear_to_2dot2(dr); | |
| 185 dg = linear_to_2dot2(dg); | |
| 186 db = linear_to_2dot2(db); | |
| 187 | |
| 188 dr = sk_clamp_0_255(dr); | |
| 189 dg = sk_clamp_0_255(dg); | |
| 190 db = sk_clamp_0_255(db); | |
| 191 | |
| 192 Sk4i da = Sk4i::Load(src) & 0xFF000000; | |
| 193 | |
| 194 Sk4i rgba = (Sk4f_round(dr) << kRShift) | |
| 195 | (Sk4f_round(dg) << kGShift) | |
| 196 | (Sk4f_round(db) << kBShift) | |
| 197 | (da ); | |
| 198 rgba.store(dst); | |
| 199 } | |
| 200 | |
| 201 static inline void store_2dot2_1(void* dst, const uint32_t* src, | |
| 202 Sk4f& rgba, const Sk4f&, | |
| 203 const uint8_t* const[3], SwapRB kSwapRB) { | |
| 204 rgba = sk_clamp_0_255(linear_to_2dot2(rgba)); | |
| 205 | |
| 206 uint32_t tmp; | |
| 207 SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp); | |
| 208 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); | |
| 209 if (kYes_SwapRB == kSwapRB) { | |
| 210 tmp = SkSwizzle_RB(tmp); | |
| 211 } | |
| 212 | |
| 213 *(uint32_t*)dst = tmp; | |
| 214 } | |
| 215 | |
| 216 static inline void store_f16(void* dst, const uint32_t* src, | |
| 217 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, | |
| 218 const uint8_t* const[3], SwapRB) { | |
| 219 Sk4h_store4(dst, SkFloatToHalf_finite(dr), | |
| 220 SkFloatToHalf_finite(dg), | |
| 221 SkFloatToHalf_finite(db), | |
| 222 SkFloatToHalf_finite(da)); | |
| 223 } | |
| 224 | |
| 225 static inline void store_f16_1(void* dst, const uint32_t* src, | |
| 226 Sk4f& rgba, const Sk4f& a, | |
| 227 const uint8_t* const[3], SwapRB kSwapRB) { | |
| 228 rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]); | |
| 229 SkFloatToHalf_finite(rgba).store((uint64_t*) dst); | |
| 230 } | |
| 231 | |
| 232 static inline void store_f16_opaque(void* dst, const uint32_t* src, | |
| 233 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, | |
| 234 const uint8_t* const[3], SwapRB) { | |
| 235 Sk4h_store4(dst, SkFloatToHalf_finite(dr), | |
| 236 SkFloatToHalf_finite(dg), | |
| 237 SkFloatToHalf_finite(db), | |
| 238 SK_Half1); | |
| 239 } | |
| 240 | |
| 241 static inline void store_f16_1_opaque(void* dst, const uint32_t* src, | |
| 242 Sk4f& rgba, const Sk4f& a, | |
| 243 const uint8_t* const[3], SwapRB kSwapRB) { | |
| 244 uint64_t tmp; | |
| 245 SkFloatToHalf_finite(rgba).store(&tmp); | |
| 246 tmp |= static_cast<uint64_t>(SK_Half1) << 48; | |
| 247 *((uint64_t*) dst) = tmp; | |
| 248 } | |
| 249 | |
| 250 static inline void store_generic(void* dst, const uint32_t* src, | |
| 251 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, | |
| 252 const uint8_t* const dstTables[3], SwapRB kSwap
RB) { | |
| 253 int kRShift = 0; | |
| 254 int kGShift = 8; | |
| 255 int kBShift = 16; | |
| 256 if (kYes_SwapRB == kSwapRB) { | |
| 257 kBShift = 0; | |
| 258 kRShift = 16; | |
| 259 } | |
| 260 | |
| 261 dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f); | |
| 262 dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f); | |
| 263 db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f); | |
| 264 | |
| 265 Sk4i ir = Sk4f_round(dr); | |
| 266 Sk4i ig = Sk4f_round(dg); | |
| 267 Sk4i ib = Sk4f_round(db); | |
| 268 | |
| 269 Sk4i da = Sk4i::Load(src) & 0xFF000000; | |
| 270 | |
| 271 uint32_t* dst32 = (uint32_t*) dst; | |
| 272 dst32[0] = dstTables[0][ir[0]] << kRShift | |
| 273 | dstTables[1][ig[0]] << kGShift | |
| 274 | dstTables[2][ib[0]] << kBShift | |
| 275 | da[0]; | |
| 276 dst32[1] = dstTables[0][ir[1]] << kRShift | |
| 277 | dstTables[1][ig[1]] << kGShift | |
| 278 | dstTables[2][ib[1]] << kBShift | |
| 279 | da[1]; | |
| 280 dst32[2] = dstTables[0][ir[2]] << kRShift | |
| 281 | dstTables[1][ig[2]] << kGShift | |
| 282 | dstTables[2][ib[2]] << kBShift | |
| 283 | da[2]; | |
| 284 dst32[3] = dstTables[0][ir[3]] << kRShift | |
| 285 | dstTables[1][ig[3]] << kGShift | |
| 286 | dstTables[2][ib[3]] << kBShift | |
| 287 | da[3]; | |
| 288 } | |
| 289 | |
| 290 static inline void store_generic_1(void* dst, const uint32_t* src, | |
| 291 Sk4f& rgba, const Sk4f&, | |
| 292 const uint8_t* const dstTables[3], SwapRB kSw
apRB) { | |
| 293 rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f); | |
| 294 | |
| 295 Sk4i indices = Sk4f_round(rgba); | |
| 296 | |
| 297 *((uint32_t*) dst) = dstTables[0][indices[0]] << 0 | |
| 298 | dstTables[1][indices[1]] << 8 | |
| 299 | dstTables[2][indices[2]] << 16 | |
| 300 | (*src & 0xFF000000); | |
| 301 } | |
| 302 | |
| 303 template <SkColorSpace::GammaNamed kDstGamma, SkAlphaType kAlphaType, SwapRB kSw
apRB> | |
| 304 static void color_xform_RGBA(void* dst, const uint32_t* src, int len, | |
| 305 const float* const srcTables[3], const float matrix
[16], | |
| 306 const uint8_t* const dstTables[3]) { | |
| 307 decltype(store_srgb )* store; | |
| 308 decltype(store_srgb_1 )* store_1; | |
| 309 decltype(load_rgb_from_tables )* load; | |
| 310 decltype(load_rgb_from_tables_1)* load_1; | |
| 311 size_t sizeOfDstPixel; | |
| 312 switch (kDstGamma) { | |
| 313 case SkColorSpace::kSRGB_GammaNamed: | |
| 314 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s : | |
| 315 load_rgb_from_tables
; | |
| 316 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s_1 : | |
| 317 load_rgb_from_tables
_1; | |
| 318 store = store_srgb; | |
| 319 store_1 = store_srgb_1; | |
| 320 sizeOfDstPixel = 4; | |
| 321 break; | |
| 322 case SkColorSpace::k2Dot2Curve_GammaNamed: | |
| 323 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s : | |
| 324 load_rgb_from_tables
; | |
| 325 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s_1 : | |
| 326 load_rgb_from_tables
_1; | |
| 327 store = store_2dot2; | |
| 328 store_1 = store_2dot2_1; | |
| 329 sizeOfDstPixel = 4; | |
| 330 break; | |
| 331 case SkColorSpace::kLinear_GammaNamed: | |
| 332 load = load_rgba_from_tables; | |
| 333 load_1 = load_rgba_from_tables_1; | |
| 334 store = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_opaque : | |
| 335 store_f16; | |
| 336 store_1 = (kOpaque_SkAlphaType == kAlphaType) ? store_f16_1_opaque : | |
| 337 store_f16_1; | |
| 338 sizeOfDstPixel = 8; | |
| 339 break; | |
| 340 case SkColorSpace::kNonStandard_GammaNamed: | |
| 341 load = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s : | |
| 342 load_rgb_from_tables
; | |
| 343 load_1 = (kPremul_SkAlphaType == kAlphaType) ? load_rgba_from_table
s_1 : | |
| 344 load_rgb_from_tables
_1; | |
| 345 store = store_generic; | |
| 346 store_1 = store_generic_1; | |
| 347 sizeOfDstPixel = 4; | |
| 348 break; | |
| 349 } | |
| 350 | |
| 351 Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; | |
| 352 load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); | |
| 353 | |
| 354 if (len >= 4) { | |
| 355 // Naively this would be a loop of load-transform-store, but we found it
faster to | |
| 356 // move the N+1th load ahead of the Nth store. We don't bother doing th
is for N<4. | |
| 357 Sk4f r, g, b, a; | |
| 358 load(src, r, g, b, a, srcTables); | |
| 359 src += 4; | |
| 360 len -= 4; | |
| 361 | |
| 362 Sk4f dr, dg, db, da; | |
| 363 while (len >= 4) { | |
| 364 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); | |
| 365 translate_gamut(rTgTbT, dr, dg, db); | |
| 366 | |
| 367 if (kPremul_SkAlphaType == kAlphaType) { | |
| 368 premultiply(dr, dg, db, da); | |
| 369 } | |
| 370 | |
| 371 load(src, r, g, b, a, srcTables); | |
| 372 | |
| 373 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); | |
| 374 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); | |
| 375 src += 4; | |
| 376 len -= 4; | |
| 377 } | |
| 378 | |
| 379 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); | |
| 380 translate_gamut(rTgTbT, dr, dg, db); | |
| 381 | |
| 382 if (kPremul_SkAlphaType == kAlphaType) { | |
| 383 premultiply(dr, dg, db, da); | |
| 384 } | |
| 385 | |
| 386 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); | |
| 387 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); | |
| 388 } | |
| 389 | |
| 390 while (len > 0) { | |
| 391 Sk4f r, g, b, a; | |
| 392 load_1(src, r, g, b, a, srcTables); | |
| 393 | |
| 394 Sk4f rgba; | |
| 395 transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); | |
| 396 translate_gamut_1(rTgTbT, rgba); | |
| 397 | |
| 398 store_1(dst, src, rgba, a, dstTables, kSwapRB); | |
| 399 | |
| 400 src += 1; | |
| 401 len -= 1; | |
| 402 dst = SkTAddOffset<void>(dst, sizeOfDstPixel); | |
| 403 } | |
| 404 } | |
| 405 | |
| 406 #endif // SkColorSpaceXformOpts_DEFINED | |
| OLD | NEW |