OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2016 Google Inc. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. |
| 6 */ |
| 7 |
| 8 #ifndef SkColorSpaceXformOpts_DEFINED |
| 9 #define SkColorSpaceXformOpts_DEFINED |
| 10 |
| 11 #include "SkNx.h" |
| 12 #include "SkColorPriv.h" |
| 13 #include "SkHalf.h" |
| 14 #include "SkSRGB.h" |
| 15 #include "SkTemplates.h" |
| 16 |
| 17 static inline void load_matrix(const float matrix[16], |
| 18 Sk4f& rXgXbX, Sk4f& rYgYbY, Sk4f& rZgZbZ, Sk4f& r
TgTbT) { |
| 19 rXgXbX = Sk4f::Load(matrix + 0); |
| 20 rYgYbY = Sk4f::Load(matrix + 4); |
| 21 rZgZbZ = Sk4f::Load(matrix + 8); |
| 22 rTgTbT = Sk4f::Load(matrix + 12); |
| 23 } |
| 24 |
| 25 static inline void load_rgb_from_tables(const uint32_t* src, |
| 26 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&, |
| 27 const float* const srcTables[3]) { |
| 28 r = { srcTables[0][(src[0] >> 0) & 0xFF], |
| 29 srcTables[0][(src[1] >> 0) & 0xFF], |
| 30 srcTables[0][(src[2] >> 0) & 0xFF], |
| 31 srcTables[0][(src[3] >> 0) & 0xFF], }; |
| 32 g = { srcTables[1][(src[0] >> 8) & 0xFF], |
| 33 srcTables[1][(src[1] >> 8) & 0xFF], |
| 34 srcTables[1][(src[2] >> 8) & 0xFF], |
| 35 srcTables[1][(src[3] >> 8) & 0xFF], }; |
| 36 b = { srcTables[2][(src[0] >> 16) & 0xFF], |
| 37 srcTables[2][(src[1] >> 16) & 0xFF], |
| 38 srcTables[2][(src[2] >> 16) & 0xFF], |
| 39 srcTables[2][(src[3] >> 16) & 0xFF], }; |
| 40 } |
| 41 |
| 42 static inline void load_rgba_from_tables(const uint32_t* src, |
| 43 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, |
| 44 const float* const srcTables[3]) { |
| 45 r = { srcTables[0][(src[0] >> 0) & 0xFF], |
| 46 srcTables[0][(src[1] >> 0) & 0xFF], |
| 47 srcTables[0][(src[2] >> 0) & 0xFF], |
| 48 srcTables[0][(src[3] >> 0) & 0xFF], }; |
| 49 g = { srcTables[1][(src[0] >> 8) & 0xFF], |
| 50 srcTables[1][(src[1] >> 8) & 0xFF], |
| 51 srcTables[1][(src[2] >> 8) & 0xFF], |
| 52 srcTables[1][(src[3] >> 8) & 0xFF], }; |
| 53 b = { srcTables[2][(src[0] >> 16) & 0xFF], |
| 54 srcTables[2][(src[1] >> 16) & 0xFF], |
| 55 srcTables[2][(src[2] >> 16) & 0xFF], |
| 56 srcTables[2][(src[3] >> 16) & 0xFF], }; |
| 57 a = (1.0f / 255.0f) * SkNx_cast<float>(Sk4u::Load(src) >> 24); |
| 58 } |
| 59 |
| 60 static inline void load_rgb_from_tables_1(const uint32_t* src, |
| 61 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f&, |
| 62 const float* const srcTables[3]) { |
| 63 // Splat r,g,b across a register each. |
| 64 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); |
| 65 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); |
| 66 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); |
| 67 } |
| 68 |
| 69 static inline void load_rgba_from_tables_1(const uint32_t* src, |
| 70 Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a, |
| 71 const float* const srcTables[3]) { |
| 72 // Splat r,g,b across a register each. |
| 73 r = Sk4f(srcTables[0][(*src >> 0) & 0xFF]); |
| 74 g = Sk4f(srcTables[1][(*src >> 8) & 0xFF]); |
| 75 b = Sk4f(srcTables[2][(*src >> 16) & 0xFF]); |
| 76 a = (1.0f / 255.0f) * Sk4f(*src >> 24); |
| 77 } |
| 78 |
| 79 static inline void transform_gamut(const Sk4f& r, const Sk4f& g, const Sk4f& b,
const Sk4f& a, |
| 80 const Sk4f& rXgXbX, const Sk4f& rYgYbY, const
Sk4f& rZgZbZ, |
| 81 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da) { |
| 82 dr = rXgXbX[0]*r + rYgYbY[0]*g + rZgZbZ[0]*b; |
| 83 dg = rXgXbX[1]*r + rYgYbY[1]*g + rZgZbZ[1]*b; |
| 84 db = rXgXbX[2]*r + rYgYbY[2]*g + rZgZbZ[2]*b; |
| 85 da = a; |
| 86 } |
| 87 |
| 88 static inline void transform_gamut_1(const Sk4f& r, const Sk4f& g, const Sk4f& b
, |
| 89 const Sk4f& rXgXbX, const Sk4f& rYgYbY, con
st Sk4f& rZgZbZ, |
| 90 Sk4f& rgba) { |
| 91 rgba = rXgXbX*r + rYgYbY*g + rZgZbZ*b; |
| 92 } |
| 93 |
| 94 static inline void translate_gamut(const Sk4f& rTgTbT, Sk4f& dr, Sk4f& dg, Sk4f&
db) { |
| 95 dr = dr + rTgTbT[0]; |
| 96 dg = dg + rTgTbT[1]; |
| 97 db = db + rTgTbT[2]; |
| 98 } |
| 99 |
| 100 static inline void translate_gamut_1(const Sk4f& rTgTbT, Sk4f& rgba) { |
| 101 rgba = rgba + rTgTbT; |
| 102 } |
| 103 |
| 104 static inline void premultiply(Sk4f& dr, Sk4f& dg, Sk4f& db, const Sk4f& da) { |
| 105 dr = da * dr; |
| 106 dg = da * dg; |
| 107 db = da * db; |
| 108 } |
| 109 |
| 110 static inline void premultiply_1(const Sk4f& a, Sk4f& rgba) { |
| 111 rgba = a * rgba; |
| 112 } |
| 113 |
| 114 static inline void store_srgb(void* dst, const uint32_t* src, |
| 115 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, |
| 116 const uint8_t* const[3], bool kSwapRB) { |
| 117 int kRShift = 0; |
| 118 int kGShift = 8; |
| 119 int kBShift = 16; |
| 120 int kAShift = 24; |
| 121 if (kSwapRB) { |
| 122 kBShift = 0; |
| 123 kRShift = 16; |
| 124 } |
| 125 |
| 126 dr = sk_linear_to_srgb_needs_trunc(dr); |
| 127 dg = sk_linear_to_srgb_needs_trunc(dg); |
| 128 db = sk_linear_to_srgb_needs_trunc(db); |
| 129 |
| 130 dr = sk_clamp_0_255(dr); |
| 131 dg = sk_clamp_0_255(dg); |
| 132 db = sk_clamp_0_255(db); |
| 133 |
| 134 Sk4i da = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24); |
| 135 |
| 136 Sk4i rgba = (SkNx_cast<int>(dr) << kRShift) |
| 137 | (SkNx_cast<int>(dg) << kGShift) |
| 138 | (SkNx_cast<int>(db) << kBShift) |
| 139 | (da << kAShift); |
| 140 rgba.store(dst); |
| 141 } |
| 142 |
| 143 static inline void store_srgb_1(void* dst, const uint32_t* src, |
| 144 Sk4f& rgba, const Sk4f&, |
| 145 const uint8_t* const[3], bool kSwapRB) { |
| 146 rgba = sk_clamp_0_255(sk_linear_to_srgb_needs_trunc(rgba)); |
| 147 |
| 148 uint32_t tmp; |
| 149 SkNx_cast<uint8_t>(SkNx_cast<int32_t>(rgba)).store(&tmp); |
| 150 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); |
| 151 if (kSwapRB) { |
| 152 tmp = SkSwizzle_RB(tmp); |
| 153 } |
| 154 |
| 155 *(uint32_t*)dst = tmp; |
| 156 } |
| 157 |
| 158 static inline Sk4f linear_to_2dot2(const Sk4f& x) { |
| 159 // x^(29/64) is a very good approximation of the true value, x^(1/2.2). |
| 160 auto x2 = x.rsqrt(), // x^(-1/2) |
| 161 x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(), // x^(-1/32) |
| 162 x64 = x32.rsqrt(); // x^(+1/64) |
| 163 |
| 164 // 29 = 32 - 2 - 1 |
| 165 return 255.0f * x2.invert() * x32 * x64.invert(); |
| 166 } |
| 167 |
| 168 static inline void store_2dot2(void* dst, const uint32_t* src, |
| 169 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, |
| 170 const uint8_t* const[3], bool kSwapRB) { |
| 171 int kRShift = 0; |
| 172 int kGShift = 8; |
| 173 int kBShift = 16; |
| 174 int kAShift = 24; |
| 175 if (kSwapRB) { |
| 176 kBShift = 0; |
| 177 kRShift = 16; |
| 178 } |
| 179 |
| 180 dr = linear_to_2dot2(dr); |
| 181 dg = linear_to_2dot2(dg); |
| 182 db = linear_to_2dot2(db); |
| 183 |
| 184 dr = sk_clamp_0_255(dr); |
| 185 dg = sk_clamp_0_255(dg); |
| 186 db = sk_clamp_0_255(db); |
| 187 |
| 188 Sk4i da = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24); |
| 189 |
| 190 Sk4i rgba = (Sk4f_round(dr) << kRShift) |
| 191 | (Sk4f_round(dg) << kGShift) |
| 192 | (Sk4f_round(db) << kBShift) |
| 193 | (da << kAShift); |
| 194 rgba.store(dst); |
| 195 } |
| 196 |
| 197 static inline void store_2dot2_1(void* dst, const uint32_t* src, |
| 198 Sk4f& rgba, const Sk4f&, |
| 199 const uint8_t* const[3], bool kSwapRB) { |
| 200 rgba = sk_clamp_0_255(linear_to_2dot2(rgba)); |
| 201 |
| 202 uint32_t tmp; |
| 203 SkNx_cast<uint8_t>(Sk4f_round(rgba)).store(&tmp); |
| 204 tmp = (*src & 0xFF000000) | (tmp & 0x00FFFFFF); |
| 205 if (kSwapRB) { |
| 206 tmp = SkSwizzle_RB(tmp); |
| 207 } |
| 208 |
| 209 *(uint32_t*)dst = tmp; |
| 210 } |
| 211 |
| 212 static inline void store_f16(void* dst, const uint32_t* src, |
| 213 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da, |
| 214 const uint8_t* const[3], bool kSwapRB) { |
| 215 Sk4h_store4(dst, SkFloatToHalf_finite(dr), |
| 216 SkFloatToHalf_finite(dg), |
| 217 SkFloatToHalf_finite(db), |
| 218 SkFloatToHalf_finite(da)); |
| 219 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint64_t)); |
| 220 } |
| 221 |
| 222 static inline void store_f16_1(void* dst, const uint32_t* src, |
| 223 Sk4f& rgba, const Sk4f& a, |
| 224 const uint8_t* const[3], bool kSwapRB) { |
| 225 rgba = Sk4f(rgba[0], rgba[1], rgba[2], a[3]); |
| 226 SkFloatToHalf_finite(rgba).store((uint64_t*) dst); |
| 227 } |
| 228 |
| 229 static inline void store_generic(void* dst, const uint32_t* src, |
| 230 Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f&, |
| 231 const uint8_t* const dstTables[3], bool kSwapRB
) { |
| 232 int kRShift = 0; |
| 233 int kGShift = 8; |
| 234 int kBShift = 16; |
| 235 int kAShift = 24; |
| 236 if (kSwapRB) { |
| 237 kBShift = 0; |
| 238 kRShift = 16; |
| 239 } |
| 240 |
| 241 dr = Sk4f::Min(Sk4f::Max(1023.0f * dr, 0.0f), 1023.0f); |
| 242 dg = Sk4f::Min(Sk4f::Max(1023.0f * dg, 0.0f), 1023.0f); |
| 243 db = Sk4f::Min(Sk4f::Max(1023.0f * db, 0.0f), 1023.0f); |
| 244 |
| 245 Sk4i ir = Sk4f_round(dr); |
| 246 Sk4i ig = Sk4f_round(dg); |
| 247 Sk4i ib = Sk4f_round(db); |
| 248 |
| 249 Sk4i da = SkNx_cast<int32_t>(Sk4u::Load(src) >> 24); |
| 250 |
| 251 uint32_t* dst32 = (uint32_t*) dst; |
| 252 dst32[0] = dstTables[0][ir[0]] << kRShift |
| 253 | dstTables[1][ig[0]] << kGShift |
| 254 | dstTables[2][ib[0]] << kBShift |
| 255 | da[0] << kAShift; |
| 256 dst32[1] = dstTables[0][ir[1]] << kRShift |
| 257 | dstTables[1][ig[1]] << kGShift |
| 258 | dstTables[2][ib[1]] << kBShift |
| 259 | da[1] << kAShift; |
| 260 dst32[2] = dstTables[0][ir[2]] << kRShift |
| 261 | dstTables[1][ig[2]] << kGShift |
| 262 | dstTables[2][ib[2]] << kBShift |
| 263 | da[2] << kAShift; |
| 264 dst32[3] = dstTables[0][ir[3]] << kRShift |
| 265 | dstTables[1][ig[3]] << kGShift |
| 266 | dstTables[2][ib[3]] << kBShift |
| 267 | da[3] << kAShift; |
| 268 } |
| 269 |
| 270 static inline void store_generic_1(void* dst, const uint32_t* src, |
| 271 Sk4f& rgba, const Sk4f&, |
| 272 const uint8_t* const dstTables[3], bool kSwap
RB) { |
| 273 rgba = Sk4f::Min(Sk4f::Max(1023.0f * rgba, 0.0f), 1023.0f); |
| 274 |
| 275 Sk4i indices = Sk4f_round(rgba); |
| 276 |
| 277 *((uint32_t*) dst) = dstTables[0][indices[0]] << 0 |
| 278 | dstTables[1][indices[1]] << 8 |
| 279 | dstTables[2][indices[2]] << 16 |
| 280 | (*src & 0xFF000000); |
| 281 } |
| 282 |
| 283 template <SkColorSpace::GammaNamed kDstGamma, bool kPremul, bool kSwapRB> |
| 284 static void color_xform_RGBA(void* dst, const uint32_t* src, int len, |
| 285 const float* const srcTables[3], const float matrix
[16], |
| 286 const uint8_t* const dstTables[3]) { |
| 287 decltype(store_srgb )* store; |
| 288 decltype(store_srgb_1 )* store_1; |
| 289 decltype(load_rgb_from_tables )* load; |
| 290 decltype(load_rgb_from_tables_1)* load_1; |
| 291 size_t sizeOfDstPixel; |
| 292 switch (kDstGamma) { |
| 293 case SkColorSpace::kSRGB_GammaNamed: |
| 294 load = kPremul ? load_rgba_from_tables : load_rgb_from_tables; |
| 295 load_1 = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1
; |
| 296 store = store_srgb; |
| 297 store_1 = store_srgb_1; |
| 298 sizeOfDstPixel = 4; |
| 299 break; |
| 300 case SkColorSpace::k2Dot2Curve_GammaNamed: |
| 301 load = kPremul ? load_rgba_from_tables : load_rgb_from_tables; |
| 302 load_1 = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1
; |
| 303 store = store_2dot2; |
| 304 store_1 = store_2dot2_1; |
| 305 sizeOfDstPixel = 4; |
| 306 break; |
| 307 case SkColorSpace::kLinear_GammaNamed: |
| 308 load = load_rgba_from_tables; |
| 309 load_1 = load_rgba_from_tables_1; |
| 310 store = store_f16; |
| 311 store_1 = store_f16_1; |
| 312 sizeOfDstPixel = 8; |
| 313 break; |
| 314 case SkColorSpace::kNonStandard_GammaNamed: |
| 315 load = kPremul ? load_rgba_from_tables : load_rgb_from_tables; |
| 316 load_1 = kPremul ? load_rgba_from_tables_1 : load_rgb_from_tables_1
; |
| 317 store = store_generic; |
| 318 store_1 = store_generic_1; |
| 319 sizeOfDstPixel = 4; |
| 320 break; |
| 321 } |
| 322 |
| 323 Sk4f rXgXbX, rYgYbY, rZgZbZ, rTgTbT; |
| 324 load_matrix(matrix, rXgXbX, rYgYbY, rZgZbZ, rTgTbT); |
| 325 |
| 326 if (len >= 4) { |
| 327 // Naively this would be a loop of load-transform-store, but we found it
faster to |
| 328 // move the N+1th load ahead of the Nth store. We don't bother doing th
is for N<4. |
| 329 Sk4f r, g, b, a; |
| 330 load(src, r, g, b, a, srcTables); |
| 331 src += 4; |
| 332 len -= 4; |
| 333 |
| 334 Sk4f dr, dg, db, da; |
| 335 while (len >= 4) { |
| 336 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); |
| 337 translate_gamut(rTgTbT, dr, dg, db); |
| 338 |
| 339 if (kPremul) { |
| 340 premultiply(dr, dg, db, da); |
| 341 } |
| 342 |
| 343 load(src, r, g, b, a, srcTables); |
| 344 src += 4; |
| 345 len -= 4; |
| 346 |
| 347 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); |
| 348 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); |
| 349 } |
| 350 |
| 351 transform_gamut(r, g, b, a, rXgXbX, rYgYbY, rZgZbZ, dr, dg, db, da); |
| 352 translate_gamut(rTgTbT, dr, dg, db); |
| 353 |
| 354 if (kPremul) { |
| 355 premultiply(dr, dg, db, da); |
| 356 } |
| 357 |
| 358 store(dst, src - 4, dr, dg, db, da, dstTables, kSwapRB); |
| 359 dst = SkTAddOffset<void>(dst, 4 * sizeOfDstPixel); |
| 360 } |
| 361 |
| 362 while (len > 0) { |
| 363 Sk4f r, g, b, a; |
| 364 load_1(src, r, g, b, a, srcTables); |
| 365 |
| 366 Sk4f rgba; |
| 367 transform_gamut_1(r, g, b, rXgXbX, rYgYbY, rZgZbZ, rgba); |
| 368 |
| 369 translate_gamut_1(rTgTbT, rgba); |
| 370 |
| 371 store_1(dst, src, rgba, a, dstTables, kSwapRB); |
| 372 |
| 373 src += 1; |
| 374 len -= 1; |
| 375 dst = SkTAddOffset<void>(dst, sizeOfDstPixel); |
| 376 } |
| 377 } |
| 378 |
| 379 #endif // SkColorSpaceXformOpts_DEFINED |
OLD | NEW |