OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor_LATC.h" | 8 #include "SkTextureCompressor_LATC.h" |
| 9 #include "SkTextureCompressor_Blitter.h" |
9 | 10 |
10 #include "SkEndian.h" | 11 #include "SkEndian.h" |
11 | 12 |
| 13 // Compression options. In general, the slow version is much more accurate, but |
| 14 // much slower. The fast option is much faster, but much less accurate. YMMV. |
| 15 #define COMPRESS_LATC_SLOW 0 |
| 16 #define COMPRESS_LATC_FAST 1 |
| 17 |
| 18 //////////////////////////////////////////////////////////////////////////////// |
| 19 |
| 20 #if COMPRESS_LATC_SLOW |
| 21 |
12 //////////////////////////////////////////////////////////////////////////////// | 22 //////////////////////////////////////////////////////////////////////////////// |
13 // | 23 // |
14 // Utility Functions | 24 // Utility Functions |
15 // | 25 // |
16 //////////////////////////////////////////////////////////////////////////////// | 26 //////////////////////////////////////////////////////////////////////////////// |
17 | 27 |
18 // Absolute difference between two values. More correct than SkTAbs(a - b) | 28 // Absolute difference between two values. More correct than SkTAbs(a - b) |
19 // because it works on unsigned values. | 29 // because it works on unsigned values. |
20 template <typename T> inline T abs_diff(const T &a, const T &b) { | 30 template <typename T> inline T abs_diff(const T &a, const T &b) { |
21 return (a > b) ? (a - b) : (b - a); | 31 return (a > b) ? (a - b) : (b - a); |
(...skipping 249 matching lines...) |
271 // This is really just for correctness, in all of my tests we | 281 // This is really just for correctness, in all of my tests we |
272 // never take this step. We don't lose too much perf here because | 282 // never take this step. We don't lose too much perf here because |
273 // most of the processing in this function is worth it for the | 283 // most of the processing in this function is worth it for the |
274 // 1 == nUniquePixels optimization. | 284 // 1 == nUniquePixels optimization. |
275 return compress_latc_block_bb(pixels); | 285 return compress_latc_block_bb(pixels); |
276 } else { | 286 } else { |
277 return compress_latc_block_bb_ignore_extremal(pixels); | 287 return compress_latc_block_bb_ignore_extremal(pixels); |
278 } | 288 } |
279 } | 289 } |
280 | 290 |
| 291 #endif // COMPRESS_LATC_SLOW |
| 292 |
| 293 //////////////////////////////////////////////////////////////////////////////// |
| 294 |
| 295 #if COMPRESS_LATC_FAST |
| 296 |
| 297 // Take the top three indices of each int and pack them into the low 12 |
| 298 // bits of the integer. |
| 299 static inline uint32_t convert_index(uint32_t x) { |
| 300 // Since the palette is |
| 301 // 255, 0, 219, 182, 146, 109, 73, 36 |
| 302 // we need to map the high three bits of each byte in the integer |
| 303 // from |
| 304 // 0 1 2 3 4 5 6 7 |
| 305 // to |
| 306 // 1 7 6 5 4 3 2 0 |
| 307 // |
| 308 // This first operation takes the mapping from |
| 309 // 0 1 2 3 4 5 6 7 --> 7 6 5 4 3 2 1 0 |
| 310 x = 0x07070707 - ((x >> 5) & 0x07070707); |
| 311 |
| 312 // mask is 1 if index is non-zero |
| 313 const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101; |
| 314 |
| 315 // add mask: |
| 316 // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0 |
| 317 x = (x + mask); |
| 318 |
| 319 // Handle overflow: |
| 320 // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0 |
| 321 x |= (x >> 3) & 0x01010101; |
| 322 |
| 323 // Mask out high bits: |
| 324 // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0 |
| 325 x &= 0x07070707; |
| 326 |
| 327 // Pack it in... |
| 328 #if defined (SK_CPU_BENDIAN) |
| 329 return |
| 330 (x >> 24) | |
| 331 ((x >> 13) & 0x38) | |
| 332 ((x >> 2) & 0x1C0) | |
| 333 ((x << 9) & 0xE00); |
| 334 #else |
| 335 return |
| 336 (x & 0x7) | |
| 337 ((x >> 5) & 0x38) | |
| 338 ((x >> 10) & 0x1C0) | |
| 339 ((x >> 15) & 0xE00); |
| 340 #endif |
| 341 } |
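As a rough worked example of the remapping and packing above (values chosen for illustration, not part of this change): four alpha bytes 0xFF, 0x00, 0x80, 0x40 read as a little-endian uint32_t give x = 0x408000FF; their high three bits quantize to 7, 0, 4, 2, the table maps those to palette indices 0, 1, 4, 6, and the little-endian packing places them in the low 12 bits:

    // Illustrative sanity check, little-endian build assumed:
    // indices 0, 1, 4, 6 -> 0 | (1 << 3) | (4 << 6) | (6 << 9) == 0xD08
    SkASSERT(0xD08 == convert_index(0x408000FF));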
| 342 |
| 343 typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes); |
| 344 template<PackIndicesProc packIndicesProc> |
| 345 static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) { |
| 346 *(reinterpret_cast<uint64_t*>(*dstPtr)) = |
| 347 SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16)); |
| 348 *dstPtr += 8; |
| 349 } |
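For context, the 64-bit value written above follows the standard LATC/BC4 block layout: byte 0 holds LUM0, byte 1 holds LUM1, and the remaining six bytes hold sixteen 3-bit indices, which is why the packed indices are shifted up by 16. A sketch of what a decoder derives from the fixed endpoints used here (the constant below is illustrative, not part of this change):

    // With LUM0 = 0xFF > LUM1 = 0x00, a decoder builds the 8-entry palette as
    // LUM0, LUM1, then ((8 - i)*LUM0 + (i - 1)*LUM1) / 7 (rounded) for i = 2..7,
    // which is exactly the palette convert_index() targets:
    static const uint8_t kDecodedPalette[8] = { 255, 0, 219, 182, 146, 109, 73, 36 };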
| 350 |
| 351 inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) { |
| 352 uint64_t result = 0; |
| 353 for (int i = 0; i < 4; ++i) { |
| 354 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes)); |
| 355 result |= static_cast<uint64_t>(convert_index(idx)) << 12*i; |
| 356 } |
| 357 return result; |
| 358 } |
| 359 |
| 360 inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) { |
| 361 // !SPEED! Blarg, this is kind of annoying. SSE4 can make this |
| 362 // a LOT faster. |
| 363 uint8_t transposed[16]; |
| 364 for (int i = 0; i < 4; ++i) { |
| 365 for (int j = 0; j < 4; ++j) { |
| 366 transposed[j*4+i] = indices[i*rowBytes + j]; |
| 367 } |
| 368 } |
| 369 |
| 370 return PackRowMajor(transposed, 4); |
| 371 } |
| 372 |
| 373 static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src, |
| 374 int width, int height, int rowBytes) { |
| 375 |
| 376 if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) { |
| 377 return false; |
| 378 } |
| 379 |
| 380 uint8_t** dstPtr = &dst; |
| 381 for (int y = 0; y < height; y += 4) { |
| 382 for (int x = 0; x < width; x += 4) { |
| 383 compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes); |
| 384 } |
| 385 } |
| 386 |
| 387 return true; |
| 388 } |
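Note that the caller is expected to size dst for the compressed output: each 4x4 block of 16 source bytes becomes one 8-byte block, so a width-by-height A8 plane needs width*height/2 bytes. A sketch of that arithmetic (helper name assumed, for illustration only):

    // Bytes of LATC output for an A8 plane whose dimensions are multiples of 4:
    // (width/4) * (height/4) blocks at 8 bytes each == width*height/2.
    static inline size_t latc_compressed_size(int width, int height) {
        return (static_cast<size_t>(width) * height) / 2;
    }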
| 389 |
| 390 void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) { |
| 391 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4); |
| 392 } |
| 393 |
| 394 #endif // COMPRESS_LATC_FAST |
| 395 |
281 //////////////////////////////////////////////////////////////////////////////// | 396 //////////////////////////////////////////////////////////////////////////////// |
282 | 397 |
283 namespace SkTextureCompressor { | 398 namespace SkTextureCompressor { |
284 | 399 |
285 bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { | 400 bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { |
| 401 #if COMPRESS_LATC_FAST |
| 402 return compress_4x4_a8_latc(dst, src, width, height, rowBytes); |
| 403 #elif COMPRESS_LATC_SLOW |
286 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block); | 404 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block); |
| 405 #else |
| 406 #error "Must choose either fast or slow LATC compression" |
| 407 #endif |
287 } | 408 } |
288 | 409 |
289 SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) { | 410 SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) { |
| 411 #if COMPRESS_LATC_FAST |
| 412 return new |
| 413 SkTCompressedAlphaBlitter<4, 8, CompressA8LATCBlockVertical> |
| 414 (width, height, outputBuffer); |
| 415 #elif COMPRESS_LATC_SLOW |
290 // TODO (krajcevski) | 416 // TODO (krajcevski) |
291 return NULL; | 417 return NULL; |
| 418 #endif |
292 } | 419 } |
293 | 420 |
294 } // SkTextureCompressor | 421 } // SkTextureCompressor |
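A minimal usage sketch of the public entry point (buffer names are assumed; sizing follows the width*height/2 rule noted above):

    // Hypothetical caller, illustration only: compress a tightly packed
    // 16x16 A8 bitmap (rowBytes == width).
    uint8_t compressed[(16 * 16) / 2];
    bool ok = SkTextureCompressor::CompressA8ToLATC(compressed, srcA8Pixels,
                                                    16, 16, 16);
    SkASSERT(ok);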