| OLD | NEW | 
|---|
| 1 /* | 1 /* | 
| 2  * Copyright 2014 Google Inc. | 2  * Copyright 2014 Google Inc. | 
| 3  * | 3  * | 
| 4  * Use of this source code is governed by a BSD-style license that can be | 4  * Use of this source code is governed by a BSD-style license that can be | 
| 5  * found in the LICENSE file. | 5  * found in the LICENSE file. | 
| 6  */ | 6  */ | 
| 7 | 7 | 
#include "SkTextureCompressor_LATC.h"
#include "SkTextureCompressor_Blitter.h"

#include "SkEndian.h"

#include <cstring>
| 11 | 12 | 
|  | 13 // Compression options. In general, the slow version is much more accurate, but | 
|  | 14 // much slower. The fast option is much faster, but much less accurate. YMMV. | 
|  | 15 #define COMPRESS_LATC_SLOW 0 | 
|  | 16 #define COMPRESS_LATC_FAST 1 | 
|  | 17 | 
|  | 18 //////////////////////////////////////////////////////////////////////////////// | 
|  | 19 | 
|  | 20 #if COMPRESS_LATC_SLOW | 
|  | 21 | 
| 12 //////////////////////////////////////////////////////////////////////////////// | 22 //////////////////////////////////////////////////////////////////////////////// | 
| 13 // | 23 // | 
| 14 // Utility Functions | 24 // Utility Functions | 
| 15 // | 25 // | 
| 16 //////////////////////////////////////////////////////////////////////////////// | 26 //////////////////////////////////////////////////////////////////////////////// | 
| 17 | 27 | 
| 18 // Absolute difference between two values. More correct than SkTAbs(a - b) | 28 // Absolute difference between two values. More correct than SkTAbs(a - b) | 
| 19 // because it works on unsigned values. | 29 // because it works on unsigned values. | 
| 20 template <typename T> inline T abs_diff(const T &a, const T &b) { | 30 template <typename T> inline T abs_diff(const T &a, const T &b) { | 
| 21     return (a > b) ? (a - b) : (b - a); | 31     return (a > b) ? (a - b) : (b - a); | 
| (...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 271         // This is really just for correctness, in all of my tests we | 281         // This is really just for correctness, in all of my tests we | 
| 272         // never take this step. We don't lose too much perf here because | 282         // never take this step. We don't lose too much perf here because | 
| 273         // most of the processing in this function is worth it for the | 283         // most of the processing in this function is worth it for the | 
| 274         // 1 == nUniquePixels optimization. | 284         // 1 == nUniquePixels optimization. | 
| 275         return compress_latc_block_bb(pixels); | 285         return compress_latc_block_bb(pixels); | 
| 276     } else { | 286     } else { | 
| 277         return compress_latc_block_bb_ignore_extremal(pixels); | 287         return compress_latc_block_bb_ignore_extremal(pixels); | 
| 278     } | 288     } | 
| 279 } | 289 } | 
| 280 | 290 | 
|  | 291 #endif  // COMPRESS_LATC_SLOW | 
|  | 292 | 
|  | 293 //////////////////////////////////////////////////////////////////////////////// | 
|  | 294 | 
|  | 295 #if COMPRESS_LATC_FAST | 
|  | 296 | 
// Remap the top three bits of each byte of |x| to LATC palette indices and
// pack the four 3-bit results into the low 12 bits of the return value.
static inline uint32_t convert_index(uint32_t x) {
    // The LATC palette we emit is
    //     255, 0, 219, 182, 146, 109, 73, 36
    // so the top three bits of each byte, which quantize the alpha value to
    //     0 1 2 3 4 5 6 7
    // must be remapped to the palette indices
    //     1 7 6 5 4 3 2 0
    // All four bytes are remapped in parallel.

    // Reverse: 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
    uint32_t idx = 0x07070707 - ((x >> 5) & 0x07070707);

    // Per-byte flag that is 1 wherever the reversed value is non-zero.
    const uint32_t nonzero = (idx | (idx >> 1) | (idx >> 2)) & 0x01010101;

    // Bump every non-zero byte: 7 6 5 4 3 2 1 0  -->  8 7 6 5 4 3 2 0.
    idx += nonzero;

    // Fold the overflowed 8 (binary 1000) back into its byte's low bit
    // (8 --> 9), so masking with 7 turns it into the required 1:
    // 8 7 6 5 4 3 2 0  -->  1 7 6 5 4 3 2 0.
    idx |= (idx >> 3) & 0x01010101;
    idx &= 0x07070707;

    // Concatenate the four 3-bit indices, lowest memory address first.
#if defined (SK_CPU_BENDIAN)
    return (idx >> 24)
         | ((idx >> 13) & 0x38)
         | ((idx >> 2) & 0x1C0)
         | ((idx << 9) & 0xE00);
#else
    return (idx & 0x7)
         | ((idx >> 5) & 0x38)
         | ((idx >> 10) & 0x1C0)
         | ((idx >> 15) & 0xE00);
#endif
}
|  | 342 | 
|  | 343 typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes); | 
|  | 344 template<PackIndicesProc packIndicesProc> | 
|  | 345 static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int row
     Bytes) { | 
|  | 346     *(reinterpret_cast<uint64_t*>(*dstPtr)) = | 
|  | 347         SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16)); | 
|  | 348     *dstPtr += 8; | 
|  | 349 } | 
|  | 350 | 
|  | 351 inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) { | 
|  | 352     uint64_t result = 0; | 
|  | 353     for (int i = 0; i < 4; ++i) { | 
|  | 354         const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*row
     Bytes)); | 
|  | 355         result |= static_cast<uint64_t>(convert_index(idx)) << 12*i; | 
|  | 356     } | 
|  | 357     return result; | 
|  | 358 } | 
|  | 359 | 
|  | 360 inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) { | 
|  | 361     // !SPEED! Blarg, this is kind of annoying. SSE4 can make this | 
|  | 362     // a LOT faster. | 
|  | 363     uint8_t transposed[16]; | 
|  | 364     for (int i = 0; i < 4; ++i) { | 
|  | 365         for (int j = 0; j < 4; ++j) { | 
|  | 366             transposed[j*4+i] = indices[i*rowBytes + j]; | 
|  | 367         } | 
|  | 368     } | 
|  | 369 | 
|  | 370     return PackRowMajor(transposed, 4); | 
|  | 371 } | 
|  | 372 | 
|  | 373 static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src, | 
|  | 374                                  int width, int height, int rowBytes) { | 
|  | 375 | 
|  | 376     if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) { | 
|  | 377         return false; | 
|  | 378     } | 
|  | 379 | 
|  | 380     uint8_t** dstPtr = &dst; | 
|  | 381     for (int y = 0; y < height; y += 4) { | 
|  | 382         for (int x = 0; x < width; x += 4) { | 
|  | 383             compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, r
     owBytes); | 
|  | 384         } | 
|  | 385     } | 
|  | 386 | 
|  | 387     return true; | 
|  | 388 } | 
|  | 389 | 
|  | 390 void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) { | 
|  | 391     compress_a8_latc_block<PackColumnMajor>(&dst, block, 4); | 
|  | 392 } | 
|  | 393 | 
|  | 394 #endif  // COMPRESS_LATC_FAST | 
|  | 395 | 
| 281 //////////////////////////////////////////////////////////////////////////////// | 396 //////////////////////////////////////////////////////////////////////////////// | 
| 282 | 397 | 
| 283 namespace SkTextureCompressor { | 398 namespace SkTextureCompressor { | 
| 284 | 399 | 
// Compress a width x height buffer of 8-bit alpha values (stride |rowBytes|)
// into LATC blocks at |dst|. The compressor is selected at compile time by
// COMPRESS_LATC_FAST / COMPRESS_LATC_SLOW above; exactly one must be set.
// In the fast path this returns false when width/height are not non-negative
// multiples of 4; the slow path delegates to compress_4x4_a8_to_64bit.
bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
#if COMPRESS_LATC_FAST
    return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
#elif COMPRESS_LATC_SLOW
    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
#else
#error "Must choose either fast or slow LATC compression"
#endif
}
| 288 | 409 | 
// Create a blitter that emits LATC-compressed coverage into |outputBuffer|.
// Only implemented for the fast compressor (4x4 blocks, 8 bytes per block,
// compressed column-major); the slow path is unimplemented and returns NULL.
// NOTE(review): if both options were disabled this would fall off the end
// without a return -- in practice guarded by the #error in CompressA8ToLATC.
SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) {
#if COMPRESS_LATC_FAST
    return new
        SkTCompressedAlphaBlitter<4, 8, CompressA8LATCBlockVertical>
        (width, height, outputBuffer);
#elif COMPRESS_LATC_SLOW
    // TODO (krajcevski)
    return NULL;
#endif
}
| 293 | 420 | 
| 294 }  // SkTextureCompressor | 421 }  // SkTextureCompressor | 
| OLD | NEW | 
|---|