| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
| 9 #include "SkTextureCompressor_Blitter.h" |
| 9 | 10 |
| 10 #include "SkEndian.h" | 11 #include "SkEndian.h" |
| 11 | 12 |
| 12 // #define COMPRESS_R11_EAC_SLOW 1 | 13 // #define COMPRESS_R11_EAC_SLOW 1 |
| 13 // #define COMPRESS_R11_EAC_FAST 1 | 14 // #define COMPRESS_R11_EAC_FAST 1 |
| 14 #define COMPRESS_R11_EAC_FASTEST 1 | 15 #define COMPRESS_R11_EAC_FASTEST 1 |
| 15 | 16 |
| 16 // Blocks compressed into R11 EAC are represented as follows: | 17 // Blocks compressed into R11 EAC are represented as follows: |
| 17 // 0000000000000000000000000000000000000000000000000000000000000000 | 18 // 0000000000000000000000000000000000000000000000000000000000000000 |
| 18 // |base_cw|mod|mul| ----------------- indices ------------------- | 19 // |base_cw|mod|mul| ----------------- indices ------------------- |
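To make the diagram concrete: the 64-bit block holds an 8-bit base codeword, a 4-bit modifier-table selector, a 4-bit multiplier, and forty-eight bits of 3-bit per-pixel indices. A minimal sketch of splitting a block into those fields, assuming the field order shown above (the struct and helper names are hypothetical, not part of this patch):

```cpp
#include <cstdint>

// Hypothetical helper (not part of this patch): split a 64-bit R11 EAC
// block into its fields, following the |base_cw|mod|mul|indices| order
// diagrammed above.
struct R11EACFields {
    uint8_t  baseCodeword;   // bits 63-56: 8-bit base codeword
    uint8_t  modifierIdx;    // bits 55-52: modifier-table selector
    uint8_t  multiplier;     // bits 51-48: multiplier
    uint64_t indices;        // bits 47-0: sixteen 3-bit pixel indices
};

static R11EACFields unpack_r11eac_block(uint64_t block) {
    R11EACFields f;
    f.baseCodeword = static_cast<uint8_t>(block >> 56);
    f.modifierIdx  = static_cast<uint8_t>((block >> 52) & 0xF);
    f.multiplier   = static_cast<uint8_t>((block >> 48) & 0xF);
    f.indices      = block & 0xFFFFFFFFFFFFULL;
    return f;
}
```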
| (...skipping 276 matching lines...) |
| 295 *encPtr = proc(block); | 296 *encPtr = proc(block); |
| 296 ++encPtr; | 297 ++encPtr; |
| 297 } | 298 } |
| 298 src += 4 * rowBytes; | 299 src += 4 * rowBytes; |
| 299 } | 300 } |
| 300 | 301 |
| 301 return true; | 302 return true; |
| 302 } | 303 } |
| 303 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 304 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
| 304 | 305 |
| 306 // This function converts an integer containing four bytes of alpha |
| 307 // values into an integer containing four bytes of indices into R11 EAC. |
| 308 // Note, there needs to be a mapping of indices: |
| 309 // 0 1 2 3 4 5 6 7 |
| 310 // 3 2 1 0 4 5 6 7 |
| 311 // |
| 312 // To compute this, we first negate each byte, and then add three, which |
| 313 // gives the mapping |
| 314 // 3 2 1 0 -1 -2 -3 -4 |
| 315 // |
| 316 // Then we mask out the negative values, take their absolute value, and |
| 317 // add three. |
| 318 // |
| 319 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 |
| 320 static inline uint32_t convert_indices(uint32_t x) { |
| 321 // Take the top three bits... |
| 322 x = (x & 0xE0E0E0E0) >> 5; |
| 323 |
| 324 // Negate... |
| 325 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); |
| 326 |
| 327 // Add three |
| 328 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; |
| 329 x = ((x ^ 0x03030303) & 0x80808080) ^ s; |
| 330 |
| 331 // Absolute value |
| 332 const uint32_t a = x & 0x80808080; |
| 333 const uint32_t b = a >> 7; |
| 334 |
| 335 // Aside: mask negatives (m is three if the byte was negative) |
| 336 const uint32_t m = (a >> 6) | b; |
| 337 |
| 338 // .. continue absolute value |
| 339 x = (x ^ ((a - b) | a)) + b; |
| 340 |
| 341 // Add three |
| 342 return x + m; |
| 343 } |
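convert_indices processes four bytes per call (a SWAR trick), but its per-byte effect is exactly the remapping table in the comment above. A small sanity-check sketch, assuming convert_indices is in scope; convert_index_scalar and check_convert_indices are illustrative names:

```cpp
#include <cassert>
#include <cstdint>

// Scalar reference for one byte: take the top three bits of the alpha
// and apply the 0..7 -> 3,2,1,0,4,5,6,7 remapping described above.
static uint8_t convert_index_scalar(uint8_t alpha) {
    const uint8_t i = alpha >> 5;
    return (i < 4) ? (3 - i) : i;
}

static void check_convert_indices(uint32_t alphas) {
    const uint32_t packed = convert_indices(alphas);
    for (int byte = 0; byte < 4; ++byte) {
        const uint8_t a   = (alphas >> (8 * byte)) & 0xFF;
        const uint8_t idx = (packed >> (8 * byte)) & 0xFF;
        assert(idx == convert_index_scalar(a));
    }
}
```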
| 344 |
| 305 #if COMPRESS_R11_EAC_FASTEST | 345 #if COMPRESS_R11_EAC_FASTEST |
| 306 template<unsigned shift> | 346 template<unsigned shift> |
| 307 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { | 347 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { |
| 308 const uint64_t t = (x ^ (x >> shift)) & mask; | 348 const uint64_t t = (x ^ (x >> shift)) & mask; |
| 309 return x ^ t ^ (t << shift); | 349 return x ^ t ^ (t << shift); |
| 310 } | 350 } |
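swap_shift is the standard delta-swap from Hacker's Delight: mask selects a group of bits, and that group is exchanged with the group shift positions above it. A quick illustration with a hypothetical value, swapping the two low bytes of a word:

```cpp
#include <cassert>
#include <cstdint>

static void swap_shift_demo() {
    // Exchange byte 0 with byte 1 of a word: mask selects the lower of
    // the two bit groups, shift is the distance to the upper group.
    const uint64_t x = 0x1234;
    assert(swap_shift<8>(x, 0xFFULL) == 0x3412);
}
```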
| 311 | 351 |
| 312 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { | 352 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { |
| 313 // If our 3-bit block indices are laid out as: | 353 // If our 3-bit block indices are laid out as: |
| 314 // a b c d | 354 // a b c d |
| (...skipping 54 matching lines...) |
| 369 | 409 |
| 370 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o | 410 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o |
| 371 | 411 |
| 372 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); | 412 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); |
| 373 #endif | 413 #endif |
| 374 | 414 |
| 375 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | 415 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p |
| 376 return x; | 416 return x; |
| 377 } | 417 } |
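Put differently, interleave6 transposes a 4x4 matrix of 3-bit indices: row-major input a b c d / e f g h / i j k l / m n o p comes out column-major as a e i m b f j n c g k o d h l p. A naive scalar reference of that reordering, assuming the sixteen indices are already unpacked into an array (it deliberately sidesteps the exact bit placement inside topRows and bottomRows, which the shuffles above handle):

```cpp
#include <cstdint>

// Naive reference for the transpose: pack sixteen 3-bit indices given in
// row-major order (a b c d / e f g h / i j k l / m n o p) into the
// column-major order a e i m b f j n c g k o d h l p produced above.
static uint64_t transpose_indices_reference(const uint8_t idx[16]) {
    uint64_t out = 0;
    for (int col = 0; col < 4; ++col) {
        for (int row = 0; row < 4; ++row) {
            out = (out << 3) | (idx[row * 4 + col] & 0x7);
        }
    }
    return out;  // 48 bits used; 'a' lands in the most significant triple
}
```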
| 378 | 418 |
| 379 // This function converts an integer containing four bytes of alpha | |
| 380 // values into an integer containing four bytes of indices into R11 EAC. | |
| 381 // Note, there needs to be a mapping of indices: | |
| 382 // 0 1 2 3 4 5 6 7 | |
| 383 // 3 2 1 0 4 5 6 7 | |
| 384 // | |
| 385 // To compute this, we first negate each byte, and then add three, which | |
| 386 // gives the mapping | |
| 387 // 3 2 1 0 -1 -2 -3 -4 | |
| 388 // | |
| 389 // Then we mask out the negative values, take their absolute value, and | |
| 390 // add three. | |
| 391 // | |
| 392 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 | |
| 393 static inline uint32_t convert_indices(uint32_t x) { | |
| 394 // Take the top three bits... | |
| 395 x = (x & 0xE0E0E0E0) >> 5; | |
| 396 | |
| 397 // Negate... | |
| 398 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); | |
| 399 | |
| 400 // Add three | |
| 401 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; | |
| 402 x = ((x ^ 0x03030303) & 0x80808080) ^ s; | |
| 403 | |
| 404 // Absolute value | |
| 405 const uint32_t a = x & 0x80808080; | |
| 406 const uint32_t b = a >> 7; | |
| 407 | |
| 408 // Aside: mask negatives (m is three if the byte was negative) | |
| 409 const uint32_t m = (a >> 6) | b; | |
| 410 | |
| 411 // .. continue absolute value | |
| 412 x = (x ^ ((a - b) | a)) + b; | |
| 413 | |
| 414 // Add three | |
| 415 return x + m; | |
| 416 } | |
| 417 | |
| 418 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block | 419 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block |
| 419 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and | 420 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and |
| 420 // tries to optimize where it can using SIMD. | 421 // tries to optimize where it can using SIMD. |
| 421 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { | 422 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { |
| 422 // Store each row of alpha values in an integer | 423 // Store each row of alpha values in an integer |
| 423 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); | 424 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); |
| 424 const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); | 425 const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); |
| 425 const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); | 426 const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); |
| 426 const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); | 427 const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); |
| 427 | 428 |
| (...skipping 78 matching lines...) |
| 506 ((x >> 13) & (7 << 3)) | | 507 ((x >> 13) & (7 << 3)) | |
| 507 ((x >> 2) & (7 << 6)) | | 508 ((x >> 2) & (7 << 6)) | |
| 508 ((x << 9) & (7 << 9)); | 509 ((x << 9) & (7 << 9)); |
| 509 #endif | 510 #endif |
| 510 } | 511 } |
| 511 | 512 |
| 512 // This function returns the compressed format of a block given as four columns of | 513 // This function returns the compressed format of a block given as four columns of |
| 513 // alpha values. Each column is assumed to be loaded from top to bottom, and hence | 514 // alpha values. Each column is assumed to be loaded from top to bottom, and hence |
| 514 // must first be converted to indices and then packed into the resulting 64-bit | 515 // must first be converted to indices and then packed into the resulting 64-bit |
| 515 // integer. | 516 // integer. |
| 516 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0, | 517 inline void compress_block_vertical(uint8_t* dstPtr, const uint8_t *block) { |
| 517 const uint32_t alphaColumn1, | 518 |
| 518 const uint32_t alphaColumn2, | 519 const uint32_t* src = reinterpret_cast<const uint32_t*>(block); |
| 519 const uint32_t alphaColumn3) { | 520 uint64_t* dst = reinterpret_cast<uint64_t*>(dstPtr); |
| 521 |
| 522 const uint32_t alphaColumn0 = src[0]; |
| 523 const uint32_t alphaColumn1 = src[1]; |
| 524 const uint32_t alphaColumn2 = src[2]; |
| 525 const uint32_t alphaColumn3 = src[3]; |
| 520 | 526 |
| 521 if (alphaColumn0 == alphaColumn1 && | 527 if (alphaColumn0 == alphaColumn1 && |
| 522 alphaColumn2 == alphaColumn3 && | 528 alphaColumn2 == alphaColumn3 && |
| 523 alphaColumn0 == alphaColumn2) { | 529 alphaColumn0 == alphaColumn2) { |
| 524 | 530 |
| 525 if (0 == alphaColumn0) { | 531 if (0 == alphaColumn0) { |
| 526 // Transparent | 532 // Transparent |
| 527 return 0x0020000000002000ULL; | 533 *dst = 0x0020000000002000ULL; |
| 534 return; |
| 528 } | 535 } |
| 529 else if (0xFFFFFFFF == alphaColumn0) { | 536 else if (0xFFFFFFFF == alphaColumn0) { |
| 530 // Opaque | 537 // Opaque |
| 531 return 0xFFFFFFFFFFFFFFFFULL; | 538 *dst = 0xFFFFFFFFFFFFFFFFULL; |
| 539 return; |
| 532 } | 540 } |
| 533 } | 541 } |
| 534 | 542 |
| 535 const uint32_t indexColumn0 = convert_indices(alphaColumn0); | 543 const uint32_t indexColumn0 = convert_indices(alphaColumn0); |
| 536 const uint32_t indexColumn1 = convert_indices(alphaColumn1); | 544 const uint32_t indexColumn1 = convert_indices(alphaColumn1); |
| 537 const uint32_t indexColumn2 = convert_indices(alphaColumn2); | 545 const uint32_t indexColumn2 = convert_indices(alphaColumn2); |
| 538 const uint32_t indexColumn3 = convert_indices(alphaColumn3); | 546 const uint32_t indexColumn3 = convert_indices(alphaColumn3); |
| 539 | 547 |
| 540 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); | 548 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); |
| 541 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); | 549 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); |
| 542 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); | 550 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); |
| 543 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); | 551 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); |
| 544 | 552 |
| 545 return SkEndian_SwapBE64(0x8490000000000000ULL | | 553 *dst = SkEndian_SwapBE64(0x8490000000000000ULL | |
| 546 (static_cast<uint64_t>(packedIndexColumn0) << 36) | | 554 (static_cast<uint64_t>(packedIndexColumn0) << 36) | |
| 547 (static_cast<uint64_t>(packedIndexColumn1) << 24) | | 555 (static_cast<uint64_t>(packedIndexColumn1) << 24) | |
| 548 static_cast<uint64_t>(packedIndexColumn2 << 12) | | 556 static_cast<uint64_t>(packedIndexColumn2 << 12) | |
| 549 static_cast<uint64_t>(packedIndexColumn3)); | 557 static_cast<uint64_t>(packedIndexColumn3)); |
| 550 | |
| 551 } | |
| 552 | |
| 553 // Updates the block whose columns are stored in blockColN. curAlphai is expecte
d | |
| 554 // to store, as an integer, the four alpha values that will be placed within eac
h | |
| 555 // of the columns in the range [col, col+colsLeft). | |
| 556 static inline void update_block_columns(uint32_t* block, const int col, | |
| 557 const int colsLeft, const uint32_t curAl
phai) { | |
| 558 SkASSERT(NULL != block); | |
| 559 SkASSERT(col + colsLeft <= 4); | |
| 560 | |
| 561 for (int i = col; i < (col + colsLeft); ++i) { | |
| 562 block[i] = curAlphai; | |
| 563 } | |
| 564 } | 558 } |
| 565 | 559 |
| 566 //////////////////////////////////////////////////////////////////////////////// | 560 //////////////////////////////////////////////////////////////////////////////// |
| 567 | 561 |
| 568 namespace SkTextureCompressor { | 562 namespace SkTextureCompressor { |
| 569 | 563 |
| 570 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { | 564 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { |
| 571 | 565 |
| 572 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 566 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
| 573 | 567 |
| 574 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); | 568 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); |
| 575 | 569 |
| 576 #elif COMPRESS_R11_EAC_FASTEST | 570 #elif COMPRESS_R11_EAC_FASTEST |
| 577 | 571 |
| 578 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); | 572 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); |
| 579 | 573 |
| 580 #else | 574 #else |
| 581 #error "Must choose R11 EAC algorithm" | 575 #error "Must choose R11 EAC algorithm" |
| 582 #endif | 576 #endif |
| 583 } | 577 } |
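R11 EAC encodes each 4x4 tile into 64 bits, i.e. half a byte per pixel, so the destination must hold (width / 4) * (height / 4) * 8 bytes. A minimal sketch of driving the public entry point, assuming width and height are multiples of four; compress_a8_image is an illustrative wrapper, not Skia API:

```cpp
#include <cstdint>
#include <vector>

// Illustrative wrapper (not Skia API): compress a w x h alpha-8 image,
// where w and h are assumed to be multiples of four.
static bool compress_a8_image(const uint8_t* pixels, int w, int h,
                              int rowBytes, std::vector<uint8_t>* out) {
    out->resize((w / 4) * (h / 4) * 8);   // 8 bytes per 4x4 block
    return SkTextureCompressor::CompressA8ToR11EAC(out->data(), pixels,
                                                   w, h, rowBytes);
}
```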
| 584 | 578 |
| 585 // This class implements a blitter that blits directly into a buffer that will | |
| 586 // be used as an R11 EAC compressed texture. We compute this buffer by | |
| 587 // buffering four scan lines and then outputting them all at once. This blitter | |
| 588 // is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType. | |
| 589 class R11_EACBlitter : public SkBlitter { | |
| 590 public: | |
| 591 R11_EACBlitter(int width, int height, void *compressedBuffer); | |
| 592 virtual ~R11_EACBlitter() { this->flushRuns(); } | |
| 593 | |
| 594 // Blit a horizontal run of one or more pixels. | |
| 595 virtual void blitH(int x, int y, int width) SK_OVERRIDE { | |
| 596 // This function is intended to be called from any standard RGB | |
| 597 // buffer, so we should never encounter it. However, if some code | |
| 598 // path does end up here, then this needs to be investigated. | |
| 599 SkFAIL("Not implemented!"); | |
| 600 } | |
| 601 | |
| 602 // Blit a horizontal run of antialiased pixels; runs[] is a *sparse* | |
| 603 // zero-terminated run-length encoding of spans of constant alpha values. | |
| 604 virtual void blitAntiH(int x, int y, | |
| 605 const SkAlpha antialias[], | |
| 606 const int16_t runs[]) SK_OVERRIDE; | |
| 607 | |
| 608 // Blit a vertical run of pixels with a constant alpha value. | |
| 609 virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE { | |
| 610 // This function is currently not implemented. It is not explicitly | |
| 611 // required by the contract, but if at some time a code path runs into | |
| 612 // this function (which is entirely possible), it needs to be implemented. | |
| 613 // | |
| 614 // TODO (krajcevski): | |
| 615 // This function will be most easily implemented in one of two ways: | |
| 616 // 1. Buffer each vertical column value and then construct a list | |
| 617 // of alpha values and output all of the blocks at once. This only | |
| 618 // requires a write to the compressed buffer | |
| 619 // 2. Replace the indices of each block with the proper indices based | |
| 620 // on the alpha value. This requires a read and write of the compressed | |
| 621 // buffer, but much less overhead. | |
| 622 SkFAIL("Not implemented!"); | |
| 623 } | |
| 624 | |
| 625 // Blit a solid rectangle one or more pixels wide. | |
| 626 virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE { | |
| 627 // Analogous to blitRow, this function is intended for RGB targets | |
| 628 // and should never be called by this blitter. Any calls to this function | |
| 629 // are probably a bug and should be investigated. | |
| 630 SkFAIL("Not implemented!"); | |
| 631 } | |
| 632 | |
| 633 // Blit a rectangle with one alpha-blended column on the left, | |
| 634 // width (zero or more) opaque pixels, and one alpha-blended column | |
| 635 // on the right. The result will always be at least two pixels wide. | |
| 636 virtual void blitAntiRect(int x, int y, int width, int height, | |
| 637 SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE { | |
| 638 // This function is currently not implemented. It is not explicitly | |
| 639 // required by the contract, but if at some time a code path runs into | |
| 640 // this function (which is entirely possible), it needs to be implemented. | |
| 641 // | |
| 642 // TODO (krajcevski): | |
| 643 // This function will be most easily implemented as follows: | |
| 644 // 1. If width/height are smaller than a block, then update the | |
| 645 // indices of the affected blocks. | |
| 646 // 2. If width/height are larger than a block, then construct a 9-patch | |
| 647 // of block encodings that represent the rectangle, and write them | |
| 648 // to the compressed buffer as necessary. Whether or not the blocks | |
| 649 // are overwritten by zeros or just their indices are updated is up | |
| 650 // to debate. | |
| 651 SkFAIL("Not implemented!"); | |
| 652 } | |
| 653 | |
| 654 // Blit a pattern of pixels defined by a rectangle-clipped mask; | |
| 655 // typically used for text. | |
| 656 virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE { | |
| 657 // This function is currently not implemented. It is not explicitly | |
| 658 // required by the contract, but if at some time a code path runs into | |
| 659 // this function (which is entirely possible), it needs to be implemented. | |
| 660 // | |
| 661 // TODO (krajcevski): | |
| 662 // This function will be most easily implemented in the same way as | |
| 663 // blitAntiRect above. | |
| 664 SkFAIL("Not implemented!"); | |
| 665 } | |
| 666 | |
| 667 // If the blitter just sets a single value for each pixel, return the | |
| 668 // bitmap it draws into, and assign value. If not, return NULL and ignore | |
| 669 // the value parameter. | |
| 670 virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE { | |
| 671 return NULL; | |
| 672 } | |
| 673 | |
| 674 /** | |
| 675 * Compressed texture blitters only really work correctly if they get | |
| 676 * four blocks at a time. That being said, this blitter tries its best | |
| 677 * to preserve semantics if blitAntiH doesn't get called in too many | |
| 678 * weird ways... | |
| 679 */ | |
| 680 virtual int requestRowsPreserved() const { return kR11_EACBlockSz; } | |
| 681 | |
| 682 protected: | |
| 683 virtual void onNotifyFinished() { this->flushRuns(); } | |
| 684 | |
| 685 private: | |
| 686 static const int kR11_EACBlockSz = 4; | |
| 687 static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz; | |
| 688 | |
| 689 // The longest possible run of pixels that this blitter will receive. | |
| 690 // This is initialized in the constructor to 0x7FFE, which is one less | |
| 691 // than the largest positive 16-bit integer. We make sure that it's one | |
| 692 // less for debugging purposes. We also don't make this variable static | |
| 693 // in order to make sure that we can construct a valid pointer to it. | |
| 694 const int16_t kLongestRun; | |
| 695 | |
| 696 // Usually used in conjunction with kLongestRun. This is initialized to | |
| 697 // zero. | |
| 698 const SkAlpha kZeroAlpha; | |
| 699 | |
| 700 // This is the information that we buffer whenever we're asked to blit | |
| 701 // a row with this blitter. | |
| 702 struct BufferedRun { | |
| 703 const SkAlpha* fAlphas; | |
| 704 const int16_t* fRuns; | |
| 705 int fX, fY; | |
| 706 } fBufferedRuns[kR11_EACBlockSz]; | |
| 707 | |
| 708 // The next row (0-3) that we need to blit. This value should never exceed | |
| 709 // the number of rows that we have (kR11_EACBlockSz) | |
| 710 int fNextRun; | |
| 711 | |
| 712 // The width and height of the image that we're blitting | |
| 713 const int fWidth; | |
| 714 const int fHeight; | |
| 715 | |
| 716 // The R11 EAC buffer that we're blitting into. It is assumed that the buffer | |
| 717 // is large enough to store a compressed image of size fWidth*fHeight. | |
| 718 uint64_t* const fBuffer; | |
| 719 | |
| 720 // Various utility functions | |
| 721 int blocksWide() const { return fWidth / kR11_EACBlockSz; } | |
| 722 int blocksTall() const { return fHeight / kR11_EACBlockSz; } | |
| 723 int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; } | |
| 724 | |
| 725 // Returns the block index for the block containing pixel (x, y). Block | |
| 726 // indices start at zero and proceed in raster order. | |
| 727 int getBlockOffset(int x, int y) const { | |
| 728 SkASSERT(x < fWidth); | |
| 729 SkASSERT(y < fHeight); | |
| 730 const int blockCol = x / kR11_EACBlockSz; | |
| 731 const int blockRow = y / kR11_EACBlockSz; | |
| 732 return blockRow * this->blocksWide() + blockCol; | |
| 733 } | |
| 734 | |
| 735 // Returns a pointer to the block containing pixel (x, y) | |
| 736 uint64_t *getBlock(int x, int y) const { | |
| 737 return fBuffer + this->getBlockOffset(x, y); | |
| 738 } | |
| 739 | |
| 740 // The following function writes the buffered runs to compressed blocks. | |
| 741 // If fNextRun < 4, then we fill the runs that we haven't buffered with | |
| 742 // the constant zero buffer. | |
| 743 void flushRuns(); | |
| 744 }; | |
| 745 | |
| 746 | |
| 747 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) | |
| 748 // 0x7FFE is one less than the largest positive 16-bit int. We use it for | |
| 749 // debugging to make sure that we're properly setting the nextX distance | |
| 750 // in flushRuns(). | |
| 751 : kLongestRun(0x7FFE), kZeroAlpha(0) | |
| 752 , fNextRun(0) | |
| 753 , fWidth(width) | |
| 754 , fHeight(height) | |
| 755 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) | |
| 756 { | |
| 757 SkASSERT((width % kR11_EACBlockSz) == 0); | |
| 758 SkASSERT((height % kR11_EACBlockSz) == 0); | |
| 759 } | |
| 760 | |
| 761 void R11_EACBlitter::blitAntiH(int x, int y, | |
| 762 const SkAlpha* antialias, | |
| 763 const int16_t* runs) { | |
| 764 // Make sure that the new row to blit is either the first | |
| 765 // row that we're blitting, or it's exactly the next scan row | |
| 766 // since the last row that we blit. This is to ensure that when | |
| 767 // we go to flush the runs, that they are all the same four | |
| 768 // runs. | |
| 769 if (fNextRun > 0 && | |
| 770 ((x != fBufferedRuns[fNextRun-1].fX) || | |
| 771 (y-1 != fBufferedRuns[fNextRun-1].fY))) { | |
| 772 this->flushRuns(); | |
| 773 } | |
| 774 | |
| 775 // Align the rows to a block boundary. If we receive rows that | |
| 776 // are not on a block boundary, then fill in the preceding runs | |
| 777 // with zeros. We do this by producing a single RLE that says | |
| 778 // that we have 0x7FFE pixels of zero (0x7FFE = 32766). | |
| 779 const int row = y & ~3; | |
| 780 while ((row + fNextRun) < y) { | |
| 781 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha; | |
| 782 fBufferedRuns[fNextRun].fRuns = &kLongestRun; | |
| 783 fBufferedRuns[fNextRun].fX = 0; | |
| 784 fBufferedRuns[fNextRun].fY = row + fNextRun; | |
| 785 ++fNextRun; | |
| 786 } | |
| 787 | |
| 788 // Make sure that our assumptions aren't violated... | |
| 789 SkASSERT(fNextRun == (y & 3)); | |
| 790 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y); | |
| 791 | |
| 792 // Set the values of the next run | |
| 793 fBufferedRuns[fNextRun].fAlphas = antialias; | |
| 794 fBufferedRuns[fNextRun].fRuns = runs; | |
| 795 fBufferedRuns[fNextRun].fX = x; | |
| 796 fBufferedRuns[fNextRun].fY = y; | |
| 797 | |
| 798 // If we've output four scanlines in a row that don't violate our | |
| 799 // assumptions, then it's time to flush them... | |
| 800 if (4 == ++fNextRun) { | |
| 801 this->flushRuns(); | |
| 802 } | |
| 803 } | |
| 804 | |
| 805 void R11_EACBlitter::flushRuns() { | |
| 806 | |
| 807 // If we don't have any runs, then just return. | |
| 808 if (0 == fNextRun) { | |
| 809 return; | |
| 810 } | |
| 811 | |
| 812 #ifndef NDEBUG | |
| 813 // Make sure that if we have any runs, they all match | |
| 814 for (int i = 1; i < fNextRun; ++i) { | |
| 815 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1); | |
| 816 SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX); | |
| 817 } | |
| 818 #endif | |
| 819 | |
| 820 // If we don't have as many runs as we have rows, fill in the remaining | |
| 821 // runs with constant zeros. | |
| 822 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) { | |
| 823 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i; | |
| 824 fBufferedRuns[i].fX = fBufferedRuns[0].fX; | |
| 825 fBufferedRuns[i].fAlphas = &kZeroAlpha; | |
| 826 fBufferedRuns[i].fRuns = &kLongestRun; | |
| 827 } | |
| 828 | |
| 829 // Make sure that our assumptions aren't violated. | |
| 830 SkASSERT(fNextRun > 0 && fNextRun <= 4); | |
| 831 SkASSERT((fBufferedRuns[0].fY & 3) == 0); | |
| 832 | |
| 833 // The following logic walks four rows at a time and outputs compressed | |
| 834 // blocks to the buffer passed into the constructor. | |
| 835 // We do the following: | |
| 836 // | |
| 837 // c1 c2 c3 c4 | |
| 838 // ----------------------------------------------------------------------- | |
| 839 // ... | | | | | ----> fBufferedRuns[0] | |
| 840 // ----------------------------------------------------------------------- | |
| 841 // ... | | | | | ----> fBufferedRuns[1] | |
| 842 // ----------------------------------------------------------------------- | |
| 843 // ... | | | | | ----> fBufferedRuns[2] | |
| 844 // ----------------------------------------------------------------------- | |
| 845 // ... | | | | | ----> fBufferedRuns[3] | |
| 846 // ----------------------------------------------------------------------- | |
| 847 // | |
| 848 // curX -- the macro X value that we've gotten to. | |
| 849 // c1, c2, c3, c4 -- the integers that represent the columns of the current block | |
| 850 // that we're operating on | |
| 851 // curAlphaColumn -- integer containing the column of alpha values from fBufferedRuns. | |
| 852 // nextX -- for each run, the next point at which we need to update curAlphaColumn | |
| 853 // after the value of curX. | |
| 854 // finalX -- the minimum of all the nextX values. | |
| 855 // | |
| 856 // curX advances to finalX outputting any blocks that it passes along | |
| 857 // the way. Since finalX will not change when we reach the end of a | |
| 858 // run, the termination criteria will be whenever curX == finalX at the | |
| 859 // end of a loop. | |
| 860 | |
| 861 // Setup: | |
| 862 uint32_t c[4] = { 0, 0, 0, 0 }; | |
| 863 uint32_t curAlphaColumn = 0; | |
| 864 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); | |
| 865 | |
| 866 int nextX[kR11_EACBlockSz]; | |
| 867 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
| 868 nextX[i] = 0x7FFFFF; | |
| 869 } | |
| 870 | |
| 871 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); | |
| 872 | |
| 873 // Populate the first set of runs and figure out how far we need to | |
| 874 // advance on the first step | |
| 875 int curX = 0; | |
| 876 int finalX = 0xFFFFF; | |
| 877 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
| 878 nextX[i] = *(fBufferedRuns[i].fRuns); | |
| 879 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
| 880 | |
| 881 finalX = SkMin32(nextX[i], finalX); | |
| 882 } | |
| 883 | |
| 884 // Make sure that we have a valid right-bound X value | |
| 885 SkASSERT(finalX < 0xFFFFF); | |
| 886 | |
| 887 // Run the blitter... | |
| 888 while (curX != finalX) { | |
| 889 SkASSERT(finalX >= curX); | |
| 890 | |
| 891 // Do we need to populate the rest of the block? | |
| 892 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { | |
| 893 const int col = curX & 3; | |
| 894 const int colsLeft = 4 - col; | |
| 895 SkASSERT(curX + colsLeft <= finalX); | |
| 896 | |
| 897 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
| 898 | |
| 899 // Write this block | |
| 900 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
| 901 ++outPtr; | |
| 902 curX += colsLeft; | |
| 903 } | |
| 904 | |
| 905 // If we can advance even further, then just keep memsetting the block | |
| 906 if ((finalX - curX) >= kR11_EACBlockSz) { | |
| 907 SkASSERT((curX & 3) == 0); | |
| 908 | |
| 909 const int col = 0; | |
| 910 const int colsLeft = kR11_EACBlockSz; | |
| 911 | |
| 912 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
| 913 | |
| 914 // While we can keep advancing, just keep writing the block. | |
| 915 uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
| 916 while((finalX - curX) >= kR11_EACBlockSz) { | |
| 917 *outPtr = lastBlock; | |
| 918 ++outPtr; | |
| 919 curX += kR11_EACBlockSz; | |
| 920 } | |
| 921 } | |
| 922 | |
| 923 // If we haven't advanced within the block then do so. | |
| 924 if (curX < finalX) { | |
| 925 const int col = curX & 3; | |
| 926 const int colsLeft = finalX - curX; | |
| 927 | |
| 928 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
| 929 | |
| 930 curX += colsLeft; | |
| 931 } | |
| 932 | |
| 933 SkASSERT(curX == finalX); | |
| 934 | |
| 935 // Figure out what the next advancement is... | |
| 936 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
| 937 if (nextX[i] == finalX) { | |
| 938 const int16_t run = *(fBufferedRuns[i].fRuns); | |
| 939 fBufferedRuns[i].fRuns += run; | |
| 940 fBufferedRuns[i].fAlphas += run; | |
| 941 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
| 942 nextX[i] += *(fBufferedRuns[i].fRuns); | |
| 943 } | |
| 944 } | |
| 945 | |
| 946 finalX = 0xFFFFF; | |
| 947 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
| 948 finalX = SkMin32(nextX[i], finalX); | |
| 949 } | |
| 950 } | |
| 951 | |
| 952 // If we didn't land on a block boundary, output the block... | |
| 953 if ((curX & 3) > 1) { | |
| 954 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
| 955 } | |
| 956 | |
| 957 fNextRun = 0; | |
| 958 } | |
| 959 | |
| 960 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { | 579 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { |
| 961 return new R11_EACBlitter(width, height, outputBuffer); | 580 return new |
| 581 SkTCompressedAlphaBlitter<4, 8, compress_block_vertical> |
| 582 (width, height, outputBuffer); |
| 962 } | 583 } |
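The new body routes R11 EAC through the shared SkTCompressedAlphaBlitter template from SkTextureCompressor_Blitter.h: the parameters read as a 4x4 block dimension, 8 encoded bytes per block, and the per-block compression proc. A hedged usage sketch, sizing the backing buffer the same way as above (make_r11eac_blitter is an illustrative helper, not Skia API):

```cpp
#include <vector>

static SkBlitter* make_r11eac_blitter(int width, int height,
                                      std::vector<uint8_t>* storage) {
    // Width and height must be multiples of the 4x4 block dimension;
    // the backing store holds 8 bytes per block. The caller owns the
    // returned blitter.
    storage->resize((width / 4) * (height / 4) * 8);
    return SkTextureCompressor::CreateR11EACBlitter(width, height,
                                                    storage->data());
}
```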
| 963 | 584 |
| 964 } // namespace SkTextureCompressor | 585 } // namespace SkTextureCompressor |