OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
| 9 #include "SkTextureCompressor_Blitter.h" |
9 | 10 |
10 #include "SkEndian.h" | 11 #include "SkEndian.h" |
11 | 12 |
12 // #define COMPRESS_R11_EAC_SLOW 1 | 13 // #define COMPRESS_R11_EAC_SLOW 1 |
13 // #define COMPRESS_R11_EAC_FAST 1 | 14 // #define COMPRESS_R11_EAC_FAST 1 |
14 #define COMPRESS_R11_EAC_FASTEST 1 | 15 #define COMPRESS_R11_EAC_FASTEST 1 |
15 | 16 |
16 // Blocks compressed into R11 EAC are represented as follows: | 17 // Blocks compressed into R11 EAC are represented as follows: |
17 // 0000000000000000000000000000000000000000000000000000000000000000 | 18 // 0000000000000000000000000000000000000000000000000000000000000000 |
18 // |base_cw|mod|mul| ----------------- indices ------------------- | 19 // |base_cw|mod|mul| ----------------- indices ------------------- |
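For reference, the fields named in the layout above can be pulled out of a 64-bit block as sketched below. The struct and helper are illustrative only (they are not part of this file); the field widths assumed here are the standard R11 EAC ones: an 8-bit base codeword, two 4-bit fields, and 48 bits holding sixteen 3-bit indices.

    #include <cstdint>

    struct R11EACBlockFields {
        uint8_t  baseCodeword;   // |base_cw|
        uint8_t  mod;            // |mod|
        uint8_t  mul;            // |mul|
        uint64_t indices;        // sixteen 3-bit selectors, one per pixel
    };

    static inline R11EACBlockFields unpack_r11eac_block(uint64_t block) {
        R11EACBlockFields f;
        f.baseCodeword = static_cast<uint8_t>(block >> 56);
        f.mod          = static_cast<uint8_t>((block >> 52) & 0xF);
        f.mul          = static_cast<uint8_t>((block >> 48) & 0xF);
        f.indices      = block & ((1ULL << 48) - 1);
        return f;
    }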
(...skipping 276 matching lines...) |
295 *encPtr = proc(block); | 296 *encPtr = proc(block); |
296 ++encPtr; | 297 ++encPtr; |
297 } | 298 } |
298 src += 4 * rowBytes; | 299 src += 4 * rowBytes; |
299 } | 300 } |
300 | 301 |
301 return true; | 302 return true; |
302 } | 303 } |
303 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 304 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
304 | 305 |
| 306 // This function converts an integer containing four bytes of alpha |
| 307 // values into an integer containing four bytes of indices into R11 EAC. |
| 308 // Note, there needs to be a mapping of indices: |
| 309 // 0 1 2 3 4 5 6 7 |
| 310 // 3 2 1 0 4 5 6 7 |
| 311 // |
| 312 // To compute this, we first negate each byte, and then add three, which |
| 313 // gives the mapping |
| 314 // 3 2 1 0 -1 -2 -3 -4 |
| 315 // |
| 316 // Then we mask out the negative values, take their absolute value, and |
| 317 // add three. |
| 318 // |
| 319 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 |
| 320 static inline uint32_t convert_indices(uint32_t x) { |
| 321 // Take the top three bits... |
| 322 x = (x & 0xE0E0E0E0) >> 5; |
| 323 |
| 324 // Negate... |
| 325 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); |
| 326 |
| 327 // Add three |
| 328 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; |
| 329 x = ((x ^ 0x03030303) & 0x80808080) ^ s; |
| 330 |
| 331 // Absolute value |
| 332 const uint32_t a = x & 0x80808080; |
| 333 const uint32_t b = a >> 7; |
| 334 |
| 335 // Aside: mask negatives (m is three if the byte was negative) |
| 336 const uint32_t m = (a >> 6) | b; |
| 337 |
| 338 // .. continue absolute value |
| 339 x = (x ^ ((a - b) | a)) + b; |
| 340 |
| 341 // Add three |
| 342 return x + m; |
| 343 } |
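For comparison, a byte-at-a-time reference of the same remapping (illustrative only, not in the source): each byte's top three bits give a value v in [0, 7], and v maps to 3 - v when v < 4 and to v otherwise.

    static inline uint32_t convert_indices_reference(uint32_t x) {
        uint32_t result = 0;
        for (int i = 0; i < 4; ++i) {
            const uint32_t v = (x >> (8 * i + 5)) & 0x7;    // top three bits of byte i
            const uint32_t mapped = (v < 4) ? (3 - v) : v;  // 0 1 2 3 4 5 6 7 -> 3 2 1 0 4 5 6 7
            result |= mapped << (8 * i);
        }
        return result;
    }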
| 344 |
305 #if COMPRESS_R11_EAC_FASTEST | 345 #if COMPRESS_R11_EAC_FASTEST |
306 template<unsigned shift> | 346 template<unsigned shift> |
307 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { | 347 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { |
308 const uint64_t t = (x ^ (x >> shift)) & mask; | 348 const uint64_t t = (x ^ (x >> shift)) & mask; |
309 return x ^ t ^ (t << shift); | 349 return x ^ t ^ (t << shift); |
310 } | 350 } |
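swap_shift is the classic delta swap: it exchanges the bits selected by mask with the bits shift positions above them and leaves everything else untouched. A tiny illustrative check, assuming swap_shift is visible (not part of the source):

    #include <cassert>
    #include <cstdint>

    int main() {
        // Exchange the low byte with the byte above it.
        assert(swap_shift<8>(0x0000AABBULL, 0xFFULL) == 0x0000BBAAULL);
        return 0;
    }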
311 | 351 |
312 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { | 352 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { |
313 // If our 3-bit block indices are laid out as: | 353 // If our 3-bit block indices are laid out as: |
314 // a b c d | 354 // a b c d |
(...skipping 54 matching lines...) |
369 | 409 |
370 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o | 410 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o |
371 | 411 |
372     x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); | 412     x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); |
373 #endif | 413 #endif |
374 | 414 |
375 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | 415 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p |
376 return x; | 416 return x; |
377 } | 417 } |
378 | 418 |
379 // This function converts an integer containing four bytes of alpha | |
380 // values into an integer containing four bytes of indices into R11 EAC. | |
381 // Note, there needs to be a mapping of indices: | |
382 // 0 1 2 3 4 5 6 7 | |
383 // 3 2 1 0 4 5 6 7 | |
384 // | |
385 // To compute this, we first negate each byte, and then add three, which | |
386 // gives the mapping | |
387 // 3 2 1 0 -1 -2 -3 -4 | |
388 // | |
389 // Then we mask out the negative values, take their absolute value, and | |
390 // add three. | |
391 // | |
392 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 | |
393 static inline uint32_t convert_indices(uint32_t x) { | |
394 // Take the top three bits... | |
395 x = (x & 0xE0E0E0E0) >> 5; | |
396 | |
397 // Negate... | |
398 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); | |
399 | |
400 // Add three | |
401 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; | |
402 x = ((x ^ 0x03030303) & 0x80808080) ^ s; | |
403 | |
404 // Absolute value | |
405 const uint32_t a = x & 0x80808080; | |
406 const uint32_t b = a >> 7; | |
407 | |
408 // Aside: mask negatives (m is three if the byte was negative) | |
409 const uint32_t m = (a >> 6) | b; | |
410 | |
411 // .. continue absolute value | |
412 x = (x ^ ((a - b) | a)) + b; | |
413 | |
414 // Add three | |
415 return x + m; | |
416 } | |
417 | |
418 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block | 419 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block |
419 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and | 420 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and |
420 // tries to optimize where it can using SIMD. | 421 // tries to optimize where it can using SIMD. |
421 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { | 422 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { |
422 // Store each row of alpha values in an integer | 423 // Store each row of alpha values in an integer |
423 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); | 424 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); |
424     const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); | 425     const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); |
425     const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); | 426     const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); |
426     const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); | 427     const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); |
427 | 428 |
(...skipping 78 matching lines...) |
506 ((x >> 13) & (7 << 3)) | | 507 ((x >> 13) & (7 << 3)) | |
507 ((x >> 2) & (7 << 6)) | | 508 ((x >> 2) & (7 << 6)) | |
508 ((x << 9) & (7 << 9)); | 509 ((x << 9) & (7 << 9)); |
509 #endif | 510 #endif |
510 } | 511 } |
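The scalar path ending above packs the low three bits of each byte of x into a 12-bit value, with byte 0 of x landing in the top index slot (bits 9..11) and byte 3 in the bottom (bits 0..2). An equivalent loop-based sketch, illustrative only; it assumes the term elided in the skipped lines maps the top byte of x into bits 0..2, consistent with the pattern of the visible terms:

    static inline uint32_t pack_indices_vertical_reference(uint32_t x) {
        uint32_t result = 0;
        for (int i = 0; i < 4; ++i) {
            const uint32_t idx = (x >> (8 * i)) & 0x7;  // 3-bit index from byte i
            result |= idx << (9 - 3 * i);               // byte 0 -> bits 9..11, byte 3 -> bits 0..2
        }
        return result;
    }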
511 | 512 |
512 // This function returns the compressed format of a block given as four columns of | 513 // This function returns the compressed format of a block given as four columns of |
513 // alpha values. Each column is assumed to be loaded from top to bottom, and hence | 514 // alpha values. Each column is assumed to be loaded from top to bottom, and hence |
514 // must first be converted to indices and then packed into the resulting 64-bit | 515 // must first be converted to indices and then packed into the resulting 64-bit |
515 // integer. | 516 // integer. |
516 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0, | 517 inline void compress_block_vertical(uint8_t* dstPtr, const uint8_t *block) { |
517 const uint32_t alphaColumn1, | 518 |
518 const uint32_t alphaColumn2, | 519 const uint32_t* src = reinterpret_cast<const uint32_t*>(block); |
519 const uint32_t alphaColumn3) { | 520 uint64_t* dst = reinterpret_cast<uint64_t*>(dstPtr); |
| 521 |
| 522 const uint32_t alphaColumn0 = src[0]; |
| 523 const uint32_t alphaColumn1 = src[1]; |
| 524 const uint32_t alphaColumn2 = src[2]; |
| 525 const uint32_t alphaColumn3 = src[3]; |
520 | 526 |
521 if (alphaColumn0 == alphaColumn1 && | 527 if (alphaColumn0 == alphaColumn1 && |
522 alphaColumn2 == alphaColumn3 && | 528 alphaColumn2 == alphaColumn3 && |
523 alphaColumn0 == alphaColumn2) { | 529 alphaColumn0 == alphaColumn2) { |
524 | 530 |
525 if (0 == alphaColumn0) { | 531 if (0 == alphaColumn0) { |
526 // Transparent | 532 // Transparent |
527 return 0x0020000000002000ULL; | 533 *dst = 0x0020000000002000ULL; |
| 534 return; |
528 } | 535 } |
529 else if (0xFFFFFFFF == alphaColumn0) { | 536 else if (0xFFFFFFFF == alphaColumn0) { |
530 // Opaque | 537 // Opaque |
531 return 0xFFFFFFFFFFFFFFFFULL; | 538 *dst = 0xFFFFFFFFFFFFFFFFULL; |
| 539 return; |
532 } | 540 } |
533 } | 541 } |
534 | 542 |
535 const uint32_t indexColumn0 = convert_indices(alphaColumn0); | 543 const uint32_t indexColumn0 = convert_indices(alphaColumn0); |
536 const uint32_t indexColumn1 = convert_indices(alphaColumn1); | 544 const uint32_t indexColumn1 = convert_indices(alphaColumn1); |
537 const uint32_t indexColumn2 = convert_indices(alphaColumn2); | 545 const uint32_t indexColumn2 = convert_indices(alphaColumn2); |
538 const uint32_t indexColumn3 = convert_indices(alphaColumn3); | 546 const uint32_t indexColumn3 = convert_indices(alphaColumn3); |
539 | 547 |
540 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); | 548 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); |
541 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); | 549 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); |
542 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); | 550 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); |
543 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); | 551 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); |
544 | 552 |
545 return SkEndian_SwapBE64(0x8490000000000000ULL | | 553 *dst = SkEndian_SwapBE64(0x8490000000000000ULL | |
546 (static_cast<uint64_t>(packedIndexColumn0) << 36) | | 554 (static_cast<uint64_t>(packedIndexColumn0) << 36) | |
547 (static_cast<uint64_t>(packedIndexColumn1) << 24) | | 555 (static_cast<uint64_t>(packedIndexColumn1) << 24) | |
548 static_cast<uint64_t>(packedIndexColumn2 << 12) | | 556 static_cast<uint64_t>(packedIndexColumn2 << 12) | |
549 static_cast<uint64_t>(packedIndexColumn3)); | 557 static_cast<uint64_t>(packedIndexColumn3)); |
550 | |
551 } | |
552 | |
553 // Updates the block whose columns are stored in blockColN. curAlphai is expected | |
554 // to store, as an integer, the four alpha values that will be placed within each | |
555 // of the columns in the range [col, col+colsLeft). | |
556 static inline void update_block_columns(uint32_t* block, const int col, | |
557                                          const int colsLeft, const uint32_t curAlphai) { | |
558 SkASSERT(NULL != block); | |
559 SkASSERT(col + colsLeft <= 4); | |
560 | |
561 for (int i = col; i < (col + colsLeft); ++i) { | |
562 block[i] = curAlphai; | |
563 } | |
564 } | 558 } |
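A minimal usage sketch of the new compress_block_vertical signature (illustrative only, not from the source): block points at sixteen alpha bytes interpreted as four 32-bit columns, and dstPtr receives the 8-byte compressed block, so it must be suitably aligned for the uint64_t store.

    #include <cstdint>
    #include <cstring>

    void compress_one_opaque_block() {
        uint8_t alphaColumns[16];                               // four columns of four alphas each
        memset(alphaColumns, 0xFF, sizeof(alphaColumns));       // a fully opaque block
        alignas(8) uint8_t compressedBlock[8];                  // aligned for the uint64_t store
        compress_block_vertical(compressedBlock, alphaColumns); // takes the opaque fast path
    }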
565 | 559 |
566 //////////////////////////////////////////////////////////////////////////////// | 560 //////////////////////////////////////////////////////////////////////////////// |
567 | 561 |
568 namespace SkTextureCompressor { | 562 namespace SkTextureCompressor { |
569 | 563 |
570 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { | 564 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { |
571 | 565 |
572 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 566 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
573 | 567 |
574     return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); | 568     return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); |
575 | 569 |
576 #elif COMPRESS_R11_EAC_FASTEST | 570 #elif COMPRESS_R11_EAC_FASTEST |
577 | 571 |
578 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); | 572 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); |
579 | 573 |
580 #else | 574 #else |
581 #error "Must choose R11 EAC algorithm" | 575 #error "Must choose R11 EAC algorithm" |
582 #endif | 576 #endif |
583 } | 577 } |
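An illustrative call (not from the source): each 4x4 block of alpha becomes one 64-bit block, so the destination needs (width / 4) * (height / 4) * 8 bytes, and width and height are assumed to be multiples of four.

    #include <cstdint>
    #include <vector>

    void compress_example() {
        const int width = 64, height = 64, rowBytes = width;
        std::vector<uint8_t> alpha(width * height, 0x80);            // A8 source, constant coverage
        std::vector<uint8_t> r11eac((width / 4) * (height / 4) * 8); // 8 bytes per 4x4 block
        bool ok = SkTextureCompressor::CompressA8ToR11EAC(r11eac.data(), alpha.data(),
                                                          width, height, rowBytes);
        (void)ok;
    }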
584 | 578 |
585 // This class implements a blitter that blits directly into a buffer that will | |
586 // be used as an R11 EAC compressed texture. We compute this buffer by | |
587 // buffering four scan lines and then outputting them all at once. This blitter | |
588 // is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType. | |
589 class R11_EACBlitter : public SkBlitter { | |
590 public: | |
591 R11_EACBlitter(int width, int height, void *compressedBuffer); | |
592 virtual ~R11_EACBlitter() { this->flushRuns(); } | |
593 | |
594 // Blit a horizontal run of one or more pixels. | |
595 virtual void blitH(int x, int y, int width) SK_OVERRIDE { | |
596 // This function is intended to be called from any standard RGB | |
597 // buffer, so we should never encounter it. However, if some code | |
598 // path does end up here, then this needs to be investigated. | |
599 SkFAIL("Not implemented!"); | |
600 } | |
601 | |
602 // Blit a horizontal run of antialiased pixels; runs[] is a *sparse* | |
603 // zero-terminated run-length encoding of spans of constant alpha values. | |
604 virtual void blitAntiH(int x, int y, | |
605 const SkAlpha antialias[], | |
606 const int16_t runs[]) SK_OVERRIDE; | |
607 | |
608 // Blit a vertical run of pixels with a constant alpha value. | |
609 virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE { | |
610 // This function is currently not implemented. It is not explicitly | |
611 // required by the contract, but if at some time a code path runs into | |
612     // this function (which is entirely possible), it needs to be implemented. | |
613 // | |
614 // TODO (krajcevski): | |
615 // This function will be most easily implemented in one of two ways: | |
616 // 1. Buffer each vertical column value and then construct a list | |
617 // of alpha values and output all of the blocks at once. This only | |
618 // requires a write to the compressed buffer | |
619 // 2. Replace the indices of each block with the proper indices based | |
620     //    on the alpha value. This requires a read and write of the compressed | |
621 // buffer, but much less overhead. | |
622 SkFAIL("Not implemented!"); | |
623 } | |
624 | |
625 // Blit a solid rectangle one or more pixels wide. | |
626 virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE { | |
627 // Analogous to blitRow, this function is intended for RGB targets | |
628     // and should never be called by this blitter. Any calls to this function | |
629 // are probably a bug and should be investigated. | |
630 SkFAIL("Not implemented!"); | |
631 } | |
632 | |
633 // Blit a rectangle with one alpha-blended column on the left, | |
634 // width (zero or more) opaque pixels, and one alpha-blended column | |
635 // on the right. The result will always be at least two pixels wide. | |
636 virtual void blitAntiRect(int x, int y, int width, int height, | |
637                               SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE { | |
638 // This function is currently not implemented. It is not explicitly | |
639 // required by the contract, but if at some time a code path runs into | |
640     // this function (which is entirely possible), it needs to be implemented. | |
641 // | |
642 // TODO (krajcevski): | |
643 // This function will be most easily implemented as follows: | |
644 // 1. If width/height are smaller than a block, then update the | |
645 // indices of the affected blocks. | |
646 // 2. If width/height are larger than a block, then construct a 9-patch | |
647 // of block encodings that represent the rectangle, and write them | |
648 // to the compressed buffer as necessary. Whether or not the blocks | |
649 // are overwritten by zeros or just their indices are updated is up | |
650 // to debate. | |
651 SkFAIL("Not implemented!"); | |
652 } | |
653 | |
654 // Blit a pattern of pixels defined by a rectangle-clipped mask; | |
655 // typically used for text. | |
656 virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE { | |
657 // This function is currently not implemented. It is not explicitly | |
658 // required by the contract, but if at some time a code path runs into | |
659     // this function (which is entirely possible), it needs to be implemented. | |
660 // | |
661 // TODO (krajcevski): | |
662 // This function will be most easily implemented in the same way as | |
663 // blitAntiRect above. | |
664 SkFAIL("Not implemented!"); | |
665 } | |
666 | |
667 // If the blitter just sets a single value for each pixel, return the | |
668 // bitmap it draws into, and assign value. If not, return NULL and ignore | |
669 // the value parameter. | |
670 virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE { | |
671 return NULL; | |
672 } | |
673 | |
674 /** | |
675 * Compressed texture blitters only really work correctly if they get | |
676  * four blocks at a time. That being said, this blitter tries its best | |
677 * to preserve semantics if blitAntiH doesn't get called in too many | |
678 * weird ways... | |
679 */ | |
680 virtual int requestRowsPreserved() const { return kR11_EACBlockSz; } | |
681 | |
682 protected: | |
683 virtual void onNotifyFinished() { this->flushRuns(); } | |
684 | |
685 private: | |
686 static const int kR11_EACBlockSz = 4; | |
687 static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz; | |
688 | |
689 // The longest possible run of pixels that this blitter will receive. | |
690 // This is initialized in the constructor to 0x7FFE, which is one less | |
691 // than the largest positive 16-bit integer. We make sure that it's one | |
692 // less for debugging purposes. We also don't make this variable static | |
693 // in order to make sure that we can construct a valid pointer to it. | |
694 const int16_t kLongestRun; | |
695 | |
696 // Usually used in conjunction with kLongestRun. This is initialized to | |
697 // zero. | |
698 const SkAlpha kZeroAlpha; | |
699 | |
700 // This is the information that we buffer whenever we're asked to blit | |
701 // a row with this blitter. | |
702 struct BufferedRun { | |
703 const SkAlpha* fAlphas; | |
704 const int16_t* fRuns; | |
705 int fX, fY; | |
706 } fBufferedRuns[kR11_EACBlockSz]; | |
707 | |
708 // The next row (0-3) that we need to blit. This value should never exceed | |
709 // the number of rows that we have (kR11_EACBlockSz) | |
710 int fNextRun; | |
711 | |
712 // The width and height of the image that we're blitting | |
713 const int fWidth; | |
714 const int fHeight; | |
715 | |
716     // The R11 EAC buffer that we're blitting into. It is assumed that the buffer | |
717 // is large enough to store a compressed image of size fWidth*fHeight. | |
718 uint64_t* const fBuffer; | |
719 | |
720 // Various utility functions | |
721 int blocksWide() const { return fWidth / kR11_EACBlockSz; } | |
722 int blocksTall() const { return fHeight / kR11_EACBlockSz; } | |
723 int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; } | |
724 | |
725 // Returns the block index for the block containing pixel (x, y). Block | |
726 // indices start at zero and proceed in raster order. | |
727 int getBlockOffset(int x, int y) const { | |
728 SkASSERT(x < fWidth); | |
729 SkASSERT(y < fHeight); | |
730 const int blockCol = x / kR11_EACBlockSz; | |
731 const int blockRow = y / kR11_EACBlockSz; | |
732 return blockRow * this->blocksWide() + blockCol; | |
733 } | |
734 | |
735 // Returns a pointer to the block containing pixel (x, y) | |
736 uint64_t *getBlock(int x, int y) const { | |
737 return fBuffer + this->getBlockOffset(x, y); | |
738 } | |
739 | |
740 // The following function writes the buffered runs to compressed blocks. | |
741 // If fNextRun < 4, then we fill the runs that we haven't buffered with | |
742 // the constant zero buffer. | |
743 void flushRuns(); | |
744 }; | |
745 | |
746 | |
747 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) | |
748     // 0x7FFE is one less than the largest positive 16-bit int. We use it for | |
749 // debugging to make sure that we're properly setting the nextX distance | |
750 // in flushRuns(). | |
751 : kLongestRun(0x7FFE), kZeroAlpha(0) | |
752 , fNextRun(0) | |
753 , fWidth(width) | |
754 , fHeight(height) | |
755 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) | |
756 { | |
757 SkASSERT((width % kR11_EACBlockSz) == 0); | |
758 SkASSERT((height % kR11_EACBlockSz) == 0); | |
759 } | |
760 | |
761 void R11_EACBlitter::blitAntiH(int x, int y, | |
762 const SkAlpha* antialias, | |
763 const int16_t* runs) { | |
764 // Make sure that the new row to blit is either the first | |
765 // row that we're blitting, or it's exactly the next scan row | |
766     // after the last row that we blitted. This is to ensure that when | |
767 // we go to flush the runs, that they are all the same four | |
768 // runs. | |
769 if (fNextRun > 0 && | |
770 ((x != fBufferedRuns[fNextRun-1].fX) || | |
771 (y-1 != fBufferedRuns[fNextRun-1].fY))) { | |
772 this->flushRuns(); | |
773 } | |
774 | |
775 // Align the rows to a block boundary. If we receive rows that | |
776 // are not on a block boundary, then fill in the preceding runs | |
777 // with zeros. We do this by producing a single RLE that says | |
778 // that we have 0x7FFE pixels of zero (0x7FFE = 32766). | |
779 const int row = y & ~3; | |
780 while ((row + fNextRun) < y) { | |
781 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha; | |
782 fBufferedRuns[fNextRun].fRuns = &kLongestRun; | |
783 fBufferedRuns[fNextRun].fX = 0; | |
784 fBufferedRuns[fNextRun].fY = row + fNextRun; | |
785 ++fNextRun; | |
786 } | |
787 | |
788 // Make sure that our assumptions aren't violated... | |
789 SkASSERT(fNextRun == (y & 3)); | |
790 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y); | |
791 | |
792 // Set the values of the next run | |
793 fBufferedRuns[fNextRun].fAlphas = antialias; | |
794 fBufferedRuns[fNextRun].fRuns = runs; | |
795 fBufferedRuns[fNextRun].fX = x; | |
796 fBufferedRuns[fNextRun].fY = y; | |
797 | |
798 // If we've output four scanlines in a row that don't violate our | |
799 // assumptions, then it's time to flush them... | |
800 if (4 == ++fNextRun) { | |
801 this->flushRuns(); | |
802 } | |
803 } | |
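For clarity (an illustrative example, not from the source): the runs[]/antialias[] pair buffered above uses Skia's sparse zero-terminated RLE, where runs[i] is only meaningful at the first index of each span and a zero run terminates the row. Walking such a row looks roughly like this:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Three pixels at alpha 0x80 followed by five opaque pixels, then end of row.
        // Only runs[0], runs[3] and the terminating runs[8] are meaningful.
        const int16_t runs[]      = { 3, 0, 0, 5, 0, 0, 0, 0, 0 };
        const uint8_t antialias[] = { 0x80, 0, 0, 0xFF, 0, 0, 0, 0, 0 };

        for (int i = 0; runs[i] > 0; i += runs[i]) {
            printf("span of %d pixels at alpha 0x%02X\n", runs[i], antialias[i]);
        }
        return 0;
    }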
804 | |
805 void R11_EACBlitter::flushRuns() { | |
806 | |
807 // If we don't have any runs, then just return. | |
808 if (0 == fNextRun) { | |
809 return; | |
810 } | |
811 | |
812 #ifndef NDEBUG | |
813 // Make sure that if we have any runs, they all match | |
814 for (int i = 1; i < fNextRun; ++i) { | |
815 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1); | |
816 SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX); | |
817 } | |
818 #endif | |
819 | |
820 // If we dont have as many runs as we have rows, fill in the remaining | |
821 // runs with constant zeros. | |
822 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) { | |
823 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i; | |
824 fBufferedRuns[i].fX = fBufferedRuns[0].fX; | |
825 fBufferedRuns[i].fAlphas = &kZeroAlpha; | |
826 fBufferedRuns[i].fRuns = &kLongestRun; | |
827 } | |
828 | |
829 // Make sure that our assumptions aren't violated. | |
830 SkASSERT(fNextRun > 0 && fNextRun <= 4); | |
831 SkASSERT((fBufferedRuns[0].fY & 3) == 0); | |
832 | |
833 // The following logic walks four rows at a time and outputs compressed | |
834 // blocks to the buffer passed into the constructor. | |
835 // We do the following: | |
836 // | |
837 // c1 c2 c3 c4 | |
838 // ----------------------------------------------------------------------- | |
839 // ... | | | | | ----> fBufferedRuns[0] | |
840 // ----------------------------------------------------------------------- | |
841 // ... | | | | | ----> fBufferedRuns[1] | |
842 // ----------------------------------------------------------------------- | |
843 // ... | | | | | ----> fBufferedRuns[2] | |
844 // ----------------------------------------------------------------------- | |
845 // ... | | | | | ----> fBufferedRuns[3] | |
846 // ----------------------------------------------------------------------- | |
847 // | |
848 // curX -- the macro X value that we've gotten to. | |
849     // c1, c2, c3, c4 -- the integers that represent the columns of the current block | |
850     //     that we're operating on | |
851     // curAlphaColumn -- integer containing the column of alpha values from fBufferedRuns. | |
852     // nextX -- for each run, the next point at which we need to update curAlphaColumn | |
853 // after the value of curX. | |
854 // finalX -- the minimum of all the nextX values. | |
855 // | |
856 // curX advances to finalX outputting any blocks that it passes along | |
857 // the way. Since finalX will not change when we reach the end of a | |
858 // run, the termination criteria will be whenever curX == finalX at the | |
859 // end of a loop. | |
860 | |
861 // Setup: | |
862 uint32_t c[4] = { 0, 0, 0, 0 }; | |
863 uint32_t curAlphaColumn = 0; | |
864 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); | |
865 | |
866 int nextX[kR11_EACBlockSz]; | |
867 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
868 nextX[i] = 0x7FFFFF; | |
869 } | |
870 | |
871 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); | |
872 | |
873 // Populate the first set of runs and figure out how far we need to | |
874 // advance on the first step | |
875 int curX = 0; | |
876 int finalX = 0xFFFFF; | |
877 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
878 nextX[i] = *(fBufferedRuns[i].fRuns); | |
879 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
880 | |
881 finalX = SkMin32(nextX[i], finalX); | |
882 } | |
883 | |
884 // Make sure that we have a valid right-bound X value | |
885 SkASSERT(finalX < 0xFFFFF); | |
886 | |
887 // Run the blitter... | |
888 while (curX != finalX) { | |
889 SkASSERT(finalX >= curX); | |
890 | |
891 // Do we need to populate the rest of the block? | |
892 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { | |
893 const int col = curX & 3; | |
894 const int colsLeft = 4 - col; | |
895 SkASSERT(curX + colsLeft <= finalX); | |
896 | |
897 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
898 | |
899 // Write this block | |
900 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
901 ++outPtr; | |
902 curX += colsLeft; | |
903 } | |
904 | |
905 // If we can advance even further, then just keep memsetting the block | |
906 if ((finalX - curX) >= kR11_EACBlockSz) { | |
907 SkASSERT((curX & 3) == 0); | |
908 | |
909 const int col = 0; | |
910 const int colsLeft = kR11_EACBlockSz; | |
911 | |
912 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
913 | |
914 // While we can keep advancing, just keep writing the block. | |
915             uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
916 while((finalX - curX) >= kR11_EACBlockSz) { | |
917 *outPtr = lastBlock; | |
918 ++outPtr; | |
919 curX += kR11_EACBlockSz; | |
920 } | |
921 } | |
922 | |
923 // If we haven't advanced within the block then do so. | |
924 if (curX < finalX) { | |
925 const int col = curX & 3; | |
926 const int colsLeft = finalX - curX; | |
927 | |
928 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
929 | |
930 curX += colsLeft; | |
931 } | |
932 | |
933 SkASSERT(curX == finalX); | |
934 | |
935 // Figure out what the next advancement is... | |
936 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
937 if (nextX[i] == finalX) { | |
938 const int16_t run = *(fBufferedRuns[i].fRuns); | |
939 fBufferedRuns[i].fRuns += run; | |
940 fBufferedRuns[i].fAlphas += run; | |
941 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
942 nextX[i] += *(fBufferedRuns[i].fRuns); | |
943 } | |
944 } | |
945 | |
946 finalX = 0xFFFFF; | |
947 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
948 finalX = SkMin32(nextX[i], finalX); | |
949 } | |
950 } | |
951 | |
952 // If we didn't land on a block boundary, output the block... | |
953 if ((curX & 3) > 1) { | |
954 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
955 } | |
956 | |
957 fNextRun = 0; | |
958 } | |
959 | |
960 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { | 579 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { |
961 return new R11_EACBlitter(width, height, outputBuffer); | 580 return new |
| 581 SkTCompressedAlphaBlitter<4, 8, compress_block_vertical> |
| 582 (width, height, outputBuffer); |
962 } | 583 } |
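Judging from this call and the new compress_block_vertical signature above, the template arguments are the block dimension (4), the compressed block size in bytes (8), and a per-block compression proc. The typedef below is only a sketch of the proc's assumed shape; the real declaration lives in SkTextureCompressor_Blitter.h and may differ.

    // Assumed shape of the proc parameter, matching compress_block_vertical above.
    typedef void (*CompressA8BlockProc)(uint8_t* dstBlock /* 8 bytes out */,
                                        const uint8_t* srcBlock /* 16 alphas in */);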
963 | 584 |
964 } // namespace SkTextureCompressor | 585 } // namespace SkTextureCompressor |