OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
| 9 #include "SkTextureCompressor_Blitter.h" |
9 | 10 |
10 #include "SkEndian.h" | 11 #include "SkEndian.h" |
11 | 12 |
12 // #define COMPRESS_R11_EAC_SLOW 1 | 13 // #define COMPRESS_R11_EAC_SLOW 1 |
13 // #define COMPRESS_R11_EAC_FAST 1 | 14 // #define COMPRESS_R11_EAC_FAST 1 |
14 #define COMPRESS_R11_EAC_FASTEST 1 | 15 #define COMPRESS_R11_EAC_FASTEST 1 |
15 | 16 |
16 // Blocks compressed into R11 EAC are represented as follows: | 17 // Blocks compressed into R11 EAC are represented as follows: |
17 // 0000000000000000000000000000000000000000000000000000000000000000 | 18 // 0000000000000000000000000000000000000000000000000000000000000000 |
18 // |base_cw|mod|mul| ----------------- indices ------------------- | 19 // |base_cw|mod|mul| ----------------- indices ------------------- |
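For reference, the fields named in the layout above can be pulled out of a 64-bit block as sketched below. The struct and helper are illustrative only (they are not part of this file); the field widths assumed here are the standard R11 EAC ones: an 8-bit base codeword, two 4-bit fields, and 48 bits holding sixteen 3-bit indices.

    #include <cstdint>

    struct R11EACBlockFields {
        uint8_t  baseCodeword;   // |base_cw|
        uint8_t  mod;            // |mod|
        uint8_t  mul;            // |mul|
        uint64_t indices;        // sixteen 3-bit selectors, one per pixel
    };

    static inline R11EACBlockFields unpack_r11eac_block(uint64_t block) {
        R11EACBlockFields f;
        f.baseCodeword = static_cast<uint8_t>(block >> 56);
        f.mod          = static_cast<uint8_t>((block >> 52) & 0xF);
        f.mul          = static_cast<uint8_t>((block >> 48) & 0xF);
        f.indices      = block & ((1ULL << 48) - 1);
        return f;
    }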
(...skipping 276 matching lines...) |
295 *encPtr = proc(block); | 296 *encPtr = proc(block); |
296 ++encPtr; | 297 ++encPtr; |
297 } | 298 } |
298 src += 4 * rowBytes; | 299 src += 4 * rowBytes; |
299 } | 300 } |
300 | 301 |
301 return true; | 302 return true; |
302 } | 303 } |
303 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 304 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
304 | 305 |
| 306 // This function converts an integer containing four bytes of alpha |
| 307 // values into an integer containing four bytes of indices into R11 EAC. |
| 308 // Note, there needs to be a mapping of indices: |
| 309 // 0 1 2 3 4 5 6 7 |
| 310 // 3 2 1 0 4 5 6 7 |
| 311 // |
| 312 // To compute this, we first negate each byte, and then add three, which |
| 313 // gives the mapping |
| 314 // 3 2 1 0 -1 -2 -3 -4 |
| 315 // |
| 316 // Then we mask out the negative values, take their absolute value, and |
| 317 // add three. |
| 318 // |
| 319 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 |
| 320 static inline uint32_t convert_indices(uint32_t x) { |
| 321 // Take the top three bits... |
| 322 x = (x & 0xE0E0E0E0) >> 5; |
| 323 |
| 324 // Negate... |
| 325 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); |
| 326 |
| 327 // Add three |
| 328 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; |
| 329 x = ((x ^ 0x03030303) & 0x80808080) ^ s; |
| 330 |
| 331 // Absolute value |
| 332 const uint32_t a = x & 0x80808080; |
| 333 const uint32_t b = a >> 7; |
| 334 |
| 335 // Aside: mask negatives (m is three if the byte was negative) |
| 336 const uint32_t m = (a >> 6) | b; |
| 337 |
| 338 // .. continue absolute value |
| 339 x = (x ^ ((a - b) | a)) + b; |
| 340 |
| 341 // Add three |
| 342 return x + m; |
| 343 } |
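For comparison, a byte-at-a-time reference of the same remapping (illustrative only, not in the source): each byte's top three bits give a value v in [0, 7], and v maps to 3 - v when v < 4 and to v otherwise.

    static inline uint32_t convert_indices_reference(uint32_t x) {
        uint32_t result = 0;
        for (int i = 0; i < 4; ++i) {
            const uint32_t v = (x >> (8 * i + 5)) & 0x7;    // top three bits of byte i
            const uint32_t mapped = (v < 4) ? (3 - v) : v;  // 0 1 2 3 4 5 6 7 -> 3 2 1 0 4 5 6 7
            result |= mapped << (8 * i);
        }
        return result;
    }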
| 344 |
305 #if COMPRESS_R11_EAC_FASTEST | 345 #if COMPRESS_R11_EAC_FASTEST |
306 template<unsigned shift> | 346 template<unsigned shift> |
307 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { | 347 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { |
308 const uint64_t t = (x ^ (x >> shift)) & mask; | 348 const uint64_t t = (x ^ (x >> shift)) & mask; |
309 return x ^ t ^ (t << shift); | 349 return x ^ t ^ (t << shift); |
310 } | 350 } |
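swap_shift is the classic delta swap: it exchanges the bits selected by mask with the bits shift positions above them and leaves everything else untouched. A tiny illustrative check, assuming swap_shift is visible (not part of the source):

    #include <cassert>
    #include <cstdint>

    int main() {
        // Exchange the low byte with the byte above it.
        assert(swap_shift<8>(0x0000AABBULL, 0xFFULL) == 0x0000BBAAULL);
        return 0;
    }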
311 | 351 |
312 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { | 352 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { |
313 // If our 3-bit block indices are laid out as: | 353 // If our 3-bit block indices are laid out as: |
314 // a b c d | 354 // a b c d |
(...skipping 54 matching lines...) |
369 | 409 |
370 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o | 410 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o |
371 | 411 |
372     x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); | 412     x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); |
373 #endif | 413 #endif |
374 | 414 |
375 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | 415 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p |
376 return x; | 416 return x; |
377 } | 417 } |
378 | 418 |
379 // This function converts an integer containing four bytes of alpha | |
380 // values into an integer containing four bytes of indices into R11 EAC. | |
381 // Note, there needs to be a mapping of indices: | |
382 // 0 1 2 3 4 5 6 7 | |
383 // 3 2 1 0 4 5 6 7 | |
384 // | |
385 // To compute this, we first negate each byte, and then add three, which | |
386 // gives the mapping | |
387 // 3 2 1 0 -1 -2 -3 -4 | |
388 // | |
389 // Then we mask out the negative values, take their absolute value, and | |
390 // add three. | |
391 // | |
392 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 | |
393 static inline uint32_t convert_indices(uint32_t x) { | |
394 // Take the top three bits... | |
395 x = (x & 0xE0E0E0E0) >> 5; | |
396 | |
397 // Negate... | |
398 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); | |
399 | |
400 // Add three | |
401 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; | |
402 x = ((x ^ 0x03030303) & 0x80808080) ^ s; | |
403 | |
404 // Absolute value | |
405 const uint32_t a = x & 0x80808080; | |
406 const uint32_t b = a >> 7; | |
407 | |
408 // Aside: mask negatives (m is three if the byte was negative) | |
409 const uint32_t m = (a >> 6) | b; | |
410 | |
411 // .. continue absolute value | |
412 x = (x ^ ((a - b) | a)) + b; | |
413 | |
414 // Add three | |
415 return x + m; | |
416 } | |
417 | |
418 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block | 419 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block |
419 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and | 420 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and |
420 // tries to optimize where it can using SIMD. | 421 // tries to optimize where it can using SIMD. |
421 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { | 422 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { |
422 // Store each row of alpha values in an integer | 423 // Store each row of alpha values in an integer |
423 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); | 424 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); |
424     const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); | 425     const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); |
425     const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); | 426     const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); |
426     const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); | 427     const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); |
427 | 428 |
(...skipping 78 matching lines...) |
506 ((x >> 13) & (7 << 3)) | | 507 ((x >> 13) & (7 << 3)) | |
507 ((x >> 2) & (7 << 6)) | | 508 ((x >> 2) & (7 << 6)) | |
508 ((x << 9) & (7 << 9)); | 509 ((x << 9) & (7 << 9)); |
509 #endif | 510 #endif |
510 } | 511 } |
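The scalar path ending above packs the low three bits of each byte of x into a 12-bit value, with byte 0 of x landing in the top index slot (bits 9..11) and byte 3 in the bottom (bits 0..2). An equivalent loop-based sketch, illustrative only; it assumes the term elided in the skipped lines maps the top byte of x into bits 0..2, consistent with the pattern of the visible terms:

    static inline uint32_t pack_indices_vertical_reference(uint32_t x) {
        uint32_t result = 0;
        for (int i = 0; i < 4; ++i) {
            const uint32_t idx = (x >> (8 * i)) & 0x7;  // 3-bit index from byte i
            result |= idx << (9 - 3 * i);               // byte 0 -> bits 9..11, byte 3 -> bits 0..2
        }
        return result;
    }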
511 | 512 |
512 // This function returns the compressed format of a block given as four columns of | 513 // This function returns the compressed format of a block given as four columns of |
513 // alpha values. Each column is assumed to be loaded from top to bottom, and hence | 514 // alpha values. Each column is assumed to be loaded from top to bottom, and hence |
514 // must first be converted to indices and then packed into the resulting 64-bit | 515 // must first be converted to indices and then packed into the resulting 64-bit |
515 // integer. | 516 // integer. |
516 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0, | 517 inline void compress_block_vertical(uint8_t* dstPtr, const uint8_t *block) { |
517 const uint32_t alphaColumn1, | 518 |
518 const uint32_t alphaColumn2, | 519 const uint32_t* src = reinterpret_cast<const uint32_t*>(block); |
519 const uint32_t alphaColumn3) { | 520 uint64_t* dst = reinterpret_cast<uint64_t*>(dstPtr); |
| 521 |
| 522 const uint32_t alphaColumn0 = src[0]; |
| 523 const uint32_t alphaColumn1 = src[1]; |
| 524 const uint32_t alphaColumn2 = src[2]; |
| 525 const uint32_t alphaColumn3 = src[3]; |
520 | 526 |
521 if (alphaColumn0 == alphaColumn1 && | 527 if (alphaColumn0 == alphaColumn1 && |
522 alphaColumn2 == alphaColumn3 && | 528 alphaColumn2 == alphaColumn3 && |
523 alphaColumn0 == alphaColumn2) { | 529 alphaColumn0 == alphaColumn2) { |
524 | 530 |
525 if (0 == alphaColumn0) { | 531 if (0 == alphaColumn0) { |
526 // Transparent | 532 // Transparent |
527 return 0x0020000000002000ULL; | 533 *dst = 0x0020000000002000ULL; |
| 534 return; |
528 } | 535 } |
529 else if (0xFFFFFFFF == alphaColumn0) { | 536 else if (0xFFFFFFFF == alphaColumn0) { |
530 // Opaque | 537 // Opaque |
531 return 0xFFFFFFFFFFFFFFFFULL; | 538 *dst = 0xFFFFFFFFFFFFFFFFULL; |
| 539 return; |
532 } | 540 } |
533 } | 541 } |
534 | 542 |
535 const uint32_t indexColumn0 = convert_indices(alphaColumn0); | 543 const uint32_t indexColumn0 = convert_indices(alphaColumn0); |
536 const uint32_t indexColumn1 = convert_indices(alphaColumn1); | 544 const uint32_t indexColumn1 = convert_indices(alphaColumn1); |
537 const uint32_t indexColumn2 = convert_indices(alphaColumn2); | 545 const uint32_t indexColumn2 = convert_indices(alphaColumn2); |
538 const uint32_t indexColumn3 = convert_indices(alphaColumn3); | 546 const uint32_t indexColumn3 = convert_indices(alphaColumn3); |
539 | 547 |
540 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); | 548 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); |
541 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); | 549 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); |
542 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); | 550 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); |
543 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); | 551 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); |
544 | 552 |
545 return SkEndian_SwapBE64(0x8490000000000000ULL | | 553 *dst = SkEndian_SwapBE64(0x8490000000000000ULL | |
546 (static_cast<uint64_t>(packedIndexColumn0) << 36) | | 554 (static_cast<uint64_t>(packedIndexColumn0) << 36) | |
547 (static_cast<uint64_t>(packedIndexColumn1) << 24) | | 555 (static_cast<uint64_t>(packedIndexColumn1) << 24) | |
548 static_cast<uint64_t>(packedIndexColumn2 << 12) | | 556 static_cast<uint64_t>(packedIndexColumn2 << 12) | |
549 static_cast<uint64_t>(packedIndexColumn3)); | 557 static_cast<uint64_t>(packedIndexColumn3)); |
550 | |
551 } | |
552 | |
553 // Updates the block whose columns are stored in blockColN. curAlphai is expected | |
554 // to store, as an integer, the four alpha values that will be placed within each | |
555 // of the columns in the range [col, col+colsLeft). | |
556 static inline void update_block_columns(uint32_t* block, const int col, | |
557                                          const int colsLeft, const uint32_t curAlphai) { | |
558 SkASSERT(NULL != block); | |
559 SkASSERT(col + colsLeft <= 4); | |
560 | |
561 for (int i = col; i < (col + colsLeft); ++i) { | |
562 block[i] = curAlphai; | |
563 } | |
564 } | 558 } |
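A minimal usage sketch of the new compress_block_vertical signature (illustrative only, not from the source): block points at sixteen alpha bytes interpreted as four 32-bit columns, and dstPtr receives the 8-byte compressed block, so it must be suitably aligned for the uint64_t store.

    #include <cstdint>
    #include <cstring>

    void compress_one_opaque_block() {
        uint8_t alphaColumns[16];                               // four columns of four alphas each
        memset(alphaColumns, 0xFF, sizeof(alphaColumns));       // a fully opaque block
        alignas(8) uint8_t compressedBlock[8];                  // aligned for the uint64_t store
        compress_block_vertical(compressedBlock, alphaColumns); // takes the opaque fast path
    }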
565 | 559 |
566 //////////////////////////////////////////////////////////////////////////////// | 560 //////////////////////////////////////////////////////////////////////////////// |
567 | 561 |
568 namespace SkTextureCompressor { | 562 namespace SkTextureCompressor { |
569 | 563 |
570 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { | 564 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { |
571 | 565 |
572 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 566 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
573 | 567 |
574     return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); | 568     return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); |
575 | 569 |
576 #elif COMPRESS_R11_EAC_FASTEST | 570 #elif COMPRESS_R11_EAC_FASTEST |
577 | 571 |
578 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); | 572 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); |
579 | 573 |
580 #else | 574 #else |
581 #error "Must choose R11 EAC algorithm" | 575 #error "Must choose R11 EAC algorithm" |
582 #endif | 576 #endif |
583 } | 577 } |
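An illustrative call (not from the source): each 4x4 block of alpha becomes one 64-bit block, so the destination needs (width / 4) * (height / 4) * 8 bytes, and width and height are assumed to be multiples of four.

    #include <cstdint>
    #include <vector>

    void compress_example() {
        const int width = 64, height = 64, rowBytes = width;
        std::vector<uint8_t> alpha(width * height, 0x80);            // A8 source, constant coverage
        std::vector<uint8_t> r11eac((width / 4) * (height / 4) * 8); // 8 bytes per 4x4 block
        bool ok = SkTextureCompressor::CompressA8ToR11EAC(r11eac.data(), alpha.data(),
                                                          width, height, rowBytes);
        (void)ok;
    }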
584 | 578 |
585 // This class implements a blitter that blits directly into a buffer that will | |
586 // be used as an R11 EAC compressed texture. We compute this buffer by | |
587 // buffering four scan lines and then outputting them all at once. This blitter | |
588 // is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType. | |
589 class R11_EACBlitter : public SkBlitter { | |
590 public: | |
591 R11_EACBlitter(int width, int height, void *compressedBuffer); | |
592 virtual ~R11_EACBlitter() { this->flushRuns(); } | |
593 | |
594 // Blit a horizontal run of one or more pixels. | |
595 virtual void blitH(int x, int y, int width) SK_OVERRIDE { | |
596 // This function is intended to be called from any standard RGB | |
597 // buffer, so we should never encounter it. However, if some code | |
598 // path does end up here, then this needs to be investigated. | |
599 SkFAIL("Not implemented!"); | |
600 } | |
601 | |
602 // Blit a horizontal run of antialiased pixels; runs[] is a *sparse* | |
603 // zero-terminated run-length encoding of spans of constant alpha values. | |
604 virtual void blitAntiH(int x, int y, | |
605 const SkAlpha antialias[], | |
606 const int16_t runs[]) SK_OVERRIDE; | |
607 | |
608 // Blit a vertical run of pixels with a constant alpha value. | |
609 virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE { | |
610 // This function is currently not implemented. It is not explicitly | |
611 // required by the contract, but if at some time a code path runs into | |
612     // this function (which is entirely possible), it needs to be implemented. | |
613 // | |
614 // TODO (krajcevski): | |
615 // This function will be most easily implemented in one of two ways: | |
616 // 1. Buffer each vertical column value and then construct a list | |
617 // of alpha values and output all of the blocks at once. This only | |
618 // requires a write to the compressed buffer | |
619 // 2. Replace the indices of each block with the proper indices based | |
620     //    on the alpha value. This requires a read and write of the compressed | |
621 // buffer, but much less overhead. | |
622 SkFAIL("Not implemented!"); | |
623 } | |
624 | |
625 // Blit a solid rectangle one or more pixels wide. | |
626 virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE { | |
627 // Analogous to blitRow, this function is intended for RGB targets | |
628     // and should never be called by this blitter. Any calls to this function | |
629 // are probably a bug and should be investigated. | |
630 SkFAIL("Not implemented!"); | |
631 } | |
632 | |
633 // Blit a rectangle with one alpha-blended column on the left, | |
634 // width (zero or more) opaque pixels, and one alpha-blended column | |
635 // on the right. The result will always be at least two pixels wide. | |
636 virtual void blitAntiRect(int x, int y, int width, int height, | |
637                               SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE { | |
638 // This function is currently not implemented. It is not explicitly | |
639 // required by the contract, but if at some time a code path runs into | |
640     // this function (which is entirely possible), it needs to be implemented. | |
641 // | |
642 // TODO (krajcevski): | |
643 // This function will be most easily implemented as follows: | |
644 // 1. If width/height are smaller than a block, then update the | |
645 // indices of the affected blocks. | |
646 // 2. If width/height are larger than a block, then construct a 9-patch | |
647 // of block encodings that represent the rectangle, and write them | |
648 // to the compressed buffer as necessary. Whether or not the blocks | |
649 // are overwritten by zeros or just their indices are updated is up | |
650 // to debate. | |
651 SkFAIL("Not implemented!"); | |
652 } | |
653 | |
654 // Blit a pattern of pixels defined by a rectangle-clipped mask; | |
655 // typically used for text. | |
656 virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE { | |
657 // This function is currently not implemented. It is not explicitly | |
658 // required by the contract, but if at some time a code path runs into | |
659     // this function (which is entirely possible), it needs to be implemented. | |
660 // | |
661 // TODO (krajcevski): | |
662 // This function will be most easily implemented in the same way as | |
663 // blitAntiRect above. | |
664 SkFAIL("Not implemented!"); | |
665 } | |
666 | |
667 // If the blitter just sets a single value for each pixel, return the | |
668 // bitmap it draws into, and assign value. If not, return NULL and ignore | |
669 // the value parameter. | |
670 virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE { | |
671 return NULL; | |
672 } | |
673 | |
674 /** | |
675 * Compressed texture blitters only really work correctly if they get | |
676  * four blocks at a time. That being said, this blitter tries its best | |
677 * to preserve semantics if blitAntiH doesn't get called in too many | |
678 * weird ways... | |
679 */ | |
680 virtual int requestRowsPreserved() const { return kR11_EACBlockSz; } | |
681 | |
682 protected: | |
683 virtual void onNotifyFinished() { this->flushRuns(); } | |
684 | |
685 private: | |
686 static const int kR11_EACBlockSz = 4; | |
687 static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz; | |
688 | |
689 // The longest possible run of pixels that this blitter will receive. | |
690 // This is initialized in the constructor to 0x7FFE, which is one less | |
691 // than the largest positive 16-bit integer. We make sure that it's one | |
692 // less for debugging purposes. We also don't make this variable static | |
693 // in order to make sure that we can construct a valid pointer to it. | |
694 const int16_t kLongestRun; | |
695 | |
696 // Usually used in conjunction with kLongestRun. This is initialized to | |
697 // zero. | |
698 const SkAlpha kZeroAlpha; | |
699 | |
700 // This is the information that we buffer whenever we're asked to blit | |
701 // a row with this blitter. | |
702 struct BufferedRun { | |
703 const SkAlpha* fAlphas; | |
704 const int16_t* fRuns; | |
705 int fX, fY; | |
706 } fBufferedRuns[kR11_EACBlockSz]; | |
707 | |
708 // The next row (0-3) that we need to blit. This value should never exceed | |
709 // the number of rows that we have (kR11_EACBlockSz) | |
710 int fNextRun; | |
711 | |
712 // The width and height of the image that we're blitting | |
713 const int fWidth; | |
714 const int fHeight; | |
715 | |
716     // The R11 EAC buffer that we're blitting into. It is assumed that the buffer | |
717 // is large enough to store a compressed image of size fWidth*fHeight. | |
718 uint64_t* const fBuffer; | |
719 | |
720 // Various utility functions | |
721 int blocksWide() const { return fWidth / kR11_EACBlockSz; } | |
722 int blocksTall() const { return fHeight / kR11_EACBlockSz; } | |
723 int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; } | |
724 | |
725 // Returns the block index for the block containing pixel (x, y). Block | |
726 // indices start at zero and proceed in raster order. | |
727 int getBlockOffset(int x, int y) const { | |
728 SkASSERT(x < fWidth); | |
729 SkASSERT(y < fHeight); | |
730 const int blockCol = x / kR11_EACBlockSz; | |
731 const int blockRow = y / kR11_EACBlockSz; | |
732 return blockRow * this->blocksWide() + blockCol; | |
733 } | |
734 | |
735 // Returns a pointer to the block containing pixel (x, y) | |
736 uint64_t *getBlock(int x, int y) const { | |
737 return fBuffer + this->getBlockOffset(x, y); | |
738 } | |
739 | |
740 // The following function writes the buffered runs to compressed blocks. | |
741 // If fNextRun < 4, then we fill the runs that we haven't buffered with | |
742 // the constant zero buffer. | |
743 void flushRuns(); | |
744 }; | |
745 | |
746 | |
747 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) | |
748     // 0x7FFE is one less than the largest positive 16-bit int. We use it for | |
749 // debugging to make sure that we're properly setting the nextX distance | |
750 // in flushRuns(). | |
751 : kLongestRun(0x7FFE), kZeroAlpha(0) | |
752 , fNextRun(0) | |
753 , fWidth(width) | |
754 , fHeight(height) | |
755 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) | |
756 { | |
757 SkASSERT((width % kR11_EACBlockSz) == 0); | |
758 SkASSERT((height % kR11_EACBlockSz) == 0); | |
759 } | |
760 | |
761 void R11_EACBlitter::blitAntiH(int x, int y, | |
762 const SkAlpha* antialias, | |
763 const int16_t* runs) { | |
764 // Make sure that the new row to blit is either the first | |
765 // row that we're blitting, or it's exactly the next scan row | |
766     // after the last row that we blitted. This is to ensure that when | |
767 // we go to flush the runs, that they are all the same four | |
768 // runs. | |
769 if (fNextRun > 0 && | |
770 ((x != fBufferedRuns[fNextRun-1].fX) || | |
771 (y-1 != fBufferedRuns[fNextRun-1].fY))) { | |
772 this->flushRuns(); | |
773 } | |
774 | |
775 // Align the rows to a block boundary. If we receive rows that | |
776 // are not on a block boundary, then fill in the preceding runs | |
777 // with zeros. We do this by producing a single RLE that says | |
778 // that we have 0x7FFE pixels of zero (0x7FFE = 32766). | |
779 const int row = y & ~3; | |
780 while ((row + fNextRun) < y) { | |
781 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha; | |
782 fBufferedRuns[fNextRun].fRuns = &kLongestRun; | |
783 fBufferedRuns[fNextRun].fX = 0; | |
784 fBufferedRuns[fNextRun].fY = row + fNextRun; | |
785 ++fNextRun; | |
786 } | |
787 | |
788 // Make sure that our assumptions aren't violated... | |
789 SkASSERT(fNextRun == (y & 3)); | |
790 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y); | |
791 | |
792 // Set the values of the next run | |
793 fBufferedRuns[fNextRun].fAlphas = antialias; | |
794 fBufferedRuns[fNextRun].fRuns = runs; | |
795 fBufferedRuns[fNextRun].fX = x; | |
796 fBufferedRuns[fNextRun].fY = y; | |
797 | |
798 // If we've output four scanlines in a row that don't violate our | |
799 // assumptions, then it's time to flush them... | |
800 if (4 == ++fNextRun) { | |
801 this->flushRuns(); | |
802 } | |
803 } | |
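For clarity (an illustrative example, not from the source): the runs[]/antialias[] pair buffered above uses Skia's sparse zero-terminated RLE, where runs[i] is only meaningful at the first index of each span and a zero run terminates the row. Walking such a row looks roughly like this:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Three pixels at alpha 0x80 followed by five opaque pixels, then end of row.
        // Only runs[0], runs[3] and the terminating runs[8] are meaningful.
        const int16_t runs[]      = { 3, 0, 0, 5, 0, 0, 0, 0, 0 };
        const uint8_t antialias[] = { 0x80, 0, 0, 0xFF, 0, 0, 0, 0, 0 };

        for (int i = 0; runs[i] > 0; i += runs[i]) {
            printf("span of %d pixels at alpha 0x%02X\n", runs[i], antialias[i]);
        }
        return 0;
    }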
804 | |
805 void R11_EACBlitter::flushRuns() { | |
806 | |
807 // If we don't have any runs, then just return. | |
808 if (0 == fNextRun) { | |
809 return; | |
810 } | |
811 | |
812 #ifndef NDEBUG | |
813 // Make sure that if we have any runs, they all match | |
814 for (int i = 1; i < fNextRun; ++i) { | |
815 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1); | |
816 SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX); | |
817 } | |
818 #endif | |
819 | |
820 // If we dont have as many runs as we have rows, fill in the remaining | |
821 // runs with constant zeros. | |
822 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) { | |
823 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i; | |
824 fBufferedRuns[i].fX = fBufferedRuns[0].fX; | |
825 fBufferedRuns[i].fAlphas = &kZeroAlpha; | |
826 fBufferedRuns[i].fRuns = &kLongestRun; | |
827 } | |
828 | |
829 // Make sure that our assumptions aren't violated. | |
830 SkASSERT(fNextRun > 0 && fNextRun <= 4); | |
831 SkASSERT((fBufferedRuns[0].fY & 3) == 0); | |
832 | |
833 // The following logic walks four rows at a time and outputs compressed | |
834 // blocks to the buffer passed into the constructor. | |
835 // We do the following: | |
836 // | |
837 // c1 c2 c3 c4 | |
838 // ----------------------------------------------------------------------- | |
839 // ... | | | | | ----> fBufferedRuns[0] | |
840 // ----------------------------------------------------------------------- | |
841 // ... | | | | | ----> fBufferedRuns[1] | |
842 // ----------------------------------------------------------------------- | |
843 // ... | | | | | ----> fBufferedRuns[2] | |
844 // ----------------------------------------------------------------------- | |
845 // ... | | | | | ----> fBufferedRuns[3] | |
846 // ----------------------------------------------------------------------- | |
847 // | |
848 // curX -- the macro X value that we've gotten to. | |
849     // c1, c2, c3, c4 -- the integers that represent the columns of the current block | |
850     //     that we're operating on | |
851     // curAlphaColumn -- integer containing the column of alpha values from fBufferedRuns. | |
852     // nextX -- for each run, the next point at which we need to update curAlphaColumn | |
853 // after the value of curX. | |
854 // finalX -- the minimum of all the nextX values. | |
855 // | |
856 // curX advances to finalX outputting any blocks that it passes along | |
857 // the way. Since finalX will not change when we reach the end of a | |
858 // run, the termination criteria will be whenever curX == finalX at the | |
859 // end of a loop. | |
860 | |
861 // Setup: | |
862 uint32_t c[4] = { 0, 0, 0, 0 }; | |
863 uint32_t curAlphaColumn = 0; | |
864 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); | |
865 | |
866 int nextX[kR11_EACBlockSz]; | |
867 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
868 nextX[i] = 0x7FFFFF; | |
869 } | |
870 | |
871 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); | |
872 | |
873 // Populate the first set of runs and figure out how far we need to | |
874 // advance on the first step | |
875 int curX = 0; | |
876 int finalX = 0xFFFFF; | |
877 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
878 nextX[i] = *(fBufferedRuns[i].fRuns); | |
879 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
880 | |
881 finalX = SkMin32(nextX[i], finalX); | |
882 } | |
883 | |
884 // Make sure that we have a valid right-bound X value | |
885 SkASSERT(finalX < 0xFFFFF); | |
886 | |
887 // Run the blitter... | |
888 while (curX != finalX) { | |
889 SkASSERT(finalX >= curX); | |
890 | |
891 // Do we need to populate the rest of the block? | |
892 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { | |
893 const int col = curX & 3; | |
894 const int colsLeft = 4 - col; | |
895 SkASSERT(curX + colsLeft <= finalX); | |
896 | |
897 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
898 | |
899 // Write this block | |
900 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
901 ++outPtr; | |
902 curX += colsLeft; | |
903 } | |
904 | |
905 // If we can advance even further, then just keep memsetting the block | |
906 if ((finalX - curX) >= kR11_EACBlockSz) { | |
907 SkASSERT((curX & 3) == 0); | |
908 | |
909 const int col = 0; | |
910 const int colsLeft = kR11_EACBlockSz; | |
911 | |
912 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
913 | |
914 // While we can keep advancing, just keep writing the block. | |
915             uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
916 while((finalX - curX) >= kR11_EACBlockSz) { | |
917 *outPtr = lastBlock; | |
918 ++outPtr; | |
919 curX += kR11_EACBlockSz; | |
920 } | |
921 } | |
922 | |
923 // If we haven't advanced within the block then do so. | |
924 if (curX < finalX) { | |
925 const int col = curX & 3; | |
926 const int colsLeft = finalX - curX; | |
927 | |
928 update_block_columns(c, col, colsLeft, curAlphaColumn); | |
929 | |
930 curX += colsLeft; | |
931 } | |
932 | |
933 SkASSERT(curX == finalX); | |
934 | |
935 // Figure out what the next advancement is... | |
936 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
937 if (nextX[i] == finalX) { | |
938 const int16_t run = *(fBufferedRuns[i].fRuns); | |
939 fBufferedRuns[i].fRuns += run; | |
940 fBufferedRuns[i].fAlphas += run; | |
941 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
942 nextX[i] += *(fBufferedRuns[i].fRuns); | |
943 } | |
944 } | |
945 | |
946 finalX = 0xFFFFF; | |
947 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
948 finalX = SkMin32(nextX[i], finalX); | |
949 } | |
950 } | |
951 | |
952 // If we didn't land on a block boundary, output the block... | |
953 if ((curX & 3) > 1) { | |
954 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); | |
955 } | |
956 | |
957 fNextRun = 0; | |
958 } | |
959 | |
960 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { | 579 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { |
961 return new R11_EACBlitter(width, height, outputBuffer); | 580 return new |
| 581 SkTCompressedAlphaBlitter<4, 8, compress_block_vertical> |
| 582 (width, height, outputBuffer); |
962 } | 583 } |
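Judging from this call and the new compress_block_vertical signature above, the template arguments are the block dimension (4), the compressed block size in bytes (8), and a per-block compression proc. The typedef below is only a sketch of the proc's assumed shape; the real declaration lives in SkTextureCompressor_Blitter.h and may differ.

    // Assumed shape of the proc parameter, matching compress_block_vertical above.
    typedef void (*CompressA8BlockProc)(uint8_t* dstBlock /* 8 bytes out */,
                                        const uint8_t* srcBlock /* 16 alphas in */);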
963 | 584 |
964 } // namespace SkTextureCompressor | 585 } // namespace SkTextureCompressor |