OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
9 | 9 |
10 #include "SkBitmap.h" | 10 #include "SkBitmap.h" |
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
273 // This is really just for correctness, in all of my tests we | 273 // This is really just for correctness, in all of my tests we |
274 // never take this step. We don't lose too much perf here because | 274 // never take this step. We don't lose too much perf here because |
275 // most of the processing in this function is worth it for the | 275 // most of the processing in this function is worth it for the |
276 // 1 == nUniquePixels optimization. | 276 // 1 == nUniquePixels optimization. |
277 return compress_latc_block_bb(pixels); | 277 return compress_latc_block_bb(pixels); |
278 } else { | 278 } else { |
279 return compress_latc_block_bb_ignore_extremal(pixels); | 279 return compress_latc_block_bb_ignore_extremal(pixels); |
280 } | 280 } |
281 } | 281 } |
282 | 282 |
283 static bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src, | 283 static inline bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src, |
284 int width, int height, int rowBytes) { | 284 int width, int height, int rowBytes) { |
285 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_
latc_block); | 285 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_
latc_block); |
286 } | 286 } |
287 | 287 |
288 //////////////////////////////////////////////////////////////////////////////// | 288 //////////////////////////////////////////////////////////////////////////////// |
289 // | 289 // |
290 // R11 EAC Compressor | 290 // R11 EAC Compressor |
291 // | 291 // |
292 //////////////////////////////////////////////////////////////////////////////// | 292 //////////////////////////////////////////////////////////////////////////////// |
293 | 293 |
| 294 // #define COMPRESS_R11_EAC_SLOW 1 |
| 295 // #define COMPRESS_R11_EAC_FAST 1 |
| 296 #define COMPRESS_R11_EAC_FASTEST 1 |
| 297 |
294 // Blocks compressed into R11 EAC are represented as follows: | 298 // Blocks compressed into R11 EAC are represented as follows: |
295 // 0000000000000000000000000000000000000000000000000000000000000000 | 299 // 0000000000000000000000000000000000000000000000000000000000000000 |
296 // |base_cw|mod|mul| ----------------- indices ------------------- | 300 // |base_cw|mod|mul| ----------------- indices ------------------- |
297 // | 301 // |
298 // To reconstruct the value of a given pixel, we use the formula: | 302 // To reconstruct the value of a given pixel, we use the formula: |
299 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8) | 303 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8) |
300 // | 304 // |
301 // mod_val is chosen from a palette of values based on the index of the | 305 // mod_val is chosen from a palette of values based on the index of the |
302 // given pixel. The palette is chosen by the value stored in mod. | 306 // given pixel. The palette is chosen by the value stored in mod. |
303 // This formula returns a value between 0 and 2047, which is converted | 307 // This formula returns a value between 0 and 2047, which is converted |
(...skipping 16 matching lines...) Expand all Loading... |
320 {-2, -6, -8, -10, 1, 5, 7, 9}, | 324 {-2, -6, -8, -10, 1, 5, 7, 9}, |
321 {-2, -5, -8, -10, 1, 4, 7, 9}, | 325 {-2, -5, -8, -10, 1, 4, 7, 9}, |
322 {-2, -4, -8, -10, 1, 3, 7, 9}, | 326 {-2, -4, -8, -10, 1, 3, 7, 9}, |
323 {-2, -5, -7, -10, 1, 4, 6, 9}, | 327 {-2, -5, -7, -10, 1, 4, 6, 9}, |
324 {-3, -4, -7, -10, 2, 3, 6, 9}, | 328 {-3, -4, -7, -10, 2, 3, 6, 9}, |
325 {-1, -2, -3, -10, 0, 1, 2, 9}, | 329 {-1, -2, -3, -10, 0, 1, 2, 9}, |
326 {-4, -6, -8, -9, 3, 5, 7, 8}, | 330 {-4, -6, -8, -9, 3, 5, 7, 8}, |
327 {-3, -5, -7, -9, 2, 4, 6, 8} | 331 {-3, -5, -7, -9, 2, 4, 6, 8} |
328 }; | 332 }; |
329 | 333 |
| 334 #if COMPRESS_R11_EAC_SLOW |
330 // Pack the base codeword, palette, and multiplier into the 64 bits necessary | 335 // Pack the base codeword, palette, and multiplier into the 64 bits necessary |
331 // to decode it. | 336 // to decode it. |
332 static uint64_t pack_r11eac_block(uint16_t base_cw, uint16_t palette, uint16_t m
ultiplier, | 337 static uint64_t pack_r11eac_block(uint16_t base_cw, uint16_t palette, uint16_t m
ultiplier, |
333 uint64_t indices) { | 338 uint64_t indices) { |
334 SkASSERT(palette < 16); | 339 SkASSERT(palette < 16); |
335 SkASSERT(multiplier < 16); | 340 SkASSERT(multiplier < 16); |
336 SkASSERT(indices < (static_cast<uint64_t>(1) << 48)); | 341 SkASSERT(indices < (static_cast<uint64_t>(1) << 48)); |
337 | 342 |
338 const uint64_t b = static_cast<uint64_t>(base_cw) << 56; | 343 const uint64_t b = static_cast<uint64_t>(base_cw) << 56; |
339 const uint64_t m = static_cast<uint64_t>(multiplier) << 52; | 344 const uint64_t m = static_cast<uint64_t>(multiplier) << 52; |
340 const uint64_t p = static_cast<uint64_t>(palette) << 48; | 345 const uint64_t p = static_cast<uint64_t>(palette) << 48; |
341 return SkEndian_SwapBE64(b | m | p | indices); | 346 return SkEndian_SwapBE64(b | m | p | indices); |
342 } | 347 } |
343 | 348 |
344 // Given a base codeword, a modifier, and a multiplier, compute the proper | 349 // Given a base codeword, a modifier, and a multiplier, compute the proper |
345 // pixel value in the range [0, 2047]. | 350 // pixel value in the range [0, 2047]. |
346 static uint16_t compute_r11eac_pixel(int base_cw, int modifier, int multiplier)
{ | 351 static uint16_t compute_r11eac_pixel(int base_cw, int modifier, int multiplier)
{ |
347 int ret = (base_cw * 8 + 4) + (modifier * multiplier * 8); | 352 int ret = (base_cw * 8 + 4) + (modifier * multiplier * 8); |
348 return (ret > 2047)? 2047 : ((ret < 0)? 0 : ret); | 353 return (ret > 2047)? 2047 : ((ret < 0)? 0 : ret); |
349 } | 354 } |
350 | 355 |
351 // Compress a block into R11 EAC format. | 356 // Compress a block into R11 EAC format. |
352 // The compression works as follows: | 357 // The compression works as follows: |
353 // 1. Find the center of the span of the block's values. Use this as the base co
deword. | 358 // 1. Find the center of the span of the block's values. Use this as the base co
deword. |
354 // 2. Choose a multiplier based roughly on the size of the span of block values | 359 // 2. Choose a multiplier based roughly on the size of the span of block values |
355 // 3. Iterate through each palette and choose the one with the most accurate | 360 // 3. Iterate through each palette and choose the one with the most accurate |
356 // modifiers. | 361 // modifiers. |
357 static uint64_t compress_heterogeneous_r11eac_block(const uint8_t block[16]) { | 362 static inline uint64_t compress_heterogeneous_r11eac_block(const uint8_t block[1
6]) { |
358 // Find the center of the data... | 363 // Find the center of the data... |
359 uint16_t bmin = block[0]; | 364 uint16_t bmin = block[0]; |
360 uint16_t bmax = block[0]; | 365 uint16_t bmax = block[0]; |
361 for (int i = 1; i < 16; ++i) { | 366 for (int i = 1; i < 16; ++i) { |
362 bmin = SkTMin<uint16_t>(bmin, block[i]); | 367 bmin = SkTMin<uint16_t>(bmin, block[i]); |
363 bmax = SkTMax<uint16_t>(bmax, block[i]); | 368 bmax = SkTMax<uint16_t>(bmax, block[i]); |
364 } | 369 } |
365 | 370 |
366 uint16_t center = (bmax + bmin) >> 1; | 371 uint16_t center = (bmax + bmin) >> 1; |
367 SkASSERT(center <= 255); | 372 SkASSERT(center <= 255); |
368 | 373 |
369 // Based on the min and max, we can guesstimate a proper multiplier | 374 // Based on the min and max, we can guesstimate a proper multiplier |
370 // This is kind of a magic choice to start with. | 375 // This is kind of a magic choice to start with. |
371 uint16_t multiplier = (bmax - center) / 10; | 376 uint16_t multiplier = (bmax - center) / 10; |
372 | 377 |
373 // Now convert the block to 11 bits and transpose it to match | 378 // Now convert the block to 11 bits and transpose it to match |
374 // the proper layout | 379 // the proper layout |
375 uint16_t cblock[16]; | 380 uint16_t cblock[16]; |
376 for (int i = 0; i < 4; ++i) { | 381 for (int i = 0; i < 4; ++i) { |
377 for (int j = 0; j < 4; ++j) { | 382 for (int j = 0; j < 4; ++j) { |
378 int srcIdx = i*4+j; | 383 int srcIdx = i*4+j; |
379 int dstIdx = j*4+i; | 384 int dstIdx = j*4+i; |
380 cblock[dstIdx] = (block[srcIdx] << 3) | (block[srcIdx] >> 5); | 385 cblock[dstIdx] = (block[srcIdx] << 3) | (block[srcIdx] >> 5); |
381 } | 386 } |
382 } | 387 } |
383 | 388 |
384 // Finally, choose the proper palette and indices | 389 // Finally, choose the proper palette and indices |
385 uint32_t bestError = static_cast<uint32_t>(-1); | 390 uint32_t bestError = 0xFFFFFFFF; |
386 uint64_t bestIndices = 0; | 391 uint64_t bestIndices = 0; |
387 uint16_t bestPalette = 0; | 392 uint16_t bestPalette = 0; |
388 for (uint16_t paletteIdx = 0; paletteIdx < kNumR11EACPalettes; ++paletteIdx)
{ | 393 for (uint16_t paletteIdx = 0; paletteIdx < kNumR11EACPalettes; ++paletteIdx)
{ |
389 const int *palette = kR11EACModifierPalettes[paletteIdx]; | 394 const int *palette = kR11EACModifierPalettes[paletteIdx]; |
390 | 395 |
391 // Iterate through each pixel to find the best palette index | 396 // Iterate through each pixel to find the best palette index |
392 // and update the indices with the choice. Also store the error | 397 // and update the indices with the choice. Also store the error |
393 // for this palette to be compared against the best error... | 398 // for this palette to be compared against the best error... |
394 uint32_t error = 0; | 399 uint32_t error = 0; |
395 uint64_t indices = 0; | 400 uint64_t indices = 0; |
(...skipping 29 matching lines...) Expand all Loading... |
425 if (error < bestError) { | 430 if (error < bestError) { |
426 bestPalette = paletteIdx; | 431 bestPalette = paletteIdx; |
427 bestIndices = indices; | 432 bestIndices = indices; |
428 bestError = error; | 433 bestError = error; |
429 } | 434 } |
430 } | 435 } |
431 | 436 |
432 // Finally, pack everything together... | 437 // Finally, pack everything together... |
433 return pack_r11eac_block(center, bestPalette, multiplier, bestIndices); | 438 return pack_r11eac_block(center, bestPalette, multiplier, bestIndices); |
434 } | 439 } |
| 440 #endif // COMPRESS_R11_EAC_SLOW |
435 | 441 |
| 442 #if COMPRESS_R11_EAC_FAST |
| 443 // This function takes into account that most blocks that we compress have a gra
dation from |
| 444 // fully opaque to fully transparent. The compression scheme works by selecting
the |
| 445 // palette and multiplier that has the tightest fit to the 0-255 range. This is
encoded |
| 446 // as the block header (0x8490). The indices are then selected by considering th
e top |
| 447 // three bits of each alpha value. For alpha masks, this reduces the dynamic ran
ge from |
| 448 // 17 to 8, but the quality is still acceptable. |
| 449 // |
| 450 // There are a few caveats that need to be taken care of... |
| 451 // |
| 452 // 1. The block is read in as scanlines, so the indices are stored as: |
| 453 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
| 454 // However, the decompression routine reads them in column-major order, so th
ey |
| 455 // need to be packed as: |
| 456 // 0 4 8 12 1 5 9 13 2 6 10 14 3 7 11 15 |
| 457 // So when reading, they must be transposed. |
| 458 // |
| 459 // 2. We cannot use the top three bits as an index directly, since the R11 EAC p
alettes |
| 460 // above store the modulation values first decreasing and then increasing: |
| 461 // e.g. {-3, -6, -9, -15, 2, 5, 8, 14} |
| 462 // Hence, we need to convert the indices with the following mapping: |
| 463 // From: 0 1 2 3 4 5 6 7 |
| 464 // To: 3 2 1 0 4 5 6 7 |
| 465 static inline uint64_t compress_heterogeneous_r11eac_block(const uint8_t block[1
6]) { |
| 466 uint64_t retVal = static_cast<uint64_t>(0x8490) << 48; |
| 467 for(int i = 0; i < 4; ++i) { |
| 468 for(int j = 0; j < 4; ++j) { |
| 469 const int shift = 45-3*(j*4+i); |
| 470 SkASSERT(shift <= 45); |
| 471 const uint64_t idx = block[i*4+j] >> 5; |
| 472 SkASSERT(idx < 8); |
| 473 |
| 474 // !SPEED! This is slightly faster than having an if-statement. |
| 475 switch(idx) { |
| 476 case 0: |
| 477 case 1: |
| 478 case 2: |
| 479 case 3: |
| 480 retVal |= (3-idx) << shift; |
| 481 break; |
| 482 default: |
| 483 retVal |= idx << shift; |
| 484 break; |
| 485 } |
| 486 } |
| 487 } |
| 488 |
| 489 return SkEndian_SwapBE64(retVal); |
| 490 } |
| 491 #endif // COMPRESS_R11_EAC_FAST |
| 492 |
| 493 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
436 static uint64_t compress_r11eac_block(const uint8_t block[16]) { | 494 static uint64_t compress_r11eac_block(const uint8_t block[16]) { |
437 // Are all blocks a solid color? | 495 // Are all blocks a solid color? |
438 bool solid = true; | 496 bool solid = true; |
439 for (int i = 1; i < 16; ++i) { | 497 for (int i = 1; i < 16; ++i) { |
440 if (block[i] != block[0]) { | 498 if (block[i] != block[0]) { |
441 solid = false; | 499 solid = false; |
442 break; | 500 break; |
443 } | 501 } |
444 } | 502 } |
445 | 503 |
446 // Fully transparent? We know the encoding... | 504 if (solid) { |
447 if (solid && 0 == block[0]) { | 505 switch(block[0]) { |
448 // (0x0060 << 48) produces the following: | 506 // Fully transparent? We know the encoding... |
449 // basw_cw: 0 | 507 case 0: |
450 // mod: 6, palette: {-4, -7, -8, -11, 3, 6, 7, 10} | 508 // (0x0020 << 48) produces the following: |
451 // mod_val: -3 | 509 // base_cw: 0 |
452 // | 510 // mod: 0, palette: {-3, -6, -9, -15, 2, 5, 8, 14} |
453 // this gives the following formula: | 511 // multiplier: 2 |
454 // clamp[0, 2047](0*8+4+(-4)) = 0 | 512 // mod_val: -3 |
455 return SkEndian_SwapBE64(static_cast<uint64_t>(0x0060) << 48); | 513 // |
456 | 514 // this gives the following formula: |
457 // Fully opaque? We know this encoding too... | 515 // clamp[0, 2047](0*8+4+(-3)*2*8) = 0 |
458 } else if (solid && 255 == block[0]) { | 516 // |
459 // -1 produces the following: | 517 // Furthermore, it is impervious to endianness: |
460 // basw_cw: 255 | 518 // 0x0020000000002000ULL |
461 // mod: 15, palette: {-3, -5, -7, -9, 2, 4, 6, 8} | 519 // Will produce one pixel with index 2, which gives: |
462 // mod_val: 8 | 520 // clamp[0, 2047](0*8+4+(-9)*2*8) = 0 |
463 // | 521 return 0x0020000000002000ULL; |
464 // this gives the following formula: | 522 |
465 // clamp[0, 2047](255*8+4+8*8*8) = clamp[0, 2047](2556) = 2047 | 523 // Fully opaque? We know this encoding too... |
466 return static_cast<uint64_t>(-1); | 524 case 255: |
467 } | 525 |
468 | 526 // -1 produces the following: |
469 #if 0 | 527 // base_cw: 255 |
470 else if (solid) { | 528 // mod: 15, palette: {-3, -5, -7, -9, 2, 4, 6, 8} |
471 // !TODO! krajcevski: | 529 // mod_val: 8 |
472 // This will probably never happen, since we're using this format | 530 // |
473 // primarily for compressing alpha maps. Usually the only | 531 // this gives the following formula: |
474 // non-fullly opaque or fully transparent blocks are not a solid | 532 // clamp[0, 2047](255*8+4+8*8*8) = clamp[0, 2047](2556) = 2047 |
475 // intermediate color. If we notice that they are, then we can | 533 return 0xFFFFFFFFFFFFFFFFULL; |
476 // add another optimization... | 534 |
477 } | 535 default: |
| 536 // !TODO! krajcevski: |
| 537 // This will probably never happen, since we're using this forma
t |
| 538 // primarily for compressing alpha maps. Usually the only |
| 539 // non-fully opaque or fully transparent blocks are not a solid |
| 540 // intermediate color. If we notice that they are, then we can |
| 541 // add another optimization... |
| 542 break; |
| 543 } |
| 544 } |
| 545 |
| 546 return compress_heterogeneous_r11eac_block(block); |
| 547 } |
| 548 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
| 549 |
| 550 #if COMPRESS_R11_EAC_FASTEST |
| 551 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { |
| 552 // If our 3-bit block indices are laid out as: |
| 553 // a b c d |
| 554 // e f g h |
| 555 // i j k l |
| 556 // m n o p |
| 557 // |
| 558 // This function expects topRows and bottomRows to contain the first two row
s |
| 559 // of indices interleaved in the least significant bits of a and b. In other
words... |
| 560 // |
| 561 // If the architecture is big endian, then topRows and bottomRows will conta
in the following: |
| 562 // Bits 31-0: |
| 563 // a: 00 a e 00 b f 00 c g 00 d h |
| 564 // b: 00 i m 00 j n 00 k o 00 l p |
| 565 // |
| 566 // If the architecture is little endian, then topRows and bottomRows will co
ntain |
| 567 // the following: |
| 568 // Bits 31-0: |
| 569 // a: 00 d h 00 c g 00 b f 00 a e |
| 570 // b: 00 l p 00 k o 00 j n 00 i m |
| 571 // |
| 572 // This function returns a 48-bit packing of the form: |
| 573 // a e i m b f j n c g k o d h l p |
| 574 // |
| 575 // !SPEED! this function might be even faster if certain SIMD intrinsics are |
| 576 // used.. |
| 577 |
| 578 // For both architectures, we can figure out a packing of the bits by |
| 579 // using a shuffle and a few shift-rotates... |
| 580 uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>(
bottomRows); |
| 581 |
| 582 // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p |
| 583 |
| 584 uint64_t t = (x ^ (x >> 10)) & 0x3FC0003FC00000ULL; |
| 585 x = x ^ t ^ (t << 10); |
| 586 |
| 587 // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p |
| 588 |
| 589 x |= ((x << 52) & (0x3FULL << 52)); |
| 590 x = (x | ((x << 20) & (0x3FULL << 28))) >> 16; |
| 591 |
| 592 #if defined (SK_CPU_BENDIAN) |
| 593 // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n |
| 594 |
| 595 t = (x ^ (x >> 6)) & 0xFC0000ULL; |
| 596 x = x ^ t ^ (t << 6); |
| 597 |
| 598 // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n |
| 599 |
| 600 t = (x ^ (x >> 36)) & 0x3FULL; |
| 601 x = x ^ t ^ (t << 36); |
| 602 |
| 603 // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p |
| 604 |
| 605 t = (x ^ (x >> 12)) & 0xFFF000000ULL; |
| 606 x = x ^ t ^ (t << 12); |
| 607 |
| 608 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p |
| 609 return x; |
| 610 #else |
| 611 // If our CPU is little endian, then the above logic will |
| 612 // produce the following indices: |
| 613 // x: 00 00 00 00 00 00 00 00 c g i m d h b f l p j n a e k o |
| 614 |
| 615 t = (x ^ (x >> 6)) & 0xFC0000ULL; |
| 616 x = x ^ t ^ (t << 6); |
| 617 |
| 618 // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o |
| 619 |
| 620 t = (x ^ (x >> 36)) & 0xFC0ULL; |
| 621 x = x ^ t ^ (t << 36); |
| 622 |
| 623 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o |
| 624 |
| 625 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFU
LL); |
| 626 |
| 627 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p |
| 628 |
| 629 return x; |
478 #endif | 630 #endif |
479 | 631 } |
480 return compress_heterogeneous_r11eac_block(block); | 632 |
481 } | 633 // This function converts an integer containing four bytes of alpha |
482 | 634 // values into an integer containing four bytes of indices into R11 EAC. |
483 static bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, | 635 // Note, there needs to be a mapping of indices: |
484 int width, int height, int rowBytes) { | 636 // 0 1 2 3 4 5 6 7 |
| 637 // 3 2 1 0 4 5 6 7 |
| 638 // |
| 639 // To compute this, we first negate each byte, and then add three, which |
| 640 // gives the mapping |
| 641 // 3 2 1 0 -1 -2 -3 -4 |
| 642 // |
| 643 // Then we mask out the negative values, take their absolute value, and |
| 644 // add three. |
| 645 // |
| 646 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 |
| 647 static inline uint32_t convert_indices(uint32_t x) { |
| 648 // Take the top three bits... |
| 649 x = (x & 0xE0E0E0E0) >> 5; |
| 650 |
| 651 // Negate... |
| 652 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); |
| 653 |
| 654 // Add three |
| 655 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; |
| 656 x = ((x ^ 0x03030303) & 0x80808080) ^ s; |
| 657 |
| 658 // Absolute value |
| 659 const uint32_t a = x & 0x80808080; |
| 660 const uint32_t b = a >> 7; |
| 661 |
| 662 // Aside: mask negatives (m is three if the byte was negative) |
| 663 const uint32_t m = (a >> 6) | b; |
| 664 |
| 665 // .. continue absolute value |
| 666 x = (x ^ ((a - b) | a)) + b; |
| 667 |
| 668 // Add three |
| 669 return x + m; |
| 670 } |
| 671 |
| 672 // This function follows the same basic procedure as compress_heterogeneous_r11e
ac_block |
| 673 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores
and |
| 674 // tries to optimize where it can using SIMD. |
| 675 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { |
| 676 // Store each row of alpha values in an integer |
| 677 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); |
| 678 const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowByte
s)); |
| 679 const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBy
tes)); |
| 680 const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBy
tes)); |
| 681 |
| 682 // Check for solid blocks. The explanations for these values |
| 683 // can be found in the comments of compress_r11eac_block above |
| 684 if (alphaRow1 == alphaRow2 && alphaRow1 == alphaRow3 && alphaRow1 == alphaRo
w4) { |
| 685 if (0 == alphaRow1) { |
| 686 // Fully transparent block |
| 687 return 0x0020000000002000ULL; |
| 688 } else if (0xFFFFFFFF == alphaRow1) { |
| 689 // Fully opaque block |
| 690 return 0xFFFFFFFFFFFFFFFFULL; |
| 691 } |
| 692 } |
| 693 |
| 694 // Convert each integer of alpha values into an integer of indices |
| 695 const uint32_t indexRow1 = convert_indices(alphaRow1); |
| 696 const uint32_t indexRow2 = convert_indices(alphaRow2); |
| 697 const uint32_t indexRow3 = convert_indices(alphaRow3); |
| 698 const uint32_t indexRow4 = convert_indices(alphaRow4); |
| 699 |
| 700 // Interleave the indices from the top two rows and bottom two rows |
| 701 // prior to passing them to interleave6. Since each index is at most |
| 702 // three bits, then each byte can hold two indices... The way that the |
| 703 // compression scheme expects the packing allows us to efficiently pack |
| 704 // the top two rows and bottom two rows. Interleaving each 6-bit sequence |
| 705 // and tightly packing it into a uint64_t is a little trickier, which is |
| 706 // taken care of in interleave6. |
| 707 const uint32_t r1r2 = (indexRow1 << 3) | indexRow2; |
| 708 const uint32_t r3r4 = (indexRow3 << 3) | indexRow4; |
| 709 const uint64_t indices = interleave6(r1r2, r3r4); |
| 710 |
| 711 // Return the packed indices in the least significant bits with the magic h
eader |
| 712 return SkEndian_SwapBE64(0x8490000000000000ULL | indices); |
| 713 } |
| 714 |
| 715 static bool compress_a8_to_r11eac_fast(uint8_t* dst, const uint8_t* src, |
| 716 int width, int height, int rowBytes) { |
| 717 // Make sure that our data is well-formed enough to be considered for compre
ssion |
| 718 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) { |
| 719 return false; |
| 720 } |
| 721 |
| 722 const int blocksX = width >> 2; |
| 723 const int blocksY = height >> 2; |
| 724 |
| 725 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst); |
| 726 for (int y = 0; y < blocksY; ++y) { |
| 727 for (int x = 0; x < blocksX; ++x) { |
| 728 // Compress it |
| 729 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); |
| 730 ++encPtr; |
| 731 } |
| 732 src += 4 * rowBytes; |
| 733 } |
| 734 return true; |
| 735 } |
| 736 #endif // COMPRESS_R11_EAC_FASTEST |
| 737 |
| 738 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, |
| 739 int width, int height, int rowBytes) { |
| 740 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
485 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_
r11eac_block); | 741 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_
r11eac_block); |
| 742 #elif COMPRESS_R11_EAC_FASTEST |
| 743 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); |
| 744 #else |
| 745 #error "Must choose R11 EAC algorithm" |
| 746 #endif |
486 } | 747 } |
487 | 748 |
488 //////////////////////////////////////////////////////////////////////////////// | 749 //////////////////////////////////////////////////////////////////////////////// |
489 | 750 |
490 namespace SkTextureCompressor { | 751 namespace SkTextureCompressor { |
491 | 752 |
492 static size_t get_compressed_data_size(Format fmt, int width, int height) { | 753 static inline size_t get_compressed_data_size(Format fmt, int width, int height)
{ |
493 switch (fmt) { | 754 switch (fmt) { |
| 755 // These formats are 64 bits per 4x4 block. |
494 case kR11_EAC_Format: | 756 case kR11_EAC_Format: |
495 case kLATC_Format: | 757 case kLATC_Format: |
496 { | 758 { |
497 // The LATC format is 64 bits per 4x4 block. | |
498 static const int kLATCEncodedBlockSize = 8; | 759 static const int kLATCEncodedBlockSize = 8; |
499 | 760 |
500 int blocksX = width / kLATCBlockSize; | 761 const int blocksX = width / kLATCBlockSize; |
501 int blocksY = height / kLATCBlockSize; | 762 const int blocksY = height / kLATCBlockSize; |
502 | 763 |
503 return blocksX * blocksY * kLATCEncodedBlockSize; | 764 return blocksX * blocksY * kLATCEncodedBlockSize; |
504 } | 765 } |
505 | 766 |
506 default: | 767 default: |
507 SkFAIL("Unknown compressed format!"); | 768 SkFAIL("Unknown compressed format!"); |
508 return 0; | 769 return 0; |
509 } | 770 } |
510 } | 771 } |
511 | 772 |
512 typedef bool (*CompressBitmapProc)(uint8_t* dst, const uint8_t* src, | 773 typedef bool (*CompressBitmapProc)(uint8_t* dst, const uint8_t* src, |
513 int width, int height, int rowBytes); | 774 int width, int height, int rowBytes); |
514 | 775 |
515 bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcCol
orType, | 776 bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcCol
orType, |
516 int width, int height, int rowBytes, Format format)
{ | 777 int width, int height, int rowBytes, Format format)
{ |
517 | 778 |
518 CompressBitmapProc kProcMap[kFormatCnt][kLastEnum_SkColorType + 1]; | 779 CompressBitmapProc kProcMap[kFormatCnt][kLastEnum_SkColorType + 1]; |
519 memset(kProcMap, 0, sizeof(kProcMap)); | 780 memset(kProcMap, 0, sizeof(kProcMap)); |
520 | 781 |
521 kProcMap[kLATC_Format][kAlpha_8_SkColorType] = compress_a8_to_latc; | 782 kProcMap[kLATC_Format][kAlpha_8_SkColorType] = compress_a8_to_latc; |
522 kProcMap[kR11_EAC_Format][kAlpha_8_SkColorType] = compress_a8_to_r11eac; | 783 kProcMap[kR11_EAC_Format][kAlpha_8_SkColorType] = compress_a8_to_r11eac; |
523 | 784 |
524 CompressBitmapProc proc = kProcMap[format][srcColorType]; | 785 CompressBitmapProc proc = kProcMap[format][srcColorType]; |
525 if (NULL != proc) { | 786 if (NULL != proc) { |
526 return proc(dst, src, width, height, rowBytes); | 787 return proc(dst, src, width, height, rowBytes); |
527 } | 788 } |
528 | 789 |
529 return false; | 790 return false; |
530 } | 791 } |
531 | 792 |
532 SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) { | 793 SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) { |
533 SkAutoLockPixels alp(bitmap); | 794 SkAutoLockPixels alp(bitmap); |
534 | 795 |
535 int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bi
tmap.height()); | 796 int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bi
tmap.height()); |
536 const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels()); | 797 const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels()); |
537 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize
)); | 798 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize
)); |
538 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit
map.height(), | 799 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit
map.height(), |
539 bitmap.rowBytes(), format)) { | 800 bitmap.rowBytes(), format)) { |
540 return SkData::NewFromMalloc(dst, compressedDataSize); | 801 return SkData::NewFromMalloc(dst, compressedDataSize); |
541 } | 802 } |
542 | 803 |
543 sk_free(dst); | 804 sk_free(dst); |
544 return NULL; | 805 return NULL; |
545 } | 806 } |
546 | 807 |
547 } // namespace SkTextureCompressor | 808 } // namespace SkTextureCompressor |
OLD | NEW |