| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
| 9 | 9 |
| 10 #include "SkBitmap.h" | |
| 11 #include "SkData.h" | |
| 12 #include "SkEndian.h" | 10 #include "SkEndian.h" |
| 13 | 11 |
| 14 #include "SkTextureCompression_opts.h" | |
| 15 | |
| 16 //////////////////////////////////////////////////////////////////////////////// | |
| 17 // | |
| 18 // Utility Functions | |
| 19 // | |
| 20 //////////////////////////////////////////////////////////////////////////////// | |
| 21 | |
| 22 // Absolute difference between two values. More correct than SkTAbs(a - b) | |
| 23 // because it works on unsigned values. | |
| 24 template <typename T> inline T abs_diff(const T &a, const T &b) { | |
| 25 return (a > b) ? (a - b) : (b - a); | |
| 26 } | |
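A quick illustration of why this unsigned-aware helper matters (a hedged sketch by the editor, not part of the CL):

```cpp
// With unsigned operands, (a - b) wraps around before any abs() can fix it.
uint32_t a = 10, b = 20;
// a - b == 4294967286u here, so SkTAbs(a - b) cannot recover 10...
SkASSERT(abs_diff(a, b) == 10);  // ...but abs_diff compares first, then subtracts.
```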
| 27 | |
| 28 static bool is_extremal(uint8_t pixel) { | |
| 29 return 0 == pixel || 255 == pixel; | |
| 30 } | |
| 31 | |
| 32 typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]); | |
| 33 | |
| 34 // This function is used by both R11 EAC and LATC to compress 4x4 blocks | |
| 35 // of 8-bit alpha into 64-bit values that comprise the compressed data. | |
| 36 // For both formats, we need to make sure that the dimensions of the | |
| 37 // src pixels are divisible by 4, and copy 4x4 blocks one at a time | |
| 38 // for compression. | |
| 39 static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src, | |
| 40 int width, int height, int rowBytes, | |
| 41 A84x4To64BitProc proc) { | |
| 42 // Make sure that our data is well-formed enough to be considered for compression | |

| 43 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) { | |
| 44 return false; | |
| 45 } | |
| 46 | |
| 47 int blocksX = width >> 2; | |
| 48 int blocksY = height >> 2; | |
| 49 | |
| 50 uint8_t block[16]; | |
| 51 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst); | |
| 52 for (int y = 0; y < blocksY; ++y) { | |
| 53 for (int x = 0; x < blocksX; ++x) { | |
| 54 // Load block | |
| 55 for (int k = 0; k < 4; ++k) { | |
| 56 memcpy(block + k*4, src + k*rowBytes + 4*x, 4); | |
| 57 } | |
| 58 | |
| 59 // Compress it | |
| 60 *encPtr = proc(block); | |
| 61 ++encPtr; | |
| 62 } | |
| 63 src += 4 * rowBytes; | |
| 64 } | |
| 65 | |
| 66 return true; | |
| 67 } | |
| 68 | |
| 69 //////////////////////////////////////////////////////////////////////////////// | |
| 70 // | |
| 71 // LATC compressor | |
| 72 // | |
| 73 //////////////////////////////////////////////////////////////////////////////// | |
| 74 | |
| 75 // LATC compresses texels down into square 4x4 blocks | |
| 76 static const int kLATCPaletteSize = 8; | |
| 77 static const int kLATCBlockSize = 4; | |
| 78 static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize; | |
| 79 | |
| 80 // Generates an LATC palette. LATC constructs | |
| 81 // a palette of eight colors from LUM0 and LUM1 using the algorithm: | |
| 82 // | |
| 83 // LUM0, if lum0 > lum1 and code(x,y) == 0 | |
| 84 // LUM1, if lum0 > lum1 and code(x,y) == 1 | |
| 85 // (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2 | |
| 86 // (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3 | |
| 87 // (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4 | |
| 88 // (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5 | |
| 89 // (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6 | |
| 90 // ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7 | |
| 91 // | |
| 92 // LUM0, if lum0 <= lum1 and code(x,y) == 0 | |
| 93 // LUM1, if lum0 <= lum1 and code(x,y) == 1 | |
| 94 // (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2 | |
| 95 // (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3 | |
| 96 // (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4 | |
| 97 // ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5 | |
| 98 // 0, if lum0 <= lum1 and code(x,y) == 6 | |
| 99 // 255, if lum0 <= lum1 and code(x,y) == 7 | |
| 100 | |
| 101 static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) { | |
| 102 palette[0] = lum0; | |
| 103 palette[1] = lum1; | |
| 104 if (lum0 > lum1) { | |
| 105 for (int i = 1; i < 7; i++) { | |
| 106 palette[i+1] = ((7-i)*lum0 + i*lum1) / 7; | |
| 107 } | |
| 108 } else { | |
| 109 for (int i = 1; i < 5; i++) { | |
| 110 palette[i+1] = ((5-i)*lum0 + i*lum1) / 5; | |
| 111 } | |
| 112 palette[6] = 0; | |
| 113 palette[7] = 255; | |
| 114 } | |
| 115 } | |
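As a worked example of the lum0 > lum1 branch (illustration only; the values follow directly from the interpolation formula in the comment above):

```cpp
// For lum0 = 240, lum1 = 100, the palette is the two endpoints plus six
// evenly spaced interpolants along the segment between them:
uint8_t palette[8];
generate_latc_palette(palette, 240, 100);
// palette == { 240, 100, 220, 200, 180, 160, 140, 120 }
```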
| 116 | |
| 117 // Compress a block by using the bounding box of the pixels. It is assumed that | |
| 118 // there are no extremal pixels in this block; otherwise we would have used | |
| 119 // compress_latc_block_bb_ignore_extremal. | |
| 120 static uint64_t compress_latc_block_bb(const uint8_t pixels[]) { | |
| 121 uint8_t minVal = 255; | |
| 122 uint8_t maxVal = 0; | |
| 123 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { | |
| 124 minVal = SkTMin(pixels[i], minVal); | |
| 125 maxVal = SkTMax(pixels[i], maxVal); | |
| 126 } | |
| 127 | |
| 128 SkASSERT(!is_extremal(minVal)); | |
| 129 SkASSERT(!is_extremal(maxVal)); | |
| 130 | |
| 131 uint8_t palette[kLATCPaletteSize]; | |
| 132 generate_latc_palette(palette, maxVal, minVal); | |
| 133 | |
| 134 uint64_t indices = 0; | |
| 135 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { | |
| 136 | |
| 137 // Find the best palette index | |
| 138 uint8_t bestError = abs_diff(pixels[i], palette[0]); | |
| 139 uint8_t idx = 0; | |
| 140 for (int j = 1; j < kLATCPaletteSize; ++j) { | |
| 141 uint8_t error = abs_diff(pixels[i], palette[j]); | |
| 142 if (error < bestError) { | |
| 143 bestError = error; | |
| 144 idx = j; | |
| 145 } | |
| 146 } | |
| 147 | |
| 148 indices <<= 3; | |
| 149 indices |= idx; | |
| 150 } | |
| 151 | |
| 152 return | |
| 153 SkEndian_SwapLE64( | |
| 154 static_cast<uint64_t>(maxVal) | | |
| 155 (static_cast<uint64_t>(minVal) << 8) | | |
| 156 (indices << 16)); | |
| 157 } | |
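For reference, the fields packed by the return statement above can be read back as follows. This is a hypothetical helper for illustration, not part of the CL; it assumes the little-endian LATC layout used here (LUM0 in byte 0, LUM1 in byte 1, then sixteen 3-bit indices with pixel 0 in the low bits):

```cpp
// Hypothetical unpacking sketch mirroring compress_latc_block_bb's packing.
static void unpack_latc_block(uint64_t block,
                              uint8_t* lum0, uint8_t* lum1, uint64_t* indices) {
    block = SkEndian_SwapLE64(block);  // undo the swap (identity on little-endian hosts)
    *lum0 = block & 0xFF;              // maxVal was stored first
    *lum1 = (block >> 8) & 0xFF;       // then minVal
    *indices = block >> 16;            // 16 x 3-bit palette indices, pixel 0 lowest
}
```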
| 158 | |
| 159 // Compress a block by using the bounding box of the pixels without taking into | |
| 160 // account the extremal values. The generated palette will contain extremal values | |
| 161 // and fewer points along the line segment to interpolate. | |
| 162 static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) { | |
| 163 uint8_t minVal = 255; | |
| 164 uint8_t maxVal = 0; | |
| 165 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { | |
| 166 if (is_extremal(pixels[i])) { | |
| 167 continue; | |
| 168 } | |
| 169 | |
| 170 minVal = SkTMin(pixels[i], minVal); | |
| 171 maxVal = SkTMax(pixels[i], maxVal); | |
| 172 } | |
| 173 | |
| 174 SkASSERT(!is_extremal(minVal)); | |
| 175 SkASSERT(!is_extremal(maxVal)); | |
| 176 | |
| 177 uint8_t palette[kLATCPaletteSize]; | |
| 178 generate_latc_palette(palette, minVal, maxVal); | |
| 179 | |
| 180 uint64_t indices = 0; | |
| 181 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { | |
| 182 | |
| 183 // Find the best palette index | |
| 184 uint8_t idx = 0; | |
| 185 if (is_extremal(pixels[i])) { | |
| 186 if (0xFF == pixels[i]) { | |
| 187 idx = 7; | |
| 188 } else if (0 == pixels[i]) { | |
| 189 idx = 6; | |
| 190 } else { | |
| 191 SkFAIL("Pixel is extremal but not really?!"); | |
| 192 } | |
| 193 } else { | |
| 194 uint8_t bestError = abs_diff(pixels[i], palette[0]); | |
| 195 for (int j = 1; j < kLATCPaletteSize - 2; ++j) { | |
| 196 uint8_t error = abs_diff(pixels[i], palette[j]); | |
| 197 if (error < bestError) { | |
| 198 bestError = error; | |
| 199 idx = j; | |
| 200 } | |
| 201 } | |
| 202 } | |
| 203 | |
| 204 indices <<= 3; | |
| 205 indices |= idx; | |
| 206 } | |
| 207 | |
| 208 return | |
| 209 SkEndian_SwapLE64( | |
| 210 static_cast<uint64_t>(minVal) | | |
| 211 (static_cast<uint64_t>(maxVal) << 8) | | |
| 212 (indices << 16)); | |
| 213 } | |
| 214 | |
| 215 | |
| 216 // Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two | |
| 217 // values LUM0 and LUM1, and an index into the generated palette. Details of how | |
| 218 // the palette is generated can be found in the comments of generate_latc_palette above. | |
| 219 // | |
| 220 // We choose which palette type to use based on whether or not 'pixels' contains | |
| 221 // any extremal values (0 or 255). If there are extremal values, then we use the | |
| 222 // palette that has the extremal values built in. Otherwise, we use the full bounding | |
| 223 // box. | |
| 224 | |
| 225 static uint64_t compress_latc_block(const uint8_t pixels[]) { | |
| 226 // Collect unique pixels | |
| 227 int nUniquePixels = 0; | |
| 228 uint8_t uniquePixels[kLATCPixelsPerBlock]; | |
| 229 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { | |
| 230 bool foundPixel = false; | |
| 231 for (int j = 0; j < nUniquePixels; ++j) { | |
| 232 foundPixel = foundPixel || uniquePixels[j] == pixels[i]; | |
| 233 } | |
| 234 | |
| 235 if (!foundPixel) { | |
| 236 uniquePixels[nUniquePixels] = pixels[i]; | |
| 237 ++nUniquePixels; | |
| 238 } | |
| 239 } | |
| 240 | |
| 241 // If there's only one unique pixel, then our compression is easy. | |
| 242 if (1 == nUniquePixels) { | |
| 243 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8)); | |
| 244 | |
| 245 // Similarly, if there are only two unique pixels, then our compression is | |
| 246 // easy again: place the pixels in the block header, and assign the indices | |
| 247 // with one or zero depending on which pixel they belong to. | |
| 248 } else if (2 == nUniquePixels) { | |
| 249 uint64_t outBlock = 0; | |
| 250 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { | |
| 251 int idx = 0; | |
| 252 if (pixels[i] == uniquePixels[1]) { | |
| 253 idx = 1; | |
| 254 } | |
| 255 | |
| 256 outBlock <<= 3; | |
| 257 outBlock |= idx; | |
| 258 } | |
| 259 outBlock <<= 16; | |
| 260 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8)); | |
| 261 return SkEndian_SwapLE64(outBlock); | |
| 262 } | |
| 263 | |
| 264 // Count non-extremal pixel values | |
| 265 int nonExtremalPixels = 0; | |
| 266 for (int i = 0; i < nUniquePixels; ++i) { | |
| 267 if (!is_extremal(uniquePixels[i])) { | |
| 268 ++nonExtremalPixels; | |
| 269 } | |
| 270 } | |
| 271 | |
| 272 // If all the pixels are non-extremal, then compute the palette using | |
| 273 // the bounding box of all the pixels. | |
| 274 if (nonExtremalPixels == nUniquePixels) { | |
| 275 // This is really just for correctness; in all of my tests we | |
| 276 // never take this step. We don't lose too much perf here because | |
| 277 // most of the processing in this function is worth it for the | |
| 278 // 1 == nUniquePixels optimization. | |
| 279 return compress_latc_block_bb(pixels); | |
| 280 } else { | |
| 281 return compress_latc_block_bb_ignore_extremal(pixels); | |
| 282 } | |
| 283 } | |
| 284 | |
| 285 static inline bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src, | |
| 286 int width, int height, int rowBytes) { | |
| 287 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block); | |
| 288 } | |
| 289 | |
| 290 //////////////////////////////////////////////////////////////////////////////// | |
| 291 // | |
| 292 // R11 EAC Compressor | |
| 293 // | |
| 294 //////////////////////////////////////////////////////////////////////////////// | |
| 295 | |
| 296 // #define COMPRESS_R11_EAC_SLOW 1 | 12 // #define COMPRESS_R11_EAC_SLOW 1 |
| 297 // #define COMPRESS_R11_EAC_FAST 1 | 13 // #define COMPRESS_R11_EAC_FAST 1 |
| 298 #define COMPRESS_R11_EAC_FASTEST 1 | 14 #define COMPRESS_R11_EAC_FASTEST 1 |
| 299 | 15 |
| 300 // Blocks compressed into R11 EAC are represented as follows: | 16 // Blocks compressed into R11 EAC are represented as follows: |
| 301 // 0000000000000000000000000000000000000000000000000000000000000000 | 17 // 0000000000000000000000000000000000000000000000000000000000000000 |
| 302 // |base_cw|mod|mul| ----------------- indices ------------------- | 18 // |base_cw|mod|mul| ----------------- indices ------------------- |
| 303 // | 19 // |
| 304 // To reconstruct the value of a given pixel, we use the formula: | 20 // To reconstruct the value of a given pixel, we use the formula: |
| 305 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8) | 21 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8) |
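A hedged decode sketch of that formula (the EAC modifier table that turns mod and a pixel's 3-bit index into mod_val lives in the elided lines and the spec, so it is only named here, not reproduced):

```cpp
// Sketch only: reconstructs one 11-bit value from a block's fields, assuming
// mod_val has already been looked up from the standard EAC modifier table.
static inline uint16_t r11eac_decode_value(int base_cw, int mul, int mod_val) {
    int val = base_cw * 8 + 4 + mod_val * mul * 8;
    if (val < 0)    val = 0;     // clamp[0, 2047] from the comment above
    if (val > 2047) val = 2047;
    return static_cast<uint16_t>(val);
}
```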
| (...skipping 235 matching lines...) |
| 541 // primarily for compressing alpha maps. Usually the only | 257 // primarily for compressing alpha maps. Usually the only |
| 542 // non-fully opaque or fully transparent blocks are not a solid | 258 // non-fully opaque or fully transparent blocks are not a solid |
| 543 // intermediate color. If we notice that they are, then we can | 259 // intermediate color. If we notice that they are, then we can |
| 544 // add another optimization... | 260 // add another optimization... |
| 545 break; | 261 break; |
| 546 } | 262 } |
| 547 } | 263 } |
| 548 | 264 |
| 549 return compress_heterogeneous_r11eac_block(block); | 265 return compress_heterogeneous_r11eac_block(block); |
| 550 } | 266 } |
| 267 |
| 268 // This function is used by R11 EAC to compress 4x4 blocks |
| 269 // of 8-bit alpha into 64-bit values that comprise the compressed data. |
| 270 // We need to make sure that the dimensions of the src pixels are divisible |
| 271 // by 4, and copy 4x4 blocks one at a time for compression. |
| 272 typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]); |
| 273 |
| 274 static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src, |
| 275 int width, int height, int rowBytes, |
| 276 A84x4To64BitProc proc) { |
| 277 // Make sure that our data is well-formed enough to be considered for compression |
| 278 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) { |
| 279 return false; |
| 280 } |
| 281 |
| 282 int blocksX = width >> 2; |
| 283 int blocksY = height >> 2; |
| 284 |
| 285 uint8_t block[16]; |
| 286 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst); |
| 287 for (int y = 0; y < blocksY; ++y) { |
| 288 for (int x = 0; x < blocksX; ++x) { |
| 289 // Load block |
| 290 for (int k = 0; k < 4; ++k) { |
| 291 memcpy(block + k*4, src + k*rowBytes + 4*x, 4); |
| 292 } |
| 293 |
| 294 // Compress it |
| 295 *encPtr = proc(block); |
| 296 ++encPtr; |
| 297 } |
| 298 src += 4 * rowBytes; |
| 299 } |
| 300 |
| 301 return true; |
| 302 } |
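To make the plug-in structure concrete, here is a hedged sketch of wiring a trivial proc through the helper; the proc below is hypothetical and just emits an empty block:

```cpp
// Hypothetical proc: ignores the 4x4 alpha block and emits all-zero data.
static uint64_t zero_block_proc(const uint8_t block[]) { return 0; }

// dst must hold 8 bytes per 4x4 block; the call fails (returns false) if
// width or height is zero or not a multiple of 4.
bool ok = compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, zero_block_proc);
```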
| 551 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | 303 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
| 552 | 304 |
| 553 #if COMPRESS_R11_EAC_FASTEST | 305 #if COMPRESS_R11_EAC_FASTEST |
| 306 template<unsigned shift> |
| 307 static inline uint64_t swap_shift(uint64_t x, uint64_t mask) { |
| 308 const uint64_t t = (x ^ (x >> shift)) & mask; |
| 309 return x ^ t ^ (t << shift); |
| 310 } |
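swap_shift is the classic delta-swap bit trick: the bits selected by mask are exchanged with the bits shift positions above them. A tiny sanity check (illustrative only):

```cpp
// Exchanging the low nibble with the next nibble: 0xAB -> 0xBA.
// t = (x ^ (x >> 4)) & 0x0F captures the XOR difference of the two nibbles,
// and x ^ t ^ (t << 4) applies that difference to both, swapping them.
SkASSERT(swap_shift<4>(0xABULL, 0x0FULL) == 0xBAULL);
```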
| 311 |
| 554 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { | 312 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { |
| 555 // If our 3-bit block indices are laid out as: | 313 // If our 3-bit block indices are laid out as: |
| 556 // a b c d | 314 // a b c d |
| 557 // e f g h | 315 // e f g h |
| 558 // i j k l | 316 // i j k l |
| 559 // m n o p | 317 // m n o p |
| 560 // | 318 // |
| 561 // This function expects topRows and bottomRows to contain the first two rows | 319 // This function expects topRows and bottomRows to contain the first two rows |
| 562 // of indices interleaved in the least significant bits of a and b. In other words... | 320 // of indices interleaved in the least significant bits of a and b. In other words... |
| 563 // | 321 // |
| (...skipping 13 matching lines...) |
| 577 // | 335 // |
| 578 // !SPEED! this function might be even faster if certain SIMD intrinsics are | 336 // !SPEED! this function might be even faster if certain SIMD intrinsics are |
| 579 // used... | 337 // used... |
| 580 | 338 |
| 581 // For both architectures, we can figure out a packing of the bits by | 339 // For both architectures, we can figure out a packing of the bits by |
| 582 // using a shuffle and a few shift-rotates... | 340 // using a shuffle and a few shift-rotates... |
| 583 uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>(bottomRows); | 341 uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>(bottomRows); |
| 584 | 342 |
| 585 // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p | 343 // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p |
| 586 | 344 |
| 587 uint64_t t = (x ^ (x >> 10)) & 0x3FC0003FC00000ULL; | 345 x = swap_shift<10>(x, 0x3FC0003FC00000ULL); |
| 588 x = x ^ t ^ (t << 10); | |
| 589 | 346 |
| 590 // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p | 347 // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p |
| 591 | 348 |
| 592 x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 16; | 349 x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 16; |
| 593 | 350 |
| 594 // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n | 351 // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n |
| 595 | 352 |
| 596 t = (x ^ (x >> 6)) & 0xFC0000ULL; | 353 x = swap_shift<6>(x, 0xFC0000ULL); |
| 597 x = x ^ t ^ (t << 6); | |
| 598 | 354 |
| 599 #if defined (SK_CPU_BENDIAN) | 355 #if defined (SK_CPU_BENDIAN) |
| 600 // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n | 356 // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n |
| 601 | 357 |
| 602 t = (x ^ (x >> 36)) & 0x3FULL; | 358 x = swap_shift<36>(x, 0x3FULL); |
| 603 x = x ^ t ^ (t << 36); | |
| 604 | 359 |
| 605 // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p | 360 // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p |
| 606 | 361 |
| 607 t = (x ^ (x >> 12)) & 0xFFF000000ULL; | 362 x = swap_shift<12>(x, 0xFFF000000ULL); |
| 608 x = x ^ t ^ (t << 12); | |
| 609 | |
| 610 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | |
| 611 return x; | |
| 612 #else | 363 #else |
| 613 // If our CPU is little endian, then the above logic will | 364 // If our CPU is little endian, then the above logic will |
| 614 // produce the following indices: | 365 // produce the following indices: |
| 615 // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o | 366 // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o |
| 616 | 367 |
| 617 t = (x ^ (x >> 36)) & 0xFC0ULL; | 368 x = swap_shift<36>(x, 0xFC0ULL); |
| 618 x = x ^ t ^ (t << 36); | |
| 619 | 369 |
| 620 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o | 370 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o |
| 621 | 371 |
| 622 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); | 372 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); |
| 373 #endif |
| 623 | 374 |
| 624 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | 375 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p |
| 625 | |
| 626 return x; | 376 return x; |
| 627 #endif | |
| 628 } | 377 } |
| 629 | 378 |
| 630 // This function converts an integer containing four bytes of alpha | 379 // This function converts an integer containing four bytes of alpha |
| 631 // values into an integer containing four bytes of indices into R11 EAC. | 380 // values into an integer containing four bytes of indices into R11 EAC. |
| 632 // Note, there needs to be a mapping of indices: | 381 // Note, there needs to be a mapping of indices: |
| 633 // 0 1 2 3 4 5 6 7 | 382 // 0 1 2 3 4 5 6 7 |
| 634 // 3 2 1 0 4 5 6 7 | 383 // 3 2 1 0 4 5 6 7 |
| 635 // | 384 // |
| 636 // To compute this, we first negate each byte, and then add three, which | 385 // To compute this, we first negate each byte, and then add three, which |
| 637 // gives the mapping | 386 // gives the mapping |
| (...skipping 87 matching lines...) |
| 725 // Compress it | 474 // Compress it |
| 726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); | 475 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); |
| 727 ++encPtr; | 476 ++encPtr; |
| 728 } | 477 } |
| 729 src += 4 * rowBytes; | 478 src += 4 * rowBytes; |
| 730 } | 479 } |
| 731 return true; | 480 return true; |
| 732 } | 481 } |
| 733 #endif // COMPRESS_R11_EAC_FASTEST | 482 #endif // COMPRESS_R11_EAC_FASTEST |
| 734 | 483 |
| 484 //////////////////////////////////////////////////////////////////////////////// |
| 485 // |
| 486 // Utility functions used by the blitter |
| 487 // |
| 488 //////////////////////////////////////////////////////////////////////////////// |
| 489 |
| 735 // The R11 EAC format expects that indices are given in column-major order. Since | 490 // The R11 EAC format expects that indices are given in column-major order. Since |
| 736 // we receive alpha values in raster order, this usually means that we have to use | 491 // we receive alpha values in raster order, this usually means that we have to use |
| 737 // pack6 above to properly pack our indices. However, if our indices come from the | 492 // pack6 above to properly pack our indices. However, if our indices come from the |
| 738 // blitter, then each integer will be a column of indices, and hence can be efficiently | 493 // blitter, then each integer will be a column of indices, and hence can be efficiently |
| 739 // packed. This function takes the bottom three bits of each byte and places them in | 494 // packed. This function takes the bottom three bits of each byte and places them in |
| 740 // the least significant 12 bits of the resulting integer. | 495 // the least significant 12 bits of the resulting integer. |
| 741 static inline uint32_t pack_indices_vertical(uint32_t x) { | 496 static inline uint32_t pack_indices_vertical(uint32_t x) { |
| 742 #if defined (SK_CPU_BENDIAN) | 497 #if defined (SK_CPU_BENDIAN) |
| 743 return | 498 return |
| 744 (x & 7) | | 499 (x & 7) | |
| (...skipping 43 matching lines...) |
| 788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); | 543 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); |
| 789 | 544 |
| 790 return SkEndian_SwapBE64(0x8490000000000000ULL | | 545 return SkEndian_SwapBE64(0x8490000000000000ULL | |
| 791 (static_cast<uint64_t>(packedIndexColumn0) << 36) | | 546 (static_cast<uint64_t>(packedIndexColumn0) << 36) | |
| 792 (static_cast<uint64_t>(packedIndexColumn1) << 24) | | 547 (static_cast<uint64_t>(packedIndexColumn1) << 24) | |
| 793 static_cast<uint64_t>(packedIndexColumn2 << 12) | | 548 static_cast<uint64_t>(packedIndexColumn2 << 12) | |
| 794 static_cast<uint64_t>(packedIndexColumn3)); | 549 static_cast<uint64_t>(packedIndexColumn3)); |
| 795 | 550 |
| 796 } | 551 } |
| 797 | 552 |
| 798 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, | |
| 799 int width, int height, int rowBytes) { | |
| 800 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | |
| 801 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); | |
| 802 #elif COMPRESS_R11_EAC_FASTEST | |
| 803 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); | |
| 804 #else | |
| 805 #error "Must choose R11 EAC algorithm" | |
| 806 #endif | |
| 807 } | |
| 808 | |
| 809 // Updates the block whose columns are stored in blockColN. curAlphai is expected | 553 // Updates the block whose columns are stored in block[]. curAlphai is expected |
| 810 // to store, as an integer, the four alpha values that will be placed within each | 554 // to store, as an integer, the four alpha values that will be placed within each |
| 811 // of the columns in the range [col, col+colsLeft). | 555 // of the columns in the range [col, col+colsLeft). |
| 812 static inline void update_block_columns( | 556 static inline void update_block_columns(uint32_t* block, const int col, |
| 813 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blockCol4, | 557 const int colsLeft, const uint32_t curAlphai) { |
| 814 const int col, const int colsLeft, const uint32_t curAlphai) { | 558 SkASSERT(NULL != block); |
| 815 SkASSERT(NULL != blockCol1); | |
| 816 SkASSERT(NULL != blockCol2); | |
| 817 SkASSERT(NULL != blockCol3); | |
| 818 SkASSERT(NULL != blockCol4); | |
| 819 SkASSERT(col + colsLeft <= 4); | 559 SkASSERT(col + colsLeft <= 4); |
| 560 |
| 820 for (int i = col; i < (col + colsLeft); ++i) { | 561 for (int i = col; i < (col + colsLeft); ++i) { |
| 821 switch(i) { | 562 block[i] = curAlphai; |
| 822 case 0: | |
| 823 *blockCol1 = curAlphai; | |
| 824 break; | |
| 825 case 1: | |
| 826 *blockCol2 = curAlphai; | |
| 827 break; | |
| 828 case 2: | |
| 829 *blockCol3 = curAlphai; | |
| 830 break; | |
| 831 case 3: | |
| 832 *blockCol4 = curAlphai; | |
| 833 break; | |
| 834 } | |
| 835 } | 563 } |
| 836 } | 564 } |
| 837 | 565 |
| 838 //////////////////////////////////////////////////////////////////////////////// | 566 //////////////////////////////////////////////////////////////////////////////// |
| 839 | 567 |
| 840 namespace SkTextureCompressor { | 568 namespace SkTextureCompressor { |
| 841 | 569 |
| 842 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { | 570 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { |
| 843 switch (fmt) { | |
| 844 // These formats are 64 bits per 4x4 block. | |
| 845 case kR11_EAC_Format: | |
| 846 case kLATC_Format: | |
| 847 { | |
| 848 static const int kLATCEncodedBlockSize = 8; | |
| 849 | 571 |
| 850 const int blocksX = width / kLATCBlockSize; | 572 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) |
| 851 const int blocksY = height / kLATCBlockSize; | |
| 852 | 573 |
| 853 return blocksX * blocksY * kLATCEncodedBlockSize; | 574 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); |
| 854 } | |
| 855 | 575 |
| 856 default: | 576 #elif COMPRESS_R11_EAC_FASTEST |
| 857 SkFAIL("Unknown compressed format!"); | 577 |
| 858 return 0; | 578 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); |
| 859 } | 579 |
| 580 #else |
| 581 #error "Must choose R11 EAC algorithm" |
| 582 #endif |
| 860 } | 583 } |
| 861 | 584 |
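The new public entry point can be driven like this hedged usage sketch (the buffer sizing assumes the 8-bytes-per-4x4-block R11 EAC layout established above; `alphaPixels` is an illustrative name, not Skia API):

```cpp
#include <vector>
// Hedged usage sketch: width and height must be positive multiples of 4,
// or the call returns false without writing anything.
std::vector<uint8_t> compressed((width / 4) * (height / 4) * 8);
bool ok = SkTextureCompressor::CompressA8ToR11EAC(
        &compressed[0], alphaPixels, width, height, rowBytes);
```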
| 862 bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcColorType, | 585 // This class implements a blitter that blits directly into a buffer that will |
| 863 int width, int height, int rowBytes, Format format, bool opt) { | 586 // be used as an R11 EAC compressed texture. We compute this buffer by |
| 864 CompressionProc proc = NULL; | 587 // buffering four scan lines and then outputting them all at once. This blitter |
| 865 if (opt) { | 588 // is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType. |
| 866 proc = SkTextureCompressorGetPlatformProc(srcColorType, format); | 589 class R11_EACBlitter : public SkBlitter { |
| 590 public: |
| 591 R11_EACBlitter(int width, int height, void *compressedBuffer); |
| 592 virtual ~R11_EACBlitter() { this->flushRuns(); } |
| 593 |
| 594 // Blit a horizontal run of one or more pixels. |
| 595 virtual void blitH(int x, int y, int width) SK_OVERRIDE { |
| 596 // This function is intended to be called from any standard RGB |
| 597 // buffer, so we should never encounter it. However, if some code |
| 598 // path does end up here, then this needs to be investigated. |
| 599 SkFAIL("Not implemented!"); |
| 600 } |
| 601 |
| 602 // Blit a horizontal run of antialiased pixels; runs[] is a *sparse* |
| 603 // zero-terminated run-length encoding of spans of constant alpha values. |
| 604 virtual void blitAntiH(int x, int y, |
| 605 const SkAlpha antialias[], |
| 606 const int16_t runs[]) SK_OVERRIDE; |
| 607 |
| 608 // Blit a vertical run of pixels with a constant alpha value. |
| 609 virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE { |
| 610 // This function is currently not implemented. It is not explicitly |
| 611 // required by the contract, but if at some time a code path runs into |
| 612 // this function (which is entirely possible), it needs to be implemented. |
| 613 // |
| 614 // TODO (krajcevski): |
| 615 // This function will be most easily implemented in one of two ways: |
| 616 // 1. Buffer each vertical column value and then construct a list |
| 617 // of alpha values and output all of the blocks at once. This only |
| 618 // requires a write to the compressed buffer |
| 619 // 2. Replace the indices of each block with the proper indices based |
| 620 // on the alpha value. This requires a read and write of the compressed |
| 621 // buffer, but much less overhead. |
| 622 SkFAIL("Not implemented!"); |
| 867 } | 623 } |
| 868 | 624 |
| 869 if (NULL == proc) { | 625 // Blit a solid rectangle one or more pixels wide. |
| 870 switch (srcColorType) { | 626 virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE { |
| 871 case kAlpha_8_SkColorType: | 627 // Analogous to blitRow, this function is intended for RGB targets |
| 872 { | 628 // and should never be called by this blitter. Any calls to this function |
| 873 switch (format) { | 629 // are probably a bug and should be investigated. |
| 874 case kLATC_Format: | 630 SkFAIL("Not implemented!"); |
| 875 proc = compress_a8_to_latc; | |
| 876 break; | |
| 877 case kR11_EAC_Format: | |
| 878 proc = compress_a8_to_r11eac; | |
| 879 break; | |
| 880 default: | |
| 881 // Do nothing... | |
| 882 break; | |
| 883 } | |
| 884 } | |
| 885 break; | |
| 886 | |
| 887 default: | |
| 888 // Do nothing... | |
| 889 break; | |
| 890 } | |
| 891 } | 631 } |
| 892 | 632 |
| 893 if (NULL != proc) { | 633 // Blit a rectangle with one alpha-blended column on the left, |
| 894 return proc(dst, src, width, height, rowBytes); | 634 // width (zero or more) opaque pixels, and one alpha-blended column |
| 635 // on the right. The result will always be at least two pixels wide. |
| 636 virtual void blitAntiRect(int x, int y, int width, int height, |
| 637 SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE { |
| 638 // This function is currently not implemented. It is not explicitly |
| 639 // required by the contract, but if at some time a code path runs into |
| 640 // this function (which is entirely possible), it needs to be implemente
d. |
| 641 // |
| 642 // TODO (krajcevski): |
| 643 // This function will be most easily implemented as follows: |
| 644 // 1. If width/height are smaller than a block, then update the |
| 645 // indices of the affected blocks. |
| 646 // 2. If width/height are larger than a block, then construct a 9-patch |
| 647 // of block encodings that represent the rectangle, and write them |
| 648 // to the compressed buffer as necessary. Whether or not the blocks |
| 649 // are overwritten by zeros or just their indices are updated is up |
| 650 // to debate. |
| 651 SkFAIL("Not implemented!"); |
| 895 } | 652 } |
| 896 | 653 |
| 897 return false; | 654 // Blit a pattern of pixels defined by a rectangle-clipped mask; |
| 898 } | 655 // typically used for text. |
| 899 | 656 virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE { |
| 900 SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) { | 657 // This function is currently not implemented. It is not explicitly |
| 901 SkAutoLockPixels alp(bitmap); | 658 // required by the contract, but if at some time a code path runs into |
| 902 | 659 // this function (which is entirely possible), it needs to be implemente
d. |
| 903 int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bi
tmap.height()); | 660 // |
| 904 const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels()); | 661 // TODO (krajcevski): |
| 905 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize
)); | 662 // This function will be most easily implemented in the same way as |
| 906 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit
map.height(), | 663 // blitAntiRect above. |
| 907 bitmap.rowBytes(), format)) { | 664 SkFAIL("Not implemented!"); |
| 908 return SkData::NewFromMalloc(dst, compressedDataSize); | |
| 909 } | 665 } |
| 910 | 666 |
| 911 sk_free(dst); | 667 // If the blitter just sets a single value for each pixel, return the |
| 912 return NULL; | 668 // bitmap it draws into, and assign value. If not, return NULL and ignore |
| 913 } | 669 // the value parameter. |
| 670 virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE { |
| 671 return NULL; |
| 672 } |
| 673 |
| 674 /** |
| 675 * Compressed texture blitters only really work correctly if they get |
| 676 * four blocks at a time. That being said, this blitter tries it's best |
| 677 * to preserve semantics if blitAntiH doesn't get called in too many |
| 678 * weird ways... |
| 679 */ |
| 680 virtual int requestRowsPreserved() const { return kR11_EACBlockSz; } |
| 681 |
| 682 protected: |
| 683 virtual void onNotifyFinished() { this->flushRuns(); } |
| 684 |
| 685 private: |
| 686 static const int kR11_EACBlockSz = 4; |
| 687 static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz; |
| 688 |
| 689 // The longest possible run of pixels that this blitter will receive. |
| 690 // This is initialized in the constructor to 0x7FFE, which is one less |
| 691 // than the largest positive 16-bit integer. We make sure that it's one |
| 692 // less for debugging purposes. We also don't make this variable static |
| 693 // in order to make sure that we can construct a valid pointer to it. |
| 694 const int16_t kLongestRun; |
| 695 |
| 696 // Usually used in conjunction with kLongestRun. This is initialized to |
| 697 // zero. |
| 698 const SkAlpha kZeroAlpha; |
| 699 |
| 700 // This is the information that we buffer whenever we're asked to blit |
| 701 // a row with this blitter. |
| 702 struct BufferedRun { |
| 703 const SkAlpha* fAlphas; |
| 704 const int16_t* fRuns; |
| 705 int fX, fY; |
| 706 } fBufferedRuns[kR11_EACBlockSz]; |
| 707 |
| 708 // The next row (0-3) that we need to blit. This value should never exceed |
| 709 // the number of rows that we have (kR11_EACBlockSz) |
| 710 int fNextRun; |
| 711 |
| 712 // The width and height of the image that we're blitting |
| 713 const int fWidth; |
| 714 const int fHeight; |
| 715 |
| 716 // The R11 EAC buffer that we're blitting into. It is assumed that the buffe
r |
| 717 // is large enough to store a compressed image of size fWidth*fHeight. |
| 718 uint64_t* const fBuffer; |
| 719 |
| 720 // Various utility functions |
| 721 int blocksWide() const { return fWidth / kR11_EACBlockSz; } |
| 722 int blocksTall() const { return fHeight / kR11_EACBlockSz; } |
| 723 int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; } |
| 724 |
| 725 // Returns the block index for the block containing pixel (x, y). Block |
| 726 // indices start at zero and proceed in raster order. |
| 727 int getBlockOffset(int x, int y) const { |
| 728 SkASSERT(x < fWidth); |
| 729 SkASSERT(y < fHeight); |
| 730 const int blockCol = x / kR11_EACBlockSz; |
| 731 const int blockRow = y / kR11_EACBlockSz; |
| 732 return blockRow * this->blocksWide() + blockCol; |
| 733 } |
| 734 |
| 735 // Returns a pointer to the block containing pixel (x, y) |
| 736 uint64_t *getBlock(int x, int y) const { |
| 737 return fBuffer + this->getBlockOffset(x, y); |
| 738 } |
| 739 |
| 740 // The following function writes the buffered runs to compressed blocks. |
| 741 // If fNextRun < 4, then we fill the runs that we haven't buffered with |
| 742 // the constant zero buffer. |
| 743 void flushRuns(); |
| 744 }; |
| 745 |
| 914 | 746 |
| 915 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) | 747 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) |
| 916 // 0x7FFE is one minus the largest positive 16-bit int. We use it for | 748 // 0x7FFE is one minus the largest positive 16-bit int. We use it for |
| 917 // debugging to make sure that we're properly setting the nextX distance | 749 // debugging to make sure that we're properly setting the nextX distance |
| 918 // in flushRuns(). | 750 // in flushRuns(). |
| 919 : kLongestRun(0x7FFE), kZeroAlpha(0) | 751 : kLongestRun(0x7FFE), kZeroAlpha(0) |
| 920 , fNextRun(0) | 752 , fNextRun(0) |
| 921 , fWidth(width) | 753 , fWidth(width) |
| 922 , fHeight(height) | 754 , fHeight(height) |
| 923 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) | 755 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) |
| (...skipping 96 matching lines...) |
| 1020 // nextX -- for each run, the next point at which we need to update curAlphaColumn | 852 // nextX -- for each run, the next point at which we need to update curAlphaColumn |
| 1021 // after the value of curX. | 853 // after the value of curX. |
| 1022 // finalX -- the minimum of all the nextX values. | 854 // finalX -- the minimum of all the nextX values. |
| 1023 // | 855 // |
| 1024 // curX advances to finalX outputting any blocks that it passes along | 856 // curX advances to finalX outputting any blocks that it passes along |
| 1025 // the way. Since finalX will not change when we reach the end of a | 857 // the way. Since finalX will not change when we reach the end of a |
| 1026 // run, the termination criteria will be whenever curX == finalX at the | 858 // run, the termination criteria will be whenever curX == finalX at the |
| 1027 // end of a loop. | 859 // end of a loop. |
| 1028 | 860 |
| 1029 // Setup: | 861 // Setup: |
| 1030 uint32_t c1 = 0; | 862 uint32_t c[4] = { 0, 0, 0, 0 }; |
| 1031 uint32_t c2 = 0; | |
| 1032 uint32_t c3 = 0; | |
| 1033 uint32_t c4 = 0; | |
| 1034 | |
| 1035 uint32_t curAlphaColumn = 0; | 863 uint32_t curAlphaColumn = 0; |
| 1036 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); | 864 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); |
| 1037 | 865 |
| 1038 int nextX[kR11_EACBlockSz]; | 866 int nextX[kR11_EACBlockSz]; |
| 1039 for (int i = 0; i < kR11_EACBlockSz; ++i) { | 867 for (int i = 0; i < kR11_EACBlockSz; ++i) { |
| 1040 nextX[i] = 0x7FFFFF; | 868 nextX[i] = 0x7FFFFF; |
| 1041 } | 869 } |
| 1042 | 870 |
| 1043 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); | 871 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); |
| 1044 | 872 |
| (...skipping 14 matching lines...) |
| 1059 // Run the blitter... | 887 // Run the blitter... |
| 1060 while (curX != finalX) { | 888 while (curX != finalX) { |
| 1061 SkASSERT(finalX >= curX); | 889 SkASSERT(finalX >= curX); |
| 1062 | 890 |
| 1063 // Do we need to populate the rest of the block? | 891 // Do we need to populate the rest of the block? |
| 1064 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { | 892 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { |
| 1065 const int col = curX & 3; | 893 const int col = curX & 3; |
| 1066 const int colsLeft = 4 - col; | 894 const int colsLeft = 4 - col; |
| 1067 SkASSERT(curX + colsLeft <= finalX); | 895 SkASSERT(curX + colsLeft <= finalX); |
| 1068 | 896 |
| 1069 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); | 897 update_block_columns(c, col, colsLeft, curAlphaColumn); |
| 1070 | 898 |
| 1071 // Write this block | 899 // Write this block |
| 1072 *outPtr = compress_block_vertical(c1, c2, c3, c4); | 900 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); |
| 1073 ++outPtr; | 901 ++outPtr; |
| 1074 curX += colsLeft; | 902 curX += colsLeft; |
| 1075 } | 903 } |
| 1076 | 904 |
| 1077 // If we can advance even further, then just keep memsetting the block | 905 // If we can advance even further, then just keep memsetting the block |
| 1078 if ((finalX - curX) >= kR11_EACBlockSz) { | 906 if ((finalX - curX) >= kR11_EACBlockSz) { |
| 1079 SkASSERT((curX & 3) == 0); | 907 SkASSERT((curX & 3) == 0); |
| 1080 | 908 |
| 1081 const int col = 0; | 909 const int col = 0; |
| 1082 const int colsLeft = kR11_EACBlockSz; | 910 const int colsLeft = kR11_EACBlockSz; |
| 1083 | 911 |
| 1084 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); | 912 update_block_columns(c, col, colsLeft, curAlphaColumn); |
| 1085 | 913 |
| 1086 // While we can keep advancing, just keep writing the block. | 914 // While we can keep advancing, just keep writing the block. |
| 1087 uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4); | 915 uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]); |
| 1088 while((finalX - curX) >= kR11_EACBlockSz) { | 916 while((finalX - curX) >= kR11_EACBlockSz) { |
| 1089 *outPtr = lastBlock; | 917 *outPtr = lastBlock; |
| 1090 ++outPtr; | 918 ++outPtr; |
| 1091 curX += kR11_EACBlockSz; | 919 curX += kR11_EACBlockSz; |
| 1092 } | 920 } |
| 1093 } | 921 } |
| 1094 | 922 |
| 1095 // If we haven't advanced within the block then do so. | 923 // If we haven't advanced within the block then do so. |
| 1096 if (curX < finalX) { | 924 if (curX < finalX) { |
| 1097 const int col = curX & 3; | 925 const int col = curX & 3; |
| 1098 const int colsLeft = finalX - curX; | 926 const int colsLeft = finalX - curX; |
| 1099 | 927 |
| 1100 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); | 928 update_block_columns(c, col, colsLeft, curAlphaColumn); |
| 1101 | 929 |
| 1102 curX += colsLeft; | 930 curX += colsLeft; |
| 1103 } | 931 } |
| 1104 | 932 |
| 1105 SkASSERT(curX == finalX); | 933 SkASSERT(curX == finalX); |
| 1106 | 934 |
| 1107 // Figure out what the next advancement is... | 935 // Figure out what the next advancement is... |
| 1108 for (int i = 0; i < kR11_EACBlockSz; ++i) { | 936 for (int i = 0; i < kR11_EACBlockSz; ++i) { |
| 1109 if (nextX[i] == finalX) { | 937 if (nextX[i] == finalX) { |
| 1110 const int16_t run = *(fBufferedRuns[i].fRuns); | 938 const int16_t run = *(fBufferedRuns[i].fRuns); |
| 1111 fBufferedRuns[i].fRuns += run; | 939 fBufferedRuns[i].fRuns += run; |
| 1112 fBufferedRuns[i].fAlphas += run; | 940 fBufferedRuns[i].fAlphas += run; |
| 1113 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | 941 curAlpha[i] = *(fBufferedRuns[i].fAlphas); |
| 1114 nextX[i] += *(fBufferedRuns[i].fRuns); | 942 nextX[i] += *(fBufferedRuns[i].fRuns); |
| 1115 } | 943 } |
| 1116 } | 944 } |
| 1117 | 945 |
| 1118 finalX = 0xFFFFF; | 946 finalX = 0xFFFFF; |
| 1119 for (int i = 0; i < kR11_EACBlockSz; ++i) { | 947 for (int i = 0; i < kR11_EACBlockSz; ++i) { |
| 1120 finalX = SkMin32(nextX[i], finalX); | 948 finalX = SkMin32(nextX[i], finalX); |
| 1121 } | 949 } |
| 1122 } | 950 } |
| 1123 | 951 |
| 1124 // If we didn't land on a block boundary, output the block... | 952 // If we didn't land on a block boundary, output the block... |
| 1125 if ((curX & 3) > 1) { | 953 if ((curX & 3) > 1) { |
| 1126 *outPtr = compress_block_vertical(c1, c2, c3, c4); | 954 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]); |
| 1127 } | 955 } |
| 1128 | 956 |
| 1129 fNextRun = 0; | 957 fNextRun = 0; |
| 1130 } | 958 } |
| 1131 | 959 |
| 960 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) { |
| 961 return new R11_EACBlitter(width, height, outputBuffer); |
| 962 } |
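A hedged sketch of how a caller might drive the new factory (the ownership conventions here are assumptions on the editor's part, not spelled out in the CL):

```cpp
// The caller supplies a buffer sized for the compressed image and owns both
// it and the returned blitter; deleting the blitter flushes buffered runs.
SkBlitter* blitter = SkTextureCompressor::CreateR11EACBlitter(width, height, buffer);
// ... feed blitAntiH() runs, four rows at a time ...
delete blitter;  // ~R11_EACBlitter() calls flushRuns()
```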
| 963 |
| 1132 } // namespace SkTextureCompressor | 964 } // namespace SkTextureCompressor |