Chromium Code Reviews

Side by Side Diff: src/utils/SkTextureCompressor_R11EAC.cpp

Issue 403383003: Refactor texture compressors into separate files (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 5 months ago
 /*
  * Copyright 2014 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "SkTextureCompressor.h"

-#include "SkBitmap.h"
-#include "SkData.h"
 #include "SkEndian.h"

-#include "SkTextureCompression_opts.h"
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// Utility Functions
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// Absolute difference between two values. More correct than SkTAbs(a - b)
-// because it works on unsigned values.
-template <typename T> inline T abs_diff(const T &a, const T &b) {
-    return (a > b) ? (a - b) : (b - a);
-}
-
-static bool is_extremal(uint8_t pixel) {
-    return 0 == pixel || 255 == pixel;
-}
-
-typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
-
-// This function is used by both R11 EAC and LATC to compress 4x4 blocks
-// of 8-bit alpha into 64-bit values that comprise the compressed data.
-// For both formats, we need to make sure that the dimensions of the
-// src pixels are divisible by 4, and copy 4x4 blocks one at a time
-// for compression.
-static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
-                                     int width, int height, int rowBytes,
-                                     A84x4To64BitProc proc) {
-    // Make sure that our data is well-formed enough to be considered for compression
-    if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
-        return false;
-    }
-
-    int blocksX = width >> 2;
-    int blocksY = height >> 2;
-
-    uint8_t block[16];
-    uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
-    for (int y = 0; y < blocksY; ++y) {
-        for (int x = 0; x < blocksX; ++x) {
-            // Load block
-            for (int k = 0; k < 4; ++k) {
-                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
-            }
-
-            // Compress it
-            *encPtr = proc(block);
-            ++encPtr;
-        }
-        src += 4 * rowBytes;
-    }
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// LATC compressor
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// LATC compresses texels down into square 4x4 blocks
-static const int kLATCPaletteSize = 8;
-static const int kLATCBlockSize = 4;
-static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
-
-// Generates an LATC palette. LATC constructs
-// a palette of eight colors from LUM0 and LUM1 using the algorithm:
-//
-//                  LUM0,              if lum0 > lum1 and code(x,y) == 0
-//                  LUM1,              if lum0 > lum1 and code(x,y) == 1
-//                  (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
-//                  (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
-//                  (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
-//                  (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
-//                  (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
-//                  (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
-//
-//                  LUM0,              if lum0 <= lum1 and code(x,y) == 0
-//                  LUM1,              if lum0 <= lum1 and code(x,y) == 1
-//                  (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
-//                  (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
-//                  (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
-//                  (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
-//                  0,                 if lum0 <= lum1 and code(x,y) == 6
-//                  255,               if lum0 <= lum1 and code(x,y) == 7
-
-static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
-    palette[0] = lum0;
-    palette[1] = lum1;
-    if (lum0 > lum1) {
-        for (int i = 1; i < 7; i++) {
-            palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
-        }
-    } else {
-        for (int i = 1; i < 5; i++) {
-            palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
-        }
-        palette[6] = 0;
-        palette[7] = 255;
-    }
-}
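
As a worked check of the palette math above (values chosen for illustration, not taken from the patch): with lum0 = 200 and lum1 = 40 we take the lum0 > lum1 branch, so all six intermediate entries are interpolants:

    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, 200, 40);
    // palette == { 200, 40, 177, 154, 131, 108, 85, 62 }
    // e.g. palette[2] == (6*200 + 1*40) / 7 == 1240 / 7 == 177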
-
-// Compress a block by using the bounding box of the pixels. It is assumed that
-// there are no extremal pixels in this block, otherwise we would have used
-// compress_latc_block_bb_ignore_extremal.
-static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
-    uint8_t minVal = 255;
-    uint8_t maxVal = 0;
-    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
-        minVal = SkTMin(pixels[i], minVal);
-        maxVal = SkTMax(pixels[i], maxVal);
-    }
-
-    SkASSERT(!is_extremal(minVal));
-    SkASSERT(!is_extremal(maxVal));
-
-    uint8_t palette[kLATCPaletteSize];
-    generate_latc_palette(palette, maxVal, minVal);
-
-    uint64_t indices = 0;
-    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
-
-        // Find the best palette index
-        uint8_t bestError = abs_diff(pixels[i], palette[0]);
-        uint8_t idx = 0;
-        for (int j = 1; j < kLATCPaletteSize; ++j) {
-            uint8_t error = abs_diff(pixels[i], palette[j]);
-            if (error < bestError) {
-                bestError = error;
-                idx = j;
-            }
-        }
-
-        indices <<= 3;
-        indices |= idx;
-    }
-
-    return
-        SkEndian_SwapLE64(
-            static_cast<uint64_t>(maxVal) |
-            (static_cast<uint64_t>(minVal) << 8) |
-            (indices << 16));
-}
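
For reference, a sketch of how a decoder would invert the block layout produced above (decode_latc_texel is hypothetical, not a Skia function, and the SkEndian_SwapLE64 step is ignored): byte 0 holds LUM0 (maxVal here), byte 1 holds LUM1 (minVal here), and bits 16..63 hold sixteen 3-bit palette indices, with texel i at bit 16 + 3*i:

    static uint8_t decode_latc_texel(uint64_t block, int texel) {
        uint8_t palette[kLATCPaletteSize];
        generate_latc_palette(palette,
                              static_cast<uint8_t>(block & 0xFF),          // LUM0
                              static_cast<uint8_t>((block >> 8) & 0xFF));  // LUM1
        return palette[(block >> (16 + 3 * texel)) & 0x7];
    }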
-
-// Compress a block by using the bounding box of the pixels without taking into
-// account the extremal values. The generated palette will contain extremal values
-// and fewer points along the line segment to interpolate.
-static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
-    uint8_t minVal = 255;
-    uint8_t maxVal = 0;
-    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
-        if (is_extremal(pixels[i])) {
-            continue;
-        }
-
-        minVal = SkTMin(pixels[i], minVal);
-        maxVal = SkTMax(pixels[i], maxVal);
-    }
-
-    SkASSERT(!is_extremal(minVal));
-    SkASSERT(!is_extremal(maxVal));
-
-    uint8_t palette[kLATCPaletteSize];
-    generate_latc_palette(palette, minVal, maxVal);
-
-    uint64_t indices = 0;
-    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
-
-        // Find the best palette index
-        uint8_t idx = 0;
-        if (is_extremal(pixels[i])) {
-            if (0xFF == pixels[i]) {
-                idx = 7;
-            } else if (0 == pixels[i]) {
-                idx = 6;
-            } else {
-                SkFAIL("Pixel is extremal but not really?!");
-            }
-        } else {
-            uint8_t bestError = abs_diff(pixels[i], palette[0]);
-            for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
-                uint8_t error = abs_diff(pixels[i], palette[j]);
-                if (error < bestError) {
-                    bestError = error;
-                    idx = j;
-                }
-            }
-        }
-
-        indices <<= 3;
-        indices |= idx;
-    }
-
-    return
-        SkEndian_SwapLE64(
-            static_cast<uint64_t>(minVal) |
-            (static_cast<uint64_t>(maxVal) << 8) |
-            (indices << 16));
-}
-
-
-// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
-// values LUM0 and LUM1, and an index into the generated palette. Details of how
-// the palette is generated can be found in the comments of generate_latc_palette
-// above.
-//
-// We choose which palette type to use based on whether or not 'pixels' contains
-// any extremal values (0 or 255). If there are extremal values, then we use the
-// palette that has the extremal values built in. Otherwise, we use the full bounding
-// box.
-
-static uint64_t compress_latc_block(const uint8_t pixels[]) {
-    // Collect unique pixels
-    int nUniquePixels = 0;
-    uint8_t uniquePixels[kLATCPixelsPerBlock];
-    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
-        bool foundPixel = false;
-        for (int j = 0; j < nUniquePixels; ++j) {
-            foundPixel = foundPixel || uniquePixels[j] == pixels[i];
-        }
-
-        if (!foundPixel) {
-            uniquePixels[nUniquePixels] = pixels[i];
-            ++nUniquePixels;
-        }
-    }
-
-    // If there's only one unique pixel, then our compression is easy.
-    if (1 == nUniquePixels) {
-        return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
-
-    // Similarly, if there are only two unique pixels, then our compression is
-    // easy again: place the pixels in the block header, and assign the indices
-    // with one or zero depending on which pixel they belong to.
-    } else if (2 == nUniquePixels) {
-        uint64_t outBlock = 0;
-        for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
-            int idx = 0;
-            if (pixels[i] == uniquePixels[1]) {
-                idx = 1;
-            }
-
-            outBlock <<= 3;
-            outBlock |= idx;
-        }
-        outBlock <<= 16;
-        outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
-        return SkEndian_SwapLE64(outBlock);
-    }
-
-    // Count non-extremal pixel values
-    int nonExtremalPixels = 0;
-    for (int i = 0; i < nUniquePixels; ++i) {
-        if (!is_extremal(uniquePixels[i])) {
-            ++nonExtremalPixels;
-        }
-    }
-
-    // If all the pixels are non-extremal, then compute the palette using
-    // the bounding box of all the pixels.
-    if (nonExtremalPixels == nUniquePixels) {
-        // This is really just for correctness, in all of my tests we
-        // never take this step. We don't lose too much perf here because
-        // most of the processing in this function is worth it for the
-        // 1 == nUniquePixels optimization.
-        return compress_latc_block_bb(pixels);
-    } else {
-        return compress_latc_block_bb_ignore_extremal(pixels);
-    }
-}
-
-static inline bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src,
-                                       int width, int height, int rowBytes) {
-    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// R11 EAC Compressor
-//
-////////////////////////////////////////////////////////////////////////////////
-
 // #define COMPRESS_R11_EAC_SLOW 1
 // #define COMPRESS_R11_EAC_FAST 1
 #define COMPRESS_R11_EAC_FASTEST 1

 // Blocks compressed into R11 EAC are represented as follows:
 // 0000000000000000000000000000000000000000000000000000000000000000
 // |base_cw|mod|mul|  ----------------- indices -------------------
 //
 // To reconstruct the value of a given pixel, we use the formula:
 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8)
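
To make the reconstruction formula concrete, here is a minimal decoder-side sketch (not part of this patch; mod_val would come from the R11 EAC modifier table, which lives in the elided lines below):

    static inline int r11eac_reconstruct(int base_cw, int mod_val, int mul) {
        int val = base_cw * 8 + 4 + mod_val * mul * 8;
        if (val < 0) { val = 0; }
        if (val > 2047) { val = 2047; }
        return val;
    }
    // e.g. base_cw = 128, mul = 2, mod_val = -3:
    // clamp[0, 2047](128*8 + 4 - 48) == 980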
(...skipping 235 matching lines...)
             // primarily for compressing alpha maps. Usually the only
             // non-fully opaque or fully transparent blocks are not a solid
             // intermediate color. If we notice that they are, then we can
             // add another optimization...
             break;
         }
     }

     return compress_heterogeneous_r11eac_block(block);
 }
+
+// This function is used by R11 EAC to compress 4x4 blocks
+// of 8-bit alpha into 64-bit values that comprise the compressed data.

robertphillips 2014/07/21 21:56:57  both formats ?
krajcevski 2014/07/21 22:03:55  Done.

+// For both formats, we need to make sure that the dimensions of the
+// src pixels are divisible by 4, and copy 4x4 blocks one at a time
+// for compression.
+typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
+
+static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
+                                     int width, int height, int rowBytes,
+                                     A84x4To64BitProc proc) {
+    // Make sure that our data is well-formed enough to be considered for compression
+    if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
+        return false;
+    }
+
+    int blocksX = width >> 2;
+    int blocksY = height >> 2;
+
+    uint8_t block[16];
+    uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
+    for (int y = 0; y < blocksY; ++y) {
+        for (int x = 0; x < blocksX; ++x) {
+            // Load block
+            for (int k = 0; k < 4; ++k) {
+                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
+            }
+
+            // Compress it
+            *encPtr = proc(block);
+            ++encPtr;
+        }
+        src += 4 * rowBytes;
+    }
+
+    return true;
+}
 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)

 #if COMPRESS_R11_EAC_FASTEST
+template<unsigned shift>
+static inline uint64_t swap_shift(uint64_t x, uint64_t mask) {
+    const uint64_t t = (x ^ (x >> shift)) & mask;
+    return x ^ t ^ (t << shift);
+}
+
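swap_shift is the classic delta-swap bit permutation: for each set bit i in mask, it exchanges bit i with bit (i + shift) of x. A quick sanity check (illustrative only, not part of the patch):

    // x = 0b01, mask = 0b01, shift = 1:
    //   t = (x ^ (x >> 1)) & mask == 0b01
    //   x ^ t ^ (t << 1)          == 0b10  (bits 0 and 1 exchanged)
    SkASSERT(swap_shift<1>(0x1ULL, 0x1ULL) == 0x2ULL);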
 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) {
     // If our 3-bit block indices are laid out as:
     // a b c d
     // e f g h
     // i j k l
     // m n o p
     //
     // This function expects topRows and bottomRows to contain the first two rows
     // of indices interleaved in the least significant bits of a and b. In other words...
     //
(...skipping 13 matching lines...)
     //
     // !SPEED! this function might be even faster if certain SIMD intrinsics are
     // used..

     // For both architectures, we can figure out a packing of the bits by
     // using a shuffle and a few shift-rotates...
     uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>(bottomRows);

     // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p

-    uint64_t t = (x ^ (x >> 10)) & 0x3FC0003FC00000ULL;
-    x = x ^ t ^ (t << 10);
+    x = swap_shift<10>(x, 0x3FC0003FC00000ULL);

     // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p

     x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 16;

     // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n

-    t = (x ^ (x >> 6)) & 0xFC0000ULL;
-    x = x ^ t ^ (t << 6);
+    x = swap_shift<6>(x, 0xFC0000ULL);

 #if defined (SK_CPU_BENDIAN)
     // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n

-    t = (x ^ (x >> 36)) & 0x3FULL;
-    x = x ^ t ^ (t << 36);
+    x = swap_shift<36>(x, 0x3FULL);

     // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p

-    t = (x ^ (x >> 12)) & 0xFFF000000ULL;
-    x = x ^ t ^ (t << 12);
-
-    // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p
-    return x;
+    x = swap_shift<12>(x, 0xFFF000000ULL);
 #else
     // If our CPU is little endian, then the above logic will
     // produce the following indices:
     // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o

-    t = (x ^ (x >> 36)) & 0xFC0ULL;
-    x = x ^ t ^ (t << 36);
+    x = swap_shift<36>(x, 0xFC0ULL);

     // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o

     x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL);
+#endif

     // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p
-
     return x;
-#endif
 }

 // This function converts an integer containing four bytes of alpha
 // values into an integer containing four bytes of indices into R11 EAC.
 // Note, there needs to be a mapping of indices:
 //     0 1 2 3 4 5 6 7
 //     3 2 1 0 4 5 6 7
 //
 // To compute this, we first negate each byte, and then add three, which
 // gives the mapping
(...skipping 87 matching lines...)
             // Compress it
             *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes);
             ++encPtr;
         }
         src += 4 * rowBytes;
     }
     return true;
 }
 #endif // COMPRESS_R11_EAC_FASTEST

+////////////////////////////////////////////////////////////////////////////////
+//
+// Utility functions used by the blitter
+//
+////////////////////////////////////////////////////////////////////////////////
+
 // The R11 EAC format expects that indices are given in column-major order. Since
 // we receive alpha values in raster order, this usually means that we have to use
 // pack6 above to properly pack our indices. However, if our indices come from the
 // blitter, then each integer will be a column of indices, and hence can be efficiently
 // packed. This function takes the bottom three bits of each byte and places them in
 // the least significant 12 bits of the resulting integer.
 static inline uint32_t pack_indices_vertical(uint32_t x) {
 #if defined (SK_CPU_BENDIAN)
     return
         (x & 7) |
(...skipping 43 matching lines...)
     const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3);

     return SkEndian_SwapBE64(0x8490000000000000ULL |
                              (static_cast<uint64_t>(packedIndexColumn0) << 36) |
                              (static_cast<uint64_t>(packedIndexColumn1) << 24) |
                              static_cast<uint64_t>(packedIndexColumn2 << 12) |
                              static_cast<uint64_t>(packedIndexColumn3));

 }

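A scalar sketch of what pack_indices_vertical computes, per the comment above (the byte-to-slot order shown is one plausible little-endian layout; the real body branches on SK_CPU_BENDIAN and is mostly elided here):

    static inline uint32_t pack_indices_vertical_sketch(uint32_t x) {
        const uint32_t i0 = (x >>  0) & 7;  // bottom 3 bits of byte 0
        const uint32_t i1 = (x >>  8) & 7;  // bottom 3 bits of byte 1
        const uint32_t i2 = (x >> 16) & 7;  // bottom 3 bits of byte 2
        const uint32_t i3 = (x >> 24) & 7;  // bottom 3 bits of byte 3
        // Pack the four 3-bit indices into the low 12 bits.
        return (i0 << 9) | (i1 << 6) | (i2 << 3) | i3;
    }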
-static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
-                                         int width, int height, int rowBytes) {
-#if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
-    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block);
-#elif COMPRESS_R11_EAC_FASTEST
-    return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
-#else
-#error "Must choose R11 EAC algorithm"
-#endif
-}
-
 // Updates the block whose columns are stored in blockColN. curAlphai is expected
 // to store, as an integer, the four alpha values that will be placed within each
 // of the columns in the range [col, col+colsLeft).
-static inline void update_block_columns(
-    uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blockCol4,
-    const int col, const int colsLeft, const uint32_t curAlphai) {
-    SkASSERT(NULL != blockCol1);
-    SkASSERT(NULL != blockCol2);
-    SkASSERT(NULL != blockCol3);
-    SkASSERT(NULL != blockCol4);
-    SkASSERT(col + colsLeft <= 4);
-
-    for (int i = col; i < (col + colsLeft); ++i) {
-        switch(i) {
-            case 0:
-                *blockCol1 = curAlphai;
-                break;
-            case 1:
-                *blockCol2 = curAlphai;
-                break;
-            case 2:
-                *blockCol3 = curAlphai;
-                break;
-            case 3:
-                *blockCol4 = curAlphai;
-                break;
-        }
-    }
-}
+static inline void update_block_columns(uint32_t* block, const int col,
+                                        const int colsLeft, const uint32_t curAlphai) {
+    SkASSERT(NULL != block);
+    SkASSERT(col + colsLeft <= 4);
+
+    for (int i = col; i < (col + colsLeft); ++i) {
+        block[i] = curAlphai;
+    }
+}

 ////////////////////////////////////////////////////////////////////////////////

 namespace SkTextureCompressor {

-static inline size_t get_compressed_data_size(Format fmt, int width, int height) {
-    switch (fmt) {
-        // These formats are 64 bits per 4x4 block.
-        case kR11_EAC_Format:
-        case kLATC_Format:
-        {
-            static const int kLATCEncodedBlockSize = 8;
-
-            const int blocksX = width / kLATCBlockSize;
-            const int blocksY = height / kLATCBlockSize;
-
-            return blocksX * blocksY * kLATCEncodedBlockSize;
-        }
-
-        default:
-            SkFAIL("Unknown compressed format!");
-            return 0;
-    }
-}
+bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
+
+#if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
+
+    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block);
+
+#elif COMPRESS_R11_EAC_FASTEST
+
+    return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
+
+#else
+#error "Must choose R11 EAC algorithm"
+#endif
+}

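A minimal sketch of calling the new entry point (the SkAutoMalloc buffers and dimensions are assumptions for illustration; width and height must be multiples of 4, and each 4x4 block compresses to 8 bytes):

    const int width = 64, height = 64;
    SkAutoMalloc src(width * height);                  // kAlpha_8 source pixels
    SkAutoMalloc dst((width / 4) * (height / 4) * 8);  // 64 bits per 4x4 block
    bool ok = SkTextureCompressor::CompressA8ToR11EAC(
        reinterpret_cast<uint8_t*>(dst.get()),
        reinterpret_cast<const uint8_t*>(src.get()),
        width, height, width /* rowBytes */);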
-bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcColorType,
-                            int width, int height, int rowBytes, Format format, bool opt) {
-    CompressionProc proc = NULL;
-    if (opt) {
-        proc = SkTextureCompressorGetPlatformProc(srcColorType, format);
-    }
-
-    if (NULL == proc) {
-        switch (srcColorType) {
-            case kAlpha_8_SkColorType:
-            {
-                switch (format) {
-                    case kLATC_Format:
-                        proc = compress_a8_to_latc;
-                        break;
-                    case kR11_EAC_Format:
-                        proc = compress_a8_to_r11eac;
-                        break;
-                    default:
-                        // Do nothing...
-                        break;
-                }
-            }
-            break;
-
-            default:
-                // Do nothing...
-                break;
-        }
-    }
-
-    if (NULL != proc) {
-        return proc(dst, src, width, height, rowBytes);
-    }
-
-    return false;
-}
-
-SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) {
-    SkAutoLockPixels alp(bitmap);
-
-    int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bitmap.height());
-    const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels());
-    uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize));
-    if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bitmap.height(),
-                               bitmap.rowBytes(), format)) {
-        return SkData::NewFromMalloc(dst, compressedDataSize);
-    }
-
-    sk_free(dst);
-    return NULL;
-}
+// This class implements a blitter that blits directly into a buffer that will
+// be used as an R11 EAC compressed texture. We compute this buffer by
+// buffering four scan lines and then outputting them all at once. This blitter
+// is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType.
+class R11_EACBlitter : public SkBlitter {
+public:
+    R11_EACBlitter(int width, int height, void *compressedBuffer);
+    virtual ~R11_EACBlitter() { this->flushRuns(); }
+
+    // Blit a horizontal run of one or more pixels.
+    virtual void blitH(int x, int y, int width) SK_OVERRIDE {
+        // This function is intended to be called from any standard RGB
+        // buffer, so we should never encounter it. However, if some code
+        // path does end up here, then this needs to be investigated.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a horizontal run of antialiased pixels; runs[] is a *sparse*
+    // zero-terminated run-length encoding of spans of constant alpha values.
+    virtual void blitAntiH(int x, int y,
+                           const SkAlpha antialias[],
+                           const int16_t runs[]) SK_OVERRIDE;
+
+    // Blit a vertical run of pixels with a constant alpha value.
+    virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE {
+        // This function is currently not implemented. It is not explicitly
+        // required by the contract, but if at some time a code path runs into
+        // this function (which is entirely possible), it needs to be implemented.
+        //
+        // TODO (krajcevski):
+        // This function will be most easily implemented in one of two ways:
+        // 1. Buffer each vertical column value and then construct a list
+        //    of alpha values and output all of the blocks at once. This only
+        //    requires a write to the compressed buffer
+        // 2. Replace the indices of each block with the proper indices based
+        //    on the alpha value. This requires a read and write of the compressed
+        //    buffer, but much less overhead.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a solid rectangle one or more pixels wide.
+    virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE {
+        // Analogous to blitRow, this function is intended for RGB targets
+        // and should never be called by this blitter. Any calls to this function
+        // are probably a bug and should be investigated.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a rectangle with one alpha-blended column on the left,
+    // width (zero or more) opaque pixels, and one alpha-blended column
+    // on the right. The result will always be at least two pixels wide.
+    virtual void blitAntiRect(int x, int y, int width, int height,
+                              SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE {
+        // This function is currently not implemented. It is not explicitly
+        // required by the contract, but if at some time a code path runs into
+        // this function (which is entirely possible), it needs to be implemented.
+        //
+        // TODO (krajcevski):
+        // This function will be most easily implemented as follows:
+        // 1. If width/height are smaller than a block, then update the
+        //    indices of the affected blocks.
+        // 2. If width/height are larger than a block, then construct a 9-patch
+        //    of block encodings that represent the rectangle, and write them
+        //    to the compressed buffer as necessary. Whether or not the blocks
+        //    are overwritten by zeros or just their indices are updated is up
+        //    to debate.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a pattern of pixels defined by a rectangle-clipped mask;
+    // typically used for text.
+    virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE {
+        // This function is currently not implemented. It is not explicitly
+        // required by the contract, but if at some time a code path runs into
+        // this function (which is entirely possible), it needs to be implemented.
+        //
+        // TODO (krajcevski):
+        // This function will be most easily implemented in the same way as
+        // blitAntiRect above.
+        SkFAIL("Not implemented!");
+    }
+
+    // If the blitter just sets a single value for each pixel, return the
+    // bitmap it draws into, and assign value. If not, return NULL and ignore
+    // the value parameter.
+    virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE {
+        return NULL;
+    }
+
+    /**
+     * Compressed texture blitters only really work correctly if they get
+     * four blocks at a time. That being said, this blitter tries its best
+     * to preserve semantics if blitAntiH doesn't get called in too many
+     * weird ways...
+     */
+    virtual int requestRowsPreserved() const { return kR11_EACBlockSz; }
+
+protected:
+    virtual void onNotifyFinished() { this->flushRuns(); }
+
+private:
+    static const int kR11_EACBlockSz = 4;
+    static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz;
+
+    // The longest possible run of pixels that this blitter will receive.
+    // This is initialized in the constructor to 0x7FFE, which is one less
+    // than the largest positive 16-bit integer. We make sure that it's one
+    // less for debugging purposes. We also don't make this variable static
+    // in order to make sure that we can construct a valid pointer to it.
+    const int16_t kLongestRun;
+
+    // Usually used in conjunction with kLongestRun. This is initialized to
+    // zero.
+    const SkAlpha kZeroAlpha;
+
+    // This is the information that we buffer whenever we're asked to blit
+    // a row with this blitter.
+    struct BufferedRun {
+        const SkAlpha* fAlphas;
+        const int16_t* fRuns;
+        int fX, fY;
+    } fBufferedRuns[kR11_EACBlockSz];
+
+    // The next row (0-3) that we need to blit. This value should never exceed
+    // the number of rows that we have (kR11_EACBlockSz)
+    int fNextRun;
+
+    // The width and height of the image that we're blitting
+    const int fWidth;
+    const int fHeight;
+
+    // The R11 EAC buffer that we're blitting into. It is assumed that the buffer
+    // is large enough to store a compressed image of size fWidth*fHeight.
+    uint64_t* const fBuffer;
+
+    // Various utility functions
+    int blocksWide() const { return fWidth / kR11_EACBlockSz; }
+    int blocksTall() const { return fHeight / kR11_EACBlockSz; }
+    int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; }
+
+    // Returns the block index for the block containing pixel (x, y). Block
+    // indices start at zero and proceed in raster order.
+    int getBlockOffset(int x, int y) const {
+        SkASSERT(x < fWidth);
+        SkASSERT(y < fHeight);
+        const int blockCol = x / kR11_EACBlockSz;
+        const int blockRow = y / kR11_EACBlockSz;
+        return blockRow * this->blocksWide() + blockCol;
+    }
+
+    // Returns a pointer to the block containing pixel (x, y)
+    uint64_t *getBlock(int x, int y) const {
+        return fBuffer + this->getBlockOffset(x, y);
+    }
+
+    // The following function writes the buffered runs to compressed blocks.
+    // If fNextRun < 4, then we fill the runs that we haven't buffered with
+    // the constant zero buffer.
+    void flushRuns();
+};
+

 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
     // 0x7FFE is one less than the largest positive 16-bit int. We use it for
     // debugging to make sure that we're properly setting the nextX distance
     // in flushRuns().
     : kLongestRun(0x7FFE), kZeroAlpha(0)
     , fNextRun(0)
     , fWidth(width)
     , fHeight(height)
     , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
(...skipping 96 matching lines...)
     // nextX  -- for each run, the next point at which we need to update curAlphaColumn
     //           after the value of curX.
     // finalX -- the minimum of all the nextX values.
     //
     // curX advances to finalX, outputting any blocks that it passes along
     // the way. Since finalX will not change when we reach the end of a
     // run, the termination criterion is curX == finalX at the
     // end of a loop.

     // Setup:
-    uint32_t c1 = 0;
-    uint32_t c2 = 0;
-    uint32_t c3 = 0;
-    uint32_t c4 = 0;
-
+    uint32_t c[4] = { 0, 0, 0, 0 };
     uint32_t curAlphaColumn = 0;
     SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn);

     int nextX[kR11_EACBlockSz];
     for (int i = 0; i < kR11_EACBlockSz; ++i) {
         nextX[i] = 0x7FFFFF;
     }

     uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);

(...skipping 14 matching lines...)
     // Run the blitter...
     while (curX != finalX) {
         SkASSERT(finalX >= curX);

         // Do we need to populate the rest of the block?
         if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
             const int col = curX & 3;
             const int colsLeft = 4 - col;
             SkASSERT(curX + colsLeft <= finalX);

-            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);
+            update_block_columns(c, col, colsLeft, curAlphaColumn);

             // Write this block
-            *outPtr = compress_block_vertical(c1, c2, c3, c4);
+            *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]);

robertphillips 2014/07/21 21:56:57  Why not pass the pointer into compress_block_vertical?
krajcevski 2014/07/21 22:03:55  I tried that, it seemed like it was a tad slower,

             ++outPtr;
             curX += colsLeft;
         }

         // If we can advance even further, then just keep memsetting the block
         if ((finalX - curX) >= kR11_EACBlockSz) {
             SkASSERT((curX & 3) == 0);

             const int col = 0;
             const int colsLeft = kR11_EACBlockSz;

-            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);
+            update_block_columns(c, col, colsLeft, curAlphaColumn);

             // While we can keep advancing, just keep writing the block.
-            uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4);
+            uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]);
             while((finalX - curX) >= kR11_EACBlockSz) {
                 *outPtr = lastBlock;
                 ++outPtr;
                 curX += kR11_EACBlockSz;
             }
         }

         // If we haven't advanced within the block then do so.
         if (curX < finalX) {
             const int col = curX & 3;
             const int colsLeft = finalX - curX;

-            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);
+            update_block_columns(c, col, colsLeft, curAlphaColumn);

             curX += colsLeft;
         }

         SkASSERT(curX == finalX);

         // Figure out what the next advancement is...
         for (int i = 0; i < kR11_EACBlockSz; ++i) {
             if (nextX[i] == finalX) {
                 const int16_t run = *(fBufferedRuns[i].fRuns);
                 fBufferedRuns[i].fRuns += run;
                 fBufferedRuns[i].fAlphas += run;
                 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
                 nextX[i] += *(fBufferedRuns[i].fRuns);
             }
         }

         finalX = 0xFFFFF;
         for (int i = 0; i < kR11_EACBlockSz; ++i) {
             finalX = SkMin32(nextX[i], finalX);
         }
     }

     // If we didn't land on a block boundary, output the block...
     if ((curX & 3) > 1) {
-        *outPtr = compress_block_vertical(c1, c2, c3, c4);
+        *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]);
     }

     fNextRun = 0;
 }

+SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) {
+    return new R11_EACBlitter(width, height, outputBuffer);
+}
+
 } // namespace SkTextureCompressor
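
A rough caller-side sketch for the blitter factory above (the caller-side names are assumptions; the buffer sizing mirrors CompressA8ToR11EAC):

    void* buffer = sk_malloc_throw((width / 4) * (height / 4) * 8);
    SkBlitter* blitter = SkTextureCompressor::CreateR11EACBlitter(width, height, buffer);
    // ... hand the blitter to the scan converter; blitAntiH buffers four
    // rows at a time and writes compressed blocks in flushRuns() ...
    delete blitter;  // ~R11_EACBlitter() flushes any partially buffered rows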