OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
| 9 #include "SkTextureCompressor_R11EAC.h" |
| 10 #include "SkTextureCompressor_LATC.h" |
9 | 11 |
10 #include "SkBitmap.h" | 12 #include "SkBitmap.h" |
11 #include "SkData.h" | 13 #include "SkData.h" |
12 #include "SkEndian.h" | 14 #include "SkEndian.h" |
13 | 15 |
14 #include "SkTextureCompression_opts.h" | 16 #include "SkTextureCompression_opts.h" |
15 | 17 |
16 //////////////////////////////////////////////////////////////////////////////// | 18 //////////////////////////////////////////////////////////////////////////////// |
17 // | |
18 // Utility Functions | |
19 // | |
20 //////////////////////////////////////////////////////////////////////////////// | |
21 | |
22 // Absolute difference between two values. More correct than SkTAbs(a - b) | |
23 // because it works on unsigned values. | |
24 template <typename T> inline T abs_diff(const T &a, const T &b) { | |
25 return (a > b) ? (a - b) : (b - a); | |
26 } | |
27 | |
28 static bool is_extremal(uint8_t pixel) { | |
29 return 0 == pixel || 255 == pixel; | |
30 } | |
31 | |
32 typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]); | |
33 | |
34 // This function is used by both R11 EAC and LATC to compress 4x4 blocks | |
35 // of 8-bit alpha into 64-bit values that comprise the compressed data. | |
36 // For both formats, we need to make sure that the dimensions of the | |
37 // src pixels are divisible by 4, and copy 4x4 blocks one at a time | |
38 // for compression. | |
39 static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src, | |
40 int width, int height, int rowBytes, | |
41 A84x4To64BitProc proc) { | |
42 // Make sure that our data is well-formed enough to be considered for compression | |
43 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) { | |
44 return false; | |
45 } | |
46 | |
47 int blocksX = width >> 2; | |
48 int blocksY = height >> 2; | |
49 | |
50 uint8_t block[16]; | |
51 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst); | |
52 for (int y = 0; y < blocksY; ++y) { | |
53 for (int x = 0; x < blocksX; ++x) { | |
54 // Load block | |
55 for (int k = 0; k < 4; ++k) { | |
56 memcpy(block + k*4, src + k*rowBytes + 4*x, 4); | |
57 } | |
58 | |
59 // Compress it | |
60 *encPtr = proc(block); | |
61 ++encPtr; | |
62 } | |
63 src += 4 * rowBytes; | |
64 } | |
65 | |
66 return true; | |
67 } | |
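A caller sizing sketch may help here (illustrative only; compress_a8_example and the tightly packed rowBytes == width are assumptions, not part of this file): each 4x4 block becomes one uint64_t, so dst must provide (width/4) * (height/4) * 8 bytes.

    #include <cstdint>
    #include <vector>

    // A sketch of driving the 4x4 helper above with a correctly sized buffer.
    static bool compress_a8_example(const uint8_t* src, int width, int height,
                                    A84x4To64BitProc proc, std::vector<uint8_t>* out) {
        if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
            return false;  // mirror the helper's well-formedness check
        }
        out->resize((width / 4) * (height / 4) * sizeof(uint64_t));
        // rowBytes == width assumes tightly packed A8 pixels.
        return compress_4x4_a8_to_64bit(&(*out)[0], src, width, height, width, proc);
    }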
68 | |
69 //////////////////////////////////////////////////////////////////////////////// | |
70 // | |
71 // LATC compressor | |
72 // | |
73 //////////////////////////////////////////////////////////////////////////////// | |
74 | |
75 // LATC compresses texels into square 4x4 blocks | |
76 static const int kLATCPaletteSize = 8; | |
77 static const int kLATCBlockSize = 4; | |
78 static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize; | |
79 | |
80 // Generates an LATC palette. LATC constructs | |
81 // a palette of eight colors from LUM0 and LUM1 using the algorithm: | |
82 // | |
83 // LUM0, if lum0 > lum1 and code(x,y) == 0 | |
84 // LUM1, if lum0 > lum1 and code(x,y) == 1 | |
85 // (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2 | |
86 // (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3 | |
87 // (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4 | |
88 // (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5 | |
89 // (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6 | |
90 // ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7 | |
91 // | |
92 // LUM0, if lum0 <= lum1 and code(x,y) == 0 | |
93 // LUM1, if lum0 <= lum1 and code(x,y) == 1 | |
94 // (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2 | |
95 // (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3 | |
96 // (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4 | |
97 // ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5 | |
98 // 0, if lum0 <= lum1 and code(x,y) == 6 | |
99 // 255, if lum0 <= lum1 and code(x,y) == 7 | |
100 | |
101 static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) { | |
102 palette[0] = lum0; | |
103 palette[1] = lum1; | |
104 if (lum0 > lum1) { | |
105 for (int i = 1; i < 7; i++) { | |
106 palette[i+1] = ((7-i)*lum0 + i*lum1) / 7; | |
107 } | |
108 } else { | |
109 for (int i = 1; i < 5; i++) { | |
110 palette[i+1] = ((5-i)*lum0 + i*lum1) / 5; | |
111 } | |
112 palette[6] = 0; | |
113 palette[7] = 255; | |
114 } | |
115 } | |
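A worked example of the lum0 > lum1 branch (a standalone sketch with hand-picked values): lum0 = 180 and lum1 = 40 produce six evenly spaced interpolants between the endpoints.

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint8_t lum0 = 180, lum1 = 40;  // lum0 > lum1 branch
        uint8_t palette[8] = { lum0, lum1 };
        for (int i = 1; i < 7; i++) {
            palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
        }
        for (int i = 0; i < 8; ++i) {
            printf("%d ", palette[i]);  // prints: 180 40 160 140 120 100 80 60
        }
        printf("\n");
        return 0;
    }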
116 | |
117 // Compress a block by using the bounding box of the pixels. It is assumed that | |
118 // there are no extremal pixels in this block; otherwise we would have used | |
119 // compress_latc_block_bb_ignore_extremal. | |
120 static uint64_t compress_latc_block_bb(const uint8_t pixels[]) { | |
121 uint8_t minVal = 255; | |
122 uint8_t maxVal = 0; | |
123 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { | |
124 minVal = SkTMin(pixels[i], minVal); | |
125 maxVal = SkTMax(pixels[i], maxVal); | |
126 } | |
127 | |
128 SkASSERT(!is_extremal(minVal)); | |
129 SkASSERT(!is_extremal(maxVal)); | |
130 | |
131 uint8_t palette[kLATCPaletteSize]; | |
132 generate_latc_palette(palette, maxVal, minVal); | |
133 | |
134 uint64_t indices = 0; | |
135 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { | |
136 | |
137 // Find the best palette index | |
138 uint8_t bestError = abs_diff(pixels[i], palette[0]); | |
139 uint8_t idx = 0; | |
140 for (int j = 1; j < kLATCPaletteSize; ++j) { | |
141 uint8_t error = abs_diff(pixels[i], palette[j]); | |
142 if (error < bestError) { | |
143 bestError = error; | |
144 idx = j; | |
145 } | |
146 } | |
147 | |
148 indices <<= 3; | |
149 indices |= idx; | |
150 } | |
151 | |
152 return | |
153 SkEndian_SwapLE64( | |
154 static_cast<uint64_t>(maxVal) | | |
155 (static_cast<uint64_t>(minVal) << 8) | | |
156 (indices << 16)); | |
157 } | |
158 | |
159 // Compress a block by using the bounding box of the pixels without taking into | |
160 // account the extremal values. The generated palette will contain extremal values | |
161 // and fewer points along the line segment to interpolate. | |
162 static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) { | |
163 uint8_t minVal = 255; | |
164 uint8_t maxVal = 0; | |
165 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { | |
166 if (is_extremal(pixels[i])) { | |
167 continue; | |
168 } | |
169 | |
170 minVal = SkTMin(pixels[i], minVal); | |
171 maxVal = SkTMax(pixels[i], maxVal); | |
172 } | |
173 | |
174 SkASSERT(!is_extremal(minVal)); | |
175 SkASSERT(!is_extremal(maxVal)); | |
176 | |
177 uint8_t palette[kLATCPaletteSize]; | |
178 generate_latc_palette(palette, minVal, maxVal); | |
179 | |
180 uint64_t indices = 0; | |
181 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { | |
182 | |
183 // Find the best palette index | |
184 uint8_t idx = 0; | |
185 if (is_extremal(pixels[i])) { | |
186 if (0xFF == pixels[i]) { | |
187 idx = 7; | |
188 } else if (0 == pixels[i]) { | |
189 idx = 6; | |
190 } else { | |
191 SkFAIL("Pixel is extremal but not really?!"); | |
192 } | |
193 } else { | |
194 uint8_t bestError = abs_diff(pixels[i], palette[0]); | |
195 for (int j = 1; j < kLATCPaletteSize - 2; ++j) { | |
196 uint8_t error = abs_diff(pixels[i], palette[j]); | |
197 if (error < bestError) { | |
198 bestError = error; | |
199 idx = j; | |
200 } | |
201 } | |
202 } | |
203 | |
204 indices <<= 3; | |
205 indices |= idx; | |
206 } | |
207 | |
208 return | |
209 SkEndian_SwapLE64( | |
210 static_cast<uint64_t>(minVal) | | |
211 (static_cast<uint64_t>(maxVal) << 8) | | |
212 (indices << 16)); | |
213 } | |
214 | |
215 | |
216 // Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two | |
217 // values LUM0 and LUM1, and an index into the generated palette. Details of how | |
218 // the palette is generated can be found in the comments of generate_latc_palette above. | |
219 // | |
220 // We choose which palette type to use based on whether or not 'pixels' contains | |
221 // any extremal values (0 or 255). If there are extremal values, then we use the | |
222 // palette that has the extremal values built in. Otherwise, we use the full bounding | |
223 // box. | |
224 | |
225 static uint64_t compress_latc_block(const uint8_t pixels[]) { | |
226 // Collect unique pixels | |
227 int nUniquePixels = 0; | |
228 uint8_t uniquePixels[kLATCPixelsPerBlock]; | |
229 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { | |
230 bool foundPixel = false; | |
231 for (int j = 0; j < nUniquePixels; ++j) { | |
232 foundPixel = foundPixel || uniquePixels[j] == pixels[i]; | |
233 } | |
234 | |
235 if (!foundPixel) { | |
236 uniquePixels[nUniquePixels] = pixels[i]; | |
237 ++nUniquePixels; | |
238 } | |
239 } | |
240 | |
241 // If there's only one unique pixel, then our compression is easy. | |
242 if (1 == nUniquePixels) { | |
243 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8)); | |
244 | |
245 // Similarly, if there are only two unique pixels, then our compression is | |
246 // easy again: place the pixels in the block header, and assign the indices | |
247 // with one or zero depending on which pixel they belong to. | |
248 } else if (2 == nUniquePixels) { | |
249 uint64_t outBlock = 0; | |
250 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { | |
251 int idx = 0; | |
252 if (pixels[i] == uniquePixels[1]) { | |
253 idx = 1; | |
254 } | |
255 | |
256 outBlock <<= 3; | |
257 outBlock |= idx; | |
258 } | |
259 outBlock <<= 16; | |
260 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8)); | |
261 return SkEndian_SwapLE64(outBlock); | |
262 } | |
263 | |
264 // Count non-extremal pixel values | |
265 int nonExtremalPixels = 0; | |
266 for (int i = 0; i < nUniquePixels; ++i) { | |
267 if (!is_extremal(uniquePixels[i])) { | |
268 ++nonExtremalPixels; | |
269 } | |
270 } | |
271 | |
272 // If all the pixels are non-extremal, then compute the palette using | |
273 // the bounding box of all the pixels. | |
274 if (nonExtremalPixels == nUniquePixels) { | |
275 // This is really just for correctness, in all of my tests we | |
276 // never take this step. We don't lose too much perf here because | |
277 // most of the processing in this function is worth it for the | |
278 // 1 == nUniquePixels optimization. | |
279 return compress_latc_block_bb(pixels); | |
280 } else { | |
281 return compress_latc_block_bb_ignore_extremal(pixels); | |
282 } | |
283 } | |
284 | |
285 static inline bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src, | |
286 int width, int height, int rowBytes) { | |
287 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block); | |
288 } | |
289 | |
290 //////////////////////////////////////////////////////////////////////////////// | |
291 // | |
292 // R11 EAC Compressor | |
293 // | |
294 //////////////////////////////////////////////////////////////////////////////// | |
295 | |
296 // #define COMPRESS_R11_EAC_SLOW 1 | |
297 // #define COMPRESS_R11_EAC_FAST 1 | |
298 #define COMPRESS_R11_EAC_FASTEST 1 | |
299 | |
300 // Blocks compressed into R11 EAC are represented as follows: | |
301 // 0000000000000000000000000000000000000000000000000000000000000000 | |
302 // |base_cw|mul|mod| ----------------- indices ------------------- | |
303 // | |
304 // To reconstruct the value of a given pixel, we use the formula: | |
305 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8) | |
306 // | |
307 // mod_val is chosen from a palette of values based on the index of the | |
308 // given pixel. The palette is chosen by the value stored in mod. | |
309 // This formula returns a value between 0 and 2047, which is converted | |
310 // to a float from 0 to 1 in OpenGL. | |
311 // | |
312 // If mul is zero, then we set mul = 1/8, so that the formula becomes | |
313 // clamp[0, 2047](base_cw * 8 + 4 + mod_val) | |
314 | |
315 #if COMPRESS_R11_EAC_SLOW | |
316 | |
317 static const int kNumR11EACPalettes = 16; | |
318 static const int kR11EACPaletteSize = 8; | |
319 static const int kR11EACModifierPalettes[kNumR11EACPalettes][kR11EACPaletteSize] = { | |
320 {-3, -6, -9, -15, 2, 5, 8, 14}, | |
321 {-3, -7, -10, -13, 2, 6, 9, 12}, | |
322 {-2, -5, -8, -13, 1, 4, 7, 12}, | |
323 {-2, -4, -6, -13, 1, 3, 5, 12}, | |
324 {-3, -6, -8, -12, 2, 5, 7, 11}, | |
325 {-3, -7, -9, -11, 2, 6, 8, 10}, | |
326 {-4, -7, -8, -11, 3, 6, 7, 10}, | |
327 {-3, -5, -8, -11, 2, 4, 7, 10}, | |
328 {-2, -6, -8, -10, 1, 5, 7, 9}, | |
329 {-2, -5, -8, -10, 1, 4, 7, 9}, | |
330 {-2, -4, -8, -10, 1, 3, 7, 9}, | |
331 {-2, -5, -7, -10, 1, 4, 6, 9}, | |
332 {-3, -4, -7, -10, 2, 3, 6, 9}, | |
333 {-1, -2, -3, -10, 0, 1, 2, 9}, | |
334 {-4, -6, -8, -9, 3, 5, 7, 8}, | |
335 {-3, -5, -7, -9, 2, 4, 6, 8} | |
336 }; | |
337 | |
338 // Pack the base codeword, palette, and multiplier into the 64 bits necessary | |
339 // to decode it. | |
340 static uint64_t pack_r11eac_block(uint16_t base_cw, uint16_t palette, uint16_t multiplier, | |
341 uint64_t indices) { | |
342 SkASSERT(palette < 16); | |
343 SkASSERT(multiplier < 16); | |
344 SkASSERT(indices < (static_cast<uint64_t>(1) << 48)); | |
345 | |
346 const uint64_t b = static_cast<uint64_t>(base_cw) << 56; | |
347 const uint64_t m = static_cast<uint64_t>(multiplier) << 52; | |
348 const uint64_t p = static_cast<uint64_t>(palette) << 48; | |
349 return SkEndian_SwapBE64(b | m | p | indices); | |
350 } | |
351 | |
352 // Given a base codeword, a modifier, and a multiplier, compute the proper | |
353 // pixel value in the range [0, 2047]. | |
354 static uint16_t compute_r11eac_pixel(int base_cw, int modifier, int multiplier) { | |
355 int ret = (base_cw * 8 + 4) + (modifier * multiplier * 8); | |
356 return (ret > 2047)? 2047 : ((ret < 0)? 0 : ret); | |
357 } | |
358 | |
359 // Compress a block into R11 EAC format. | |
360 // The compression works as follows: | |
361 // 1. Find the center of the span of the block's values. Use this as the base codeword. | |
362 // 2. Choose a multiplier based roughly on the size of the span of block values. | |
363 // 3. Iterate through each palette and choose the one with the most accurate | |
364 // modifiers. | |
365 static inline uint64_t compress_heterogeneous_r11eac_block(const uint8_t block[16]) { | |
366 // Find the center of the data... | |
367 uint16_t bmin = block[0]; | |
368 uint16_t bmax = block[0]; | |
369 for (int i = 1; i < 16; ++i) { | |
370 bmin = SkTMin<uint16_t>(bmin, block[i]); | |
371 bmax = SkTMax<uint16_t>(bmax, block[i]); | |
372 } | |
373 | |
374 uint16_t center = (bmax + bmin) >> 1; | |
375 SkASSERT(center <= 255); | |
376 | |
377 // Based on the min and max, we can guesstimate a proper multiplier | |
378 // This is kind of a magic choice to start with. | |
379 uint16_t multiplier = (bmax - center) / 10; | |
380 | |
381 // Now convert the block to 11 bits and transpose it to match | |
382 // the proper layout | |
383 uint16_t cblock[16]; | |
384 for (int i = 0; i < 4; ++i) { | |
385 for (int j = 0; j < 4; ++j) { | |
386 int srcIdx = i*4+j; | |
387 int dstIdx = j*4+i; | |
388 cblock[dstIdx] = (block[srcIdx] << 3) | (block[srcIdx] >> 5); | |
389 } | |
390 } | |
391 | |
392 // Finally, choose the proper palette and indices | |
393 uint32_t bestError = 0xFFFFFFFF; | |
394 uint64_t bestIndices = 0; | |
395 uint16_t bestPalette = 0; | |
396 for (uint16_t paletteIdx = 0; paletteIdx < kNumR11EACPalettes; ++paletteIdx) { | |
397 const int *palette = kR11EACModifierPalettes[paletteIdx]; | |
398 | |
399 // Iterate through each pixel to find the best palette index | |
400 // and update the indices with the choice. Also store the error | |
401 // for this palette to be compared against the best error... | |
402 uint32_t error = 0; | |
403 uint64_t indices = 0; | |
404 for (int pixelIdx = 0; pixelIdx < 16; ++pixelIdx) { | |
405 const uint16_t pixel = cblock[pixelIdx]; | |
406 | |
407 // Iterate through each palette value to find the best index | |
408 // for this particular pixel for this particular palette. | |
409 uint16_t bestPixelError = | |
410 abs_diff(pixel, compute_r11eac_pixel(center, palette[0], multiplier)); | |
411 int bestIndex = 0; | |
412 for (int i = 1; i < kR11EACPaletteSize; ++i) { | |
413 const uint16_t p = compute_r11eac_pixel(center, palette[i], multiplier); | |
414 const uint16_t perror = abs_diff(pixel, p); | |
415 | |
416 // Is this index better? | |
417 if (perror < bestPixelError) { | |
418 bestIndex = i; | |
419 bestPixelError = perror; | |
420 } | |
421 } | |
422 | |
423 SkASSERT(bestIndex < 8); | |
424 | |
425 error += bestPixelError; | |
426 indices <<= 3; | |
427 indices |= bestIndex; | |
428 } | |
429 | |
430 SkASSERT(indices < (static_cast<uint64_t>(1) << 48)); | |
431 | |
432 // Is this palette better? | |
433 if (error < bestError) { | |
434 bestPalette = paletteIdx; | |
435 bestIndices = indices; | |
436 bestError = error; | |
437 } | |
438 } | |
439 | |
440 // Finally, pack everything together... | |
441 return pack_r11eac_block(center, bestPalette, multiplier, bestIndices); | |
442 } | |
443 #endif // COMPRESS_R11_EAC_SLOW | |
444 | |
445 #if COMPRESS_R11_EAC_FAST | |
446 // This function takes into account that most blocks that we compress have a gradation from | |
447 // fully opaque to fully transparent. The compression scheme works by selecting the | |
448 // palette and multiplier that has the tightest fit to the 0-255 range. This is encoded | |
449 // as the block header (0x8490). The indices are then selected by considering the top | |
450 // three bits of each alpha value. For alpha masks, this reduces the dynamic range from | |
451 // 17 to 8, but the quality is still acceptable. | |
452 // | |
453 // There are a few caveats that need to be taken care of... | |
454 // | |
455 // 1. The block is read in as scanlines, so the indices are stored as: | |
456 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | |
457 // However, the decompression routine reads them in column-major order, so they | |
458 // need to be packed as: | |
459 // 0 4 8 12 1 5 9 13 2 6 10 14 3 7 11 15 | |
460 // So when reading, they must be transposed. | |
461 // | |
462 // 2. We cannot use the top three bits as an index directly, since the R11 EAC palettes | |
463 // above store the modulation values first decreasing and then increasing: | |
464 // e.g. {-3, -6, -9, -15, 2, 5, 8, 14} | |
465 // Hence, we need to convert the indices with the following mapping: | |
466 // From: 0 1 2 3 4 5 6 7 | |
467 // To: 3 2 1 0 4 5 6 7 | |
468 static inline uint64_t compress_heterogeneous_r11eac_block(const uint8_t block[16]) { | |
469 uint64_t retVal = static_cast<uint64_t>(0x8490) << 48; | |
470 for(int i = 0; i < 4; ++i) { | |
471 for(int j = 0; j < 4; ++j) { | |
472 const int shift = 45-3*(j*4+i); | |
473 SkASSERT(shift <= 45); | |
474 const uint64_t idx = block[i*4+j] >> 5; | |
475 SkASSERT(idx < 8); | |
476 | |
477 // !SPEED! This is slightly faster than having an if-statement. | |
478 switch(idx) { | |
479 case 0: | |
480 case 1: | |
481 case 2: | |
482 case 3: | |
483 retVal |= (3-idx) << shift; | |
484 break; | |
485 default: | |
486 retVal |= idx << shift; | |
487 break; | |
488 } | |
489 } | |
490 } | |
491 | |
492 return SkEndian_SwapBE64(retVal); | |
493 } | |
494 #endif // COMPRESS_R11_EAC_FAST | |
495 | |
496 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | |
497 static uint64_t compress_r11eac_block(const uint8_t block[16]) { | |
498 // Are all blocks a solid color? | |
499 bool solid = true; | |
500 for (int i = 1; i < 16; ++i) { | |
501 if (block[i] != block[0]) { | |
502 solid = false; | |
503 break; | |
504 } | |
505 } | |
506 | |
507 if (solid) { | |
508 switch(block[0]) { | |
509 // Fully transparent? We know the encoding... | |
510 case 0: | |
511 // (0x0020 << 48) produces the following: | |
512 // base_cw: 0 | |
513 // mod: 0, palette: {-3, -6, -9, -15, 2, 5, 8, 14} | |
514 // multiplier: 2 | |
515 // mod_val: -3 | |
516 // | |
517 // this gives the following formula: | |
518 // clamp[0, 2047](0*8+4+(-3)*2*8) = 0 | |
519 // | |
520 // Furthermore, it is impervious to endianness: | |
521 // 0x0020000000002000ULL | |
522 // will produce one pixel with index 2, which gives: | |
523 // clamp[0, 2047](0*8+4+(-9)*2*8) = 0 | |
524 return 0x0020000000002000ULL; | |
525 | |
526 // Fully opaque? We know this encoding too... | |
527 case 255: | |
528 | |
529 // -1 produces the following: | |
530 // base_cw: 255 | |
531 // mod: 15, palette: {-3, -5, -7, -9, 2, 4, 6, 8} | |
532 // multiplier: 15, mod_val: 8 | |
533 // | |
534 // this gives the following formula: | |
535 // clamp[0, 2047](255*8+4+8*15*8) = clamp[0, 2047](3004) = 2047 | |
536 return 0xFFFFFFFFFFFFFFFFULL; | |
537 | |
538 default: | |
539 // !TODO! krajcevski: | |
540 // This will probably never happen, since we're using this format | |
541 // primarily for compressing alpha maps. Usually the only | |
542 // non-fully opaque or fully transparent blocks are not a solid | |
543 // intermediate color. If we notice that they are, then we can | |
544 // add another optimization... | |
545 break; | |
546 } | |
547 } | |
548 | |
549 return compress_heterogeneous_r11eac_block(block); | |
550 } | |
551 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | |
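The hard-coded transparent block can be verified mechanically. This sketch (assuming the bit layout and reconstruction formula from the format comment above) decodes 0x0020000000002000ULL and confirms every pixel clamps to zero:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static void check_transparent_block() {
        const uint64_t block = 0x0020000000002000ULL;
        const int palette[8] = {-3, -6, -9, -15, 2, 5, 8, 14};       // mod table 0
        const int base_cw = static_cast<int>((block >> 56) & 0xFF);  // 0
        const int mul = static_cast<int>((block >> 52) & 0xF);       // 2
        assert(0 == ((block >> 48) & 0xF));                          // mod table index 0
        for (int i = 0; i < 16; ++i) {
            const int idx = static_cast<int>((block >> (45 - 3*i)) & 0x7);
            const int raw = base_cw * 8 + 4 + palette[idx] * mul * 8;
            assert(0 == std::max(0, std::min(2047, raw)));
        }
    }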
552 | |
553 #if COMPRESS_R11_EAC_FASTEST | |
554 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { | |
555 // If our 3-bit block indices are laid out as: | |
556 // a b c d | |
557 // e f g h | |
558 // i j k l | |
559 // m n o p | |
560 // | |
561 // This function expects topRows and bottomRows to contain the first two rows | |
562 // of indices interleaved in the least significant bits of a and b. In other words... | |
563 // | |
564 // If the architecture is big endian, then topRows and bottomRows will contain the following: | |
565 // Bits 31-0: | |
566 // a: 00 a e 00 b f 00 c g 00 d h | |
567 // b: 00 i m 00 j n 00 k o 00 l p | |
568 // | |
569 // If the architecture is little endian, then topRows and bottomRows will contain | |
570 // the following: | |
571 // Bits 31-0: | |
572 // a: 00 d h 00 c g 00 b f 00 a e | |
573 // b: 00 l p 00 k o 00 j n 00 i m | |
574 // | |
575 // This function returns a 48-bit packing of the form: | |
576 // a e i m b f j n c g k o d h l p | |
577 // | |
578 // !SPEED! this function might be even faster if certain SIMD intrinsics are | |
579 // used.. | |
580 | |
581 // For both architectures, we can figure out a packing of the bits by | |
582 // using a shuffle and a few shift-rotates... | |
583 uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>(bottomRows); | |
584 | |
585 // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p | |
586 | |
587 uint64_t t = (x ^ (x >> 10)) & 0x3FC0003FC00000ULL; | |
588 x = x ^ t ^ (t << 10); | |
589 | |
590 // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p | |
591 | |
592 x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 16; | |
593 | |
594 // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n | |
595 | |
596 t = (x ^ (x >> 6)) & 0xFC0000ULL; | |
597 x = x ^ t ^ (t << 6); | |
598 | |
599 #if defined (SK_CPU_BENDIAN) | |
600 // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n | |
601 | |
602 t = (x ^ (x >> 36)) & 0x3FULL; | |
603 x = x ^ t ^ (t << 36); | |
604 | |
605 // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p | |
606 | |
607 t = (x ^ (x >> 12)) & 0xFFF000000ULL; | |
608 x = x ^ t ^ (t << 12); | |
609 | |
610 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | |
611 return x; | |
612 #else | |
613 // If our CPU is little endian, then the above logic will | |
614 // produce the following indices: | |
615 // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o | |
616 | |
617 t = (x ^ (x >> 36)) & 0xFC0ULL; | |
618 x = x ^ t ^ (t << 36); | |
619 | |
620 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o | |
621 | |
622 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL); | |
623 | |
624 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p | |
625 | |
626 return x; | |
627 #endif | |
628 } | |
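A naive reference packer makes the target layout concrete and gives the bit-twiddled version something to be tested against (hypothetical helper, not part of this file): walk the 16 row-major indices in column-major order and append each 3-bit value, yielding a e i m b f j n c g k o d h l p.

    #include <cstdint>

    static uint64_t interleave6_reference(const uint8_t idx[16]) {
        uint64_t packed = 0;
        for (int col = 0; col < 4; ++col) {      // a e i m, then b f j n, ...
            for (int row = 0; row < 4; ++row) {
                packed = (packed << 3) | (idx[row*4 + col] & 0x7);
            }
        }
        return packed;  // 48 bits used
    }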
629 | |
630 // This function converts an integer containing four bytes of alpha | |
631 // values into an integer containing four bytes of indices into R11 EAC. | |
632 // Note, there needs to be a mapping of indices: | |
633 // 0 1 2 3 4 5 6 7 | |
634 // 3 2 1 0 4 5 6 7 | |
635 // | |
636 // To compute this, we first negate each byte, and then add three, which | |
637 // gives the mapping | |
638 // 3 2 1 0 -1 -2 -3 -4 | |
639 // | |
640 // Then we mask out the negative values, take their absolute value, and | |
641 // add three. | |
642 // | |
643 // Most of the voodoo in this function comes from Hacker's Delight, section 2-18 | |
644 static inline uint32_t convert_indices(uint32_t x) { | |
645 // Take the top three bits... | |
646 x = (x & 0xE0E0E0E0) >> 5; | |
647 | |
648 // Negate... | |
649 x = ~((0x80808080 - x) ^ 0x7F7F7F7F); | |
650 | |
651 // Add three | |
652 const uint32_t s = (x & 0x7F7F7F7F) + 0x03030303; | |
653 x = ((x ^ 0x03030303) & 0x80808080) ^ s; | |
654 | |
655 // Absolute value | |
656 const uint32_t a = x & 0x80808080; | |
657 const uint32_t b = a >> 7; | |
658 | |
659 // Aside: mask negatives (m is three if the byte was negative) | |
660 const uint32_t m = (a >> 6) | b; | |
661 | |
662 // .. continue absolute value | |
663 x = (x ^ ((a - b) | a)) + b; | |
664 | |
665 // Add three | |
666 return x + m; | |
667 } | |
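A quick self-check of the mapping (a sketch that assumes it sits in this file next to convert_indices): replicate each possible top-three-bit value into all four byte lanes and compare against the expected table.

    #include <cassert>
    #include <cstdint>

    static void check_convert_indices() {
        static const uint32_t expected[8] = {3, 2, 1, 0, 4, 5, 6, 7};
        for (uint32_t i = 0; i < 8; ++i) {
            const uint32_t alphas = (i << 5) * 0x01010101;  // top 3 bits == i in each byte
            assert(convert_indices(alphas) == expected[i] * 0x01010101);
        }
    }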
668 | |
669 // This function follows the same basic procedure as compress_heterogeneous_r11eac_block | |
670 // above when COMPRESS_R11_EAC_FAST is defined, but it avoids a few loads/stores and | |
671 // tries to optimize where it can using SIMD. | |
672 static uint64_t compress_r11eac_block_fast(const uint8_t* src, int rowBytes) { | |
673 // Store each row of alpha values in an integer | |
674 const uint32_t alphaRow1 = *(reinterpret_cast<const uint32_t*>(src)); | |
675 const uint32_t alphaRow2 = *(reinterpret_cast<const uint32_t*>(src + rowBytes)); | |
676 const uint32_t alphaRow3 = *(reinterpret_cast<const uint32_t*>(src + 2*rowBytes)); | |
677 const uint32_t alphaRow4 = *(reinterpret_cast<const uint32_t*>(src + 3*rowBytes)); | |
678 | |
679 // Check for solid blocks. The explanations for these values | |
680 // can be found in the comments of compress_r11eac_block above | |
681 if (alphaRow1 == alphaRow2 && alphaRow1 == alphaRow3 && alphaRow1 == alphaRow4) { | |
682 if (0 == alphaRow1) { | |
683 // Fully transparent block | |
684 return 0x0020000000002000ULL; | |
685 } else if (0xFFFFFFFF == alphaRow1) { | |
686 // Fully opaque block | |
687 return 0xFFFFFFFFFFFFFFFFULL; | |
688 } | |
689 } | |
690 | |
691 // Convert each integer of alpha values into an integer of indices | |
692 const uint32_t indexRow1 = convert_indices(alphaRow1); | |
693 const uint32_t indexRow2 = convert_indices(alphaRow2); | |
694 const uint32_t indexRow3 = convert_indices(alphaRow3); | |
695 const uint32_t indexRow4 = convert_indices(alphaRow4); | |
696 | |
697 // Interleave the indices from the top two rows and bottom two rows | |
698 // prior to passing them to interleave6. Since each index is at most | |
699 // three bits, each byte can hold two indices... The way that the | |
700 // compression scheme expects the packing allows us to efficiently pack | |
701 // the top two rows and bottom two rows. Interleaving each 6-bit sequence | |
702 // and tightly packing it into a uint64_t is a little trickier, which is | |
703 // taken care of in interleave6. | |
704 const uint32_t r1r2 = (indexRow1 << 3) | indexRow2; | |
705 const uint32_t r3r4 = (indexRow3 << 3) | indexRow4; | |
706 const uint64_t indices = interleave6(r1r2, r3r4); | |
707 | |
708 // Return the packed indices in the least significant bits with the magic header | |
709 return SkEndian_SwapBE64(0x8490000000000000ULL | indices); | |
710 } | |
711 | |
712 static bool compress_a8_to_r11eac_fast(uint8_t* dst, const uint8_t* src, | |
713 int width, int height, int rowBytes) { | |
714 // Make sure that our data is well-formed enough to be considered for compression | |
715 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) { | |
716 return false; | |
717 } | |
718 | |
719 const int blocksX = width >> 2; | |
720 const int blocksY = height >> 2; | |
721 | |
722 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst); | |
723 for (int y = 0; y < blocksY; ++y) { | |
724 for (int x = 0; x < blocksX; ++x) { | |
725 // Compress it | |
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); | |
727 ++encPtr; | |
728 } | |
729 src += 4 * rowBytes; | |
730 } | |
731 return true; | |
732 } | |
733 #endif // COMPRESS_R11_EAC_FASTEST | |
734 | |
735 // The R11 EAC format expects that indices are given in column-major order. Since | |
736 // we receive alpha values in raster order, this usually means that we have to use | |
737 // interleave6 above to properly pack our indices. However, if our indices come from the | |
738 // blitter, then each integer will be a column of indices, and hence can be efficiently | |
739 // packed. This function takes the bottom three bits of each byte and places them in | |
740 // the least significant 12 bits of the resulting integer. | |
741 static inline uint32_t pack_indices_vertical(uint32_t x) { | |
742 #if defined (SK_CPU_BENDIAN) | |
743 return | |
744 (x & 7) | | |
745 ((x >> 5) & (7 << 3)) | | |
746 ((x >> 10) & (7 << 6)) | | |
747 ((x >> 15) & (7 << 9)); | |
748 #else | |
749 return | |
750 ((x >> 24) & 7) | | |
751 ((x >> 13) & (7 << 3)) | | |
752 ((x >> 2) & (7 << 6)) | | |
753 ((x << 9) & (7 << 9)); | |
754 #endif | |
755 } | |
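For instance (a little-endian sketch; the first column byte is the low byte of the loaded integer, and the byte order flips on big-endian): a column holding indices 1, 2, 3, 4 from top to bottom packs with the top index in the most significant 3-bit group.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static void check_pack_vertical() {
        const uint8_t column[4] = {1, 2, 3, 4};  // top of the column first in memory
        uint32_t x;
        memcpy(&x, column, sizeof(x));           // 0x04030201 on little-endian
        assert(pack_indices_vertical(x) == ((1u << 9) | (2u << 6) | (3u << 3) | 4u));
    }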
756 | |
757 // This function returns the compressed format of a block given as four columns of | |
758 // alpha values. Each column is assumed to be loaded from top to bottom, and hence | |
759 // must first be converted to indices and then packed into the resulting 64-bit | |
760 // integer. | |
761 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0, | |
762 const uint32_t alphaColumn1, | |
763 const uint32_t alphaColumn2, | |
764 const uint32_t alphaColumn3) { | |
765 | |
766 if (alphaColumn0 == alphaColumn1 && | |
767 alphaColumn2 == alphaColumn3 && | |
768 alphaColumn0 == alphaColumn2) { | |
769 | |
770 if (0 == alphaColumn0) { | |
771 // Transparent | |
772 return 0x0020000000002000ULL; | |
773 } | |
774 else if (0xFFFFFFFF == alphaColumn0) { | |
775 // Opaque | |
776 return 0xFFFFFFFFFFFFFFFFULL; | |
777 } | |
778 } | |
779 | |
780 const uint32_t indexColumn0 = convert_indices(alphaColumn0); | |
781 const uint32_t indexColumn1 = convert_indices(alphaColumn1); | |
782 const uint32_t indexColumn2 = convert_indices(alphaColumn2); | |
783 const uint32_t indexColumn3 = convert_indices(alphaColumn3); | |
784 | |
785 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); | |
786 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); | |
787 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); | |
788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); | |
789 | |
790 return SkEndian_SwapBE64(0x8490000000000000ULL | | |
791 (static_cast<uint64_t>(packedIndexColumn0) << 36) | | |
792 (static_cast<uint64_t>(packedIndexColumn1) << 24) | | |
793 static_cast<uint64_t>(packedIndexColumn2 << 12) | | |
794 static_cast<uint64_t>(packedIndexColumn3)); | |
795 | |
796 } | |
797 | |
798 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, | |
799 int width, int height, int rowBytes) { | |
800 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) | |
801 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block); | |
802 #elif COMPRESS_R11_EAC_FASTEST | |
803 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); | |
804 #else | |
805 #error "Must choose R11 EAC algorithm" | |
806 #endif | |
807 } | |
808 | |
809 // Updates the block whose columns are stored in blockColN. curAlphai is expected | |
810 // to store, as an integer, the four alpha values that will be placed within each | |
811 // of the columns in the range [col, col+colsLeft). | |
812 static inline void update_block_columns( | |
813 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blockCol4, | |
814 const int col, const int colsLeft, const uint32_t curAlphai) { | |
815 SkASSERT(NULL != blockCol1); | |
816 SkASSERT(NULL != blockCol2); | |
817 SkASSERT(NULL != blockCol3); | |
818 SkASSERT(NULL != blockCol4); | |
819 SkASSERT(col + colsLeft <= 4); | |
820 for (int i = col; i < (col + colsLeft); ++i) { | |
821 switch(i) { | |
822 case 0: | |
823 *blockCol1 = curAlphai; | |
824 break; | |
825 case 1: | |
826 *blockCol2 = curAlphai; | |
827 break; | |
828 case 2: | |
829 *blockCol3 = curAlphai; | |
830 break; | |
831 case 3: | |
832 *blockCol4 = curAlphai; | |
833 break; | |
834 } | |
835 } | |
836 } | |
837 | |
838 //////////////////////////////////////////////////////////////////////////////// | |
839 | 19 |
840 namespace SkTextureCompressor { | 20 namespace SkTextureCompressor { |
841 | 21 |
842 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { | 22 int GetCompressedDataSize(Format fmt, int width, int height) { |
843 switch (fmt) { | 23 switch (fmt) { |
844 // These formats are 64 bits per 4x4 block. | 24 // These formats are 64 bits per 4x4 block. |
845 case kR11_EAC_Format: | 25 case kR11_EAC_Format: |
846 case kLATC_Format: | 26 case kLATC_Format: |
847 { | 27 { |
848 static const int kLATCEncodedBlockSize = 8; | 28 static const int kBlockDimension = 4; |
| 29 static const int kEncodedBlockSize = 8; |
849 | 30 |
850 const int blocksX = width / kLATCBlockSize; | 31 if(((width % kBlockDimension) == 0) && ((height % kBlockDimension) == 0)) { |
851 const int blocksY = height / kLATCBlockSize; | |
852 | 32 |
853 return blocksX * blocksY * kLATCEncodedBlockSize; | 33 const int blocksX = width / kBlockDimension; |
| 34 const int blocksY = height / kBlockDimension; |
| 35 |
| 36 return blocksX * blocksY * kEncodedBlockSize; |
| 37 } |
| 38 |
| 39 return -1; |
854 } | 40 } |
855 | 41 |
856 default: | 42 default: |
857 SkFAIL("Unknown compressed format!"); | 43 SkFAIL("Unknown compressed format!"); |
858 return 0; | 44 return -1; |
859 } | 45 } |
860 } | 46 } |
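A usage sketch for the new public helper: a 16x16 A8 bitmap holds sixteen 4x4 blocks of 8 bytes each, and dimensions that are not multiples of four now report -1 instead of silently truncating.

    // Assumes the declarations from SkTextureCompressor.h.
    int size = SkTextureCompressor::GetCompressedDataSize(
        SkTextureCompressor::kR11_EAC_Format, 16, 16);
    SkASSERT(128 == size);   // (16/4) * (16/4) blocks * 8 bytes each
    SkASSERT(-1 == SkTextureCompressor::GetCompressedDataSize(
        SkTextureCompressor::kLATC_Format, 10, 10));  // not block-aligned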
861 | 47 |
862 bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcColorType, | 48 bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcColorType, |
863 int width, int height, int rowBytes, Format format, bool opt) { | 49 int width, int height, int rowBytes, Format format, bool opt) { |
864 CompressionProc proc = NULL; | 50 CompressionProc proc = NULL; |
865 if (opt) { | 51 if (opt) { |
866 proc = SkTextureCompressorGetPlatformProc(srcColorType, format); | 52 proc = SkTextureCompressorGetPlatformProc(srcColorType, format); |
867 } | 53 } |
868 | 54 |
869 if (NULL == proc) { | 55 if (NULL == proc) { |
870 switch (srcColorType) { | 56 switch (srcColorType) { |
871 case kAlpha_8_SkColorType: | 57 case kAlpha_8_SkColorType: |
872 { | 58 { |
873 switch (format) { | 59 switch (format) { |
874 case kLATC_Format: | 60 case kLATC_Format: |
875 proc = compress_a8_to_latc; | 61 proc = CompressA8ToLATC; |
876 break; | 62 break; |
877 case kR11_EAC_Format: | 63 case kR11_EAC_Format: |
878 proc = compress_a8_to_r11eac; | 64 proc = CompressA8ToR11EAC; |
879 break; | 65 break; |
880 default: | 66 default: |
881 // Do nothing... | 67 // Do nothing... |
882 break; | 68 break; |
883 } | 69 } |
884 } | 70 } |
885 break; | 71 break; |
886 | 72 |
887 default: | 73 default: |
888 // Do nothing... | 74 // Do nothing... |
889 break; | 75 break; |
890 } | 76 } |
891 } | 77 } |
892 | 78 |
893 if (NULL != proc) { | 79 if (NULL != proc) { |
894 return proc(dst, src, width, height, rowBytes); | 80 return proc(dst, src, width, height, rowBytes); |
895 } | 81 } |
896 | 82 |
897 return false; | 83 return false; |
898 } | 84 } |
899 | 85 |
900 SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) { | 86 SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) { |
901 SkAutoLockPixels alp(bitmap); | 87 SkAutoLockPixels alp(bitmap); |
902 | 88 |
903 int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bitmap.height()); | 89 int compressedDataSize = GetCompressedDataSize(format, bitmap.width(), bitmap.height()); |
| 90 if (compressedDataSize < 0) { |
| 91 return NULL; |
| 92 } |
| 93 |
904 const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels()); | 94 const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels()); |
905 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize)); | 95 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize)); |
| 96 |
906 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bitmap.height(), | 97 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bitmap.height(), |
907 bitmap.rowBytes(), format)) { | 98 bitmap.rowBytes(), format)) { |
908 return SkData::NewFromMalloc(dst, compressedDataSize); | 99 return SkData::NewFromMalloc(dst, compressedDataSize); |
909 } | 100 } |
910 | 101 |
911 sk_free(dst); | 102 sk_free(dst); |
912 return NULL; | 103 return NULL; |
913 } | 104 } |
914 | 105 |
915 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) | 106 SkBlitter* CreateBlitterForFormat(int width, int height, void* compressedBuffer, Format format) { |
916 // 0x7FFE is one minus the largest positive 16-bit int. We use it for | 107 switch(format) { |
917 // debugging to make sure that we're properly setting the nextX distance | 108 case kLATC_Format: |
918 // in flushRuns(). | 109 return CreateLATCBlitter(width, height, compressedBuffer); |
919 : kLongestRun(0x7FFE), kZeroAlpha(0) | |
920 , fNextRun(0) | |
921 , fWidth(width) | |
922 , fHeight(height) | |
923 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) | |
924 { | |
925 SkASSERT((width % kR11_EACBlockSz) == 0); | |
926 SkASSERT((height % kR11_EACBlockSz) == 0); | |
927 } | |
928 | 110 |
929 void R11_EACBlitter::blitAntiH(int x, int y, | 111 case kR11_EAC_Format: |
930 const SkAlpha* antialias, | 112 return CreateR11EACBlitter(width, height, compressedBuffer); |
931 const int16_t* runs) { | 113 |
932 // Make sure that the new row to blit is either the first | 114 default: |
933 // row that we're blitting, or it's exactly the next scan row | 115 return NULL; |
934 // since the last row that we blitted. This is to ensure that when | |
935 // we go to flush the runs, they all belong to the same four | |
936 // rows. | |
937 if (fNextRun > 0 && | |
938 ((x != fBufferedRuns[fNextRun-1].fX) || | |
939 (y-1 != fBufferedRuns[fNextRun-1].fY))) { | |
940 this->flushRuns(); | |
941 } | 116 } |
942 | 117 |
943 // Align the rows to a block boundary. If we receive rows that | 118 return NULL; |
944 // are not on a block boundary, then fill in the preceding runs | |
945 // with zeros. We do this by producing a single RLE that says | |
946 // that we have 0x7FFE pixels of zero (0x7FFE = 32766). | |
947 const int row = y & ~3; | |
948 while ((row + fNextRun) < y) { | |
949 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha; | |
950 fBufferedRuns[fNextRun].fRuns = &kLongestRun; | |
951 fBufferedRuns[fNextRun].fX = 0; | |
952 fBufferedRuns[fNextRun].fY = row + fNextRun; | |
953 ++fNextRun; | |
954 } | |
955 | |
956 // Make sure that our assumptions aren't violated... | |
957 SkASSERT(fNextRun == (y & 3)); | |
958 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y); | |
959 | |
960 // Set the values of the next run | |
961 fBufferedRuns[fNextRun].fAlphas = antialias; | |
962 fBufferedRuns[fNextRun].fRuns = runs; | |
963 fBufferedRuns[fNextRun].fX = x; | |
964 fBufferedRuns[fNextRun].fY = y; | |
965 | |
966 // If we've output four scanlines in a row that don't violate our | |
967 // assumptions, then it's time to flush them... | |
968 if (4 == ++fNextRun) { | |
969 this->flushRuns(); | |
970 } | |
971 } | |
972 | |
973 void R11_EACBlitter::flushRuns() { | |
974 | |
975 // If we don't have any runs, then just return. | |
976 if (0 == fNextRun) { | |
977 return; | |
978 } | |
979 | |
980 #ifndef NDEBUG | |
981 // Make sure that if we have any runs, they all match | |
982 for (int i = 1; i < fNextRun; ++i) { | |
983 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1); | |
984 SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX); | |
985 } | |
986 #endif | |
987 | |
988 // If we dont have as many runs as we have rows, fill in the remaining | |
989 // runs with constant zeros. | |
990 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) { | |
991 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i; | |
992 fBufferedRuns[i].fX = fBufferedRuns[0].fX; | |
993 fBufferedRuns[i].fAlphas = &kZeroAlpha; | |
994 fBufferedRuns[i].fRuns = &kLongestRun; | |
995 } | |
996 | |
997 // Make sure that our assumptions aren't violated. | |
998 SkASSERT(fNextRun > 0 && fNextRun <= 4); | |
999 SkASSERT((fBufferedRuns[0].fY & 3) == 0); | |
1000 | |
1001 // The following logic walks four rows at a time and outputs compressed | |
1002 // blocks to the buffer passed into the constructor. | |
1003 // We do the following: | |
1004 // | |
1005 // c1 c2 c3 c4 | |
1006 // ----------------------------------------------------------------------- | |
1007 // ... | | | | | ----> fBufferedRuns[0] | |
1008 // ----------------------------------------------------------------------- | |
1009 // ... | | | | | ----> fBufferedRuns[1] | |
1010 // ----------------------------------------------------------------------- | |
1011 // ... | | | | | ----> fBufferedRuns[2] | |
1012 // ----------------------------------------------------------------------- | |
1013 // ... | | | | | ----> fBufferedRuns[3] | |
1014 // ----------------------------------------------------------------------- | |
1015 // | |
1016 // curX -- the macro X value that we've gotten to. | |
1017 // c1, c2, c3, c4 -- the integers that represent the columns of the current block | |
1018 // that we're operating on | |
1019 // curAlphaColumn -- integer containing the column of alpha values from fBufferedRuns. | |
1020 // nextX -- for each run, the next point at which we need to update curAlphaColumn | |
1021 // after the value of curX. | |
1022 // finalX -- the minimum of all the nextX values. | |
1023 // | |
1024 // curX advances to finalX outputting any blocks that it passes along | |
1025 // the way. Since finalX will not change when we reach the end of a | |
1026 // run, the termination criteria will be whenever curX == finalX at the | |
1027 // end of a loop. | |
1028 | |
1029 // Setup: | |
1030 uint32_t c1 = 0; | |
1031 uint32_t c2 = 0; | |
1032 uint32_t c3 = 0; | |
1033 uint32_t c4 = 0; | |
1034 | |
1035 uint32_t curAlphaColumn = 0; | |
1036 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); | |
1037 | |
1038 int nextX[kR11_EACBlockSz]; | |
1039 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
1040 nextX[i] = 0x7FFFFF; | |
1041 } | |
1042 | |
1043 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); | |
1044 | |
1045 // Populate the first set of runs and figure out how far we need to | |
1046 // advance on the first step | |
1047 int curX = 0; | |
1048 int finalX = 0xFFFFF; | |
1049 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
1050 nextX[i] = *(fBufferedRuns[i].fRuns); | |
1051 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
1052 | |
1053 finalX = SkMin32(nextX[i], finalX); | |
1054 } | |
1055 | |
1056 // Make sure that we have a valid right-bound X value | |
1057 SkASSERT(finalX < 0xFFFFF); | |
1058 | |
1059 // Run the blitter... | |
1060 while (curX != finalX) { | |
1061 SkASSERT(finalX >= curX); | |
1062 | |
1063 // Do we need to populate the rest of the block? | |
1064 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { | |
1065 const int col = curX & 3; | |
1066 const int colsLeft = 4 - col; | |
1067 SkASSERT(curX + colsLeft <= finalX); | |
1068 | |
1069 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); | |
1070 | |
1071 // Write this block | |
1072 *outPtr = compress_block_vertical(c1, c2, c3, c4); | |
1073 ++outPtr; | |
1074 curX += colsLeft; | |
1075 } | |
1076 | |
1077 // If we can advance even further, then just keep memsetting the block | |
1078 if ((finalX - curX) >= kR11_EACBlockSz) { | |
1079 SkASSERT((curX & 3) == 0); | |
1080 | |
1081 const int col = 0; | |
1082 const int colsLeft = kR11_EACBlockSz; | |
1083 | |
1084 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); | |
1085 | |
1086 // While we can keep advancing, just keep writing the block. | |
1087 uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4); | |
1088 while((finalX - curX) >= kR11_EACBlockSz) { | |
1089 *outPtr = lastBlock; | |
1090 ++outPtr; | |
1091 curX += kR11_EACBlockSz; | |
1092 } | |
1093 } | |
1094 | |
1095 // If we haven't advanced within the block then do so. | |
1096 if (curX < finalX) { | |
1097 const int col = curX & 3; | |
1098 const int colsLeft = finalX - curX; | |
1099 | |
1100 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); | |
1101 | |
1102 curX += colsLeft; | |
1103 } | |
1104 | |
1105 SkASSERT(curX == finalX); | |
1106 | |
1107 // Figure out what the next advancement is... | |
1108 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
1109 if (nextX[i] == finalX) { | |
1110 const int16_t run = *(fBufferedRuns[i].fRuns); | |
1111 fBufferedRuns[i].fRuns += run; | |
1112 fBufferedRuns[i].fAlphas += run; | |
1113 curAlpha[i] = *(fBufferedRuns[i].fAlphas); | |
1114 nextX[i] += *(fBufferedRuns[i].fRuns); | |
1115 } | |
1116 } | |
1117 | |
1118 finalX = 0xFFFFF; | |
1119 for (int i = 0; i < kR11_EACBlockSz; ++i) { | |
1120 finalX = SkMin32(nextX[i], finalX); | |
1121 } | |
1122 } | |
1123 | |
1124 // If we didn't land on a block boundary, output the block... | |
1125 if ((curX & 3) > 1) { | |
1126 *outPtr = compress_block_vertical(c1, c2, c3, c4); | |
1127 } | |
1128 | |
1129 fNextRun = 0; | |
1130 } | 119 } |
1131 | 120 |
1132 } // namespace SkTextureCompressor | 121 } // namespace SkTextureCompressor |