OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
#include "SkTextureCompressor_LATC.h"
#include "SkTextureCompressor_Blitter.h"

#include "SkEndian.h"

#include <string.h>
11 | 12 |
13 // Compression options. In general, the slow version is much more accurate, but | |
14 // much slower. The fast option is much faster, but much less accurate. YMMV. | |
15 #define COMPRESS_LATC_SLOW 0 | |
16 #define COMPRESS_LATC_FAST 1 | |
17 | |
18 //////////////////////////////////////////////////////////////////////////////// | |
19 | |
20 #if COMPRESS_LATC_SLOW | |
21 | |
12 //////////////////////////////////////////////////////////////////////////////// | 22 //////////////////////////////////////////////////////////////////////////////// |
13 // | 23 // |
14 // Utility Functions | 24 // Utility Functions |
15 // | 25 // |
16 //////////////////////////////////////////////////////////////////////////////// | 26 //////////////////////////////////////////////////////////////////////////////// |
17 | 27 |
// Absolute difference between two values. More correct than SkTAbs(a - b)
// because it works on unsigned values.
template <typename T> inline T abs_diff(const T &a, const T &b) {
    if (a < b) {
        return b - a;
    }
    return a - b;
}
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
271 // This is really just for correctness, in all of my tests we | 281 // This is really just for correctness, in all of my tests we |
272 // never take this step. We don't lose too much perf here because | 282 // never take this step. We don't lose too much perf here because |
273 // most of the processing in this function is worth it for the | 283 // most of the processing in this function is worth it for the |
274 // 1 == nUniquePixels optimization. | 284 // 1 == nUniquePixels optimization. |
275 return compress_latc_block_bb(pixels); | 285 return compress_latc_block_bb(pixels); |
276 } else { | 286 } else { |
277 return compress_latc_block_bb_ignore_extremal(pixels); | 287 return compress_latc_block_bb_ignore_extremal(pixels); |
278 } | 288 } |
279 } | 289 } |
280 | 290 |
291 #endif // COMPRESS_LATC_SLOW | |
292 | |
293 //////////////////////////////////////////////////////////////////////////////// | |
294 | |
295 #if COMPRESS_LATC_FAST | |
296 | |
// Take the top three indices of each int and pack them into the low 12
// bits of the integer.
//
// NOTE: the deprecated 'register' qualifier was removed from the parameter;
// it is a compiler-ignored hint and is removed outright in C++17.
static inline uint32_t convert_index(uint32_t x) {
    // Since the palette is
    // 255, 0, 219, 182, 146, 109, 73, 36
    // we need to map the high three bits of each byte in the integer
    // from
    // 0 1 2 3 4 5 6 7
    // to
    // 1 7 6 5 4 3 2 0
    //
    // This first operation takes the mapping from
    // 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
    x = 0x07070707 - ((x >> 5) & 0x07070707);

    // mask is 1 if index is non-zero
    const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;

    // add mask:
    // 7 6 5 4 3 2 1 0  -->  8 7 6 5 4 3 2 0
    x = (x + mask);

    // Handle overflow:
    // 8 7 6 5 4 3 2 0  -->  9 7 6 5 4 3 2 0
    x |= (x >> 3) & 0x01010101;

    // Mask out high bits:
    // 9 7 6 5 4 3 2 0  -->  1 7 6 5 4 3 2 0
    x &= 0x07070707;

    // Pack the three live bits of each byte into the low 12 bits.
#if defined (SK_CPU_BENDIAN)
    return
        (x >> 24) |
        ((x >> 13) & 0x38) |
        ((x >> 2) & 0x1C0) |
        ((x << 9) & 0xE00);
#else
    return
        (x & 0x7) |
        ((x >> 5) & 0x38) |
        ((x >> 10) & 0x1C0) |
        ((x >> 15) & 0xE00);
#endif
}
342 | |
343 typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes); | |
344 template<PackIndicesProc packIndicesProc> | |
345 static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int row Bytes) { | |
346 *(reinterpret_cast<uint64_t*>(*dstPtr)) = | |
347 SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16)); | |
348 *dstPtr += 8; | |
349 } | |
350 | |
351 inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) { | |
352 uint64_t result = 0; | |
353 for (int i = 0; i < 4; ++i) { | |
354 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*row Bytes)); | |
355 result |= static_cast<uint64_t>(convert_index(idx)) << 12*i; | |
356 } | |
357 return result; | |
358 } | |
359 | |
360 inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) { | |
361 // !SPEED! Blarg, this is kind of annoying. SSE4 can make this | |
362 // a LOT faster. | |
363 uint8_t transposed[16]; | |
364 for (int i = 0; i < 4; ++i) { | |
365 for (int j = 0; j < 4; ++j) { | |
366 transposed[j*4+i] = indices[i*rowBytes + j]; | |
367 } | |
368 } | |
369 | |
370 return PackRowMajor(transposed, 4); | |
371 } | |
372 | |
373 static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src, | |
374 int width, int height, int rowBytes) { | |
375 | |
376 if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) { | |
377 return false; | |
378 } | |
379 | |
380 uint8_t** dstPtr = &dst; | |
robertphillips
2014/07/29 20:52:49
spaces around +='s ?
krajcevski
2014/07/29 21:08:26
Done.
| |
381 for (int y = 0; y < height; y+=4) { | |
382 for (int x = 0; x < width; x+=4) { | |
383 compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, r owBytes); | |
384 } | |
385 } | |
386 | |
387 return true; | |
388 } | |
389 | |
390 void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) { | |
391 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4); | |
392 } | |
393 | |
394 #endif // COMPRESS_LATC_FAST | |
395 | |
281 //////////////////////////////////////////////////////////////////////////////// | 396 //////////////////////////////////////////////////////////////////////////////// |
282 | 397 |
283 namespace SkTextureCompressor { | 398 namespace SkTextureCompressor { |
284 | 399 |
285 bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, i nt rowBytes) { | 400 bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, i nt rowBytes) { |
401 #if COMPRESS_LATC_FAST | |
402 return compress_4x4_a8_latc(dst, src, width, height, rowBytes); | |
403 #elif COMPRESS_LATC_SLOW | |
286 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ latc_block); | 404 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ latc_block); |
405 #else | |
406 #error "Must choose either fast or slow LATC compression" | |
407 #endif | |
287 } | 408 } |
288 | 409 |
289 SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) { | 410 SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) { |
411 #if COMPRESS_LATC_FAST | |
412 return new | |
413 SkTCompressedAlphaBlitter<4, 8, CompressA8LATCBlockVertical> | |
414 (width, height, outputBuffer); | |
415 #elif COMPRESS_LATC_SLOW | |
290 // TODO (krajcevski) | 416 // TODO (krajcevski) |
291 return NULL; | 417 return NULL; |
418 #endif | |
292 } | 419 } |
293 | 420 |
294 } // SkTextureCompressor | 421 } // SkTextureCompressor |
OLD | NEW |