Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(77)

Side by Side Diff: third_party/libwebp/dsp/lossless_enc.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libwebp/dsp/lossless.c ('k') | third_party/libwebp/dsp/lossless_enc_mips32.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2012 Google Inc. All Rights Reserved. 1 // Copyright 2015 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Image transforms and color space conversion methods for lossless decoder. 10 // Image transform methods for lossless encoder.
11 // 11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com) 12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 // Jyrki Alakuijala (jyrki@google.com) 13 // Jyrki Alakuijala (jyrki@google.com)
14 // Urvang Joshi (urvang@google.com) 14 // Urvang Joshi (urvang@google.com)
15 15
16 #include "./dsp.h" 16 #include "./dsp.h"
17 17
18 #include <math.h> 18 #include <math.h>
19 #include <stdlib.h> 19 #include <stdlib.h>
20 #include "../dec/vp8li.h" 20 #include "../dec/vp8li.h"
21 #include "../utils/endian_inl.h" 21 #include "../utils/endian_inl.h"
22 #include "./lossless.h" 22 #include "./lossless.h"
23 #include "./yuv.h" 23 #include "./yuv.h"
24 24
25 #define MAX_DIFF_COST (1e30f) 25 #define MAX_DIFF_COST (1e30f)
26 26
27 static const int kPredLowEffort = 11;
28 static const uint32_t kMaskAlpha = 0xff000000;
29
27 // lookup table for small values of log2(int) 30 // lookup table for small values of log2(int)
28 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { 31 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
29 0.0000000000000000f, 0.0000000000000000f, 32 0.0000000000000000f, 0.0000000000000000f,
30 1.0000000000000000f, 1.5849625007211560f, 33 1.0000000000000000f, 1.5849625007211560f,
31 2.0000000000000000f, 2.3219280948873621f, 34 2.0000000000000000f, 2.3219280948873621f,
32 2.5849625007211560f, 2.8073549220576041f, 35 2.5849625007211560f, 2.8073549220576041f,
33 3.0000000000000000f, 3.1699250014423121f, 36 3.0000000000000000f, 3.1699250014423121f,
34 3.3219280948873621f, 3.4594316186372973f, 37 3.3219280948873621f, 3.4594316186372973f,
35 3.5849625007211560f, 3.7004397181410921f, 38 3.5849625007211560f, 3.7004397181410921f,
36 3.8073549220576041f, 3.9068905956085187f, 39 3.8073549220576041f, 3.9068905956085187f,
(...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after
319 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 322 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
320 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 323 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
321 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 324 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
322 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 325 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
323 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 326 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
324 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 327 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
325 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 328 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
326 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 329 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
327 }; 330 };
328 331
329 // The threshold till approximate version of log_2 can be used.
330 // Practically, we can get rid of the call to log() as the two values match to
331 // very high degree (the ratio of these two is 0.99999x).
332 // Keeping a high threshold for now.
333 #define APPROX_LOG_WITH_CORRECTION_MAX 65536
334 #define APPROX_LOG_MAX 4096
335 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
336 static float FastSLog2Slow(uint32_t v) { 332 static float FastSLog2Slow(uint32_t v) {
337 assert(v >= LOG_LOOKUP_IDX_MAX); 333 assert(v >= LOG_LOOKUP_IDX_MAX);
338 if (v < APPROX_LOG_WITH_CORRECTION_MAX) { 334 if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
339 int log_cnt = 0; 335 int log_cnt = 0;
340 uint32_t y = 1; 336 uint32_t y = 1;
341 int correction = 0; 337 int correction = 0;
342 const float v_f = (float)v; 338 const float v_f = (float)v;
343 const uint32_t orig_v = v; 339 const uint32_t orig_v = v;
344 do { 340 do {
345 ++log_cnt; 341 ++log_cnt;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
377 // for large values of 'v'. 373 // for large values of 'v'.
378 const int correction = (23 * (orig_v & (y - 1))) >> 4; 374 const int correction = (23 * (orig_v & (y - 1))) >> 4;
379 log_2 += (double)correction / orig_v; 375 log_2 += (double)correction / orig_v;
380 } 376 }
381 return (float)log_2; 377 return (float)log_2;
382 } else { 378 } else {
383 return (float)(LOG_2_RECIPROCAL * log((double)v)); 379 return (float)(LOG_2_RECIPROCAL * log((double)v));
384 } 380 }
385 } 381 }
386 382
387 //------------------------------------------------------------------------------
388 // Image transforms.
389
390 // Mostly used to reduce code size + readability 383 // Mostly used to reduce code size + readability
391 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } 384 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
392 385
393 // In-place sum of each component with mod 256.
394 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
395 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
396 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
397 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
398 }
399
400 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
401 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
402 }
403
404 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
405 return Average2(Average2(a0, a2), a1);
406 }
407
408 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
409 uint32_t a2, uint32_t a3) {
410 return Average2(Average2(a0, a1), Average2(a2, a3));
411 }
412
413 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
414 if (a < 256) {
415 return a;
416 }
417 // return 0, when a is a negative integer.
418 // return 255, when a is positive.
419 return ~a >> 24;
420 }
421
422 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
423 return Clip255(a + b - c);
424 }
425
426 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
427 uint32_t c2) {
428 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
429 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
430 (c1 >> 16) & 0xff,
431 (c2 >> 16) & 0xff);
432 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
433 (c1 >> 8) & 0xff,
434 (c2 >> 8) & 0xff);
435 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
436 return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
437 }
438
439 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
440 return Clip255(a + (a - b) / 2);
441 }
442
443 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
444 uint32_t c2) {
445 const uint32_t ave = Average2(c0, c1);
446 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
447 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
448 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
449 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
450 return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
451 }
452
453 // gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
454 #if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
455 # define LOCAL_INLINE __attribute__ ((noinline))
456 #else
457 # define LOCAL_INLINE WEBP_INLINE
458 #endif
459
460 static LOCAL_INLINE int Sub3(int a, int b, int c) {
461 const int pb = b - c;
462 const int pa = a - c;
463 return abs(pb) - abs(pa);
464 }
465
466 #undef LOCAL_INLINE
467
468 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
469 const int pa_minus_pb =
470 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
471 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
472 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
473 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
474 return (pa_minus_pb <= 0) ? a : b;
475 }
476
477 //------------------------------------------------------------------------------ 386 //------------------------------------------------------------------------------
478 // Predictors 387 // Methods to calculate Entropy (Shannon).
479
480 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
481 (void)top;
482 (void)left;
483 return ARGB_BLACK;
484 }
485 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
486 (void)top;
487 return left;
488 }
489 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
490 (void)left;
491 return top[0];
492 }
493 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
494 (void)left;
495 return top[1];
496 }
497 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
498 (void)left;
499 return top[-1];
500 }
501 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
502 const uint32_t pred = Average3(left, top[0], top[1]);
503 return pred;
504 }
505 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
506 const uint32_t pred = Average2(left, top[-1]);
507 return pred;
508 }
509 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
510 const uint32_t pred = Average2(left, top[0]);
511 return pred;
512 }
513 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
514 const uint32_t pred = Average2(top[-1], top[0]);
515 (void)left;
516 return pred;
517 }
518 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
519 const uint32_t pred = Average2(top[0], top[1]);
520 (void)left;
521 return pred;
522 }
523 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
524 const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
525 return pred;
526 }
527 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
528 const uint32_t pred = Select(top[0], left, top[-1]);
529 return pred;
530 }
531 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
532 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
533 return pred;
534 }
535 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
536 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
537 return pred;
538 }
539
540 static const VP8LPredictorFunc kPredictorsC[16] = {
541 Predictor0, Predictor1, Predictor2, Predictor3,
542 Predictor4, Predictor5, Predictor6, Predictor7,
543 Predictor8, Predictor9, Predictor10, Predictor11,
544 Predictor12, Predictor13,
545 Predictor0, Predictor0 // <- padding security sentinels
546 };
547 388
548 static float PredictionCostSpatial(const int counts[256], int weight_0, 389 static float PredictionCostSpatial(const int counts[256], int weight_0,
549 double exp_val) { 390 double exp_val) {
550 const int significant_symbols = 256 >> 4; 391 const int significant_symbols = 256 >> 4;
551 const double exp_decay_factor = 0.6; 392 const double exp_decay_factor = 0.6;
552 double bits = weight_0 * counts[0]; 393 double bits = weight_0 * counts[0];
553 int i; 394 int i;
554 for (i = 1; i < significant_symbols; ++i) { 395 for (i = 1; i < significant_symbols; ++i) {
555 bits += exp_val * (counts[i] + counts[256 - i]); 396 bits += exp_val * (counts[i] + counts[256 - i]);
556 exp_val *= exp_decay_factor; 397 exp_val *= exp_decay_factor;
557 } 398 }
558 return (float)(-0.1 * bits); 399 return (float)(-0.1 * bits);
559 } 400 }
560 401
561 // Compute the combined Shannon's entropy for distribution {X} and {X+Y} 402 // Compute the combined Shannon's entropy for distribution {X} and {X+Y}
562 static float CombinedShannonEntropy(const int X[256], const int Y[256]) { 403 static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
563 int i; 404 int i;
564 double retval = 0.; 405 double retval = 0.;
565 int sumX = 0, sumXY = 0; 406 int sumX = 0, sumXY = 0;
566 for (i = 0; i < 256; ++i) { 407 for (i = 0; i < 256; ++i) {
567 const int x = X[i]; 408 const int x = X[i];
568 const int xy = x + Y[i];
569 if (x != 0) { 409 if (x != 0) {
410 const int xy = x + Y[i];
570 sumX += x; 411 sumX += x;
571 retval -= VP8LFastSLog2(x); 412 retval -= VP8LFastSLog2(x);
572 sumXY += xy; 413 sumXY += xy;
573 retval -= VP8LFastSLog2(xy); 414 retval -= VP8LFastSLog2(xy);
574 } else if (xy != 0) { 415 } else if (Y[i] != 0) {
575 sumXY += xy; 416 sumXY += Y[i];
576 retval -= VP8LFastSLog2(xy); 417 retval -= VP8LFastSLog2(Y[i]);
577 } 418 }
578 } 419 }
579 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); 420 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
580 return (float)retval; 421 return (float)retval;
581 } 422 }
582 423
583 static float PredictionCostSpatialHistogram(const int accumulated[4][256], 424 static float PredictionCostSpatialHistogram(const int accumulated[4][256],
584 const int tile[4][256]) { 425 const int tile[4][256]) {
585 int i; 426 int i;
586 double retval = 0; 427 double retval = 0;
587 for (i = 0; i < 4; ++i) { 428 for (i = 0; i < 4; ++i) {
588 const double kExpValue = 0.94; 429 const double kExpValue = 0.94;
589 retval += PredictionCostSpatial(tile[i], 1, kExpValue); 430 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
590 retval += CombinedShannonEntropy(tile[i], accumulated[i]); 431 retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
591 } 432 }
592 return (float)retval; 433 return (float)retval;
593 } 434 }
594 435
436 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
437 entropy->entropy = 0.;
438 entropy->sum = 0;
439 entropy->nonzeros = 0;
440 entropy->max_val = 0;
441 entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
442 }
443
444 void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
445 VP8LBitEntropy* const entropy) {
446 int i;
447
448 VP8LBitEntropyInit(entropy);
449
450 for (i = 0; i < n; ++i) {
451 if (array[i] != 0) {
452 entropy->sum += array[i];
453 entropy->nonzero_code = i;
454 ++entropy->nonzeros;
455 entropy->entropy -= VP8LFastSLog2(array[i]);
456 if (entropy->max_val < array[i]) {
457 entropy->max_val = array[i];
458 }
459 }
460 }
461 entropy->entropy += VP8LFastSLog2(entropy->sum);
462 }
463
464 static WEBP_INLINE void GetEntropyUnrefinedHelper(
465 uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
466 VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
467 const int streak = i - *i_prev;
468
469 // Gather info for the bit entropy.
470 if (*val_prev != 0) {
471 bit_entropy->sum += (*val_prev) * streak;
472 bit_entropy->nonzeros += streak;
473 bit_entropy->nonzero_code = *i_prev;
474 bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
475 if (bit_entropy->max_val < *val_prev) {
476 bit_entropy->max_val = *val_prev;
477 }
478 }
479
480 // Gather info for the Huffman cost.
481 stats->counts[*val_prev != 0] += (streak > 3);
482 stats->streaks[*val_prev != 0][(streak > 3)] += streak;
483
484 *val_prev = val;
485 *i_prev = i;
486 }
487
488 void VP8LGetEntropyUnrefined(const uint32_t* const X, int length,
489 VP8LBitEntropy* const bit_entropy,
490 VP8LStreaks* const stats) {
491 int i;
492 int i_prev = 0;
493 uint32_t x_prev = X[0];
494
495 memset(stats, 0, sizeof(*stats));
496 VP8LBitEntropyInit(bit_entropy);
497
498 for (i = 1; i < length; ++i) {
499 const uint32_t x = X[i];
500 if (x != x_prev) {
501 VP8LGetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);
502 }
503 }
504 VP8LGetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
505
506 bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
507 }
508
509 void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,
510 const uint32_t* const Y, int length,
511 VP8LBitEntropy* const bit_entropy,
512 VP8LStreaks* const stats) {
513 int i = 1;
514 int i_prev = 0;
515 uint32_t xy_prev = X[0] + Y[0];
516
517 memset(stats, 0, sizeof(*stats));
518 VP8LBitEntropyInit(bit_entropy);
519
520 for (i = 1; i < length; ++i) {
521 const uint32_t xy = X[i] + Y[i];
522 if (xy != xy_prev) {
523 VP8LGetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy,
524 stats);
525 }
526 }
527 VP8LGetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
528
529 bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
530 }
531
595 static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) { 532 static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
596 ++histo_argb[0][argb >> 24]; 533 ++histo_argb[0][argb >> 24];
597 ++histo_argb[1][(argb >> 16) & 0xff]; 534 ++histo_argb[1][(argb >> 16) & 0xff];
598 ++histo_argb[2][(argb >> 8) & 0xff]; 535 ++histo_argb[2][(argb >> 8) & 0xff];
599 ++histo_argb[3][argb & 0xff]; 536 ++histo_argb[3][argb & 0xff];
600 } 537 }
601 538
539 //------------------------------------------------------------------------------
540
541 static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
542 int x, int y,
543 const uint32_t* current_row,
544 const uint32_t* upper_row) {
545 if (y == 0) {
546 return (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left.
547 } else if (x == 0) {
548 return upper_row[x]; // Top.
549 } else {
550 return pred_func(current_row[x - 1], upper_row + x);
551 }
552 }
553
554 // Returns best predictor and updates the accumulated histogram.
602 static int GetBestPredictorForTile(int width, int height, 555 static int GetBestPredictorForTile(int width, int height,
603 int tile_x, int tile_y, int bits, 556 int tile_x, int tile_y, int bits,
604 const int accumulated[4][256], 557 int accumulated[4][256],
605 const uint32_t* const argb_scratch) { 558 const uint32_t* const argb_scratch,
559 int exact) {
606 const int kNumPredModes = 14; 560 const int kNumPredModes = 14;
607 const int col_start = tile_x << bits; 561 const int col_start = tile_x << bits;
608 const int row_start = tile_y << bits; 562 const int row_start = tile_y << bits;
609 const int tile_size = 1 << bits; 563 const int tile_size = 1 << bits;
610 const int max_y = GetMin(tile_size, height - row_start); 564 const int max_y = GetMin(tile_size, height - row_start);
611 const int max_x = GetMin(tile_size, width - col_start); 565 const int max_x = GetMin(tile_size, width - col_start);
612 float best_diff = MAX_DIFF_COST; 566 float best_diff = MAX_DIFF_COST;
613 int best_mode = 0; 567 int best_mode = 0;
614 int mode; 568 int mode;
569 int histo_stack_1[4][256];
570 int histo_stack_2[4][256];
571 // Need pointers to be able to swap arrays.
572 int (*histo_argb)[256] = histo_stack_1;
573 int (*best_histo)[256] = histo_stack_2;
574
575 int i, j;
615 for (mode = 0; mode < kNumPredModes; ++mode) { 576 for (mode = 0; mode < kNumPredModes; ++mode) {
616 const uint32_t* current_row = argb_scratch; 577 const uint32_t* current_row = argb_scratch;
617 const VP8LPredictorFunc pred_func = VP8LPredictors[mode]; 578 const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
618 float cur_diff; 579 float cur_diff;
619 int y; 580 int y;
620 int histo_argb[4][256]; 581 memset(histo_argb, 0, sizeof(histo_stack_1));
621 memset(histo_argb, 0, sizeof(histo_argb));
622 for (y = 0; y < max_y; ++y) { 582 for (y = 0; y < max_y; ++y) {
623 int x; 583 int x;
624 const int row = row_start + y; 584 const int row = row_start + y;
625 const uint32_t* const upper_row = current_row; 585 const uint32_t* const upper_row = current_row;
626 current_row = upper_row + width; 586 current_row = upper_row + width;
627 for (x = 0; x < max_x; ++x) { 587 for (x = 0; x < max_x; ++x) {
628 const int col = col_start + x; 588 const int col = col_start + x;
629 uint32_t predict; 589 const uint32_t predict =
630 if (row == 0) { 590 Predict(pred_func, col, row, current_row, upper_row);
631 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. 591 uint32_t residual = VP8LSubPixels(current_row[col], predict);
632 } else if (col == 0) { 592 if (!exact && (current_row[col] & kMaskAlpha) == 0) {
633 predict = upper_row[col]; // Top. 593 residual &= kMaskAlpha; // See CopyImageWithPrediction.
634 } else {
635 predict = pred_func(current_row[col - 1], upper_row + col);
636 } 594 }
637 UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict)); 595 UpdateHisto(histo_argb, residual);
638 } 596 }
639 } 597 }
640 cur_diff = PredictionCostSpatialHistogram( 598 cur_diff = PredictionCostSpatialHistogram(
641 accumulated, (const int (*)[256])histo_argb); 599 (const int (*)[256])accumulated, (const int (*)[256])histo_argb);
642 if (cur_diff < best_diff) { 600 if (cur_diff < best_diff) {
601 int (*tmp)[256] = histo_argb;
602 histo_argb = best_histo;
603 best_histo = tmp;
643 best_diff = cur_diff; 604 best_diff = cur_diff;
644 best_mode = mode; 605 best_mode = mode;
645 } 606 }
646 } 607 }
647 608
609 for (i = 0; i < 4; i++) {
610 for (j = 0; j < 256; j++) {
611 accumulated[i][j] += best_histo[i][j];
612 }
613 }
614
648 return best_mode; 615 return best_mode;
649 } 616 }
650 617
651 static void CopyTileWithPrediction(int width, int height, 618 static void CopyImageWithPrediction(int width, int height,
652 int tile_x, int tile_y, int bits, int mode, 619 int bits, uint32_t* const modes,
653 const uint32_t* const argb_scratch, 620 uint32_t* const argb_scratch,
654 uint32_t* const argb) { 621 uint32_t* const argb,
655 const int col_start = tile_x << bits; 622 int low_effort, int exact) {
656 const int row_start = tile_y << bits; 623 const int tiles_per_row = VP8LSubSampleSize(width, bits);
657 const int tile_size = 1 << bits; 624 const int mask = (1 << bits) - 1;
658 const int max_y = GetMin(tile_size, height - row_start); 625 // The row size is one pixel longer to allow the top right pixel to point to
659 const int max_x = GetMin(tile_size, width - col_start); 626 // the leftmost pixel of the next row when at the right edge.
660 const VP8LPredictorFunc pred_func = VP8LPredictors[mode]; 627 uint32_t* current_row = argb_scratch;
661 const uint32_t* current_row = argb_scratch; 628 uint32_t* upper_row = argb_scratch + width + 1;
629 int y;
630 VP8LPredictorFunc pred_func =
631 low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
662 632
663 int y; 633 for (y = 0; y < height; ++y) {
664 for (y = 0; y < max_y; ++y) {
665 int x; 634 int x;
666 const int row = row_start + y; 635 uint32_t* tmp = upper_row;
667 const uint32_t* const upper_row = current_row; 636 upper_row = current_row;
668 current_row = upper_row + width; 637 current_row = tmp;
669 for (x = 0; x < max_x; ++x) { 638 memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
670 const int col = col_start + x; 639 current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
671 const int pix = row * width + col; 640
672 uint32_t predict; 641 if (low_effort) {
673 if (row == 0) { 642 for (x = 0; x < width; ++x) {
674 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. 643 const uint32_t predict =
675 } else if (col == 0) { 644 Predict(pred_func, x, y, current_row, upper_row);
676 predict = upper_row[col]; // Top. 645 argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
677 } else {
678 predict = pred_func(current_row[col - 1], upper_row + col);
679 } 646 }
680 argb[pix] = VP8LSubPixels(current_row[col], predict); 647 } else {
648 for (x = 0; x < width; ++x) {
649 uint32_t predict, residual;
650 if ((x & mask) == 0) {
651 const int mode =
652 (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
653 pred_func = VP8LPredictors[mode];
654 }
655 predict = Predict(pred_func, x, y, current_row, upper_row);
656 residual = VP8LSubPixels(current_row[x], predict);
657 if (!exact && (current_row[x] & kMaskAlpha) == 0) {
658 // If alpha is 0, cleanup RGB. We can choose the RGB values of the
659 // residual for best compression. The prediction of alpha itself can
660 // be non-zero and must be kept though. We choose RGB of the residual
661 // to be 0.
662 residual &= kMaskAlpha;
663 // Update input image so that next predictions use correct RGB value.
664 current_row[x] = predict & ~kMaskAlpha;
665 if (x == 0 && y != 0) upper_row[width] = current_row[x];
666 }
667 argb[y * width + x] = residual;
668 }
681 } 669 }
682 } 670 }
683 } 671 }
684 672
685 void VP8LResidualImage(int width, int height, int bits, 673 void VP8LResidualImage(int width, int height, int bits, int low_effort,
686 uint32_t* const argb, uint32_t* const argb_scratch, 674 uint32_t* const argb, uint32_t* const argb_scratch,
687 uint32_t* const image) { 675 uint32_t* const image, int exact) {
688 const int max_tile_size = 1 << bits; 676 const int max_tile_size = 1 << bits;
689 const int tiles_per_row = VP8LSubSampleSize(width, bits); 677 const int tiles_per_row = VP8LSubSampleSize(width, bits);
690 const int tiles_per_col = VP8LSubSampleSize(height, bits); 678 const int tiles_per_col = VP8LSubSampleSize(height, bits);
691 uint32_t* const upper_row = argb_scratch; 679 uint32_t* const upper_row = argb_scratch;
692 uint32_t* const current_tile_rows = argb_scratch + width; 680 uint32_t* const current_tile_rows = argb_scratch + width;
693 int tile_y; 681 int tile_y;
694 int histo[4][256]; 682 int histo[4][256];
695 memset(histo, 0, sizeof(histo)); 683 if (low_effort) {
696 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { 684 int i;
697 const int tile_y_offset = tile_y * max_tile_size; 685 for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
698 const int this_tile_height = 686 image[i] = ARGB_BLACK | (kPredLowEffort << 8);
699 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
700 int tile_x;
701 if (tile_y > 0) {
702 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
703 width * sizeof(*upper_row));
704 } 687 }
705 memcpy(current_tile_rows, &argb[tile_y_offset * width], 688 } else {
706 this_tile_height * width * sizeof(*current_tile_rows)); 689 memset(histo, 0, sizeof(histo));
707 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { 690 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
708 int pred; 691 const int tile_y_offset = tile_y * max_tile_size;
709 int y; 692 const int this_tile_height =
710 const int tile_x_offset = tile_x * max_tile_size; 693 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
711 int all_x_max = tile_x_offset + max_tile_size; 694 int tile_x;
712 if (all_x_max > width) { 695 if (tile_y > 0) {
713 all_x_max = width; 696 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
697 width * sizeof(*upper_row));
714 } 698 }
715 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, 699 memcpy(current_tile_rows, &argb[tile_y_offset * width],
716 (const int (*)[256])histo, 700 this_tile_height * width * sizeof(*current_tile_rows));
717 argb_scratch); 701 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
718 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); 702 const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
719 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred, 703 bits, (int (*)[256])histo, argb_scratch, exact);
720 argb_scratch, argb); 704 image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
721 for (y = 0; y < max_tile_size; ++y) {
722 int ix;
723 int all_x;
724 int all_y = tile_y_offset + y;
725 if (all_y >= height) {
726 break;
727 }
728 ix = all_y * width + tile_x_offset;
729 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
730 UpdateHisto(histo, argb[ix]);
731 }
732 } 705 }
733 } 706 }
734 } 707 }
735 }
736 708
737 // Inverse prediction. 709 CopyImageWithPrediction(width, height, bits,
738 static void PredictorInverseTransform(const VP8LTransform* const transform, 710 image, argb_scratch, argb, low_effort, exact);
739 int y_start, int y_end, uint32_t* data) {
740 const int width = transform->xsize_;
741 if (y_start == 0) { // First Row follows the L (mode=1) mode.
742 int x;
743 const uint32_t pred0 = Predictor0(data[-1], NULL);
744 AddPixelsEq(data, pred0);
745 for (x = 1; x < width; ++x) {
746 const uint32_t pred1 = Predictor1(data[x - 1], NULL);
747 AddPixelsEq(data + x, pred1);
748 }
749 data += width;
750 ++y_start;
751 }
752
753 {
754 int y = y_start;
755 const int tile_width = 1 << transform->bits_;
756 const int mask = tile_width - 1;
757 const int safe_width = width & ~mask;
758 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
759 const uint32_t* pred_mode_base =
760 transform->data_ + (y >> transform->bits_) * tiles_per_row;
761
762 while (y < y_end) {
763 const uint32_t pred2 = Predictor2(data[-1], data - width);
764 const uint32_t* pred_mode_src = pred_mode_base;
765 VP8LPredictorFunc pred_func;
766 int x = 1;
767 int t = 1;
768 // First pixel follows the T (mode=2) mode.
769 AddPixelsEq(data, pred2);
770 // .. the rest:
771 while (x < safe_width) {
772 pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
773 for (; t < tile_width; ++t, ++x) {
774 const uint32_t pred = pred_func(data[x - 1], data + x - width);
775 AddPixelsEq(data + x, pred);
776 }
777 t = 0;
778 }
779 if (x < width) {
780 pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
781 for (; x < width; ++x) {
782 const uint32_t pred = pred_func(data[x - 1], data + x - width);
783 AddPixelsEq(data + x, pred);
784 }
785 }
786 data += width;
787 ++y;
788 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
789 pred_mode_base += tiles_per_row;
790 }
791 }
792 }
793 } 711 }
794 712
795 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { 713 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
796 int i; 714 int i;
797 for (i = 0; i < num_pixels; ++i) { 715 for (i = 0; i < num_pixels; ++i) {
798 const uint32_t argb = argb_data[i]; 716 const uint32_t argb = argb_data[i];
799 const uint32_t green = (argb >> 8) & 0xff; 717 const uint32_t green = (argb >> 8) & 0xff;
800 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; 718 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
801 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; 719 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
802 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; 720 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
803 } 721 }
804 } 722 }
805 723
806 // Add green to blue and red channels (i.e. perform the inverse transform of
807 // 'subtract green').
808 void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
809 int i;
810 for (i = 0; i < num_pixels; ++i) {
811 const uint32_t argb = data[i];
812 const uint32_t green = ((argb >> 8) & 0xff);
813 uint32_t red_blue = (argb & 0x00ff00ffu);
814 red_blue += (green << 16) | green;
815 red_blue &= 0x00ff00ffu;
816 data[i] = (argb & 0xff00ff00u) | red_blue;
817 }
818 }
819
820 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) { 724 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
821 m->green_to_red_ = 0; 725 m->green_to_red_ = 0;
822 m->green_to_blue_ = 0; 726 m->green_to_blue_ = 0;
823 m->red_to_blue_ = 0; 727 m->red_to_blue_ = 0;
824 } 728 }
825 729
826 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, 730 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
827 int8_t color) { 731 int8_t color) {
828 return (uint32_t)((int)(color_pred) * color) >> 5; 732 return (uint32_t)((int)(color_pred) * color) >> 5;
829 } 733 }
(...skipping 24 matching lines...) Expand all
854 uint32_t new_blue = argb; 758 uint32_t new_blue = argb;
855 new_red -= ColorTransformDelta(m->green_to_red_, green); 759 new_red -= ColorTransformDelta(m->green_to_red_, green);
856 new_red &= 0xff; 760 new_red &= 0xff;
857 new_blue -= ColorTransformDelta(m->green_to_blue_, green); 761 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
858 new_blue -= ColorTransformDelta(m->red_to_blue_, red); 762 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
859 new_blue &= 0xff; 763 new_blue &= 0xff;
860 data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); 764 data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
861 } 765 }
862 } 766 }
863 767
864 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
865 int num_pixels) {
866 int i;
867 for (i = 0; i < num_pixels; ++i) {
868 const uint32_t argb = data[i];
869 const uint32_t green = argb >> 8;
870 const uint32_t red = argb >> 16;
871 uint32_t new_red = red;
872 uint32_t new_blue = argb;
873 new_red += ColorTransformDelta(m->green_to_red_, green);
874 new_red &= 0xff;
875 new_blue += ColorTransformDelta(m->green_to_blue_, green);
876 new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
877 new_blue &= 0xff;
878 data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
879 }
880 }
881
882 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, 768 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
883 uint32_t argb) { 769 uint32_t argb) {
884 const uint32_t green = argb >> 8; 770 const uint32_t green = argb >> 8;
885 uint32_t new_red = argb >> 16; 771 uint32_t new_red = argb >> 16;
886 new_red -= ColorTransformDelta(green_to_red, green); 772 new_red -= ColorTransformDelta(green_to_red, green);
887 return (new_red & 0xff); 773 return (new_red & 0xff);
888 } 774 }
889 775
890 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, 776 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
891 uint8_t red_to_blue, 777 uint8_t red_to_blue,
892 uint32_t argb) { 778 uint32_t argb) {
893 const uint32_t green = argb >> 8; 779 const uint32_t green = argb >> 8;
894 const uint32_t red = argb >> 16; 780 const uint32_t red = argb >> 16;
895 uint8_t new_blue = argb; 781 uint8_t new_blue = argb;
896 new_blue -= ColorTransformDelta(green_to_blue, green); 782 new_blue -= ColorTransformDelta(green_to_blue, green);
897 new_blue -= ColorTransformDelta(red_to_blue, red); 783 new_blue -= ColorTransformDelta(red_to_blue, red);
898 return (new_blue & 0xff); 784 return (new_blue & 0xff);
899 } 785 }
900 786
901 static float PredictionCostCrossColor(const int accumulated[256], 787 static float PredictionCostCrossColor(const int accumulated[256],
902 const int counts[256]) { 788 const int counts[256]) {
903 // Favor low entropy, locally and globally. 789 // Favor low entropy, locally and globally.
904 // Favor small absolute values for PredictionCostSpatial 790 // Favor small absolute values for PredictionCostSpatial
905 static const double kExpValue = 2.4; 791 static const double kExpValue = 2.4;
906 return CombinedShannonEntropy(counts, accumulated) + 792 return VP8LCombinedShannonEntropy(counts, accumulated) +
907 PredictionCostSpatial(counts, 3, kExpValue); 793 PredictionCostSpatial(counts, 3, kExpValue);
908 } 794 }
909 795
796 void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
797 int tile_width, int tile_height,
798 int green_to_red, int histo[]) {
799 while (tile_height-- > 0) {
800 int x;
801 for (x = 0; x < tile_width; ++x) {
802 ++histo[TransformColorRed(green_to_red, argb[x])];
803 }
804 argb += stride;
805 }
806 }
807
910 static float GetPredictionCostCrossColorRed( 808 static float GetPredictionCostCrossColorRed(
911 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, 809 const uint32_t* argb, int stride, int tile_width, int tile_height,
912 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red, 810 VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
913 const int accumulated_red_histo[256], const uint32_t* const argb) { 811 const int accumulated_red_histo[256]) {
914 int all_y;
915 int histo[256] = { 0 }; 812 int histo[256] = { 0 };
916 float cur_diff; 813 float cur_diff;
917 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 814
918 int ix = all_y * xsize + tile_x_offset; 815 VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
919 int all_x; 816 green_to_red, histo);
920 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 817
921 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
922 }
923 }
924 cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo); 818 cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
925 if ((uint8_t)green_to_red == prev_x.green_to_red_) { 819 if ((uint8_t)green_to_red == prev_x.green_to_red_) {
926 cur_diff -= 3; // favor keeping the areas locally similar 820 cur_diff -= 3; // favor keeping the areas locally similar
927 } 821 }
928 if ((uint8_t)green_to_red == prev_y.green_to_red_) { 822 if ((uint8_t)green_to_red == prev_y.green_to_red_) {
929 cur_diff -= 3; // favor keeping the areas locally similar 823 cur_diff -= 3; // favor keeping the areas locally similar
930 } 824 }
931 if (green_to_red == 0) { 825 if (green_to_red == 0) {
932 cur_diff -= 3; 826 cur_diff -= 3;
933 } 827 }
934 return cur_diff; 828 return cur_diff;
935 } 829 }
936 830
937 static void GetBestGreenToRed( 831 static void GetBestGreenToRed(
938 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, 832 const uint32_t* argb, int stride, int tile_width, int tile_height,
939 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, 833 VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
940 const int accumulated_red_histo[256], const uint32_t* const argb, 834 const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
941 VP8LMultipliers* const best_tx) { 835 const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
942 int min_green_to_red = -64; 836 int green_to_red_best = 0;
943 int max_green_to_red = 64; 837 int iter, offset;
944 int green_to_red = 0; 838 float best_diff = GetPredictionCostCrossColorRed(
945 int eval_min = 1; 839 argb, stride, tile_width, tile_height, prev_x, prev_y,
946 int eval_max = 1; 840 green_to_red_best, accumulated_red_histo);
947 float cur_diff_min = MAX_DIFF_COST; 841 for (iter = 0; iter < kMaxIters; ++iter) {
948 float cur_diff_max = MAX_DIFF_COST; 842 // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
949 // Do a binary search to find the optimal green_to_red color transform. 843 // one in color computation. Having initial delta here as 1 is sufficient
950 while (max_green_to_red - min_green_to_red > 2) { 844 // to explore the range of (-2, 2).
951 if (eval_min) { 845 const int delta = 32 >> iter;
952 cur_diff_min = GetPredictionCostCrossColorRed( 846 // Try a negative and a positive delta from the best known value.
953 tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, 847 for (offset = -delta; offset <= delta; offset += 2 * delta) {
954 prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb); 848 const int green_to_red_cur = offset + green_to_red_best;
955 eval_min = 0; 849 const float cur_diff = GetPredictionCostCrossColorRed(
956 } 850 argb, stride, tile_width, tile_height, prev_x, prev_y,
957 if (eval_max) { 851 green_to_red_cur, accumulated_red_histo);
958 cur_diff_max = GetPredictionCostCrossColorRed( 852 if (cur_diff < best_diff) {
959 tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, 853 best_diff = cur_diff;
960 prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb); 854 green_to_red_best = green_to_red_cur;
961 eval_max = 0; 855 }
962 }
963 if (cur_diff_min < cur_diff_max) {
964 green_to_red = min_green_to_red;
965 max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
966 eval_max = 1;
967 } else {
968 green_to_red = max_green_to_red;
969 min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
970 eval_min = 1;
971 } 856 }
972 } 857 }
973 best_tx->green_to_red_ = green_to_red; 858 best_tx->green_to_red_ = green_to_red_best;
859 }
860
861 void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
862 int tile_width, int tile_height,
863 int green_to_blue, int red_to_blue,
864 int histo[]) {
865 while (tile_height-- > 0) {
866 int x;
867 for (x = 0; x < tile_width; ++x) {
868 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])];
869 }
870 argb += stride;
871 }
974 } 872 }
975 873
976 static float GetPredictionCostCrossColorBlue( 874 static float GetPredictionCostCrossColorBlue(
977 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, 875 const uint32_t* argb, int stride, int tile_width, int tile_height,
978 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, 876 VP8LMultipliers prev_x, VP8LMultipliers prev_y,
979 int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256], 877 int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
980 const uint32_t* const argb) {
981 int all_y;
982 int histo[256] = { 0 }; 878 int histo[256] = { 0 };
983 float cur_diff; 879 float cur_diff;
984 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 880
985 int all_x; 881 VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
986 int ix = all_y * xsize + tile_x_offset; 882 green_to_blue, red_to_blue, histo);
987 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 883
988 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
989 }
990 }
991 cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo); 884 cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
992 if ((uint8_t)green_to_blue == prev_x.green_to_blue_) { 885 if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
993 cur_diff -= 3; // favor keeping the areas locally similar 886 cur_diff -= 3; // favor keeping the areas locally similar
994 } 887 }
995 if ((uint8_t)green_to_blue == prev_y.green_to_blue_) { 888 if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
996 cur_diff -= 3; // favor keeping the areas locally similar 889 cur_diff -= 3; // favor keeping the areas locally similar
997 } 890 }
998 if ((uint8_t)red_to_blue == prev_x.red_to_blue_) { 891 if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
999 cur_diff -= 3; // favor keeping the areas locally similar 892 cur_diff -= 3; // favor keeping the areas locally similar
1000 } 893 }
1001 if ((uint8_t)red_to_blue == prev_y.red_to_blue_) { 894 if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
1002 cur_diff -= 3; // favor keeping the areas locally similar 895 cur_diff -= 3; // favor keeping the areas locally similar
1003 } 896 }
1004 if (green_to_blue == 0) { 897 if (green_to_blue == 0) {
1005 cur_diff -= 3; 898 cur_diff -= 3;
1006 } 899 }
1007 if (red_to_blue == 0) { 900 if (red_to_blue == 0) {
1008 cur_diff -= 3; 901 cur_diff -= 3;
1009 } 902 }
1010 return cur_diff; 903 return cur_diff;
1011 } 904 }
1012 905
906 #define kGreenRedToBlueNumAxis 8
907 #define kGreenRedToBlueMaxIters 7
1013 static void GetBestGreenRedToBlue( 908 static void GetBestGreenRedToBlue(
1014 int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, 909 const uint32_t* argb, int stride, int tile_width, int tile_height,
1015 int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality, 910 VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
1016 const int accumulated_blue_histo[256], const uint32_t* const argb, 911 const int accumulated_blue_histo[256],
1017 VP8LMultipliers* const best_tx) { 912 VP8LMultipliers* const best_tx) {
1018 float best_diff = MAX_DIFF_COST; 913 const int8_t offset[kGreenRedToBlueNumAxis][2] =
1019 float cur_diff; 914 {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
1020 const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16; 915 const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };
1021 const int min_green_to_blue = -32; 916 const int iters =
1022 const int max_green_to_blue = 32; 917 (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;
1023 const int min_red_to_blue = -32; 918 int green_to_blue_best = 0;
1024 const int max_red_to_blue = 32; 919 int red_to_blue_best = 0;
1025 const int num_iters = 920 int iter;
1026 (1 + (max_green_to_blue - min_green_to_blue) / step) * 921 // Initial value at origin:
1027 (1 + (max_red_to_blue - min_red_to_blue) / step); 922 float best_diff = GetPredictionCostCrossColorBlue(
1028 // Number of tries to get optimal green_to_blue & red_to_blue color transforms 923 argb, stride, tile_width, tile_height, prev_x, prev_y,
1029 // after finding a local minima. 924 green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
1030 const int max_tries_after_min = 4 + (num_iters >> 2); 925 for (iter = 0; iter < iters; ++iter) {
1031 int num_tries_after_min = 0; 926 const int delta = delta_lut[iter];
1032 int green_to_blue; 927 int axis;
1033 for (green_to_blue = min_green_to_blue; 928 for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {
1034 green_to_blue <= max_green_to_blue && 929 const int green_to_blue_cur =
1035 num_tries_after_min < max_tries_after_min; 930 offset[axis][0] * delta + green_to_blue_best;
1036 green_to_blue += step) { 931 const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
1037 int red_to_blue; 932 const float cur_diff = GetPredictionCostCrossColorBlue(
1038 for (red_to_blue = min_red_to_blue; 933 argb, stride, tile_width, tile_height, prev_x, prev_y,
1039 red_to_blue <= max_red_to_blue && 934 green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
1040 num_tries_after_min < max_tries_after_min;
1041 red_to_blue += step) {
1042 cur_diff = GetPredictionCostCrossColorBlue(
1043 tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
1044 prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
1045 if (cur_diff < best_diff) { 935 if (cur_diff < best_diff) {
1046 best_diff = cur_diff; 936 best_diff = cur_diff;
1047 best_tx->green_to_blue_ = green_to_blue; 937 green_to_blue_best = green_to_blue_cur;
1048 best_tx->red_to_blue_ = red_to_blue; 938 red_to_blue_best = red_to_blue_cur;
1049 num_tries_after_min = 0; 939 }
1050 } else { 940 if (quality < 25 && iter == 4) {
1051 ++num_tries_after_min; 941 // Only axis aligned diffs for lower quality.
942 break; // next iter.
1052 } 943 }
1053 } 944 }
945 if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {
946 // Further iterations would not help.
947 break; // out of iter-loop.
948 }
1054 } 949 }
950 best_tx->green_to_blue_ = green_to_blue_best;
951 best_tx->red_to_blue_ = red_to_blue_best;
1055 } 952 }
953 #undef kGreenRedToBlueMaxIters
954 #undef kGreenRedToBlueNumAxis
1056 955
1057 static VP8LMultipliers GetBestColorTransformForTile( 956 static VP8LMultipliers GetBestColorTransformForTile(
1058 int tile_x, int tile_y, int bits, 957 int tile_x, int tile_y, int bits,
1059 VP8LMultipliers prev_x, 958 VP8LMultipliers prev_x,
1060 VP8LMultipliers prev_y, 959 VP8LMultipliers prev_y,
1061 int quality, int xsize, int ysize, 960 int quality, int xsize, int ysize,
1062 const int accumulated_red_histo[256], 961 const int accumulated_red_histo[256],
1063 const int accumulated_blue_histo[256], 962 const int accumulated_blue_histo[256],
1064 const uint32_t* const argb) { 963 const uint32_t* const argb) {
1065 const int max_tile_size = 1 << bits; 964 const int max_tile_size = 1 << bits;
1066 const int tile_y_offset = tile_y * max_tile_size; 965 const int tile_y_offset = tile_y * max_tile_size;
1067 const int tile_x_offset = tile_x * max_tile_size; 966 const int tile_x_offset = tile_x * max_tile_size;
1068 const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize); 967 const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
1069 const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize); 968 const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
969 const int tile_width = all_x_max - tile_x_offset;
970 const int tile_height = all_y_max - tile_y_offset;
971 const uint32_t* const tile_argb = argb + tile_y_offset * xsize
972 + tile_x_offset;
1070 VP8LMultipliers best_tx; 973 VP8LMultipliers best_tx;
1071 MultipliersClear(&best_tx); 974 MultipliersClear(&best_tx);
1072 975
1073 GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, 976 GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height,
1074 prev_x, prev_y, accumulated_red_histo, argb, &best_tx); 977 prev_x, prev_y, quality, accumulated_red_histo, &best_tx);
1075 GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max, 978 GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height,
1076 xsize, prev_x, prev_y, quality, accumulated_blue_histo, 979 prev_x, prev_y, quality, accumulated_blue_histo,
1077 argb, &best_tx); 980 &best_tx);
1078 return best_tx; 981 return best_tx;
1079 } 982 }
1080 983
1081 static void CopyTileWithColorTransform(int xsize, int ysize, 984 static void CopyTileWithColorTransform(int xsize, int ysize,
1082 int tile_x, int tile_y, 985 int tile_x, int tile_y,
1083 int max_tile_size, 986 int max_tile_size,
1084 VP8LMultipliers color_transform, 987 VP8LMultipliers color_transform,
1085 uint32_t* argb) { 988 uint32_t* argb) {
1086 const int xscan = GetMin(max_tile_size, xsize - tile_x); 989 const int xscan = GetMin(max_tile_size, xsize - tile_x);
1087 int yscan = GetMin(max_tile_size, ysize - tile_y); 990 int yscan = GetMin(max_tile_size, ysize - tile_y);
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
1142 continue; // repeated pixels are handled by backward references 1045 continue; // repeated pixels are handled by backward references
1143 } 1046 }
1144 ++accumulated_red_histo[(pix >> 16) & 0xff]; 1047 ++accumulated_red_histo[(pix >> 16) & 0xff];
1145 ++accumulated_blue_histo[(pix >> 0) & 0xff]; 1048 ++accumulated_blue_histo[(pix >> 0) & 0xff];
1146 } 1049 }
1147 } 1050 }
1148 } 1051 }
1149 } 1052 }
1150 } 1053 }
1151 1054
1152 // Color space inverse transform.
1153 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1154 int y_start, int y_end, uint32_t* data) {
1155 const int width = transform->xsize_;
1156 const int tile_width = 1 << transform->bits_;
1157 const int mask = tile_width - 1;
1158 const int safe_width = width & ~mask;
1159 const int remaining_width = width - safe_width;
1160 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1161 int y = y_start;
1162 const uint32_t* pred_row =
1163 transform->data_ + (y >> transform->bits_) * tiles_per_row;
1164
1165 while (y < y_end) {
1166 const uint32_t* pred = pred_row;
1167 VP8LMultipliers m = { 0, 0, 0 };
1168 const uint32_t* const data_safe_end = data + safe_width;
1169 const uint32_t* const data_end = data + width;
1170 while (data < data_safe_end) {
1171 ColorCodeToMultipliers(*pred++, &m);
1172 VP8LTransformColorInverse(&m, data, tile_width);
1173 data += tile_width;
1174 }
1175 if (data < data_end) { // Left-overs using C-version.
1176 ColorCodeToMultipliers(*pred++, &m);
1177 VP8LTransformColorInverse(&m, data, remaining_width);
1178 data += remaining_width;
1179 }
1180 ++y;
1181 if ((y & mask) == 0) pred_row += tiles_per_row;
1182 }
1183 }
1184
1185 // Separate out pixels packed together using pixel-bundling.
1186 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
1187 #define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
1188 void FUNC_NAME(const VP8LTransform* const transform, \
1189 int y_start, int y_end, const TYPE* src, TYPE* dst) { \
1190 int y; \
1191 const int bits_per_pixel = 8 >> transform->bits_; \
1192 const int width = transform->xsize_; \
1193 const uint32_t* const color_map = transform->data_; \
1194 if (bits_per_pixel < 8) { \
1195 const int pixels_per_byte = 1 << transform->bits_; \
1196 const int count_mask = pixels_per_byte - 1; \
1197 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
1198 for (y = y_start; y < y_end; ++y) { \
1199 uint32_t packed_pixels = 0; \
1200 int x; \
1201 for (x = 0; x < width; ++x) { \
1202 /* We need to load fresh 'packed_pixels' once every */ \
1203 /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
1204 /* is a power of 2, so can just use a mask for that, instead of */ \
1205 /* decrementing a counter. */ \
1206 if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
1207 *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
1208 packed_pixels >>= bits_per_pixel; \
1209 } \
1210 } \
1211 } else { \
1212 for (y = y_start; y < y_end; ++y) { \
1213 int x; \
1214 for (x = 0; x < width; ++x) { \
1215 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
1216 } \
1217 } \
1218 } \
1219 }
1220
1221 static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
1222 return (idx >> 8) & 0xff;
1223 }
1224
1225 static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
1226 return idx;
1227 }
1228
1229 static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
1230 return val;
1231 }
1232
1233 static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
1234 return (val >> 8) & 0xff;
1235 }
1236
1237 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1238 GetARGBValue)
1239 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1240 GetAlphaValue)
1241
1242 #undef COLOR_INDEX_INVERSE
1243
1244 void VP8LInverseTransform(const VP8LTransform* const transform,
1245 int row_start, int row_end,
1246 const uint32_t* const in, uint32_t* const out) {
1247 const int width = transform->xsize_;
1248 assert(row_start < row_end);
1249 assert(row_end <= transform->ysize_);
1250 switch (transform->type_) {
1251 case SUBTRACT_GREEN:
1252 VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
1253 break;
1254 case PREDICTOR_TRANSFORM:
1255 PredictorInverseTransform(transform, row_start, row_end, out);
1256 if (row_end != transform->ysize_) {
1257 // The last predicted row in this iteration will be the top-pred row
1258 // for the first row in next iteration.
1259 memcpy(out - width, out + (row_end - row_start - 1) * width,
1260 width * sizeof(*out));
1261 }
1262 break;
1263 case CROSS_COLOR_TRANSFORM:
1264 ColorSpaceInverseTransform(transform, row_start, row_end, out);
1265 break;
1266 case COLOR_INDEXING_TRANSFORM:
1267 if (in == out && transform->bits_ > 0) {
1268 // Move packed pixels to the end of unpacked region, so that unpacking
1269 // can occur seamlessly.
1270 // Also, note that this is the only transform that applies on
1271 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1272 // transforms work on effective width of xsize_.
1273 const int out_stride = (row_end - row_start) * width;
1274 const int in_stride = (row_end - row_start) *
1275 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1276 uint32_t* const src = out + out_stride - in_stride;
1277 memmove(src, out, in_stride * sizeof(*src));
1278 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1279 } else {
1280 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1281 }
1282 break;
1283 }
1284 }
1285
1286 //------------------------------------------------------------------------------
1287 // Color space conversion.
1288
1289 static int is_big_endian(void) {
1290 static const union {
1291 uint16_t w;
1292 uint8_t b[2];
1293 } tmp = { 1 };
1294 return (tmp.b[0] != 1);
1295 }
1296
1297 void VP8LConvertBGRAToRGB_C(const uint32_t* src,
1298 int num_pixels, uint8_t* dst) {
1299 const uint32_t* const src_end = src + num_pixels;
1300 while (src < src_end) {
1301 const uint32_t argb = *src++;
1302 *dst++ = (argb >> 16) & 0xff;
1303 *dst++ = (argb >> 8) & 0xff;
1304 *dst++ = (argb >> 0) & 0xff;
1305 }
1306 }
1307
1308 void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
1309 int num_pixels, uint8_t* dst) {
1310 const uint32_t* const src_end = src + num_pixels;
1311 while (src < src_end) {
1312 const uint32_t argb = *src++;
1313 *dst++ = (argb >> 16) & 0xff;
1314 *dst++ = (argb >> 8) & 0xff;
1315 *dst++ = (argb >> 0) & 0xff;
1316 *dst++ = (argb >> 24) & 0xff;
1317 }
1318 }
1319
1320 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
1321 int num_pixels, uint8_t* dst) {
1322 const uint32_t* const src_end = src + num_pixels;
1323 while (src < src_end) {
1324 const uint32_t argb = *src++;
1325 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1326 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
1327 #ifdef WEBP_SWAP_16BIT_CSP
1328 *dst++ = ba;
1329 *dst++ = rg;
1330 #else
1331 *dst++ = rg;
1332 *dst++ = ba;
1333 #endif
1334 }
1335 }
1336
1337 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
1338 int num_pixels, uint8_t* dst) {
1339 const uint32_t* const src_end = src + num_pixels;
1340 while (src < src_end) {
1341 const uint32_t argb = *src++;
1342 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1343 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
1344 #ifdef WEBP_SWAP_16BIT_CSP
1345 *dst++ = gb;
1346 *dst++ = rg;
1347 #else
1348 *dst++ = rg;
1349 *dst++ = gb;
1350 #endif
1351 }
1352 }
1353
1354 void VP8LConvertBGRAToBGR_C(const uint32_t* src,
1355 int num_pixels, uint8_t* dst) {
1356 const uint32_t* const src_end = src + num_pixels;
1357 while (src < src_end) {
1358 const uint32_t argb = *src++;
1359 *dst++ = (argb >> 0) & 0xff;
1360 *dst++ = (argb >> 8) & 0xff;
1361 *dst++ = (argb >> 16) & 0xff;
1362 }
1363 }
1364
1365 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1366 int swap_on_big_endian) {
1367 if (is_big_endian() == swap_on_big_endian) {
1368 const uint32_t* const src_end = src + num_pixels;
1369 while (src < src_end) {
1370 const uint32_t argb = *src++;
1371
1372 #if !defined(WORDS_BIGENDIAN)
1373 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1374 *(uint32_t*)dst = BSwap32(argb);
1375 #else // WEBP_REFERENCE_IMPLEMENTATION
1376 dst[0] = (argb >> 24) & 0xff;
1377 dst[1] = (argb >> 16) & 0xff;
1378 dst[2] = (argb >> 8) & 0xff;
1379 dst[3] = (argb >> 0) & 0xff;
1380 #endif
1381 #else // WORDS_BIGENDIAN
1382 dst[0] = (argb >> 0) & 0xff;
1383 dst[1] = (argb >> 8) & 0xff;
1384 dst[2] = (argb >> 16) & 0xff;
1385 dst[3] = (argb >> 24) & 0xff;
1386 #endif
1387 dst += sizeof(argb);
1388 }
1389 } else {
1390 memcpy(dst, src, num_pixels * sizeof(*src));
1391 }
1392 }
1393
1394 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1395 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1396 switch (out_colorspace) {
1397 case MODE_RGB:
1398 VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
1399 break;
1400 case MODE_RGBA:
1401 VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1402 break;
1403 case MODE_rgbA:
1404 VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1405 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1406 break;
1407 case MODE_BGR:
1408 VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
1409 break;
1410 case MODE_BGRA:
1411 CopyOrSwap(in_data, num_pixels, rgba, 1);
1412 break;
1413 case MODE_bgrA:
1414 CopyOrSwap(in_data, num_pixels, rgba, 1);
1415 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1416 break;
1417 case MODE_ARGB:
1418 CopyOrSwap(in_data, num_pixels, rgba, 0);
1419 break;
1420 case MODE_Argb:
1421 CopyOrSwap(in_data, num_pixels, rgba, 0);
1422 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1423 break;
1424 case MODE_RGBA_4444:
1425 VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1426 break;
1427 case MODE_rgbA_4444:
1428 VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1429 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1430 break;
1431 case MODE_RGB_565:
1432 VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
1433 break;
1434 default:
1435 assert(0); // Code flow should not reach here.
1436 }
1437 }
1438
1439 //------------------------------------------------------------------------------ 1055 //------------------------------------------------------------------------------
1440 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. 1056 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
1441 void VP8LBundleColorMap(const uint8_t* const row, int width, 1057 void VP8LBundleColorMap(const uint8_t* const row, int width,
1442 int xbits, uint32_t* const dst) { 1058 int xbits, uint32_t* const dst) {
1443 int x; 1059 int x;
1444 if (xbits > 0) { 1060 if (xbits > 0) {
1445 const int bit_depth = 1 << (3 - xbits); 1061 const int bit_depth = 1 << (3 - xbits);
1446 const int mask = (1 << xbits) - 1; 1062 const int mask = (1 << xbits) - 1;
1447 uint32_t code = 0xff000000; 1063 uint32_t code = 0xff000000;
1448 for (x = 0; x < width; ++x) { 1064 for (x = 0; x < width; ++x) {
(...skipping 22 matching lines...) Expand all
1471 int length) { 1087 int length) {
1472 int i; 1088 int i;
1473 double cost = 0.; 1089 double cost = 0.;
1474 for (i = 2; i < length - 2; ++i) { 1090 for (i = 2; i < length - 2; ++i) {
1475 const int xy = X[i + 2] + Y[i + 2]; 1091 const int xy = X[i + 2] + Y[i + 2];
1476 cost += (i >> 1) * xy; 1092 cost += (i >> 1) * xy;
1477 } 1093 }
1478 return cost; 1094 return cost;
1479 } 1095 }
1480 1096
1481 // Returns the various RLE counts
1482 static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
1483 int i;
1484 int streak = 0;
1485 VP8LStreaks stats;
1486 memset(&stats, 0, sizeof(stats));
1487 for (i = 0; i < length - 1; ++i) {
1488 ++streak;
1489 if (population[i] == population[i + 1]) {
1490 continue;
1491 }
1492 stats.counts[population[i] != 0] += (streak > 3);
1493 stats.streaks[population[i] != 0][(streak > 3)] += streak;
1494 streak = 0;
1495 }
1496 ++streak;
1497 stats.counts[population[i] != 0] += (streak > 3);
1498 stats.streaks[population[i] != 0][(streak > 3)] += streak;
1499 return stats;
1500 }
1501
1502 static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
1503 const uint32_t* Y, int length) {
1504 int i;
1505 int streak = 0;
1506 VP8LStreaks stats;
1507 memset(&stats, 0, sizeof(stats));
1508 for (i = 0; i < length - 1; ++i) {
1509 const int xy = X[i] + Y[i];
1510 const int xy_next = X[i + 1] + Y[i + 1];
1511 ++streak;
1512 if (xy == xy_next) {
1513 continue;
1514 }
1515 stats.counts[xy != 0] += (streak > 3);
1516 stats.streaks[xy != 0][(streak > 3)] += streak;
1517 streak = 0;
1518 }
1519 {
1520 const int xy = X[i] + Y[i];
1521 ++streak;
1522 stats.counts[xy != 0] += (streak > 3);
1523 stats.streaks[xy != 0][(streak > 3)] += streak;
1524 }
1525 return stats;
1526 }
1527
1528 //------------------------------------------------------------------------------ 1097 //------------------------------------------------------------------------------
1529 1098
1530 static void HistogramAdd(const VP8LHistogram* const a, 1099 static void HistogramAdd(const VP8LHistogram* const a,
1531 const VP8LHistogram* const b, 1100 const VP8LHistogram* const b,
1532 VP8LHistogram* const out) { 1101 VP8LHistogram* const out) {
1533 int i; 1102 int i;
1534 const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_); 1103 const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
1535 assert(a->palette_code_bits_ == b->palette_code_bits_); 1104 assert(a->palette_code_bits_ == b->palette_code_bits_);
1536 if (b != out) { 1105 if (b != out) {
1537 for (i = 0; i < literal_size; ++i) { 1106 for (i = 0; i < literal_size; ++i) {
(...skipping 18 matching lines...) Expand all
1556 out->red_[i] += a->red_[i]; 1125 out->red_[i] += a->red_[i];
1557 out->blue_[i] += a->blue_[i]; 1126 out->blue_[i] += a->blue_[i];
1558 out->alpha_[i] += a->alpha_[i]; 1127 out->alpha_[i] += a->alpha_[i];
1559 } 1128 }
1560 } 1129 }
1561 } 1130 }
1562 1131
1563 //------------------------------------------------------------------------------ 1132 //------------------------------------------------------------------------------
1564 1133
1565 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; 1134 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
1566 VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
1567 VP8LPredictorFunc VP8LPredictors[16];
1568 1135
1569 VP8LTransformColorFunc VP8LTransformColor; 1136 VP8LTransformColorFunc VP8LTransformColor;
1570 VP8LTransformColorFunc VP8LTransformColorInverse;
1571 1137
1572 VP8LConvertFunc VP8LConvertBGRAToRGB; 1138 VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
1573 VP8LConvertFunc VP8LConvertBGRAToRGBA; 1139 VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
1574 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
1575 VP8LConvertFunc VP8LConvertBGRAToRGB565;
1576 VP8LConvertFunc VP8LConvertBGRAToBGR;
1577 1140
1578 VP8LFastLog2SlowFunc VP8LFastLog2Slow; 1141 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
1579 VP8LFastLog2SlowFunc VP8LFastSLog2Slow; 1142 VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
1580 1143
1581 VP8LCostFunc VP8LExtraCost; 1144 VP8LCostFunc VP8LExtraCost;
1582 VP8LCostCombinedFunc VP8LExtraCostCombined; 1145 VP8LCostCombinedFunc VP8LExtraCostCombined;
1146 VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
1583 1147
1584 VP8LCostCountFunc VP8LHuffmanCostCount; 1148 GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
1585 VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
1586 1149
1587 VP8LHistogramAddFunc VP8LHistogramAdd; 1150 VP8LHistogramAddFunc VP8LHistogramAdd;
1588 1151
1589 extern void VP8LDspInitSSE2(void); 1152 extern void VP8LEncDspInitSSE2(void);
1590 extern void VP8LDspInitNEON(void); 1153 extern void VP8LEncDspInitSSE41(void);
1591 extern void VP8LDspInitMIPS32(void); 1154 extern void VP8LEncDspInitNEON(void);
1155 extern void VP8LEncDspInitMIPS32(void);
1156 extern void VP8LEncDspInitMIPSdspR2(void);
1592 1157
1593 static volatile VP8CPUInfo lossless_last_cpuinfo_used = 1158 static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used =
1594 (VP8CPUInfo)&lossless_last_cpuinfo_used; 1159 (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;
1595 1160
1596 void VP8LDspInit(void) { 1161 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
1597 if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return; 1162 if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;
1598 1163
1599 memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors)); 1164 VP8LDspInit();
1600 1165
1601 VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C; 1166 VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
1602 VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
1603 1167
1604 VP8LTransformColor = VP8LTransformColor_C; 1168 VP8LTransformColor = VP8LTransformColor_C;
1605 VP8LTransformColorInverse = VP8LTransformColorInverse_C;
1606 1169
1607 VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C; 1170 VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
1608 VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C; 1171 VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
1609 VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
1610 VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
1611 VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
1612 1172
1613 VP8LFastLog2Slow = FastLog2Slow; 1173 VP8LFastLog2Slow = FastLog2Slow;
1614 VP8LFastSLog2Slow = FastSLog2Slow; 1174 VP8LFastSLog2Slow = FastSLog2Slow;
1615 1175
1616 VP8LExtraCost = ExtraCost; 1176 VP8LExtraCost = ExtraCost;
1617 VP8LExtraCostCombined = ExtraCostCombined; 1177 VP8LExtraCostCombined = ExtraCostCombined;
1178 VP8LCombinedShannonEntropy = CombinedShannonEntropy;
1618 1179
1619 VP8LHuffmanCostCount = HuffmanCostCount; 1180 VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;
1620 VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
1621 1181
1622 VP8LHistogramAdd = HistogramAdd; 1182 VP8LHistogramAdd = HistogramAdd;
1623 1183
1624 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 1184 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
1625 if (VP8GetCPUInfo != NULL) { 1185 if (VP8GetCPUInfo != NULL) {
1626 #if defined(WEBP_USE_SSE2) 1186 #if defined(WEBP_USE_SSE2)
1627 if (VP8GetCPUInfo(kSSE2)) { 1187 if (VP8GetCPUInfo(kSSE2)) {
1628 VP8LDspInitSSE2(); 1188 VP8LEncDspInitSSE2();
1189 #if defined(WEBP_USE_SSE41)
1190 if (VP8GetCPUInfo(kSSE4_1)) {
1191 VP8LEncDspInitSSE41();
1192 }
1193 #endif
1629 } 1194 }
1630 #endif 1195 #endif
1631 #if defined(WEBP_USE_NEON) 1196 #if defined(WEBP_USE_NEON)
1632 if (VP8GetCPUInfo(kNEON)) { 1197 if (VP8GetCPUInfo(kNEON)) {
1633 VP8LDspInitNEON(); 1198 VP8LEncDspInitNEON();
1634 } 1199 }
1635 #endif 1200 #endif
1636 #if defined(WEBP_USE_MIPS32) 1201 #if defined(WEBP_USE_MIPS32)
1637 if (VP8GetCPUInfo(kMIPS32)) { 1202 if (VP8GetCPUInfo(kMIPS32)) {
1638 VP8LDspInitMIPS32(); 1203 VP8LEncDspInitMIPS32();
1204 }
1205 #endif
1206 #if defined(WEBP_USE_MIPS_DSP_R2)
1207 if (VP8GetCPUInfo(kMIPSdspR2)) {
1208 VP8LEncDspInitMIPSdspR2();
1639 } 1209 }
1640 #endif 1210 #endif
1641 } 1211 }
1642 lossless_last_cpuinfo_used = VP8GetCPUInfo; 1212 lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
1643 } 1213 }
1644 1214
1645 //------------------------------------------------------------------------------ 1215 //------------------------------------------------------------------------------
OLDNEW
« no previous file with comments | « third_party/libwebp/dsp/lossless.c ('k') | third_party/libwebp/dsp/lossless_enc_mips32.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698