| Index: third_party/libwebp/dsp/lossless_enc.c
|
| diff --git a/third_party/libwebp/dsp/lossless.c b/third_party/libwebp/dsp/lossless_enc.c
|
| similarity index 53%
|
| copy from third_party/libwebp/dsp/lossless.c
|
| copy to third_party/libwebp/dsp/lossless_enc.c
|
| index ee334bceb0b9669bc7791c21ef2da255eb9445c7..2eafa3da7d23d69bc7e084e0e0d519b9bbd0a9e1 100644
|
| --- a/third_party/libwebp/dsp/lossless.c
|
| +++ b/third_party/libwebp/dsp/lossless_enc.c
|
| @@ -1,4 +1,4 @@
|
| -// Copyright 2012 Google Inc. All Rights Reserved.
|
| +// Copyright 2015 Google Inc. All Rights Reserved.
|
| //
|
| // Use of this source code is governed by a BSD-style license
|
| // that can be found in the COPYING file in the root of the source
|
| @@ -7,7 +7,7 @@
|
| // be found in the AUTHORS file in the root of the source tree.
|
| // -----------------------------------------------------------------------------
|
| //
|
| -// Image transforms and color space conversion methods for lossless decoder.
|
| +// Image transform methods for lossless encoder.
|
| //
|
| // Authors: Vikas Arora (vikaas.arora@gmail.com)
|
| // Jyrki Alakuijala (jyrki@google.com)
|
| @@ -24,6 +24,9 @@
|
|
|
| #define MAX_DIFF_COST (1e30f)
|
|
|
| +static const int kPredLowEffort = 11;
|
| +static const uint32_t kMaskAlpha = 0xff000000;
|
| +
|
| // lookup table for small values of log2(int)
|
| const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
|
| 0.0000000000000000f, 0.0000000000000000f,
|
| @@ -326,13 +329,6 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
|
| 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
|
| };
|
|
|
| -// The threshold till approximate version of log_2 can be used.
|
| -// Practically, we can get rid of the call to log() as the two values match to
|
| -// very high degree (the ratio of these two is 0.99999x).
|
| -// Keeping a high threshold for now.
|
| -#define APPROX_LOG_WITH_CORRECTION_MAX 65536
|
| -#define APPROX_LOG_MAX 4096
|
| -#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
|
| static float FastSLog2Slow(uint32_t v) {
|
| assert(v >= LOG_LOOKUP_IDX_MAX);
|
| if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
|
| @@ -384,166 +380,11 @@ static float FastLog2Slow(uint32_t v) {
|
| }
|
| }
|
|
|
| -//------------------------------------------------------------------------------
|
| -// Image transforms.
|
| -
|
| // Mostly used to reduce code size + readability
|
| static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
|
|
|
| -// In-place sum of each component with mod 256.
|
| -static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
|
| - const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
|
| - const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
|
| - *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
|
| - return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
|
| - return Average2(Average2(a0, a2), a1);
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
|
| - uint32_t a2, uint32_t a3) {
|
| - return Average2(Average2(a0, a1), Average2(a2, a3));
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t Clip255(uint32_t a) {
|
| - if (a < 256) {
|
| - return a;
|
| - }
|
| - // return 0, when a is a negative integer.
|
| - // return 255, when a is positive.
|
| - return ~a >> 24;
|
| -}
|
| -
|
| -static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
|
| - return Clip255(a + b - c);
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
|
| - uint32_t c2) {
|
| - const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
|
| - const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
|
| - (c1 >> 16) & 0xff,
|
| - (c2 >> 16) & 0xff);
|
| - const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
|
| - (c1 >> 8) & 0xff,
|
| - (c2 >> 8) & 0xff);
|
| - const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
|
| - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
|
| -}
|
| -
|
| -static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
|
| - return Clip255(a + (a - b) / 2);
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
|
| - uint32_t c2) {
|
| - const uint32_t ave = Average2(c0, c1);
|
| - const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
|
| - const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
|
| - const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
|
| - const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
|
| - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
|
| -}
|
| -
|
| -// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
|
| -#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
|
| -# define LOCAL_INLINE __attribute__ ((noinline))
|
| -#else
|
| -# define LOCAL_INLINE WEBP_INLINE
|
| -#endif
|
| -
|
| -static LOCAL_INLINE int Sub3(int a, int b, int c) {
|
| - const int pb = b - c;
|
| - const int pa = a - c;
|
| - return abs(pb) - abs(pa);
|
| -}
|
| -
|
| -#undef LOCAL_INLINE
|
| -
|
| -static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
|
| - const int pa_minus_pb =
|
| - Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
|
| - Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
|
| - Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
|
| - Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
|
| - return (pa_minus_pb <= 0) ? a : b;
|
| -}
|
| -
|
| //------------------------------------------------------------------------------
|
| -// Predictors
|
| -
|
| -static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
|
| - (void)top;
|
| - (void)left;
|
| - return ARGB_BLACK;
|
| -}
|
| -static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
|
| - (void)top;
|
| - return left;
|
| -}
|
| -static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
|
| - (void)left;
|
| - return top[0];
|
| -}
|
| -static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
|
| - (void)left;
|
| - return top[1];
|
| -}
|
| -static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
|
| - (void)left;
|
| - return top[-1];
|
| -}
|
| -static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Average3(left, top[0], top[1]);
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Average2(left, top[-1]);
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Average2(left, top[0]);
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Average2(top[-1], top[0]);
|
| - (void)left;
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Average2(top[0], top[1]);
|
| - (void)left;
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = Select(top[0], left, top[-1]);
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
|
| - return pred;
|
| -}
|
| -static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
| - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
|
| - return pred;
|
| -}
|
| -
|
| -static const VP8LPredictorFunc kPredictorsC[16] = {
|
| - Predictor0, Predictor1, Predictor2, Predictor3,
|
| - Predictor4, Predictor5, Predictor6, Predictor7,
|
| - Predictor8, Predictor9, Predictor10, Predictor11,
|
| - Predictor12, Predictor13,
|
| - Predictor0, Predictor0 // <- padding security sentinels
|
| -};
|
| +// Methods to calculate Entropy (Shannon).
|
|
|
| static float PredictionCostSpatial(const int counts[256], int weight_0,
|
| double exp_val) {
|
| @@ -565,15 +406,15 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
|
| int sumX = 0, sumXY = 0;
|
| for (i = 0; i < 256; ++i) {
|
| const int x = X[i];
|
| - const int xy = x + Y[i];
|
| if (x != 0) {
|
| + const int xy = x + Y[i];
|
| sumX += x;
|
| retval -= VP8LFastSLog2(x);
|
| sumXY += xy;
|
| retval -= VP8LFastSLog2(xy);
|
| - } else if (xy != 0) {
|
| - sumXY += xy;
|
| - retval -= VP8LFastSLog2(xy);
|
| + } else if (Y[i] != 0) {
|
| + sumXY += Y[i];
|
| + retval -= VP8LFastSLog2(Y[i]);
|
| }
|
| }
|
| retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
| @@ -587,11 +428,107 @@ static float PredictionCostSpatialHistogram(const int accumulated[4][256],
|
| for (i = 0; i < 4; ++i) {
|
| const double kExpValue = 0.94;
|
| retval += PredictionCostSpatial(tile[i], 1, kExpValue);
|
| - retval += CombinedShannonEntropy(tile[i], accumulated[i]);
|
| + retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
|
| }
|
| return (float)retval;
|
| }
|
|
|
| +void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
| + entropy->entropy = 0.;
|
| + entropy->sum = 0;
|
| + entropy->nonzeros = 0;
|
| + entropy->max_val = 0;
|
| + entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
|
| +}
|
| +
|
| +void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
| + VP8LBitEntropy* const entropy) {
|
| + int i;
|
| +
|
| + VP8LBitEntropyInit(entropy);
|
| +
|
| + for (i = 0; i < n; ++i) {
|
| + if (array[i] != 0) {
|
| + entropy->sum += array[i];
|
| + entropy->nonzero_code = i;
|
| + ++entropy->nonzeros;
|
| + entropy->entropy -= VP8LFastSLog2(array[i]);
|
| + if (entropy->max_val < array[i]) {
|
| + entropy->max_val = array[i];
|
| + }
|
| + }
|
| + }
|
| + entropy->entropy += VP8LFastSLog2(entropy->sum);
|
| +}
|
| +
|
| +static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
| + uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
| + VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
| + const int streak = i - *i_prev;
|
| +
|
| + // Gather info for the bit entropy.
|
| + if (*val_prev != 0) {
|
| + bit_entropy->sum += (*val_prev) * streak;
|
| + bit_entropy->nonzeros += streak;
|
| + bit_entropy->nonzero_code = *i_prev;
|
| + bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
|
| + if (bit_entropy->max_val < *val_prev) {
|
| + bit_entropy->max_val = *val_prev;
|
| + }
|
| + }
|
| +
|
| + // Gather info for the Huffman cost.
|
| + stats->counts[*val_prev != 0] += (streak > 3);
|
| + stats->streaks[*val_prev != 0][(streak > 3)] += streak;
|
| +
|
| + *val_prev = val;
|
| + *i_prev = i;
|
| +}
|
| +
|
| +void VP8LGetEntropyUnrefined(const uint32_t* const X, int length,
|
| + VP8LBitEntropy* const bit_entropy,
|
| + VP8LStreaks* const stats) {
|
| + int i;
|
| + int i_prev = 0;
|
| + uint32_t x_prev = X[0];
|
| +
|
| + memset(stats, 0, sizeof(*stats));
|
| + VP8LBitEntropyInit(bit_entropy);
|
| +
|
| + for (i = 1; i < length; ++i) {
|
| + const uint32_t x = X[i];
|
| + if (x != x_prev) {
|
| + VP8LGetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);
|
| + }
|
| + }
|
| + VP8LGetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
|
| +
|
| + bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
| +}
|
| +
|
| +void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,
|
| + const uint32_t* const Y, int length,
|
| + VP8LBitEntropy* const bit_entropy,
|
| + VP8LStreaks* const stats) {
|
| + int i = 1;
|
| + int i_prev = 0;
|
| + uint32_t xy_prev = X[0] + Y[0];
|
| +
|
| + memset(stats, 0, sizeof(*stats));
|
| + VP8LBitEntropyInit(bit_entropy);
|
| +
|
| + for (i = 1; i < length; ++i) {
|
| + const uint32_t xy = X[i] + Y[i];
|
| + if (xy != xy_prev) {
|
| + VP8LGetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy,
|
| + stats);
|
| + }
|
| + }
|
| + VP8LGetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
|
| +
|
| + bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
|
| +}
|
| +
|
| static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
|
| ++histo_argb[0][argb >> 24];
|
| ++histo_argb[1][(argb >> 16) & 0xff];
|
| @@ -599,10 +536,27 @@ static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
|
| ++histo_argb[3][argb & 0xff];
|
| }
|
|
|
| +//------------------------------------------------------------------------------
|
| +
|
| +static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
|
| + int x, int y,
|
| + const uint32_t* current_row,
|
| + const uint32_t* upper_row) {
|
| + if (y == 0) {
|
| + return (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left.
|
| + } else if (x == 0) {
|
| + return upper_row[x]; // Top.
|
| + } else {
|
| + return pred_func(current_row[x - 1], upper_row + x);
|
| + }
|
| +}
|
| +
|
| +// Returns best predictor and updates the accumulated histogram.
|
| static int GetBestPredictorForTile(int width, int height,
|
| int tile_x, int tile_y, int bits,
|
| - const int accumulated[4][256],
|
| - const uint32_t* const argb_scratch) {
|
| + int accumulated[4][256],
|
| + const uint32_t* const argb_scratch,
|
| + int exact) {
|
| const int kNumPredModes = 14;
|
| const int col_start = tile_x << bits;
|
| const int row_start = tile_y << bits;
|
| @@ -612,13 +566,19 @@ static int GetBestPredictorForTile(int width, int height,
|
| float best_diff = MAX_DIFF_COST;
|
| int best_mode = 0;
|
| int mode;
|
| + int histo_stack_1[4][256];
|
| + int histo_stack_2[4][256];
|
| + // Need pointers to be able to swap arrays.
|
| + int (*histo_argb)[256] = histo_stack_1;
|
| + int (*best_histo)[256] = histo_stack_2;
|
| +
|
| + int i, j;
|
| for (mode = 0; mode < kNumPredModes; ++mode) {
|
| const uint32_t* current_row = argb_scratch;
|
| const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
|
| float cur_diff;
|
| int y;
|
| - int histo_argb[4][256];
|
| - memset(histo_argb, 0, sizeof(histo_argb));
|
| + memset(histo_argb, 0, sizeof(histo_stack_1));
|
| for (y = 0; y < max_y; ++y) {
|
| int x;
|
| const int row = row_start + y;
|
| @@ -626,65 +586,93 @@ static int GetBestPredictorForTile(int width, int height,
|
| current_row = upper_row + width;
|
| for (x = 0; x < max_x; ++x) {
|
| const int col = col_start + x;
|
| - uint32_t predict;
|
| - if (row == 0) {
|
| - predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
|
| - } else if (col == 0) {
|
| - predict = upper_row[col]; // Top.
|
| - } else {
|
| - predict = pred_func(current_row[col - 1], upper_row + col);
|
| + const uint32_t predict =
|
| + Predict(pred_func, col, row, current_row, upper_row);
|
| + uint32_t residual = VP8LSubPixels(current_row[col], predict);
|
| + if (!exact && (current_row[col] & kMaskAlpha) == 0) {
|
| + residual &= kMaskAlpha; // See CopyTileWithPrediction.
|
| }
|
| - UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
|
| + UpdateHisto(histo_argb, residual);
|
| }
|
| }
|
| cur_diff = PredictionCostSpatialHistogram(
|
| - accumulated, (const int (*)[256])histo_argb);
|
| + (const int (*)[256])accumulated, (const int (*)[256])histo_argb);
|
| if (cur_diff < best_diff) {
|
| + int (*tmp)[256] = histo_argb;
|
| + histo_argb = best_histo;
|
| + best_histo = tmp;
|
| best_diff = cur_diff;
|
| best_mode = mode;
|
| }
|
| }
|
|
|
| + for (i = 0; i < 4; i++) {
|
| + for (j = 0; j < 256; j++) {
|
| + accumulated[i][j] += best_histo[i][j];
|
| + }
|
| + }
|
| +
|
| return best_mode;
|
| }
|
|
|
| -static void CopyTileWithPrediction(int width, int height,
|
| - int tile_x, int tile_y, int bits, int mode,
|
| - const uint32_t* const argb_scratch,
|
| - uint32_t* const argb) {
|
| - const int col_start = tile_x << bits;
|
| - const int row_start = tile_y << bits;
|
| - const int tile_size = 1 << bits;
|
| - const int max_y = GetMin(tile_size, height - row_start);
|
| - const int max_x = GetMin(tile_size, width - col_start);
|
| - const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
|
| - const uint32_t* current_row = argb_scratch;
|
| -
|
| +static void CopyImageWithPrediction(int width, int height,
|
| + int bits, uint32_t* const modes,
|
| + uint32_t* const argb_scratch,
|
| + uint32_t* const argb,
|
| + int low_effort, int exact) {
|
| + const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
| + const int mask = (1 << bits) - 1;
|
| + // The row size is one pixel longer to allow the top right pixel to point to
|
| + // the leftmost pixel of the next row when at the right edge.
|
| + uint32_t* current_row = argb_scratch;
|
| + uint32_t* upper_row = argb_scratch + width + 1;
|
| int y;
|
| - for (y = 0; y < max_y; ++y) {
|
| + VP8LPredictorFunc pred_func =
|
| + low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
|
| +
|
| + for (y = 0; y < height; ++y) {
|
| int x;
|
| - const int row = row_start + y;
|
| - const uint32_t* const upper_row = current_row;
|
| - current_row = upper_row + width;
|
| - for (x = 0; x < max_x; ++x) {
|
| - const int col = col_start + x;
|
| - const int pix = row * width + col;
|
| - uint32_t predict;
|
| - if (row == 0) {
|
| - predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
|
| - } else if (col == 0) {
|
| - predict = upper_row[col]; // Top.
|
| - } else {
|
| - predict = pred_func(current_row[col - 1], upper_row + col);
|
| + uint32_t* tmp = upper_row;
|
| + upper_row = current_row;
|
| + current_row = tmp;
|
| + memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
|
| + current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
|
| +
|
| + if (low_effort) {
|
| + for (x = 0; x < width; ++x) {
|
| + const uint32_t predict =
|
| + Predict(pred_func, x, y, current_row, upper_row);
|
| + argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
|
| + }
|
| + } else {
|
| + for (x = 0; x < width; ++x) {
|
| + uint32_t predict, residual;
|
| + if ((x & mask) == 0) {
|
| + const int mode =
|
| + (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
|
| + pred_func = VP8LPredictors[mode];
|
| + }
|
| + predict = Predict(pred_func, x, y, current_row, upper_row);
|
| + residual = VP8LSubPixels(current_row[x], predict);
|
| + if (!exact && (current_row[x] & kMaskAlpha) == 0) {
|
| + // If alpha is 0, cleanup RGB. We can choose the RGB values of the
|
| + // residual for best compression. The prediction of alpha itself can
|
| + // be non-zero and must be kept though. We choose RGB of the residual
|
| + // to be 0.
|
| + residual &= kMaskAlpha;
|
| + // Update input image so that next predictions use correct RGB value.
|
| + current_row[x] = predict & ~kMaskAlpha;
|
| + if (x == 0 && y != 0) upper_row[width] = current_row[x];
|
| + }
|
| + argb[y * width + x] = residual;
|
| }
|
| - argb[pix] = VP8LSubPixels(current_row[col], predict);
|
| }
|
| }
|
| }
|
|
|
| -void VP8LResidualImage(int width, int height, int bits,
|
| +void VP8LResidualImage(int width, int height, int bits, int low_effort,
|
| uint32_t* const argb, uint32_t* const argb_scratch,
|
| - uint32_t* const image) {
|
| + uint32_t* const image, int exact) {
|
| const int max_tile_size = 1 << bits;
|
| const int tiles_per_row = VP8LSubSampleSize(width, bits);
|
| const int tiles_per_col = VP8LSubSampleSize(height, bits);
|
| @@ -692,104 +680,34 @@ void VP8LResidualImage(int width, int height, int bits,
|
| uint32_t* const current_tile_rows = argb_scratch + width;
|
| int tile_y;
|
| int histo[4][256];
|
| - memset(histo, 0, sizeof(histo));
|
| - for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
|
| - const int tile_y_offset = tile_y * max_tile_size;
|
| - const int this_tile_height =
|
| - (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
|
| - int tile_x;
|
| - if (tile_y > 0) {
|
| - memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
|
| - width * sizeof(*upper_row));
|
| + if (low_effort) {
|
| + int i;
|
| + for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
|
| + image[i] = ARGB_BLACK | (kPredLowEffort << 8);
|
| }
|
| - memcpy(current_tile_rows, &argb[tile_y_offset * width],
|
| - this_tile_height * width * sizeof(*current_tile_rows));
|
| - for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
|
| - int pred;
|
| - int y;
|
| - const int tile_x_offset = tile_x * max_tile_size;
|
| - int all_x_max = tile_x_offset + max_tile_size;
|
| - if (all_x_max > width) {
|
| - all_x_max = width;
|
| + } else {
|
| + memset(histo, 0, sizeof(histo));
|
| + for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
|
| + const int tile_y_offset = tile_y * max_tile_size;
|
| + const int this_tile_height =
|
| + (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
|
| + int tile_x;
|
| + if (tile_y > 0) {
|
| + memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
|
| + width * sizeof(*upper_row));
|
| }
|
| - pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
|
| - (const int (*)[256])histo,
|
| - argb_scratch);
|
| - image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
|
| - CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
|
| - argb_scratch, argb);
|
| - for (y = 0; y < max_tile_size; ++y) {
|
| - int ix;
|
| - int all_x;
|
| - int all_y = tile_y_offset + y;
|
| - if (all_y >= height) {
|
| - break;
|
| - }
|
| - ix = all_y * width + tile_x_offset;
|
| - for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
|
| - UpdateHisto(histo, argb[ix]);
|
| - }
|
| + memcpy(current_tile_rows, &argb[tile_y_offset * width],
|
| + this_tile_height * width * sizeof(*current_tile_rows));
|
| + for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
|
| + const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
|
| + bits, (int (*)[256])histo, argb_scratch, exact);
|
| + image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
|
| }
|
| }
|
| }
|
| -}
|
| -
|
| -// Inverse prediction.
|
| -static void PredictorInverseTransform(const VP8LTransform* const transform,
|
| - int y_start, int y_end, uint32_t* data) {
|
| - const int width = transform->xsize_;
|
| - if (y_start == 0) { // First Row follows the L (mode=1) mode.
|
| - int x;
|
| - const uint32_t pred0 = Predictor0(data[-1], NULL);
|
| - AddPixelsEq(data, pred0);
|
| - for (x = 1; x < width; ++x) {
|
| - const uint32_t pred1 = Predictor1(data[x - 1], NULL);
|
| - AddPixelsEq(data + x, pred1);
|
| - }
|
| - data += width;
|
| - ++y_start;
|
| - }
|
|
|
| - {
|
| - int y = y_start;
|
| - const int tile_width = 1 << transform->bits_;
|
| - const int mask = tile_width - 1;
|
| - const int safe_width = width & ~mask;
|
| - const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
|
| - const uint32_t* pred_mode_base =
|
| - transform->data_ + (y >> transform->bits_) * tiles_per_row;
|
| -
|
| - while (y < y_end) {
|
| - const uint32_t pred2 = Predictor2(data[-1], data - width);
|
| - const uint32_t* pred_mode_src = pred_mode_base;
|
| - VP8LPredictorFunc pred_func;
|
| - int x = 1;
|
| - int t = 1;
|
| - // First pixel follows the T (mode=2) mode.
|
| - AddPixelsEq(data, pred2);
|
| - // .. the rest:
|
| - while (x < safe_width) {
|
| - pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
|
| - for (; t < tile_width; ++t, ++x) {
|
| - const uint32_t pred = pred_func(data[x - 1], data + x - width);
|
| - AddPixelsEq(data + x, pred);
|
| - }
|
| - t = 0;
|
| - }
|
| - if (x < width) {
|
| - pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
|
| - for (; x < width; ++x) {
|
| - const uint32_t pred = pred_func(data[x - 1], data + x - width);
|
| - AddPixelsEq(data + x, pred);
|
| - }
|
| - }
|
| - data += width;
|
| - ++y;
|
| - if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
|
| - pred_mode_base += tiles_per_row;
|
| - }
|
| - }
|
| - }
|
| + CopyImageWithPrediction(width, height, bits,
|
| + image, argb_scratch, argb, low_effort, exact);
|
| }
|
|
|
| void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
|
| @@ -803,20 +721,6 @@ void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
|
| }
|
| }
|
|
|
| -// Add green to blue and red channels (i.e. perform the inverse transform of
|
| -// 'subtract green').
|
| -void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
|
| - int i;
|
| - for (i = 0; i < num_pixels; ++i) {
|
| - const uint32_t argb = data[i];
|
| - const uint32_t green = ((argb >> 8) & 0xff);
|
| - uint32_t red_blue = (argb & 0x00ff00ffu);
|
| - red_blue += (green << 16) | green;
|
| - red_blue &= 0x00ff00ffu;
|
| - data[i] = (argb & 0xff00ff00u) | red_blue;
|
| - }
|
| -}
|
| -
|
| static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
|
| m->green_to_red_ = 0;
|
| m->green_to_blue_ = 0;
|
| @@ -861,24 +765,6 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
|
| }
|
| }
|
|
|
| -void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
|
| - int num_pixels) {
|
| - int i;
|
| - for (i = 0; i < num_pixels; ++i) {
|
| - const uint32_t argb = data[i];
|
| - const uint32_t green = argb >> 8;
|
| - const uint32_t red = argb >> 16;
|
| - uint32_t new_red = red;
|
| - uint32_t new_blue = argb;
|
| - new_red += ColorTransformDelta(m->green_to_red_, green);
|
| - new_red &= 0xff;
|
| - new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
| - new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
|
| - new_blue &= 0xff;
|
| - data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
| - }
|
| -}
|
| -
|
| static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
| uint32_t argb) {
|
| const uint32_t green = argb >> 8;
|
| @@ -903,24 +789,32 @@ static float PredictionCostCrossColor(const int accumulated[256],
|
| // Favor low entropy, locally and globally.
|
| // Favor small absolute values for PredictionCostSpatial
|
| static const double kExpValue = 2.4;
|
| - return CombinedShannonEntropy(counts, accumulated) +
|
| + return VP8LCombinedShannonEntropy(counts, accumulated) +
|
| PredictionCostSpatial(counts, 3, kExpValue);
|
| }
|
|
|
| +void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
| + int tile_width, int tile_height,
|
| + int green_to_red, int histo[]) {
|
| + while (tile_height-- > 0) {
|
| + int x;
|
| + for (x = 0; x < tile_width; ++x) {
|
| + ++histo[TransformColorRed(green_to_red, argb[x])];
|
| + }
|
| + argb += stride;
|
| + }
|
| +}
|
| +
|
| static float GetPredictionCostCrossColorRed(
|
| - int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
| - int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
| - const int accumulated_red_histo[256], const uint32_t* const argb) {
|
| - int all_y;
|
| + const uint32_t* argb, int stride, int tile_width, int tile_height,
|
| + VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
| + const int accumulated_red_histo[256]) {
|
| int histo[256] = { 0 };
|
| float cur_diff;
|
| - for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
|
| - int ix = all_y * xsize + tile_x_offset;
|
| - int all_x;
|
| - for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
|
| - ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
|
| - }
|
| - }
|
| +
|
| + VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
|
| + green_to_red, histo);
|
| +
|
| cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
|
| if ((uint8_t)green_to_red == prev_x.green_to_red_) {
|
| cur_diff -= 3; // favor keeping the areas locally similar
|
| @@ -935,59 +829,58 @@ static float GetPredictionCostCrossColorRed(
|
| }
|
|
|
| static void GetBestGreenToRed(
|
| - int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
| - int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
|
| - const int accumulated_red_histo[256], const uint32_t* const argb,
|
| - VP8LMultipliers* const best_tx) {
|
| - int min_green_to_red = -64;
|
| - int max_green_to_red = 64;
|
| - int green_to_red = 0;
|
| - int eval_min = 1;
|
| - int eval_max = 1;
|
| - float cur_diff_min = MAX_DIFF_COST;
|
| - float cur_diff_max = MAX_DIFF_COST;
|
| - // Do a binary search to find the optimal green_to_red color transform.
|
| - while (max_green_to_red - min_green_to_red > 2) {
|
| - if (eval_min) {
|
| - cur_diff_min = GetPredictionCostCrossColorRed(
|
| - tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
| - prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
|
| - eval_min = 0;
|
| - }
|
| - if (eval_max) {
|
| - cur_diff_max = GetPredictionCostCrossColorRed(
|
| - tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
| - prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
|
| - eval_max = 0;
|
| + const uint32_t* argb, int stride, int tile_width, int tile_height,
|
| + VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
|
| + const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
|
| + const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
|
| + int green_to_red_best = 0;
|
| + int iter, offset;
|
| + float best_diff = GetPredictionCostCrossColorRed(
|
| + argb, stride, tile_width, tile_height, prev_x, prev_y,
|
| + green_to_red_best, accumulated_red_histo);
|
| + for (iter = 0; iter < kMaxIters; ++iter) {
|
| + // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
|
| + // one in color computation. Having initial delta here as 1 is sufficient
|
| + // to explore the range of (-2, 2).
|
| + const int delta = 32 >> iter;
|
| + // Try a negative and a positive delta from the best known value.
|
| + for (offset = -delta; offset <= delta; offset += 2 * delta) {
|
| + const int green_to_red_cur = offset + green_to_red_best;
|
| + const float cur_diff = GetPredictionCostCrossColorRed(
|
| + argb, stride, tile_width, tile_height, prev_x, prev_y,
|
| + green_to_red_cur, accumulated_red_histo);
|
| + if (cur_diff < best_diff) {
|
| + best_diff = cur_diff;
|
| + green_to_red_best = green_to_red_cur;
|
| + }
|
| }
|
| - if (cur_diff_min < cur_diff_max) {
|
| - green_to_red = min_green_to_red;
|
| - max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
|
| - eval_max = 1;
|
| - } else {
|
| - green_to_red = max_green_to_red;
|
| - min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
|
| - eval_min = 1;
|
| + }
|
| + best_tx->green_to_red_ = green_to_red_best;
|
| +}
|
| +
|
| +void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
| + int tile_width, int tile_height,
|
| + int green_to_blue, int red_to_blue,
|
| + int histo[]) {
|
| + while (tile_height-- > 0) {
|
| + int x;
|
| + for (x = 0; x < tile_width; ++x) {
|
| + ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])];
|
| }
|
| + argb += stride;
|
| }
|
| - best_tx->green_to_red_ = green_to_red;
|
| }
|
|
|
| static float GetPredictionCostCrossColorBlue(
|
| - int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
| - int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
|
| - int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
|
| - const uint32_t* const argb) {
|
| - int all_y;
|
| + const uint32_t* argb, int stride, int tile_width, int tile_height,
|
| + VP8LMultipliers prev_x, VP8LMultipliers prev_y,
|
| + int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
|
| int histo[256] = { 0 };
|
| float cur_diff;
|
| - for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
|
| - int all_x;
|
| - int ix = all_y * xsize + tile_x_offset;
|
| - for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
|
| - ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
|
| - }
|
| - }
|
| +
|
| + VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
|
| + green_to_blue, red_to_blue, histo);
|
| +
|
| cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
|
| if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
|
| cur_diff -= 3; // favor keeping the areas locally similar
|
| @@ -1010,49 +903,55 @@ static float GetPredictionCostCrossColorBlue(
|
| return cur_diff;
|
| }
|
|
|
| +#define kGreenRedToBlueNumAxis 8
|
| +#define kGreenRedToBlueMaxIters 7
|
| static void GetBestGreenRedToBlue(
|
| - int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
| - int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
|
| - const int accumulated_blue_histo[256], const uint32_t* const argb,
|
| + const uint32_t* argb, int stride, int tile_width, int tile_height,
|
| + VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
|
| + const int accumulated_blue_histo[256],
|
| VP8LMultipliers* const best_tx) {
|
| - float best_diff = MAX_DIFF_COST;
|
| - float cur_diff;
|
| - const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
|
| - const int min_green_to_blue = -32;
|
| - const int max_green_to_blue = 32;
|
| - const int min_red_to_blue = -32;
|
| - const int max_red_to_blue = 32;
|
| - const int num_iters =
|
| - (1 + (max_green_to_blue - min_green_to_blue) / step) *
|
| - (1 + (max_red_to_blue - min_red_to_blue) / step);
|
| - // Number of tries to get optimal green_to_blue & red_to_blue color transforms
|
| - // after finding a local minima.
|
| - const int max_tries_after_min = 4 + (num_iters >> 2);
|
| - int num_tries_after_min = 0;
|
| - int green_to_blue;
|
| - for (green_to_blue = min_green_to_blue;
|
| - green_to_blue <= max_green_to_blue &&
|
| - num_tries_after_min < max_tries_after_min;
|
| - green_to_blue += step) {
|
| - int red_to_blue;
|
| - for (red_to_blue = min_red_to_blue;
|
| - red_to_blue <= max_red_to_blue &&
|
| - num_tries_after_min < max_tries_after_min;
|
| - red_to_blue += step) {
|
| - cur_diff = GetPredictionCostCrossColorBlue(
|
| - tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
|
| - prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
|
| + const int8_t offset[kGreenRedToBlueNumAxis][2] =
|
| + {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
|
| + const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };
|
| + const int iters =
|
| + (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;
|
| + int green_to_blue_best = 0;
|
| + int red_to_blue_best = 0;
|
| + int iter;
|
| + // Initial value at origin:
|
| + float best_diff = GetPredictionCostCrossColorBlue(
|
| + argb, stride, tile_width, tile_height, prev_x, prev_y,
|
| + green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
|
| + for (iter = 0; iter < iters; ++iter) {
|
| + const int delta = delta_lut[iter];
|
| + int axis;
|
| + for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {
|
| + const int green_to_blue_cur =
|
| + offset[axis][0] * delta + green_to_blue_best;
|
| + const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
|
| + const float cur_diff = GetPredictionCostCrossColorBlue(
|
| + argb, stride, tile_width, tile_height, prev_x, prev_y,
|
| + green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
|
| if (cur_diff < best_diff) {
|
| best_diff = cur_diff;
|
| - best_tx->green_to_blue_ = green_to_blue;
|
| - best_tx->red_to_blue_ = red_to_blue;
|
| - num_tries_after_min = 0;
|
| - } else {
|
| - ++num_tries_after_min;
|
| + green_to_blue_best = green_to_blue_cur;
|
| + red_to_blue_best = red_to_blue_cur;
|
| + }
|
| + if (quality < 25 && iter == 4) {
|
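| + // Only axis-aligned diffs for lower quality. NOTE(review): iters is 1 when quality < 25, so 'iter == 4' never holds and this break is dead; 'axis == 3' may have been intended -- confirm upstream.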
|
| + break; // next iter.
|
| }
|
| }
|
| + if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {
|
| + // Further iterations would not help.
|
| + break; // out of iter-loop.
|
| + }
|
| }
|
| + best_tx->green_to_blue_ = green_to_blue_best;
|
| + best_tx->red_to_blue_ = red_to_blue_best;
|
| }
|
| +#undef kGreenRedToBlueMaxIters
|
| +#undef kGreenRedToBlueNumAxis
|
|
|
| static VP8LMultipliers GetBestColorTransformForTile(
|
| int tile_x, int tile_y, int bits,
|
| @@ -1067,14 +966,18 @@ static VP8LMultipliers GetBestColorTransformForTile(
|
| const int tile_x_offset = tile_x * max_tile_size;
|
| const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
|
| const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
|
| + const int tile_width = all_x_max - tile_x_offset;
|
| + const int tile_height = all_y_max - tile_y_offset;
|
| + const uint32_t* const tile_argb = argb + tile_y_offset * xsize
|
| + + tile_x_offset;
|
| VP8LMultipliers best_tx;
|
| MultipliersClear(&best_tx);
|
|
|
| - GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
| - prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
|
| - GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
|
| - xsize, prev_x, prev_y, quality, accumulated_blue_histo,
|
| - argb, &best_tx);
|
| + GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height,
|
| + prev_x, prev_y, quality, accumulated_red_histo, &best_tx);
|
| + GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height,
|
| + prev_x, prev_y, quality, accumulated_blue_histo,
|
| + &best_tx);
|
| return best_tx;
|
| }
|
|
|
| @@ -1149,293 +1052,6 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
| }
|
| }
|
|
|
| -// Color space inverse transform.
|
| -static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
| - int y_start, int y_end, uint32_t* data) {
|
| - const int width = transform->xsize_;
|
| - const int tile_width = 1 << transform->bits_;
|
| - const int mask = tile_width - 1;
|
| - const int safe_width = width & ~mask;
|
| - const int remaining_width = width - safe_width;
|
| - const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
|
| - int y = y_start;
|
| - const uint32_t* pred_row =
|
| - transform->data_ + (y >> transform->bits_) * tiles_per_row;
|
| -
|
| - while (y < y_end) {
|
| - const uint32_t* pred = pred_row;
|
| - VP8LMultipliers m = { 0, 0, 0 };
|
| - const uint32_t* const data_safe_end = data + safe_width;
|
| - const uint32_t* const data_end = data + width;
|
| - while (data < data_safe_end) {
|
| - ColorCodeToMultipliers(*pred++, &m);
|
| - VP8LTransformColorInverse(&m, data, tile_width);
|
| - data += tile_width;
|
| - }
|
| - if (data < data_end) { // Left-overs using C-version.
|
| - ColorCodeToMultipliers(*pred++, &m);
|
| - VP8LTransformColorInverse(&m, data, remaining_width);
|
| - data += remaining_width;
|
| - }
|
| - ++y;
|
| - if ((y & mask) == 0) pred_row += tiles_per_row;
|
| - }
|
| -}
|
| -
|
| -// Separate out pixels packed together using pixel-bundling.
|
| -// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
|
| -#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
|
| -void FUNC_NAME(const VP8LTransform* const transform, \
|
| - int y_start, int y_end, const TYPE* src, TYPE* dst) { \
|
| - int y; \
|
| - const int bits_per_pixel = 8 >> transform->bits_; \
|
| - const int width = transform->xsize_; \
|
| - const uint32_t* const color_map = transform->data_; \
|
| - if (bits_per_pixel < 8) { \
|
| - const int pixels_per_byte = 1 << transform->bits_; \
|
| - const int count_mask = pixels_per_byte - 1; \
|
| - const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
|
| - for (y = y_start; y < y_end; ++y) { \
|
| - uint32_t packed_pixels = 0; \
|
| - int x; \
|
| - for (x = 0; x < width; ++x) { \
|
| - /* We need to load fresh 'packed_pixels' once every */ \
|
| - /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
|
| - /* is a power of 2, so can just use a mask for that, instead of */ \
|
| - /* decrementing a counter. */ \
|
| - if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
|
| - *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
|
| - packed_pixels >>= bits_per_pixel; \
|
| - } \
|
| - } \
|
| - } else { \
|
| - for (y = y_start; y < y_end; ++y) { \
|
| - int x; \
|
| - for (x = 0; x < width; ++x) { \
|
| - *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
|
| - } \
|
| - } \
|
| - } \
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
|
| - return (idx >> 8) & 0xff;
|
| -}
|
| -
|
| -static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
|
| - return idx;
|
| -}
|
| -
|
| -static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
|
| - return val;
|
| -}
|
| -
|
| -static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
|
| - return (val >> 8) & 0xff;
|
| -}
|
| -
|
| -static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
|
| - GetARGBValue)
|
| -COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
|
| - GetAlphaValue)
|
| -
|
| -#undef COLOR_INDEX_INVERSE
|
| -
|
| -void VP8LInverseTransform(const VP8LTransform* const transform,
|
| - int row_start, int row_end,
|
| - const uint32_t* const in, uint32_t* const out) {
|
| - const int width = transform->xsize_;
|
| - assert(row_start < row_end);
|
| - assert(row_end <= transform->ysize_);
|
| - switch (transform->type_) {
|
| - case SUBTRACT_GREEN:
|
| - VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
|
| - break;
|
| - case PREDICTOR_TRANSFORM:
|
| - PredictorInverseTransform(transform, row_start, row_end, out);
|
| - if (row_end != transform->ysize_) {
|
| - // The last predicted row in this iteration will be the top-pred row
|
| - // for the first row in next iteration.
|
| - memcpy(out - width, out + (row_end - row_start - 1) * width,
|
| - width * sizeof(*out));
|
| - }
|
| - break;
|
| - case CROSS_COLOR_TRANSFORM:
|
| - ColorSpaceInverseTransform(transform, row_start, row_end, out);
|
| - break;
|
| - case COLOR_INDEXING_TRANSFORM:
|
| - if (in == out && transform->bits_ > 0) {
|
| - // Move packed pixels to the end of unpacked region, so that unpacking
|
| - // can occur seamlessly.
|
| - // Also, note that this is the only transform that applies on
|
| - // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
|
| - // transforms work on effective width of xsize_.
|
| - const int out_stride = (row_end - row_start) * width;
|
| - const int in_stride = (row_end - row_start) *
|
| - VP8LSubSampleSize(transform->xsize_, transform->bits_);
|
| - uint32_t* const src = out + out_stride - in_stride;
|
| - memmove(src, out, in_stride * sizeof(*src));
|
| - ColorIndexInverseTransform(transform, row_start, row_end, src, out);
|
| - } else {
|
| - ColorIndexInverseTransform(transform, row_start, row_end, in, out);
|
| - }
|
| - break;
|
| - }
|
| -}
|
| -
|
| -//------------------------------------------------------------------------------
|
| -// Color space conversion.
|
| -
|
| -static int is_big_endian(void) {
|
| - static const union {
|
| - uint16_t w;
|
| - uint8_t b[2];
|
| - } tmp = { 1 };
|
| - return (tmp.b[0] != 1);
|
| -}
|
| -
|
| -void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
| - int num_pixels, uint8_t* dst) {
|
| - const uint32_t* const src_end = src + num_pixels;
|
| - while (src < src_end) {
|
| - const uint32_t argb = *src++;
|
| - *dst++ = (argb >> 16) & 0xff;
|
| - *dst++ = (argb >> 8) & 0xff;
|
| - *dst++ = (argb >> 0) & 0xff;
|
| - }
|
| -}
|
| -
|
| -void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
| - int num_pixels, uint8_t* dst) {
|
| - const uint32_t* const src_end = src + num_pixels;
|
| - while (src < src_end) {
|
| - const uint32_t argb = *src++;
|
| - *dst++ = (argb >> 16) & 0xff;
|
| - *dst++ = (argb >> 8) & 0xff;
|
| - *dst++ = (argb >> 0) & 0xff;
|
| - *dst++ = (argb >> 24) & 0xff;
|
| - }
|
| -}
|
| -
|
| -void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
| - int num_pixels, uint8_t* dst) {
|
| - const uint32_t* const src_end = src + num_pixels;
|
| - while (src < src_end) {
|
| - const uint32_t argb = *src++;
|
| - const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
|
| - const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
|
| -#ifdef WEBP_SWAP_16BIT_CSP
|
| - *dst++ = ba;
|
| - *dst++ = rg;
|
| -#else
|
| - *dst++ = rg;
|
| - *dst++ = ba;
|
| -#endif
|
| - }
|
| -}
|
| -
|
| -void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
| - int num_pixels, uint8_t* dst) {
|
| - const uint32_t* const src_end = src + num_pixels;
|
| - while (src < src_end) {
|
| - const uint32_t argb = *src++;
|
| - const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
|
| - const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
|
| -#ifdef WEBP_SWAP_16BIT_CSP
|
| - *dst++ = gb;
|
| - *dst++ = rg;
|
| -#else
|
| - *dst++ = rg;
|
| - *dst++ = gb;
|
| -#endif
|
| - }
|
| -}
|
| -
|
| -void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
| - int num_pixels, uint8_t* dst) {
|
| - const uint32_t* const src_end = src + num_pixels;
|
| - while (src < src_end) {
|
| - const uint32_t argb = *src++;
|
| - *dst++ = (argb >> 0) & 0xff;
|
| - *dst++ = (argb >> 8) & 0xff;
|
| - *dst++ = (argb >> 16) & 0xff;
|
| - }
|
| -}
|
| -
|
| -static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
| - int swap_on_big_endian) {
|
| - if (is_big_endian() == swap_on_big_endian) {
|
| - const uint32_t* const src_end = src + num_pixels;
|
| - while (src < src_end) {
|
| - const uint32_t argb = *src++;
|
| -
|
| -#if !defined(WORDS_BIGENDIAN)
|
| -#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
|
| - *(uint32_t*)dst = BSwap32(argb);
|
| -#else // WEBP_REFERENCE_IMPLEMENTATION
|
| - dst[0] = (argb >> 24) & 0xff;
|
| - dst[1] = (argb >> 16) & 0xff;
|
| - dst[2] = (argb >> 8) & 0xff;
|
| - dst[3] = (argb >> 0) & 0xff;
|
| -#endif
|
| -#else // WORDS_BIGENDIAN
|
| - dst[0] = (argb >> 0) & 0xff;
|
| - dst[1] = (argb >> 8) & 0xff;
|
| - dst[2] = (argb >> 16) & 0xff;
|
| - dst[3] = (argb >> 24) & 0xff;
|
| -#endif
|
| - dst += sizeof(argb);
|
| - }
|
| - } else {
|
| - memcpy(dst, src, num_pixels * sizeof(*src));
|
| - }
|
| -}
|
| -
|
| -void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
|
| - WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
|
| - switch (out_colorspace) {
|
| - case MODE_RGB:
|
| - VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
|
| - break;
|
| - case MODE_RGBA:
|
| - VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
|
| - break;
|
| - case MODE_rgbA:
|
| - VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
|
| - WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
|
| - break;
|
| - case MODE_BGR:
|
| - VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
|
| - break;
|
| - case MODE_BGRA:
|
| - CopyOrSwap(in_data, num_pixels, rgba, 1);
|
| - break;
|
| - case MODE_bgrA:
|
| - CopyOrSwap(in_data, num_pixels, rgba, 1);
|
| - WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
|
| - break;
|
| - case MODE_ARGB:
|
| - CopyOrSwap(in_data, num_pixels, rgba, 0);
|
| - break;
|
| - case MODE_Argb:
|
| - CopyOrSwap(in_data, num_pixels, rgba, 0);
|
| - WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
|
| - break;
|
| - case MODE_RGBA_4444:
|
| - VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
|
| - break;
|
| - case MODE_rgbA_4444:
|
| - VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
|
| - WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
|
| - break;
|
| - case MODE_RGB_565:
|
| - VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
|
| - break;
|
| - default:
|
| - assert(0); // Code flow should not reach here.
|
| - }
|
| -}
|
| -
|
| //------------------------------------------------------------------------------
|
| // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
| void VP8LBundleColorMap(const uint8_t* const row, int width,
|
| @@ -1478,53 +1094,6 @@ static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
|
| return cost;
|
| }
|
|
|
| -// Returns the various RLE counts
|
| -static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
|
| - int i;
|
| - int streak = 0;
|
| - VP8LStreaks stats;
|
| - memset(&stats, 0, sizeof(stats));
|
| - for (i = 0; i < length - 1; ++i) {
|
| - ++streak;
|
| - if (population[i] == population[i + 1]) {
|
| - continue;
|
| - }
|
| - stats.counts[population[i] != 0] += (streak > 3);
|
| - stats.streaks[population[i] != 0][(streak > 3)] += streak;
|
| - streak = 0;
|
| - }
|
| - ++streak;
|
| - stats.counts[population[i] != 0] += (streak > 3);
|
| - stats.streaks[population[i] != 0][(streak > 3)] += streak;
|
| - return stats;
|
| -}
|
| -
|
| -static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
|
| - const uint32_t* Y, int length) {
|
| - int i;
|
| - int streak = 0;
|
| - VP8LStreaks stats;
|
| - memset(&stats, 0, sizeof(stats));
|
| - for (i = 0; i < length - 1; ++i) {
|
| - const int xy = X[i] + Y[i];
|
| - const int xy_next = X[i + 1] + Y[i + 1];
|
| - ++streak;
|
| - if (xy == xy_next) {
|
| - continue;
|
| - }
|
| - stats.counts[xy != 0] += (streak > 3);
|
| - stats.streaks[xy != 0][(streak > 3)] += streak;
|
| - streak = 0;
|
| - }
|
| - {
|
| - const int xy = X[i] + Y[i];
|
| - ++streak;
|
| - stats.counts[xy != 0] += (streak > 3);
|
| - stats.streaks[xy != 0][(streak > 3)] += streak;
|
| - }
|
| - return stats;
|
| -}
|
| -
|
| //------------------------------------------------------------------------------
|
|
|
| static void HistogramAdd(const VP8LHistogram* const a,
|
| @@ -1563,61 +1132,52 @@ static void HistogramAdd(const VP8LHistogram* const a,
|
| //------------------------------------------------------------------------------
|
|
|
| VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
| -VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
| -VP8LPredictorFunc VP8LPredictors[16];
|
|
|
| VP8LTransformColorFunc VP8LTransformColor;
|
| -VP8LTransformColorFunc VP8LTransformColorInverse;
|
|
|
| -VP8LConvertFunc VP8LConvertBGRAToRGB;
|
| -VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
| -VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
| -VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
| -VP8LConvertFunc VP8LConvertBGRAToBGR;
|
| +VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
| +VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
|
|
| VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
| VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
|
|
| VP8LCostFunc VP8LExtraCost;
|
| VP8LCostCombinedFunc VP8LExtraCostCombined;
|
| +VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
|
|
| -VP8LCostCountFunc VP8LHuffmanCostCount;
|
| -VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
|
| +GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
|
|
|
| VP8LHistogramAddFunc VP8LHistogramAdd;
|
|
|
| -extern void VP8LDspInitSSE2(void);
|
| -extern void VP8LDspInitNEON(void);
|
| -extern void VP8LDspInitMIPS32(void);
|
| +extern void VP8LEncDspInitSSE2(void);
|
| +extern void VP8LEncDspInitSSE41(void);
|
| +extern void VP8LEncDspInitNEON(void);
|
| +extern void VP8LEncDspInitMIPS32(void);
|
| +extern void VP8LEncDspInitMIPSdspR2(void);
|
|
|
| -static volatile VP8CPUInfo lossless_last_cpuinfo_used =
|
| - (VP8CPUInfo)&lossless_last_cpuinfo_used;
|
| +static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used =
|
| + (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;
|
|
|
| -void VP8LDspInit(void) {
|
| - if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
|
| +WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
|
| + if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;
|
|
|
| - memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
|
| + VP8LDspInit();
|
|
|
| VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
|
| - VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
|
|
|
| VP8LTransformColor = VP8LTransformColor_C;
|
| - VP8LTransformColorInverse = VP8LTransformColorInverse_C;
|
|
|
| - VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
|
| - VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
|
| - VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
|
| - VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
|
| - VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
|
| + VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
|
| + VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
|
|
|
| VP8LFastLog2Slow = FastLog2Slow;
|
| VP8LFastSLog2Slow = FastSLog2Slow;
|
|
|
| VP8LExtraCost = ExtraCost;
|
| VP8LExtraCostCombined = ExtraCostCombined;
|
| + VP8LCombinedShannonEntropy = CombinedShannonEntropy;
|
|
|
| - VP8LHuffmanCostCount = HuffmanCostCount;
|
| - VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
|
| + VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;
|
|
|
| VP8LHistogramAdd = HistogramAdd;
|
|
|
| @@ -1625,21 +1185,31 @@ void VP8LDspInit(void) {
|
| if (VP8GetCPUInfo != NULL) {
|
| #if defined(WEBP_USE_SSE2)
|
| if (VP8GetCPUInfo(kSSE2)) {
|
| - VP8LDspInitSSE2();
|
| + VP8LEncDspInitSSE2();
|
| +#if defined(WEBP_USE_SSE41)
|
| + if (VP8GetCPUInfo(kSSE4_1)) {
|
| + VP8LEncDspInitSSE41();
|
| + }
|
| +#endif
|
| }
|
| #endif
|
| #if defined(WEBP_USE_NEON)
|
| if (VP8GetCPUInfo(kNEON)) {
|
| - VP8LDspInitNEON();
|
| + VP8LEncDspInitNEON();
|
| }
|
| #endif
|
| #if defined(WEBP_USE_MIPS32)
|
| if (VP8GetCPUInfo(kMIPS32)) {
|
| - VP8LDspInitMIPS32();
|
| + VP8LEncDspInitMIPS32();
|
| + }
|
| +#endif
|
| +#if defined(WEBP_USE_MIPS_DSP_R2)
|
| + if (VP8GetCPUInfo(kMIPSdspR2)) {
|
| + VP8LEncDspInitMIPSdspR2();
|
| }
|
| #endif
|
| }
|
| - lossless_last_cpuinfo_used = VP8GetCPUInfo;
|
| + lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
|
| }
|
|
|
| //------------------------------------------------------------------------------
|
|
|