third_party/libwebp/dsp/lossless_enc.c - Issue 1546003002: libwebp: update to 0.5.0

Unified Diff: third_party/libwebp/dsp/lossless_enc.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: rebase (created 5 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libwebp/dsp/lossless_enc.c

diff --git a/third_party/libwebp/dsp/lossless.c b/third_party/libwebp/dsp/lossless_enc.c

similarity index 53%

copy from third_party/libwebp/dsp/lossless.c

copy to third_party/libwebp/dsp/lossless_enc.c

index ee334bceb0b9669bc7791c21ef2da255eb9445c7..2eafa3da7d23d69bc7e084e0e0d519b9bbd0a9e1 100644

--- a/third_party/libwebp/dsp/lossless.c

+++ b/third_party/libwebp/dsp/lossless_enc.c

@@ -1,4 +1,4 @@

// Use of this source code is governed by a BSD-style license

// that can be found in the COPYING file in the root of the source

@@ -7,7 +7,7 @@

// be found in the AUTHORS file in the root of the source tree.

// -----------------------------------------------------------------------------

-// Image transforms and color space conversion methods for lossless decoder.

+// Image transform methods for lossless encoder.

// Authors: Vikas Arora (vikaas.arora@gmail.com)

// Jyrki Alakuijala (jyrki@google.com)

@@ -24,6 +24,9 @@

#define MAX_DIFF_COST (1e30f)

+static const int kPredLowEffort = 11;

+static const uint32_t kMaskAlpha = 0xff000000;

// lookup table for small values of log2(int)

const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {

0.0000000000000000f, 0.0000000000000000f,

@@ -326,13 +329,6 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {

112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126

};

-// The threshold till approximate version of log_2 can be used.

-// Practically, we can get rid of the call to log() as the two values match to

-// very high degree (the ratio of these two is 0.99999x).

-// Keeping a high threshold for now.

-#define APPROX_LOG_WITH_CORRECTION_MAX 65536

-#define APPROX_LOG_MAX 4096

-#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086

static float FastSLog2Slow(uint32_t v) {

assert(v >= LOG_LOOKUP_IDX_MAX);

if (v < APPROX_LOG_WITH_CORRECTION_MAX) {

@@ -384,166 +380,11 @@ static float FastLog2Slow(uint32_t v) {

}

-//------------------------------------------------------------------------------

-// Image transforms.

// Mostly used to reduce code size and improve readability

static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }

-// In-place sum of each component with mod 256.

-static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {

- const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);

- const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);

- *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);

-static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {

- return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);

-static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {

- return Average2(Average2(a0, a2), a1);

-static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,

- uint32_t a2, uint32_t a3) {

- return Average2(Average2(a0, a1), Average2(a2, a3));

-static WEBP_INLINE uint32_t Clip255(uint32_t a) {

- if (a < 256) {

- return a;

- }

- // return 0, when a is a negative integer.

- // return 255, when a is positive.

- return ~a >> 24;

-static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {

- return Clip255(a + b - c);

-static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,

- uint32_t c2) {

- const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);

- const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,

- (c1 >> 16) & 0xff,

- (c2 >> 16) & 0xff);

- const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,

- (c1 >> 8) & 0xff,

- (c2 >> 8) & 0xff);

- const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);

- return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;

-static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {

- return Clip255(a + (a - b) / 2);

-static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,

- uint32_t c2) {

- const uint32_t ave = Average2(c0, c1);

- const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);

- const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);

- const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);

- const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);

- return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;

-// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.

-#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409

-# define LOCAL_INLINE __attribute__ ((noinline))

-#else

-# define LOCAL_INLINE WEBP_INLINE

-#endif

-static LOCAL_INLINE int Sub3(int a, int b, int c) {

- const int pb = b - c;

- const int pa = a - c;

- return abs(pb) - abs(pa);

-#undef LOCAL_INLINE

-static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {

- const int pa_minus_pb =

- Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +

- Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +

- Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +

- Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);

- return (pa_minus_pb <= 0) ? a : b;

//------------------------------------------------------------------------------

-// Predictors

-static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {

- (void)top;

- (void)left;

- return ARGB_BLACK;

-static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {

- (void)top;

- return left;

-static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {

- (void)left;

- return top[0];

-static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {

- (void)left;

- return top[1];

-static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {

- (void)left;

- return top[-1];

-static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Average3(left, top[0], top[1]);

- return pred;

-static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Average2(left, top[-1]);

- return pred;

-static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Average2(left, top[0]);

- return pred;

-static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Average2(top[-1], top[0]);

- (void)left;

- return pred;

-static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Average2(top[0], top[1]);

- (void)left;

- return pred;

-static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Average4(left, top[-1], top[0], top[1]);

- return pred;

-static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = Select(top[0], left, top[-1]);

- return pred;

-static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);

- return pred;

-static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {

- const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);

- return pred;

-static const VP8LPredictorFunc kPredictorsC[16] = {

- Predictor0, Predictor1, Predictor2, Predictor3,

- Predictor4, Predictor5, Predictor6, Predictor7,

- Predictor8, Predictor9, Predictor10, Predictor11,

- Predictor12, Predictor13,

- Predictor0, Predictor0 // <- padding security sentinels

-};

+// Methods to calculate Entropy (Shannon).

static float PredictionCostSpatial(const int counts[256], int weight_0,

double exp_val) {

@@ -565,15 +406,15 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) {

int sumX = 0, sumXY = 0;

for (i = 0; i < 256; ++i) {

const int x = X[i];

- const int xy = x + Y[i];

if (x != 0) {

+ const int xy = x + Y[i];

sumX += x;

retval -= VP8LFastSLog2(x);

sumXY += xy;

retval -= VP8LFastSLog2(xy);

- } else if (xy != 0) {

- sumXY += xy;

- retval -= VP8LFastSLog2(xy);

+ } else if (Y[i] != 0) {

+ sumXY += Y[i];

+ retval -= VP8LFastSLog2(Y[i]);

}

retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);

@@ -587,11 +428,107 @@ static float PredictionCostSpatialHistogram(const int accumulated[4][256],

for (i = 0; i < 4; ++i) {

const double kExpValue = 0.94;

retval += PredictionCostSpatial(tile[i], 1, kExpValue);

- retval += CombinedShannonEntropy(tile[i], accumulated[i]);

+ retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);

}

return (float)retval;

}

+void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {

+ entropy->entropy = 0.;

+ entropy->sum = 0;

+ entropy->nonzeros = 0;

+ entropy->max_val = 0;

+ entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;

+void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,

+ VP8LBitEntropy* const entropy) {

+ int i;

+ VP8LBitEntropyInit(entropy);

+ for (i = 0; i < n; ++i) {

+ if (array[i] != 0) {

+ entropy->sum += array[i];

+ entropy->nonzero_code = i;

+ ++entropy->nonzeros;

+ entropy->entropy -= VP8LFastSLog2(array[i]);

+ if (entropy->max_val < array[i]) {

+ entropy->max_val = array[i];

+ }

+ entropy->entropy += VP8LFastSLog2(entropy->sum);

+static WEBP_INLINE void GetEntropyUnrefinedHelper(

+ uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,

+ VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {

+ const int streak = i - *i_prev;

+ // Gather info for the bit entropy.

+ if (*val_prev != 0) {

+ bit_entropy->sum += (*val_prev) * streak;

+ bit_entropy->nonzeros += streak;

+ bit_entropy->nonzero_code = *i_prev;

+ bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;

+ if (bit_entropy->max_val < *val_prev) {

+ bit_entropy->max_val = *val_prev;

+ }

+ // Gather info for the Huffman cost.

+ stats->counts[*val_prev != 0] += (streak > 3);

+ stats->streaks[*val_prev != 0][(streak > 3)] += streak;

+ *val_prev = val;

+ *i_prev = i;

+void VP8LGetEntropyUnrefined(const uint32_t* const X, int length,

+ VP8LBitEntropy* const bit_entropy,

+ VP8LStreaks* const stats) {

+ int i;

+ int i_prev = 0;

+ uint32_t x_prev = X[0];

+ memset(stats, 0, sizeof(*stats));

+ VP8LBitEntropyInit(bit_entropy);

+ for (i = 1; i < length; ++i) {

+ const uint32_t x = X[i];

+ if (x != x_prev) {

+ VP8LGetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);

+ }

+ VP8LGetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);

+ bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);

+void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,

+ const uint32_t* const Y, int length,

+ VP8LBitEntropy* const bit_entropy,

+ VP8LStreaks* const stats) {

+ int i = 1;

+ int i_prev = 0;

+ uint32_t xy_prev = X[0] + Y[0];

+ memset(stats, 0, sizeof(*stats));

+ VP8LBitEntropyInit(bit_entropy);

+ for (i = 1; i < length; ++i) {

+ const uint32_t xy = X[i] + Y[i];

+ if (xy != xy_prev) {

+ VP8LGetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy,

+ stats);

+ }

+ VP8LGetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);

+ bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);

static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {

++histo_argb[0][argb >> 24];

++histo_argb[1][(argb >> 16) & 0xff];

@@ -599,10 +536,27 @@ static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {

++histo_argb[3][argb & 0xff];

}

+//------------------------------------------------------------------------------

+static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,

+ int x, int y,

+ const uint32_t* current_row,

+ const uint32_t* upper_row) {

+ if (y == 0) {

+ return (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left.

+ } else if (x == 0) {

+ return upper_row[x]; // Top.

+ } else {

+ return pred_func(current_row[x - 1], upper_row + x);

+ }

+// Returns best predictor and updates the accumulated histogram.

static int GetBestPredictorForTile(int width, int height,

int tile_x, int tile_y, int bits,

- const int accumulated[4][256],

- const uint32_t* const argb_scratch) {

+ int accumulated[4][256],

+ const uint32_t* const argb_scratch,

+ int exact) {

const int kNumPredModes = 14;

const int col_start = tile_x << bits;

const int row_start = tile_y << bits;

@@ -612,13 +566,19 @@ static int GetBestPredictorForTile(int width, int height,

float best_diff = MAX_DIFF_COST;

int best_mode = 0;

int mode;

+ int histo_stack_1[4][256];

+ int histo_stack_2[4][256];

+ // Need pointers to be able to swap arrays.

+ int (*histo_argb)[256] = histo_stack_1;

+ int (*best_histo)[256] = histo_stack_2;

+ int i, j;

for (mode = 0; mode < kNumPredModes; ++mode) {

const uint32_t* current_row = argb_scratch;

const VP8LPredictorFunc pred_func = VP8LPredictors[mode];

float cur_diff;

int y;

- int histo_argb[4][256];

- memset(histo_argb, 0, sizeof(histo_argb));

+ memset(histo_argb, 0, sizeof(histo_stack_1));

for (y = 0; y < max_y; ++y) {

int x;

const int row = row_start + y;

@@ -626,65 +586,93 @@ static int GetBestPredictorForTile(int width, int height,

current_row = upper_row + width;

for (x = 0; x < max_x; ++x) {

const int col = col_start + x;

- uint32_t predict;

- if (row == 0) {

- predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.

- } else if (col == 0) {

- predict = upper_row[col]; // Top.

- } else {

- predict = pred_func(current_row[col - 1], upper_row + col);

+ const uint32_t predict =

+ Predict(pred_func, col, row, current_row, upper_row);

+ uint32_t residual = VP8LSubPixels(current_row[col], predict);

+ if (!exact && (current_row[col] & kMaskAlpha) == 0) {

+ residual &= kMaskAlpha; // See CopyTileWithPrediction.

}

- UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));

+ UpdateHisto(histo_argb, residual);

}

cur_diff = PredictionCostSpatialHistogram(

- accumulated, (const int (*)[256])histo_argb);

+ (const int (*)[256])accumulated, (const int (*)[256])histo_argb);

if (cur_diff < best_diff) {

+ int (*tmp)[256] = histo_argb;

+ histo_argb = best_histo;

+ best_histo = tmp;

best_diff = cur_diff;

best_mode = mode;

}

+ for (i = 0; i < 4; i++) {

+ for (j = 0; j < 256; j++) {

+ accumulated[i][j] += best_histo[i][j];

+ }

return best_mode;

}

-static void CopyTileWithPrediction(int width, int height,

- int tile_x, int tile_y, int bits, int mode,

- const uint32_t* const argb_scratch,

- uint32_t* const argb) {

- const int col_start = tile_x << bits;

- const int row_start = tile_y << bits;

- const int tile_size = 1 << bits;

- const int max_y = GetMin(tile_size, height - row_start);

- const int max_x = GetMin(tile_size, width - col_start);

- const VP8LPredictorFunc pred_func = VP8LPredictors[mode];

- const uint32_t* current_row = argb_scratch;

+static void CopyImageWithPrediction(int width, int height,

+ int bits, uint32_t* const modes,

+ uint32_t* const argb_scratch,

+ uint32_t* const argb,

+ int low_effort, int exact) {

+ const int tiles_per_row = VP8LSubSampleSize(width, bits);

+ const int mask = (1 << bits) - 1;

+ // The row size is one pixel longer to allow the top right pixel to point to

+ // the leftmost pixel of the next row when at the right edge.

+ uint32_t* current_row = argb_scratch;

+ uint32_t* upper_row = argb_scratch + width + 1;

int y;

- for (y = 0; y < max_y; ++y) {

+ VP8LPredictorFunc pred_func =

+ low_effort ? VP8LPredictors[kPredLowEffort] : NULL;

+ for (y = 0; y < height; ++y) {

int x;

- const int row = row_start + y;

- const uint32_t* const upper_row = current_row;

- current_row = upper_row + width;

- for (x = 0; x < max_x; ++x) {

- const int col = col_start + x;

- const int pix = row * width + col;

- uint32_t predict;

- if (row == 0) {

- predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.

- } else if (col == 0) {

- predict = upper_row[col]; // Top.

- } else {

- predict = pred_func(current_row[col - 1], upper_row + col);

+ uint32_t* tmp = upper_row;

+ upper_row = current_row;

+ current_row = tmp;

+ memcpy(current_row, argb + y * width, sizeof(*current_row) * width);

+ current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;

+ if (low_effort) {

+ for (x = 0; x < width; ++x) {

+ const uint32_t predict =

+ Predict(pred_func, x, y, current_row, upper_row);

+ argb[y * width + x] = VP8LSubPixels(current_row[x], predict);

+ }

+ } else {

+ for (x = 0; x < width; ++x) {

+ uint32_t predict, residual;

+ if ((x & mask) == 0) {

+ const int mode =

+ (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;

+ pred_func = VP8LPredictors[mode];

+ }

+ predict = Predict(pred_func, x, y, current_row, upper_row);

+ residual = VP8LSubPixels(current_row[x], predict);

+ if (!exact && (current_row[x] & kMaskAlpha) == 0) {

+ // If alpha is 0, cleanup RGB. We can choose the RGB values of the

+ // residual for best compression. The prediction of alpha itself can

+ // be non-zero and must be kept though. We choose RGB of the residual

+ // to be 0.

+ residual &= kMaskAlpha;

+ // Update input image so that next predictions use correct RGB value.

+ current_row[x] = predict & ~kMaskAlpha;

+ if (x == 0 && y != 0) upper_row[width] = current_row[x];

+ }

+ argb[y * width + x] = residual;

}

- argb[pix] = VP8LSubPixels(current_row[col], predict);

}

-void VP8LResidualImage(int width, int height, int bits,

+void VP8LResidualImage(int width, int height, int bits, int low_effort,

uint32_t* const argb, uint32_t* const argb_scratch,

- uint32_t* const image) {

+ uint32_t* const image, int exact) {

const int max_tile_size = 1 << bits;

const int tiles_per_row = VP8LSubSampleSize(width, bits);

const int tiles_per_col = VP8LSubSampleSize(height, bits);

@@ -692,104 +680,34 @@ void VP8LResidualImage(int width, int height, int bits,

uint32_t* const current_tile_rows = argb_scratch + width;

int tile_y;

int histo[4][256];

- memset(histo, 0, sizeof(histo));

- for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {

- const int tile_y_offset = tile_y * max_tile_size;

- const int this_tile_height =

- (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;

- int tile_x;

- if (tile_y > 0) {

- memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,

- width * sizeof(*upper_row));

+ if (low_effort) {

+ int i;

+ for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {

+ image[i] = ARGB_BLACK | (kPredLowEffort << 8);

}

- memcpy(current_tile_rows, &argb[tile_y_offset * width],

- this_tile_height * width * sizeof(*current_tile_rows));

- for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {

- int pred;

- int y;

- const int tile_x_offset = tile_x * max_tile_size;

- int all_x_max = tile_x_offset + max_tile_size;

- if (all_x_max > width) {

- all_x_max = width;

+ } else {

+ memset(histo, 0, sizeof(histo));

+ for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {

+ const int tile_y_offset = tile_y * max_tile_size;

+ const int this_tile_height =

+ (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;

+ int tile_x;

+ if (tile_y > 0) {

+ memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,

+ width * sizeof(*upper_row));

}

- pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,

- (const int (*)[256])histo,

- argb_scratch);

- image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);

- CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,

- argb_scratch, argb);

- for (y = 0; y < max_tile_size; ++y) {

- int ix;

- int all_x;

- int all_y = tile_y_offset + y;

- if (all_y >= height) {

- break;

- }

- ix = all_y * width + tile_x_offset;

- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {

- UpdateHisto(histo, argb[ix]);

- }

+ memcpy(current_tile_rows, &argb[tile_y_offset * width],

+ this_tile_height * width * sizeof(*current_tile_rows));

+ for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {

+ const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,

+ bits, (int (*)[256])histo, argb_scratch, exact);

+ image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);

}

-// Inverse prediction.

-static void PredictorInverseTransform(const VP8LTransform* const transform,

- int y_start, int y_end, uint32_t* data) {

- const int width = transform->xsize_;

- if (y_start == 0) { // First Row follows the L (mode=1) mode.

- int x;

- const uint32_t pred0 = Predictor0(data[-1], NULL);

- AddPixelsEq(data, pred0);

- for (x = 1; x < width; ++x) {

- const uint32_t pred1 = Predictor1(data[x - 1], NULL);

- AddPixelsEq(data + x, pred1);

- }

- data += width;

- ++y_start;

- }

- {

- int y = y_start;

- const int tile_width = 1 << transform->bits_;

- const int mask = tile_width - 1;

- const int safe_width = width & ~mask;

- const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);

- const uint32_t* pred_mode_base =

- transform->data_ + (y >> transform->bits_) * tiles_per_row;

- while (y < y_end) {

- const uint32_t pred2 = Predictor2(data[-1], data - width);

- const uint32_t* pred_mode_src = pred_mode_base;

- VP8LPredictorFunc pred_func;

- int x = 1;

- int t = 1;

- // First pixel follows the T (mode=2) mode.

- AddPixelsEq(data, pred2);

- // .. the rest:

- while (x < safe_width) {

- pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];

- for (; t < tile_width; ++t, ++x) {

- const uint32_t pred = pred_func(data[x - 1], data + x - width);

- AddPixelsEq(data + x, pred);

- }

- t = 0;

- }

- if (x < width) {

- pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];

- for (; x < width; ++x) {

- const uint32_t pred = pred_func(data[x - 1], data + x - width);

- AddPixelsEq(data + x, pred);

- }

- data += width;

- ++y;

- if ((y & mask) == 0) { // Use the same mask, since tiles are squares.

- pred_mode_base += tiles_per_row;

- }

+ CopyImageWithPrediction(width, height, bits,

+ image, argb_scratch, argb, low_effort, exact);

}

void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {

@@ -803,20 +721,6 @@ void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {

}

-// Add green to blue and red channels (i.e. perform the inverse transform of

-// 'subtract green').

-void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {

- int i;

- for (i = 0; i < num_pixels; ++i) {

- const uint32_t argb = data[i];

- const uint32_t green = ((argb >> 8) & 0xff);

- uint32_t red_blue = (argb & 0x00ff00ffu);

- red_blue += (green << 16) | green;

- red_blue &= 0x00ff00ffu;

- data[i] = (argb & 0xff00ff00u) | red_blue;

- }

static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {

m->green_to_red_ = 0;

m->green_to_blue_ = 0;

@@ -861,24 +765,6 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,

}

-void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,

- int num_pixels) {

- int i;

- for (i = 0; i < num_pixels; ++i) {

- const uint32_t argb = data[i];

- const uint32_t green = argb >> 8;

- const uint32_t red = argb >> 16;

- uint32_t new_red = red;

- uint32_t new_blue = argb;

- new_red += ColorTransformDelta(m->green_to_red_, green);

- new_red &= 0xff;

- new_blue += ColorTransformDelta(m->green_to_blue_, green);

- new_blue += ColorTransformDelta(m->red_to_blue_, new_red);

- new_blue &= 0xff;

- data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);

- }

static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,

uint32_t argb) {

const uint32_t green = argb >> 8;

@@ -903,24 +789,32 @@ static float PredictionCostCrossColor(const int accumulated[256],

// Favor low entropy, locally and globally.

// Favor small absolute values for PredictionCostSpatial

static const double kExpValue = 2.4;

- return CombinedShannonEntropy(counts, accumulated) +

+ return VP8LCombinedShannonEntropy(counts, accumulated) +

PredictionCostSpatial(counts, 3, kExpValue);

}

+void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,

+ int tile_width, int tile_height,

+ int green_to_red, int histo[]) {

+ while (tile_height-- > 0) {

+ int x;

+ for (x = 0; x < tile_width; ++x) {

+ ++histo[TransformColorRed(green_to_red, argb[x])];

+ }

+ argb += stride;

+ }

static float GetPredictionCostCrossColorRed(

- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,

- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,

- const int accumulated_red_histo[256], const uint32_t* const argb) {

- int all_y;

+ const uint32_t* argb, int stride, int tile_width, int tile_height,

+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,

+ const int accumulated_red_histo[256]) {

int histo[256] = { 0 };

float cur_diff;

- for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {

- int ix = all_y * xsize + tile_x_offset;

- int all_x;

- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {

- ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.

- }

+ VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,

+ green_to_red, histo);

cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);

if ((uint8_t)green_to_red == prev_x.green_to_red_) {

cur_diff -= 3; // favor keeping the areas locally similar

@@ -935,59 +829,58 @@ static float GetPredictionCostCrossColorRed(

}

static void GetBestGreenToRed(

- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,

- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,

- const int accumulated_red_histo[256], const uint32_t* const argb,

- VP8LMultipliers* const best_tx) {

- int min_green_to_red = -64;

- int max_green_to_red = 64;

- int green_to_red = 0;

- int eval_min = 1;

- int eval_max = 1;

- float cur_diff_min = MAX_DIFF_COST;

- float cur_diff_max = MAX_DIFF_COST;

- // Do a binary search to find the optimal green_to_red color transform.

- while (max_green_to_red - min_green_to_red > 2) {

- if (eval_min) {

- cur_diff_min = GetPredictionCostCrossColorRed(

- tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,

- prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);

- eval_min = 0;

- }

- if (eval_max) {

- cur_diff_max = GetPredictionCostCrossColorRed(

- tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,

- prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);

- eval_max = 0;

+ const uint32_t* argb, int stride, int tile_width, int tile_height,

+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,

+ const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {

+ const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]

+ int green_to_red_best = 0;

+ int iter, offset;

+ float best_diff = GetPredictionCostCrossColorRed(

+ argb, stride, tile_width, tile_height, prev_x, prev_y,

+ green_to_red_best, accumulated_red_histo);

+ for (iter = 0; iter < kMaxIters; ++iter) {

+ // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to

+ // one in color computation. Having initial delta here as 1 is sufficient

+ // to explore the range of (-2, 2).

+ const int delta = 32 >> iter;

+ // Try a negative and a positive delta from the best known value.

+ for (offset = -delta; offset <= delta; offset += 2 * delta) {

+ const int green_to_red_cur = offset + green_to_red_best;

+ const float cur_diff = GetPredictionCostCrossColorRed(

+ argb, stride, tile_width, tile_height, prev_x, prev_y,

+ green_to_red_cur, accumulated_red_histo);

+ if (cur_diff < best_diff) {

+ best_diff = cur_diff;

+ green_to_red_best = green_to_red_cur;

+ }

}

- if (cur_diff_min < cur_diff_max) {

- green_to_red = min_green_to_red;

- max_green_to_red = (max_green_to_red + min_green_to_red) / 2;

- eval_max = 1;

- } else {

- green_to_red = max_green_to_red;

- min_green_to_red = (max_green_to_red + min_green_to_red) / 2;

- eval_min = 1;

+ }

+ best_tx->green_to_red_ = green_to_red_best;

+void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,

+ int tile_width, int tile_height,

+ int green_to_blue, int red_to_blue,

+ int histo[]) {

+ while (tile_height-- > 0) {

+ int x;

+ for (x = 0; x < tile_width; ++x) {

+ ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])];

}

+ argb += stride;

}

- best_tx->green_to_red_ = green_to_red;

}

static float GetPredictionCostCrossColorBlue(

- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,

- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,

- int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],

- const uint32_t* const argb) {

- int all_y;

+ const uint32_t* argb, int stride, int tile_width, int tile_height,

+ VP8LMultipliers prev_x, VP8LMultipliers prev_y,

+ int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {

int histo[256] = { 0 };

float cur_diff;

- for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {

- int all_x;

- int ix = all_y * xsize + tile_x_offset;

- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {

- ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];

- }

+ VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,

+ green_to_blue, red_to_blue, histo);

cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);

if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {

cur_diff -= 3; // favor keeping the areas locally similar

@@ -1010,49 +903,55 @@ static float GetPredictionCostCrossColorBlue(

return cur_diff;

}

+#define kGreenRedToBlueNumAxis 8

+#define kGreenRedToBlueMaxIters 7

static void GetBestGreenRedToBlue(

- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,

- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,

- const int accumulated_blue_histo[256], const uint32_t* const argb,

+ const uint32_t* argb, int stride, int tile_width, int tile_height,

+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,

+ const int accumulated_blue_histo[256],

VP8LMultipliers* const best_tx) {

- float best_diff = MAX_DIFF_COST;

- float cur_diff;

- const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;

- const int min_green_to_blue = -32;

- const int max_green_to_blue = 32;

- const int min_red_to_blue = -32;

- const int max_red_to_blue = 32;

- const int num_iters =

- (1 + (max_green_to_blue - min_green_to_blue) / step) *

- (1 + (max_red_to_blue - min_red_to_blue) / step);

- // Number of tries to get optimal green_to_blue & red_to_blue color transforms

- // after finding a local minimum.

- const int max_tries_after_min = 4 + (num_iters >> 2);

- int num_tries_after_min = 0;

- int green_to_blue;

- for (green_to_blue = min_green_to_blue;

- green_to_blue <= max_green_to_blue &&

- num_tries_after_min < max_tries_after_min;

- green_to_blue += step) {

- int red_to_blue;

- for (red_to_blue = min_red_to_blue;

- red_to_blue <= max_red_to_blue &&

- num_tries_after_min < max_tries_after_min;

- red_to_blue += step) {

- cur_diff = GetPredictionCostCrossColorBlue(

- tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,

- prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);

+ const int8_t offset[kGreenRedToBlueNumAxis][2] =

+ {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};

+ const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };

+ const int iters =

+ (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;

+ int green_to_blue_best = 0;

+ int red_to_blue_best = 0;

+ int iter;

+ // Initial value at origin:

+ float best_diff = GetPredictionCostCrossColorBlue(

+ argb, stride, tile_width, tile_height, prev_x, prev_y,

+ green_to_blue_best, red_to_blue_best, accumulated_blue_histo);

+ for (iter = 0; iter < iters; ++iter) {

+ const int delta = delta_lut[iter];

+ int axis;

+ for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {

+ const int green_to_blue_cur =

+ offset[axis][0] * delta + green_to_blue_best;

+ const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;

+ const float cur_diff = GetPredictionCostCrossColorBlue(

+ argb, stride, tile_width, tile_height, prev_x, prev_y,

+ green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);

if (cur_diff < best_diff) {

best_diff = cur_diff;

- best_tx->green_to_blue_ = green_to_blue;

- best_tx->red_to_blue_ = red_to_blue;

- num_tries_after_min = 0;

- } else {

- ++num_tries_after_min;

+ green_to_blue_best = green_to_blue_cur;

+ red_to_blue_best = red_to_blue_cur;

+ }

+ if (quality < 25 && iter == 4) {

+ // Only axis aligned diffs for lower quality.

+ break; // next iter.

}

+ if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {

+ // Further iterations would not help.

+ break; // out of iter-loop.

+ }

}

+ best_tx->green_to_blue_ = green_to_blue_best;

+ best_tx->red_to_blue_ = red_to_blue_best;

}

+#undef kGreenRedToBlueMaxIters

+#undef kGreenRedToBlueNumAxis

static VP8LMultipliers GetBestColorTransformForTile(

int tile_x, int tile_y, int bits,

@@ -1067,14 +966,18 @@ static VP8LMultipliers GetBestColorTransformForTile(

const int tile_x_offset = tile_x * max_tile_size;

const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);

const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);

+ const int tile_width = all_x_max - tile_x_offset;

+ const int tile_height = all_y_max - tile_y_offset;

+ const uint32_t* const tile_argb = argb + tile_y_offset * xsize

+ + tile_x_offset;

VP8LMultipliers best_tx;

MultipliersClear(&best_tx);

- GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,

- prev_x, prev_y, accumulated_red_histo, argb, &best_tx);

- GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,

- xsize, prev_x, prev_y, quality, accumulated_blue_histo,

- argb, &best_tx);

+ GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height,

+ prev_x, prev_y, quality, accumulated_red_histo, &best_tx);

+ GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height,

+ prev_x, prev_y, quality, accumulated_blue_histo,

+ &best_tx);

return best_tx;

}

@@ -1149,293 +1052,6 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,

}

-// Color space inverse transform.

-static void ColorSpaceInverseTransform(const VP8LTransform* const transform,

- int y_start, int y_end, uint32_t* data) {

- const int width = transform->xsize_;

- const int tile_width = 1 << transform->bits_;

- const int mask = tile_width - 1;

- const int safe_width = width & ~mask;

- const int remaining_width = width - safe_width;

- const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);

- int y = y_start;

- const uint32_t* pred_row =

- transform->data_ + (y >> transform->bits_) * tiles_per_row;

- while (y < y_end) {

- const uint32_t* pred = pred_row;

- VP8LMultipliers m = { 0, 0, 0 };

- const uint32_t* const data_safe_end = data + safe_width;

- const uint32_t* const data_end = data + width;

- while (data < data_safe_end) {

- ColorCodeToMultipliers(*pred++, &m);

- VP8LTransformColorInverse(&m, data, tile_width);

- data += tile_width;

- }

- if (data < data_end) { // Left-overs using C-version.

- ColorCodeToMultipliers(*pred++, &m);

- VP8LTransformColorInverse(&m, data, remaining_width);

- data += remaining_width;

- }

- ++y;

- if ((y & mask) == 0) pred_row += tiles_per_row;

- }

-// Separate out pixels packed together using pixel-bundling.

-// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).

-#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \

-void FUNC_NAME(const VP8LTransform* const transform, \

- int y_start, int y_end, const TYPE* src, TYPE* dst) { \

- int y; \

- const int bits_per_pixel = 8 >> transform->bits_; \

- const int width = transform->xsize_; \

- const uint32_t* const color_map = transform->data_; \

- if (bits_per_pixel < 8) { \

- const int pixels_per_byte = 1 << transform->bits_; \

- const int count_mask = pixels_per_byte - 1; \

- const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \

- for (y = y_start; y < y_end; ++y) { \

- uint32_t packed_pixels = 0; \

- int x; \

- for (x = 0; x < width; ++x) { \

- /* We need to load fresh 'packed_pixels' once every */ \

- /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \

- /* is a power of 2, so can just use a mask for that, instead of */ \

- /* decrementing a counter. */ \

- if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \

- *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \

- packed_pixels >>= bits_per_pixel; \

- } \

- } else { \

- for (y = y_start; y < y_end; ++y) { \

- int x; \

- for (x = 0; x < width; ++x) { \

- *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \

- } \

-static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {

- return (idx >> 8) & 0xff;

-static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {

- return idx;

-static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {

- return val;

-static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {

- return (val >> 8) & 0xff;

-static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,

- GetARGBValue)

-COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,

- GetAlphaValue)

-#undef COLOR_INDEX_INVERSE

-void VP8LInverseTransform(const VP8LTransform* const transform,

- int row_start, int row_end,

- const uint32_t* const in, uint32_t* const out) {

- const int width = transform->xsize_;

- assert(row_start < row_end);

- assert(row_end <= transform->ysize_);

- switch (transform->type_) {

- case SUBTRACT_GREEN:

- VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);

- break;

- case PREDICTOR_TRANSFORM:

- PredictorInverseTransform(transform, row_start, row_end, out);

- if (row_end != transform->ysize_) {

- // The last predicted row in this iteration will be the top-pred row

- // for the first row in next iteration.

- memcpy(out - width, out + (row_end - row_start - 1) * width,

- width * sizeof(*out));

- }

- break;

- case CROSS_COLOR_TRANSFORM:

- ColorSpaceInverseTransform(transform, row_start, row_end, out);

- break;

- case COLOR_INDEXING_TRANSFORM:

- if (in == out && transform->bits_ > 0) {

- // Move packed pixels to the end of unpacked region, so that unpacking

- // can occur seamlessly.

- // Also, note that this is the only transform that applies on

- // the effective width of VP8LSubSampleSize(xsize_, bits_). All other

- // transforms work on effective width of xsize_.

- const int out_stride = (row_end - row_start) * width;

- const int in_stride = (row_end - row_start) *

- VP8LSubSampleSize(transform->xsize_, transform->bits_);

- uint32_t* const src = out + out_stride - in_stride;

- memmove(src, out, in_stride * sizeof(*src));

- ColorIndexInverseTransform(transform, row_start, row_end, src, out);

- } else {

- ColorIndexInverseTransform(transform, row_start, row_end, in, out);

- }

- break;

- }

-//------------------------------------------------------------------------------

-// Color space conversion.

-static int is_big_endian(void) {

- static const union {

- uint16_t w;

- uint8_t b[2];

- } tmp = { 1 };

- return (tmp.b[0] != 1);

-void VP8LConvertBGRAToRGB_C(const uint32_t* src,

- int num_pixels, uint8_t* dst) {

- const uint32_t* const src_end = src + num_pixels;

- while (src < src_end) {

- const uint32_t argb = *src++;

- *dst++ = (argb >> 16) & 0xff;

- *dst++ = (argb >> 8) & 0xff;

- *dst++ = (argb >> 0) & 0xff;

- }

-void VP8LConvertBGRAToRGBA_C(const uint32_t* src,

- int num_pixels, uint8_t* dst) {

- const uint32_t* const src_end = src + num_pixels;

- while (src < src_end) {

- const uint32_t argb = *src++;

- *dst++ = (argb >> 16) & 0xff;

- *dst++ = (argb >> 8) & 0xff;

- *dst++ = (argb >> 0) & 0xff;

- *dst++ = (argb >> 24) & 0xff;

- }

-void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,

- int num_pixels, uint8_t* dst) {

- const uint32_t* const src_end = src + num_pixels;

- while (src < src_end) {

- const uint32_t argb = *src++;

- const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);

- const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);

-#ifdef WEBP_SWAP_16BIT_CSP

- *dst++ = ba;

- *dst++ = rg;

-#else

- *dst++ = rg;

- *dst++ = ba;

-#endif

- }

-void VP8LConvertBGRAToRGB565_C(const uint32_t* src,

- int num_pixels, uint8_t* dst) {

- const uint32_t* const src_end = src + num_pixels;

- while (src < src_end) {

- const uint32_t argb = *src++;

- const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);

- const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);

-#ifdef WEBP_SWAP_16BIT_CSP

- *dst++ = gb;

- *dst++ = rg;

-#else

- *dst++ = rg;

- *dst++ = gb;

-#endif

- }

-void VP8LConvertBGRAToBGR_C(const uint32_t* src,

- int num_pixels, uint8_t* dst) {

- const uint32_t* const src_end = src + num_pixels;

- while (src < src_end) {

- const uint32_t argb = *src++;

- *dst++ = (argb >> 0) & 0xff;

- *dst++ = (argb >> 8) & 0xff;

- *dst++ = (argb >> 16) & 0xff;

- }

-static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,

- int swap_on_big_endian) {

- if (is_big_endian() == swap_on_big_endian) {

- const uint32_t* const src_end = src + num_pixels;

- while (src < src_end) {

- const uint32_t argb = *src++;

-#if !defined(WORDS_BIGENDIAN)

-#if !defined(WEBP_REFERENCE_IMPLEMENTATION)

- *(uint32_t*)dst = BSwap32(argb);

-#else // WEBP_REFERENCE_IMPLEMENTATION

- dst[0] = (argb >> 24) & 0xff;

- dst[1] = (argb >> 16) & 0xff;

- dst[2] = (argb >> 8) & 0xff;

- dst[3] = (argb >> 0) & 0xff;

-#endif

-#else // WORDS_BIGENDIAN

- dst[0] = (argb >> 0) & 0xff;

- dst[1] = (argb >> 8) & 0xff;

- dst[2] = (argb >> 16) & 0xff;

- dst[3] = (argb >> 24) & 0xff;

-#endif

- dst += sizeof(argb);

- }

- } else {

- memcpy(dst, src, num_pixels * sizeof(*src));

- }

-void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,

- WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {

- switch (out_colorspace) {

- case MODE_RGB:

- VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);

- break;

- case MODE_RGBA:

- VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);

- break;

- case MODE_rgbA:

- VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);

- WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);

- break;

- case MODE_BGR:

- VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);

- break;

- case MODE_BGRA:

- CopyOrSwap(in_data, num_pixels, rgba, 1);

- break;

- case MODE_bgrA:

- CopyOrSwap(in_data, num_pixels, rgba, 1);

- WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);

- break;

- case MODE_ARGB:

- CopyOrSwap(in_data, num_pixels, rgba, 0);

- break;

- case MODE_Argb:

- CopyOrSwap(in_data, num_pixels, rgba, 0);

- WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);

- break;

- case MODE_RGBA_4444:

- VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);

- break;

- case MODE_rgbA_4444:

- VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);

- WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);

- break;

- case MODE_RGB_565:

- VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);

- break;

- default:

- assert(0); // Code flow should not reach here.

- }

//------------------------------------------------------------------------------

// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.

void VP8LBundleColorMap(const uint8_t* const row, int width,

@@ -1478,53 +1094,6 @@ static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,

return cost;

}

-// Returns the various RLE counts

-static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {

- int i;

- int streak = 0;

- VP8LStreaks stats;

- memset(&stats, 0, sizeof(stats));

- for (i = 0; i < length - 1; ++i) {

- ++streak;

- if (population[i] == population[i + 1]) {

- continue;

- }

- stats.counts[population[i] != 0] += (streak > 3);

- stats.streaks[population[i] != 0][(streak > 3)] += streak;

- streak = 0;

- }

- ++streak;

- stats.counts[population[i] != 0] += (streak > 3);

- stats.streaks[population[i] != 0][(streak > 3)] += streak;

- return stats;

-static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,

- const uint32_t* Y, int length) {

- int i;

- int streak = 0;

- VP8LStreaks stats;

- memset(&stats, 0, sizeof(stats));

- for (i = 0; i < length - 1; ++i) {

- const int xy = X[i] + Y[i];

- const int xy_next = X[i + 1] + Y[i + 1];

- ++streak;

- if (xy == xy_next) {

- continue;

- }

- stats.counts[xy != 0] += (streak > 3);

- stats.streaks[xy != 0][(streak > 3)] += streak;

- streak = 0;

- }

- {

- const int xy = X[i] + Y[i];

- ++streak;

- stats.counts[xy != 0] += (streak > 3);

- stats.streaks[xy != 0][(streak > 3)] += streak;

- }

- return stats;

//------------------------------------------------------------------------------

static void HistogramAdd(const VP8LHistogram* const a,

@@ -1563,61 +1132,52 @@ static void HistogramAdd(const VP8LHistogram* const a,

//------------------------------------------------------------------------------

VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;

-VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;

-VP8LPredictorFunc VP8LPredictors[16];

VP8LTransformColorFunc VP8LTransformColor;

-VP8LTransformColorFunc VP8LTransformColorInverse;

-VP8LConvertFunc VP8LConvertBGRAToRGB;

-VP8LConvertFunc VP8LConvertBGRAToRGBA;

-VP8LConvertFunc VP8LConvertBGRAToRGBA4444;

-VP8LConvertFunc VP8LConvertBGRAToRGB565;

-VP8LConvertFunc VP8LConvertBGRAToBGR;

+VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;

+VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;

VP8LFastLog2SlowFunc VP8LFastLog2Slow;

VP8LFastLog2SlowFunc VP8LFastSLog2Slow;

VP8LCostFunc VP8LExtraCost;

VP8LCostCombinedFunc VP8LExtraCostCombined;

+VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;

-VP8LCostCountFunc VP8LHuffmanCostCount;

-VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;

+GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;

VP8LHistogramAddFunc VP8LHistogramAdd;

-extern void VP8LDspInitSSE2(void);

-extern void VP8LDspInitNEON(void);

-extern void VP8LDspInitMIPS32(void);

+extern void VP8LEncDspInitSSE2(void);

+extern void VP8LEncDspInitSSE41(void);

+extern void VP8LEncDspInitNEON(void);

+extern void VP8LEncDspInitMIPS32(void);

+extern void VP8LEncDspInitMIPSdspR2(void);

-static volatile VP8CPUInfo lossless_last_cpuinfo_used =

- (VP8CPUInfo)&lossless_last_cpuinfo_used;

+static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used =

+ (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;

-void VP8LDspInit(void) {

- if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;

+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {

+ if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;

- memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));

+ VP8LDspInit();

VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;

- VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;

VP8LTransformColor = VP8LTransformColor_C;

- VP8LTransformColorInverse = VP8LTransformColorInverse_C;

- VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;

- VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;

- VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;

- VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;

- VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;

+ VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;

+ VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;

VP8LFastLog2Slow = FastLog2Slow;

VP8LFastSLog2Slow = FastSLog2Slow;

VP8LExtraCost = ExtraCost;

VP8LExtraCostCombined = ExtraCostCombined;

+ VP8LCombinedShannonEntropy = CombinedShannonEntropy;

- VP8LHuffmanCostCount = HuffmanCostCount;

- VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;

+ VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;

VP8LHistogramAdd = HistogramAdd;

@@ -1625,21 +1185,31 @@ void VP8LDspInit(void) {

if (VP8GetCPUInfo != NULL) {

#if defined(WEBP_USE_SSE2)

if (VP8GetCPUInfo(kSSE2)) {

- VP8LDspInitSSE2();

+ VP8LEncDspInitSSE2();

+#if defined(WEBP_USE_SSE41)

+ if (VP8GetCPUInfo(kSSE4_1)) {

+ VP8LEncDspInitSSE41();

+ }

+#endif

}

#endif

#if defined(WEBP_USE_NEON)

if (VP8GetCPUInfo(kNEON)) {

- VP8LDspInitNEON();

+ VP8LEncDspInitNEON();

}

#endif

#if defined(WEBP_USE_MIPS32)

if (VP8GetCPUInfo(kMIPS32)) {

- VP8LDspInitMIPS32();

+ VP8LEncDspInitMIPS32();

+ }

+#endif

+#if defined(WEBP_USE_MIPS_DSP_R2)

+ if (VP8GetCPUInfo(kMIPSdspR2)) {

+ VP8LEncDspInitMIPSdspR2();

}

#endif

}

- lossless_last_cpuinfo_used = VP8GetCPUInfo;

+ lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;

}

//------------------------------------------------------------------------------

« no previous file with comments | « third_party/libwebp/dsp/lossless.c ('k') | third_party/libwebp/dsp/lossless_enc_mips32.c » ('j') | no next file with comments »