Index: third_party/libwebp/dsp/lossless_enc.c |
diff --git a/third_party/libwebp/dsp/lossless.c b/third_party/libwebp/dsp/lossless_enc.c |
similarity index 53% |
copy from third_party/libwebp/dsp/lossless.c |
copy to third_party/libwebp/dsp/lossless_enc.c |
index ee334bceb0b9669bc7791c21ef2da255eb9445c7..2eafa3da7d23d69bc7e084e0e0d519b9bbd0a9e1 100644 |
--- a/third_party/libwebp/dsp/lossless.c |
+++ b/third_party/libwebp/dsp/lossless_enc.c |
@@ -1,4 +1,4 @@ |
-// Copyright 2012 Google Inc. All Rights Reserved. |
+// Copyright 2015 Google Inc. All Rights Reserved. |
// |
// Use of this source code is governed by a BSD-style license |
// that can be found in the COPYING file in the root of the source |
@@ -7,7 +7,7 @@ |
// be found in the AUTHORS file in the root of the source tree. |
// ----------------------------------------------------------------------------- |
// |
-// Image transforms and color space conversion methods for lossless decoder. |
+// Image transform methods for lossless encoder. |
// |
// Authors: Vikas Arora (vikaas.arora@gmail.com) |
// Jyrki Alakuijala (jyrki@google.com) |
@@ -24,6 +24,9 @@ |
#define MAX_DIFF_COST (1e30f) |
+static const int kPredLowEffort = 11; |
+static const uint32_t kMaskAlpha = 0xff000000; |
+ |
// lookup table for small values of log2(int) |
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { |
0.0000000000000000f, 0.0000000000000000f, |
@@ -326,13 +329,6 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = { |
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 |
}; |
-// The threshold till approximate version of log_2 can be used. |
-// Practically, we can get rid of the call to log() as the two values match to |
-// very high degree (the ratio of these two is 0.99999x). |
-// Keeping a high threshold for now. |
-#define APPROX_LOG_WITH_CORRECTION_MAX 65536 |
-#define APPROX_LOG_MAX 4096 |
-#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 |
static float FastSLog2Slow(uint32_t v) { |
assert(v >= LOG_LOOKUP_IDX_MAX); |
if (v < APPROX_LOG_WITH_CORRECTION_MAX) { |
@@ -384,166 +380,11 @@ static float FastLog2Slow(uint32_t v) { |
} |
} |
-//------------------------------------------------------------------------------ |
-// Image transforms. |
- |
// Mostly used to reduce code size + readability |
static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } |
-// In-place sum of each component with mod 256. |
-static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { |
- const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); |
- const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); |
- *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); |
-} |
- |
-static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { |
- return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); |
-} |
- |
-static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { |
- return Average2(Average2(a0, a2), a1); |
-} |
- |
-static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, |
- uint32_t a2, uint32_t a3) { |
- return Average2(Average2(a0, a1), Average2(a2, a3)); |
-} |
- |
-static WEBP_INLINE uint32_t Clip255(uint32_t a) { |
- if (a < 256) { |
- return a; |
- } |
- // return 0, when a is a negative integer. |
- // return 255, when a is positive. |
- return ~a >> 24; |
-} |
- |
-static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { |
- return Clip255(a + b - c); |
-} |
- |
-static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, |
- uint32_t c2) { |
- const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); |
- const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, |
- (c1 >> 16) & 0xff, |
- (c2 >> 16) & 0xff); |
- const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, |
- (c1 >> 8) & 0xff, |
- (c2 >> 8) & 0xff); |
- const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); |
- return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; |
-} |
- |
-static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { |
- return Clip255(a + (a - b) / 2); |
-} |
- |
-static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, |
- uint32_t c2) { |
- const uint32_t ave = Average2(c0, c1); |
- const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); |
- const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); |
- const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); |
- const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); |
- return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; |
-} |
- |
-// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined. |
-#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409 |
-# define LOCAL_INLINE __attribute__ ((noinline)) |
-#else |
-# define LOCAL_INLINE WEBP_INLINE |
-#endif |
- |
-static LOCAL_INLINE int Sub3(int a, int b, int c) { |
- const int pb = b - c; |
- const int pa = a - c; |
- return abs(pb) - abs(pa); |
-} |
- |
-#undef LOCAL_INLINE |
- |
-static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { |
- const int pa_minus_pb = |
- Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + |
- Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + |
- Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + |
- Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); |
- return (pa_minus_pb <= 0) ? a : b; |
-} |
- |
//------------------------------------------------------------------------------ |
-// Predictors |
- |
-static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { |
- (void)top; |
- (void)left; |
- return ARGB_BLACK; |
-} |
-static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { |
- (void)top; |
- return left; |
-} |
-static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { |
- (void)left; |
- return top[0]; |
-} |
-static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { |
- (void)left; |
- return top[1]; |
-} |
-static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { |
- (void)left; |
- return top[-1]; |
-} |
-static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Average3(left, top[0], top[1]); |
- return pred; |
-} |
-static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Average2(left, top[-1]); |
- return pred; |
-} |
-static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Average2(left, top[0]); |
- return pred; |
-} |
-static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Average2(top[-1], top[0]); |
- (void)left; |
- return pred; |
-} |
-static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Average2(top[0], top[1]); |
- (void)left; |
- return pred; |
-} |
-static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Average4(left, top[-1], top[0], top[1]); |
- return pred; |
-} |
-static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = Select(top[0], left, top[-1]); |
- return pred; |
-} |
-static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); |
- return pred; |
-} |
-static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { |
- const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); |
- return pred; |
-} |
- |
-static const VP8LPredictorFunc kPredictorsC[16] = { |
- Predictor0, Predictor1, Predictor2, Predictor3, |
- Predictor4, Predictor5, Predictor6, Predictor7, |
- Predictor8, Predictor9, Predictor10, Predictor11, |
- Predictor12, Predictor13, |
- Predictor0, Predictor0 // <- padding security sentinels |
-}; |
+// Methods to calculate Entropy (Shannon). |
static float PredictionCostSpatial(const int counts[256], int weight_0, |
double exp_val) { |
@@ -565,15 +406,15 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) { |
int sumX = 0, sumXY = 0; |
for (i = 0; i < 256; ++i) { |
const int x = X[i]; |
- const int xy = x + Y[i]; |
if (x != 0) { |
+ const int xy = x + Y[i]; |
sumX += x; |
retval -= VP8LFastSLog2(x); |
sumXY += xy; |
retval -= VP8LFastSLog2(xy); |
- } else if (xy != 0) { |
- sumXY += xy; |
- retval -= VP8LFastSLog2(xy); |
+ } else if (Y[i] != 0) { |
+ sumXY += Y[i]; |
+ retval -= VP8LFastSLog2(Y[i]); |
} |
} |
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); |
@@ -587,11 +428,107 @@ static float PredictionCostSpatialHistogram(const int accumulated[4][256], |
for (i = 0; i < 4; ++i) { |
const double kExpValue = 0.94; |
retval += PredictionCostSpatial(tile[i], 1, kExpValue); |
- retval += CombinedShannonEntropy(tile[i], accumulated[i]); |
+ retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]); |
} |
return (float)retval; |
} |
+void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { |
+ entropy->entropy = 0.; |
+ entropy->sum = 0; |
+ entropy->nonzeros = 0; |
+ entropy->max_val = 0; |
+ entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM; |
+} |
+ |
+void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, |
+ VP8LBitEntropy* const entropy) { |
+ int i; |
+ |
+ VP8LBitEntropyInit(entropy); |
+ |
+ for (i = 0; i < n; ++i) { |
+ if (array[i] != 0) { |
+ entropy->sum += array[i]; |
+ entropy->nonzero_code = i; |
+ ++entropy->nonzeros; |
+ entropy->entropy -= VP8LFastSLog2(array[i]); |
+ if (entropy->max_val < array[i]) { |
+ entropy->max_val = array[i]; |
+ } |
+ } |
+ } |
+ entropy->entropy += VP8LFastSLog2(entropy->sum); |
+} |
+ |
+static WEBP_INLINE void GetEntropyUnrefinedHelper( |
+ uint32_t val, int i, uint32_t* const val_prev, int* const i_prev, |
+ VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) { |
+ const int streak = i - *i_prev; |
+ |
+ // Gather info for the bit entropy. |
+ if (*val_prev != 0) { |
+ bit_entropy->sum += (*val_prev) * streak; |
+ bit_entropy->nonzeros += streak; |
+ bit_entropy->nonzero_code = *i_prev; |
+ bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak; |
+ if (bit_entropy->max_val < *val_prev) { |
+ bit_entropy->max_val = *val_prev; |
+ } |
+ } |
+ |
+ // Gather info for the Huffman cost. |
+ stats->counts[*val_prev != 0] += (streak > 3); |
+ stats->streaks[*val_prev != 0][(streak > 3)] += streak; |
+ |
+ *val_prev = val; |
+ *i_prev = i; |
+} |
+ |
+void VP8LGetEntropyUnrefined(const uint32_t* const X, int length, |
+ VP8LBitEntropy* const bit_entropy, |
+ VP8LStreaks* const stats) { |
+ int i; |
+ int i_prev = 0; |
+ uint32_t x_prev = X[0]; |
+ |
+ memset(stats, 0, sizeof(*stats)); |
+ VP8LBitEntropyInit(bit_entropy); |
+ |
+ for (i = 1; i < length; ++i) { |
+ const uint32_t x = X[i]; |
+ if (x != x_prev) { |
+ VP8LGetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats); |
+ } |
+ } |
+ VP8LGetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats); |
+ |
+ bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); |
+} |
+ |
+void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X, |
+ const uint32_t* const Y, int length, |
+ VP8LBitEntropy* const bit_entropy, |
+ VP8LStreaks* const stats) { |
+ int i = 1; |
+ int i_prev = 0; |
+ uint32_t xy_prev = X[0] + Y[0]; |
+ |
+ memset(stats, 0, sizeof(*stats)); |
+ VP8LBitEntropyInit(bit_entropy); |
+ |
+ for (i = 1; i < length; ++i) { |
+ const uint32_t xy = X[i] + Y[i]; |
+ if (xy != xy_prev) { |
+ VP8LGetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy, |
+ stats); |
+ } |
+ } |
+ VP8LGetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats); |
+ |
+ bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); |
+} |
+ |
static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) { |
++histo_argb[0][argb >> 24]; |
++histo_argb[1][(argb >> 16) & 0xff]; |
@@ -599,10 +536,27 @@ static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) { |
++histo_argb[3][argb & 0xff]; |
} |
+//------------------------------------------------------------------------------ |
+ |
+static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func, |
+ int x, int y, |
+ const uint32_t* current_row, |
+ const uint32_t* upper_row) { |
+ if (y == 0) { |
+ return (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left. |
+ } else if (x == 0) { |
+ return upper_row[x]; // Top. |
+ } else { |
+ return pred_func(current_row[x - 1], upper_row + x); |
+ } |
+} |
+ |
+// Returns best predictor and updates the accumulated histogram. |
static int GetBestPredictorForTile(int width, int height, |
int tile_x, int tile_y, int bits, |
- const int accumulated[4][256], |
- const uint32_t* const argb_scratch) { |
+ int accumulated[4][256], |
+ const uint32_t* const argb_scratch, |
+ int exact) { |
const int kNumPredModes = 14; |
const int col_start = tile_x << bits; |
const int row_start = tile_y << bits; |
@@ -612,13 +566,19 @@ static int GetBestPredictorForTile(int width, int height, |
float best_diff = MAX_DIFF_COST; |
int best_mode = 0; |
int mode; |
+ int histo_stack_1[4][256]; |
+ int histo_stack_2[4][256]; |
+ // Need pointers to be able to swap arrays. |
+ int (*histo_argb)[256] = histo_stack_1; |
+ int (*best_histo)[256] = histo_stack_2; |
+ |
+ int i, j; |
for (mode = 0; mode < kNumPredModes; ++mode) { |
const uint32_t* current_row = argb_scratch; |
const VP8LPredictorFunc pred_func = VP8LPredictors[mode]; |
float cur_diff; |
int y; |
- int histo_argb[4][256]; |
- memset(histo_argb, 0, sizeof(histo_argb)); |
+ memset(histo_argb, 0, sizeof(histo_stack_1)); |
for (y = 0; y < max_y; ++y) { |
int x; |
const int row = row_start + y; |
@@ -626,65 +586,93 @@ static int GetBestPredictorForTile(int width, int height, |
current_row = upper_row + width; |
for (x = 0; x < max_x; ++x) { |
const int col = col_start + x; |
- uint32_t predict; |
- if (row == 0) { |
- predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. |
- } else if (col == 0) { |
- predict = upper_row[col]; // Top. |
- } else { |
- predict = pred_func(current_row[col - 1], upper_row + col); |
+ const uint32_t predict = |
+ Predict(pred_func, col, row, current_row, upper_row); |
+ uint32_t residual = VP8LSubPixels(current_row[col], predict); |
+ if (!exact && (current_row[col] & kMaskAlpha) == 0) { |
+ residual &= kMaskAlpha; // See CopyTileWithPrediction. |
} |
- UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict)); |
+ UpdateHisto(histo_argb, residual); |
} |
} |
cur_diff = PredictionCostSpatialHistogram( |
- accumulated, (const int (*)[256])histo_argb); |
+ (const int (*)[256])accumulated, (const int (*)[256])histo_argb); |
if (cur_diff < best_diff) { |
+ int (*tmp)[256] = histo_argb; |
+ histo_argb = best_histo; |
+ best_histo = tmp; |
best_diff = cur_diff; |
best_mode = mode; |
} |
} |
+ for (i = 0; i < 4; i++) { |
+ for (j = 0; j < 256; j++) { |
+ accumulated[i][j] += best_histo[i][j]; |
+ } |
+ } |
+ |
return best_mode; |
} |
-static void CopyTileWithPrediction(int width, int height, |
- int tile_x, int tile_y, int bits, int mode, |
- const uint32_t* const argb_scratch, |
- uint32_t* const argb) { |
- const int col_start = tile_x << bits; |
- const int row_start = tile_y << bits; |
- const int tile_size = 1 << bits; |
- const int max_y = GetMin(tile_size, height - row_start); |
- const int max_x = GetMin(tile_size, width - col_start); |
- const VP8LPredictorFunc pred_func = VP8LPredictors[mode]; |
- const uint32_t* current_row = argb_scratch; |
- |
+static void CopyImageWithPrediction(int width, int height, |
+ int bits, uint32_t* const modes, |
+ uint32_t* const argb_scratch, |
+ uint32_t* const argb, |
+ int low_effort, int exact) { |
+ const int tiles_per_row = VP8LSubSampleSize(width, bits); |
+ const int mask = (1 << bits) - 1; |
+ // The row size is one pixel longer to allow the top right pixel to point to |
+ // the leftmost pixel of the next row when at the right edge. |
+ uint32_t* current_row = argb_scratch; |
+ uint32_t* upper_row = argb_scratch + width + 1; |
int y; |
- for (y = 0; y < max_y; ++y) { |
+ VP8LPredictorFunc pred_func = |
+ low_effort ? VP8LPredictors[kPredLowEffort] : NULL; |
+ |
+ for (y = 0; y < height; ++y) { |
int x; |
- const int row = row_start + y; |
- const uint32_t* const upper_row = current_row; |
- current_row = upper_row + width; |
- for (x = 0; x < max_x; ++x) { |
- const int col = col_start + x; |
- const int pix = row * width + col; |
- uint32_t predict; |
- if (row == 0) { |
- predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. |
- } else if (col == 0) { |
- predict = upper_row[col]; // Top. |
- } else { |
- predict = pred_func(current_row[col - 1], upper_row + col); |
+ uint32_t* tmp = upper_row; |
+ upper_row = current_row; |
+ current_row = tmp; |
+ memcpy(current_row, argb + y * width, sizeof(*current_row) * width); |
+ current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK; |
+ |
+ if (low_effort) { |
+ for (x = 0; x < width; ++x) { |
+ const uint32_t predict = |
+ Predict(pred_func, x, y, current_row, upper_row); |
+ argb[y * width + x] = VP8LSubPixels(current_row[x], predict); |
+ } |
+ } else { |
+ for (x = 0; x < width; ++x) { |
+ uint32_t predict, residual; |
+ if ((x & mask) == 0) { |
+ const int mode = |
+ (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff; |
+ pred_func = VP8LPredictors[mode]; |
+ } |
+ predict = Predict(pred_func, x, y, current_row, upper_row); |
+ residual = VP8LSubPixels(current_row[x], predict); |
+ if (!exact && (current_row[x] & kMaskAlpha) == 0) { |
+ // If alpha is 0, clean up RGB. We can choose the RGB values of the |
+ // residual for best compression. The prediction of alpha itself can |
+ // be non-zero and must be kept though. We choose RGB of the residual |
+ // to be 0. |
+ residual &= kMaskAlpha; |
+ // Update input image so that next predictions use correct RGB value. |
+ current_row[x] = predict & ~kMaskAlpha; |
+ if (x == 0 && y != 0) upper_row[width] = current_row[x]; |
+ } |
+ argb[y * width + x] = residual; |
} |
- argb[pix] = VP8LSubPixels(current_row[col], predict); |
} |
} |
} |
-void VP8LResidualImage(int width, int height, int bits, |
+void VP8LResidualImage(int width, int height, int bits, int low_effort, |
uint32_t* const argb, uint32_t* const argb_scratch, |
- uint32_t* const image) { |
+ uint32_t* const image, int exact) { |
const int max_tile_size = 1 << bits; |
const int tiles_per_row = VP8LSubSampleSize(width, bits); |
const int tiles_per_col = VP8LSubSampleSize(height, bits); |
@@ -692,104 +680,34 @@ void VP8LResidualImage(int width, int height, int bits, |
uint32_t* const current_tile_rows = argb_scratch + width; |
int tile_y; |
int histo[4][256]; |
- memset(histo, 0, sizeof(histo)); |
- for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { |
- const int tile_y_offset = tile_y * max_tile_size; |
- const int this_tile_height = |
- (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset; |
- int tile_x; |
- if (tile_y > 0) { |
- memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width, |
- width * sizeof(*upper_row)); |
+ if (low_effort) { |
+ int i; |
+ for (i = 0; i < tiles_per_row * tiles_per_col; ++i) { |
+ image[i] = ARGB_BLACK | (kPredLowEffort << 8); |
} |
- memcpy(current_tile_rows, &argb[tile_y_offset * width], |
- this_tile_height * width * sizeof(*current_tile_rows)); |
- for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { |
- int pred; |
- int y; |
- const int tile_x_offset = tile_x * max_tile_size; |
- int all_x_max = tile_x_offset + max_tile_size; |
- if (all_x_max > width) { |
- all_x_max = width; |
+ } else { |
+ memset(histo, 0, sizeof(histo)); |
+ for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { |
+ const int tile_y_offset = tile_y * max_tile_size; |
+ const int this_tile_height = |
+ (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset; |
+ int tile_x; |
+ if (tile_y > 0) { |
+ memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width, |
+ width * sizeof(*upper_row)); |
} |
- pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, |
- (const int (*)[256])histo, |
- argb_scratch); |
- image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); |
- CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred, |
- argb_scratch, argb); |
- for (y = 0; y < max_tile_size; ++y) { |
- int ix; |
- int all_x; |
- int all_y = tile_y_offset + y; |
- if (all_y >= height) { |
- break; |
- } |
- ix = all_y * width + tile_x_offset; |
- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { |
- UpdateHisto(histo, argb[ix]); |
- } |
+ memcpy(current_tile_rows, &argb[tile_y_offset * width], |
+ this_tile_height * width * sizeof(*current_tile_rows)); |
+ for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { |
+ const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y, |
+ bits, (int (*)[256])histo, argb_scratch, exact); |
+ image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8); |
} |
} |
} |
-} |
- |
-// Inverse prediction. |
-static void PredictorInverseTransform(const VP8LTransform* const transform, |
- int y_start, int y_end, uint32_t* data) { |
- const int width = transform->xsize_; |
- if (y_start == 0) { // First Row follows the L (mode=1) mode. |
- int x; |
- const uint32_t pred0 = Predictor0(data[-1], NULL); |
- AddPixelsEq(data, pred0); |
- for (x = 1; x < width; ++x) { |
- const uint32_t pred1 = Predictor1(data[x - 1], NULL); |
- AddPixelsEq(data + x, pred1); |
- } |
- data += width; |
- ++y_start; |
- } |
- { |
- int y = y_start; |
- const int tile_width = 1 << transform->bits_; |
- const int mask = tile_width - 1; |
- const int safe_width = width & ~mask; |
- const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); |
- const uint32_t* pred_mode_base = |
- transform->data_ + (y >> transform->bits_) * tiles_per_row; |
- |
- while (y < y_end) { |
- const uint32_t pred2 = Predictor2(data[-1], data - width); |
- const uint32_t* pred_mode_src = pred_mode_base; |
- VP8LPredictorFunc pred_func; |
- int x = 1; |
- int t = 1; |
- // First pixel follows the T (mode=2) mode. |
- AddPixelsEq(data, pred2); |
- // .. the rest: |
- while (x < safe_width) { |
- pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf]; |
- for (; t < tile_width; ++t, ++x) { |
- const uint32_t pred = pred_func(data[x - 1], data + x - width); |
- AddPixelsEq(data + x, pred); |
- } |
- t = 0; |
- } |
- if (x < width) { |
- pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf]; |
- for (; x < width; ++x) { |
- const uint32_t pred = pred_func(data[x - 1], data + x - width); |
- AddPixelsEq(data + x, pred); |
- } |
- } |
- data += width; |
- ++y; |
- if ((y & mask) == 0) { // Use the same mask, since tiles are squares. |
- pred_mode_base += tiles_per_row; |
- } |
- } |
- } |
+ CopyImageWithPrediction(width, height, bits, |
+ image, argb_scratch, argb, low_effort, exact); |
} |
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { |
@@ -803,20 +721,6 @@ void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { |
} |
} |
-// Add green to blue and red channels (i.e. perform the inverse transform of |
-// 'subtract green'). |
-void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) { |
- int i; |
- for (i = 0; i < num_pixels; ++i) { |
- const uint32_t argb = data[i]; |
- const uint32_t green = ((argb >> 8) & 0xff); |
- uint32_t red_blue = (argb & 0x00ff00ffu); |
- red_blue += (green << 16) | green; |
- red_blue &= 0x00ff00ffu; |
- data[i] = (argb & 0xff00ff00u) | red_blue; |
- } |
-} |
- |
static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) { |
m->green_to_red_ = 0; |
m->green_to_blue_ = 0; |
@@ -861,24 +765,6 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, |
} |
} |
-void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data, |
- int num_pixels) { |
- int i; |
- for (i = 0; i < num_pixels; ++i) { |
- const uint32_t argb = data[i]; |
- const uint32_t green = argb >> 8; |
- const uint32_t red = argb >> 16; |
- uint32_t new_red = red; |
- uint32_t new_blue = argb; |
- new_red += ColorTransformDelta(m->green_to_red_, green); |
- new_red &= 0xff; |
- new_blue += ColorTransformDelta(m->green_to_blue_, green); |
- new_blue += ColorTransformDelta(m->red_to_blue_, new_red); |
- new_blue &= 0xff; |
- data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); |
- } |
-} |
- |
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, |
uint32_t argb) { |
const uint32_t green = argb >> 8; |
@@ -903,24 +789,32 @@ static float PredictionCostCrossColor(const int accumulated[256], |
// Favor low entropy, locally and globally. |
// Favor small absolute values for PredictionCostSpatial |
static const double kExpValue = 2.4; |
- return CombinedShannonEntropy(counts, accumulated) + |
+ return VP8LCombinedShannonEntropy(counts, accumulated) + |
PredictionCostSpatial(counts, 3, kExpValue); |
} |
+void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride, |
+ int tile_width, int tile_height, |
+ int green_to_red, int histo[]) { |
+ while (tile_height-- > 0) { |
+ int x; |
+ for (x = 0; x < tile_width; ++x) { |
+ ++histo[TransformColorRed(green_to_red, argb[x])]; |
+ } |
+ argb += stride; |
+ } |
+} |
+ |
static float GetPredictionCostCrossColorRed( |
- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, |
- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red, |
- const int accumulated_red_histo[256], const uint32_t* const argb) { |
- int all_y; |
+ const uint32_t* argb, int stride, int tile_width, int tile_height, |
+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red, |
+ const int accumulated_red_histo[256]) { |
int histo[256] = { 0 }; |
float cur_diff; |
- for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { |
- int ix = all_y * xsize + tile_x_offset; |
- int all_x; |
- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { |
- ++histo[TransformColorRed(green_to_red, argb[ix])]; // red. |
- } |
- } |
+ |
+ VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height, |
+ green_to_red, histo); |
+ |
cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo); |
if ((uint8_t)green_to_red == prev_x.green_to_red_) { |
cur_diff -= 3; // favor keeping the areas locally similar |
@@ -935,59 +829,58 @@ static float GetPredictionCostCrossColorRed( |
} |
static void GetBestGreenToRed( |
- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, |
- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, |
- const int accumulated_red_histo[256], const uint32_t* const argb, |
- VP8LMultipliers* const best_tx) { |
- int min_green_to_red = -64; |
- int max_green_to_red = 64; |
- int green_to_red = 0; |
- int eval_min = 1; |
- int eval_max = 1; |
- float cur_diff_min = MAX_DIFF_COST; |
- float cur_diff_max = MAX_DIFF_COST; |
- // Do a binary search to find the optimal green_to_red color transform. |
- while (max_green_to_red - min_green_to_red > 2) { |
- if (eval_min) { |
- cur_diff_min = GetPredictionCostCrossColorRed( |
- tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, |
- prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb); |
- eval_min = 0; |
- } |
- if (eval_max) { |
- cur_diff_max = GetPredictionCostCrossColorRed( |
- tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, |
- prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb); |
- eval_max = 0; |
+ const uint32_t* argb, int stride, int tile_width, int tile_height, |
+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality, |
+ const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) { |
+ const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6] |
+ int green_to_red_best = 0; |
+ int iter, offset; |
+ float best_diff = GetPredictionCostCrossColorRed( |
+ argb, stride, tile_width, tile_height, prev_x, prev_y, |
+ green_to_red_best, accumulated_red_histo); |
+ for (iter = 0; iter < kMaxIters; ++iter) { |
+ // ColorTransformDelta uses 3.5-bit fixed point, so 32 is equal to 1.0 |
+ // in the color computation. Starting with a delta of 1.0 (i.e. 32) and |
+ // halving it on every iteration is sufficient to explore the range (-2, 2). |
+ const int delta = 32 >> iter; |
+ // Try a negative and a positive delta from the best known value. |
+ for (offset = -delta; offset <= delta; offset += 2 * delta) { |
+ const int green_to_red_cur = offset + green_to_red_best; |
+ const float cur_diff = GetPredictionCostCrossColorRed( |
+ argb, stride, tile_width, tile_height, prev_x, prev_y, |
+ green_to_red_cur, accumulated_red_histo); |
+ if (cur_diff < best_diff) { |
+ best_diff = cur_diff; |
+ green_to_red_best = green_to_red_cur; |
+ } |
} |
- if (cur_diff_min < cur_diff_max) { |
- green_to_red = min_green_to_red; |
- max_green_to_red = (max_green_to_red + min_green_to_red) / 2; |
- eval_max = 1; |
- } else { |
- green_to_red = max_green_to_red; |
- min_green_to_red = (max_green_to_red + min_green_to_red) / 2; |
- eval_min = 1; |
+ } |
+ best_tx->green_to_red_ = green_to_red_best; |
+} |
+ |
+void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride, |
+ int tile_width, int tile_height, |
+ int green_to_blue, int red_to_blue, |
+ int histo[]) { |
+ while (tile_height-- > 0) { |
+ int x; |
+ for (x = 0; x < tile_width; ++x) { |
+ ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])]; |
} |
+ argb += stride; |
} |
- best_tx->green_to_red_ = green_to_red; |
} |
static float GetPredictionCostCrossColorBlue( |
- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, |
- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, |
- int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256], |
- const uint32_t* const argb) { |
- int all_y; |
+ const uint32_t* argb, int stride, int tile_width, int tile_height, |
+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, |
+ int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) { |
int histo[256] = { 0 }; |
float cur_diff; |
- for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { |
- int all_x; |
- int ix = all_y * xsize + tile_x_offset; |
- for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { |
- ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])]; |
- } |
- } |
+ |
+ VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height, |
+ green_to_blue, red_to_blue, histo); |
+ |
cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo); |
if ((uint8_t)green_to_blue == prev_x.green_to_blue_) { |
cur_diff -= 3; // favor keeping the areas locally similar |
@@ -1010,49 +903,55 @@ static float GetPredictionCostCrossColorBlue( |
return cur_diff; |
} |
+#define kGreenRedToBlueNumAxis 8 |
+#define kGreenRedToBlueMaxIters 7 |
static void GetBestGreenRedToBlue( |
- int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, |
- int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality, |
- const int accumulated_blue_histo[256], const uint32_t* const argb, |
+ const uint32_t* argb, int stride, int tile_width, int tile_height, |
+ VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality, |
+ const int accumulated_blue_histo[256], |
VP8LMultipliers* const best_tx) { |
- float best_diff = MAX_DIFF_COST; |
- float cur_diff; |
- const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16; |
- const int min_green_to_blue = -32; |
- const int max_green_to_blue = 32; |
- const int min_red_to_blue = -32; |
- const int max_red_to_blue = 32; |
- const int num_iters = |
- (1 + (max_green_to_blue - min_green_to_blue) / step) * |
- (1 + (max_red_to_blue - min_red_to_blue) / step); |
- // Number of tries to get optimal green_to_blue & red_to_blue color transforms |
- // after finding a local minima. |
- const int max_tries_after_min = 4 + (num_iters >> 2); |
- int num_tries_after_min = 0; |
- int green_to_blue; |
- for (green_to_blue = min_green_to_blue; |
- green_to_blue <= max_green_to_blue && |
- num_tries_after_min < max_tries_after_min; |
- green_to_blue += step) { |
- int red_to_blue; |
- for (red_to_blue = min_red_to_blue; |
- red_to_blue <= max_red_to_blue && |
- num_tries_after_min < max_tries_after_min; |
- red_to_blue += step) { |
- cur_diff = GetPredictionCostCrossColorBlue( |
- tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x, |
- prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb); |
+ const int8_t offset[kGreenRedToBlueNumAxis][2] = |
+ {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}}; |
+ const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 }; |
+ const int iters = |
+ (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4; |
+ int green_to_blue_best = 0; |
+ int red_to_blue_best = 0; |
+ int iter; |
+ // Initial value at origin: |
+ float best_diff = GetPredictionCostCrossColorBlue( |
+ argb, stride, tile_width, tile_height, prev_x, prev_y, |
+ green_to_blue_best, red_to_blue_best, accumulated_blue_histo); |
+ for (iter = 0; iter < iters; ++iter) { |
+ const int delta = delta_lut[iter]; |
+ int axis; |
+ for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) { |
+ const int green_to_blue_cur = |
+ offset[axis][0] * delta + green_to_blue_best; |
+ const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best; |
+ const float cur_diff = GetPredictionCostCrossColorBlue( |
+ argb, stride, tile_width, tile_height, prev_x, prev_y, |
+ green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo); |
if (cur_diff < best_diff) { |
best_diff = cur_diff; |
- best_tx->green_to_blue_ = green_to_blue; |
- best_tx->red_to_blue_ = red_to_blue; |
- num_tries_after_min = 0; |
- } else { |
- ++num_tries_after_min; |
+ green_to_blue_best = green_to_blue_cur; |
+ red_to_blue_best = red_to_blue_cur; |
+ } |
+ if (quality < 25 && iter == 4) { |
+ // Only axis aligned diffs for lower quality. |
+ break; // next iter. |
} |
} |
+ if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) { |
+ // Further iterations would not help. |
+ break; // out of iter-loop. |
+ } |
} |
+ best_tx->green_to_blue_ = green_to_blue_best; |
+ best_tx->red_to_blue_ = red_to_blue_best; |
} |
+#undef kGreenRedToBlueMaxIters |
+#undef kGreenRedToBlueNumAxis |
static VP8LMultipliers GetBestColorTransformForTile( |
int tile_x, int tile_y, int bits, |
@@ -1067,14 +966,18 @@ static VP8LMultipliers GetBestColorTransformForTile( |
const int tile_x_offset = tile_x * max_tile_size; |
const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize); |
const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize); |
+ const int tile_width = all_x_max - tile_x_offset; |
+ const int tile_height = all_y_max - tile_y_offset; |
+ const uint32_t* const tile_argb = argb + tile_y_offset * xsize |
+ + tile_x_offset; |
VP8LMultipliers best_tx; |
MultipliersClear(&best_tx); |
- GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, |
- prev_x, prev_y, accumulated_red_histo, argb, &best_tx); |
- GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max, |
- xsize, prev_x, prev_y, quality, accumulated_blue_histo, |
- argb, &best_tx); |
+ GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height, |
+ prev_x, prev_y, quality, accumulated_red_histo, &best_tx); |
+ GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height, |
+ prev_x, prev_y, quality, accumulated_blue_histo, |
+ &best_tx); |
return best_tx; |
} |
@@ -1149,293 +1052,6 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality, |
} |
} |
-// Color space inverse transform. |
-static void ColorSpaceInverseTransform(const VP8LTransform* const transform, |
- int y_start, int y_end, uint32_t* data) { |
- const int width = transform->xsize_; |
- const int tile_width = 1 << transform->bits_; |
- const int mask = tile_width - 1; |
- const int safe_width = width & ~mask; |
- const int remaining_width = width - safe_width; |
- const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); |
- int y = y_start; |
- const uint32_t* pred_row = |
- transform->data_ + (y >> transform->bits_) * tiles_per_row; |
- |
- while (y < y_end) { |
- const uint32_t* pred = pred_row; |
- VP8LMultipliers m = { 0, 0, 0 }; |
- const uint32_t* const data_safe_end = data + safe_width; |
- const uint32_t* const data_end = data + width; |
- while (data < data_safe_end) { |
- ColorCodeToMultipliers(*pred++, &m); |
- VP8LTransformColorInverse(&m, data, tile_width); |
- data += tile_width; |
- } |
- if (data < data_end) { // Left-overs using C-version. |
- ColorCodeToMultipliers(*pred++, &m); |
- VP8LTransformColorInverse(&m, data, remaining_width); |
- data += remaining_width; |
- } |
- ++y; |
- if ((y & mask) == 0) pred_row += tiles_per_row; |
- } |
-} |
- |
-// Separate out pixels packed together using pixel-bundling. |
-// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t). |
-#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ |
-void FUNC_NAME(const VP8LTransform* const transform, \ |
- int y_start, int y_end, const TYPE* src, TYPE* dst) { \ |
- int y; \ |
- const int bits_per_pixel = 8 >> transform->bits_; \ |
- const int width = transform->xsize_; \ |
- const uint32_t* const color_map = transform->data_; \ |
- if (bits_per_pixel < 8) { \ |
- const int pixels_per_byte = 1 << transform->bits_; \ |
- const int count_mask = pixels_per_byte - 1; \ |
- const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \ |
- for (y = y_start; y < y_end; ++y) { \ |
- uint32_t packed_pixels = 0; \ |
- int x; \ |
- for (x = 0; x < width; ++x) { \ |
- /* We need to load fresh 'packed_pixels' once every */ \ |
- /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \ |
- /* is a power of 2, so can just use a mask for that, instead of */ \ |
- /* decrementing a counter. */ \ |
- if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \ |
- *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \ |
- packed_pixels >>= bits_per_pixel; \ |
- } \ |
- } \ |
- } else { \ |
- for (y = y_start; y < y_end; ++y) { \ |
- int x; \ |
- for (x = 0; x < width; ++x) { \ |
- *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ |
- } \ |
- } \ |
- } \ |
-} |
- |
-static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) { |
- return (idx >> 8) & 0xff; |
-} |
- |
-static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) { |
- return idx; |
-} |
- |
-static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) { |
- return val; |
-} |
- |
-static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) { |
- return (val >> 8) & 0xff; |
-} |
- |
-static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex, |
- GetARGBValue) |
-COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex, |
- GetAlphaValue) |
- |
-#undef COLOR_INDEX_INVERSE |
- |
-void VP8LInverseTransform(const VP8LTransform* const transform, |
- int row_start, int row_end, |
- const uint32_t* const in, uint32_t* const out) { |
- const int width = transform->xsize_; |
- assert(row_start < row_end); |
- assert(row_end <= transform->ysize_); |
- switch (transform->type_) { |
- case SUBTRACT_GREEN: |
- VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width); |
- break; |
- case PREDICTOR_TRANSFORM: |
- PredictorInverseTransform(transform, row_start, row_end, out); |
- if (row_end != transform->ysize_) { |
- // The last predicted row in this iteration will be the top-pred row |
- // for the first row in next iteration. |
- memcpy(out - width, out + (row_end - row_start - 1) * width, |
- width * sizeof(*out)); |
- } |
- break; |
- case CROSS_COLOR_TRANSFORM: |
- ColorSpaceInverseTransform(transform, row_start, row_end, out); |
- break; |
- case COLOR_INDEXING_TRANSFORM: |
- if (in == out && transform->bits_ > 0) { |
- // Move packed pixels to the end of unpacked region, so that unpacking |
- // can occur seamlessly. |
- // Also, note that this is the only transform that applies on |
- // the effective width of VP8LSubSampleSize(xsize_, bits_). All other |
- // transforms work on effective width of xsize_. |
- const int out_stride = (row_end - row_start) * width; |
- const int in_stride = (row_end - row_start) * |
- VP8LSubSampleSize(transform->xsize_, transform->bits_); |
- uint32_t* const src = out + out_stride - in_stride; |
- memmove(src, out, in_stride * sizeof(*src)); |
- ColorIndexInverseTransform(transform, row_start, row_end, src, out); |
- } else { |
- ColorIndexInverseTransform(transform, row_start, row_end, in, out); |
- } |
- break; |
- } |
-} |
- |
-//------------------------------------------------------------------------------ |
-// Color space conversion. |
- |
-static int is_big_endian(void) { |
- static const union { |
- uint16_t w; |
- uint8_t b[2]; |
- } tmp = { 1 }; |
- return (tmp.b[0] != 1); |
-} |
- |
-void VP8LConvertBGRAToRGB_C(const uint32_t* src, |
- int num_pixels, uint8_t* dst) { |
- const uint32_t* const src_end = src + num_pixels; |
- while (src < src_end) { |
- const uint32_t argb = *src++; |
- *dst++ = (argb >> 16) & 0xff; |
- *dst++ = (argb >> 8) & 0xff; |
- *dst++ = (argb >> 0) & 0xff; |
- } |
-} |
- |
-void VP8LConvertBGRAToRGBA_C(const uint32_t* src, |
- int num_pixels, uint8_t* dst) { |
- const uint32_t* const src_end = src + num_pixels; |
- while (src < src_end) { |
- const uint32_t argb = *src++; |
- *dst++ = (argb >> 16) & 0xff; |
- *dst++ = (argb >> 8) & 0xff; |
- *dst++ = (argb >> 0) & 0xff; |
- *dst++ = (argb >> 24) & 0xff; |
- } |
-} |
- |
-void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, |
- int num_pixels, uint8_t* dst) { |
- const uint32_t* const src_end = src + num_pixels; |
- while (src < src_end) { |
- const uint32_t argb = *src++; |
- const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); |
- const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); |
-#ifdef WEBP_SWAP_16BIT_CSP |
- *dst++ = ba; |
- *dst++ = rg; |
-#else |
- *dst++ = rg; |
- *dst++ = ba; |
-#endif |
- } |
-} |
- |
-void VP8LConvertBGRAToRGB565_C(const uint32_t* src, |
- int num_pixels, uint8_t* dst) { |
- const uint32_t* const src_end = src + num_pixels; |
- while (src < src_end) { |
- const uint32_t argb = *src++; |
- const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); |
- const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); |
-#ifdef WEBP_SWAP_16BIT_CSP |
- *dst++ = gb; |
- *dst++ = rg; |
-#else |
- *dst++ = rg; |
- *dst++ = gb; |
-#endif |
- } |
-} |
- |
-void VP8LConvertBGRAToBGR_C(const uint32_t* src, |
- int num_pixels, uint8_t* dst) { |
- const uint32_t* const src_end = src + num_pixels; |
- while (src < src_end) { |
- const uint32_t argb = *src++; |
- *dst++ = (argb >> 0) & 0xff; |
- *dst++ = (argb >> 8) & 0xff; |
- *dst++ = (argb >> 16) & 0xff; |
- } |
-} |
- |
-static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, |
- int swap_on_big_endian) { |
- if (is_big_endian() == swap_on_big_endian) { |
- const uint32_t* const src_end = src + num_pixels; |
- while (src < src_end) { |
- const uint32_t argb = *src++; |
- |
-#if !defined(WORDS_BIGENDIAN) |
-#if !defined(WEBP_REFERENCE_IMPLEMENTATION) |
- *(uint32_t*)dst = BSwap32(argb); |
-#else // WEBP_REFERENCE_IMPLEMENTATION |
- dst[0] = (argb >> 24) & 0xff; |
- dst[1] = (argb >> 16) & 0xff; |
- dst[2] = (argb >> 8) & 0xff; |
- dst[3] = (argb >> 0) & 0xff; |
-#endif |
-#else // WORDS_BIGENDIAN |
- dst[0] = (argb >> 0) & 0xff; |
- dst[1] = (argb >> 8) & 0xff; |
- dst[2] = (argb >> 16) & 0xff; |
- dst[3] = (argb >> 24) & 0xff; |
-#endif |
- dst += sizeof(argb); |
- } |
- } else { |
- memcpy(dst, src, num_pixels * sizeof(*src)); |
- } |
-} |
- |
-void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, |
- WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { |
- switch (out_colorspace) { |
- case MODE_RGB: |
- VP8LConvertBGRAToRGB(in_data, num_pixels, rgba); |
- break; |
- case MODE_RGBA: |
- VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba); |
- break; |
- case MODE_rgbA: |
- VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba); |
- WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); |
- break; |
- case MODE_BGR: |
- VP8LConvertBGRAToBGR(in_data, num_pixels, rgba); |
- break; |
- case MODE_BGRA: |
- CopyOrSwap(in_data, num_pixels, rgba, 1); |
- break; |
- case MODE_bgrA: |
- CopyOrSwap(in_data, num_pixels, rgba, 1); |
- WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); |
- break; |
- case MODE_ARGB: |
- CopyOrSwap(in_data, num_pixels, rgba, 0); |
- break; |
- case MODE_Argb: |
- CopyOrSwap(in_data, num_pixels, rgba, 0); |
- WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); |
- break; |
- case MODE_RGBA_4444: |
- VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba); |
- break; |
- case MODE_rgbA_4444: |
- VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba); |
- WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); |
- break; |
- case MODE_RGB_565: |
- VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba); |
- break; |
- default: |
- assert(0); // Code flow should not reach here. |
- } |
-} |
- |
//------------------------------------------------------------------------------ |
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. |
void VP8LBundleColorMap(const uint8_t* const row, int width, |
@@ -1478,53 +1094,6 @@ static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y, |
return cost; |
} |
-// Returns the various RLE counts |
-static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) { |
- int i; |
- int streak = 0; |
- VP8LStreaks stats; |
- memset(&stats, 0, sizeof(stats)); |
- for (i = 0; i < length - 1; ++i) { |
- ++streak; |
- if (population[i] == population[i + 1]) { |
- continue; |
- } |
- stats.counts[population[i] != 0] += (streak > 3); |
- stats.streaks[population[i] != 0][(streak > 3)] += streak; |
- streak = 0; |
- } |
- ++streak; |
- stats.counts[population[i] != 0] += (streak > 3); |
- stats.streaks[population[i] != 0][(streak > 3)] += streak; |
- return stats; |
-} |
- |
-static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X, |
- const uint32_t* Y, int length) { |
- int i; |
- int streak = 0; |
- VP8LStreaks stats; |
- memset(&stats, 0, sizeof(stats)); |
- for (i = 0; i < length - 1; ++i) { |
- const int xy = X[i] + Y[i]; |
- const int xy_next = X[i + 1] + Y[i + 1]; |
- ++streak; |
- if (xy == xy_next) { |
- continue; |
- } |
- stats.counts[xy != 0] += (streak > 3); |
- stats.streaks[xy != 0][(streak > 3)] += streak; |
- streak = 0; |
- } |
- { |
- const int xy = X[i] + Y[i]; |
- ++streak; |
- stats.counts[xy != 0] += (streak > 3); |
- stats.streaks[xy != 0][(streak > 3)] += streak; |
- } |
- return stats; |
-} |
- |
//------------------------------------------------------------------------------ |
static void HistogramAdd(const VP8LHistogram* const a, |
@@ -1563,61 +1132,52 @@ static void HistogramAdd(const VP8LHistogram* const a, |
//------------------------------------------------------------------------------ |
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; |
-VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed; |
-VP8LPredictorFunc VP8LPredictors[16]; |
VP8LTransformColorFunc VP8LTransformColor; |
-VP8LTransformColorFunc VP8LTransformColorInverse; |
-VP8LConvertFunc VP8LConvertBGRAToRGB; |
-VP8LConvertFunc VP8LConvertBGRAToRGBA; |
-VP8LConvertFunc VP8LConvertBGRAToRGBA4444; |
-VP8LConvertFunc VP8LConvertBGRAToRGB565; |
-VP8LConvertFunc VP8LConvertBGRAToBGR; |
+VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms; |
+VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms; |
VP8LFastLog2SlowFunc VP8LFastLog2Slow; |
VP8LFastLog2SlowFunc VP8LFastSLog2Slow; |
VP8LCostFunc VP8LExtraCost; |
VP8LCostCombinedFunc VP8LExtraCostCombined; |
+VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy; |
-VP8LCostCountFunc VP8LHuffmanCostCount; |
-VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount; |
+GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper; |
VP8LHistogramAddFunc VP8LHistogramAdd; |
-extern void VP8LDspInitSSE2(void); |
-extern void VP8LDspInitNEON(void); |
-extern void VP8LDspInitMIPS32(void); |
+extern void VP8LEncDspInitSSE2(void); |
+extern void VP8LEncDspInitSSE41(void); |
+extern void VP8LEncDspInitNEON(void); |
+extern void VP8LEncDspInitMIPS32(void); |
+extern void VP8LEncDspInitMIPSdspR2(void); |
-static volatile VP8CPUInfo lossless_last_cpuinfo_used = |
- (VP8CPUInfo)&lossless_last_cpuinfo_used; |
+static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used = |
+ (VP8CPUInfo)&lossless_enc_last_cpuinfo_used; |
-void VP8LDspInit(void) { |
- if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return; |
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { |
+ if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return; |
- memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors)); |
+ VP8LDspInit(); |
VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C; |
- VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C; |
VP8LTransformColor = VP8LTransformColor_C; |
- VP8LTransformColorInverse = VP8LTransformColorInverse_C; |
- VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C; |
- VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C; |
- VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C; |
- VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C; |
- VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; |
+ VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C; |
+ VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C; |
VP8LFastLog2Slow = FastLog2Slow; |
VP8LFastSLog2Slow = FastSLog2Slow; |
VP8LExtraCost = ExtraCost; |
VP8LExtraCostCombined = ExtraCostCombined; |
+ VP8LCombinedShannonEntropy = CombinedShannonEntropy; |
- VP8LHuffmanCostCount = HuffmanCostCount; |
- VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount; |
+ VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper; |
VP8LHistogramAdd = HistogramAdd; |
@@ -1625,21 +1185,31 @@ void VP8LDspInit(void) { |
if (VP8GetCPUInfo != NULL) { |
#if defined(WEBP_USE_SSE2) |
if (VP8GetCPUInfo(kSSE2)) { |
- VP8LDspInitSSE2(); |
+ VP8LEncDspInitSSE2(); |
+#if defined(WEBP_USE_SSE41) |
+ if (VP8GetCPUInfo(kSSE4_1)) { |
+ VP8LEncDspInitSSE41(); |
+ } |
+#endif |
} |
#endif |
#if defined(WEBP_USE_NEON) |
if (VP8GetCPUInfo(kNEON)) { |
- VP8LDspInitNEON(); |
+ VP8LEncDspInitNEON(); |
} |
#endif |
#if defined(WEBP_USE_MIPS32) |
if (VP8GetCPUInfo(kMIPS32)) { |
- VP8LDspInitMIPS32(); |
+ VP8LEncDspInitMIPS32(); |
+ } |
+#endif |
+#if defined(WEBP_USE_MIPS_DSP_R2) |
+ if (VP8GetCPUInfo(kMIPSdspR2)) { |
+ VP8LEncDspInitMIPSdspR2(); |
} |
#endif |
} |
- lossless_last_cpuinfo_used = VP8GetCPUInfo; |
+ lossless_enc_last_cpuinfo_used = VP8GetCPUInfo; |
} |
//------------------------------------------------------------------------------ |