Index: third_party/libwebp/dsp/enc.c |
diff --git a/third_party/libwebp/dsp/enc.c b/third_party/libwebp/dsp/enc.c |
index 02234564beefb8276c0c3983529709b5fa24c0af..ae2c830a23186c12fe4fe6fb0ac15336c42a8709 100644 |
--- a/third_party/libwebp/dsp/enc.c |
+++ b/third_party/libwebp/dsp/enc.c |
@@ -17,31 +17,18 @@ |
extern "C" { |
#endif |
-//------------------------------------------------------------------------------ |
-// Compute susceptibility based on DCT-coeff histograms: |
-// the higher, the "easier" the macroblock is to compress. |
- |
-static int ClipAlpha(int alpha) { |
- return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha; |
+static WEBP_INLINE uint8_t clip_8b(int v) { |
+ return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
} |
-int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) { |
- int num = 0, den = 0, val = 0; |
- int k; |
- int alpha; |
- // note: changing this loop to avoid the numerous "k + 1" slows things down. |
- for (k = 0; k < MAX_COEFF_THRESH; ++k) { |
- if (histo[k + 1]) { |
- val += histo[k + 1]; |
- num += val * (k + 1); |
- den += (k + 1) * (k + 1); |
- } |
- } |
- // we scale the value to a usable [0..255] range |
- alpha = den ? 10 * num / den - 5 : 0; |
- return ClipAlpha(alpha); |
+static WEBP_INLINE int clip_max(int v, int max) { |
+ return (v > max) ? max : v; |
} |
+//------------------------------------------------------------------------------ |
+// Compute susceptibility based on DCT-coeff histograms: |
+// the higher, the "easier" the macroblock is to compress. |
+ |
const int VP8DspScan[16 + 4 + 4] = { |
// Luma |
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
@@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = { |
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V |
}; |
-static int CollectHistogram(const uint8_t* ref, const uint8_t* pred, |
- int start_block, int end_block) { |
- int histo[MAX_COEFF_THRESH + 1] = { 0 }; |
- int16_t out[16]; |
- int j, k; |
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, |
+ int start_block, int end_block, |
+ VP8Histogram* const histo) { |
+ int j; |
for (j = start_block; j < end_block; ++j) { |
- VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); |
+ int k; |
+ int16_t out[16]; |
- // Convert coefficients to bin (within out[]). |
- for (k = 0; k < 16; ++k) { |
- const int v = abs(out[k]) >> 2; |
- out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v; |
- } |
+ VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); |
- // Use bin to update histogram. |
+ // Convert coefficients to bin. |
for (k = 0; k < 16; ++k) { |
- histo[out[k]]++; |
+ const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? |
+ const int clipped_value = clip_max(v, MAX_COEFF_THRESH); |
+ histo->distribution[clipped_value]++; |
} |
} |
- |
- return VP8GetAlpha(histo); |
} |
//------------------------------------------------------------------------------ |
@@ -89,15 +72,12 @@ static void InitTables(void) { |
if (!tables_ok) { |
int i; |
for (i = -255; i <= 255 + 255; ++i) { |
- clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; |
+ clip1[255 + i] = clip_8b(i); |
} |
tables_ok = 1; |
} |
} |
-static WEBP_INLINE uint8_t clip_8b(int v) { |
- return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255; |
-} |
//------------------------------------------------------------------------------ |
// Transforms (Paragraph 14.4) |
@@ -154,25 +134,25 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { |
int i; |
int tmp[16]; |
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { |
- const int d0 = src[0] - ref[0]; |
+ const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255]) |
const int d1 = src[1] - ref[1]; |
const int d2 = src[2] - ref[2]; |
const int d3 = src[3] - ref[3]; |
- const int a0 = (d0 + d3) << 3; |
- const int a1 = (d1 + d2) << 3; |
- const int a2 = (d1 - d2) << 3; |
- const int a3 = (d0 - d3) << 3; |
- tmp[0 + i * 4] = (a0 + a1); |
- tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12; |
- tmp[2 + i * 4] = (a0 - a1); |
- tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12; |
+ const int a0 = (d0 + d3); // 10b [-510,510] |
+ const int a1 = (d1 + d2); |
+ const int a2 = (d1 - d2); |
+ const int a3 = (d0 - d3); |
+ tmp[0 + i * 4] = (a0 + a1) << 3; // 14b [-8160,8160] |
+ tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542] |
+ tmp[2 + i * 4] = (a0 - a1) << 3; |
+ tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9; |
} |
for (i = 0; i < 4; ++i) { |
- const int a0 = (tmp[0 + i] + tmp[12 + i]); |
+ const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b |
const int a1 = (tmp[4 + i] + tmp[ 8 + i]); |
const int a2 = (tmp[4 + i] - tmp[ 8 + i]); |
const int a3 = (tmp[0 + i] - tmp[12 + i]); |
- out[0 + i] = (a0 + a1 + 7) >> 4; |
+ out[0 + i] = (a0 + a1 + 7) >> 4; // 12b |
out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); |
out[8 + i] = (a0 - a1 + 7) >> 4; |
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); |
@@ -589,30 +569,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) { |
int i; |
// horizontal pass |
for (i = 0; i < 4; ++i, in += BPS) { |
- const int a0 = (in[0] + in[2]) << 2; |
- const int a1 = (in[1] + in[3]) << 2; |
- const int a2 = (in[1] - in[3]) << 2; |
- const int a3 = (in[0] - in[2]) << 2; |
- tmp[0 + i * 4] = a0 + a1 + (a0 != 0); |
+ const int a0 = in[0] + in[2]; |
+ const int a1 = in[1] + in[3]; |
+ const int a2 = in[1] - in[3]; |
+ const int a3 = in[0] - in[2]; |
+ tmp[0 + i * 4] = a0 + a1; |
tmp[1 + i * 4] = a3 + a2; |
tmp[2 + i * 4] = a3 - a2; |
tmp[3 + i * 4] = a0 - a1; |
} |
// vertical pass |
for (i = 0; i < 4; ++i, ++w) { |
- const int a0 = (tmp[0 + i] + tmp[8 + i]); |
- const int a1 = (tmp[4 + i] + tmp[12+ i]); |
- const int a2 = (tmp[4 + i] - tmp[12+ i]); |
- const int a3 = (tmp[0 + i] - tmp[8 + i]); |
+ const int a0 = tmp[0 + i] + tmp[8 + i]; |
+ const int a1 = tmp[4 + i] + tmp[12+ i]; |
+ const int a2 = tmp[4 + i] - tmp[12+ i]; |
+ const int a3 = tmp[0 + i] - tmp[8 + i]; |
const int b0 = a0 + a1; |
const int b1 = a3 + a2; |
const int b2 = a3 - a2; |
const int b3 = a0 - a1; |
- // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3 |
- sum += w[ 0] * ((abs(b0) + 3) >> 3); |
- sum += w[ 4] * ((abs(b1) + 3) >> 3); |
- sum += w[ 8] * ((abs(b2) + 3) >> 3); |
- sum += w[12] * ((abs(b3) + 3) >> 3); |
+ |
+ sum += w[ 0] * abs(b0); |
+ sum += w[ 4] * abs(b1); |
+ sum += w[ 8] * abs(b2); |
+ sum += w[12] * abs(b3); |
} |
return sum; |
} |
@@ -621,7 +601,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b, |
const uint16_t* const w) { |
const int sum1 = TTransform(a, w); |
const int sum2 = TTransform(b, w); |
- return (abs(sum2 - sum1) + 8) >> 4; |
+ return abs(sum2 - sum1) >> 5; |
} |
static int Disto16x16(const uint8_t* const a, const uint8_t* const b, |
@@ -706,6 +686,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock; |
VP8BlockCopy VP8Copy4x4; |
extern void VP8EncDspInitSSE2(void); |
+extern void VP8EncDspInitNEON(void); |
void VP8EncDspInit(void) { |
InitTables(); |
@@ -734,6 +715,10 @@ void VP8EncDspInit(void) { |
if (VP8GetCPUInfo(kSSE2)) { |
VP8EncDspInitSSE2(); |
} |
+#elif defined(WEBP_USE_NEON) |
+ if (VP8GetCPUInfo(kNEON)) { |
+ VP8EncDspInitNEON(); |
+ } |
#endif |
} |
} |