third_party/libwebp/enc/quant.c - Issue 116213006: Update libwebp to 0.4.0

Unified Diff: third_party/libwebp/enc/quant.c

Issue 116213006: Update libwebp to 0.4.0 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: After Blink Roll Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/libwebp/enc/quant.c

diff --git a/third_party/libwebp/enc/quant.c b/third_party/libwebp/enc/quant.c

index 462d4e9e6ef4306d416ce661b9bebb62afa0c37b..e1d202b5a3b500758645b7a089369e4710b3eeb0 100644

--- a/third_party/libwebp/enc/quant.c

+++ b/third_party/libwebp/enc/quant.c

@@ -13,6 +13,7 @@

#include <assert.h>

#include <math.h>

+#include <stdlib.h> // for abs()

#include "./vp8enci.h"

#include "./cost.h"

@@ -24,18 +25,78 @@

#define MID_ALPHA 64 // neutral value for susceptibility

#define MIN_ALPHA 30 // lowest usable value for susceptibility

-#define MAX_ALPHA 100 // higher meaninful value for susceptibility

+#define MAX_ALPHA 100 // higher meaningful value for susceptibility

#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP

// power-law modulation. Must be strictly less than 1.

#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision

+// number of non-zero coeffs below which we consider the block very flat

+// (and apply a penalty to complex predictions)

+#define FLATNESS_LIMIT_I16 10 // I16 mode

+#define FLATNESS_LIMIT_I4 3 // I4 mode

+#define FLATNESS_LIMIT_UV 2 // UV mode

+#define FLATNESS_PENALTY 140 // roughly ~1bit per block

#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

-#if defined(__cplusplus) || defined(c_plusplus)

-extern "C" {

-#endif

+// #define DEBUG_BLOCK

+//------------------------------------------------------------------------------

+#if defined(DEBUG_BLOCK)

+#include <stdio.h>

+#include <stdlib.h>

+static void PrintBlockInfo(const VP8EncIterator* const it,

+ const VP8ModeScore* const rd) {

+ int i, j;

+ const int is_i16 = (it->mb_->type_ == 1);

+ printf("SOURCE / OUTPUT / ABS DELTA\n");

+ for (j = 0; j < 24; ++j) {

+ if (j == 16) printf("\n"); // newline before the U/V block

+ for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);

+ printf(" ");

+ for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);

+ printf(" ");

+ for (i = 0; i < 16; ++i) {

+ printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));

+ }

+ printf("\n");

+ }

+ printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",

+ (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,

+ (int)rd->score);

+ if (is_i16) {

+ printf("Mode: %d\n", rd->mode_i16);

+ printf("y_dc_levels:");

+ for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);

+ printf("\n");

+ } else {

+ printf("Modes[16]: ");

+ for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);

+ printf("\n");

+ }

+ printf("y_ac_levels:\n");

+ for (j = 0; j < 16; ++j) {

+ for (i = is_i16 ? 1 : 0; i < 16; ++i) {

+ printf("%4d ", rd->y_ac_levels[j][i]);

+ }

+ printf("\n");

+ }

+ printf("\n");

+ printf("uv_levels (mode=%d):\n", rd->mode_uv);

+ for (j = 0; j < 8; ++j) {

+ for (i = 0; i < 16; ++i) {

+ printf("%4d ", rd->uv_levels[j][i]);

+ }

+ printf("\n");

+ }

+#endif // DEBUG_BLOCK

//------------------------------------------------------------------------------

@@ -104,31 +165,13 @@ static const uint16_t kAcTable2[128] = {

385, 393, 401, 409, 416, 424, 432, 440

};

-static const uint16_t kCoeffThresh[16] = {

- 0, 10, 20, 30,

- 10, 20, 30, 30,

- 20, 30, 30, 30,

- 30, 30, 30, 30

-};

-// TODO(skal): tune more. Coeff thresholding?

-static const uint8_t kBiasMatrices[3][16] = { // [3] = [luma-ac,luma-dc,chroma]

- { 96, 96, 96, 96,

- 96, 96, 96, 96,

- 96, 96, 96, 96 },

- { 96, 96, 96, 96,

- 96, 96, 96, 96,

- 96, 96, 96, 96 },

- { 96, 96, 96, 96,

- 96, 96, 96, 96,

- 96, 96, 96, 96 }

+static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]

+ { 96, 110 }, { 96, 108 }, { 110, 115 }

};

-// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).

+// Sharpening by (slightly) raising the hi-frequency coeffs.

// Hack-ish but helpful for mid-bitrate range. Use with care.

+#define SHARPEN_BITS 11 // number of descaling bits for sharpening bias

static const uint8_t kFreqSharpening[16] = {

0, 30, 60, 90,

30, 60, 90, 90,

@@ -141,20 +184,30 @@ static const uint8_t kFreqSharpening[16] = {

// Returns the average quantizer

static int ExpandMatrix(VP8Matrix* const m, int type) {

- int i;

- int sum = 0;

+ int i, sum;

+ for (i = 0; i < 2; ++i) {

+ const int is_ac_coeff = (i > 0);

+ const int bias = kBiasMatrices[type][is_ac_coeff];

+ m->iq_[i] = (1 << QFIX) / m->q_[i];

+ m->bias_[i] = BIAS(bias);

+ // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:

+ // * zero if coeff <= zthresh

+ // * non-zero if coeff > zthresh

+ m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];

+ }

for (i = 2; i < 16; ++i) {

m->q_[i] = m->q_[1];

+ m->iq_[i] = m->iq_[1];

+ m->bias_[i] = m->bias_[1];

+ m->zthresh_[i] = m->zthresh_[1];

}

- for (i = 0; i < 16; ++i) {

- const int j = kZigzag[i];

- const int bias = kBiasMatrices[type][j];

- m->iq_[j] = (1 << QFIX) / m->q_[j];

- m->bias_[j] = BIAS(bias);

- // TODO(skal): tune kCoeffThresh[]

- m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8;

- m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;

- sum += m->q_[j];

+ for (sum = 0, i = 0; i < 16; ++i) {

+ if (type == 0) { // we only use sharpening for AC luma coeffs

+ m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;

+ } else {

+ m->sharpen_[i] = 0;

+ }

+ sum += m->q_[i];

}

return (sum + 8) >> 4;

}

@@ -182,17 +235,17 @@ static void SetupMatrices(VP8Encoder* enc) {

q16 = ExpandMatrix(&m->y2_, 1);

quv = ExpandMatrix(&m->uv_, 2);

- // TODO: Switch to kLambda*[] tables?

- {

- m->lambda_i4_ = (3 * q4 * q4) >> 7;

- m->lambda_i16_ = (3 * q16 * q16);

- m->lambda_uv_ = (3 * quv * quv) >> 6;

- m->lambda_mode_ = (1 * q4 * q4) >> 7;

- m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;

- m->lambda_trellis_i16_ = (q16 * q16) >> 2;

- m->lambda_trellis_uv_ = (quv *quv) << 1;

- m->tlambda_ = (tlambda_scale * q4) >> 5;

- }

+ m->lambda_i4_ = (3 * q4 * q4) >> 7;

+ m->lambda_i16_ = (3 * q16 * q16);

+ m->lambda_uv_ = (3 * quv * quv) >> 6;

+ m->lambda_mode_ = (1 * q4 * q4) >> 7;

+ m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;

+ m->lambda_trellis_i16_ = (q16 * q16) >> 2;

+ m->lambda_trellis_uv_ = (quv *quv) << 1;

+ m->tlambda_ = (tlambda_scale * q4) >> 5;

+ m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto

+ m->max_edge_ = 0;

}

@@ -201,16 +254,21 @@ static void SetupMatrices(VP8Encoder* enc) {

// Very small filter-strength values have close to no visual effect. So we can

// save a little decoding-CPU by turning filtering off for these.

-#define FSTRENGTH_CUTOFF 3

+#define FSTRENGTH_CUTOFF 2

static void SetupFilterStrength(VP8Encoder* const enc) {

int i;

- const int level0 = enc->config_->filter_strength;

+ // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.

+ const int level0 = 5 * enc->config_->filter_strength;

for (i = 0; i < NUM_MB_SEGMENTS; ++i) {

- // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)

- const int level = level0 * 256 * enc->dqm_[i].quant_ / 128;

- const int f = level / (256 + enc->dqm_[i].beta_);

- enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;

+ VP8SegmentInfo* const m = &enc->dqm_[i];

+ // We focus on the quantization of AC coeffs.

+ const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;

+ const int base_strength =

+ VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);

+ // Segments with lower complexity ('beta') will be less filtered.

+ const int f = base_strength * level0 / (256 + m->beta_);

+ m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;

}

// We record the initial strength (mainly for the case of 1-segment only).

enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;

@@ -234,7 +292,7 @@ static double QualityToCompression(double c) {

// exponent is somewhere between 2.8 and 3.2, but we're mostly interested

// in the mid-quant range. So we scale the compressibility inversely to

// this power-law: quant ~= compression ^ 1/3. This law holds well for

- // low quant. Finer modelling for high-quant would make use of kAcTable[]

+ // low quant. Finer modeling for high-quant would make use of kAcTable[]

// more explicitly.

const double v = pow(linear_c, 1 / 3.);

return v;

@@ -367,16 +425,14 @@ const int VP8I4ModeOffsets[NUM_BMODES] = {

};

void VP8MakeLuma16Preds(const VP8EncIterator* const it) {

- const VP8Encoder* const enc = it->enc_;

- const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;

- const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;

+ const uint8_t* const left = it->x_ ? it->y_left_ : NULL;

+ const uint8_t* const top = it->y_ ? it->y_top_ : NULL;

VP8EncPredLuma16(it->yuv_p_, left, top);

}

void VP8MakeChroma8Preds(const VP8EncIterator* const it) {

- const VP8Encoder* const enc = it->enc_;

- const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;

- const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;

+ const uint8_t* const left = it->x_ ? it->u_left_ : NULL;

+ const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;

VP8EncPredChroma8(it->yuv_p_, left, top);

}

@@ -432,6 +488,7 @@ static void InitScore(VP8ModeScore* const rd) {

rd->D = 0;

rd->SD = 0;

rd->R = 0;

+ rd->H = 0;

rd->nz = 0;

rd->score = MAX_COST;

}

@@ -440,6 +497,7 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

dst->D = src->D;

dst->SD = src->SD;

dst->R = src->R;

+ dst->H = src->H;

dst->nz = src->nz; // note that nz is not accumulated, but just copied.

dst->score = src->score;

}

@@ -448,6 +506,7 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

dst->D += src->D;

dst->SD += src->SD;

dst->R += src->R;

+ dst->H += src->H;

dst->nz |= src->nz; // here, new nz bits are accumulated.

dst->score += src->score;

}

@@ -476,7 +535,7 @@ typedef struct {

static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {

// TODO: incorporate the "* 256" in the tables?

- rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);

+ rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);

}

static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,

@@ -539,11 +598,10 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,

// note: it's important to take sign of the _original_ coeff,

// so we don't have to consider level < 0 afterward.

const int sign = (in[j] < 0);

- int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];

- int level0;

- if (coeff0 > 2047) coeff0 = 2047;

+ const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];

+ int level0 = QUANTDIV(coeff0, iQ, B);

+ if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;

- level0 = QUANTDIV(coeff0, iQ, B);

// test all alternate level values around level0.

for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {

Node* const cur = &NODE(n, m);

@@ -555,7 +613,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,

cur->sign = sign;

cur->level = level;

cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;

- if (level >= 2048 || level < 0) { // node is dead?

+ if (level > MAX_LEVEL || level < 0) { // node is dead?

cur->cost = MAX_COST;

continue;

}

@@ -648,10 +706,10 @@ static int ReconstructIntra16(VP8EncIterator* const it,

VP8ModeScore* const rd,

uint8_t* const yuv_out,

int mode) {

- const VP8Encoder* const enc = it->enc_;

+ VP8Encoder* const enc = it->enc_;

const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

const uint8_t* const src = it->yuv_in_ + Y_OFF;

- const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

+ VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

int nz = 0;

int n;

int16_t tmp[16][16], dc_tmp[16];

@@ -660,7 +718,7 @@ static int ReconstructIntra16(VP8EncIterator* const it,

VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);

}

VP8FTransformWHT(tmp[0], dc_tmp);

- nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;

+ nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;

if (DO_TRELLIS_I16 && it->do_trellis_) {

int x, y;

@@ -755,7 +813,18 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,

//------------------------------------------------------------------------------

// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.

-// Pick the mode is lower RD-cost = Rate + lamba * Distortion.

+// Pick the mode is lower RD-cost = Rate + lambda * Distortion.

+static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {

+ // We look at the first three AC coefficients to determine what is the average

+ // delta between each sub-4x4 block.

+ const int v0 = abs(DCs[1]);

+ const int v1 = abs(DCs[4]);

+ const int v2 = abs(DCs[5]);

+ int max_v = (v0 > v1) ? v1 : v0;

+ max_v = (v2 > max_v) ? v2 : max_v;

+ if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;

static void SwapPtr(uint8_t** a, uint8_t** b) {

uint8_t* const tmp = *a;

@@ -767,9 +836,23 @@ static void SwapOut(VP8EncIterator* const it) {

SwapPtr(&it->yuv_out_, &it->yuv_out2_);

}

+static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {

+ score_t score = 0;

+ while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?

+ int i;

+ for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC

+ score += (levels[i] != 0);

+ if (score > thresh) return 0;

+ }

+ levels += 16;

+ }

+ return 1;

static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {

- const VP8Encoder* const enc = it->enc_;

- const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

+ const int kNumBlocks = 16;

+ VP8Encoder* const enc = it->enc_;

+ VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

const int lambda = dqm->lambda_i16_;

const int tlambda = dqm->tlambda_;

const uint8_t* const src = it->yuv_in_ + Y_OFF;

@@ -788,8 +871,13 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {

rd16.D = VP8SSE16x16(src, tmp_dst);

rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))

: 0;

+ rd16.H = VP8FixedCostsI16[mode];

rd16.R = VP8GetCostLuma16(it, &rd16);

- rd16.R += VP8FixedCostsI16[mode];

+ if (mode > 0 &&

+ IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {

+ // penalty to avoid flat area to be mispredicted by complex mode

+ rd16.R += FLATNESS_PENALTY * kNumBlocks;

+ }

// Since we always examine Intra16 first, we can overwrite *rd directly.

SetRDScore(lambda, &rd16);

@@ -804,6 +892,13 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {

}

SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.

VP8SetIntra16Mode(it, rd->mode_i16);

+ // we have a blocky macroblock (only DCs are non-zero) with fairly high

+ // distortion, record max delta so we can later adjust the minimal filtering

+ // strength needed to smooth these blocks out.

+ if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {

+ StoreMaxDelta(dqm, rd->y_dc_levels);

+ }

}

//------------------------------------------------------------------------------

@@ -833,9 +928,11 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

}

InitScore(&rd_best);

- rd_best.score = 211; // '211' is the value of VP8BitCost(0, 145)

+ rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)

+ SetRDScore(dqm->lambda_mode_, &rd_best);

VP8IteratorStartI4(it);

do {

+ const int kNumBlocks = 1;

VP8ModeScore rd_i4;

int mode;

int best_mode = -1;

@@ -859,8 +956,11 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

rd_tmp.SD =

tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))

: 0;

+ rd_tmp.H = mode_costs[mode];

rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);

- rd_tmp.R += mode_costs[mode];

+ if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {

+ rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;

+ }

SetRDScore(lambda, &rd_tmp);

if (best_mode < 0 || rd_tmp.score < rd_i4.score) {

@@ -872,14 +972,17 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

}

SetRDScore(dqm->lambda_mode_, &rd_i4);

AddScore(&rd_best, &rd_i4);

- total_header_bits += mode_costs[best_mode];

- if (rd_best.score >= rd->score ||

- total_header_bits > enc->max_i4_header_bits_) {

+ if (rd_best.score >= rd->score) {

+ return 0;

+ }

+ total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];

+ if (total_header_bits > enc->max_i4_header_bits_) {

return 0;

}

// Copy selected samples if not in the right place already.

- if (best_block != best_blocks + VP8Scan[it->i4_])

+ if (best_block != best_blocks + VP8Scan[it->i4_]) {

VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);

+ }

rd->modes_i4[it->i4_] = best_mode;

it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);

} while (VP8IteratorRotateI4(it, best_blocks));

@@ -895,6 +998,7 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

//------------------------------------------------------------------------------

static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {

+ const int kNumBlocks = 8;

const VP8Encoder* const enc = it->enc_;

const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

const int lambda = dqm->lambda_uv_;

@@ -915,8 +1019,11 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {

// Compute RD-score

rd_uv.D = VP8SSE16x8(src, tmp_dst);

rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.

+ rd_uv.H = VP8FixedCostsUV[mode];

rd_uv.R = VP8GetCostUV(it, &rd_uv);

- rd_uv.R += VP8FixedCostsUV[mode];

+ if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {

+ rd_uv.R += FLATNESS_PENALTY * kNumBlocks;

+ }

SetRDScore(lambda, &rd_uv);

if (mode == 0 || rd_uv.score < rd_best.score) {

@@ -1047,6 +1154,3 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,

return is_skipped;

}

-#if defined(__cplusplus) || defined(c_plusplus)

-} // extern "C"

-#endif

« no previous file with comments | « third_party/libwebp/enc/picture.c ('k') | third_party/libwebp/enc/syntax.c » ('j') | no next file with comments »