Index: third_party/libwebp/enc/quant.c |
diff --git a/third_party/libwebp/enc/quant.c b/third_party/libwebp/enc/quant.c |
index ea153849c825cb0d2a2d3d00f5c8ec47bf8c95d5..dcfd4d16daeb28dc9b9e91e101b79225558f6da9 100644 |
--- a/third_party/libwebp/enc/quant.c |
+++ b/third_party/libwebp/enc/quant.c |
@@ -27,6 +27,8 @@ |
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP |
// power-law modulation. Must be strictly less than 1. |
+#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision |
+ |
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8) |
#if defined(__cplusplus) || defined(c_plusplus) |
@@ -224,28 +226,90 @@ static void SetupFilterStrength(VP8Encoder* const enc) { |
// We want to emulate jpeg-like behaviour where the expected "good" quality |
// is around q=75. Internally, our "good" middle is around c=50. So we |
// map accordingly using linear piece-wise function |
-static double QualityToCompression(double q) { |
- const double c = q / 100.; |
- return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.; |
+static double QualityToCompression(double c) { |
+ const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.; |
+ // The file size roughly scales as pow(quantizer, 3.). Actually, the |
+ // exponent is somewhere between 2.8 and 3.2, but we're mostly interested |
+ // in the mid-quant range. So we scale the compressibility inversely to |
+ // this power-law: quant ~= compression ^ 1/3. This law holds well for |
+ // low quant. Finer modelling for high-quant would make use of kAcTable[] |
+ // more explicitly. |
+ const double v = pow(linear_c, 1 / 3.); |
+ return v; |
+} |
+ |
+static double QualityToJPEGCompression(double c, double alpha) { |
+ // We map the complexity 'alpha' and quality setting 'c' to a compression |
+ // exponent empirically matched to the compression curve of libjpeg6b. |
+ // On average, the WebP output size will be roughly similar to that of a |
+ // JPEG file compressed with same quality factor. |
+ const double amin = 0.30; |
+ const double amax = 0.85; |
+ const double exp_min = 0.4; |
+ const double exp_max = 0.9; |
+ const double slope = (exp_min - exp_max) / (amax - amin); |
+ // Linearly interpolate 'expn' from exp_min to exp_max |
+ // in the [amin, amax] range. |
+ const double expn = (alpha > amax) ? exp_min |
+ : (alpha < amin) ? exp_max |
+ : exp_max + slope * (alpha - amin); |
+ const double v = pow(c, expn); |
+ return v; |
+} |
+ |
+static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, |
+ const VP8SegmentInfo* const S2) { |
+ return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_); |
+} |
+ |
+static void SimplifySegments(VP8Encoder* const enc) { |
+ int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; |
+ const int num_segments = enc->segment_hdr_.num_segments_; |
+ int num_final_segments = 1; |
+ int s1, s2; |
+ for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments |
+ const VP8SegmentInfo* const S1 = &enc->dqm_[s1]; |
+ int found = 0; |
+ // check if we already have similar segment |
+ for (s2 = 0; s2 < num_final_segments; ++s2) { |
+ const VP8SegmentInfo* const S2 = &enc->dqm_[s2]; |
+ if (SegmentsAreEquivalent(S1, S2)) { |
+ found = 1; |
+ break; |
+ } |
+ } |
+ map[s1] = s2; |
+ if (!found) { |
+ if (num_final_segments != s1) { |
+ enc->dqm_[num_final_segments] = enc->dqm_[s1]; |
+ } |
+ ++num_final_segments; |
+ } |
+ } |
+ if (num_final_segments < num_segments) { // Remap |
+ int i = enc->mb_w_ * enc->mb_h_; |
+ while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_]; |
+ enc->segment_hdr_.num_segments_ = num_final_segments; |
+ // Replicate the trailing segment infos (it's mostly cosmetics) |
+ for (i = num_final_segments; i < num_segments; ++i) { |
+ enc->dqm_[i] = enc->dqm_[num_final_segments - 1]; |
+ } |
+ } |
} |
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { |
int i; |
int dq_uv_ac, dq_uv_dc; |
- const int num_segments = enc->config_->segments; |
+ const int num_segments = enc->segment_hdr_.num_segments_; |
const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.; |
- const double c_base = QualityToCompression(quality); |
+ const double Q = quality / 100.; |
+ const double c_base = enc->config_->emulate_jpeg_size ? |
+ QualityToJPEGCompression(Q, enc->alpha_ / 255.) : |
+ QualityToCompression(Q); |
for (i = 0; i < num_segments; ++i) { |
- // The file size roughly scales as pow(quantizer, 3.). Actually, the |
- // exponent is somewhere between 2.8 and 3.2, but we're mostly interested |
- // in the mid-quant range. So we scale the compressibility inversely to |
- // this power-law: quant ~= compression ^ 1/3. This law holds well for |
- // low quant. Finer modelling for high-quant would make use of kAcTable[] |
- // more explicitely. |
- // Additionally, we modulate the base exponent 1/3 to accommodate for the |
- // quantization susceptibility and allow denser segments to be quantized |
- // more. |
- const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.; |
+ // We modulate the base coefficient to account for the quantization |
+ // susceptibility and allow denser segments to be quantized more. |
+ const double expn = 1. - amp * enc->dqm_[i].alpha_; |
const double c = pow(c_base, expn); |
const int q = (int)(127. * (1. - c)); |
assert(expn > 0.); |
@@ -281,9 +345,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) { |
enc->dq_uv_dc_ = dq_uv_dc; |
enc->dq_uv_ac_ = dq_uv_ac; |
- SetupMatrices(enc); |
- |
SetupFilterStrength(enc); // initialize segments' filtering, eventually |
+ |
+ if (num_segments > 1) SimplifySegments(enc); |
+ |
+ SetupMatrices(enc); // finalize quantization matrices |
} |
//------------------------------------------------------------------------------ |
@@ -709,7 +775,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { |
int mode; |
rd->mode_i16 = -1; |
- for (mode = 0; mode < 4; ++mode) { |
+ for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer |
int nz; |
@@ -838,7 +904,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
rd->mode_uv = -1; |
InitScore(&rd_best); |
- for (mode = 0; mode < 4; ++mode) { |
+ for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
VP8ModeScore rd_uv; |
// Reconstruct |
@@ -867,10 +933,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { |
const VP8Encoder* const enc = it->enc_; |
- const int i16 = (it->mb_->type_ == 1); |
+ const int is_i16 = (it->mb_->type_ == 1); |
int nz = 0; |
- if (i16) { |
+ if (is_i16) { |
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); |
} else { |
VP8IteratorStartI4(it); |
@@ -889,11 +955,66 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { |
rd->nz = nz; |
} |
+// Refine intra16/intra4 sub-modes based on distortion only (not rate). |
+static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { |
+ const int is_i16 = (it->mb_->type_ == 1); |
+ score_t best_score = MAX_COST; |
+ |
+ if (try_both_i4_i16 || is_i16) { |
+ int mode; |
+ int best_mode = -1; |
+ for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
+ const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF; |
+ const score_t score = VP8SSE16x16(src, ref); |
+ if (score < best_score) { |
+ best_mode = mode; |
+ best_score = score; |
+ } |
+ } |
+ VP8SetIntra16Mode(it, best_mode); |
+ } |
+ if (try_both_i4_i16 || !is_i16) { |
+ uint8_t modes_i4[16]; |
+ // We don't evaluate the rate here, but just account for it through a |
+ // constant penalty (i4 mode usually needs more bits compared to i16). |
+ score_t score_i4 = (score_t)I4_PENALTY; |
+ |
+ VP8IteratorStartI4(it); |
+ do { |
+ int mode; |
+ int best_sub_mode = -1; |
+ score_t best_sub_score = MAX_COST; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; |
+ |
+ // TODO(skal): we don't really need the prediction pixels here, |
+ // but just the distortion against 'src'. |
+ VP8MakeIntra4Preds(it); |
+ for (mode = 0; mode < NUM_BMODES; ++mode) { |
+ const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; |
+ const score_t score = VP8SSE4x4(src, ref); |
+ if (score < best_sub_score) { |
+ best_sub_mode = mode; |
+ best_sub_score = score; |
+ } |
+ } |
+ modes_i4[it->i4_] = best_sub_mode; |
+ score_i4 += best_sub_score; |
+ if (score_i4 >= best_score) break; |
+ } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); |
+ if (score_i4 < best_score) { |
+ VP8SetIntra4Mode(it, modes_i4); |
+ } |
+ } |
+} |
+ |
//------------------------------------------------------------------------------ |
// Entry point |
-int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) { |
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, |
+ VP8RDLevel rd_opt) { |
int is_skipped; |
+ const int method = it->enc_->method_; |
InitScore(rd); |
@@ -902,22 +1023,21 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) { |
VP8MakeLuma16Preds(it); |
VP8MakeChroma8Preds(it); |
- // for rd_opt = 2, we perform trellis-quant on the final decision only. |
- // for rd_opt > 2, we use it for every scoring (=much slower). |
- if (rd_opt > 0) { |
- it->do_trellis_ = (rd_opt > 2); |
+ if (rd_opt > RD_OPT_NONE) { |
+ it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); |
PickBestIntra16(it, rd); |
- if (it->enc_->method_ >= 2) { |
+ if (method >= 2) { |
PickBestIntra4(it, rd); |
} |
PickBestUV(it, rd); |
- if (rd_opt == 2) { |
+ if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now |
it->do_trellis_ = 1; |
SimpleQuantize(it, rd); |
} |
} else { |
- // TODO: for method_ == 2, pick the best intra4/intra16 based on SSE |
- it->do_trellis_ = (it->enc_->method_ == 2); |
+ // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). |
+ // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). |
+ DistoRefine(it, (method >= 2)); |
SimpleQuantize(it, rd); |
} |
is_skipped = (rd->nz == 0); |