Index: third_party/libwebp/enc/quant.c
diff --git a/third_party/libwebp/enc/quant.c b/third_party/libwebp/enc/quant.c
index dd6885ab3724176ad232c706aa494b2a4311191d..549ad26f932e9393a308db26b8d8bef6851b6d5f 100644
--- a/third_party/libwebp/enc/quant.c
+++ b/third_party/libwebp/enc/quant.c
@@ -30,8 +30,6 @@
 #define SNS_TO_DQ 0.9  // Scaling constant between the sns value and the QP
                        // power-law modulation. Must be strictly less than 1.
 
-#define I4_PENALTY 14000  // Rate-penalty for quick i4/i16 decision
-
 // number of non-zero coeffs below which we consider the block very flat
 // (and apply a penalty to complex predictions)
 #define FLATNESS_LIMIT_I16 10  // I16 mode
@@ -236,6 +234,8 @@ static int ExpandMatrix(VP8Matrix* const m, int type) {
   return (sum + 8) >> 4;
 }
 
+static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; }
+
 static void SetupMatrices(VP8Encoder* enc) {
   int i;
   const int tlambda_scale =
@@ -245,7 +245,7 @@ static void SetupMatrices(VP8Encoder* enc) {
   for (i = 0; i < num_segments; ++i) {
     VP8SegmentInfo* const m = &enc->dqm_[i];
     const int q = m->quant_;
-    int q4, q16, quv;
+    int q_i4, q_i16, q_uv;
 
     m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
     m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
@@ -255,21 +255,33 @@ static void SetupMatrices(VP8Encoder* enc) {
 
     m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
     m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
 
-    q4 = ExpandMatrix(&m->y1_, 0);
-    q16 = ExpandMatrix(&m->y2_, 1);
-    quv = ExpandMatrix(&m->uv_, 2);
-
-    m->lambda_i4_ = (3 * q4 * q4) >> 7;
-    m->lambda_i16_ = (3 * q16 * q16);
-    m->lambda_uv_ = (3 * quv * quv) >> 6;
-    m->lambda_mode_ = (1 * q4 * q4) >> 7;
-    m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
-    m->lambda_trellis_i16_ = (q16 * q16) >> 2;
-    m->lambda_trellis_uv_ = (quv *quv) << 1;
-    m->tlambda_ = (tlambda_scale * q4) >> 5;
+    q_i4 = ExpandMatrix(&m->y1_, 0);
+    q_i16 = ExpandMatrix(&m->y2_, 1);
+    q_uv = ExpandMatrix(&m->uv_, 2);
+
+    m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7;
+    m->lambda_i16_ = (3 * q_i16 * q_i16);
+    m->lambda_uv_ = (3 * q_uv * q_uv) >> 6;
+    m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7;
+    m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3;
+    m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;
+    m->lambda_trellis_uv_ = (q_uv * q_uv) << 1;
+    m->tlambda_ = (tlambda_scale * q_i4) >> 5;
+
+    // none of these constants should be < 1
+    CheckLambdaValue(&m->lambda_i4_);
+    CheckLambdaValue(&m->lambda_i16_);
+    CheckLambdaValue(&m->lambda_uv_);
+    CheckLambdaValue(&m->lambda_mode_);
+    CheckLambdaValue(&m->lambda_trellis_i4_);
+    CheckLambdaValue(&m->lambda_trellis_i16_);
+    CheckLambdaValue(&m->lambda_trellis_uv_);
+    CheckLambdaValue(&m->tlambda_);
 
     m->min_disto_ = 10 * m->y1_.q_[0];  // quantization-aware min disto
     m->max_edge_ = 0;
+
+    m->i4_penalty_ = 1000 * q_i4 * q_i4;
   }
 }
@@ -348,7 +360,12 @@ static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
 
 static void SimplifySegments(VP8Encoder* const enc) {
   int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
-  const int num_segments = enc->segment_hdr_.num_segments_;
+  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+  // explicit check is needed to avoid a spurious warning about 'i' exceeding
+  // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).
+  const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)
+                               ? enc->segment_hdr_.num_segments_
+                               : NUM_MB_SEGMENTS;
   int num_final_segments = 1;
   int s1, s2;
   for (s1 = 1; s1 < num_segments; ++s1) {  // find similar segments
@@ -1127,19 +1144,29 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
                                   int try_both_modes, int refine_uv_mode,
                                   VP8ModeScore* const rd) {
   score_t best_score = MAX_COST;
-  score_t score_i4 = (score_t)I4_PENALTY;
-  int16_t tmp_levels[16][16];
-  uint8_t modes_i4[16];
 
   int nz = 0;
   int mode;
   int is_i16 = try_both_modes || (it->mb_->type_ == 1);
+  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
+  // Some empiric constants, of approximate order of magnitude.
+  const int lambda_d_i16 = 106;
+  const int lambda_d_i4 = 11;
+  const int lambda_d_uv = 120;
+  score_t score_i4 = dqm->i4_penalty_;
+  score_t i4_bit_sum = 0;
+  const score_t bit_limit = it->enc_->mb_header_limit_;
+
   if (is_i16) {  // First, evaluate Intra16 distortion
     int best_mode = -1;
     const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
     for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
       const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
-      const score_t score = VP8SSE16x16(src, ref);
+      const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT
+                          + VP8FixedCostsI16[mode] * lambda_d_i16;
+      if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
+        continue;
+      }
       if (score < best_score) {
         best_mode = mode;
         best_score = score;
@@ -1159,25 +1186,28 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
       int best_i4_mode = -1;
       score_t best_i4_score = MAX_COST;
       const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+      const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
 
       VP8MakeIntra4Preds(it);
       for (mode = 0; mode < NUM_BMODES; ++mode) {
         const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
-        const score_t score = VP8SSE4x4(src, ref);
+        const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
+                            + mode_costs[mode] * lambda_d_i4;
         if (score < best_i4_score) {
           best_i4_mode = mode;
           best_i4_score = score;
         }
       }
-      modes_i4[it->i4_] = best_i4_mode;
+      i4_bit_sum += mode_costs[best_i4_mode];
+      rd->modes_i4[it->i4_] = best_i4_mode;
       score_i4 += best_i4_score;
-      if (score_i4 >= best_score) {
+      if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
         // Intra4 won't be better than Intra16. Bail out and pick Intra16.
         is_i16 = 1;
         break;
       } else {  // reconstruct partial block inside yuv_out2_ buffer
         uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
-        nz |= ReconstructIntra4(it, tmp_levels[it->i4_],
+        nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
                                 src, tmp_dst, best_i4_mode) << it->i4_;
       }
     } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
@@ -1185,8 +1215,7 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
 
   // Final reconstruction, depending on which mode is selected.
   if (!is_i16) {
-    VP8SetIntra4Mode(it, modes_i4);
-    memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
+    VP8SetIntra4Mode(it, rd->modes_i4);
     SwapOut(it);
     best_score = score_i4;
   } else {
@@ -1200,7 +1229,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
     const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
     for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
       const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
-      const score_t score = VP8SSE16x8(src, ref);
+      const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT
+                          + VP8FixedCostsUV[mode] * lambda_d_uv;
       if (score < best_uv_score) {
         best_mode = mode;
         best_uv_score = score;