Index: third_party/libwebp/enc/quant.c |
diff --git a/third_party/libwebp/enc/quant.c b/third_party/libwebp/enc/quant.c |
index 9130a41609baa781ca1a6633a56e2c0a2659fbe1..dd6885ab3724176ad232c706aa494b2a4311191d 100644 |
--- a/third_party/libwebp/enc/quant.c |
+++ b/third_party/libwebp/enc/quant.c |
@@ -30,7 +30,7 @@ |
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP |
// power-law modulation. Must be strictly less than 1. |
-#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision |
+#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision |
// number of non-zero coeffs below which we consider the block very flat |
// (and apply a penalty to complex predictions) |
@@ -41,6 +41,8 @@ |
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8) |
+#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) |
+ |
// #define DEBUG_BLOCK |
//------------------------------------------------------------------------------ |
@@ -54,15 +56,37 @@ static void PrintBlockInfo(const VP8EncIterator* const it, |
const VP8ModeScore* const rd) { |
int i, j; |
const int is_i16 = (it->mb_->type_ == 1); |
+ const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC; |
+ const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC; |
+ const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC; |
+ const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC; |
printf("SOURCE / OUTPUT / ABS DELTA\n"); |
- for (j = 0; j < 24; ++j) { |
- if (j == 16) printf("\n"); // newline before the U/V block |
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]); |
+ for (j = 0; j < 16; ++j) { |
+ for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]); |
printf(" "); |
- for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); |
+ for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]); |
printf(" "); |
for (i = 0; i < 16; ++i) { |
- printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); |
+ printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS])); |
+ } |
+ printf("\n"); |
+ } |
+ printf("\n"); // newline before the U/V block |
+ for (j = 0; j < 8; ++j) { |
+ for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]); |
+ printf(" "); |
+ for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]); |
+ printf(" "); |
+ for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]); |
+ printf(" "); |
+ for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]); |
+ printf(" "); |
+ for (i = 0; i < 8; ++i) { |
+ printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); |
+ } |
+ printf(" "); |
+ for (i = 8; i < 16; ++i) { |
+ printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); |
} |
printf("\n"); |
} |
@@ -444,15 +468,12 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) { |
// Quantize |
// Layout: |
-// +----+ |
-// |YYYY| 0 |
-// |YYYY| 4 |
-// |YYYY| 8 |
-// |YYYY| 12 |
-// +----+ |
-// |UUVV| 16 |
-// |UUVV| 20 |
-// +----+ |
+// +----+----+ |
+// |YYYY|UUVV| 0 |
+// |YYYY|UUVV| 4 |
+// |YYYY|....| 8 |
+// |YYYY|....| 12 |
+// +----+----+ |
const int VP8Scan[16] = { // Luma |
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
@@ -538,13 +559,12 @@ typedef struct { |
#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) |
static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { |
- // TODO: incorporate the "* 256" in the tables? |
- rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD); |
+ rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD); |
} |
static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, |
score_t distortion) { |
- return rate * lambda + 256 * distortion; |
+ return rate * lambda + RD_DISTO_MULT * distortion; |
} |
static int TrellisQuantizeBlock(const VP8Encoder* const enc, |
@@ -553,7 +573,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, |
const VP8Matrix* const mtx, |
int lambda) { |
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; |
- const CostArray* const costs = enc->proba_.level_cost_[coeff_type]; |
+ CostArrayPtr const costs = |
+ (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; |
const int first = (coeff_type == 0) ? 1 : 0; |
Node nodes[16][NUM_NODES]; |
ScoreState score_states[2][NUM_NODES]; |
@@ -590,7 +611,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, |
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { |
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; |
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); |
- ss_cur[m].costs = costs[VP8EncBands[first]][ctx0]; |
+ ss_cur[m].costs = costs[first][ctx0]; |
} |
} |
@@ -624,7 +645,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, |
int best_prev = 0; // default, in case |
ss_cur[m].score = MAX_COST; |
- ss_cur[m].costs = costs[band][ctx]; |
+ ss_cur[m].costs = costs[n + 1][ctx]; |
if (level > MAX_LEVEL || level < 0) { // node is dead? |
continue; |
} |
@@ -719,14 +740,14 @@ static int ReconstructIntra16(VP8EncIterator* const it, |
int mode) { |
const VP8Encoder* const enc = it->enc_; |
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
- const uint8_t* const src = it->yuv_in_ + Y_OFF; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
int nz = 0; |
int n; |
int16_t tmp[16][16], dc_tmp[16]; |
- for (n = 0; n < 16; ++n) { |
- VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); |
+ for (n = 0; n < 16; n += 2) { |
+ VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); |
} |
VP8FTransformWHT(tmp[0], dc_tmp); |
nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; |
@@ -746,12 +767,13 @@ static int ReconstructIntra16(VP8EncIterator* const it, |
} |
} |
} else { |
- for (n = 0; n < 16; ++n) { |
+ for (n = 0; n < 16; n += 2) { |
// Zero-out the first coeff, so that: a) nz is correct below, and |
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. |
- tmp[n][0] = 0; |
- nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; |
- assert(rd->y_ac_levels[n][0] == 0); |
+ tmp[n][0] = tmp[n + 1][0] = 0; |
+ nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; |
+ assert(rd->y_ac_levels[n + 0][0] == 0); |
+ assert(rd->y_ac_levels[n + 1][0] == 0); |
} |
} |
@@ -792,14 +814,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, |
uint8_t* const yuv_out, int mode) { |
const VP8Encoder* const enc = it->enc_; |
const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
- const uint8_t* const src = it->yuv_in_ + U_OFF; |
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
int nz = 0; |
int n; |
int16_t tmp[8][16]; |
- for (n = 0; n < 8; ++n) { |
- VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); |
+ for (n = 0; n < 8; n += 2) { |
+ VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); |
} |
if (DO_TRELLIS_UV && it->do_trellis_) { |
int ch, x, y; |
@@ -816,8 +838,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, |
} |
} |
} else { |
- for (n = 0; n < 8; ++n) { |
- nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; |
+ for (n = 0; n < 8; n += 2) { |
+ nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; |
} |
} |
@@ -842,6 +864,12 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { |
if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; |
} |
+static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { |
+ VP8ModeScore* const tmp = *a; |
+ *a = *b; |
+ *b = tmp; |
+} |
+ |
static void SwapPtr(uint8_t** a, uint8_t** b) { |
uint8_t* const tmp = *a; |
*a = *b; |
@@ -865,46 +893,47 @@ static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { |
return 1; |
} |
-static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { |
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { |
const int kNumBlocks = 16; |
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
const int lambda = dqm->lambda_i16_; |
const int tlambda = dqm->tlambda_; |
- const uint8_t* const src = it->yuv_in_ + Y_OFF; |
- VP8ModeScore rd16; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
+ VP8ModeScore rd_tmp; |
+ VP8ModeScore* rd_cur = &rd_tmp; |
+ VP8ModeScore* rd_best = rd; |
int mode; |
rd->mode_i16 = -1; |
for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
- uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer |
- int nz; |
+ uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer |
+ rd_cur->mode_i16 = mode; |
// Reconstruct |
- nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); |
+ rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode); |
// Measure RD-score |
- rd16.D = VP8SSE16x16(src, tmp_dst); |
- rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) |
- : 0; |
- rd16.H = VP8FixedCostsI16[mode]; |
- rd16.R = VP8GetCostLuma16(it, &rd16); |
+ rd_cur->D = VP8SSE16x16(src, tmp_dst); |
+ rd_cur->SD = |
+ tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; |
+ rd_cur->H = VP8FixedCostsI16[mode]; |
+ rd_cur->R = VP8GetCostLuma16(it, rd_cur); |
if (mode > 0 && |
- IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { |
+ IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { |
// penalty to avoid flat area to be mispredicted by complex mode |
- rd16.R += FLATNESS_PENALTY * kNumBlocks; |
+ rd_cur->R += FLATNESS_PENALTY * kNumBlocks; |
} |
// Since we always examine Intra16 first, we can overwrite *rd directly. |
- SetRDScore(lambda, &rd16); |
- if (mode == 0 || rd16.score < rd->score) { |
- CopyScore(rd, &rd16); |
- rd->mode_i16 = mode; |
- rd->nz = nz; |
- memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels)); |
- memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels)); |
+ SetRDScore(lambda, rd_cur); |
+ if (mode == 0 || rd_cur->score < rd_best->score) { |
+ SwapModeScore(&rd_cur, &rd_best); |
SwapOut(it); |
} |
} |
+ if (rd_best != rd) { |
+ memcpy(rd, rd_best, sizeof(*rd)); |
+ } |
SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. |
VP8SetIntra16Mode(it, rd->mode_i16); |
@@ -933,8 +962,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { |
const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
const int lambda = dqm->lambda_i4_; |
const int tlambda = dqm->tlambda_; |
- const uint8_t* const src0 = it->yuv_in_ + Y_OFF; |
- uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; |
+ const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC; |
+ uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC; |
int total_header_bits = 0; |
VP8ModeScore rd_best; |
@@ -972,17 +1001,28 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { |
tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) |
: 0; |
rd_tmp.H = mode_costs[mode]; |
- rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); |
+ |
+ // Add flatness penalty |
if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { |
- rd_tmp.R += FLATNESS_PENALTY * kNumBlocks; |
+ rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; |
+ } else { |
+ rd_tmp.R = 0; |
} |
+ // early-out check |
SetRDScore(lambda, &rd_tmp); |
+ if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue; |
+ |
+ // finish computing score |
+ rd_tmp.R += VP8GetCostLuma4(it, tmp_levels); |
+ SetRDScore(lambda, &rd_tmp); |
+ |
if (best_mode < 0 || rd_tmp.score < rd_i4.score) { |
CopyScore(&rd_i4, &rd_tmp); |
best_mode = mode; |
SwapPtr(&tmp_dst, &best_block); |
- memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); |
+ memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, |
+ sizeof(rd_best.y_ac_levels[it->i4_])); |
} |
} |
SetRDScore(dqm->lambda_mode_, &rd_i4); |
@@ -1016,9 +1056,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
const int kNumBlocks = 8; |
const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
const int lambda = dqm->lambda_uv_; |
- const uint8_t* const src = it->yuv_in_ + U_OFF; |
- uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer |
- uint8_t* const dst0 = it->yuv_out_ + U_OFF; |
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
+ uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer |
+ uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC; |
+ uint8_t* dst = dst0; |
VP8ModeScore rd_best; |
int mode; |
@@ -1032,7 +1073,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
// Compute RD-score |
rd_uv.D = VP8SSE16x8(src, tmp_dst); |
- rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. |
+ rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas. |
rd_uv.H = VP8FixedCostsUV[mode]; |
rd_uv.R = VP8GetCostUV(it, &rd_uv); |
if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { |
@@ -1044,11 +1085,14 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
CopyScore(&rd_best, &rd_uv); |
rd->mode_uv = mode; |
memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); |
- memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? |
+ SwapPtr(&dst, &tmp_dst); |
} |
} |
VP8SetIntraUVMode(it, rd->mode_uv); |
AddScore(rd, &rd_best); |
+ if (dst != dst0) { // copy 16x8 block if needed |
+ VP8Copy16x8(dst, dst0); |
+ } |
} |
//------------------------------------------------------------------------------ |
@@ -1060,35 +1104,41 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { |
int nz = 0; |
if (is_i16) { |
- nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); |
+ nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
} else { |
VP8IteratorStartI4(it); |
do { |
const int mode = |
it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; |
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; |
- uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_]; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
+ uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
VP8MakeIntra4Preds(it); |
nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], |
src, dst, mode) << it->i4_; |
- } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF)); |
+ } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC)); |
} |
- nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_); |
+ nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
rd->nz = nz; |
} |
// Refine intra16/intra4 sub-modes based on distortion only (not rate). |
-static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { |
- const int is_i16 = (it->mb_->type_ == 1); |
+static void RefineUsingDistortion(VP8EncIterator* const it, |
+ int try_both_modes, int refine_uv_mode, |
+ VP8ModeScore* const rd) { |
score_t best_score = MAX_COST; |
+ score_t score_i4 = (score_t)I4_PENALTY; |
+ int16_t tmp_levels[16][16]; |
+ uint8_t modes_i4[16]; |
+ int nz = 0; |
+ int mode; |
+ int is_i16 = try_both_modes || (it->mb_->type_ == 1); |
- if (try_both_i4_i16 || is_i16) { |
- int mode; |
+ if (is_i16) { // First, evaluate Intra16 distortion |
int best_mode = -1; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
- const uint8_t* const src = it->yuv_in_ + Y_OFF; |
const score_t score = VP8SSE16x16(src, ref); |
if (score < best_score) { |
best_mode = mode; |
@@ -1096,39 +1146,72 @@ static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { |
} |
} |
VP8SetIntra16Mode(it, best_mode); |
+ // we'll reconstruct later, if i16 mode actually gets selected |
} |
- if (try_both_i4_i16 || !is_i16) { |
- uint8_t modes_i4[16]; |
+ |
+ // Next, evaluate Intra4 |
+ if (try_both_modes || !is_i16) { |
// We don't evaluate the rate here, but just account for it through a |
// constant penalty (i4 mode usually needs more bits compared to i16). |
- score_t score_i4 = (score_t)I4_PENALTY; |
- |
+ is_i16 = 0; |
VP8IteratorStartI4(it); |
do { |
- int mode; |
- int best_sub_mode = -1; |
- score_t best_sub_score = MAX_COST; |
- const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; |
+ int best_i4_mode = -1; |
+ score_t best_i4_score = MAX_COST; |
+ const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
- // TODO(skal): we don't really need the prediction pixels here, |
- // but just the distortion against 'src'. |
VP8MakeIntra4Preds(it); |
for (mode = 0; mode < NUM_BMODES; ++mode) { |
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; |
const score_t score = VP8SSE4x4(src, ref); |
- if (score < best_sub_score) { |
- best_sub_mode = mode; |
- best_sub_score = score; |
+ if (score < best_i4_score) { |
+ best_i4_mode = mode; |
+ best_i4_score = score; |
} |
} |
- modes_i4[it->i4_] = best_sub_mode; |
- score_i4 += best_sub_score; |
- if (score_i4 >= best_score) break; |
- } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); |
- if (score_i4 < best_score) { |
- VP8SetIntra4Mode(it, modes_i4); |
+ modes_i4[it->i4_] = best_i4_mode; |
+ score_i4 += best_i4_score; |
+ if (score_i4 >= best_score) { |
+ // Intra4 won't be better than Intra16. Bail out and pick Intra16. |
+ is_i16 = 1; |
+ break; |
+ } else { // reconstruct partial block inside yuv_out2_ buffer |
+ uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
+ nz |= ReconstructIntra4(it, tmp_levels[it->i4_], |
+ src, tmp_dst, best_i4_mode) << it->i4_; |
+ } |
+ } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); |
+ } |
+ |
+ // Final reconstruction, depending on which mode is selected. |
+ if (!is_i16) { |
+ VP8SetIntra4Mode(it, modes_i4); |
+ memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); |
+ SwapOut(it); |
+ best_score = score_i4; |
+ } else { |
+ nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
+ } |
+ |
+ // ... and UV! |
+ if (refine_uv_mode) { |
+ int best_mode = -1; |
+ score_t best_uv_score = MAX_COST; |
+ const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
+ for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
+ const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
+ const score_t score = VP8SSE16x8(src, ref); |
+ if (score < best_uv_score) { |
+ best_mode = mode; |
+ best_uv_score = score; |
+ } |
} |
+ VP8SetIntraUVMode(it, best_mode); |
} |
+ nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
+ |
+ rd->nz = nz; |
+ rd->score = best_score; |
} |
//------------------------------------------------------------------------------ |
@@ -1158,13 +1241,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, |
SimpleQuantize(it, rd); |
} |
} else { |
- // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). |
- // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). |
- DistoRefine(it, (method >= 2)); |
- SimpleQuantize(it, rd); |
+ // At this point we have heuristically decided intra16 / intra4. |
+ // For method >= 2, pick the best intra4/intra16 based on SSE (a tad slower). |
+ // For method <= 1, we keep that decision and just refine the sub-modes. For |
+ // method >= 1 the UV mode is refined too. Quantization/reconstruction follow. |
+ RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); |
} |
is_skipped = (rd->nz == 0); |
VP8SetSkip(it, is_skipped); |
return is_skipped; |
} |
- |