| Index: third_party/libwebp/enc/quant.c
|
| diff --git a/third_party/libwebp/enc/quant.c b/third_party/libwebp/enc/quant.c
|
| index 9130a41609baa781ca1a6633a56e2c0a2659fbe1..dd6885ab3724176ad232c706aa494b2a4311191d 100644
|
| --- a/third_party/libwebp/enc/quant.c
|
| +++ b/third_party/libwebp/enc/quant.c
|
| @@ -30,7 +30,7 @@
|
| #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
|
| // power-law modulation. Must be strictly less than 1.
|
|
|
| -#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
|
| +#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision
|
|
|
| // number of non-zero coeffs below which we consider the block very flat
|
| // (and apply a penalty to complex predictions)
|
| @@ -41,6 +41,8 @@
|
|
|
| #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
|
|
|
| +#define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda)
|
| +
|
| // #define DEBUG_BLOCK
|
|
|
| //------------------------------------------------------------------------------
|
| @@ -54,15 +56,37 @@ static void PrintBlockInfo(const VP8EncIterator* const it,
|
| const VP8ModeScore* const rd) {
|
| int i, j;
|
| const int is_i16 = (it->mb_->type_ == 1);
|
| + const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
|
| + const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
|
| + const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
|
| + const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
|
| printf("SOURCE / OUTPUT / ABS DELTA\n");
|
| - for (j = 0; j < 24; ++j) {
|
| - if (j == 16) printf("\n"); // newline before the U/V block
|
| - for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
|
| + for (j = 0; j < 16; ++j) {
|
| + for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
|
| printf(" ");
|
| - for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
|
| + for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
|
| printf(" ");
|
| for (i = 0; i < 16; ++i) {
|
| - printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
|
| + printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
|
| + }
|
| + printf("\n");
|
| + }
|
| + printf("\n"); // newline before the U/V block
|
| + for (j = 0; j < 8; ++j) {
|
| + for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
|
| + printf(" ");
|
| + for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
|
| + printf(" ");
|
| + for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
|
| + printf(" ");
|
| + for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
|
| + printf(" ");
|
| + for (i = 0; i < 8; ++i) {
|
| + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
|
| + }
|
| + printf(" ");
|
| + for (i = 8; i < 16; ++i) {
|
| + printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
|
| }
|
| printf("\n");
|
| }
|
| @@ -444,15 +468,12 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
|
| // Quantize
|
|
|
| // Layout:
|
| -// +----+
|
| -// |YYYY| 0
|
| -// |YYYY| 4
|
| -// |YYYY| 8
|
| -// |YYYY| 12
|
| -// +----+
|
| -// |UUVV| 16
|
| -// |UUVV| 20
|
| -// +----+
|
| +// +----+----+
|
| +// |YYYY|UUVV| 0
|
| +// |YYYY|UUVV| 4
|
| +// |YYYY|....| 8
|
| +// |YYYY|....| 12
|
| +// +----+----+
|
|
|
| const int VP8Scan[16] = { // Luma
|
| 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
|
| @@ -538,13 +559,12 @@ typedef struct {
|
| #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
|
|
|
| static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
|
| - // TODO: incorporate the "* 256" in the tables?
|
| - rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
|
| + rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
|
| }
|
|
|
| static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
|
| score_t distortion) {
|
| - return rate * lambda + 256 * distortion;
|
| + return rate * lambda + RD_DISTO_MULT * distortion;
|
| }
|
|
|
| static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
| @@ -553,7 +573,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
| const VP8Matrix* const mtx,
|
| int lambda) {
|
| const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
|
| - const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
|
| + CostArrayPtr const costs =
|
| + (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
|
| const int first = (coeff_type == 0) ? 1 : 0;
|
| Node nodes[16][NUM_NODES];
|
| ScoreState score_states[2][NUM_NODES];
|
| @@ -590,7 +611,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
| for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
| const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
|
| ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
|
| - ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
|
| + ss_cur[m].costs = costs[first][ctx0];
|
| }
|
| }
|
|
|
| @@ -624,7 +645,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
| int best_prev = 0; // default, in case
|
|
|
| ss_cur[m].score = MAX_COST;
|
| - ss_cur[m].costs = costs[band][ctx];
|
| + ss_cur[m].costs = costs[n + 1][ctx];
|
| if (level > MAX_LEVEL || level < 0) { // node is dead?
|
| continue;
|
| }
|
| @@ -719,14 +740,14 @@ static int ReconstructIntra16(VP8EncIterator* const it,
|
| int mode) {
|
| const VP8Encoder* const enc = it->enc_;
|
| const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
|
| - const uint8_t* const src = it->yuv_in_ + Y_OFF;
|
| + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
|
| const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
|
| int nz = 0;
|
| int n;
|
| int16_t tmp[16][16], dc_tmp[16];
|
|
|
| - for (n = 0; n < 16; ++n) {
|
| - VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
|
| + for (n = 0; n < 16; n += 2) {
|
| + VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
|
| }
|
| VP8FTransformWHT(tmp[0], dc_tmp);
|
| nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
|
| @@ -746,12 +767,13 @@ static int ReconstructIntra16(VP8EncIterator* const it,
|
| }
|
| }
|
| } else {
|
| - for (n = 0; n < 16; ++n) {
|
| + for (n = 0; n < 16; n += 2) {
|
| // Zero-out the first coeff, so that: a) nz is correct below, and
|
| // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
|
| - tmp[n][0] = 0;
|
| - nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
|
| - assert(rd->y_ac_levels[n][0] == 0);
|
| + tmp[n][0] = tmp[n + 1][0] = 0;
|
| + nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
|
| + assert(rd->y_ac_levels[n + 0][0] == 0);
|
| + assert(rd->y_ac_levels[n + 1][0] == 0);
|
| }
|
| }
|
|
|
| @@ -792,14 +814,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
| uint8_t* const yuv_out, int mode) {
|
| const VP8Encoder* const enc = it->enc_;
|
| const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
|
| - const uint8_t* const src = it->yuv_in_ + U_OFF;
|
| + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
|
| const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
|
| int nz = 0;
|
| int n;
|
| int16_t tmp[8][16];
|
|
|
| - for (n = 0; n < 8; ++n) {
|
| - VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
|
| + for (n = 0; n < 8; n += 2) {
|
| + VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
|
| }
|
| if (DO_TRELLIS_UV && it->do_trellis_) {
|
| int ch, x, y;
|
| @@ -816,8 +838,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
|
| }
|
| }
|
| } else {
|
| - for (n = 0; n < 8; ++n) {
|
| - nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
|
| + for (n = 0; n < 8; n += 2) {
|
| + nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
|
| }
|
| }
|
|
|
| @@ -842,6 +864,12 @@ static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
|
| if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
|
| }
|
|
|
| +static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
|
| + VP8ModeScore* const tmp = *a;
|
| + *a = *b;
|
| + *b = tmp;
|
| +}
|
| +
|
| static void SwapPtr(uint8_t** a, uint8_t** b) {
|
| uint8_t* const tmp = *a;
|
| *a = *b;
|
| @@ -865,46 +893,47 @@ static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
|
| return 1;
|
| }
|
|
|
| -static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
| +static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
|
| const int kNumBlocks = 16;
|
| VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
|
| const int lambda = dqm->lambda_i16_;
|
| const int tlambda = dqm->tlambda_;
|
| - const uint8_t* const src = it->yuv_in_ + Y_OFF;
|
| - VP8ModeScore rd16;
|
| + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
|
| + VP8ModeScore rd_tmp;
|
| + VP8ModeScore* rd_cur = &rd_tmp;
|
| + VP8ModeScore* rd_best = rd;
|
| int mode;
|
|
|
| rd->mode_i16 = -1;
|
| for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
|
| - uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
|
| - int nz;
|
| + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer
|
| + rd_cur->mode_i16 = mode;
|
|
|
| // Reconstruct
|
| - nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
|
| + rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
|
|
|
| // Measure RD-score
|
| - rd16.D = VP8SSE16x16(src, tmp_dst);
|
| - rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
|
| - : 0;
|
| - rd16.H = VP8FixedCostsI16[mode];
|
| - rd16.R = VP8GetCostLuma16(it, &rd16);
|
| + rd_cur->D = VP8SSE16x16(src, tmp_dst);
|
| + rd_cur->SD =
|
| + tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
|
| + rd_cur->H = VP8FixedCostsI16[mode];
|
| + rd_cur->R = VP8GetCostLuma16(it, rd_cur);
|
| if (mode > 0 &&
|
| - IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
|
| + IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
|
| // penalty to avoid flat area to be mispredicted by complex mode
|
| - rd16.R += FLATNESS_PENALTY * kNumBlocks;
|
| + rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
|
| }
|
|
|
| // Since we always examine Intra16 first, we can overwrite *rd directly.
|
| - SetRDScore(lambda, &rd16);
|
| - if (mode == 0 || rd16.score < rd->score) {
|
| - CopyScore(rd, &rd16);
|
| - rd->mode_i16 = mode;
|
| - rd->nz = nz;
|
| - memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
|
| - memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
|
| + SetRDScore(lambda, rd_cur);
|
| + if (mode == 0 || rd_cur->score < rd_best->score) {
|
| + SwapModeScore(&rd_cur, &rd_best);
|
| SwapOut(it);
|
| }
|
| }
|
| + if (rd_best != rd) {
|
| + memcpy(rd, rd_best, sizeof(*rd));
|
| + }
|
| SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
|
| VP8SetIntra16Mode(it, rd->mode_i16);
|
|
|
| @@ -933,8 +962,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
| const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
|
| const int lambda = dqm->lambda_i4_;
|
| const int tlambda = dqm->tlambda_;
|
| - const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
|
| - uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
|
| + const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
|
| + uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
|
| int total_header_bits = 0;
|
| VP8ModeScore rd_best;
|
|
|
| @@ -972,17 +1001,28 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
| tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
|
| : 0;
|
| rd_tmp.H = mode_costs[mode];
|
| - rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
|
| +
|
| + // Add flatness penalty
|
| if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
|
| - rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
|
| + rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
|
| + } else {
|
| + rd_tmp.R = 0;
|
| }
|
|
|
| + // early-out check
|
| SetRDScore(lambda, &rd_tmp);
|
| + if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
|
| +
|
| + // finish computing score
|
| + rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
|
| + SetRDScore(lambda, &rd_tmp);
|
| +
|
| if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
|
| CopyScore(&rd_i4, &rd_tmp);
|
| best_mode = mode;
|
| SwapPtr(&tmp_dst, &best_block);
|
| - memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
|
| + memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
|
| + sizeof(rd_best.y_ac_levels[it->i4_]));
|
| }
|
| }
|
| SetRDScore(dqm->lambda_mode_, &rd_i4);
|
| @@ -1016,9 +1056,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
| const int kNumBlocks = 8;
|
| const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
|
| const int lambda = dqm->lambda_uv_;
|
| - const uint8_t* const src = it->yuv_in_ + U_OFF;
|
| - uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer
|
| - uint8_t* const dst0 = it->yuv_out_ + U_OFF;
|
| + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
|
| + uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer
|
| + uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
|
| + uint8_t* dst = dst0;
|
| VP8ModeScore rd_best;
|
| int mode;
|
|
|
| @@ -1032,7 +1073,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
|
|
| // Compute RD-score
|
| rd_uv.D = VP8SSE16x8(src, tmp_dst);
|
| - rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
|
| + rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas.
|
| rd_uv.H = VP8FixedCostsUV[mode];
|
| rd_uv.R = VP8GetCostUV(it, &rd_uv);
|
| if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
|
| @@ -1044,11 +1085,14 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
| CopyScore(&rd_best, &rd_uv);
|
| rd->mode_uv = mode;
|
| memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
|
| - memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?
|
| + SwapPtr(&dst, &tmp_dst);
|
| }
|
| }
|
| VP8SetIntraUVMode(it, rd->mode_uv);
|
| AddScore(rd, &rd_best);
|
| + if (dst != dst0) { // copy 16x8 block if needed
|
| + VP8Copy16x8(dst, dst0);
|
| + }
|
| }
|
|
|
| //------------------------------------------------------------------------------
|
| @@ -1060,35 +1104,41 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
| int nz = 0;
|
|
|
| if (is_i16) {
|
| - nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
|
| + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
|
| } else {
|
| VP8IteratorStartI4(it);
|
| do {
|
| const int mode =
|
| it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
|
| - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
|
| - uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
|
| + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
| + uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
| VP8MakeIntra4Preds(it);
|
| nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
|
| src, dst, mode) << it->i4_;
|
| - } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
|
| + } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
|
| }
|
|
|
| - nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
|
| + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
|
| rd->nz = nz;
|
| }
|
|
|
| // Refine intra16/intra4 sub-modes based on distortion only (not rate).
|
| -static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
|
| - const int is_i16 = (it->mb_->type_ == 1);
|
| +static void RefineUsingDistortion(VP8EncIterator* const it,
|
| + int try_both_modes, int refine_uv_mode,
|
| + VP8ModeScore* const rd) {
|
| score_t best_score = MAX_COST;
|
| + score_t score_i4 = (score_t)I4_PENALTY;
|
| + int16_t tmp_levels[16][16];
|
| + uint8_t modes_i4[16];
|
| + int nz = 0;
|
| + int mode;
|
| + int is_i16 = try_both_modes || (it->mb_->type_ == 1);
|
|
|
| - if (try_both_i4_i16 || is_i16) {
|
| - int mode;
|
| + if (is_i16) { // First, evaluate Intra16 distortion
|
| int best_mode = -1;
|
| + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
|
| for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
|
| const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
|
| - const uint8_t* const src = it->yuv_in_ + Y_OFF;
|
| const score_t score = VP8SSE16x16(src, ref);
|
| if (score < best_score) {
|
| best_mode = mode;
|
| @@ -1096,39 +1146,72 @@ static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
|
| }
|
| }
|
| VP8SetIntra16Mode(it, best_mode);
|
| + // we'll reconstruct later, if i16 mode actually gets selected
|
| }
|
| - if (try_both_i4_i16 || !is_i16) {
|
| - uint8_t modes_i4[16];
|
| +
|
| + // Next, evaluate Intra4
|
| + if (try_both_modes || !is_i16) {
|
| // We don't evaluate the rate here, but just account for it through a
|
| // constant penalty (i4 mode usually needs more bits compared to i16).
|
| - score_t score_i4 = (score_t)I4_PENALTY;
|
| -
|
| + is_i16 = 0;
|
| VP8IteratorStartI4(it);
|
| do {
|
| - int mode;
|
| - int best_sub_mode = -1;
|
| - score_t best_sub_score = MAX_COST;
|
| - const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
|
| + int best_i4_mode = -1;
|
| + score_t best_i4_score = MAX_COST;
|
| + const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
|
|
| - // TODO(skal): we don't really need the prediction pixels here,
|
| - // but just the distortion against 'src'.
|
| VP8MakeIntra4Preds(it);
|
| for (mode = 0; mode < NUM_BMODES; ++mode) {
|
| const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
|
| const score_t score = VP8SSE4x4(src, ref);
|
| - if (score < best_sub_score) {
|
| - best_sub_mode = mode;
|
| - best_sub_score = score;
|
| + if (score < best_i4_score) {
|
| + best_i4_mode = mode;
|
| + best_i4_score = score;
|
| }
|
| }
|
| - modes_i4[it->i4_] = best_sub_mode;
|
| - score_i4 += best_sub_score;
|
| - if (score_i4 >= best_score) break;
|
| - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
|
| - if (score_i4 < best_score) {
|
| - VP8SetIntra4Mode(it, modes_i4);
|
| + modes_i4[it->i4_] = best_i4_mode;
|
| + score_i4 += best_i4_score;
|
| + if (score_i4 >= best_score) {
|
| + // Intra4 won't be better than Intra16. Bail out and pick Intra16.
|
| + is_i16 = 1;
|
| + break;
|
| + } else { // reconstruct partial block inside yuv_out2_ buffer
|
| + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
|
| + nz |= ReconstructIntra4(it, tmp_levels[it->i4_],
|
| + src, tmp_dst, best_i4_mode) << it->i4_;
|
| + }
|
| + } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
|
| + }
|
| +
|
| + // Final reconstruction, depending on which mode is selected.
|
| + if (!is_i16) {
|
| + VP8SetIntra4Mode(it, modes_i4);
|
| + memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
|
| + SwapOut(it);
|
| + best_score = score_i4;
|
| + } else {
|
| + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
|
| + }
|
| +
|
| + // Finally, UV: optionally refine the mode, then reconstruct.
|
| + if (refine_uv_mode) {
|
| + int best_mode = -1;
|
| + score_t best_uv_score = MAX_COST;
|
| + const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
|
| + for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
|
| + const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
|
| + const score_t score = VP8SSE16x8(src, ref);
|
| + if (score < best_uv_score) {
|
| + best_mode = mode;
|
| + best_uv_score = score;
|
| + }
|
| }
|
| + VP8SetIntraUVMode(it, best_mode);
|
| }
|
| + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
|
| +
|
| + rd->nz = nz;
|
| + rd->score = best_score;
|
| }
|
|
|
| //------------------------------------------------------------------------------
|
| @@ -1158,13 +1241,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
| SimpleQuantize(it, rd);
|
| }
|
| } else {
|
| - // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
|
| - // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
|
| - DistoRefine(it, (method >= 2));
|
| - SimpleQuantize(it, rd);
|
| + // At this point we have heuristically decided intra16 / intra4.
|
| + // For method >= 2, re-pick the best intra4/intra16 based on SSE (slightly
|
| + // slower). For method <= 1, keep that decision and go straight to
|
| + // quantization/reconstruction. For method >= 1, also refine the UV mode.
|
| + RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
|
| }
|
| is_skipped = (rd->nz == 0);
|
| VP8SetSkip(it, is_skipped);
|
| return is_skipped;
|
| }
|
| -
|
|
|