| Index: source/libvpx/vp9/encoder/vp9_pickmode.c
|
| diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c
|
| index 5acfcc51df0f97629c9fe285244effcc66a46717..c18687b557a8b22bfc622d4cedba5b8f6905fa64 100644
|
| --- a/source/libvpx/vp9/encoder/vp9_pickmode.c
|
| +++ b/source/libvpx/vp9/encoder/vp9_pickmode.c
|
| @@ -216,6 +216,8 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
| int64_t dist;
|
| struct macroblock_plane *const p = &x->plane[0];
|
| struct macroblockd_plane *const pd = &xd->plane[0];
|
| + const int64_t dc_thr = p->quant_thred[0] >> 6;
|
| + const int64_t ac_thr = p->quant_thred[1] >> 6;
|
| const uint32_t dc_quant = pd->dequant[0];
|
| const uint32_t ac_quant = pd->dequant[1];
|
| unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
|
| @@ -223,12 +225,14 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
| *var_y = var;
|
| *sse_y = sse;
|
|
|
| - if (sse < dc_quant * dc_quant >> 6)
|
| - x->skip_txfm[0] = 1;
|
| - else if (var < ac_quant * ac_quant >> 6)
|
| + x->skip_txfm[0] = 0;
|
| + // Check if all ac coefficients can be quantized to zero.
|
| + if (var < ac_thr || var == 0) {
|
| x->skip_txfm[0] = 2;
|
| - else
|
| - x->skip_txfm[0] = 0;
|
| + // Check if dc coefficient can be quantized to zero.
|
| + if (sse - var < dc_thr || sse == var)
|
| + x->skip_txfm[0] = 1;
|
| + }
|
|
|
| if (cpi->common.tx_mode == TX_MODE_SELECT) {
|
| if (sse > (var << 2))
|
| @@ -238,9 +242,13 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
| else
|
| xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
|
|
|
| - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
|
| - xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
|
| - xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
|
| + if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
|
| + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
|
| + xd->mi[0].src_mi->mbmi.segment_id != CR_SEGMENT_ID_BASE)
|
| + xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
|
| + else if (xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
|
| + xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
|
| + }
|
| } else {
|
| xd->mi[0].src_mi->mbmi.tx_size =
|
| MIN(max_txsize_lookup[bsize],
|
| @@ -283,6 +291,71 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
| x->skip_txfm[0] = 1;
|
| }
|
|
|
| +static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
| + MACROBLOCK *x, MACROBLOCKD *xd,
|
| + int *out_rate_sum, int64_t *out_dist_sum,
|
| + unsigned int *var_y, unsigned int *sse_y) {
|
| + // Transform coeffs are 8 times those of an orthogonal transform, so the
|
| + // quantizer step is 8 times as well. The dequant values are shifted right
|
| + // (by 3, or by bd - 5 for high bitdepth buffers) before being modeled.
|
| + unsigned int sse;
|
| + int rate;
|
| + int64_t dist;
|
| + int i;
|
| +
|
| + *out_rate_sum = 0;  // outputs are totals over the planes modeled below
|
| + *out_dist_sum = 0;
|
| +
|
| + for (i = 1; i <= 2; ++i) {  // planes 1 and 2 only
|
| + struct macroblock_plane *const p = &x->plane[i];
|
| + struct macroblockd_plane *const pd = &xd->plane[i];
|
| + const uint32_t dc_quant = pd->dequant[0];
|
| + const uint32_t ac_quant = pd->dequant[1];
|
| + const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
|
| + unsigned int var;
|
| +
|
| + if (!x->color_sensitivity[i - 1])
|
| + continue;  // plane skipped when its color_sensitivity flag is zero
|
| +
|
| + var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
|
| + pd->dst.buf, pd->dst.stride, &sse);
|
| + *var_y += var;  // added to the caller's value, not overwritten
|
| + *sse_y += sse;  // likewise accumulated
|
| +
|
| + #if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
|
| + dc_quant >> (xd->bd - 5), &rate, &dist);
|
| + } else {
|
| + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
|
| + dc_quant >> 3, &rate, &dist);
|
| + }
|
| + #else
|
| + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
|
| + dc_quant >> 3, &rate, &dist);
|
| + #endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| + *out_rate_sum += rate >> 1;  // term modeled from sse - var with dc_quant
|
| + *out_dist_sum += dist << 3;
|
| +
|
| + #if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
|
| + ac_quant >> (xd->bd - 5), &rate, &dist);
|
| + } else {
|
| + vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
|
| + ac_quant >> 3, &rate, &dist);
|
| + }
|
| + #else
|
| + vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
|
| + ac_quant >> 3, &rate, &dist);
|
| + #endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| + *out_rate_sum += rate;  // term modeled from var with ac_quant
|
| + *out_dist_sum += dist << 4;
|
| + }
|
| +}
|
| +
|
| static int get_pred_buffer(PRED_BUFFER *p, int len) {
|
| int i;
|
|
|
| @@ -325,11 +398,11 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
|
| const unsigned int min_thresh =
|
| MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| - const int shift = 2 * xd->bd - 16;
|
| + const int shift = (xd->bd << 1) - 16;
|
| #endif
|
|
|
| // Calculate threshold according to dequant value.
|
| - thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
|
| + thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
|
| thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
|
| @@ -371,14 +444,14 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
|
| xd->plane[1].dst.stride, &sse_u);
|
|
|
| // U skipping condition checking
|
| - if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
|
| + if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
|
| var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
|
| x->plane[2].src.stride,
|
| xd->plane[2].dst.buf,
|
| xd->plane[2].dst.stride, &sse_v);
|
|
|
| // V skipping condition checking
|
| - if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
|
| + if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
|
| x->skip = 1;
|
|
|
| // The cost of skip bit needs to be added.
|
| @@ -508,6 +581,13 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
|
| *rd_cost = best_rdc;
|
| }
|
|
|
| +static const PREDICTION_MODE inter_mode_set[INTER_MODES] = {  // order in which the inter mode loop tries modes
|
| + ZEROMV, NEARESTMV, NEARMV, NEWMV,
|
| +};
|
| +
|
| +static const int ref_frame_cost[MAX_REF_FRAMES] = {  // rate added to a mode's cost, indexed by reference frame
|
| + 1235, 229, 530, 615,
|
| +};
|
| // TODO(jingning) placeholder for inter-frame non-RD mode decision.
|
| // this needs various further optimizations. to be continued..
|
| void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| @@ -624,7 +704,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|
|
| if (cm->use_prev_frame_mvs)
|
| vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
|
| - candidates, mi_row, mi_col);
|
| + candidates, mi_row, mi_col, NULL, NULL);
|
| else
|
| const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
|
| xd->mi[0].src_mi,
|
| @@ -658,7 +738,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| continue;
|
|
|
| // Select prediction reference frames.
|
| - xd->plane[0].pre[0] = yv12_mb[ref_frame][0];
|
| + for (i = 0; i < MAX_MB_PLANE; i++)
|
| + xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
|
|
|
| clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
|
| clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
|
| @@ -666,10 +747,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| mbmi->ref_frame[0] = ref_frame;
|
| set_ref_ptrs(cm, xd, ref_frame, NONE);
|
|
|
| - for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
|
| + for (i = 0; i < INTER_MODES; ++i) {
|
| int rate_mv = 0;
|
| int mode_rd_thresh;
|
| - int mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
|
| + int mode_index;
|
| + this_mode = inter_mode_set[i];
|
| + mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
|
|
|
| if (const_motion[ref_frame] && this_mode == NEARMV)
|
| continue;
|
| @@ -735,9 +818,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
|
| model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter],
|
| &pf_dist[filter], &pf_var[filter], &pf_sse[filter]);
|
| - cost = RDCOST(x->rdmult, x->rddiv,
|
| - vp9_get_switchable_rate(cpi, xd) + pf_rate[filter],
|
| - pf_dist[filter]);
|
| + pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
|
| + cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
|
| pf_tx_size[filter] = mbmi->tx_size;
|
| if (cost < best_cost) {
|
| best_filter = filter;
|
| @@ -774,11 +856,28 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
|
| model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
|
| &var_y, &sse_y);
|
| + this_rdc.rate += cm->interp_filter == SWITCHABLE ?
|
| + vp9_get_switchable_rate(cpi, xd) : 0;
|
| + }
|
| +
|
| + // chroma component rate-distortion cost modeling
|
| + if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
|
| + int uv_rate = 0;
|
| + int64_t uv_dist = 0;
|
| + if (x->color_sensitivity[0])
|
| + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
|
| + if (x->color_sensitivity[1])
|
| + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
|
| + model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist,
|
| + &var_y, &sse_y);
|
| + this_rdc.rate += uv_rate;
|
| + this_rdc.dist += uv_dist;
|
| }
|
|
|
| this_rdc.rate += rate_mv;
|
| this_rdc.rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
|
| [INTER_OFFSET(this_mode)];
|
| + this_rdc.rate += ref_frame_cost[ref_frame];
|
| this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| this_rdc.rate, this_rdc.dist);
|
|
|
| @@ -885,6 +984,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| this_rdc.rate = args.rate;
|
| this_rdc.dist = args.dist;
|
| this_rdc.rate += cpi->mbmode_cost[this_mode];
|
| + this_rdc.rate += ref_frame_cost[INTRA_FRAME];
|
| this_rdc.rate += intra_cost_penalty;
|
| this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| this_rdc.rate, this_rdc.dist);
|
| @@ -988,7 +1088,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
|
| sf, sf);
|
| vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
|
| - candidates, mi_row, mi_col);
|
| + candidates, mi_row, mi_col, NULL, NULL);
|
|
|
| vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
|
| &dummy_mv[0], &dummy_mv[1]);
|
| @@ -1038,8 +1138,6 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| int64_t b_best_rd = INT64_MAX;
|
| const int i = idy * 2 + idx;
|
| PREDICTION_MODE this_mode;
|
| - int b_rate = 0;
|
| - int64_t b_dist = 0;
|
| RD_COST this_rdc;
|
| unsigned int var_y, sse_y;
|
|
|
| @@ -1067,6 +1165,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| &b_mv[NEARMV]);
|
|
|
| for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
|
| + int b_rate = 0;
|
| xd->mi[0].bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int;
|
|
|
| if (this_mode == NEWMV) {
|
| @@ -1128,9 +1227,14 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| &x->pred_sse[ref_frame], NULL, 0, 0);
|
|
|
| xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv;
|
| + } else {
|
| + b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
|
| + [INTER_OFFSET(this_mode)];
|
| }
|
|
|
| - vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_highbd_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
|
| pd->dst.buf, pd->dst.stride,
|
| &xd->mi[0].bmi[i].as_mv[0].as_mv,
|
| &xd->block_refs[0]->sf,
|
| @@ -1139,12 +1243,28 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| vp9_get_interp_kernel(mbmi->interp_filter),
|
| MV_PRECISION_Q3,
|
| mi_col * MI_SIZE + 4 * (i & 0x01),
|
| - mi_row * MI_SIZE + 4 * (i >> 1));
|
| + mi_row * MI_SIZE + 4 * (i >> 1), xd->bd);
|
| + } else {
|
| +#endif
|
| + vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
|
| + pd->dst.buf, pd->dst.stride,
|
| + &xd->mi[0].bmi[i].as_mv[0].as_mv,
|
| + &xd->block_refs[0]->sf,
|
| + 4 * num_4x4_blocks_wide,
|
| + 4 * num_4x4_blocks_high, 0,
|
| + vp9_get_interp_kernel(mbmi->interp_filter),
|
| + MV_PRECISION_Q3,
|
| + mi_col * MI_SIZE + 4 * (i & 0x01),
|
| + mi_row * MI_SIZE + 4 * (i >> 1));
|
| +
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + }
|
| +#endif
|
| +
|
| model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
|
| &var_y, &sse_y);
|
|
|
| this_rdc.rate += b_rate;
|
| - this_rdc.dist += b_dist;
|
| this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| this_rdc.rate, this_rdc.dist);
|
| if (this_rdc.rdcost < b_best_rd) {
|
|
|