| Index: source/libvpx/vp9/encoder/vp9_rdopt.c
|
| ===================================================================
|
| --- source/libvpx/vp9/encoder/vp9_rdopt.c (revision 292608)
|
| +++ source/libvpx/vp9/encoder/vp9_rdopt.c (working copy)
|
| @@ -131,7 +131,7 @@
|
|
|
| static int raster_block_offset(BLOCK_SIZE plane_bsize,
|
| int raster_block, int stride) {
|
| - const int bw = b_width_log2(plane_bsize);
|
| + const int bw = b_width_log2_lookup[plane_bsize];
|
| const int y = 4 * (raster_block >> bw);
|
| const int x = 4 * (raster_block & ((1 << bw) - 1));
|
| return y * stride + x;
|
| @@ -169,7 +169,8 @@
|
|
|
| static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
|
| MACROBLOCK *x, MACROBLOCKD *xd,
|
| - int *out_rate_sum, int64_t *out_dist_sum) {
|
| + int *out_rate_sum, int64_t *out_dist_sum,
|
| + int *skip_txfm_sb, int64_t *skip_sse_sb) {
|
| // Note our transform coeffs are 8 times an orthogonal transform.
|
| // Hence quantizer step is also 8 times. To get effective quantizer
|
| // we need to divide by 8 before sending to modeling function.
|
| @@ -180,7 +181,9 @@
|
| unsigned int sse;
|
| unsigned int var = 0;
|
| unsigned int sum_sse = 0;
|
| - const int shift = 8;
|
| + int64_t total_sse = 0;
|
| + int skip_flag = 1;
|
| + const int shift = 6;
|
| int rate;
|
| int64_t dist;
|
|
|
| @@ -192,6 +195,12 @@
|
| const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
|
| const TX_SIZE max_tx_size = max_txsize_lookup[bs];
|
| const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
|
| + const int64_t dc_thr = p->quant_thred[0] >> shift;
|
| + const int64_t ac_thr = p->quant_thred[1] >> shift;
|
| + // The low thresholds are used to measure if the prediction errors are
|
| + // low enough so that we can skip the mode search.
|
| + const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
|
| + const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
|
| int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
|
| int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
|
| int idx, idy;
|
| @@ -205,6 +214,7 @@
|
| uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
|
| uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
|
| int block_idx = (idy << 1) + idx;
|
| + int low_err_skip = 0;
|
|
|
| var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
|
| dst, pd->dst.stride, &sse);
|
| @@ -211,26 +221,42 @@
|
| x->bsse[(i << 2) + block_idx] = sse;
|
| sum_sse += sse;
|
|
|
| + x->skip_txfm[(i << 2) + block_idx] = 0;
|
| if (!x->select_tx_size) {
|
| - if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
|
| - x->skip_txfm[(i << 2) + block_idx] = 1;
|
| - else if (var < p->quant_thred[1] >> shift)
|
| + // Check if all ac coefficients can be quantized to zero.
|
| + if (var < ac_thr || var == 0) {
|
| x->skip_txfm[(i << 2) + block_idx] = 2;
|
| - else
|
| - x->skip_txfm[(i << 2) + block_idx] = 0;
|
| +
|
| + // Check if dc coefficient can be quantized to zero.
|
| + if (sse - var < dc_thr || sse == var) {
|
| + x->skip_txfm[(i << 2) + block_idx] = 1;
|
| +
|
| + if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
|
| + low_err_skip = 1;
|
| + }
|
| + }
|
| }
|
|
|
| + if (skip_flag && !low_err_skip)
|
| + skip_flag = 0;
|
| +
|
| if (i == 0)
|
| x->pred_sse[ref] += sse;
|
| }
|
| }
|
|
|
| + total_sse += sum_sse;
|
| +
|
| // Fast approximate the modelling function.
|
| if (cpi->oxcf.speed > 4) {
|
| int64_t rate;
|
| - int64_t dist;
|
| - int64_t square_error = sse;
|
| + const int64_t square_error = sum_sse;
|
| int quantizer = (pd->dequant[1] >> 3);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + quantizer >>= (xd->bd - 8);
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| if (quantizer < 120)
|
| rate = (square_error * (280 - quantizer)) >> 8;
|
| @@ -240,13 +266,26 @@
|
| rate_sum += rate;
|
| dist_sum += dist;
|
| } else {
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
|
| + pd->dequant[1] >> (xd->bd - 5),
|
| + &rate, &dist);
|
| + } else {
|
| + vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
|
| + pd->dequant[1] >> 3, &rate, &dist);
|
| + }
|
| +#else
|
| vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
|
| pd->dequant[1] >> 3, &rate, &dist);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| rate_sum += rate;
|
| dist_sum += dist;
|
| }
|
| }
|
|
|
| + *skip_txfm_sb = skip_flag;
|
| + *skip_sse_sb = total_sse << 4;
|
| *out_rate_sum = (int)rate_sum;
|
| *out_dist_sum = dist_sum << 4;
|
| }
|
| @@ -266,6 +305,31 @@
|
| return error;
|
| }
|
|
|
| +// Returns sum((coeff - dqcoeff)^2) over block_size; *ssz gets sum(coeff^2).
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| +int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
|
| + const tran_low_t *dqcoeff,
|
| + intptr_t block_size,
|
| + int64_t *ssz, int bd) {
|
| + int i;
|
| + int64_t error = 0, sqcoeff = 0;
|
| + int shift = 2 * (bd - 8);  // 0 when bd == 8, +2 bits per extra bit of depth
|
| + int rounding = shift > 0 ? 1 << (shift - 1) : 0;  // half of (1 << shift)
|
| +
|
| + for (i = 0; i < block_size; i++) {
|
| + const int64_t diff = coeff[i] - dqcoeff[i];
|
| + error += diff * diff;  // accumulated in 64 bits
|
| + sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
|
| + }
|
| + assert(error >= 0 && sqcoeff >= 0);  // negative totals would mean overflow
|
| + error = (error + rounding) >> shift;  // round to nearest, then scale down
|
| + sqcoeff = (sqcoeff + rounding) >> shift;  // same scaling as error
|
| +
|
| + *ssz = sqcoeff;  // scaled sum of squared source coefficients
|
| + return error;  // scaled sum of squared coefficient differences
|
| +}
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| /* The trailing '0' is a terminator which is used inside cost_coeffs() to
|
| * decide whether to include cost of a trailing EOB node or not (i.e. we
|
| * can skip this if the last coefficient in this transform block, e.g. the
|
| @@ -351,8 +415,14 @@
|
|
|
| return cost;
|
| }
|
| +
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| static void dist_block(int plane, int block, TX_SIZE tx_size,
|
| + struct rdcost_block_args* args, int bd) {
|
| +#else
|
| +static void dist_block(int plane, int block, TX_SIZE tx_size,
|
| struct rdcost_block_args* args) {
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| const int ss_txfrm_size = tx_size << 1;
|
| MACROBLOCK* const x = args->x;
|
| MACROBLOCKD* const xd = &x->e_mbd;
|
| @@ -362,8 +432,13 @@
|
| int shift = tx_size == TX_32X32 ? 0 : 2;
|
| tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
| tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
| + &this_sse, bd) >> shift;
|
| +#else
|
| args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
| &this_sse) >> shift;
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| args->sse = this_sse >> shift;
|
|
|
| if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
|
| @@ -370,6 +445,11 @@
|
| // TODO(jingning): tune the model to better capture the distortion.
|
| int64_t p = (pd->dequant[1] * pd->dequant[1] *
|
| (1 << ss_txfrm_size)) >> (shift + 2);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + p >>= ((xd->bd - 8) * 2);
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| args->dist += (p >> 4);
|
| args->sse += p;
|
| }
|
| @@ -399,12 +479,28 @@
|
|
|
| if (!is_inter_block(mbmi)) {
|
| vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + dist_block(plane, block, tx_size, args, xd->bd);
|
| + } else {
|
| + dist_block(plane, block, tx_size, args, 8);
|
| + }
|
| +#else
|
| dist_block(plane, block, tx_size, args);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| } else if (max_txsize_lookup[plane_bsize] == tx_size) {
|
| if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
|
| // full forward transform and quantization
|
| vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + dist_block(plane, block, tx_size, args, xd->bd);
|
| + } else {
|
| + dist_block(plane, block, tx_size, args, 8);
|
| + }
|
| +#else
|
| dist_block(plane, block, tx_size, args);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
|
| // compute DC coefficient
|
| tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
|
| @@ -412,9 +508,17 @@
|
| vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
|
| args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
|
| args->dist = args->sse;
|
| - if (!x->plane[plane].eobs[block])
|
| - args->dist = args->sse - ((coeff[0] * coeff[0] -
|
| - (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
|
| + if (x->plane[plane].eobs[block]) {  // DC survived quantization
|
| + const int64_t dc_diff = coeff[0] - dqcoeff[0];  // 64-bit so squares can't overflow int
|
| + int64_t dc_correct = (int64_t)coeff[0] * coeff[0] - dc_diff * dc_diff;
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + dc_correct >>= ((xd->bd - 8) * 2);
|
| +#endif
|
| + if (tx_size != TX_32X32)
|
| + dc_correct >>= 2;
|
| +
|
| + args->dist = MAX(0, args->sse - dc_correct);  // never report negative distortion
|
| + }
|
| } else {
|
| // skip forward transform
|
| x->plane[plane].eobs[block] = 0;
|
| @@ -424,7 +528,15 @@
|
| } else {
|
| // full forward transform and quantization
|
| vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + dist_block(plane, block, tx_size, args, xd->bd);
|
| + } else {
|
| + dist_block(plane, block, tx_size, args, 8);
|
| + }
|
| +#else
|
| dist_block(plane, block, tx_size, args);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| }
|
|
|
| rate_block(plane, block, plane_bsize, tx_size, args);
|
| @@ -659,6 +771,9 @@
|
| const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
|
| int idx, idy;
|
| uint8_t best_dst[8 * 8];
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + uint16_t best_dst16[8 * 8];
|
| +#endif
|
|
|
| assert(ib < 4);
|
|
|
| @@ -666,6 +781,108 @@
|
| vpx_memcpy(tl, l, sizeof(tl));
|
| xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
|
| + int64_t this_rd;
|
| + int ratey = 0;
|
| + int64_t distortion = 0;
|
| + int rate = bmode_costs[mode];
|
| +
|
| + if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
|
| + continue;
|
| +
|
| + // Only do the oblique modes if the best so far is
|
| + // one of the neighboring directional modes
|
| + if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
|
| + if (conditional_skipintra(mode, *best_mode))
|
| + continue;
|
| + }
|
| +
|
| + vpx_memcpy(tempa, ta, sizeof(ta));
|
| + vpx_memcpy(templ, tl, sizeof(tl));
|
| +
|
| + for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
|
| + for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
|
| + const int block = ib + idy * 2 + idx;
|
| + const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
|
| + uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
|
| + int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
|
| + p->src_diff);
|
| + tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
|
| + xd->mi[0].src_mi->bmi[block].as_mode = mode;
|
| + vp9_predict_intra_block(xd, block, 1,
|
| + TX_4X4, mode,
|
| + x->skip_encode ? src : dst,
|
| + x->skip_encode ? src_stride : dst_stride,
|
| + dst, dst_stride, idx, idy, 0);
|
| + vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
|
| + dst, dst_stride, xd->bd);
|
| + if (xd->lossless) {
|
| + const scan_order *so = &vp9_default_scan_orders[TX_4X4];
|
| + vp9_highbd_fwht4x4(src_diff, coeff, 8);
|
| + vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
| + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
|
| + so->scan, so->neighbors,
|
| + cpi->sf.use_fast_coef_costing);
|
| + if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
| + goto next_highbd;
|
| + vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
|
| + dst, dst_stride,
|
| + p->eobs[block], xd->bd);
|
| + } else {
|
| + int64_t unused;
|
| + const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
|
| + const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
|
| + vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
|
| + vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
|
| + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
|
| + so->scan, so->neighbors,
|
| + cpi->sf.use_fast_coef_costing);
|
| + distortion += vp9_highbd_block_error(
|
| + coeff, BLOCK_OFFSET(pd->dqcoeff, block),
|
| + 16, &unused, xd->bd) >> 2;
|
| + if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
|
| + goto next_highbd;
|
| + vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
|
| + dst, dst_stride, p->eobs[block], xd->bd);
|
| + }
|
| + }
|
| + }
|
| +
|
| + rate += ratey;
|
| + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
|
| +
|
| + if (this_rd < best_rd) {
|
| + *bestrate = rate;
|
| + *bestratey = ratey;
|
| + *bestdistortion = distortion;
|
| + best_rd = this_rd;
|
| + *best_mode = mode;
|
| + vpx_memcpy(a, tempa, sizeof(tempa));
|
| + vpx_memcpy(l, templ, sizeof(templ));
|
| + for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
|
| + vpx_memcpy(best_dst16 + idy * 8,
|
| + CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
|
| + num_4x4_blocks_wide * 4 * sizeof(uint16_t));
|
| + }
|
| + }
|
| + next_highbd:
|
| + {}
|
| + }
|
| + if (best_rd >= rd_thresh || x->skip_encode)
|
| + return best_rd;
|
| +
|
| + for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
|
| + vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
|
| + best_dst16 + idy * 8,
|
| + num_4x4_blocks_wide * 4 * sizeof(uint16_t));
|
| + }
|
| +
|
| + return best_rd;
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
|
| int64_t this_rd;
|
| int ratey = 0;
|
| @@ -827,6 +1044,7 @@
|
| return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
|
| }
|
|
|
| +// This function is used only for intra_only frames
|
| static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| int *rate, int *rate_tokenonly,
|
| int64_t *distortion, int *skippable,
|
| @@ -841,24 +1059,21 @@
|
| int64_t this_distortion, this_rd;
|
| TX_SIZE best_tx = TX_4X4;
|
| int i;
|
| - int *bmode_costs = cpi->mbmode_cost;
|
| + int *bmode_costs;
|
| + const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
|
| + const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
|
| + const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
|
| + const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
|
| + bmode_costs = cpi->y_mode_costs[A][L];
|
|
|
| if (cpi->sf.tx_size_search_method == USE_FULL_RD)
|
| for (i = 0; i < TX_MODES; i++)
|
| tx_cache[i] = INT64_MAX;
|
|
|
| + vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
|
| /* Y Search for intra prediction mode */
|
| for (mode = DC_PRED; mode <= TM_PRED; mode++) {
|
| int64_t local_tx_cache[TX_MODES];
|
| - MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
|
| - MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
|
| -
|
| - if (cpi->common.frame_type == KEY_FRAME) {
|
| - const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
|
| - const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
|
| -
|
| - bmode_costs = cpi->y_mode_costs[A][L];
|
| - }
|
| mic->mbmi.mode = mode;
|
|
|
| super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
|
| @@ -897,10 +1112,12 @@
|
| return best_rd;
|
| }
|
|
|
| -static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
|
| - int *rate, int64_t *distortion, int *skippable,
|
| - int64_t *sse, BLOCK_SIZE bsize,
|
| - int64_t ref_best_rd) {
|
| +// Return value 0: early termination triggered, no valid rd cost available;
|
| +// 1: rd cost values are valid.
|
| +static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
|
| + int *rate, int64_t *distortion, int *skippable,
|
| + int64_t *sse, BLOCK_SIZE bsize,
|
| + int64_t ref_best_rd) {
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
|
| const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
|
| @@ -907,11 +1124,12 @@
|
| int plane;
|
| int pnrate = 0, pnskip = 1;
|
| int64_t pndist = 0, pnsse = 0;
|
| + int is_cost_valid = 1;
|
|
|
| if (ref_best_rd < 0)
|
| - goto term;
|
| + is_cost_valid = 0;
|
|
|
| - if (is_inter_block(mbmi)) {
|
| + if (is_inter_block(mbmi) && is_cost_valid) {
|
| int plane;
|
| for (plane = 1; plane < MAX_MB_PLANE; ++plane)
|
| vp9_subtract_plane(x, bsize, plane);
|
| @@ -926,21 +1144,25 @@
|
| txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
|
| ref_best_rd, plane, bsize, uv_tx_size,
|
| cpi->sf.use_fast_coef_costing);
|
| - if (pnrate == INT_MAX)
|
| - goto term;
|
| + if (pnrate == INT_MAX) {
|
| + is_cost_valid = 0;
|
| + break;
|
| + }
|
| *rate += pnrate;
|
| *distortion += pndist;
|
| *sse += pnsse;
|
| *skippable &= pnskip;
|
| }
|
| - return;
|
|
|
| - term:
|
| - *rate = INT_MAX;
|
| - *distortion = INT64_MAX;
|
| - *sse = INT64_MAX;
|
| - *skippable = 0;
|
| - return;
|
| + if (!is_cost_valid) {
|
| + // reset cost value
|
| + *rate = INT_MAX;
|
| + *distortion = INT64_MAX;
|
| + *sse = INT64_MAX;
|
| + *skippable = 0;
|
| + }
|
| +
|
| + return is_cost_valid;
|
| }
|
|
|
| static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| @@ -955,6 +1177,7 @@
|
| int this_rate_tokenonly, this_rate, s;
|
| int64_t this_distortion, this_sse;
|
|
|
| + vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
|
| for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
|
| if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
|
| continue;
|
| @@ -961,9 +1184,8 @@
|
|
|
| xd->mi[0].src_mi->mbmi.uv_mode = mode;
|
|
|
| - super_block_uvrd(cpi, x, &this_rate_tokenonly,
|
| - &this_distortion, &s, &this_sse, bsize, best_rd);
|
| - if (this_rate_tokenonly == INT_MAX)
|
| + if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
|
| + &this_distortion, &s, &this_sse, bsize, best_rd))
|
| continue;
|
| this_rate = this_rate_tokenonly +
|
| cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
|
| @@ -993,6 +1215,7 @@
|
| int64_t unused;
|
|
|
| x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;
|
| + vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
|
| super_block_uvrd(cpi, x, rate_tokenonly, distortion,
|
| skippable, &unused, bsize, INT64_MAX);
|
| *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
|
| @@ -1121,6 +1344,16 @@
|
| for (ref = 0; ref < 1 + is_compound; ++ref) {
|
| const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
|
| pd->pre[ref].stride)];
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
|
| + dst, pd->dst.stride,
|
| + &mi->bmi[i].as_mv[ref].as_mv,
|
| + &xd->block_refs[ref]->sf, width, height,
|
| + ref, kernel, MV_PRECISION_Q3,
|
| + mi_col * MI_SIZE + 4 * (i % 2),
|
| + mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
|
| + } else {
|
| vp9_build_inter_predictor(pre, pd->pre[ref].stride,
|
| dst, pd->dst.stride,
|
| &mi->bmi[i].as_mv[ref].as_mv,
|
| @@ -1129,11 +1362,32 @@
|
| mi_col * MI_SIZE + 4 * (i % 2),
|
| mi_row * MI_SIZE + 4 * (i / 2));
|
| }
|
| +#else
|
| + vp9_build_inter_predictor(pre, pd->pre[ref].stride,
|
| + dst, pd->dst.stride,
|
| + &mi->bmi[i].as_mv[ref].as_mv,
|
| + &xd->block_refs[ref]->sf, width, height, ref,
|
| + kernel, MV_PRECISION_Q3,
|
| + mi_col * MI_SIZE + 4 * (i % 2),
|
| + mi_row * MI_SIZE + 4 * (i / 2));
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| + }
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_highbd_subtract_block(
|
| + height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
|
| + src, p->src.stride, dst, pd->dst.stride, xd->bd);
|
| + } else {
|
| + vp9_subtract_block(
|
| + height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
|
| + src, p->src.stride, dst, pd->dst.stride);
|
| + }
|
| +#else
|
| vp9_subtract_block(height, width,
|
| raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
|
| - src, p->src.stride,
|
| - dst, pd->dst.stride);
|
| + src, p->src.stride, dst, pd->dst.stride);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| k = i;
|
| for (idy = 0; idy < height / 4; ++idy) {
|
| @@ -1146,8 +1400,19 @@
|
| x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
|
| coeff, 8);
|
| vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + thisdistortion += vp9_highbd_block_error(coeff,
|
| + BLOCK_OFFSET(pd->dqcoeff, k),
|
| + 16, &ssz, xd->bd);
|
| + } else {
|
| + thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
|
| + 16, &ssz);
|
| + }
|
| +#else
|
| thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
|
| 16, &ssz);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| thissse += ssz;
|
| thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
|
| so->scan, so->neighbors,
|
| @@ -1369,7 +1634,7 @@
|
| int sadpb = x->sadperbit4;
|
| MV mvp_full;
|
| int max_mv;
|
| - int sad_list[5];
|
| + int cost_list[5];
|
|
|
| /* Is the best so far sufficiently good that we cant justify doing
|
| * and new motion search. */
|
| @@ -1415,7 +1680,7 @@
|
|
|
| bestsme = vp9_full_pixel_search(
|
| cpi, x, bsize, &mvp_full, step_param, sadpb,
|
| - cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,
|
| + cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
|
| &bsi->ref_mv[0]->as_mv, new_mv,
|
| INT_MAX, 1);
|
|
|
| @@ -1429,7 +1694,7 @@
|
| sadpb, 16, &cpi->fn_ptr[bsize],
|
| &bsi->ref_mv[0]->as_mv,
|
| &best_mv->as_mv);
|
| - sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;
|
| + cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
|
| if (thissme < bestsme) {
|
| bestsme = thissme;
|
| *new_mv = best_mv->as_mv;
|
| @@ -1450,7 +1715,7 @@
|
| x->errorperbit, &cpi->fn_ptr[bsize],
|
| cpi->sf.mv.subpel_force_stop,
|
| cpi->sf.mv.subpel_iters_per_step,
|
| - cond_sad_list(cpi, sad_list),
|
| + cond_cost_list(cpi, cost_list),
|
| x->nmvjointcost, x->mvcost,
|
| &distortion,
|
| &x->pred_sse[mbmi->ref_frame[0]],
|
| @@ -1784,7 +2049,7 @@
|
| int tmp_col_max = x->mv_col_max;
|
| int tmp_row_min = x->mv_row_min;
|
| int tmp_row_max = x->mv_row_max;
|
| - int sad_list[5];
|
| + int cost_list[5];
|
|
|
| const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
|
| ref);
|
| @@ -1820,14 +2085,14 @@
|
| }
|
|
|
| if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
|
| - int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
|
| - b_width_log2(bsize)));
|
| + int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
|
| + MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
|
| step_param = MAX(step_param, boffset);
|
| }
|
|
|
| if (cpi->sf.adaptive_motion_search) {
|
| - int bwl = b_width_log2(bsize);
|
| - int bhl = b_height_log2(bsize);
|
| + int bwl = b_width_log2_lookup[bsize];
|
| + int bhl = b_height_log2_lookup[bsize];
|
| int i;
|
| int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
|
|
|
| @@ -1856,7 +2121,7 @@
|
| mvp_full.row >>= 3;
|
|
|
| bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
|
| - cond_sad_list(cpi, sad_list),
|
| + cond_cost_list(cpi, cost_list),
|
| &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
|
|
|
| x->mv_col_min = tmp_col_min;
|
| @@ -1872,7 +2137,7 @@
|
| &cpi->fn_ptr[bsize],
|
| cpi->sf.mv.subpel_force_stop,
|
| cpi->sf.mv.subpel_iters_per_step,
|
| - cond_sad_list(cpi, sad_list),
|
| + cond_cost_list(cpi, cost_list),
|
| x->nmvjointcost, x->mvcost,
|
| &dis, &x->pred_sse[ref], NULL, 0, 0);
|
| }
|
| @@ -1904,7 +2169,12 @@
|
| int_mv ref_mv[2];
|
| int ite, ref;
|
| // Prediction buffer from second frame.
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + uint8_t *second_pred;
|
| + uint8_t *second_pred_alloc;
|
| +#else
|
| uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
|
|
|
| // Do joint motion search in compound mode to get more accurate mv.
|
| @@ -1915,6 +2185,15 @@
|
| vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
|
| vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
|
| };
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t));
|
| + second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc);
|
| + } else {
|
| + second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t));
|
| + second_pred = second_pred_alloc;
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| for (ref = 0; ref < 2; ++ref) {
|
| ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
|
| @@ -1953,6 +2232,28 @@
|
| ref_yv12[1] = xd->plane[0].pre[1];
|
|
|
| // Get pred block from second frame.
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + vp9_highbd_build_inter_predictor(ref_yv12[!id].buf,
|
| + ref_yv12[!id].stride,
|
| + second_pred, pw,
|
| + &frame_mv[refs[!id]].as_mv,
|
| + &xd->block_refs[!id]->sf,
|
| + pw, ph, 0,
|
| + kernel, MV_PRECISION_Q3,
|
| + mi_col * MI_SIZE, mi_row * MI_SIZE,
|
| + xd->bd);
|
| + } else {
|
| + vp9_build_inter_predictor(ref_yv12[!id].buf,
|
| + ref_yv12[!id].stride,
|
| + second_pred, pw,
|
| + &frame_mv[refs[!id]].as_mv,
|
| + &xd->block_refs[!id]->sf,
|
| + pw, ph, 0,
|
| + kernel, MV_PRECISION_Q3,
|
| + mi_col * MI_SIZE, mi_row * MI_SIZE);
|
| + }
|
| +#else
|
| vp9_build_inter_predictor(ref_yv12[!id].buf,
|
| ref_yv12[!id].stride,
|
| second_pred, pw,
|
| @@ -1961,6 +2262,7 @@
|
| pw, ph, 0,
|
| kernel, MV_PRECISION_Q3,
|
| mi_col * MI_SIZE, mi_row * MI_SIZE);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| // Compound motion search on first ref frame.
|
| if (id)
|
| @@ -2029,7 +2331,11 @@
|
| x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
|
| }
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + vpx_free(second_pred_alloc);
|
| +#else
|
| vpx_free(second_pred);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| }
|
|
|
| static INLINE void restore_dst_buf(MACROBLOCKD *xd,
|
| @@ -2042,93 +2348,12 @@
|
| }
|
| }
|
|
|
| -static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
|
| - BLOCK_SIZE bsize, int *rate2,
|
| - int64_t *distortion, int64_t *distortion_uv,
|
| - int *disable_skip) {
|
| - VP9_COMMON *cm = &cpi->common;
|
| - MACROBLOCKD *xd = &x->e_mbd;
|
| - const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
|
| - const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
|
| - unsigned int var, sse;
|
| - // Skipping threshold for ac.
|
| - unsigned int thresh_ac;
|
| - // Skipping threshold for dc
|
| - unsigned int thresh_dc;
|
| -
|
| - var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
|
| - xd->plane[0].dst.buf,
|
| - xd->plane[0].dst.stride, &sse);
|
| -
|
| - if (x->encode_breakout > 0) {
|
| - // Set a maximum for threshold to avoid big PSNR loss in low bitrate
|
| - // case. Use extreme low threshold for static frames to limit skipping.
|
| - const unsigned int max_thresh = (cpi->allow_encode_breakout ==
|
| - ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
|
| - // The encode_breakout input
|
| - const unsigned int min_thresh =
|
| - MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
|
| -
|
| - // Calculate threshold according to dequant value.
|
| - thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
|
| - thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
|
| -
|
| - // Adjust threshold according to partition size.
|
| - thresh_ac >>= 8 - (b_width_log2(bsize) +
|
| - b_height_log2(bsize));
|
| - thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
|
| - } else {
|
| - thresh_ac = 0;
|
| - thresh_dc = 0;
|
| - }
|
| -
|
| - // Y skipping condition checking
|
| - if (sse < thresh_ac || sse == 0) {
|
| - // dc skipping checking
|
| - if ((sse - var) < thresh_dc || sse == var) {
|
| - unsigned int sse_u, sse_v;
|
| - unsigned int var_u, var_v;
|
| -
|
| - var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
|
| - x->plane[1].src.stride,
|
| - xd->plane[1].dst.buf,
|
| - xd->plane[1].dst.stride, &sse_u);
|
| -
|
| - // U skipping condition checking
|
| - if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
|
| - (sse_u - var_u < thresh_dc || sse_u == var_u)) {
|
| - var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
|
| - x->plane[2].src.stride,
|
| - xd->plane[2].dst.buf,
|
| - xd->plane[2].dst.stride, &sse_v);
|
| -
|
| - // V skipping condition checking
|
| - if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
|
| - (sse_v - var_v < thresh_dc || sse_v == var_v)) {
|
| - x->skip = 1;
|
| -
|
| - // The cost of skip bit needs to be added.
|
| - *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
|
| -
|
| - // Scaling factor for SSE from spatial domain to frequency domain
|
| - // is 16. Adjust distortion accordingly.
|
| - *distortion_uv = (sse_u + sse_v) << 4;
|
| - *distortion = (sse << 4) + *distortion_uv;
|
| -
|
| - *disable_skip = 1;
|
| - }
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
| BLOCK_SIZE bsize,
|
| int64_t txfm_cache[],
|
| int *rate2, int64_t *distortion,
|
| int *skippable,
|
| - int *rate_y, int64_t *distortion_y,
|
| - int *rate_uv, int64_t *distortion_uv,
|
| + int *rate_y, int *rate_uv,
|
| int *disable_skip,
|
| int_mv (*mode_mv)[MAX_REF_FRAMES],
|
| int mi_row, int mi_col,
|
| @@ -2148,8 +2373,13 @@
|
| int refs[2] = { mbmi->ref_frame[0],
|
| (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
|
| int_mv cur_mv[2];
|
| - int64_t this_rd = 0;
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);
|
| + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);
|
| + uint8_t *tmp_buf;
|
| +#else
|
| DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| int pred_exists = 0;
|
| int intpel_mv;
|
| int64_t rd, tmp_rd, best_rd = INT64_MAX;
|
| @@ -2166,6 +2396,18 @@
|
| (((mi_row + mi_col) >> bsl) +
|
| get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
|
|
|
| + int skip_txfm_sb = 0;
|
| + int64_t skip_sse_sb = INT64_MAX;
|
| + int64_t distortion_y = 0, distortion_uv = 0;
|
| +
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
|
| + } else {
|
| + tmp_buf = tmp_buf8;
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| if (pred_filter_search) {
|
| INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
|
| if (xd->up_available)
|
| @@ -2275,6 +2517,9 @@
|
| for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
|
| int j;
|
| int64_t rs_rd;
|
| + int tmp_skip_sb = 0;
|
| + int64_t tmp_skip_sse = INT64_MAX;
|
| +
|
| mbmi->interp_filter = i;
|
| rs = vp9_get_switchable_rate(cpi);
|
| rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
|
| @@ -2310,7 +2555,8 @@
|
| }
|
| }
|
| vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
|
| - model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
|
| + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
|
| + &tmp_skip_sb, &tmp_skip_sse);
|
|
|
| rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
|
| rd_opt->filter_cache[i] = rd;
|
| @@ -2339,8 +2585,6 @@
|
| best_filter = mbmi->interp_filter;
|
| if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
|
| best_needs_copy = !best_needs_copy;
|
| - vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
|
| - vpx_memcpy(bsse, x->bsse, sizeof(bsse));
|
| }
|
|
|
| if ((cm->interp_filter == SWITCHABLE && newbest) ||
|
| @@ -2348,6 +2592,11 @@
|
| cm->interp_filter == mbmi->interp_filter)) {
|
| pred_exists = 1;
|
| tmp_rd = best_rd;
|
| +
|
| + skip_txfm_sb = tmp_skip_sb;
|
| + skip_sse_sb = tmp_skip_sse;
|
| + vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
|
| + vpx_memcpy(bsse, x->bsse, sizeof(bsse));
|
| }
|
| }
|
| restore_dst_buf(xd, orig_dst, orig_dst_stride);
|
| @@ -2374,7 +2623,8 @@
|
| // switchable list (ex. bilinear) is indicated at the frame level, or
|
| // skip condition holds.
|
| vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
|
| - model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
|
| + model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
|
| + &skip_txfm_sb, &skip_sse_sb);
|
| rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
|
| vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
|
| vpx_memcpy(bsse, x->bsse, sizeof(bsse));
|
| @@ -2401,16 +2651,10 @@
|
| if (cm->interp_filter == SWITCHABLE)
|
| *rate2 += rs;
|
|
|
| - if (!is_comp_pred) {
|
| - if (cpi->allow_encode_breakout)
|
| - rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,
|
| - disable_skip);
|
| - }
|
| -
|
| vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
|
| vpx_memcpy(x->bsse, bsse, sizeof(bsse));
|
|
|
| - if (!x->skip) {
|
| + if (!skip_txfm_sb) {
|
| int skippable_y, skippable_uv;
|
| int64_t sseuv = INT64_MAX;
|
| int64_t rdcosty = INT64_MAX;
|
| @@ -2417,7 +2661,7 @@
|
|
|
| // Y cost and distortion
|
| vp9_subtract_plane(x, bsize, 0);
|
| - super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
|
| + super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
|
| bsize, txfm_cache, ref_best_rd);
|
|
|
| if (*rate_y == INT_MAX) {
|
| @@ -2428,14 +2672,13 @@
|
| }
|
|
|
| *rate2 += *rate_y;
|
| - *distortion += *distortion_y;
|
| + *distortion += distortion_y;
|
|
|
| rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
|
| rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
|
|
|
| - super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
|
| - bsize, ref_best_rd - rdcosty);
|
| - if (*rate_uv == INT_MAX) {
|
| + if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
|
| + &sseuv, bsize, ref_best_rd - rdcosty)) {
|
| *rate2 = INT_MAX;
|
| *distortion = INT64_MAX;
|
| restore_dst_buf(xd, orig_dst, orig_dst_stride);
|
| @@ -2444,8 +2687,16 @@
|
|
|
| *psse += sseuv;
|
| *rate2 += *rate_uv;
|
| - *distortion += *distortion_uv;
|
| + *distortion += distortion_uv;
|
| *skippable = skippable_y && skippable_uv;
|
| + } else {
|
| + x->skip = 1;
|
| + *disable_skip = 1;
|
| +
|
| + // The cost of skip bit needs to be added.
|
| + *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
|
| +
|
| + *distortion = skip_sse_sb;
|
| }
|
|
|
| if (!is_comp_pred)
|
| @@ -2452,12 +2703,11 @@
|
| single_skippable[this_mode][refs[0]] = *skippable;
|
|
|
| restore_dst_buf(xd, orig_dst, orig_dst_stride);
|
| - return this_rd; // if 0, this will be re-calculated by caller
|
| + return 0; // The rate-distortion cost will be re-calculated by caller.
|
| }
|
|
|
| void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
| - int *returnrate, int64_t *returndist,
|
| - BLOCK_SIZE bsize,
|
| + RD_COST *rd_cost, BLOCK_SIZE bsize,
|
| PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| @@ -2474,37 +2724,34 @@
|
| if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
|
| &dist_y, &y_skip, bsize, tx_cache,
|
| best_rd) >= best_rd) {
|
| - *returnrate = INT_MAX;
|
| + rd_cost->rate = INT_MAX;
|
| return;
|
| }
|
| - max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
|
| - pd[1].subsampling_x,
|
| - pd[1].subsampling_y);
|
| - rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
|
| - &dist_uv, &uv_skip, bsize, max_uv_tx_size);
|
| } else {
|
| y_skip = 0;
|
| if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
|
| &dist_y, best_rd) >= best_rd) {
|
| - *returnrate = INT_MAX;
|
| + rd_cost->rate = INT_MAX;
|
| return;
|
| }
|
| - max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
|
| - pd[1].subsampling_x,
|
| - pd[1].subsampling_y);
|
| - rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
|
| - &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
|
| }
|
| + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
|
| + pd[1].subsampling_x,
|
| + pd[1].subsampling_y);
|
| + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
|
| + &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
|
| + max_uv_tx_size);
|
|
|
| if (y_skip && uv_skip) {
|
| - *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
|
| - vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
|
| - *returndist = dist_y + dist_uv;
|
| + rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
|
| + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
|
| + rd_cost->dist = dist_y + dist_uv;
|
| vp9_zero(ctx->tx_rd_diff);
|
| } else {
|
| int i;
|
| - *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
|
| - *returndist = dist_y + dist_uv;
|
| + rd_cost->rate = rate_y + rate_uv +
|
| + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
|
| + rd_cost->dist = dist_y + dist_uv;
|
| if (cpi->sf.tx_size_search_method == USE_FULL_RD)
|
| for (i = 0; i < TX_MODES; i++) {
|
| if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
|
| @@ -2515,13 +2762,9 @@
|
| }
|
|
|
| ctx->mic = *xd->mi[0].src_mi;
|
| + rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
|
| }
|
|
|
| -// Updating rd_thresh_freq_fact[] here means that the different
|
| -// partition/block sizes are handled independently based on the best
|
| -// choice for the current partition. It may well be better to keep a scaled
|
| -// best rd so far value and update rd_thresh_freq_fact based on the mode/size
|
| -// combination that wins out.
|
| static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
|
| int best_mode_index) {
|
| if (cpi->sf.adaptive_rd_thresh > 0) {
|
| @@ -2528,32 +2771,34 @@
|
| const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
|
| int mode;
|
| for (mode = 0; mode < top_mode; ++mode) {
|
| - int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
|
| -
|
| - if (mode == best_mode_index) {
|
| - *fact -= (*fact >> 3);
|
| - } else {
|
| - *fact = MIN(*fact + RD_THRESH_INC,
|
| - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
|
| + const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
|
| + const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
|
| + BLOCK_SIZE bs;
|
| + for (bs = min_size; bs <= max_size; ++bs) {
|
| + int *const fact = &cpi->rd.thresh_freq_fact[bs][mode];
|
| + if (mode == best_mode_index) {
|
| + *fact -= (*fact >> 4);
|
| + } else {
|
| + *fact = MIN(*fact + RD_THRESH_INC,
|
| + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
|
| + }
|
| }
|
| }
|
| }
|
| }
|
|
|
| -int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
| - const TileInfo *const tile,
|
| - int mi_row, int mi_col,
|
| - int *returnrate,
|
| - int64_t *returndistortion,
|
| - BLOCK_SIZE bsize,
|
| - PICK_MODE_CONTEXT *ctx,
|
| - int64_t best_rd_so_far) {
|
| +void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
|
| + const TileInfo *const tile,
|
| + int mi_row, int mi_col,
|
| + RD_COST *rd_cost, BLOCK_SIZE bsize,
|
| + PICK_MODE_CONTEXT *ctx,
|
| + int64_t best_rd_so_far) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| RD_OPT *const rd_opt = &cpi->rd;
|
| + SPEED_FEATURES *const sf = &cpi->sf;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
|
| const struct segmentation *const seg = &cm->seg;
|
| - struct macroblockd_plane *const pd = xd->plane;
|
| PREDICTION_MODE this_mode;
|
| MV_REFERENCE_FRAME ref_frame, second_ref_frame;
|
| unsigned char segment_id = mbmi->segment_id;
|
| @@ -2584,20 +2829,20 @@
|
| int64_t dist_uv[TX_SIZES];
|
| int skip_uv[TX_SIZES];
|
| PREDICTION_MODE mode_uv[TX_SIZES];
|
| - const int intra_cost_penalty =
|
| - 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
| + const int intra_cost_penalty = vp9_get_intra_cost_penalty(
|
| + cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
| int best_skip2 = 0;
|
| uint8_t ref_frame_skip_mask[2] = { 0 };
|
| uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
|
| - int mode_skip_start = cpi->sf.mode_skip_start + 1;
|
| + int mode_skip_start = sf->mode_skip_start + 1;
|
| const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
|
| const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
|
| - int mode_threshold[MAX_MODES];
|
| + int64_t mode_threshold[MAX_MODES];
|
| int *mode_map = rd_opt->mode_map[bsize];
|
| - const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
|
| + const int mode_search_skip_flags = sf->mode_search_skip_flags;
|
| vp9_zero(best_mbmode);
|
|
|
| - x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
|
| + x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
|
|
|
| estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
|
| &comp_mode_p);
|
| @@ -2619,7 +2864,7 @@
|
| }
|
| }
|
|
|
| - *returnrate = INT_MAX;
|
| + rd_cost->rate = INT_MAX;
|
|
|
| for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
|
| x->pred_mv_sad[ref_frame] = INT_MAX;
|
| @@ -2638,7 +2883,7 @@
|
| // are masked out.
|
| ref_frame_skip_mask[0] |= (1 << ref_frame);
|
| ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
|
| - } else if (cpi->sf.reference_masking) {
|
| + } else if (sf->reference_masking) {
|
| for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
|
| // Skip fixed mv modes for poor references
|
| if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
|
| @@ -2676,7 +2921,7 @@
|
| }
|
|
|
| if (cpi->rc.is_src_frame_alt_ref) {
|
| - if (cpi->sf.alt_ref_search_fp) {
|
| + if (sf->alt_ref_search_fp) {
|
| mode_skip_mask[ALTREF_FRAME] = 0;
|
| ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
|
| ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
|
| @@ -2683,18 +2928,30 @@
|
| }
|
| }
|
|
|
| - if (bsize > cpi->sf.max_intra_bsize) {
|
| + if (sf->alt_ref_search_fp)
|
| + if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
|
| + if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
|
| + mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
|
| +
|
| + if (sf->adaptive_mode_search) {
|
| + if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
|
| + cpi->rc.frames_since_golden >= 3)
|
| + if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
|
| + mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
|
| + }
|
| +
|
| + if (bsize > sf->max_intra_bsize) {
|
| ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
|
| ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
|
| }
|
|
|
| mode_skip_mask[INTRA_FRAME] |=
|
| - ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
|
| + ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
|
|
|
| for (i = 0; i < MAX_MODES; ++i)
|
| mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
|
|
|
| - midx = cpi->sf.schedule_mode_search ? mode_skip_start : 0;
|
| + midx = sf->schedule_mode_search ? mode_skip_start : 0;
|
| while (midx > 4) {
|
| uint8_t end_pos = 0;
|
| for (i = 5; i < midx; ++i) {
|
| @@ -2758,18 +3015,18 @@
|
| continue;
|
|
|
| // Test best rd so far against threshold for trying this mode.
|
| - if (best_mode_skippable && cpi->sf.schedule_mode_search)
|
| + if (best_mode_skippable && sf->schedule_mode_search)
|
| mode_threshold[mode_index] <<= 1;
|
|
|
| if (best_rd < mode_threshold[mode_index])
|
| continue;
|
|
|
| - if (cpi->sf.motion_field_mode_search) {
|
| + if (sf->motion_field_mode_search) {
|
| const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],
|
| tile->mi_col_end - mi_col);
|
| const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
|
| tile->mi_row_end - mi_row);
|
| - const int bsl = mi_width_log2(bsize);
|
| + const int bsl = mi_width_log2_lookup[bsize];
|
| int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
|
| + get_chessboard_index(cm->current_video_frame)) & 0x1;
|
| MB_MODE_INFO *ref_mbmi;
|
| @@ -2838,7 +3095,7 @@
|
| }
|
|
|
| if (ref_frame == INTRA_FRAME) {
|
| - if (cpi->sf.adaptive_mode_search)
|
| + if (sf->adaptive_mode_search)
|
| if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
|
| continue;
|
|
|
| @@ -2895,14 +3152,15 @@
|
|
|
| if (ref_frame == INTRA_FRAME) {
|
| TX_SIZE uv_tx;
|
| + struct macroblockd_plane *const pd = &xd->plane[1];
|
| + vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
|
| super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
|
| NULL, bsize, tx_cache, best_rd);
|
| -
|
| if (rate_y == INT_MAX)
|
| continue;
|
|
|
| - uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,
|
| - pd[1].subsampling_y);
|
| + uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
|
| + pd->subsampling_y);
|
| if (rate_uv_intra[uv_tx] == INT_MAX) {
|
| choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
|
| &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
|
| @@ -2922,8 +3180,7 @@
|
| this_rd = handle_inter_mode(cpi, x, bsize,
|
| tx_cache,
|
| &rate2, &distortion2, &skippable,
|
| - &rate_y, &distortion_y,
|
| - &rate_uv, &distortion_uv,
|
| + &rate_y, &rate_uv,
|
| &disable_skip, frame_mv,
|
| mi_row, mi_col,
|
| single_newmv, single_inter_filter,
|
| @@ -3004,8 +3261,9 @@
|
| best_pred_sse = x->pred_sse[ref_frame];
|
| }
|
|
|
| - *returnrate = rate2;
|
| - *returndistortion = distortion2;
|
| + rd_cost->rate = rate2;
|
| + rd_cost->dist = distortion2;
|
| + rd_cost->rdcost = this_rd;
|
| best_rd = this_rd;
|
| best_mbmode = *mbmi;
|
| best_skip2 = this_skip2;
|
| @@ -3020,9 +3278,14 @@
|
| // based on qp, activity mask and history
|
| if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
|
| (mode_index > MIN_EARLY_TERM_INDEX)) {
|
| - const int qstep = xd->plane[0].dequant[1];
|
| + int qstep = xd->plane[0].dequant[1];
|
| // TODO(debargha): Enhance this by specializing for each mode_index
|
| int scale = 4;
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + qstep >>= (xd->bd - 8);
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| if (x->source_variance < UINT_MAX) {
|
| const int var_adjust = (x->source_variance < 16);
|
| scale -= var_adjust;
|
| @@ -3130,11 +3393,14 @@
|
| best_mbmode.mode = ZEROMV;
|
| }
|
|
|
| - if (best_mode_index < 0 || best_rd >= best_rd_so_far)
|
| - return INT64_MAX;
|
| + if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
|
| + rd_cost->rate = INT_MAX;
|
| + rd_cost->rdcost = INT64_MAX;
|
| + return;
|
| + }
|
|
|
| // If we used an estimate for the uv intra rd in the loop above...
|
| - if (cpi->sf.use_uv_intra_rd_estimate) {
|
| + if (sf->use_uv_intra_rd_estimate) {
|
| // Do Intra UV best rd mode selection if best mode choice above was intra.
|
| if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
|
| TX_SIZE uv_tx_size;
|
| @@ -3191,18 +3457,33 @@
|
| // updating code causes PSNR loss. Need to figure out the confliction.
|
| x->skip |= best_mode_skippable;
|
|
|
| + if (!x->skip && !x->select_tx_size) {
|
| + int has_high_freq_coeff = 0;
|
| + int plane;
|
| + int max_plane = is_inter_block(&xd->mi[0].src_mi->mbmi)
|
| + ? MAX_MB_PLANE : 1;
|
| + for (plane = 0; plane < max_plane; ++plane) {
|
| + x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
|
| + has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
|
| + }
|
| +
|
| + for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
|
| + x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
|
| + has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
|
| + }
|
| +
|
| + best_mode_skippable |= !has_high_freq_coeff;
|
| + }
|
| +
|
| store_coding_context(x, ctx, best_mode_index, best_pred_diff,
|
| best_tx_diff, best_filter_diff, best_mode_skippable);
|
| -
|
| - return best_rd;
|
| }
|
|
|
| -int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
|
| - int *returnrate,
|
| - int64_t *returndistortion,
|
| - BLOCK_SIZE bsize,
|
| - PICK_MODE_CONTEXT *ctx,
|
| - int64_t best_rd_so_far) {
|
| +void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
|
| + RD_COST *rd_cost,
|
| + BLOCK_SIZE bsize,
|
| + PICK_MODE_CONTEXT *ctx,
|
| + int64_t best_rd_so_far) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| RD_OPT *const rd_opt = &cpi->rd;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| @@ -3230,7 +3511,7 @@
|
| for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
|
| x->pred_mv_sad[i] = INT_MAX;
|
|
|
| - *returnrate = INT_MAX;
|
| + rd_cost->rate = INT_MAX;
|
|
|
| assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
|
|
|
| @@ -3279,11 +3560,15 @@
|
| rate2 += ref_costs_single[LAST_FRAME];
|
| this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
|
|
|
| - *returnrate = rate2;
|
| - *returndistortion = distortion2;
|
| + rd_cost->rate = rate2;
|
| + rd_cost->dist = distortion2;
|
| + rd_cost->rdcost = this_rd;
|
|
|
| - if (this_rd >= best_rd_so_far)
|
| - return INT64_MAX;
|
| + if (this_rd >= best_rd_so_far) {
|
| + rd_cost->rate = INT_MAX;
|
| + rd_cost->rdcost = INT64_MAX;
|
| + return;
|
| + }
|
|
|
| assert((cm->interp_filter == SWITCHABLE) ||
|
| (cm->interp_filter == mbmi->interp_filter));
|
| @@ -3298,20 +3583,18 @@
|
| swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
|
| store_coding_context(x, ctx, THR_ZEROMV,
|
| best_pred_diff, best_tx_diff, best_filter_diff, 0);
|
| -
|
| - return this_rd;
|
| }
|
|
|
| -int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| - const TileInfo *const tile,
|
| - int mi_row, int mi_col,
|
| - int *returnrate,
|
| - int64_t *returndistortion,
|
| - BLOCK_SIZE bsize,
|
| - PICK_MODE_CONTEXT *ctx,
|
| - int64_t best_rd_so_far) {
|
| +void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
|
| + const TileInfo *const tile,
|
| + int mi_row, int mi_col,
|
| + RD_COST *rd_cost,
|
| + BLOCK_SIZE bsize,
|
| + PICK_MODE_CONTEXT *ctx,
|
| + int64_t best_rd_so_far) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| RD_OPT *const rd_opt = &cpi->rd;
|
| + SPEED_FEATURES *const sf = &cpi->sf;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
|
| const struct segmentation *const seg = &cm->seg;
|
| @@ -3338,14 +3621,14 @@
|
| int64_t dist_uv;
|
| int skip_uv;
|
| PREDICTION_MODE mode_uv = DC_PRED;
|
| - const int intra_cost_penalty =
|
| - 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
| + const int intra_cost_penalty = vp9_get_intra_cost_penalty(
|
| + cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
|
| int_mv seg_mvs[4][MAX_REF_FRAMES];
|
| b_mode_info best_bmodes[4];
|
| int best_skip2 = 0;
|
| int ref_frame_skip_mask[2] = { 0 };
|
|
|
| - x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
|
| + x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
|
| vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
|
| vp9_zero(best_mbmode);
|
|
|
| @@ -3364,7 +3647,7 @@
|
| best_filter_rd[i] = INT64_MAX;
|
| rate_uv_intra = INT_MAX;
|
|
|
| - *returnrate = INT_MAX;
|
| + rd_cost->rate = INT_MAX;
|
|
|
| for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
|
| if (cpi->ref_frame_flags & flag_list[ref_frame]) {
|
| @@ -3398,7 +3681,7 @@
|
|
|
| // Look at the reference frame of the best mode so far and set the
|
| // skip mask to look at a subset of the remaining modes.
|
| - if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
|
| + if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
|
| if (ref_index == 3) {
|
| switch (best_mbmode.ref_frame[0]) {
|
| case INTRA_FRAME:
|
| @@ -3443,7 +3726,7 @@
|
| if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
|
| continue;
|
|
|
| - if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
|
| + if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
|
| best_mbmode.ref_frame[0] == INTRA_FRAME)
|
| continue;
|
| }
|
| @@ -3547,12 +3830,12 @@
|
|
|
| if (cm->interp_filter != BILINEAR) {
|
| tmp_best_filter = EIGHTTAP;
|
| - if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
|
| + if (x->source_variance < sf->disable_filter_search_var_thresh) {
|
| tmp_best_filter = EIGHTTAP;
|
| - } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
|
| + } else if (sf->adaptive_pred_interp_filter == 1 &&
|
| ctx->pred_interp_filter < SWITCHABLE) {
|
| tmp_best_filter = ctx->pred_interp_filter;
|
| - } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
|
| + } else if (sf->adaptive_pred_interp_filter == 2) {
|
| tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
|
| ctx->pred_interp_filter : 0;
|
| } else {
|
| @@ -3605,7 +3888,7 @@
|
| }
|
| pred_exists = 1;
|
| if (switchable_filter_index == 0 &&
|
| - cpi->sf.use_rd_breakout &&
|
| + sf->use_rd_breakout &&
|
| best_rd < INT64_MAX) {
|
| if (tmp_best_rdu / 2 > best_rd) {
|
| // skip searching the other filters if the first is
|
| @@ -3668,10 +3951,11 @@
|
| // then dont bother looking at UV
|
| vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
|
| BLOCK_8X8);
|
| - super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
|
| - &uv_sse, BLOCK_8X8, tmp_best_rdu);
|
| - if (rate_uv == INT_MAX)
|
| + vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
|
| + if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
|
| + &uv_sse, BLOCK_8X8, tmp_best_rdu))
|
| continue;
|
| +
|
| rate2 += rate_uv;
|
| distortion2 += distortion_uv;
|
| skippable = skippable && uv_skippable;
|
| @@ -3738,8 +4022,9 @@
|
| max_plane = 1;
|
| }
|
|
|
| - *returnrate = rate2;
|
| - *returndistortion = distortion2;
|
| + rd_cost->rate = rate2;
|
| + rd_cost->dist = distortion2;
|
| + rd_cost->rdcost = this_rd;
|
| best_rd = this_rd;
|
| best_yrd = best_rd -
|
| RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
|
| @@ -3755,11 +4040,16 @@
|
|
|
| // TODO(debargha): enhance this test with a better distortion prediction
|
| // based on qp, activity mask and history
|
| - if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
|
| + if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
|
| (ref_index > MIN_EARLY_TERM_INDEX)) {
|
| - const int qstep = xd->plane[0].dequant[1];
|
| + int qstep = xd->plane[0].dequant[1];
|
| // TODO(debargha): Enhance this by specializing for each mode_index
|
| int scale = 4;
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + qstep >>= (xd->bd - 8);
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| if (x->source_variance < UINT_MAX) {
|
| const int var_adjust = (x->source_variance < 16);
|
| scale -= var_adjust;
|
| @@ -3826,11 +4116,14 @@
|
| break;
|
| }
|
|
|
| - if (best_rd >= best_rd_so_far)
|
| - return INT64_MAX;
|
| + if (best_rd >= best_rd_so_far) {
|
| + rd_cost->rate = INT_MAX;
|
| + rd_cost->rdcost = INT64_MAX;
|
| + return;
|
| + }
|
|
|
| // If we used an estimate for the uv intra rd in the loop above...
|
| - if (cpi->sf.use_uv_intra_rd_estimate) {
|
| + if (sf->use_uv_intra_rd_estimate) {
|
| // Do Intra UV best rd mode selection if best mode choice above was intra.
|
| if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
|
| *mbmi = best_mbmode;
|
| @@ -3843,9 +4136,10 @@
|
| }
|
|
|
| if (best_rd == INT64_MAX) {
|
| - *returnrate = INT_MAX;
|
| - *returndistortion = INT64_MAX;
|
| - return best_rd;
|
| + rd_cost->rate = INT_MAX;
|
| + rd_cost->dist = INT64_MAX;
|
| + rd_cost->rdcost = INT64_MAX;
|
| + return;
|
| }
|
|
|
| assert((cm->interp_filter == SWITCHABLE) ||
|
| @@ -3891,7 +4185,5 @@
|
|
|
| store_coding_context(x, ctx, best_ref_index,
|
| best_pred_diff, best_tx_diff, best_filter_diff, 0);
|
| -
|
| - return best_rd;
|
| }
|
|
|
|
|