Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Unified Diff: source/libvpx/vp9/encoder/vp9_rdopt.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_resize.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/libvpx/vp9/encoder/vp9_rdopt.c
===================================================================
--- source/libvpx/vp9/encoder/vp9_rdopt.c (revision 292608)
+++ source/libvpx/vp9/encoder/vp9_rdopt.c (working copy)
@@ -131,7 +131,7 @@
static int raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
- const int bw = b_width_log2(plane_bsize);
+ const int bw = b_width_log2_lookup[plane_bsize];
const int y = 4 * (raster_block >> bw);
const int x = 4 * (raster_block & ((1 << bw) - 1));
return y * stride + x;
@@ -169,7 +169,8 @@
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
- int *out_rate_sum, int64_t *out_dist_sum) {
+ int *out_rate_sum, int64_t *out_dist_sum,
+ int *skip_txfm_sb, int64_t *skip_sse_sb) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
@@ -180,7 +181,9 @@
unsigned int sse;
unsigned int var = 0;
unsigned int sum_sse = 0;
- const int shift = 8;
+ int64_t total_sse = 0;
+ int skip_flag = 1;
+ const int shift = 6;
int rate;
int64_t dist;
@@ -192,6 +195,12 @@
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
+ const int64_t dc_thr = p->quant_thred[0] >> shift;
+ const int64_t ac_thr = p->quant_thred[1] >> shift;
+ // The low thresholds are used to measure if the prediction errors are
+ // low enough so that we can skip the mode search.
+ const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
+ const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int idx, idy;
@@ -205,6 +214,7 @@
uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
int block_idx = (idy << 1) + idx;
+ int low_err_skip = 0;
var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
dst, pd->dst.stride, &sse);
@@ -211,26 +221,42 @@
x->bsse[(i << 2) + block_idx] = sse;
sum_sse += sse;
+ x->skip_txfm[(i << 2) + block_idx] = 0;
if (!x->select_tx_size) {
- if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
- x->skip_txfm[(i << 2) + block_idx] = 1;
- else if (var < p->quant_thred[1] >> shift)
+ // Check if all ac coefficients can be quantized to zero.
+ if (var < ac_thr || var == 0) {
x->skip_txfm[(i << 2) + block_idx] = 2;
- else
- x->skip_txfm[(i << 2) + block_idx] = 0;
+
+ // Check if dc coefficient can be quantized to zero.
+ if (sse - var < dc_thr || sse == var) {
+ x->skip_txfm[(i << 2) + block_idx] = 1;
+
+ if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
+ low_err_skip = 1;
+ }
+ }
}
+ if (skip_flag && !low_err_skip)
+ skip_flag = 0;
+
if (i == 0)
x->pred_sse[ref] += sse;
}
}
+ total_sse += sum_sse;
+
// Fast approximate the modelling function.
if (cpi->oxcf.speed > 4) {
int64_t rate;
- int64_t dist;
- int64_t square_error = sse;
+ const int64_t square_error = sum_sse;
int quantizer = (pd->dequant[1] >> 3);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ quantizer >>= (xd->bd - 8);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
if (quantizer < 120)
rate = (square_error * (280 - quantizer)) >> 8;
@@ -240,13 +266,26 @@
rate_sum += rate;
dist_sum += dist;
} else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
+ pd->dequant[1] >> (xd->bd - 5),
+ &rate, &dist);
+ } else {
+ vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
+ pd->dequant[1] >> 3, &rate, &dist);
+ }
+#else
vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
pd->dequant[1] >> 3, &rate, &dist);
+#endif // CONFIG_VP9_HIGHBITDEPTH
rate_sum += rate;
dist_sum += dist;
}
}
+ *skip_txfm_sb = skip_flag;
+ *skip_sse_sb = total_sse << 4;
*out_rate_sum = (int)rate_sum;
*out_dist_sum = dist_sum << 4;
}
@@ -266,6 +305,31 @@
return error;
}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz, int bd) {
+ int i;
+ int64_t error = 0, sqcoeff = 0;
+ int shift = 2 * (bd - 8);
+ int rounding = shift > 0 ? 1 << (shift - 1) : 0;
+
+ for (i = 0; i < block_size; i++) {
+ const int64_t diff = coeff[i] - dqcoeff[i];
+ error += diff * diff;
+ sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
+ }
+ assert(error >= 0 && sqcoeff >= 0);
+ error = (error + rounding) >> shift;
+ sqcoeff = (sqcoeff + rounding) >> shift;
+
+ *ssz = sqcoeff;
+ return error;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
* decide whether to include cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
@@ -351,8 +415,14 @@
return cost;
}
+
+#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
+ struct rdcost_block_args* args, int bd) {
+#else
+static void dist_block(int plane, int block, TX_SIZE tx_size,
struct rdcost_block_args* args) {
+#endif // CONFIG_VP9_HIGHBITDEPTH
const int ss_txfrm_size = tx_size << 1;
MACROBLOCK* const x = args->x;
MACROBLOCKD* const xd = &x->e_mbd;
@@ -362,8 +432,13 @@
int shift = tx_size == TX_32X32 ? 0 : 2;
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_VP9_HIGHBITDEPTH
+ args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
+ &this_sse, bd) >> shift;
+#else
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
+#endif // CONFIG_VP9_HIGHBITDEPTH
args->sse = this_sse >> shift;
if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
@@ -370,6 +445,11 @@
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >> (shift + 2);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ p >>= ((xd->bd - 8) * 2);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
args->dist += (p >> 4);
args->sse += p;
}
@@ -399,12 +479,28 @@
if (!is_inter_block(mbmi)) {
vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dist_block(plane, block, tx_size, args, xd->bd);
+ } else {
+ dist_block(plane, block, tx_size, args, 8);
+ }
+#else
dist_block(plane, block, tx_size, args);
+#endif // CONFIG_VP9_HIGHBITDEPTH
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dist_block(plane, block, tx_size, args, xd->bd);
+ } else {
+ dist_block(plane, block, tx_size, args, 8);
+ }
+#else
dist_block(plane, block, tx_size, args);
+#endif // CONFIG_VP9_HIGHBITDEPTH
} else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
@@ -412,9 +508,17 @@
vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
args->dist = args->sse;
- if (!x->plane[plane].eobs[block])
- args->dist = args->sse - ((coeff[0] * coeff[0] -
- (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
+ if (x->plane[plane].eobs[block]) {
+ int64_t dc_correct = coeff[0] * coeff[0] -
+ (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0]);
+#if CONFIG_VP9_HIGHBITDEPTH
+ dc_correct >>= ((xd->bd - 8) * 2);
+#endif
+ if (tx_size != TX_32X32)
+ dc_correct >>= 2;
+
+ args->dist = MAX(0, args->sse - dc_correct);
+ }
} else {
// skip forward transform
x->plane[plane].eobs[block] = 0;
@@ -424,7 +528,15 @@
} else {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dist_block(plane, block, tx_size, args, xd->bd);
+ } else {
+ dist_block(plane, block, tx_size, args, 8);
+ }
+#else
dist_block(plane, block, tx_size, args);
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
rate_block(plane, block, plane_bsize, tx_size, args);
@@ -659,6 +771,9 @@
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
uint8_t best_dst[8 * 8];
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t best_dst16[8 * 8];
+#endif
assert(ib < 4);
@@ -666,6 +781,108 @@
vpx_memcpy(tl, l, sizeof(tl));
xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ int64_t this_rd;
+ int ratey = 0;
+ int64_t distortion = 0;
+ int rate = bmode_costs[mode];
+
+ if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
+ continue;
+
+ // Only do the oblique modes if the best so far is
+ // one of the neighboring directional modes
+ if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+ if (conditional_skipintra(mode, *best_mode))
+ continue;
+ }
+
+ vpx_memcpy(tempa, ta, sizeof(ta));
+ vpx_memcpy(templ, tl, sizeof(tl));
+
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
+ const int block = ib + idy * 2 + idx;
+ const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
+ uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+ int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
+ p->src_diff);
+ tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
+ xd->mi[0].src_mi->bmi[block].as_mode = mode;
+ vp9_predict_intra_block(xd, block, 1,
+ TX_4X4, mode,
+ x->skip_encode ? src : dst,
+ x->skip_encode ? src_stride : dst_stride,
+ dst, dst_stride, idx, idy, 0);
+ vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
+ dst, dst_stride, xd->bd);
+ if (xd->lossless) {
+ const scan_order *so = &vp9_default_scan_orders[TX_4X4];
+ vp9_highbd_fwht4x4(src_diff, coeff, 8);
+ vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next_highbd;
+ vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
+ dst, dst_stride,
+ p->eobs[block], xd->bd);
+ } else {
+ int64_t unused;
+ const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
+ const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
+ vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
+ vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+ distortion += vp9_highbd_block_error(
+ coeff, BLOCK_OFFSET(pd->dqcoeff, block),
+ 16, &unused, xd->bd) >> 2;
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next_highbd;
+ vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
+ dst, dst_stride, p->eobs[block], xd->bd);
+ }
+ }
+ }
+
+ rate += ratey;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (this_rd < best_rd) {
+ *bestrate = rate;
+ *bestratey = ratey;
+ *bestdistortion = distortion;
+ best_rd = this_rd;
+ *best_mode = mode;
+ vpx_memcpy(a, tempa, sizeof(tempa));
+ vpx_memcpy(l, templ, sizeof(templ));
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
+ vpx_memcpy(best_dst16 + idy * 8,
+ CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
+ num_4x4_blocks_wide * 4 * sizeof(uint16_t));
+ }
+ }
+ next_highbd:
+ {}
+ }
+ if (best_rd >= rd_thresh || x->skip_encode)
+ return best_rd;
+
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
+ vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
+ best_dst16 + idy * 8,
+ num_4x4_blocks_wide * 4 * sizeof(uint16_t));
+ }
+
+ return best_rd;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
@@ -827,6 +1044,7 @@
return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
+// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
@@ -841,24 +1059,21 @@
int64_t this_distortion, this_rd;
TX_SIZE best_tx = TX_4X4;
int i;
- int *bmode_costs = cpi->mbmode_cost;
+ int *bmode_costs;
+ const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
+ const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
+ const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
+ const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
+ bmode_costs = cpi->y_mode_costs[A][L];
if (cpi->sf.tx_size_search_method == USE_FULL_RD)
for (i = 0; i < TX_MODES; i++)
tx_cache[i] = INT64_MAX;
+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
/* Y Search for intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int64_t local_tx_cache[TX_MODES];
- MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
- MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
-
- if (cpi->common.frame_type == KEY_FRAME) {
- const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
- const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
-
- bmode_costs = cpi->y_mode_costs[A][L];
- }
mic->mbmi.mode = mode;
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
@@ -897,10 +1112,12 @@
return best_rd;
}
-static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
- int *rate, int64_t *distortion, int *skippable,
- int64_t *sse, BLOCK_SIZE bsize,
- int64_t ref_best_rd) {
+// Return value 0: early termination triggered, no valid rd cost available;
+// 1: rd cost values are valid.
+static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion, int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
@@ -907,11 +1124,12 @@
int plane;
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
+ int is_cost_valid = 1;
if (ref_best_rd < 0)
- goto term;
+ is_cost_valid = 0;
- if (is_inter_block(mbmi)) {
+ if (is_inter_block(mbmi) && is_cost_valid) {
int plane;
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
vp9_subtract_plane(x, bsize, plane);
@@ -926,21 +1144,25 @@
txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
ref_best_rd, plane, bsize, uv_tx_size,
cpi->sf.use_fast_coef_costing);
- if (pnrate == INT_MAX)
- goto term;
+ if (pnrate == INT_MAX) {
+ is_cost_valid = 0;
+ break;
+ }
*rate += pnrate;
*distortion += pndist;
*sse += pnsse;
*skippable &= pnskip;
}
- return;
- term:
- *rate = INT_MAX;
- *distortion = INT64_MAX;
- *sse = INT64_MAX;
- *skippable = 0;
- return;
+ if (!is_cost_valid) {
+ // reset cost value
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ }
+
+ return is_cost_valid;
}
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -955,6 +1177,7 @@
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
@@ -961,9 +1184,8 @@
xd->mi[0].src_mi->mbmi.uv_mode = mode;
- super_block_uvrd(cpi, x, &this_rate_tokenonly,
- &this_distortion, &s, &this_sse, bsize, best_rd);
- if (this_rate_tokenonly == INT_MAX)
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
continue;
this_rate = this_rate_tokenonly +
cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
@@ -993,6 +1215,7 @@
int64_t unused;
x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;
+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
super_block_uvrd(cpi, x, rate_tokenonly, distortion,
skippable, &unused, bsize, INT64_MAX);
*rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
@@ -1121,6 +1344,16 @@
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[ref].stride)];
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
+ dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[ref].as_mv,
+ &xd->block_refs[ref]->sf, width, height,
+ ref, kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE + 4 * (i % 2),
+ mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
+ } else {
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
@@ -1129,11 +1362,32 @@
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2));
}
+#else
+ vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[ref].as_mv,
+ &xd->block_refs[ref]->sf, width, height, ref,
+ kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE + 4 * (i % 2),
+ mi_row * MI_SIZE + 4 * (i / 2));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_subtract_block(
+ height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
+ src, p->src.stride, dst, pd->dst.stride, xd->bd);
+ } else {
+ vp9_subtract_block(
+ height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
+ src, p->src.stride, dst, pd->dst.stride);
+ }
+#else
vp9_subtract_block(height, width,
raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
- src, p->src.stride,
- dst, pd->dst.stride);
+ src, p->src.stride, dst, pd->dst.stride);
+#endif // CONFIG_VP9_HIGHBITDEPTH
k = i;
for (idy = 0; idy < height / 4; ++idy) {
@@ -1146,8 +1400,19 @@
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ thisdistortion += vp9_highbd_block_error(coeff,
+ BLOCK_OFFSET(pd->dqcoeff, k),
+ 16, &ssz, xd->bd);
+ } else {
+ thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
+ 16, &ssz);
+ }
+#else
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
+#endif // CONFIG_VP9_HIGHBITDEPTH
thissse += ssz;
thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
so->scan, so->neighbors,
@@ -1369,7 +1634,7 @@
int sadpb = x->sadperbit4;
MV mvp_full;
int max_mv;
- int sad_list[5];
+ int cost_list[5];
/* Is the best so far sufficiently good that we cant justify doing
* and new motion search. */
@@ -1415,7 +1680,7 @@
bestsme = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, sadpb,
- cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,
+ cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
&bsi->ref_mv[0]->as_mv, new_mv,
INT_MAX, 1);
@@ -1429,7 +1694,7 @@
sadpb, 16, &cpi->fn_ptr[bsize],
&bsi->ref_mv[0]->as_mv,
&best_mv->as_mv);
- sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;
+ cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
if (thissme < bestsme) {
bestsme = thissme;
*new_mv = best_mv->as_mv;
@@ -1450,7 +1715,7 @@
x->errorperbit, &cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
- cond_sad_list(cpi, sad_list),
+ cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&distortion,
&x->pred_sse[mbmi->ref_frame[0]],
@@ -1784,7 +2049,7 @@
int tmp_col_max = x->mv_col_max;
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
- int sad_list[5];
+ int cost_list[5];
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
@@ -1820,14 +2085,14 @@
}
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
- int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
- b_width_log2(bsize)));
+ int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
+ MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = MAX(step_param, boffset);
}
if (cpi->sf.adaptive_motion_search) {
- int bwl = b_width_log2(bsize);
- int bhl = b_height_log2(bsize);
+ int bwl = b_width_log2_lookup[bsize];
+ int bhl = b_height_log2_lookup[bsize];
int i;
int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
@@ -1856,7 +2121,7 @@
mvp_full.row >>= 3;
bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
- cond_sad_list(cpi, sad_list),
+ cond_cost_list(cpi, cost_list),
&ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
x->mv_col_min = tmp_col_min;
@@ -1872,7 +2137,7 @@
&cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
- cond_sad_list(cpi, sad_list),
+ cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref], NULL, 0, 0);
}
@@ -1904,7 +2169,12 @@
int_mv ref_mv[2];
int ite, ref;
// Prediction buffer from second frame.
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *second_pred;
+ uint8_t *second_pred_alloc;
+#else
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
+#endif // CONFIG_VP9_HIGHBITDEPTH
const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
// Do joint motion search in compound mode to get more accurate mv.
@@ -1915,6 +2185,15 @@
vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
};
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t));
+ second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc);
+ } else {
+ second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t));
+ second_pred = second_pred_alloc;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
for (ref = 0; ref < 2; ++ref) {
ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
@@ -1953,6 +2232,28 @@
ref_yv12[1] = xd->plane[0].pre[1];
// Get pred block from second frame.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(ref_yv12[!id].buf,
+ ref_yv12[!id].stride,
+ second_pred, pw,
+ &frame_mv[refs[!id]].as_mv,
+ &xd->block_refs[!id]->sf,
+ pw, ph, 0,
+ kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE,
+ xd->bd);
+ } else {
+ vp9_build_inter_predictor(ref_yv12[!id].buf,
+ ref_yv12[!id].stride,
+ second_pred, pw,
+ &frame_mv[refs[!id]].as_mv,
+ &xd->block_refs[!id]->sf,
+ pw, ph, 0,
+ kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE);
+ }
+#else
vp9_build_inter_predictor(ref_yv12[!id].buf,
ref_yv12[!id].stride,
second_pred, pw,
@@ -1961,6 +2262,7 @@
pw, ph, 0,
kernel, MV_PRECISION_Q3,
mi_col * MI_SIZE, mi_row * MI_SIZE);
+#endif // CONFIG_VP9_HIGHBITDEPTH
// Compound motion search on first ref frame.
if (id)
@@ -2029,7 +2331,11 @@
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_free(second_pred_alloc);
+#else
vpx_free(second_pred);
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
static INLINE void restore_dst_buf(MACROBLOCKD *xd,
@@ -2042,93 +2348,12 @@
}
}
-static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int *rate2,
- int64_t *distortion, int64_t *distortion_uv,
- int *disable_skip) {
- VP9_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
- const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
- unsigned int var, sse;
- // Skipping threshold for ac.
- unsigned int thresh_ac;
- // Skipping threshold for dc
- unsigned int thresh_dc;
-
- var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride, &sse);
-
- if (x->encode_breakout > 0) {
- // Set a maximum for threshold to avoid big PSNR loss in low bitrate
- // case. Use extreme low threshold for static frames to limit skipping.
- const unsigned int max_thresh = (cpi->allow_encode_breakout ==
- ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
- // The encode_breakout input
- const unsigned int min_thresh =
- MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
-
- // Calculate threshold according to dequant value.
- thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
- thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
-
- // Adjust threshold according to partition size.
- thresh_ac >>= 8 - (b_width_log2(bsize) +
- b_height_log2(bsize));
- thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
- } else {
- thresh_ac = 0;
- thresh_dc = 0;
- }
-
- // Y skipping condition checking
- if (sse < thresh_ac || sse == 0) {
- // dc skipping checking
- if ((sse - var) < thresh_dc || sse == var) {
- unsigned int sse_u, sse_v;
- unsigned int var_u, var_v;
-
- var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
- x->plane[1].src.stride,
- xd->plane[1].dst.buf,
- xd->plane[1].dst.stride, &sse_u);
-
- // U skipping condition checking
- if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
- (sse_u - var_u < thresh_dc || sse_u == var_u)) {
- var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
- x->plane[2].src.stride,
- xd->plane[2].dst.buf,
- xd->plane[2].dst.stride, &sse_v);
-
- // V skipping condition checking
- if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
- (sse_v - var_v < thresh_dc || sse_v == var_v)) {
- x->skip = 1;
-
- // The cost of skip bit needs to be added.
- *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
-
- // Scaling factor for SSE from spatial domain to frequency domain
- // is 16. Adjust distortion accordingly.
- *distortion_uv = (sse_u + sse_v) << 4;
- *distortion = (sse << 4) + *distortion_uv;
-
- *disable_skip = 1;
- }
- }
- }
- }
-}
-
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
int64_t txfm_cache[],
int *rate2, int64_t *distortion,
int *skippable,
- int *rate_y, int64_t *distortion_y,
- int *rate_uv, int64_t *distortion_uv,
+ int *rate_y, int *rate_uv,
int *disable_skip,
int_mv (*mode_mv)[MAX_REF_FRAMES],
int mi_row, int mi_col,
@@ -2148,8 +2373,13 @@
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
int_mv cur_mv[2];
- int64_t this_rd = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);
+ uint8_t *tmp_buf;
+#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
+#endif // CONFIG_VP9_HIGHBITDEPTH
int pred_exists = 0;
int intpel_mv;
int64_t rd, tmp_rd, best_rd = INT64_MAX;
@@ -2166,6 +2396,18 @@
(((mi_row + mi_col) >> bsl) +
get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
+ int skip_txfm_sb = 0;
+ int64_t skip_sse_sb = INT64_MAX;
+ int64_t distortion_y = 0, distortion_uv = 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
+ } else {
+ tmp_buf = tmp_buf8;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
if (pred_filter_search) {
INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
if (xd->up_available)
@@ -2275,6 +2517,9 @@
for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
int j;
int64_t rs_rd;
+ int tmp_skip_sb = 0;
+ int64_t tmp_skip_sse = INT64_MAX;
+
mbmi->interp_filter = i;
rs = vp9_get_switchable_rate(cpi);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -2310,7 +2555,8 @@
}
}
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
+ model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
+ &tmp_skip_sb, &tmp_skip_sse);
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
rd_opt->filter_cache[i] = rd;
@@ -2339,8 +2585,6 @@
best_filter = mbmi->interp_filter;
if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
best_needs_copy = !best_needs_copy;
- vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
- vpx_memcpy(bsse, x->bsse, sizeof(bsse));
}
if ((cm->interp_filter == SWITCHABLE && newbest) ||
@@ -2348,6 +2592,11 @@
cm->interp_filter == mbmi->interp_filter)) {
pred_exists = 1;
tmp_rd = best_rd;
+
+ skip_txfm_sb = tmp_skip_sb;
+ skip_sse_sb = tmp_skip_sse;
+ vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
+ vpx_memcpy(bsse, x->bsse, sizeof(bsse));
}
}
restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -2374,7 +2623,8 @@
// switchable list (ex. bilinear) is indicated at the frame level, or
// skip condition holds.
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
+ &skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
vpx_memcpy(bsse, x->bsse, sizeof(bsse));
@@ -2401,16 +2651,10 @@
if (cm->interp_filter == SWITCHABLE)
*rate2 += rs;
- if (!is_comp_pred) {
- if (cpi->allow_encode_breakout)
- rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,
- disable_skip);
- }
-
vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
vpx_memcpy(x->bsse, bsse, sizeof(bsse));
- if (!x->skip) {
+ if (!skip_txfm_sb) {
int skippable_y, skippable_uv;
int64_t sseuv = INT64_MAX;
int64_t rdcosty = INT64_MAX;
@@ -2417,7 +2661,7 @@
// Y cost and distortion
vp9_subtract_plane(x, bsize, 0);
- super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
+ super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
bsize, txfm_cache, ref_best_rd);
if (*rate_y == INT_MAX) {
@@ -2428,14 +2672,13 @@
}
*rate2 += *rate_y;
- *distortion += *distortion_y;
+ *distortion += distortion_y;
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
- super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
- bsize, ref_best_rd - rdcosty);
- if (*rate_uv == INT_MAX) {
+ if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
+ &sseuv, bsize, ref_best_rd - rdcosty)) {
*rate2 = INT_MAX;
*distortion = INT64_MAX;
restore_dst_buf(xd, orig_dst, orig_dst_stride);
@@ -2444,8 +2687,16 @@
*psse += sseuv;
*rate2 += *rate_uv;
- *distortion += *distortion_uv;
+ *distortion += distortion_uv;
*skippable = skippable_y && skippable_uv;
+ } else {
+ x->skip = 1;
+ *disable_skip = 1;
+
+ // The cost of skip bit needs to be added.
+ *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+
+ *distortion = skip_sse_sb;
}
if (!is_comp_pred)
@@ -2452,12 +2703,11 @@
single_skippable[this_mode][refs[0]] = *skippable;
restore_dst_buf(xd, orig_dst, orig_dst_stride);
- return this_rd; // if 0, this will be re-calculated by caller
+ return 0; // The rate-distortion cost will be re-calculated by caller.
}
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
- int *returnrate, int64_t *returndist,
- BLOCK_SIZE bsize,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2474,37 +2724,34 @@
if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, &y_skip, bsize, tx_cache,
best_rd) >= best_rd) {
- *returnrate = INT_MAX;
+ rd_cost->rate = INT_MAX;
return;
}
- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
- pd[1].subsampling_x,
- pd[1].subsampling_y);
- rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip, bsize, max_uv_tx_size);
} else {
y_skip = 0;
if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, best_rd) >= best_rd) {
- *returnrate = INT_MAX;
+ rd_cost->rate = INT_MAX;
return;
}
- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
- pd[1].subsampling_x,
- pd[1].subsampling_y);
- rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
}
+ max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
+ pd[1].subsampling_x,
+ pd[1].subsampling_y);
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
+ &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
+ max_uv_tx_size);
if (y_skip && uv_skip) {
- *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
- vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
- *returndist = dist_y + dist_uv;
+ rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
+ vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+ rd_cost->dist = dist_y + dist_uv;
vp9_zero(ctx->tx_rd_diff);
} else {
int i;
- *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
- *returndist = dist_y + dist_uv;
+ rd_cost->rate = rate_y + rate_uv +
+ vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rd_cost->dist = dist_y + dist_uv;
if (cpi->sf.tx_size_search_method == USE_FULL_RD)
for (i = 0; i < TX_MODES; i++) {
if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
@@ -2515,13 +2762,9 @@
}
ctx->mic = *xd->mi[0].src_mi;
+ rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}
-// Updating rd_thresh_freq_fact[] here means that the different
-// partition/block sizes are handled independently based on the best
-// choice for the current partition. It may well be better to keep a scaled
-// best rd so far value and update rd_thresh_freq_fact based on the mode/size
-// combination that wins out.
static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
int best_mode_index) {
if (cpi->sf.adaptive_rd_thresh > 0) {
@@ -2528,32 +2771,34 @@
const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
int mode;
for (mode = 0; mode < top_mode; ++mode) {
- int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
-
- if (mode == best_mode_index) {
- *fact -= (*fact >> 3);
- } else {
- *fact = MIN(*fact + RD_THRESH_INC,
- cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
+ const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
+ const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
+ BLOCK_SIZE bs;
+ for (bs = min_size; bs <= max_size; ++bs) {
+ int *const fact = &cpi->rd.thresh_freq_fact[bs][mode];
+ if (mode == best_mode_index) {
+ *fact -= (*fact >> 4);
+ } else {
+ *fact = MIN(*fact + RD_THRESH_INC,
+ cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
+ }
}
}
}
}
-int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
- int mi_row, int mi_col,
- int *returnrate,
- int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far) {
+void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
RD_OPT *const rd_opt = &cpi->rd;
+ SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
const struct segmentation *const seg = &cm->seg;
- struct macroblockd_plane *const pd = xd->plane;
PREDICTION_MODE this_mode;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
unsigned char segment_id = mbmi->segment_id;
@@ -2584,20 +2829,20 @@
int64_t dist_uv[TX_SIZES];
int skip_uv[TX_SIZES];
PREDICTION_MODE mode_uv[TX_SIZES];
- const int intra_cost_penalty =
- 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+ const int intra_cost_penalty = vp9_get_intra_cost_penalty(
+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int best_skip2 = 0;
uint8_t ref_frame_skip_mask[2] = { 0 };
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
- int mode_skip_start = cpi->sf.mode_skip_start + 1;
+ int mode_skip_start = sf->mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
- int mode_threshold[MAX_MODES];
+ int64_t mode_threshold[MAX_MODES];
int *mode_map = rd_opt->mode_map[bsize];
- const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
+ const int mode_search_skip_flags = sf->mode_search_skip_flags;
vp9_zero(best_mbmode);
- x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
+ x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
@@ -2619,7 +2864,7 @@
}
}
- *returnrate = INT_MAX;
+ rd_cost->rate = INT_MAX;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
@@ -2638,7 +2883,7 @@
// are masked out.
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- } else if (cpi->sf.reference_masking) {
+ } else if (sf->reference_masking) {
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
// Skip fixed mv modes for poor references
if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
@@ -2676,7 +2921,7 @@
}
if (cpi->rc.is_src_frame_alt_ref) {
- if (cpi->sf.alt_ref_search_fp) {
+ if (sf->alt_ref_search_fp) {
mode_skip_mask[ALTREF_FRAME] = 0;
ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
@@ -2683,18 +2928,30 @@
}
}
- if (bsize > cpi->sf.max_intra_bsize) {
+ if (sf->alt_ref_search_fp)
+ if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
+ if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
+ mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
+
+ if (sf->adaptive_mode_search) {
+ if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
+ cpi->rc.frames_since_golden >= 3)
+ if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
+ mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
+ }
+
+ if (bsize > sf->max_intra_bsize) {
ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
}
mode_skip_mask[INTRA_FRAME] |=
- ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
+ ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
for (i = 0; i < MAX_MODES; ++i)
mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
- midx = cpi->sf.schedule_mode_search ? mode_skip_start : 0;
+ midx = sf->schedule_mode_search ? mode_skip_start : 0;
while (midx > 4) {
uint8_t end_pos = 0;
for (i = 5; i < midx; ++i) {
@@ -2758,18 +3015,18 @@
continue;
// Test best rd so far against threshold for trying this mode.
- if (best_mode_skippable && cpi->sf.schedule_mode_search)
+ if (best_mode_skippable && sf->schedule_mode_search)
mode_threshold[mode_index] <<= 1;
if (best_rd < mode_threshold[mode_index])
continue;
- if (cpi->sf.motion_field_mode_search) {
+ if (sf->motion_field_mode_search) {
const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],
tile->mi_col_end - mi_col);
const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
tile->mi_row_end - mi_row);
- const int bsl = mi_width_log2(bsize);
+ const int bsl = mi_width_log2_lookup[bsize];
int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
+ get_chessboard_index(cm->current_video_frame)) & 0x1;
MB_MODE_INFO *ref_mbmi;
@@ -2838,7 +3095,7 @@
}
if (ref_frame == INTRA_FRAME) {
- if (cpi->sf.adaptive_mode_search)
+ if (sf->adaptive_mode_search)
if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
continue;
@@ -2895,14 +3152,15 @@
if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
+ struct macroblockd_plane *const pd = &xd->plane[1];
+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
NULL, bsize, tx_cache, best_rd);
-
if (rate_y == INT_MAX)
continue;
- uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,
- pd[1].subsampling_y);
+ uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
+ pd->subsampling_y);
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
&rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
@@ -2922,8 +3180,7 @@
this_rd = handle_inter_mode(cpi, x, bsize,
tx_cache,
&rate2, &distortion2, &skippable,
- &rate_y, &distortion_y,
- &rate_uv, &distortion_uv,
+ &rate_y, &rate_uv,
&disable_skip, frame_mv,
mi_row, mi_col,
single_newmv, single_inter_filter,
@@ -3004,8 +3261,9 @@
best_pred_sse = x->pred_sse[ref_frame];
}
- *returnrate = rate2;
- *returndistortion = distortion2;
+ rd_cost->rate = rate2;
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
best_rd = this_rd;
best_mbmode = *mbmi;
best_skip2 = this_skip2;
@@ -3020,9 +3278,14 @@
// based on qp, activity mask and history
if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(mode_index > MIN_EARLY_TERM_INDEX)) {
- const int qstep = xd->plane[0].dequant[1];
+ int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index
int scale = 4;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ qstep >>= (xd->bd - 8);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
if (x->source_variance < UINT_MAX) {
const int var_adjust = (x->source_variance < 16);
scale -= var_adjust;
@@ -3130,11 +3393,14 @@
best_mbmode.mode = ZEROMV;
}
- if (best_mode_index < 0 || best_rd >= best_rd_so_far)
- return INT64_MAX;
+ if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->rdcost = INT64_MAX;
+ return;
+ }
// If we used an estimate for the uv intra rd in the loop above...
- if (cpi->sf.use_uv_intra_rd_estimate) {
+ if (sf->use_uv_intra_rd_estimate) {
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
TX_SIZE uv_tx_size;
@@ -3191,18 +3457,33 @@
// updating code causes PSNR loss. Need to figure out the confliction.
x->skip |= best_mode_skippable;
+ if (!x->skip && !x->select_tx_size) {
+ int has_high_freq_coeff = 0;
+ int plane;
+ int max_plane = is_inter_block(&xd->mi[0].src_mi->mbmi)
+ ? MAX_MB_PLANE : 1;
+ for (plane = 0; plane < max_plane; ++plane) {
+ x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
+ has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
+ }
+
+ for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
+ x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
+ has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
+ }
+
+ best_mode_skippable |= !has_high_freq_coeff;
+ }
+
store_coding_context(x, ctx, best_mode_index, best_pred_diff,
best_tx_diff, best_filter_diff, best_mode_skippable);
-
- return best_rd;
}
-int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
- int *returnrate,
- int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far) {
+void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
+ RD_COST *rd_cost,
+ BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
RD_OPT *const rd_opt = &cpi->rd;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3230,7 +3511,7 @@
for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
x->pred_mv_sad[i] = INT_MAX;
- *returnrate = INT_MAX;
+ rd_cost->rate = INT_MAX;
assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
@@ -3279,11 +3560,15 @@
rate2 += ref_costs_single[LAST_FRAME];
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- *returnrate = rate2;
- *returndistortion = distortion2;
+ rd_cost->rate = rate2;
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
- if (this_rd >= best_rd_so_far)
- return INT64_MAX;
+ if (this_rd >= best_rd_so_far) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->rdcost = INT64_MAX;
+ return;
+ }
assert((cm->interp_filter == SWITCHABLE) ||
(cm->interp_filter == mbmi->interp_filter));
@@ -3298,20 +3583,18 @@
swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
store_coding_context(x, ctx, THR_ZEROMV,
best_pred_diff, best_tx_diff, best_filter_diff, 0);
-
- return this_rd;
}
-int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
- int mi_row, int mi_col,
- int *returnrate,
- int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far) {
+void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
+ const TileInfo *const tile,
+ int mi_row, int mi_col,
+ RD_COST *rd_cost,
+ BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
RD_OPT *const rd_opt = &cpi->rd;
+ SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
const struct segmentation *const seg = &cm->seg;
@@ -3338,14 +3621,14 @@
int64_t dist_uv;
int skip_uv;
PREDICTION_MODE mode_uv = DC_PRED;
- const int intra_cost_penalty =
- 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+ const int intra_cost_penalty = vp9_get_intra_cost_penalty(
+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
int ref_frame_skip_mask[2] = { 0 };
- x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
+ x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
vp9_zero(best_mbmode);
@@ -3364,7 +3647,7 @@
best_filter_rd[i] = INT64_MAX;
rate_uv_intra = INT_MAX;
- *returnrate = INT_MAX;
+ rd_cost->rate = INT_MAX;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
@@ -3398,7 +3681,7 @@
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
- if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
+ if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
if (ref_index == 3) {
switch (best_mbmode.ref_frame[0]) {
case INTRA_FRAME:
@@ -3443,7 +3726,7 @@
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
- if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+ if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
best_mbmode.ref_frame[0] == INTRA_FRAME)
continue;
}
@@ -3547,12 +3830,12 @@
if (cm->interp_filter != BILINEAR) {
tmp_best_filter = EIGHTTAP;
- if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
+ if (x->source_variance < sf->disable_filter_search_var_thresh) {
tmp_best_filter = EIGHTTAP;
- } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
+ } else if (sf->adaptive_pred_interp_filter == 1 &&
ctx->pred_interp_filter < SWITCHABLE) {
tmp_best_filter = ctx->pred_interp_filter;
- } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
+ } else if (sf->adaptive_pred_interp_filter == 2) {
tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
ctx->pred_interp_filter : 0;
} else {
@@ -3605,7 +3888,7 @@
}
pred_exists = 1;
if (switchable_filter_index == 0 &&
- cpi->sf.use_rd_breakout &&
+ sf->use_rd_breakout &&
best_rd < INT64_MAX) {
if (tmp_best_rdu / 2 > best_rd) {
// skip searching the other filters if the first is
@@ -3668,10 +3951,11 @@
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_8X8);
- super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- &uv_sse, BLOCK_8X8, tmp_best_rdu);
- if (rate_uv == INT_MAX)
+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
+ if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ &uv_sse, BLOCK_8X8, tmp_best_rdu))
continue;
+
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -3738,8 +4022,9 @@
max_plane = 1;
}
- *returnrate = rate2;
- *returndistortion = distortion2;
+ rd_cost->rate = rate2;
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
best_rd = this_rd;
best_yrd = best_rd -
RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
@@ -3755,11 +4040,16 @@
// TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history
- if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
+ if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
(ref_index > MIN_EARLY_TERM_INDEX)) {
- const int qstep = xd->plane[0].dequant[1];
+ int qstep = xd->plane[0].dequant[1];
// TODO(debargha): Enhance this by specializing for each mode_index
int scale = 4;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ qstep >>= (xd->bd - 8);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
if (x->source_variance < UINT_MAX) {
const int var_adjust = (x->source_variance < 16);
scale -= var_adjust;
@@ -3826,11 +4116,14 @@
break;
}
- if (best_rd >= best_rd_so_far)
- return INT64_MAX;
+ if (best_rd >= best_rd_so_far) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->rdcost = INT64_MAX;
+ return;
+ }
// If we used an estimate for the uv intra rd in the loop above...
- if (cpi->sf.use_uv_intra_rd_estimate) {
+ if (sf->use_uv_intra_rd_estimate) {
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
*mbmi = best_mbmode;
@@ -3843,9 +4136,10 @@
}
if (best_rd == INT64_MAX) {
- *returnrate = INT_MAX;
- *returndistortion = INT64_MAX;
- return best_rd;
+ rd_cost->rate = INT_MAX;
+ rd_cost->dist = INT64_MAX;
+ rd_cost->rdcost = INT64_MAX;
+ return;
}
assert((cm->interp_filter == SWITCHABLE) ||
@@ -3891,7 +4185,5 @@
store_coding_context(x, ctx, best_ref_index,
best_pred_diff, best_tx_diff, best_filter_diff, 0);
-
- return best_rd;
}
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_resize.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698