source/libvpx/vp9/encoder/vp9_rdopt.c - Issue 668403002: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp9/encoder/vp9_rdopt.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp9/encoder/vp9_rdopt.c

===================================================================

--- source/libvpx/vp9/encoder/vp9_rdopt.c (revision 292608)

+++ source/libvpx/vp9/encoder/vp9_rdopt.c (working copy)

@@ -131,7 +131,7 @@

static int raster_block_offset(BLOCK_SIZE plane_bsize,

int raster_block, int stride) {

- const int bw = b_width_log2(plane_bsize);

+ const int bw = b_width_log2_lookup[plane_bsize];

const int y = 4 * (raster_block >> bw);

const int x = 4 * (raster_block & ((1 << bw) - 1));

return y * stride + x;

@@ -169,7 +169,8 @@

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,

MACROBLOCK *x, MACROBLOCKD *xd,

- int *out_rate_sum, int64_t *out_dist_sum) {

+ int *out_rate_sum, int64_t *out_dist_sum,

+ int *skip_txfm_sb, int64_t *skip_sse_sb) {

// Note our transform coeffs are 8 times an orthogonal transform.

// Hence quantizer step is also 8 times. To get effective quantizer

// we need to divide by 8 before sending to modeling function.

@@ -180,7 +181,9 @@

unsigned int sse;

unsigned int var = 0;

unsigned int sum_sse = 0;

- const int shift = 8;

+ int64_t total_sse = 0;

+ int skip_flag = 1;

+ const int shift = 6;

int rate;

int64_t dist;

@@ -192,6 +195,12 @@

const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

const TX_SIZE max_tx_size = max_txsize_lookup[bs];

const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];

+ const int64_t dc_thr = p->quant_thred[0] >> shift;

+ const int64_t ac_thr = p->quant_thred[1] >> shift;

+ // The low thresholds are used to measure if the prediction errors are

+ // low enough so that we can skip the mode search.

+ const int64_t low_dc_thr = MIN(50, dc_thr >> 2);

+ const int64_t low_ac_thr = MIN(80, ac_thr >> 2);

int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);

int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);

int idx, idy;

@@ -205,6 +214,7 @@

uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);

uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);

int block_idx = (idy << 1) + idx;

+ int low_err_skip = 0;

var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,

dst, pd->dst.stride, &sse);

@@ -211,26 +221,42 @@

x->bsse[(i << 2) + block_idx] = sse;

sum_sse += sse;

+ x->skip_txfm[(i << 2) + block_idx] = 0;

if (!x->select_tx_size) {

- if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)

- x->skip_txfm[(i << 2) + block_idx] = 1;

- else if (var < p->quant_thred[1] >> shift)

+ // Check if all ac coefficients can be quantized to zero.

+ if (var < ac_thr || var == 0) {

x->skip_txfm[(i << 2) + block_idx] = 2;

- else

- x->skip_txfm[(i << 2) + block_idx] = 0;

+ // Check if dc coefficient can be quantized to zero.

+ if (sse - var < dc_thr || sse == var) {

+ x->skip_txfm[(i << 2) + block_idx] = 1;

+ if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))

+ low_err_skip = 1;

+ }

}

+ if (skip_flag && !low_err_skip)

+ skip_flag = 0;

if (i == 0)

x->pred_sse[ref] += sse;

}

+ total_sse += sum_sse;

// Fast approximation of the modelling function.

if (cpi->oxcf.speed > 4) {

int64_t rate;

- int64_t dist;

- int64_t square_error = sse;

+ const int64_t square_error = sum_sse;

int quantizer = (pd->dequant[1] >> 3);

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ quantizer >>= (xd->bd - 8);

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

if (quantizer < 120)

rate = (square_error * (280 - quantizer)) >> 8;

@@ -240,13 +266,26 @@

rate_sum += rate;

dist_sum += dist;

} else {

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],

+ pd->dequant[1] >> (xd->bd - 5),

+ &rate, &dist);

+ } else {

+ vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],

+ pd->dequant[1] >> 3, &rate, &dist);

+ }

+#else

vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],

pd->dequant[1] >> 3, &rate, &dist);

+#endif // CONFIG_VP9_HIGHBITDEPTH

rate_sum += rate;

dist_sum += dist;

}

+ *skip_txfm_sb = skip_flag;

+ *skip_sse_sb = total_sse << 4;

*out_rate_sum = (int)rate_sum;

*out_dist_sum = dist_sum << 4;

}

@@ -266,6 +305,31 @@

return error;

}

+#if CONFIG_VP9_HIGHBITDEPTH

+int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,

+ const tran_low_t *dqcoeff,

+ intptr_t block_size,

+ int64_t *ssz, int bd) {

+ int i;

+ int64_t error = 0, sqcoeff = 0;

+ int shift = 2 * (bd - 8);

+ int rounding = shift > 0 ? 1 << (shift - 1) : 0;

+ for (i = 0; i < block_size; i++) {

+ const int64_t diff = coeff[i] - dqcoeff[i];

+ error += diff * diff;

+ sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];

+ }

+ assert(error >= 0 && sqcoeff >= 0);

+ error = (error + rounding) >> shift;

+ sqcoeff = (sqcoeff + rounding) >> shift;

+ *ssz = sqcoeff;

+ return error;

+#endif // CONFIG_VP9_HIGHBITDEPTH

/* The trailing '0' is a terminator which is used inside cost_coeffs() to

* decide whether to include cost of a trailing EOB node or not (i.e. we

* can skip this if the last coefficient in this transform block, e.g. the

@@ -351,8 +415,14 @@

return cost;

}

+#if CONFIG_VP9_HIGHBITDEPTH

static void dist_block(int plane, int block, TX_SIZE tx_size,

+ struct rdcost_block_args* args, int bd) {

+#else

+static void dist_block(int plane, int block, TX_SIZE tx_size,

struct rdcost_block_args* args) {

+#endif // CONFIG_VP9_HIGHBITDEPTH

const int ss_txfrm_size = tx_size << 1;

MACROBLOCK* const x = args->x;

MACROBLOCKD* const xd = &x->e_mbd;

@@ -362,8 +432,13 @@

int shift = tx_size == TX_32X32 ? 0 : 2;

tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);

tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);

+#if CONFIG_VP9_HIGHBITDEPTH

+ args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,

+ &this_sse, bd) >> shift;

+#else

args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,

&this_sse) >> shift;

+#endif // CONFIG_VP9_HIGHBITDEPTH

args->sse = this_sse >> shift;

if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {

@@ -370,6 +445,11 @@

// TODO(jingning): tune the model to better capture the distortion.

int64_t p = (pd->dequant[1] * pd->dequant[1] *

(1 << ss_txfrm_size)) >> (shift + 2);

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ p >>= ((xd->bd - 8) * 2);

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

args->dist += (p >> 4);

args->sse += p;

}

@@ -399,12 +479,28 @@

if (!is_inter_block(mbmi)) {

vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ dist_block(plane, block, tx_size, args, xd->bd);

+ } else {

+ dist_block(plane, block, tx_size, args, 8);

+ }

+#else

dist_block(plane, block, tx_size, args);

+#endif // CONFIG_VP9_HIGHBITDEPTH

} else if (max_txsize_lookup[plane_bsize] == tx_size) {

if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {

// full forward transform and quantization

vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ dist_block(plane, block, tx_size, args, xd->bd);

+ } else {

+ dist_block(plane, block, tx_size, args, 8);

+ }

+#else

dist_block(plane, block, tx_size, args);

+#endif // CONFIG_VP9_HIGHBITDEPTH

} else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {

// compute DC coefficient

tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);

@@ -412,9 +508,17 @@

vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);

args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;

args->dist = args->sse;

- if (!x->plane[plane].eobs[block])

- args->dist = args->sse - ((coeff[0] * coeff[0] -

- (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);

+ if (x->plane[plane].eobs[block]) {

+ int64_t dc_correct = coeff[0] * coeff[0] -

+ (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0]);

+#if CONFIG_VP9_HIGHBITDEPTH

+ dc_correct >>= ((xd->bd - 8) * 2);

+#endif

+ if (tx_size != TX_32X32)

+ dc_correct >>= 2;

+ args->dist = MAX(0, args->sse - dc_correct);

+ }

} else {

// skip forward transform

x->plane[plane].eobs[block] = 0;

@@ -424,7 +528,15 @@

} else {

// full forward transform and quantization

vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ dist_block(plane, block, tx_size, args, xd->bd);

+ } else {

+ dist_block(plane, block, tx_size, args, 8);

+ }

+#else

dist_block(plane, block, tx_size, args);

+#endif // CONFIG_VP9_HIGHBITDEPTH

}

rate_block(plane, block, plane_bsize, tx_size, args);

@@ -659,6 +771,9 @@

const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];

int idx, idy;

uint8_t best_dst[8 * 8];

+#if CONFIG_VP9_HIGHBITDEPTH

+ uint16_t best_dst16[8 * 8];

+#endif

assert(ib < 4);

@@ -666,6 +781,108 @@

vpx_memcpy(tl, l, sizeof(tl));

xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {

+ int64_t this_rd;

+ int ratey = 0;

+ int64_t distortion = 0;

+ int rate = bmode_costs[mode];

+ if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))

+ continue;

+ // Only do the oblique modes if the best so far is

+ // one of the neighboring directional modes

+ if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {

+ if (conditional_skipintra(mode, *best_mode))

+ continue;

+ }

+ vpx_memcpy(tempa, ta, sizeof(ta));

+ vpx_memcpy(templ, tl, sizeof(tl));

+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {

+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {

+ const int block = ib + idy * 2 + idx;

+ const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];

+ uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];

+ int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,

+ p->src_diff);

+ tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);

+ xd->mi[0].src_mi->bmi[block].as_mode = mode;

+ vp9_predict_intra_block(xd, block, 1,

+ TX_4X4, mode,

+ x->skip_encode ? src : dst,

+ x->skip_encode ? src_stride : dst_stride,

+ dst, dst_stride, idx, idy, 0);

+ vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,

+ dst, dst_stride, xd->bd);

+ if (xd->lossless) {

+ const scan_order *so = &vp9_default_scan_orders[TX_4X4];

+ vp9_highbd_fwht4x4(src_diff, coeff, 8);

+ vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);

+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,

+ so->scan, so->neighbors,

+ cpi->sf.use_fast_coef_costing);

+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)

+ goto next_highbd;

+ vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),

+ dst, dst_stride,

+ p->eobs[block], xd->bd);

+ } else {

+ int64_t unused;

+ const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);

+ const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];

+ vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);

+ vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);

+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,

+ so->scan, so->neighbors,

+ cpi->sf.use_fast_coef_costing);

+ distortion += vp9_highbd_block_error(

+ coeff, BLOCK_OFFSET(pd->dqcoeff, block),

+ 16, &unused, xd->bd) >> 2;

+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)

+ goto next_highbd;

+ vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),

+ dst, dst_stride, p->eobs[block], xd->bd);

+ }

+ rate += ratey;

+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

+ if (this_rd < best_rd) {

+ *bestrate = rate;

+ *bestratey = ratey;

+ *bestdistortion = distortion;

+ best_rd = this_rd;

+ *best_mode = mode;

+ vpx_memcpy(a, tempa, sizeof(tempa));

+ vpx_memcpy(l, templ, sizeof(templ));

+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {

+ vpx_memcpy(best_dst16 + idy * 8,

+ CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),

+ num_4x4_blocks_wide * 4 * sizeof(uint16_t));

+ }

+ next_highbd:

+ {}

+ }

+ if (best_rd >= rd_thresh || x->skip_encode)

+ return best_rd;

+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {

+ vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),

+ best_dst16 + idy * 8,

+ num_4x4_blocks_wide * 4 * sizeof(uint16_t));

+ }

+ return best_rd;

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

for (mode = DC_PRED; mode <= TM_PRED; ++mode) {

int64_t this_rd;

int ratey = 0;

@@ -827,6 +1044,7 @@

return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);

}

+// This function is used only for intra_only frames

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,

int *rate, int *rate_tokenonly,

int64_t *distortion, int *skippable,

@@ -841,24 +1059,21 @@

int64_t this_distortion, this_rd;

TX_SIZE best_tx = TX_4X4;

int i;

- int *bmode_costs = cpi->mbmode_cost;

+ int *bmode_costs;

+ const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;

+ const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;

+ const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);

+ const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);

+ bmode_costs = cpi->y_mode_costs[A][L];

if (cpi->sf.tx_size_search_method == USE_FULL_RD)

for (i = 0; i < TX_MODES; i++)

tx_cache[i] = INT64_MAX;

+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

/* Y Search for intra prediction mode */

for (mode = DC_PRED; mode <= TM_PRED; mode++) {

int64_t local_tx_cache[TX_MODES];

- MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;

- MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;

- if (cpi->common.frame_type == KEY_FRAME) {

- const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);

- const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);

- bmode_costs = cpi->y_mode_costs[A][L];

- }

mic->mbmi.mode = mode;

super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,

@@ -897,10 +1112,12 @@

return best_rd;

}

-static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,

- int *rate, int64_t *distortion, int *skippable,

- int64_t *sse, BLOCK_SIZE bsize,

- int64_t ref_best_rd) {

+// Return value 0: early termination triggered, no valid rd cost available;

+// 1: rd cost values are valid.

+static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,

+ int *rate, int64_t *distortion, int *skippable,

+ int64_t *sse, BLOCK_SIZE bsize,

+ int64_t ref_best_rd) {

MACROBLOCKD *const xd = &x->e_mbd;

MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;

const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);

@@ -907,11 +1124,12 @@

int plane;

int pnrate = 0, pnskip = 1;

int64_t pndist = 0, pnsse = 0;

+ int is_cost_valid = 1;

if (ref_best_rd < 0)

- goto term;

+ is_cost_valid = 0;

- if (is_inter_block(mbmi)) {

+ if (is_inter_block(mbmi) && is_cost_valid) {

int plane;

for (plane = 1; plane < MAX_MB_PLANE; ++plane)

vp9_subtract_plane(x, bsize, plane);

@@ -926,21 +1144,25 @@

txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,

ref_best_rd, plane, bsize, uv_tx_size,

cpi->sf.use_fast_coef_costing);

- if (pnrate == INT_MAX)

- goto term;

+ if (pnrate == INT_MAX) {

+ is_cost_valid = 0;

+ break;

+ }

*rate += pnrate;

*distortion += pndist;

*sse += pnsse;

*skippable &= pnskip;

}

- return;

- term:

- *rate = INT_MAX;

- *distortion = INT64_MAX;

- *sse = INT64_MAX;

- *skippable = 0;

- return;

+ if (!is_cost_valid) {

+ // reset cost value

+ *rate = INT_MAX;

+ *distortion = INT64_MAX;

+ *sse = INT64_MAX;

+ *skippable = 0;

+ }

+ return is_cost_valid;

}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,

@@ -955,6 +1177,7 @@

int this_rate_tokenonly, this_rate, s;

int64_t this_distortion, this_sse;

+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

for (mode = DC_PRED; mode <= TM_PRED; ++mode) {

if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))

continue;

@@ -961,9 +1184,8 @@

xd->mi[0].src_mi->mbmi.uv_mode = mode;

- super_block_uvrd(cpi, x, &this_rate_tokenonly,

- &this_distortion, &s, &this_sse, bsize, best_rd);

- if (this_rate_tokenonly == INT_MAX)

+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,

+ &this_distortion, &s, &this_sse, bsize, best_rd))

continue;

this_rate = this_rate_tokenonly +

cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];

@@ -993,6 +1215,7 @@

int64_t unused;

x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;

+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

super_block_uvrd(cpi, x, rate_tokenonly, distortion,

skippable, &unused, bsize, INT64_MAX);

*rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];

@@ -1121,6 +1344,16 @@

for (ref = 0; ref < 1 + is_compound; ++ref) {

const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,

pd->pre[ref].stride)];

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,

+ dst, pd->dst.stride,

+ &mi->bmi[i].as_mv[ref].as_mv,

+ &xd->block_refs[ref]->sf, width, height,

+ ref, kernel, MV_PRECISION_Q3,

+ mi_col * MI_SIZE + 4 * (i % 2),

+ mi_row * MI_SIZE + 4 * (i / 2), xd->bd);

+ } else {

vp9_build_inter_predictor(pre, pd->pre[ref].stride,

dst, pd->dst.stride,

&mi->bmi[i].as_mv[ref].as_mv,

@@ -1129,11 +1362,32 @@

mi_col * MI_SIZE + 4 * (i % 2),

mi_row * MI_SIZE + 4 * (i / 2));

}

+#else

+ vp9_build_inter_predictor(pre, pd->pre[ref].stride,

+ dst, pd->dst.stride,

+ &mi->bmi[i].as_mv[ref].as_mv,

+ &xd->block_refs[ref]->sf, width, height, ref,

+ kernel, MV_PRECISION_Q3,

+ mi_col * MI_SIZE + 4 * (i % 2),

+ mi_row * MI_SIZE + 4 * (i / 2));

+#endif // CONFIG_VP9_HIGHBITDEPTH

+ }

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ vp9_highbd_subtract_block(

+ height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,

+ src, p->src.stride, dst, pd->dst.stride, xd->bd);

+ } else {

+ vp9_subtract_block(

+ height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,

+ src, p->src.stride, dst, pd->dst.stride);

+ }

+#else

vp9_subtract_block(height, width,

raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,

- src, p->src.stride,

- dst, pd->dst.stride);

+ src, p->src.stride, dst, pd->dst.stride);

+#endif // CONFIG_VP9_HIGHBITDEPTH

k = i;

for (idy = 0; idy < height / 4; ++idy) {

@@ -1146,8 +1400,19 @@

x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),

coeff, 8);

vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ thisdistortion += vp9_highbd_block_error(coeff,

+ BLOCK_OFFSET(pd->dqcoeff, k),

+ 16, &ssz, xd->bd);

+ } else {

+ thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),

+ 16, &ssz);

+ }

+#else

thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),

16, &ssz);

+#endif // CONFIG_VP9_HIGHBITDEPTH

thissse += ssz;

thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,

so->scan, so->neighbors,

@@ -1369,7 +1634,7 @@

int sadpb = x->sadperbit4;

MV mvp_full;

int max_mv;

- int sad_list[5];

+ int cost_list[5];

/* Is the best so far sufficiently good that we can't justify doing

* a new motion search. */

@@ -1415,7 +1680,7 @@

bestsme = vp9_full_pixel_search(

cpi, x, bsize, &mvp_full, step_param, sadpb,

- cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,

+ cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,

&bsi->ref_mv[0]->as_mv, new_mv,

INT_MAX, 1);

@@ -1429,7 +1694,7 @@

sadpb, 16, &cpi->fn_ptr[bsize],

&bsi->ref_mv[0]->as_mv,

&best_mv->as_mv);

- sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;

+ cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;

if (thissme < bestsme) {

bestsme = thissme;

*new_mv = best_mv->as_mv;

@@ -1450,7 +1715,7 @@

x->errorperbit, &cpi->fn_ptr[bsize],

cpi->sf.mv.subpel_force_stop,

cpi->sf.mv.subpel_iters_per_step,

- cond_sad_list(cpi, sad_list),

+ cond_cost_list(cpi, cost_list),

x->nmvjointcost, x->mvcost,

&distortion,

&x->pred_sse[mbmi->ref_frame[0]],

@@ -1784,7 +2049,7 @@

int tmp_col_max = x->mv_col_max;

int tmp_row_min = x->mv_row_min;

int tmp_row_max = x->mv_row_max;

- int sad_list[5];

+ int cost_list[5];

const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,

ref);

@@ -1820,14 +2085,14 @@

}

if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {

- int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),

- b_width_log2(bsize)));

+ int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -

+ MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));

step_param = MAX(step_param, boffset);

}

if (cpi->sf.adaptive_motion_search) {

- int bwl = b_width_log2(bsize);

- int bhl = b_height_log2(bsize);

+ int bwl = b_width_log2_lookup[bsize];

+ int bhl = b_height_log2_lookup[bsize];

int i;

int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

@@ -1856,7 +2121,7 @@

mvp_full.row >>= 3;

bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,

- cond_sad_list(cpi, sad_list),

+ cond_cost_list(cpi, cost_list),

&ref_mv, &tmp_mv->as_mv, INT_MAX, 1);

x->mv_col_min = tmp_col_min;

@@ -1872,7 +2137,7 @@

&cpi->fn_ptr[bsize],

cpi->sf.mv.subpel_force_stop,

cpi->sf.mv.subpel_iters_per_step,

- cond_sad_list(cpi, sad_list),

+ cond_cost_list(cpi, cost_list),

x->nmvjointcost, x->mvcost,

&dis, &x->pred_sse[ref], NULL, 0, 0);

}

@@ -1904,7 +2169,12 @@

int_mv ref_mv[2];

int ite, ref;

// Prediction buffer from second frame.

+#if CONFIG_VP9_HIGHBITDEPTH

+ uint8_t *second_pred;

+ uint8_t *second_pred_alloc;

+#else

uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));

+#endif // CONFIG_VP9_HIGHBITDEPTH

const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);

// Do joint motion search in compound mode to get more accurate mv.

@@ -1915,6 +2185,15 @@

vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),

vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])

};

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t));

+ second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc);

+ } else {

+ second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t));

+ second_pred = second_pred_alloc;

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

for (ref = 0; ref < 2; ++ref) {

ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];

@@ -1953,6 +2232,28 @@

ref_yv12[1] = xd->plane[0].pre[1];

// Get pred block from second frame.

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ vp9_highbd_build_inter_predictor(ref_yv12[!id].buf,

+ ref_yv12[!id].stride,

+ second_pred, pw,

+ &frame_mv[refs[!id]].as_mv,

+ &xd->block_refs[!id]->sf,

+ pw, ph, 0,

+ kernel, MV_PRECISION_Q3,

+ mi_col * MI_SIZE, mi_row * MI_SIZE,

+ xd->bd);

+ } else {

+ vp9_build_inter_predictor(ref_yv12[!id].buf,

+ ref_yv12[!id].stride,

+ second_pred, pw,

+ &frame_mv[refs[!id]].as_mv,

+ &xd->block_refs[!id]->sf,

+ pw, ph, 0,

+ kernel, MV_PRECISION_Q3,

+ mi_col * MI_SIZE, mi_row * MI_SIZE);

+ }

+#else

vp9_build_inter_predictor(ref_yv12[!id].buf,

ref_yv12[!id].stride,

second_pred, pw,

@@ -1961,6 +2262,7 @@

pw, ph, 0,

kernel, MV_PRECISION_Q3,

mi_col * MI_SIZE, mi_row * MI_SIZE);

+#endif // CONFIG_VP9_HIGHBITDEPTH

// Compound motion search on first ref frame.

if (id)

@@ -2029,7 +2331,11 @@

x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

}

+#if CONFIG_VP9_HIGHBITDEPTH

+ vpx_free(second_pred_alloc);

+#else

vpx_free(second_pred);

+#endif // CONFIG_VP9_HIGHBITDEPTH

}

static INLINE void restore_dst_buf(MACROBLOCKD *xd,

@@ -2042,93 +2348,12 @@

}

-static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,

- BLOCK_SIZE bsize, int *rate2,

- int64_t *distortion, int64_t *distortion_uv,

- int *disable_skip) {

- VP9_COMMON *cm = &cpi->common;

- MACROBLOCKD *xd = &x->e_mbd;

- const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);

- const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);

- unsigned int var, sse;

- // Skipping threshold for ac.

- unsigned int thresh_ac;

- // Skipping threshold for dc

- unsigned int thresh_dc;

- var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,

- xd->plane[0].dst.buf,

- xd->plane[0].dst.stride, &sse);

- if (x->encode_breakout > 0) {

- // Set a maximum for threshold to avoid big PSNR loss in low bitrate

- // case. Use extreme low threshold for static frames to limit skipping.

- const unsigned int max_thresh = (cpi->allow_encode_breakout ==

- ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;

- // The encode_breakout input

- const unsigned int min_thresh =

- MIN(((unsigned int)x->encode_breakout << 4), max_thresh);

- // Calculate threshold according to dequant value.

- thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;

- thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

- // Adjust threshold according to partition size.

- thresh_ac >>= 8 - (b_width_log2(bsize) +

- b_height_log2(bsize));

- thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);

- } else {

- thresh_ac = 0;

- thresh_dc = 0;

- }

- // Y skipping condition checking

- if (sse < thresh_ac || sse == 0) {

- // dc skipping checking

- if ((sse - var) < thresh_dc || sse == var) {

- unsigned int sse_u, sse_v;

- unsigned int var_u, var_v;

- var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,

- x->plane[1].src.stride,

- xd->plane[1].dst.buf,

- xd->plane[1].dst.stride, &sse_u);

- // U skipping condition checking

- if ((sse_u * 4 < thresh_ac || sse_u == 0) &&

- (sse_u - var_u < thresh_dc || sse_u == var_u)) {

- var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,

- x->plane[2].src.stride,

- xd->plane[2].dst.buf,

- xd->plane[2].dst.stride, &sse_v);

- // V skipping condition checking

- if ((sse_v * 4 < thresh_ac || sse_v == 0) &&

- (sse_v - var_v < thresh_dc || sse_v == var_v)) {

- x->skip = 1;

- // The cost of skip bit needs to be added.

- *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

- // Scaling factor for SSE from spatial domain to frequency domain

- // is 16. Adjust distortion accordingly.

- *distortion_uv = (sse_u + sse_v) << 4;

- *distortion = (sse << 4) + *distortion_uv;

- *disable_skip = 1;

- }

static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,

BLOCK_SIZE bsize,

int64_t txfm_cache[],

int *rate2, int64_t *distortion,

int *skippable,

- int *rate_y, int64_t *distortion_y,

- int *rate_uv, int64_t *distortion_uv,

+ int *rate_y, int *rate_uv,

int *disable_skip,

int_mv (*mode_mv)[MAX_REF_FRAMES],

int mi_row, int mi_col,

@@ -2148,8 +2373,13 @@

int refs[2] = { mbmi->ref_frame[0],

(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };

int_mv cur_mv[2];

- int64_t this_rd = 0;

+#if CONFIG_VP9_HIGHBITDEPTH

+ DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);

+ DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);

+ uint8_t *tmp_buf;

+#else

DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);

+#endif // CONFIG_VP9_HIGHBITDEPTH

int pred_exists = 0;

int intpel_mv;

int64_t rd, tmp_rd, best_rd = INT64_MAX;

@@ -2166,6 +2396,18 @@

(((mi_row + mi_col) >> bsl) +

get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;

+ int skip_txfm_sb = 0;

+ int64_t skip_sse_sb = INT64_MAX;

+ int64_t distortion_y = 0, distortion_uv = 0;

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);

+ } else {

+ tmp_buf = tmp_buf8;

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

if (pred_filter_search) {

INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;

if (xd->up_available)

@@ -2275,6 +2517,9 @@

for (i = 0; i < SWITCHABLE_FILTERS; ++i) {

int j;

int64_t rs_rd;

+ int tmp_skip_sb = 0;

+ int64_t tmp_skip_sse = INT64_MAX;

mbmi->interp_filter = i;

rs = vp9_get_switchable_rate(cpi);

rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

@@ -2310,7 +2555,8 @@

}

vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);

- model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);

+ model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,

+ &tmp_skip_sb, &tmp_skip_sse);

rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);

rd_opt->filter_cache[i] = rd;

@@ -2339,8 +2585,6 @@

best_filter = mbmi->interp_filter;

if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)

best_needs_copy = !best_needs_copy;

- vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));

- vpx_memcpy(bsse, x->bsse, sizeof(bsse));

}

if ((cm->interp_filter == SWITCHABLE && newbest) ||

@@ -2348,6 +2592,11 @@

cm->interp_filter == mbmi->interp_filter)) {

pred_exists = 1;

tmp_rd = best_rd;

+ skip_txfm_sb = tmp_skip_sb;

+ skip_sse_sb = tmp_skip_sse;

+ vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));

+ vpx_memcpy(bsse, x->bsse, sizeof(bsse));

}

restore_dst_buf(xd, orig_dst, orig_dst_stride);

@@ -2374,7 +2623,8 @@

// switchable list (ex. bilinear) is indicated at the frame level, or

// skip condition holds.

vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);

- model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);

+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,

+ &skip_txfm_sb, &skip_sse_sb);

rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);

vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));

vpx_memcpy(bsse, x->bsse, sizeof(bsse));

@@ -2401,16 +2651,10 @@

if (cm->interp_filter == SWITCHABLE)

*rate2 += rs;

- if (!is_comp_pred) {

- if (cpi->allow_encode_breakout)

- rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,

- disable_skip);

- }

vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));

vpx_memcpy(x->bsse, bsse, sizeof(bsse));

- if (!x->skip) {

+ if (!skip_txfm_sb) {

int skippable_y, skippable_uv;

int64_t sseuv = INT64_MAX;

int64_t rdcosty = INT64_MAX;

@@ -2417,7 +2661,7 @@

// Y cost and distortion

vp9_subtract_plane(x, bsize, 0);

- super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,

+ super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,

bsize, txfm_cache, ref_best_rd);

if (*rate_y == INT_MAX) {

@@ -2428,14 +2672,13 @@

}

*rate2 += *rate_y;

- *distortion += *distortion_y;

+ *distortion += distortion_y;

rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);

rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

- super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,

- bsize, ref_best_rd - rdcosty);

- if (*rate_uv == INT_MAX) {

+ if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,

+ &sseuv, bsize, ref_best_rd - rdcosty)) {

*rate2 = INT_MAX;

*distortion = INT64_MAX;

restore_dst_buf(xd, orig_dst, orig_dst_stride);

@@ -2444,8 +2687,16 @@

*psse += sseuv;

*rate2 += *rate_uv;

- *distortion += *distortion_uv;

+ *distortion += distortion_uv;

*skippable = skippable_y && skippable_uv;

+ } else {

+ x->skip = 1;

+ *disable_skip = 1;

+ // The cost of skip bit needs to be added.

+ *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

+ *distortion = skip_sse_sb;

}

if (!is_comp_pred)

@@ -2452,12 +2703,11 @@

single_skippable[this_mode][refs[0]] = *skippable;

restore_dst_buf(xd, orig_dst, orig_dst_stride);

- return this_rd; // if 0, this will be re-calculated by caller

+ return 0; // The rate-distortion cost will be re-calculated by caller.

}

void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,

- int *returnrate, int64_t *returndist,

- BLOCK_SIZE bsize,

+ RD_COST *rd_cost, BLOCK_SIZE bsize,

PICK_MODE_CONTEXT *ctx, int64_t best_rd) {

VP9_COMMON *const cm = &cpi->common;

MACROBLOCKD *const xd = &x->e_mbd;

@@ -2474,37 +2724,34 @@

if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,

&dist_y, &y_skip, bsize, tx_cache,

best_rd) >= best_rd) {

- *returnrate = INT_MAX;

+ rd_cost->rate = INT_MAX;

return;

}

- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,

- pd[1].subsampling_x,

- pd[1].subsampling_y);

- rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,

- &dist_uv, &uv_skip, bsize, max_uv_tx_size);

} else {

y_skip = 0;

if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,

&dist_y, best_rd) >= best_rd) {

- *returnrate = INT_MAX;

+ rd_cost->rate = INT_MAX;

return;

}

- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,

- pd[1].subsampling_x,

- pd[1].subsampling_y);

- rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,

- &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);

}

+ max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,

+ pd[1].subsampling_x,

+ pd[1].subsampling_y);

+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,

+ &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),

+ max_uv_tx_size);

if (y_skip && uv_skip) {

- *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +

- vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

- *returndist = dist_y + dist_uv;

+ rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +

+ vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

+ rd_cost->dist = dist_y + dist_uv;

vp9_zero(ctx->tx_rd_diff);

} else {

int i;

- *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);

- *returndist = dist_y + dist_uv;

+ rd_cost->rate = rate_y + rate_uv +

+ vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);

+ rd_cost->dist = dist_y + dist_uv;

if (cpi->sf.tx_size_search_method == USE_FULL_RD)

for (i = 0; i < TX_MODES; i++) {

if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)

@@ -2515,13 +2762,9 @@

}

ctx->mic = *xd->mi[0].src_mi;

+ rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);

}

-// Updating rd_thresh_freq_fact[] here means that the different

-// partition/block sizes are handled independently based on the best

-// choice for the current partition. It may well be better to keep a scaled

-// best rd so far value and update rd_thresh_freq_fact based on the mode/size

-// combination that wins out.

static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,

int best_mode_index) {

if (cpi->sf.adaptive_rd_thresh > 0) {

@@ -2528,32 +2771,34 @@

const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;

int mode;

for (mode = 0; mode < top_mode; ++mode) {

- int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];

- if (mode == best_mode_index) {

- *fact -= (*fact >> 3);

- } else {

- *fact = MIN(*fact + RD_THRESH_INC,

- cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);

+ const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);

+ const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);

+ BLOCK_SIZE bs;

+ for (bs = min_size; bs <= max_size; ++bs) {

+ int *const fact = &cpi->rd.thresh_freq_fact[bs][mode];

+ if (mode == best_mode_index) {

+ *fact -= (*fact >> 4);

+ } else {

+ *fact = MIN(*fact + RD_THRESH_INC,

+ cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);

+ }

}

-int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,

- const TileInfo *const tile,

- int mi_row, int mi_col,

- int *returnrate,

- int64_t *returndistortion,

- BLOCK_SIZE bsize,

- PICK_MODE_CONTEXT *ctx,

- int64_t best_rd_so_far) {

+void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,

+ const TileInfo *const tile,

+ int mi_row, int mi_col,

+ RD_COST *rd_cost, BLOCK_SIZE bsize,

+ PICK_MODE_CONTEXT *ctx,

+ int64_t best_rd_so_far) {

VP9_COMMON *const cm = &cpi->common;

RD_OPT *const rd_opt = &cpi->rd;

+ SPEED_FEATURES *const sf = &cpi->sf;

MACROBLOCKD *const xd = &x->e_mbd;

MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;

const struct segmentation *const seg = &cm->seg;

- struct macroblockd_plane *const pd = xd->plane;

PREDICTION_MODE this_mode;

MV_REFERENCE_FRAME ref_frame, second_ref_frame;

unsigned char segment_id = mbmi->segment_id;

@@ -2584,20 +2829,20 @@

int64_t dist_uv[TX_SIZES];

int skip_uv[TX_SIZES];

PREDICTION_MODE mode_uv[TX_SIZES];

- const int intra_cost_penalty =

- 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);

+ const int intra_cost_penalty = vp9_get_intra_cost_penalty(

+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);

int best_skip2 = 0;

uint8_t ref_frame_skip_mask[2] = { 0 };

uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };

- int mode_skip_start = cpi->sf.mode_skip_start + 1;

+ int mode_skip_start = sf->mode_skip_start + 1;

const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];

const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];

- int mode_threshold[MAX_MODES];

+ int64_t mode_threshold[MAX_MODES];

int *mode_map = rd_opt->mode_map[bsize];

- const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;

+ const int mode_search_skip_flags = sf->mode_search_skip_flags;

vp9_zero(best_mbmode);

- x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

+ x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,

&comp_mode_p);

@@ -2619,7 +2864,7 @@

}

- *returnrate = INT_MAX;

+ rd_cost->rate = INT_MAX;

for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {

x->pred_mv_sad[ref_frame] = INT_MAX;

@@ -2638,7 +2883,7 @@

// are masked out.

ref_frame_skip_mask[0] |= (1 << ref_frame);

ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;

- } else if (cpi->sf.reference_masking) {

+ } else if (sf->reference_masking) {

for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {

// Skip fixed mv modes for poor references

if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {

@@ -2676,7 +2921,7 @@

}

if (cpi->rc.is_src_frame_alt_ref) {

- if (cpi->sf.alt_ref_search_fp) {

+ if (sf->alt_ref_search_fp) {

mode_skip_mask[ALTREF_FRAME] = 0;

ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);

ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;

@@ -2683,18 +2928,30 @@

}

- if (bsize > cpi->sf.max_intra_bsize) {

+ if (sf->alt_ref_search_fp)

+ if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)

+ if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))

+ mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;

+ if (sf->adaptive_mode_search) {

+ if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&

+ cpi->rc.frames_since_golden >= 3)

+ if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))

+ mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;

+ }

+ if (bsize > sf->max_intra_bsize) {

ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);

ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);

}

mode_skip_mask[INTRA_FRAME] |=

- ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);

+ ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);

for (i = 0; i < MAX_MODES; ++i)

mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;

- midx = cpi->sf.schedule_mode_search ? mode_skip_start : 0;

+ midx = sf->schedule_mode_search ? mode_skip_start : 0;

while (midx > 4) {

uint8_t end_pos = 0;

for (i = 5; i < midx; ++i) {

@@ -2758,18 +3015,18 @@

continue;

// Test best rd so far against threshold for trying this mode.

- if (best_mode_skippable && cpi->sf.schedule_mode_search)

+ if (best_mode_skippable && sf->schedule_mode_search)

mode_threshold[mode_index] <<= 1;

if (best_rd < mode_threshold[mode_index])

continue;

- if (cpi->sf.motion_field_mode_search) {

+ if (sf->motion_field_mode_search) {

const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],

tile->mi_col_end - mi_col);

const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],

tile->mi_row_end - mi_row);

- const int bsl = mi_width_log2(bsize);

+ const int bsl = mi_width_log2_lookup[bsize];

int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)

+ get_chessboard_index(cm->current_video_frame)) & 0x1;

MB_MODE_INFO *ref_mbmi;

@@ -2838,7 +3095,7 @@

}

if (ref_frame == INTRA_FRAME) {

- if (cpi->sf.adaptive_mode_search)

+ if (sf->adaptive_mode_search)

if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)

continue;

@@ -2895,14 +3152,15 @@

if (ref_frame == INTRA_FRAME) {

TX_SIZE uv_tx;

+ struct macroblockd_plane *const pd = &xd->plane[1];

+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,

NULL, bsize, tx_cache, best_rd);

if (rate_y == INT_MAX)

continue;

- uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,

- pd[1].subsampling_y);

+ uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,

+ pd->subsampling_y);

if (rate_uv_intra[uv_tx] == INT_MAX) {

choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,

&rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],

@@ -2922,8 +3180,7 @@

this_rd = handle_inter_mode(cpi, x, bsize,

tx_cache,

&rate2, &distortion2, &skippable,

- &rate_y, &distortion_y,

- &rate_uv, &distortion_uv,

+ &rate_y, &rate_uv,

&disable_skip, frame_mv,

mi_row, mi_col,

single_newmv, single_inter_filter,

@@ -3004,8 +3261,9 @@

best_pred_sse = x->pred_sse[ref_frame];

}

- *returnrate = rate2;

- *returndistortion = distortion2;

+ rd_cost->rate = rate2;

+ rd_cost->dist = distortion2;

+ rd_cost->rdcost = this_rd;

best_rd = this_rd;

best_mbmode = *mbmi;

best_skip2 = this_skip2;

@@ -3020,9 +3278,14 @@

// based on qp, activity mask and history

if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&

(mode_index > MIN_EARLY_TERM_INDEX)) {

- const int qstep = xd->plane[0].dequant[1];

+ int qstep = xd->plane[0].dequant[1];

// TODO(debargha): Enhance this by specializing for each mode_index

int scale = 4;

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ qstep >>= (xd->bd - 8);

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

if (x->source_variance < UINT_MAX) {

const int var_adjust = (x->source_variance < 16);

scale -= var_adjust;

@@ -3130,11 +3393,14 @@

best_mbmode.mode = ZEROMV;

}

- if (best_mode_index < 0 || best_rd >= best_rd_so_far)

- return INT64_MAX;

+ if (best_mode_index < 0 || best_rd >= best_rd_so_far) {

+ rd_cost->rate = INT_MAX;

+ rd_cost->rdcost = INT64_MAX;

+ return;

+ }

// If we used an estimate for the uv intra rd in the loop above...

- if (cpi->sf.use_uv_intra_rd_estimate) {

+ if (sf->use_uv_intra_rd_estimate) {

// Do Intra UV best rd mode selection if best mode choice above was intra.

if (best_mbmode.ref_frame[0] == INTRA_FRAME) {

TX_SIZE uv_tx_size;

@@ -3191,18 +3457,33 @@

// updating code causes PSNR loss. Need to figure out the conflict.

x->skip |= best_mode_skippable;

+ if (!x->skip && !x->select_tx_size) {

+ int has_high_freq_coeff = 0;

+ int plane;

+ int max_plane = is_inter_block(&xd->mi[0].src_mi->mbmi)

+ ? MAX_MB_PLANE : 1;

+ for (plane = 0; plane < max_plane; ++plane) {

+ x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];

+ has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);

+ }

+ for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {

+ x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];

+ has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);

+ }

+ best_mode_skippable |= !has_high_freq_coeff;

+ }

store_coding_context(x, ctx, best_mode_index, best_pred_diff,

best_tx_diff, best_filter_diff, best_mode_skippable);

- return best_rd;

}

-int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,

- int *returnrate,

- int64_t *returndistortion,

- BLOCK_SIZE bsize,

- PICK_MODE_CONTEXT *ctx,

- int64_t best_rd_so_far) {

+void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,

+ RD_COST *rd_cost,

+ BLOCK_SIZE bsize,

+ PICK_MODE_CONTEXT *ctx,

+ int64_t best_rd_so_far) {

VP9_COMMON *const cm = &cpi->common;

RD_OPT *const rd_opt = &cpi->rd;

MACROBLOCKD *const xd = &x->e_mbd;

@@ -3230,7 +3511,7 @@

for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)

x->pred_mv_sad[i] = INT_MAX;

- *returnrate = INT_MAX;

+ rd_cost->rate = INT_MAX;

assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

@@ -3279,11 +3560,15 @@

rate2 += ref_costs_single[LAST_FRAME];

this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

- *returnrate = rate2;

- *returndistortion = distortion2;

+ rd_cost->rate = rate2;

+ rd_cost->dist = distortion2;

+ rd_cost->rdcost = this_rd;

- if (this_rd >= best_rd_so_far)

- return INT64_MAX;

+ if (this_rd >= best_rd_so_far) {

+ rd_cost->rate = INT_MAX;

+ rd_cost->rdcost = INT64_MAX;

+ return;

+ }

assert((cm->interp_filter == SWITCHABLE) ||

(cm->interp_filter == mbmi->interp_filter));

@@ -3298,20 +3583,18 @@

swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);

store_coding_context(x, ctx, THR_ZEROMV,

best_pred_diff, best_tx_diff, best_filter_diff, 0);

- return this_rd;

}

-int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,

- const TileInfo *const tile,

- int mi_row, int mi_col,

- int *returnrate,

- int64_t *returndistortion,

- BLOCK_SIZE bsize,

- PICK_MODE_CONTEXT *ctx,

- int64_t best_rd_so_far) {

+void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,

+ const TileInfo *const tile,

+ int mi_row, int mi_col,

+ RD_COST *rd_cost,

+ BLOCK_SIZE bsize,

+ PICK_MODE_CONTEXT *ctx,

+ int64_t best_rd_so_far) {

VP9_COMMON *const cm = &cpi->common;

RD_OPT *const rd_opt = &cpi->rd;

+ SPEED_FEATURES *const sf = &cpi->sf;

MACROBLOCKD *const xd = &x->e_mbd;

MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;

const struct segmentation *const seg = &cm->seg;

@@ -3338,14 +3621,14 @@

int64_t dist_uv;

int skip_uv;

PREDICTION_MODE mode_uv = DC_PRED;

- const int intra_cost_penalty =

- 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);

+ const int intra_cost_penalty = vp9_get_intra_cost_penalty(

+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);

int_mv seg_mvs[4][MAX_REF_FRAMES];

b_mode_info best_bmodes[4];

int best_skip2 = 0;

int ref_frame_skip_mask[2] = { 0 };

- x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

+ x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);

vp9_zero(best_mbmode);

@@ -3364,7 +3647,7 @@

best_filter_rd[i] = INT64_MAX;

rate_uv_intra = INT_MAX;

- *returnrate = INT_MAX;

+ rd_cost->rate = INT_MAX;

for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {

if (cpi->ref_frame_flags & flag_list[ref_frame]) {

@@ -3398,7 +3681,7 @@

// Look at the reference frame of the best mode so far and set the

// skip mask to look at a subset of the remaining modes.

- if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {

+ if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {

if (ref_index == 3) {

switch (best_mbmode.ref_frame[0]) {

case INTRA_FRAME:

@@ -3443,7 +3726,7 @@

if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))

continue;

- if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&

+ if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&

best_mbmode.ref_frame[0] == INTRA_FRAME)

continue;

}

@@ -3547,12 +3830,12 @@

if (cm->interp_filter != BILINEAR) {

tmp_best_filter = EIGHTTAP;

- if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {

+ if (x->source_variance < sf->disable_filter_search_var_thresh) {

tmp_best_filter = EIGHTTAP;

- } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&

+ } else if (sf->adaptive_pred_interp_filter == 1 &&

ctx->pred_interp_filter < SWITCHABLE) {

tmp_best_filter = ctx->pred_interp_filter;

- } else if (cpi->sf.adaptive_pred_interp_filter == 2) {

+ } else if (sf->adaptive_pred_interp_filter == 2) {

tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?

ctx->pred_interp_filter : 0;

} else {

@@ -3605,7 +3888,7 @@

}

pred_exists = 1;

if (switchable_filter_index == 0 &&

- cpi->sf.use_rd_breakout &&

+ sf->use_rd_breakout &&

best_rd < INT64_MAX) {

if (tmp_best_rdu / 2 > best_rd) {

// skip searching the other filters if the first is

@@ -3668,10 +3951,11 @@

// then don't bother looking at UV

vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,

BLOCK_8X8);

- super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,

- &uv_sse, BLOCK_8X8, tmp_best_rdu);

- if (rate_uv == INT_MAX)

+ vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

+ if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,

+ &uv_sse, BLOCK_8X8, tmp_best_rdu))

continue;

rate2 += rate_uv;

distortion2 += distortion_uv;

skippable = skippable && uv_skippable;

@@ -3738,8 +4022,9 @@

max_plane = 1;

}

- *returnrate = rate2;

- *returndistortion = distortion2;

+ rd_cost->rate = rate2;

+ rd_cost->dist = distortion2;

+ rd_cost->rdcost = this_rd;

best_rd = this_rd;

best_yrd = best_rd -

RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);

@@ -3755,11 +4040,16 @@

// TODO(debargha): enhance this test with a better distortion prediction

// based on qp, activity mask and history

- if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&

+ if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&

(ref_index > MIN_EARLY_TERM_INDEX)) {

- const int qstep = xd->plane[0].dequant[1];

+ int qstep = xd->plane[0].dequant[1];

// TODO(debargha): Enhance this by specializing for each mode_index

int scale = 4;

+#if CONFIG_VP9_HIGHBITDEPTH

+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+ qstep >>= (xd->bd - 8);

+ }

+#endif // CONFIG_VP9_HIGHBITDEPTH

if (x->source_variance < UINT_MAX) {

const int var_adjust = (x->source_variance < 16);

scale -= var_adjust;

@@ -3826,11 +4116,14 @@

break;

}

- if (best_rd >= best_rd_so_far)

- return INT64_MAX;

+ if (best_rd >= best_rd_so_far) {

+ rd_cost->rate = INT_MAX;

+ rd_cost->rdcost = INT64_MAX;

+ return;

+ }

// If we used an estimate for the uv intra rd in the loop above...

- if (cpi->sf.use_uv_intra_rd_estimate) {

+ if (sf->use_uv_intra_rd_estimate) {

// Do Intra UV best rd mode selection if best mode choice above was intra.

if (best_mbmode.ref_frame[0] == INTRA_FRAME) {

*mbmi = best_mbmode;

@@ -3843,9 +4136,10 @@

}

if (best_rd == INT64_MAX) {

- *returnrate = INT_MAX;

- *returndistortion = INT64_MAX;

- return best_rd;

+ rd_cost->rate = INT_MAX;

+ rd_cost->dist = INT64_MAX;

+ rd_cost->rdcost = INT64_MAX;

+ return;

}

assert((cm->interp_filter == SWITCHABLE) ||

@@ -3891,7 +4185,5 @@

store_coding_context(x, ctx, best_ref_index,

best_pred_diff, best_tx_diff, best_filter_diff, 0);

- return best_rd;

}

« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_resize.h » ('j') | no next file with comments »