| Index: source/libvpx/vp9/encoder/vp9_encodeframe.c
|
| ===================================================================
|
| --- source/libvpx/vp9/encoder/vp9_encodeframe.c (revision 292608)
|
| +++ source/libvpx/vp9/encoder/vp9_encodeframe.c (working copy)
|
| @@ -61,16 +61,51 @@
|
| // Eventually this should be replaced by custom no-reference routines,
|
| // which will be faster.
|
| static const uint8_t VP9_VAR_OFFS[64] = {
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128,
|
| - 128, 128, 128, 128, 128, 128, 128, 128
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128
|
| };
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| +static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128,
|
| + 128, 128, 128, 128, 128, 128, 128, 128
|
| +};
|
| +
|
| +static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
|
| + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4
|
| +};
|
| +
|
| +static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
|
| + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16
|
| +};
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
|
| const struct buf_2d *ref,
|
| BLOCK_SIZE bs) {
|
| @@ -80,6 +115,32 @@
|
| return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
|
| }
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| +static unsigned int high_get_sby_perpixel_variance(
|
| + VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) {
|
| + unsigned int var, sse;
|
| + switch (bd) {
|
| + case 10:
|
| + var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
|
| + CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10),
|
| + 0, &sse);
|
| + break;
|
| + case 12:
|
| + var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
|
| + CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12),
|
| + 0, &sse);
|
| + break;
|
| + case 8:
|
| + default:
|
| + var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
|
| + CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8),
|
| + 0, &sse);
|
| + break;
|
| + }
|
| + return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
|
| +}
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
|
| const struct buf_2d *ref,
|
| int mi_row, int mi_col,
|
| @@ -335,10 +396,11 @@
|
| const int block_width = num_8x8_blocks_wide_lookup[bsize];
|
| const int block_height = num_8x8_blocks_high_lookup[bsize];
|
| // TODO(debargha): Choose this more intelligently.
|
| - const int64_t threshold_multiplier = 25;
|
| - int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
|
| + const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
|
| + int64_t threshold =
|
| + (int64_t)(threshold_multiplier *
|
| + vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth));
|
| assert(block_height == block_width);
|
| -
|
| tree_to_node(data, bsize, &vt);
|
|
|
| // Split none is available only if we have more than half a block size
|
| @@ -350,29 +412,47 @@
|
| return 1;
|
| }
|
|
|
| - // Vertical split is available on all but the bottom border.
|
| - if (mi_row + block_height / 2 < cm->mi_rows &&
|
| - vt.part_variances->vert[0].variance < threshold &&
|
| - vt.part_variances->vert[1].variance < threshold) {
|
| - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
|
| - set_block_size(cpi, mi_row, mi_col, subsize);
|
| - set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
|
| - return 1;
|
| + // Only allow split for blocks above 16x16.
|
| + if (bsize > BLOCK_16X16) {
|
| + // Vertical split is available on all but the bottom border.
|
| + if (mi_row + block_height / 2 < cm->mi_rows &&
|
| + vt.part_variances->vert[0].variance < threshold &&
|
| + vt.part_variances->vert[1].variance < threshold) {
|
| + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
|
| + set_block_size(cpi, mi_row, mi_col, subsize);
|
| + set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
|
| + return 1;
|
| + }
|
| +
|
| + // Horizontal split is available on all but the right border.
|
| + if (mi_col + block_width / 2 < cm->mi_cols &&
|
| + vt.part_variances->horz[0].variance < threshold &&
|
| + vt.part_variances->horz[1].variance < threshold) {
|
| + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
|
| + set_block_size(cpi, mi_row, mi_col, subsize);
|
| + set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
|
| + return 1;
|
| + }
|
| }
|
|
|
| - // Horizontal split is available on all but the right border.
|
| - if (mi_col + block_width / 2 < cm->mi_cols &&
|
| - vt.part_variances->horz[0].variance < threshold &&
|
| - vt.part_variances->horz[1].variance < threshold) {
|
| - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
|
| - set_block_size(cpi, mi_row, mi_col, subsize);
|
| - set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
|
| - return 1;
|
| + // This will only allow 8x8 if the 16x16 variance is very large.
|
| + if (bsize == BLOCK_16X16) {
|
| + if (mi_col + block_width / 2 < cm->mi_cols &&
|
| + mi_row + block_height / 2 < cm->mi_rows &&
|
| + vt.part_variances->none.variance < (threshold << 6)) {
|
| + set_block_size(cpi, mi_row, mi_col, bsize);
|
| + return 1;
|
| + }
|
| }
|
| return 0;
|
| }
|
|
|
| -// TODO(debargha): Fix this function and make it work as expected.
|
| +// This function chooses partitioning based on the variance
|
| +// between source and reconstructed last, where variance is
|
| +// computed for 8x8 downsampled inputs. Some things to check:
|
| +// using the last source rather than reconstructed last, and
|
| +// allowing for small downsampling (4x4 or 2x2) for selection
|
| +// of smaller block sizes (i.e., < 16x16).
|
| static void choose_partitioning(VP9_COMP *cpi,
|
| const TileInfo *const tile,
|
| int mi_row, int mi_col) {
|
| @@ -391,6 +471,7 @@
|
| const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
|
| const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
|
|
|
| + vp9_clear_system_state();
|
| vp9_zero(vt);
|
| set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
|
|
|
| @@ -419,6 +500,22 @@
|
| } else {
|
| d = VP9_VAR_OFFS;
|
| dp = 0;
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + switch (xd->bd) {
|
| + case 10:
|
| + d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10);
|
| + break;
|
| + case 12:
|
| + d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12);
|
| + break;
|
| + case 8:
|
| + default:
|
| + d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8);
|
| + break;
|
| + }
|
| + }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| }
|
|
|
| // Fill in the entire tree of 8x8 variances for splits.
|
| @@ -434,10 +531,28 @@
|
| int y_idx = y16_idx + ((k >> 1) << 3);
|
| unsigned int sse = 0;
|
| int sum = 0;
|
| - if (x_idx < pixels_wide && y_idx < pixels_high)
|
| - vp9_get8x8var(s + y_idx * sp + x_idx, sp,
|
| - d + y_idx * dp + x_idx, dp, &sse, &sum);
|
| - fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
|
| +
|
| + if (x_idx < pixels_wide && y_idx < pixels_high) {
|
| + int s_avg, d_avg;
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp);
|
| + d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp);
|
| + } else {
|
| + s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
|
| + d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
|
| + }
|
| +#else
|
| + s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
|
| + d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
|
| +#endif
|
| + sum = s_avg - d_avg;
|
| + sse = sum * sum;
|
| + }
|
| + // For an 8x8 block we have just one value the average of all 64
|
| + // pixels, so use 1. This means of course that there is no variance
|
| + // in an 8x8 block.
|
| + fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
|
| }
|
| }
|
| }
|
| @@ -453,8 +568,8 @@
|
| // Now go through the entire structure, splitting every block size until
|
| // we get to one that's got a variance lower than our threshold, or we
|
| // hit 8x8.
|
| - if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
|
| - mi_row, mi_col)) {
|
| + if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
|
| + !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {
|
| for (i = 0; i < 4; ++i) {
|
| const int x32_idx = ((i & 1) << 2);
|
| const int y32_idx = ((i >> 1) << 2);
|
| @@ -463,31 +578,15 @@
|
| for (j = 0; j < 4; ++j) {
|
| const int x16_idx = ((j & 1) << 1);
|
| const int y16_idx = ((j >> 1) << 1);
|
| - // NOTE: This is a temporary hack to disable 8x8 partitions,
|
| - // since it works really bad - possibly due to a bug
|
| -#define DISABLE_8X8_VAR_BASED_PARTITION
|
| -#ifdef DISABLE_8X8_VAR_BASED_PARTITION
|
| - if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
|
| - mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
|
| - set_block_size(cpi,
|
| - (mi_row + y32_idx + y16_idx),
|
| - (mi_col + x32_idx + x16_idx),
|
| - BLOCK_16X16);
|
| - } else {
|
| - for (k = 0; k < 4; ++k) {
|
| - const int x8_idx = (k & 1);
|
| - const int y8_idx = (k >> 1);
|
| - set_block_size(cpi,
|
| - (mi_row + y32_idx + y16_idx + y8_idx),
|
| - (mi_col + x32_idx + x16_idx + x8_idx),
|
| - BLOCK_8X8);
|
| - }
|
| - }
|
| -#else
|
| - if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
|
| + // NOTE: Since this uses 8x8 downsampling for variance calculation
|
| + // we cannot really select block size 8x8 (or even 8x16/16x8),
|
| + // since we do not sufficient samples for variance.
|
| + // For now, 8x8 partition is only set if the variance of the 16x16
|
| + // block is very high. This is controlled in set_vt_partitioning.
|
| + if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
|
| BLOCK_16X16,
|
| - (mi_row + y32_idx + y16_idx),
|
| - (mi_col + x32_idx + x16_idx), 2)) {
|
| + mi_row + y32_idx + y16_idx,
|
| + mi_col + x32_idx + x16_idx)) {
|
| for (k = 0; k < 4; ++k) {
|
| const int x8_idx = (k & 1);
|
| const int y8_idx = (k >> 1);
|
| @@ -497,7 +596,6 @@
|
| BLOCK_8X8);
|
| }
|
| }
|
| -#endif
|
| }
|
| }
|
| }
|
| @@ -684,10 +782,9 @@
|
| }
|
|
|
| static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
|
| - int mi_row, int mi_col,
|
| - int *totalrate, int64_t *totaldist,
|
| + int mi_row, int mi_col, RD_COST *rd_cost,
|
| BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
|
| - int64_t best_rd, int block) {
|
| + int64_t best_rd) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| MACROBLOCK *const x = &cpi->mb;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| @@ -704,19 +801,6 @@
|
| // Use the lower precision, but faster, 32x32 fdct for mode selection.
|
| x->use_lp32x32fdct = 1;
|
|
|
| - // TODO(JBB): Most other places in the code instead of calling the function
|
| - // and then checking if its not the first 8x8 we put the check in the
|
| - // calling function. Do that here.
|
| - if (bsize < BLOCK_8X8) {
|
| - // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
|
| - // there is nothing to be done.
|
| - if (block != 0) {
|
| - *totalrate = 0;
|
| - *totaldist = 0;
|
| - return;
|
| - }
|
| - }
|
| -
|
| set_offsets(cpi, tile, mi_row, mi_col, bsize);
|
| mbmi = &xd->mi[0].src_mi->mbmi;
|
| mbmi->sb_type = bsize;
|
| @@ -734,7 +818,17 @@
|
| // Set to zero to make sure we do not use the previous encoded frame stats
|
| mbmi->skip = 0;
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
| + x->source_variance =
|
| + high_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize, xd->bd);
|
| + } else {
|
| + x->source_variance =
|
| + get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
|
| + }
|
| +#else
|
| x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| // Save rdmult before it might be changed, so it can be restored later.
|
| orig_rdmult = x->rdmult;
|
| @@ -774,28 +868,33 @@
|
| // Find best coding mode & reconstruct the MB so it is available
|
| // as a predictor for MBs that follow in the SB
|
| if (frame_is_intra_only(cm)) {
|
| - vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx,
|
| - best_rd);
|
| + vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
|
| } else {
|
| if (bsize >= BLOCK_8X8) {
|
| if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
|
| - vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, totalrate, totaldist, bsize,
|
| + vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, rd_cost, bsize,
|
| ctx, best_rd);
|
| else
|
| vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
|
| - totalrate, totaldist, bsize, ctx, best_rd);
|
| + rd_cost, bsize, ctx, best_rd);
|
| } else {
|
| - vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate,
|
| - totaldist, bsize, ctx, best_rd);
|
| + vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, rd_cost,
|
| + bsize, ctx, best_rd);
|
| }
|
| }
|
|
|
| + if (aq_mode == VARIANCE_AQ && rd_cost->rate != INT_MAX) {
|
| + vp9_clear_system_state();
|
| + rd_cost->rate = (int)round(rd_cost->rate * rdmult_ratio);
|
| + rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
|
| + }
|
| +
|
| x->rdmult = orig_rdmult;
|
|
|
| - if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) {
|
| - vp9_clear_system_state();
|
| - *totalrate = (int)round(*totalrate * rdmult_ratio);
|
| - }
|
| + // TODO(jingning) The rate-distortion optimization flow needs to be
|
| + // refactored to provide proper exit/return handle.
|
| + if (rd_cost->rate == INT_MAX)
|
| + rd_cost->rdcost = INT64_MAX;
|
| }
|
|
|
| static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) {
|
| @@ -925,7 +1024,7 @@
|
| MACROBLOCK *const x = &cpi->mb;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
|
|
| - const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
|
| + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
|
| int ctx;
|
| PARTITION_TYPE partition;
|
| BLOCK_SIZE subsize = bsize;
|
| @@ -1297,12 +1396,18 @@
|
| *(xd->mi[0].src_mi) = ctx->mic;
|
| xd->mi[0].src_mi = &xd->mi[0];
|
|
|
| -
|
| - // For in frame adaptive Q, check for reseting the segment_id and updating
|
| - // the cyclic refresh map.
|
| - if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) {
|
| - vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0].src_mi->mbmi,
|
| - mi_row, mi_col, bsize, 1);
|
| + if (seg->enabled && cpi->oxcf.aq_mode) {
|
| + // For in frame complexity AQ or variance AQ, copy segment_id from
|
| + // segmentation_map.
|
| + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ ||
|
| + cpi->oxcf.aq_mode == VARIANCE_AQ ) {
|
| + const uint8_t *const map = seg->update_map ? cpi->segmentation_map
|
| + : cm->last_frame_seg_map;
|
| + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
|
| + } else {
|
| + // Setting segmentation map for cyclic_refresh
|
| + vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize, 1);
|
| + }
|
| vp9_init_plane_quantizers(cpi, x);
|
| }
|
|
|
| @@ -1348,7 +1453,7 @@
|
| MACROBLOCK *const x = &cpi->mb;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
|
|
| - const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
|
| + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
|
| int ctx;
|
| PARTITION_TYPE partition;
|
| BLOCK_SIZE subsize;
|
| @@ -1411,10 +1516,9 @@
|
| update_partition_context(xd, mi_row, mi_col, subsize, bsize);
|
| }
|
|
|
| -static void rd_use_partition(VP9_COMP *cpi,
|
| - const TileInfo *const tile,
|
| - MODE_INFO *mi_8x8,
|
| - TOKENEXTRA **tp, int mi_row, int mi_col,
|
| +static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
|
| + MODE_INFO *mi_8x8, TOKENEXTRA **tp,
|
| + int mi_row, int mi_col,
|
| BLOCK_SIZE bsize, int *rate, int64_t *dist,
|
| int do_recon, PC_TREE *pc_tree) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| @@ -1421,7 +1525,7 @@
|
| MACROBLOCK *const x = &cpi->mb;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| const int mis = cm->mi_stride;
|
| - const int bsl = b_width_log2(bsize);
|
| + const int bsl = b_width_log2_lookup[bsize];
|
| const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
|
| const int bss = (1 << bsl) / 4;
|
| int i, pl;
|
| @@ -1429,15 +1533,7 @@
|
| BLOCK_SIZE subsize;
|
| ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
|
| PARTITION_CONTEXT sl[8], sa[8];
|
| - int last_part_rate = INT_MAX;
|
| - int64_t last_part_dist = INT64_MAX;
|
| - int64_t last_part_rd = INT64_MAX;
|
| - int none_rate = INT_MAX;
|
| - int64_t none_dist = INT64_MAX;
|
| - int64_t none_rd = INT64_MAX;
|
| - int chosen_rate = INT_MAX;
|
| - int64_t chosen_dist = INT64_MAX;
|
| - int64_t chosen_rd = INT64_MAX;
|
| + RD_COST last_part_rdc, none_rdc, chosen_rdc;
|
| BLOCK_SIZE sub_subsize = BLOCK_4X4;
|
| int splits_below = 0;
|
| BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type;
|
| @@ -1450,6 +1546,10 @@
|
| assert(num_4x4_blocks_wide_lookup[bsize] ==
|
| num_4x4_blocks_high_lookup[bsize]);
|
|
|
| + vp9_rd_cost_reset(&last_part_rdc);
|
| + vp9_rd_cost_reset(&none_rdc);
|
| + vp9_rd_cost_reset(&chosen_rdc);
|
| +
|
| partition = partition_lookup[bsl][bs_type];
|
| subsize = get_subsize(bsize, partition);
|
|
|
| @@ -1483,14 +1583,15 @@
|
| mi_row + (mi_step >> 1) < cm->mi_rows &&
|
| mi_col + (mi_step >> 1) < cm->mi_cols) {
|
| pc_tree->partitioning = PARTITION_NONE;
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
|
| - ctx, INT64_MAX, 0);
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rdc, bsize,
|
| + ctx, INT64_MAX);
|
|
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
|
|
| - if (none_rate < INT_MAX) {
|
| - none_rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| - none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist);
|
| + if (none_rdc.rate < INT_MAX) {
|
| + none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| + none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate,
|
| + none_rdc.dist);
|
| }
|
|
|
| restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
| @@ -1501,84 +1602,81 @@
|
|
|
| switch (partition) {
|
| case PARTITION_NONE:
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
|
| - &last_part_dist, bsize, ctx, INT64_MAX, 0);
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
|
| + bsize, ctx, INT64_MAX);
|
| break;
|
| case PARTITION_HORZ:
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
|
| - &last_part_dist, subsize, &pc_tree->horizontal[0],
|
| - INT64_MAX, 0);
|
| - if (last_part_rate != INT_MAX &&
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
|
| + subsize, &pc_tree->horizontal[0],
|
| + INT64_MAX);
|
| + if (last_part_rdc.rate != INT_MAX &&
|
| bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
|
| - int rt = 0;
|
| - int64_t dt = 0;
|
| + RD_COST tmp_rdc;
|
| PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
|
| + vp9_rd_cost_init(&tmp_rdc);
|
| update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
|
| encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
|
| - rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt,
|
| - subsize, &pc_tree->horizontal[1], INT64_MAX, 1);
|
| - if (rt == INT_MAX || dt == INT64_MAX) {
|
| - last_part_rate = INT_MAX;
|
| - last_part_dist = INT64_MAX;
|
| + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &tmp_rdc,
|
| + subsize, &pc_tree->horizontal[1], INT64_MAX);
|
| + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
|
| + vp9_rd_cost_reset(&last_part_rdc);
|
| break;
|
| }
|
| -
|
| - last_part_rate += rt;
|
| - last_part_dist += dt;
|
| + last_part_rdc.rate += tmp_rdc.rate;
|
| + last_part_rdc.dist += tmp_rdc.dist;
|
| + last_part_rdc.rdcost += tmp_rdc.rdcost;
|
| }
|
| break;
|
| case PARTITION_VERT:
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
|
| - &last_part_dist, subsize, &pc_tree->vertical[0],
|
| - INT64_MAX, 0);
|
| - if (last_part_rate != INT_MAX &&
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
|
| + subsize, &pc_tree->vertical[0], INT64_MAX);
|
| + if (last_part_rdc.rate != INT_MAX &&
|
| bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
|
| - int rt = 0;
|
| - int64_t dt = 0;
|
| + RD_COST tmp_rdc;
|
| PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
|
| + vp9_rd_cost_init(&tmp_rdc);
|
| update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
|
| encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt,
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
|
| subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
|
| - INT64_MAX, 1);
|
| - if (rt == INT_MAX || dt == INT64_MAX) {
|
| - last_part_rate = INT_MAX;
|
| - last_part_dist = INT64_MAX;
|
| + INT64_MAX);
|
| + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
|
| + vp9_rd_cost_reset(&last_part_rdc);
|
| break;
|
| }
|
| - last_part_rate += rt;
|
| - last_part_dist += dt;
|
| + last_part_rdc.rate += tmp_rdc.rate;
|
| + last_part_rdc.dist += tmp_rdc.dist;
|
| + last_part_rdc.rdcost += tmp_rdc.rdcost;
|
| }
|
| break;
|
| case PARTITION_SPLIT:
|
| if (bsize == BLOCK_8X8) {
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
|
| - &last_part_dist, subsize, pc_tree->leaf_split[0],
|
| - INT64_MAX, 0);
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
|
| + subsize, pc_tree->leaf_split[0], INT64_MAX);
|
| break;
|
| }
|
| - last_part_rate = 0;
|
| - last_part_dist = 0;
|
| + last_part_rdc.rate = 0;
|
| + last_part_rdc.dist = 0;
|
| + last_part_rdc.rdcost = 0;
|
| for (i = 0; i < 4; i++) {
|
| int x_idx = (i & 1) * (mi_step >> 1);
|
| int y_idx = (i >> 1) * (mi_step >> 1);
|
| int jj = i >> 1, ii = i & 0x01;
|
| - int rt;
|
| - int64_t dt;
|
| -
|
| + RD_COST tmp_rdc;
|
| if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
|
| continue;
|
|
|
| + vp9_rd_cost_init(&tmp_rdc);
|
| rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
|
| - mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
|
| + mi_row + y_idx, mi_col + x_idx, subsize,
|
| + &tmp_rdc.rate, &tmp_rdc.dist,
|
| i != 3, pc_tree->split[i]);
|
| - if (rt == INT_MAX || dt == INT64_MAX) {
|
| - last_part_rate = INT_MAX;
|
| - last_part_dist = INT64_MAX;
|
| + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
|
| + vp9_rd_cost_reset(&last_part_rdc);
|
| break;
|
| }
|
| - last_part_rate += rt;
|
| - last_part_dist += dt;
|
| + last_part_rdc.rate += tmp_rdc.rate;
|
| + last_part_rdc.dist += tmp_rdc.dist;
|
| }
|
| break;
|
| default:
|
| @@ -1587,9 +1685,10 @@
|
| }
|
|
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
| - if (last_part_rate < INT_MAX) {
|
| - last_part_rate += cpi->partition_cost[pl][partition];
|
| - last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
|
| + if (last_part_rdc.rate < INT_MAX) {
|
| + last_part_rdc.rate += cpi->partition_cost[pl][partition];
|
| + last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| + last_part_rdc.rate, last_part_rdc.dist);
|
| }
|
|
|
| if (do_partition_search
|
| @@ -1601,8 +1700,8 @@
|
| && (mi_col + mi_step < cm->mi_cols ||
|
| mi_col + (mi_step >> 1) == cm->mi_cols)) {
|
| BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
|
| - chosen_rate = 0;
|
| - chosen_dist = 0;
|
| + chosen_rdc.rate = 0;
|
| + chosen_rdc.dist = 0;
|
| restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
| pc_tree->partitioning = PARTITION_SPLIT;
|
|
|
| @@ -1610,8 +1709,7 @@
|
| for (i = 0; i < 4; i++) {
|
| int x_idx = (i & 1) * (mi_step >> 1);
|
| int y_idx = (i >> 1) * (mi_step >> 1);
|
| - int rt = 0;
|
| - int64_t dt = 0;
|
| + RD_COST tmp_rdc;
|
| ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
|
| PARTITION_CONTEXT sl[8], sa[8];
|
|
|
| @@ -1620,20 +1718,18 @@
|
|
|
| save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
| pc_tree->split[i]->partitioning = PARTITION_NONE;
|
| - rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
|
| - split_subsize, &pc_tree->split[i]->none,
|
| - INT64_MAX, i);
|
| + rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
|
| + split_subsize, &pc_tree->split[i]->none, INT64_MAX);
|
|
|
| restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
|
|
| - if (rt == INT_MAX || dt == INT64_MAX) {
|
| - chosen_rate = INT_MAX;
|
| - chosen_dist = INT64_MAX;
|
| + if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
|
| + vp9_rd_cost_reset(&chosen_rdc);
|
| break;
|
| }
|
|
|
| - chosen_rate += rt;
|
| - chosen_dist += dt;
|
| + chosen_rdc.rate += tmp_rdc.rate;
|
| + chosen_rdc.dist += tmp_rdc.dist;
|
|
|
| if (i != 3)
|
| encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0,
|
| @@ -1641,30 +1737,28 @@
|
|
|
| pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
|
| split_subsize);
|
| - chosen_rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| + chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| }
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
| - if (chosen_rate < INT_MAX) {
|
| - chosen_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
|
| - chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
|
| + if (chosen_rdc.rate < INT_MAX) {
|
| + chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
|
| + chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| + chosen_rdc.rate, chosen_rdc.dist);
|
| }
|
| }
|
|
|
| // If last_part is better set the partitioning to that.
|
| - if (last_part_rd < chosen_rd) {
|
| + if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
|
| mi_8x8[0].src_mi->mbmi.sb_type = bsize;
|
| if (bsize >= BLOCK_8X8)
|
| pc_tree->partitioning = partition;
|
| - chosen_rate = last_part_rate;
|
| - chosen_dist = last_part_dist;
|
| - chosen_rd = last_part_rd;
|
| + chosen_rdc = last_part_rdc;
|
| }
|
| // If none was better set the partitioning to that.
|
| - if (none_rd < chosen_rd) {
|
| + if (none_rdc.rdcost < chosen_rdc.rdcost) {
|
| if (bsize >= BLOCK_8X8)
|
| pc_tree->partitioning = PARTITION_NONE;
|
| - chosen_rate = none_rate;
|
| - chosen_dist = none_dist;
|
| + chosen_rdc = none_rdc;
|
| }
|
|
|
| restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
|
| @@ -1671,8 +1765,8 @@
|
|
|
| // We must have chosen a partitioning and encoding or we'll fail later on.
|
| // No other opportunities for success.
|
| - if ( bsize == BLOCK_64X64)
|
| - assert(chosen_rate < INT_MAX && chosen_dist < INT64_MAX);
|
| + if (bsize == BLOCK_64X64)
|
| + assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
|
|
|
| if (do_recon) {
|
| int output_enabled = (bsize == BLOCK_64X64);
|
| @@ -1682,18 +1776,18 @@
|
| // closer to the target.
|
| if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
|
| vp9_select_in_frame_q_segment(cpi, mi_row, mi_col,
|
| - output_enabled, chosen_rate);
|
| + output_enabled, chosen_rdc.rate);
|
| }
|
|
|
| if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
|
| vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
|
| - chosen_rate, chosen_dist);
|
| + chosen_rdc.rate, chosen_rdc.dist);
|
| encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize,
|
| pc_tree);
|
| }
|
|
|
| - *rate = chosen_rate;
|
| - *dist = chosen_dist;
|
| + *rate = chosen_rdc.rate;
|
| + *dist = chosen_rdc.dist;
|
| }
|
|
|
| static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
|
| @@ -1863,7 +1957,7 @@
|
| int bh, bw;
|
| BLOCK_SIZE min_size = BLOCK_32X32;
|
| BLOCK_SIZE max_size = BLOCK_8X8;
|
| - int bsl = mi_width_log2(BLOCK_64X64);
|
| + int bsl = mi_width_log2_lookup[BLOCK_64X64];
|
| const int search_range_ctrl = (((mi_row + mi_col) >> bsl) +
|
| get_chessboard_index(cm->current_video_frame)) & 0x1;
|
| // Trap case where we do not have a prediction.
|
| @@ -2022,10 +2116,9 @@
|
| // unlikely to be selected depending on previous rate-distortion optimization
|
| // results, for encoding speed-up.
|
| static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
|
| - TOKENEXTRA **tp, int mi_row,
|
| - int mi_col, BLOCK_SIZE bsize, int *rate,
|
| - int64_t *dist, int64_t best_rd,
|
| - PC_TREE *pc_tree) {
|
| + TOKENEXTRA **tp, int mi_row, int mi_col,
|
| + BLOCK_SIZE bsize, RD_COST *rd_cost,
|
| + int64_t best_rd, PC_TREE *pc_tree) {
|
| VP9_COMMON *const cm = &cpi->common;
|
| MACROBLOCK *const x = &cpi->mb;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| @@ -2036,9 +2129,7 @@
|
| PICK_MODE_CONTEXT *ctx = &pc_tree->none;
|
| int i, pl;
|
| BLOCK_SIZE subsize;
|
| - int this_rate, sum_rate = 0, best_rate = INT_MAX;
|
| - int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
|
| - int64_t sum_rd = 0;
|
| + RD_COST this_rdc, sum_rdc, best_rdc;
|
| int do_split = bsize >= BLOCK_8X8;
|
| int do_rect = 1;
|
|
|
| @@ -2066,6 +2157,11 @@
|
| assert(num_8x8_blocks_wide_lookup[bsize] ==
|
| num_8x8_blocks_high_lookup[bsize]);
|
|
|
| + vp9_rd_cost_init(&this_rdc);
|
| + vp9_rd_cost_init(&sum_rdc);
|
| + vp9_rd_cost_reset(&best_rdc);
|
| + best_rdc.rdcost = best_rd;
|
| +
|
| set_offsets(cpi, tile, mi_row, mi_col, bsize);
|
|
|
| if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode)
|
| @@ -2157,29 +2253,30 @@
|
|
|
| // PARTITION_NONE
|
| if (partition_none_allowed) {
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
|
| - ctx, best_rd, 0);
|
| - if (this_rate != INT_MAX) {
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rdc, bsize, ctx,
|
| + best_rdc.rdcost);
|
| + if (this_rdc.rate != INT_MAX) {
|
| if (bsize >= BLOCK_8X8) {
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
| - this_rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| + this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| + this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| + this_rdc.rate, this_rdc.dist);
|
| }
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
|
|
|
| - if (sum_rd < best_rd) {
|
| + if (this_rdc.rdcost < best_rdc.rdcost) {
|
| int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
|
| int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
|
|
|
| - best_rate = this_rate;
|
| - best_dist = this_dist;
|
| - best_rd = sum_rd;
|
| + best_rdc = this_rdc;
|
| if (bsize >= BLOCK_8X8)
|
| pc_tree->partitioning = PARTITION_NONE;
|
|
|
| // Adjust dist breakout threshold according to the partition size.
|
| - dist_breakout_thr >>= 8 - (b_width_log2(bsize) +
|
| - b_height_log2(bsize));
|
| + dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
|
| + b_height_log2_lookup[bsize]);
|
|
|
| + rate_breakout_thr *= num_pels_log2_lookup[bsize];
|
| +
|
| // If all y, u, v transform blocks in this partition are skippable, and
|
| // the dist & rate are within the thresholds, the partition search is
|
| // terminated for current branch of the partition search tree.
|
| @@ -2186,8 +2283,8 @@
|
| // The dist & rate thresholds are set to 0 at speed 0 to disable the
|
| // early termination at that speed.
|
| if (!x->e_mbd.lossless &&
|
| - (ctx->skippable && best_dist < dist_breakout_thr &&
|
| - best_rate < rate_breakout_thr)) {
|
| + (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
|
| + best_rdc.rate < rate_breakout_thr)) {
|
| do_split = 0;
|
| do_rect = 0;
|
| }
|
| @@ -2247,7 +2344,6 @@
|
| store_pred_mv(x, ctx);
|
|
|
| // PARTITION_SPLIT
|
| - sum_rd = 0;
|
| // TODO(jingning): use the motion vectors given by the above search as
|
| // the starting point of motion search in the following partition type check.
|
| if (do_split) {
|
| @@ -2257,14 +2353,12 @@
|
| if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
|
| pc_tree->leaf_split[0]->pred_interp_filter =
|
| ctx->mic.mbmi.interp_filter;
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
|
| - pc_tree->leaf_split[0], best_rd, 0);
|
| - if (sum_rate == INT_MAX)
|
| - sum_rd = INT64_MAX;
|
| - else
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
|
| + pc_tree->leaf_split[0], best_rdc.rdcost);
|
| + if (sum_rdc.rate == INT_MAX)
|
| + sum_rdc.rdcost = INT64_MAX;
|
| } else {
|
| - for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
|
| + for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
|
| const int x_idx = (i & 1) * mi_step;
|
| const int y_idx = (i >> 1) * mi_step;
|
|
|
| @@ -2276,28 +2370,28 @@
|
|
|
| pc_tree->split[i]->index = i;
|
| rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
|
| - subsize, &this_rate, &this_dist,
|
| - best_rd - sum_rd, pc_tree->split[i]);
|
| + subsize, &this_rdc,
|
| + best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
|
|
|
| - if (this_rate == INT_MAX) {
|
| - sum_rd = INT64_MAX;
|
| + if (this_rdc.rate == INT_MAX) {
|
| + sum_rdc.rdcost = INT64_MAX;
|
| + break;
|
| } else {
|
| - sum_rate += this_rate;
|
| - sum_dist += this_dist;
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| + sum_rdc.rate += this_rdc.rate;
|
| + sum_rdc.dist += this_rdc.dist;
|
| + sum_rdc.rdcost += this_rdc.rdcost;
|
| }
|
| }
|
| }
|
|
|
| - if (sum_rd < best_rd && i == 4) {
|
| + if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
| - sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| + sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
|
| + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| + sum_rdc.rate, sum_rdc.dist);
|
|
|
| - if (sum_rd < best_rd) {
|
| - best_rate = sum_rate;
|
| - best_dist = sum_dist;
|
| - best_rd = sum_rd;
|
| + if (sum_rdc.rdcost < best_rdc.rdcost) {
|
| + best_rdc = sum_rdc;
|
| pc_tree->partitioning = PARTITION_SPLIT;
|
| }
|
| } else {
|
| @@ -2318,11 +2412,11 @@
|
| partition_none_allowed)
|
| pc_tree->horizontal[0].pred_interp_filter =
|
| ctx->mic.mbmi.interp_filter;
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
|
| - &pc_tree->horizontal[0], best_rd, 0);
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
|
| + &pc_tree->horizontal[0], best_rdc.rdcost);
|
|
|
| - if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) {
|
| + if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
|
| + bsize > BLOCK_8X8) {
|
| PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
|
| update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
|
| encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
|
| @@ -2333,25 +2427,24 @@
|
| partition_none_allowed)
|
| pc_tree->horizontal[1].pred_interp_filter =
|
| ctx->mic.mbmi.interp_filter;
|
| - rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate,
|
| - &this_dist, subsize, &pc_tree->horizontal[1],
|
| - best_rd - sum_rd, 1);
|
| - if (this_rate == INT_MAX) {
|
| - sum_rd = INT64_MAX;
|
| + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rdc,
|
| + subsize, &pc_tree->horizontal[1],
|
| + best_rdc.rdcost - sum_rdc.rdcost);
|
| + if (this_rdc.rate == INT_MAX) {
|
| + sum_rdc.rdcost = INT64_MAX;
|
| } else {
|
| - sum_rate += this_rate;
|
| - sum_dist += this_dist;
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| + sum_rdc.rate += this_rdc.rate;
|
| + sum_rdc.dist += this_rdc.dist;
|
| + sum_rdc.rdcost += this_rdc.rdcost;
|
| }
|
| }
|
| - if (sum_rd < best_rd) {
|
| +
|
| + if (sum_rdc.rdcost < best_rdc.rdcost) {
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
| - sum_rate += cpi->partition_cost[pl][PARTITION_HORZ];
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| - if (sum_rd < best_rd) {
|
| - best_rd = sum_rd;
|
| - best_rate = sum_rate;
|
| - best_dist = sum_dist;
|
| + sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
|
| + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
|
| + if (sum_rdc.rdcost < best_rdc.rdcost) {
|
| + best_rdc = sum_rdc;
|
| pc_tree->partitioning = PARTITION_HORZ;
|
| }
|
| }
|
| @@ -2367,10 +2460,10 @@
|
| partition_none_allowed)
|
| pc_tree->vertical[0].pred_interp_filter =
|
| ctx->mic.mbmi.interp_filter;
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
|
| - &pc_tree->vertical[0], best_rd, 0);
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| - if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) {
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
|
| + &pc_tree->vertical[0], best_rdc.rdcost);
|
| + if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
|
| + bsize > BLOCK_8X8) {
|
| update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
|
| encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize,
|
| &pc_tree->vertical[0]);
|
| @@ -2381,26 +2474,24 @@
|
| partition_none_allowed)
|
| pc_tree->vertical[1].pred_interp_filter =
|
| ctx->mic.mbmi.interp_filter;
|
| - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate,
|
| - &this_dist, subsize,
|
| - &pc_tree->vertical[1], best_rd - sum_rd,
|
| - 1);
|
| - if (this_rate == INT_MAX) {
|
| - sum_rd = INT64_MAX;
|
| + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rdc, subsize,
|
| + &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
|
| + if (this_rdc.rate == INT_MAX) {
|
| + sum_rdc.rdcost = INT64_MAX;
|
| } else {
|
| - sum_rate += this_rate;
|
| - sum_dist += this_dist;
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| + sum_rdc.rate += this_rdc.rate;
|
| + sum_rdc.dist += this_rdc.dist;
|
| + sum_rdc.rdcost += this_rdc.rdcost;
|
| }
|
| }
|
| - if (sum_rd < best_rd) {
|
| +
|
| + if (sum_rdc.rdcost < best_rdc.rdcost) {
|
| pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
| - sum_rate += cpi->partition_cost[pl][PARTITION_VERT];
|
| - sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
|
| - if (sum_rd < best_rd) {
|
| - best_rate = sum_rate;
|
| - best_dist = sum_dist;
|
| - best_rd = sum_rd;
|
| + sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
|
| + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
|
| + sum_rdc.rate, sum_rdc.dist);
|
| + if (sum_rdc.rdcost < best_rdc.rdcost) {
|
| + best_rdc = sum_rdc;
|
| pc_tree->partitioning = PARTITION_VERT;
|
| }
|
| }
|
| @@ -2412,10 +2503,11 @@
|
| // point. This code should be refactored so that the duplicate
|
| // checks occur in some sub function and thus are used...
|
| (void) best_rd;
|
| - *rate = best_rate;
|
| - *dist = best_dist;
|
| + *rd_cost = best_rdc;
|
|
|
| - if (best_rate < INT_MAX && best_dist < INT64_MAX && pc_tree->index != 3) {
|
| +
|
| + if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
|
| + pc_tree->index != 3) {
|
| int output_enabled = (bsize == BLOCK_64X64);
|
|
|
| // Check the projected output rate for this SB against it's target
|
| @@ -2423,10 +2515,10 @@
|
| // closer to the target.
|
| if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map)
|
| vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
|
| - best_rate);
|
| + best_rdc.rate);
|
| if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
|
| vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
|
| - best_rate, best_dist);
|
| + best_rdc.rate, best_rdc.dist);
|
|
|
| encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
|
| }
|
| @@ -2433,8 +2525,8 @@
|
|
|
| if (bsize == BLOCK_64X64) {
|
| assert(tp_orig < *tp);
|
| - assert(best_rate < INT_MAX);
|
| - assert(best_dist < INT64_MAX);
|
| + assert(best_rdc.rate < INT_MAX);
|
| + assert(best_rdc.dist < INT64_MAX);
|
| } else {
|
| assert(tp_orig == *tp);
|
| }
|
| @@ -2456,9 +2548,16 @@
|
| mi_col += MI_BLOCK_SIZE) {
|
| int dummy_rate;
|
| int64_t dummy_dist;
|
| -
|
| + RD_COST dummy_rdc;
|
| int i;
|
|
|
| + const int idx_str = cm->mi_stride * mi_row + mi_col;
|
| + MODE_INFO *mi = cm->mi + idx_str;
|
| + MODE_INFO *prev_mi = NULL;
|
| +
|
| + if (cm->frame_type != KEY_FRAME)
|
| + prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
|
| +
|
| if (sf->adaptive_pred_interp_filter) {
|
| for (i = 0; i < 64; ++i)
|
| cpi->leaf_tree[i].pred_interp_filter = SWITCHABLE;
|
| @@ -2477,71 +2576,44 @@
|
| // TODO(yunqingwang): use_lastframe_partitioning is no longer used in good-
|
| // quality encoding. Need to evaluate it in real-time encoding later to
|
| // decide if it can be removed too. And then, do the code cleanup.
|
| - if ((sf->partition_search_type == SEARCH_PARTITION &&
|
| - sf->use_lastframe_partitioning) ||
|
| - sf->partition_search_type == FIXED_PARTITION ||
|
| - sf->partition_search_type == VAR_BASED_PARTITION ||
|
| - sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
|
| - const int idx_str = cm->mi_stride * mi_row + mi_col;
|
| - MODE_INFO *mi = cm->mi + idx_str;
|
| - MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
|
| - cpi->mb.source_variance = UINT_MAX;
|
| - if (sf->partition_search_type == FIXED_PARTITION) {
|
| - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
|
| - set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
|
| - sf->always_this_block_size);
|
| - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| - &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| - } else if (cpi->skippable_frame ||
|
| - sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
|
| - BLOCK_SIZE bsize;
|
| - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
|
| - bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
|
| - set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
|
| - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| - &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| - } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
|
| - choose_partitioning(cpi, tile, mi_row, mi_col);
|
| - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| - &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| - } else {
|
| - GF_GROUP * gf_grp = &cpi->twopass.gf_group;
|
| - int last_was_mid_sequence_overlay = 0;
|
| - if ((cpi->oxcf.pass == 2) && (gf_grp->index)) {
|
| - if (gf_grp->update_type[gf_grp->index - 1] == OVERLAY_UPDATE)
|
| - last_was_mid_sequence_overlay = 1;
|
| - }
|
| - if ((cpi->rc.frames_since_key
|
| - % sf->last_partitioning_redo_frequency) == 0
|
| - || last_was_mid_sequence_overlay
|
| - || cm->prev_mi == 0
|
| - || cm->show_frame == 0
|
| - || cm->frame_type == KEY_FRAME
|
| - || cpi->rc.is_src_frame_alt_ref
|
| - || ((sf->use_lastframe_partitioning ==
|
| - LAST_FRAME_PARTITION_LOW_MOTION) &&
|
| - sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
|
| - // If required set upper and lower partition size limits
|
| - if (sf->auto_min_max_partition_size) {
|
| - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
|
| - rd_auto_partition_range(cpi, tile, mi_row, mi_col,
|
| - &sf->min_partition_size,
|
| - &sf->max_partition_size);
|
| - }
|
| - rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
|
| - &dummy_rate, &dummy_dist, INT64_MAX,
|
| - cpi->pc_root);
|
| - } else {
|
| - if (sf->constrain_copy_partition &&
|
| - sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
|
| - constrain_copy_partitioning(cpi, tile, mi, prev_mi,
|
| - mi_row, mi_col, BLOCK_16X16);
|
| - else
|
| - copy_partitioning(cm, mi, prev_mi);
|
| - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| - &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| - }
|
| - }
|
| + cpi->mb.source_variance = UINT_MAX;
|
| + if (sf->partition_search_type == FIXED_PARTITION) {
|
| + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
|
| + set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
|
| + sf->always_this_block_size);
|
| + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| + &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| + } else if (cpi->partition_search_skippable_frame) {
|
| + BLOCK_SIZE bsize;
|
| + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
|
| + bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
|
| + set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
|
| + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| + &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| + } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
|
| +             cm->frame_type != KEY_FRAME) {
|
| + choose_partitioning(cpi, tile, mi_row, mi_col);
|
| + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| + &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| + } else if (sf->partition_search_type == SEARCH_PARTITION &&
|
| + sf->use_lastframe_partitioning &&
|
| + (cpi->rc.frames_since_key %
|
| + sf->last_partitioning_redo_frequency) &&
|
| + cm->prev_mi &&
|
| + cm->show_frame &&
|
| + cm->frame_type != KEY_FRAME &&
|
| + !cpi->rc.is_src_frame_alt_ref &&
|
| + ((sf->use_lastframe_partitioning !=
|
| + LAST_FRAME_PARTITION_LOW_MOTION) ||
|
| + !sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
|
| + if (sf->constrain_copy_partition &&
|
| + sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
|
| + constrain_copy_partitioning(cpi, tile, mi, prev_mi,
|
| + mi_row, mi_col, BLOCK_16X16);
|
| + else
|
| + copy_partitioning(cm, mi, prev_mi);
|
| + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| + &dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
| } else {
|
| // If required set upper and lower partition size limits
|
| if (sf->auto_min_max_partition_size) {
|
| @@ -2551,7 +2623,7 @@
|
| &sf->max_partition_size);
|
| }
|
| rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
|
| - &dummy_rate, &dummy_dist, INT64_MAX, cpi->pc_root);
|
| + &dummy_rdc, INT64_MAX, cpi->pc_root);
|
| }
|
| }
|
| }
|
| @@ -2652,7 +2724,7 @@
|
| BLOCK_SIZE bsize, BLOCK_SIZE subsize,
|
| PC_TREE *pc_tree) {
|
| MACROBLOCKD *xd = &x->e_mbd;
|
| - int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
|
| + int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
|
| PARTITION_TYPE partition = pc_tree->partitioning;
|
|
|
| assert(bsize >= BLOCK_8X8);
|
| @@ -2771,9 +2843,14 @@
|
| this_rate += cpi->partition_cost[pl][PARTITION_NONE];
|
| sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
|
| if (sum_rd < best_rd) {
|
| - int64_t stop_thresh = 4096;
|
| - int64_t stop_thresh_rd;
|
| +      int64_t dist_breakout_thr = sf->partition_search_breakout_dist_thr;
|
| +      int rate_breakout_thr = sf->partition_search_breakout_rate_thr;
|
|
|
| + dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
|
| + b_height_log2_lookup[bsize]);
|
| +
|
| + rate_breakout_thr *= num_pels_log2_lookup[bsize];
|
| +
|
| best_rate = this_rate;
|
| best_dist = this_dist;
|
| best_rd = sum_rd;
|
| @@ -2780,14 +2857,9 @@
|
| if (bsize >= BLOCK_8X8)
|
| pc_tree->partitioning = PARTITION_NONE;
|
|
|
| - // Adjust threshold according to partition size.
|
| - stop_thresh >>= 8 - (b_width_log2(bsize) +
|
| - b_height_log2(bsize));
|
| -
|
| - stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
|
| - // If obtained distortion is very small, choose current partition
|
| - // and stop splitting.
|
| - if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
|
| + if (!x->e_mbd.lossless &&
|
| + this_rate < rate_breakout_thr &&
|
| + this_dist < dist_breakout_thr) {
|
| do_split = 0;
|
| do_rect = 0;
|
| }
|
| @@ -2974,7 +3046,7 @@
|
| VP9_COMMON *const cm = &cpi->common;
|
| MACROBLOCK *const x = &cpi->mb;
|
| MACROBLOCKD *const xd = &x->e_mbd;
|
| - const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
|
| + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
|
| const int mis = cm->mi_stride;
|
| PARTITION_TYPE partition;
|
| BLOCK_SIZE subsize;
|
| @@ -3095,7 +3167,6 @@
|
| int64_t dummy_dist = 0;
|
| const int idx_str = cm->mi_stride * mi_row + mi_col;
|
| MODE_INFO *mi = cm->mi + idx_str;
|
| - MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
|
| BLOCK_SIZE bsize;
|
| x->in_static_area = 0;
|
| x->source_variance = UINT_MAX;
|
| @@ -3113,7 +3184,6 @@
|
| nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
| 1, &dummy_rate, &dummy_dist, cpi->pc_root);
|
| break;
|
| - case VAR_BASED_FIXED_PARTITION:
|
| case FIXED_PARTITION:
|
| bsize = sf->partition_search_type == FIXED_PARTITION ?
|
| sf->always_this_block_size :
|
| @@ -3133,7 +3203,7 @@
|
| &dummy_rate, &dummy_dist, 1, INT64_MAX,
|
| cpi->pc_root);
|
| } else {
|
| - copy_partitioning(cm, mi, prev_mi);
|
| + choose_partitioning(cpi, tile, mi_row, mi_col);
|
| nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
|
| BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
|
| cpi->pc_root);
|
| @@ -3170,9 +3240,34 @@
|
|
|
| for (i = 0; i < cm->mb_rows; i++) {
|
| for (j = 0; j < cm->mb_cols; j++) {
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (cm->use_highbitdepth) {
|
| + switch (cm->bit_depth) {
|
| + case VPX_BITS_8:
|
| + vp9_highbd_get16x16var(src, src_stride, last_src, last_stride,
|
| + &var16->sse, &var16->sum);
|
| + break;
|
| + case VPX_BITS_10:
|
| + vp9_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
|
| + &var16->sse, &var16->sum);
|
| + break;
|
| + case VPX_BITS_12:
|
| + vp9_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
|
| + &var16->sse, &var16->sum);
|
| + break;
|
| + default:
|
| + assert(0 && "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
|
| + " or VPX_BITS_12");
|
| + return -1;
|
| + }
|
| + } else {
|
| + vp9_get16x16var(src, src_stride, last_src, last_stride,
|
| + &var16->sse, &var16->sum);
|
| + }
|
| +#else
|
| vp9_get16x16var(src, src_stride, last_src, last_stride,
|
| &var16->sse, &var16->sum);
|
| -
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| var16->var = var16->sse -
|
| (((uint32_t)var16->sum * var16->sum) >> 8);
|
|
|
| @@ -3252,25 +3347,39 @@
|
| const VP9_COMMON *const cm = &cpi->common;
|
| const int tile_cols = 1 << cm->log2_tile_cols;
|
| const int tile_rows = 1 << cm->log2_tile_rows;
|
| +
|
| int tile_col, tile_row;
|
| - TOKENEXTRA *tok = cpi->tok;
|
| + TileInfo tile[4][1 << 6];
|
| + TOKENEXTRA *tok[4][1 << 6];
|
| + TOKENEXTRA *pre_tok = cpi->tok;
|
| + int tile_tok = 0;
|
|
|
| for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
|
| for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
|
| - TileInfo tile;
|
| - TOKENEXTRA *old_tok = tok;
|
| + vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
|
| +
|
| + tok[tile_row][tile_col] = pre_tok + tile_tok;
|
| + pre_tok = tok[tile_row][tile_col];
|
| + tile_tok = allocated_tokens(tile[tile_row][tile_col]);
|
| + }
|
| + }
|
| +
|
| + for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
|
| + for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
|
| + const TileInfo * const ptile = &tile[tile_row][tile_col];
|
| + TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
|
| int mi_row;
|
|
|
| - vp9_tile_init(&tile, cm, tile_row, tile_col);
|
| - for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
|
| + for (mi_row = ptile->mi_row_start; mi_row < ptile->mi_row_end;
|
| mi_row += MI_BLOCK_SIZE) {
|
| if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
|
| - encode_nonrd_sb_row(cpi, &tile, mi_row, &tok);
|
| + encode_nonrd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
|
| else
|
| - encode_rd_sb_row(cpi, &tile, mi_row, &tok);
|
| + encode_rd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
|
| }
|
| - cpi->tok_count[tile_row][tile_col] = (unsigned int)(tok - old_tok);
|
| - assert(tok - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
|
| + cpi->tok_count[tile_row][tile_col] =
|
| + (unsigned int)(tok[tile_row][tile_col] - old_tok);
|
| + assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*ptile));
|
| }
|
| }
|
| }
|
| @@ -3314,7 +3423,16 @@
|
|
|
| cm->tx_mode = select_tx_mode(cpi);
|
|
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (cm->use_highbitdepth)
|
| +    x->fwd_txm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4;
|
| +  else
|
| +    x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
|
| + x->highbd_itxm_add = xd->lossless ? vp9_highbd_iwht4x4_add :
|
| + vp9_highbd_idct4x4_add;
|
| +#else
|
| x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
|
|
|
| if (xd->lossless) {
|
|
|