Chromium Code Reviews

Unified Diff: source/libvpx/vp9/encoder/vp9_encodeframe.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Index: source/libvpx/vp9/encoder/vp9_encodeframe.c
===================================================================
--- source/libvpx/vp9/encoder/vp9_encodeframe.c (revision 292608)
+++ source/libvpx/vp9/encoder/vp9_encodeframe.c (working copy)
@@ -61,16 +61,51 @@
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128
};
+#if CONFIG_VP9_HIGHBITDEPTH
+static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128
+};
+
+static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4
+};
+
+static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16
+};
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
@@ -80,6 +115,32 @@
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
+#if CONFIG_VP9_HIGHBITDEPTH
+static unsigned int high_get_sby_perpixel_variance(
+ VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) {
+ unsigned int var, sse;
+ switch (bd) {
+ case 10:
+ var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+ CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10),
+ 0, &sse);
+ break;
+ case 12:
+ var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+ CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12),
+ 0, &sse);
+ break;
+ case 8:
+ default:
+ var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+ CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8),
+ 0, &sse);
+ break;
+ }
+ return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
int mi_row, int mi_col,
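
For reference on the tables and high_get_sby_perpixel_variance() added above: the flat predictor is mid-gray at the coded bit depth, i.e. 128 << (bd - 8), which is why the 10-bit table holds 128*4 (= 512) and the 12-bit table 128*16 (= 2048). Below is a minimal standalone sketch of the same per-pixel-variance idea; it is illustrative only and uses a hypothetical helper instead of the cpi->fn_ptr[bs].vf() machinery and ROUND_POWER_OF_TWO().

#include <stdint.h>

/* Illustrative only: per-pixel variance of a w x h high-bit-depth block
 * against a flat mid-gray predictor at bit depth bd. */
static unsigned int flat_perpixel_variance(const uint16_t *src, int stride,
                                           int w, int h, int bd) {
  const int offs = 128 << (bd - 8);  /* 128, 512 or 2048 */
  int64_t sum = 0, sse = 0;
  int r, c;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int diff = src[r * stride + c] - offs;
      sum += diff;
      sse += (int64_t)diff * diff;
    }
  }
  /* block variance = sse - sum^2 / n, then normalize by the pixel count n */
  return (unsigned int)((sse - (sum * sum) / (w * h)) / (w * h));
}
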
@@ -335,10 +396,11 @@
const int block_width = num_8x8_blocks_wide_lookup[bsize];
const int block_height = num_8x8_blocks_high_lookup[bsize];
// TODO(debargha): Choose this more intelligently.
- const int64_t threshold_multiplier = 25;
- int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
+ const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
+ int64_t threshold =
+ (int64_t)(threshold_multiplier *
+ vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth));
assert(block_height == block_width);
-
tree_to_node(data, bsize, &vt);
// Split none is available only if we have more than half a block size
@@ -350,29 +412,47 @@
return 1;
}
- // Vertical split is available on all but the bottom border.
- if (mi_row + block_height / 2 < cm->mi_rows &&
- vt.part_variances->vert[0].variance < threshold &&
- vt.part_variances->vert[1].variance < threshold) {
- BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
- set_block_size(cpi, mi_row, mi_col, subsize);
- set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
- return 1;
+ // Only allow split for blocks above 16x16.
+ if (bsize > BLOCK_16X16) {
+ // Vertical split is available on all but the bottom border.
+ if (mi_row + block_height / 2 < cm->mi_rows &&
+ vt.part_variances->vert[0].variance < threshold &&
+ vt.part_variances->vert[1].variance < threshold) {
+ BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
+ return 1;
+ }
+
+ // Horizontal split is available on all but the right border.
+ if (mi_col + block_width / 2 < cm->mi_cols &&
+ vt.part_variances->horz[0].variance < threshold &&
+ vt.part_variances->horz[1].variance < threshold) {
+ BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
+ return 1;
+ }
}
- // Horizontal split is available on all but the right border.
- if (mi_col + block_width / 2 < cm->mi_cols &&
- vt.part_variances->horz[0].variance < threshold &&
- vt.part_variances->horz[1].variance < threshold) {
- BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
- set_block_size(cpi, mi_row, mi_col, subsize);
- set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
- return 1;
+ // This will only allow 8x8 if the 16x16 variance is very large.
+ if (bsize == BLOCK_16X16) {
+ if (mi_col + block_width / 2 < cm->mi_cols &&
+ mi_row + block_height / 2 < cm->mi_rows &&
+ vt.part_variances->none.variance < (threshold << 6)) {
+ set_block_size(cpi, mi_row, mi_col, bsize);
+ return 1;
+ }
}
return 0;
}
-// TODO(debargha): Fix this function and make it work as expected.
+// This function chooses partitioning based on the variance
+// between source and reconstructed last, where variance is
+// computed for 8x8 downsampled inputs. Some things to check:
+// using the last source rather than reconstructed last, and
+// allowing for small downsampling (4x4 or 2x2) for selection
+// of smaller block sizes (i.e., < 16x16).
static void choose_partitioning(VP9_COMP *cpi,
const TileInfo *const tile,
int mi_row, int mi_col) {
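
To make the new thresholding above concrete: the threshold now scales with the frame quantizer via vp9_convert_qindex_to_q(), the multiplier is 64 on key frames versus 4 otherwise, splits below 16x16 are normally disallowed, and an 8x8 partition is reached only when the 16x16 variance exceeds 64x the threshold. A simplified sketch of just the 16x16-level decision follows (illustrative only; q stands in for the converted quantizer value, and border handling is omitted).

#include <stdint.h>

typedef enum { CHOICE_KEEP_16X16, CHOICE_SPLIT_TO_8X8 } vt16_choice;

static vt16_choice vt16_decide(int64_t var_16x16, double q, int is_key_frame) {
  const int multiplier = is_key_frame ? 64 : 4;        /* values from the patch */
  const int64_t threshold = (int64_t)(multiplier * q);
  /* Below the threshold the block is flat enough anyway; between threshold and
   * threshold << 6 it is still kept at 16x16 because vertical/horizontal splits
   * are not evaluated at this size; only a very large variance forces the
   * fall-through to four 8x8 blocks. */
  return (var_16x16 < (threshold << 6)) ? CHOICE_KEEP_16X16
                                        : CHOICE_SPLIT_TO_8X8;
}
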
@@ -391,6 +471,7 @@
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
+ vp9_clear_system_state();
vp9_zero(vt);
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -419,6 +500,22 @@
} else {
d = VP9_VAR_OFFS;
dp = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ switch (xd->bd) {
+ case 10:
+ d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10);
+ break;
+ case 12:
+ d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12);
+ break;
+ case 8:
+ default:
+ d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8);
+ break;
+ }
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
// Fill in the entire tree of 8x8 variances for splits.
@@ -434,10 +531,28 @@
int y_idx = y16_idx + ((k >> 1) << 3);
unsigned int sse = 0;
int sum = 0;
- if (x_idx < pixels_wide && y_idx < pixels_high)
- vp9_get8x8var(s + y_idx * sp + x_idx, sp,
- d + y_idx * dp + x_idx, dp, &sse, &sum);
- fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
+
+ if (x_idx < pixels_wide && y_idx < pixels_high) {
+ int s_avg, d_avg;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp);
+ d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp);
+ } else {
+ s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+ d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+ }
+#else
+ s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+ d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+#endif
+ sum = s_avg - d_avg;
+ sse = sum * sum;
+ }
+ // For an 8x8 block we have just one value, the average of all 64
+ // pixels, so the sample count is 1. This of course means there is no
+ // variance within an 8x8 block.
+ fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
}
}
}
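
The loop above replaces the vp9_get8x8var() call with a much cheaper statistic: each 8x8 block of the source and of the last reference is reduced to its average, the difference of the two averages becomes the "sum", its square the "sse", and the tree node is filled with a sample count of 1. Below is a small stand-in for the averaging helper; it is only a sketch, vp9_avg_8x8()/vp9_highbd_avg_8x8() are the routines actually used.

#include <stdint.h>

/* Rounded mean of an 8x8 block of 8-bit pixels. */
static int avg_8x8_sketch(const uint8_t *p, int stride) {
  int r, c, sum = 0;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c)
      sum += p[r * stride + c];
  return (sum + 32) >> 6;  /* divide by 64 with rounding */
}

/* Usage mirroring the diff:
 *   sum = avg_8x8_sketch(s + y_idx * sp + x_idx, sp) -
 *         avg_8x8_sketch(d + y_idx * dp + x_idx, dp);
 *   sse = sum * sum;
 *   fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
 */
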
@@ -453,8 +568,8 @@
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
- if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
- mi_row, mi_col)) {
+ if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
+ !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
@@ -463,31 +578,15 @@
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
- // NOTE: This is a temporary hack to disable 8x8 partitions,
- // since it works really bad - possibly due to a bug
-#define DISABLE_8X8_VAR_BASED_PARTITION
-#ifdef DISABLE_8X8_VAR_BASED_PARTITION
- if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
- mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
- set_block_size(cpi,
- (mi_row + y32_idx + y16_idx),
- (mi_col + x32_idx + x16_idx),
- BLOCK_16X16);
- } else {
- for (k = 0; k < 4; ++k) {
- const int x8_idx = (k & 1);
- const int y8_idx = (k >> 1);
- set_block_size(cpi,
- (mi_row + y32_idx + y16_idx + y8_idx),
- (mi_col + x32_idx + x16_idx + x8_idx),
- BLOCK_8X8);
- }
- }
-#else
- if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
+ // NOTE: Since this uses 8x8 downsampling for variance calculation,
+ // we cannot really select block size 8x8 (or even 8x16/16x8),
+ // because we do not have sufficient samples for variance.
+ // For now, an 8x8 partition is only set if the variance of the 16x16
+ // block is very high. This is controlled in set_vt_partitioning.
+ if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
BLOCK_16X16,
- (mi_row + y32_idx + y16_idx),
- (mi_col + x32_idx + x16_idx), 2)) {
+ mi_row + y32_idx + y16_idx,
+ mi_col + x32_idx + x16_idx)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
@@ -497,7 +596,6 @@
BLOCK_8X8);
}
}
-#endif
}
}
}
@@ -684,10 +782,9 @@
}
static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, int mi_col,
- int *totalrate, int64_t *totaldist,
+ int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd, int block) {
+ int64_t best_rd) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -704,19 +801,6 @@
// Use the lower precision, but faster, 32x32 fdct for mode selection.
x->use_lp32x32fdct = 1;
- // TODO(JBB): Most other places in the code instead of calling the function
- // and then checking if its not the first 8x8 we put the check in the
- // calling function. Do that here.
- if (bsize < BLOCK_8X8) {
- // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
- // there is nothing to be done.
- if (block != 0) {
- *totalrate = 0;
- *totaldist = 0;
- return;
- }
- }
-
set_offsets(cpi, tile, mi_row, mi_col, bsize);
mbmi = &xd->mi[0].src_mi->mbmi;
mbmi->sb_type = bsize;
@@ -734,7 +818,17 @@
// Set to zero to make sure we do not use the previous encoded frame stats
mbmi->skip = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->source_variance =
+ high_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize, xd->bd);
+ } else {
+ x->source_variance =
+ get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+ }
+#else
x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
// Save rdmult before it might be changed, so it can be restored later.
orig_rdmult = x->rdmult;
@@ -774,28 +868,33 @@
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (frame_is_intra_only(cm)) {
- vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx,
- best_rd);
+ vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
} else {
if (bsize >= BLOCK_8X8) {
if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
- vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, totalrate, totaldist, bsize,
+ vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, rd_cost, bsize,
ctx, best_rd);
else
vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
- totalrate, totaldist, bsize, ctx, best_rd);
+ rd_cost, bsize, ctx, best_rd);
} else {
- vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate,
- totaldist, bsize, ctx, best_rd);
+ vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, rd_cost,
+ bsize, ctx, best_rd);
}
}
+ if (aq_mode == VARIANCE_AQ && rd_cost->rate != INT_MAX) {
+ vp9_clear_system_state();
+ rd_cost->rate = (int)round(rd_cost->rate * rdmult_ratio);
+ rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ }
+
x->rdmult = orig_rdmult;
- if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) {
- vp9_clear_system_state();
- *totalrate = (int)round(*totalrate * rdmult_ratio);
- }
+ // TODO(jingning) The rate-distortion optimization flow needs to be
+ // refactored to provide proper exit/return handling.
+ if (rd_cost->rate == INT_MAX)
+ rd_cost->rdcost = INT64_MAX;
}
static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) {
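
The signature change above is part of a wider refactor in this file: the separate int *totalrate / int64_t *totaldist out-parameters become a single RD_COST that carries rate, distortion and the combined rd cost, with reset/init helpers used throughout the partition search. The following is only a sketch of that pattern as it is used in this diff; the real RD_COST type and the vp9_rd_cost_init()/vp9_rd_cost_reset() helpers live in the encoder's RD sources.

#include <limits.h>
#include <stdint.h>

typedef struct {
  int rate;        /* bits */
  int64_t dist;    /* distortion */
  int64_t rdcost;  /* RDCOST(rdmult, rddiv, rate, dist) */
} rd_cost_sketch;

/* "Worst possible" state: used before a search and when a branch fails. */
static void rd_cost_sketch_reset(rd_cost_sketch *rd) {
  rd->rate = INT_MAX;
  rd->dist = INT64_MAX;
  rd->rdcost = INT64_MAX;
}

/* Zeroed accumulator: used when summing the cost of sub-partitions. */
static void rd_cost_sketch_init(rd_cost_sketch *rd) {
  rd->rate = 0;
  rd->dist = 0;
  rd->rdcost = 0;
}
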
@@ -925,7 +1024,7 @@
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
int ctx;
PARTITION_TYPE partition;
BLOCK_SIZE subsize = bsize;
@@ -1297,12 +1396,18 @@
*(xd->mi[0].src_mi) = ctx->mic;
xd->mi[0].src_mi = &xd->mi[0];
-
- // For in frame adaptive Q, check for reseting the segment_id and updating
- // the cyclic refresh map.
- if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) {
- vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0].src_mi->mbmi,
- mi_row, mi_col, bsize, 1);
+ if (seg->enabled && cpi->oxcf.aq_mode) {
+ // For in frame complexity AQ or variance AQ, copy segment_id from
+ // segmentation_map.
+ if (cpi->oxcf.aq_mode == COMPLEXITY_AQ ||
+ cpi->oxcf.aq_mode == VARIANCE_AQ ) {
+ const uint8_t *const map = seg->update_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
+ mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
+ } else {
+ // Setting segmentation map for cyclic_refresh
+ vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize, 1);
+ }
vp9_init_plane_quantizers(cpi, x);
}
@@ -1348,7 +1453,7 @@
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
int ctx;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
@@ -1411,10 +1516,9 @@
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
-static void rd_use_partition(VP9_COMP *cpi,
- const TileInfo *const tile,
- MODE_INFO *mi_8x8,
- TOKENEXTRA **tp, int mi_row, int mi_col,
+static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
+ MODE_INFO *mi_8x8, TOKENEXTRA **tp,
+ int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist,
int do_recon, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
@@ -1421,7 +1525,7 @@
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int mis = cm->mi_stride;
- const int bsl = b_width_log2(bsize);
+ const int bsl = b_width_log2_lookup[bsize];
const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
const int bss = (1 << bsl) / 4;
int i, pl;
@@ -1429,15 +1533,7 @@
BLOCK_SIZE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
- int last_part_rate = INT_MAX;
- int64_t last_part_dist = INT64_MAX;
- int64_t last_part_rd = INT64_MAX;
- int none_rate = INT_MAX;
- int64_t none_dist = INT64_MAX;
- int64_t none_rd = INT64_MAX;
- int chosen_rate = INT_MAX;
- int64_t chosen_dist = INT64_MAX;
- int64_t chosen_rd = INT64_MAX;
+ RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type;
@@ -1450,6 +1546,10 @@
assert(num_4x4_blocks_wide_lookup[bsize] ==
num_4x4_blocks_high_lookup[bsize]);
+ vp9_rd_cost_reset(&last_part_rdc);
+ vp9_rd_cost_reset(&none_rdc);
+ vp9_rd_cost_reset(&chosen_rdc);
+
partition = partition_lookup[bsl][bs_type];
subsize = get_subsize(bsize, partition);
@@ -1483,14 +1583,15 @@
mi_row + (mi_step >> 1) < cm->mi_rows &&
mi_col + (mi_step >> 1) < cm->mi_cols) {
pc_tree->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
- ctx, INT64_MAX, 0);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rdc, bsize,
+ ctx, INT64_MAX);
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- if (none_rate < INT_MAX) {
- none_rate += cpi->partition_cost[pl][PARTITION_NONE];
- none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist);
+ if (none_rdc.rate < INT_MAX) {
+ none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate,
+ none_rdc.dist);
}
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -1501,84 +1602,81 @@
switch (partition) {
case PARTITION_NONE:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
- &last_part_dist, bsize, ctx, INT64_MAX, 0);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ bsize, ctx, INT64_MAX);
break;
case PARTITION_HORZ:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
- &last_part_dist, subsize, &pc_tree->horizontal[0],
- INT64_MAX, 0);
- if (last_part_rate != INT_MAX &&
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ subsize, &pc_tree->horizontal[0],
+ INT64_MAX);
+ if (last_part_rdc.rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) {
- int rt = 0;
- int64_t dt = 0;
+ RD_COST tmp_rdc;
PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+ vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
- rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt,
- subsize, &pc_tree->horizontal[1], INT64_MAX, 1);
- if (rt == INT_MAX || dt == INT64_MAX) {
- last_part_rate = INT_MAX;
- last_part_dist = INT64_MAX;
+ rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &tmp_rdc,
+ subsize, &pc_tree->horizontal[1], INT64_MAX);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp9_rd_cost_reset(&last_part_rdc);
break;
}
-
- last_part_rate += rt;
- last_part_dist += dt;
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ last_part_rdc.rdcost += tmp_rdc.rdcost;
}
break;
case PARTITION_VERT:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
- &last_part_dist, subsize, &pc_tree->vertical[0],
- INT64_MAX, 0);
- if (last_part_rate != INT_MAX &&
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ subsize, &pc_tree->vertical[0], INT64_MAX);
+ if (last_part_rdc.rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
- int rt = 0;
- int64_t dt = 0;
+ RD_COST tmp_rdc;
PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+ vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt,
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
- INT64_MAX, 1);
- if (rt == INT_MAX || dt == INT64_MAX) {
- last_part_rate = INT_MAX;
- last_part_dist = INT64_MAX;
+ INT64_MAX);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp9_rd_cost_reset(&last_part_rdc);
break;
}
- last_part_rate += rt;
- last_part_dist += dt;
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ last_part_rdc.rdcost += tmp_rdc.rdcost;
}
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_8X8) {
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate,
- &last_part_dist, subsize, pc_tree->leaf_split[0],
- INT64_MAX, 0);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ subsize, pc_tree->leaf_split[0], INT64_MAX);
break;
}
- last_part_rate = 0;
- last_part_dist = 0;
+ last_part_rdc.rate = 0;
+ last_part_rdc.dist = 0;
+ last_part_rdc.rdcost = 0;
for (i = 0; i < 4; i++) {
int x_idx = (i & 1) * (mi_step >> 1);
int y_idx = (i >> 1) * (mi_step >> 1);
int jj = i >> 1, ii = i & 0x01;
- int rt;
- int64_t dt;
-
+ RD_COST tmp_rdc;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
+ vp9_rd_cost_init(&tmp_rdc);
rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
- mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
+ mi_row + y_idx, mi_col + x_idx, subsize,
+ &tmp_rdc.rate, &tmp_rdc.dist,
i != 3, pc_tree->split[i]);
- if (rt == INT_MAX || dt == INT64_MAX) {
- last_part_rate = INT_MAX;
- last_part_dist = INT64_MAX;
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp9_rd_cost_reset(&last_part_rdc);
break;
}
- last_part_rate += rt;
- last_part_dist += dt;
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
}
break;
default:
@@ -1587,9 +1685,10 @@
}
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- if (last_part_rate < INT_MAX) {
- last_part_rate += cpi->partition_cost[pl][partition];
- last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
+ if (last_part_rdc.rate < INT_MAX) {
+ last_part_rdc.rate += cpi->partition_cost[pl][partition];
+ last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ last_part_rdc.rate, last_part_rdc.dist);
}
if (do_partition_search
@@ -1601,8 +1700,8 @@
&& (mi_col + mi_step < cm->mi_cols ||
mi_col + (mi_step >> 1) == cm->mi_cols)) {
BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
- chosen_rate = 0;
- chosen_dist = 0;
+ chosen_rdc.rate = 0;
+ chosen_rdc.dist = 0;
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
pc_tree->partitioning = PARTITION_SPLIT;
@@ -1610,8 +1709,7 @@
for (i = 0; i < 4; i++) {
int x_idx = (i & 1) * (mi_step >> 1);
int y_idx = (i >> 1) * (mi_step >> 1);
- int rt = 0;
- int64_t dt = 0;
+ RD_COST tmp_rdc;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
@@ -1620,20 +1718,18 @@
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
pc_tree->split[i]->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
- split_subsize, &pc_tree->split[i]->none,
- INT64_MAX, i);
+ rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
+ split_subsize, &pc_tree->split[i]->none, INT64_MAX);
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- if (rt == INT_MAX || dt == INT64_MAX) {
- chosen_rate = INT_MAX;
- chosen_dist = INT64_MAX;
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp9_rd_cost_reset(&chosen_rdc);
break;
}
- chosen_rate += rt;
- chosen_dist += dt;
+ chosen_rdc.rate += tmp_rdc.rate;
+ chosen_rdc.dist += tmp_rdc.dist;
if (i != 3)
encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0,
@@ -1641,30 +1737,28 @@
pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
split_subsize);
- chosen_rate += cpi->partition_cost[pl][PARTITION_NONE];
+ chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
}
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- if (chosen_rate < INT_MAX) {
- chosen_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
- chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
+ if (chosen_rdc.rate < INT_MAX) {
+ chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ chosen_rdc.rate, chosen_rdc.dist);
}
}
// If last_part is better set the partitioning to that.
- if (last_part_rd < chosen_rd) {
+ if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
mi_8x8[0].src_mi->mbmi.sb_type = bsize;
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = partition;
- chosen_rate = last_part_rate;
- chosen_dist = last_part_dist;
- chosen_rd = last_part_rd;
+ chosen_rdc = last_part_rdc;
}
// If none was better set the partitioning to that.
- if (none_rd < chosen_rd) {
+ if (none_rdc.rdcost < chosen_rdc.rdcost) {
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
- chosen_rate = none_rate;
- chosen_dist = none_dist;
+ chosen_rdc = none_rdc;
}
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -1671,8 +1765,8 @@
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
- if ( bsize == BLOCK_64X64)
- assert(chosen_rate < INT_MAX && chosen_dist < INT64_MAX);
+ if (bsize == BLOCK_64X64)
+ assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
if (do_recon) {
int output_enabled = (bsize == BLOCK_64X64);
@@ -1682,18 +1776,18 @@
// closer to the target.
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
vp9_select_in_frame_q_segment(cpi, mi_row, mi_col,
- output_enabled, chosen_rate);
+ output_enabled, chosen_rdc.rate);
}
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
- chosen_rate, chosen_dist);
+ chosen_rdc.rate, chosen_rdc.dist);
encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize,
pc_tree);
}
- *rate = chosen_rate;
- *dist = chosen_dist;
+ *rate = chosen_rdc.rate;
+ *dist = chosen_rdc.dist;
}
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
@@ -1863,7 +1957,7 @@
int bh, bw;
BLOCK_SIZE min_size = BLOCK_32X32;
BLOCK_SIZE max_size = BLOCK_8X8;
- int bsl = mi_width_log2(BLOCK_64X64);
+ int bsl = mi_width_log2_lookup[BLOCK_64X64];
const int search_range_ctrl = (((mi_row + mi_col) >> bsl) +
get_chessboard_index(cm->current_video_frame)) & 0x1;
// Trap case where we do not have a prediction.
@@ -2022,10 +2116,9 @@
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
- TOKENEXTRA **tp, int mi_row,
- int mi_col, BLOCK_SIZE bsize, int *rate,
- int64_t *dist, int64_t best_rd,
- PC_TREE *pc_tree) {
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, RD_COST *rd_cost,
+ int64_t best_rd, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2036,9 +2129,7 @@
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i, pl;
BLOCK_SIZE subsize;
- int this_rate, sum_rate = 0, best_rate = INT_MAX;
- int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
- int64_t sum_rd = 0;
+ RD_COST this_rdc, sum_rdc, best_rdc;
int do_split = bsize >= BLOCK_8X8;
int do_rect = 1;
@@ -2066,6 +2157,11 @@
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
+ vp9_rd_cost_init(&this_rdc);
+ vp9_rd_cost_init(&sum_rdc);
+ vp9_rd_cost_reset(&best_rdc);
+ best_rdc.rdcost = best_rd;
+
set_offsets(cpi, tile, mi_row, mi_col, bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode)
@@ -2157,29 +2253,30 @@
// PARTITION_NONE
if (partition_none_allowed) {
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
- ctx, best_rd, 0);
- if (this_rate != INT_MAX) {
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rdc, bsize, ctx,
+ best_rdc.rdcost);
+ if (this_rdc.rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_NONE];
+ this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
}
- sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
- if (sum_rd < best_rd) {
+ if (this_rdc.rdcost < best_rdc.rdcost) {
int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
- best_rate = this_rate;
- best_dist = this_dist;
- best_rd = sum_rd;
+ best_rdc = this_rdc;
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
// Adjust dist breakout threshold according to the partition size.
- dist_breakout_thr >>= 8 - (b_width_log2(bsize) +
- b_height_log2(bsize));
+ dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+
// If all y, u, v transform blocks in this partition are skippable, and
// the dist & rate are within the thresholds, the partition search is
// terminated for current branch of the partition search tree.
@@ -2186,8 +2283,8 @@
// The dist & rate thresholds are set to 0 at speed 0 to disable the
// early termination at that speed.
if (!x->e_mbd.lossless &&
- (ctx->skippable && best_dist < dist_breakout_thr &&
- best_rate < rate_breakout_thr)) {
+ (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr)) {
do_split = 0;
do_rect = 0;
}
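
Worked example of the breakout scaling above, assuming the standard lookup values (b_width_log2_lookup[] and b_height_log2_lookup[] are 1 for BLOCK_8X8 and 4 for BLOCK_64X64; num_pels_log2_lookup[] is 6 and 12 respectively): at 64x64 the distortion threshold is shifted by 8 - (4 + 4) = 0, i.e. unchanged, and the rate threshold is multiplied by 12; at 8x8 the distortion threshold is shifted by 8 - (1 + 1) = 6, i.e. divided by 64, and the rate threshold is multiplied by 6. In sketch form:

#include <stdint.h>

/* Same arithmetic as the diff, factored out for clarity (sketch only). */
static void scale_breakout_thresholds(int64_t *dist_thr, int *rate_thr,
                                      int b_width_log2, int b_height_log2,
                                      int num_pels_log2) {
  *dist_thr >>= 8 - (b_width_log2 + b_height_log2);  /* tighter for small blocks */
  *rate_thr *= num_pels_log2;                        /* scales with log2(pixels) */
}
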
@@ -2247,7 +2344,6 @@
store_pred_mv(x, ctx);
// PARTITION_SPLIT
- sum_rd = 0;
// TODO(jingning): use the motion vectors given by the above search as
// the starting point of motion search in the following partition type check.
if (do_split) {
@@ -2257,14 +2353,12 @@
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- pc_tree->leaf_split[0], best_rd, 0);
- if (sum_rate == INT_MAX)
- sum_rd = INT64_MAX;
- else
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
+ pc_tree->leaf_split[0], best_rdc.rdcost);
+ if (sum_rdc.rate == INT_MAX)
+ sum_rdc.rdcost = INT64_MAX;
} else {
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+ for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
const int x_idx = (i & 1) * mi_step;
const int y_idx = (i >> 1) * mi_step;
@@ -2276,28 +2370,28 @@
pc_tree->split[i]->index = i;
rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
- subsize, &this_rate, &this_dist,
- best_rd - sum_rd, pc_tree->split[i]);
+ subsize, &this_rdc,
+ best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ break;
} else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
}
}
}
- if (sum_rd < best_rd && i == 4) {
+ if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ sum_rdc.rate, sum_rdc.dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
}
} else {
@@ -2318,11 +2412,11 @@
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- &pc_tree->horizontal[0], best_rd, 0);
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
+ &pc_tree->horizontal[0], best_rdc.rdcost);
- if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) {
+ if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
+ bsize > BLOCK_8X8) {
PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
@@ -2333,25 +2427,24 @@
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate,
- &this_dist, subsize, &pc_tree->horizontal[1],
- best_rd - sum_rd, 1);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rdc,
+ subsize, &pc_tree->horizontal[1],
+ best_rdc.rdcost - sum_rdc.rdcost);
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
} else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
}
}
- if (sum_rd < best_rd) {
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- sum_rate += cpi->partition_cost[pl][PARTITION_HORZ];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rd = sum_rd;
- best_rate = sum_rate;
- best_dist = sum_dist;
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_HORZ;
}
}
@@ -2367,10 +2460,10 @@
partition_none_allowed)
pc_tree->vertical[0].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- &pc_tree->vertical[0], best_rd, 0);
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) {
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
+ &pc_tree->vertical[0], best_rdc.rdcost);
+ if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
+ bsize > BLOCK_8X8) {
update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize,
&pc_tree->vertical[0]);
@@ -2381,26 +2474,24 @@
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate,
- &this_dist, subsize,
- &pc_tree->vertical[1], best_rd - sum_rd,
- 1);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rdc, subsize,
+ &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
} else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
}
}
- if (sum_rd < best_rd) {
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- sum_rate += cpi->partition_cost[pl][PARTITION_VERT];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ sum_rdc.rate, sum_rdc.dist);
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_VERT;
}
}
@@ -2412,10 +2503,11 @@
// point. This code should be refactored so that the duplicate
// checks occur in some sub function and thus are used...
(void) best_rd;
- *rate = best_rate;
- *dist = best_dist;
+ *rd_cost = best_rdc;
- if (best_rate < INT_MAX && best_dist < INT64_MAX && pc_tree->index != 3) {
+
+ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
+ pc_tree->index != 3) {
int output_enabled = (bsize == BLOCK_64X64);
// Check the projected output rate for this SB against it's target
@@ -2423,10 +2515,10 @@
// closer to the target.
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map)
vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
- best_rate);
+ best_rdc.rate);
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
- best_rate, best_dist);
+ best_rdc.rate, best_rdc.dist);
encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
}
@@ -2433,8 +2525,8 @@
if (bsize == BLOCK_64X64) {
assert(tp_orig < *tp);
- assert(best_rate < INT_MAX);
- assert(best_dist < INT64_MAX);
+ assert(best_rdc.rate < INT_MAX);
+ assert(best_rdc.dist < INT64_MAX);
} else {
assert(tp_orig == *tp);
}
@@ -2456,9 +2548,16 @@
mi_col += MI_BLOCK_SIZE) {
int dummy_rate;
int64_t dummy_dist;
-
+ RD_COST dummy_rdc;
int i;
+ const int idx_str = cm->mi_stride * mi_row + mi_col;
+ MODE_INFO *mi = cm->mi + idx_str;
+ MODE_INFO *prev_mi = NULL;
+
+ if (cm->frame_type != KEY_FRAME)
+ prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
+
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i)
cpi->leaf_tree[i].pred_interp_filter = SWITCHABLE;
@@ -2477,71 +2576,44 @@
// TODO(yunqingwang): use_lastframe_partitioning is no longer used in good-
// quality encoding. Need to evaluate it in real-time encoding later to
// decide if it can be removed too. And then, do the code cleanup.
- if ((sf->partition_search_type == SEARCH_PARTITION &&
- sf->use_lastframe_partitioning) ||
- sf->partition_search_type == FIXED_PARTITION ||
- sf->partition_search_type == VAR_BASED_PARTITION ||
- sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
- const int idx_str = cm->mi_stride * mi_row + mi_col;
- MODE_INFO *mi = cm->mi + idx_str;
- MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
- cpi->mb.source_variance = UINT_MAX;
- if (sf->partition_search_type == FIXED_PARTITION) {
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
- sf->always_this_block_size);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (cpi->skippable_frame ||
- sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
- BLOCK_SIZE bsize;
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
- set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
- choose_partitioning(cpi, tile, mi_row, mi_col);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else {
- GF_GROUP * gf_grp = &cpi->twopass.gf_group;
- int last_was_mid_sequence_overlay = 0;
- if ((cpi->oxcf.pass == 2) && (gf_grp->index)) {
- if (gf_grp->update_type[gf_grp->index - 1] == OVERLAY_UPDATE)
- last_was_mid_sequence_overlay = 1;
- }
- if ((cpi->rc.frames_since_key
- % sf->last_partitioning_redo_frequency) == 0
- || last_was_mid_sequence_overlay
- || cm->prev_mi == 0
- || cm->show_frame == 0
- || cm->frame_type == KEY_FRAME
- || cpi->rc.is_src_frame_alt_ref
- || ((sf->use_lastframe_partitioning ==
- LAST_FRAME_PARTITION_LOW_MOTION) &&
- sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
- // If required set upper and lower partition size limits
- if (sf->auto_min_max_partition_size) {
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- rd_auto_partition_range(cpi, tile, mi_row, mi_col,
- &sf->min_partition_size,
- &sf->max_partition_size);
- }
- rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, INT64_MAX,
- cpi->pc_root);
- } else {
- if (sf->constrain_copy_partition &&
- sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
- constrain_copy_partitioning(cpi, tile, mi, prev_mi,
- mi_row, mi_col, BLOCK_16X16);
- else
- copy_partitioning(cm, mi, prev_mi);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
- }
- }
+ cpi->mb.source_variance = UINT_MAX;
+ if (sf->partition_search_type == FIXED_PARTITION) {
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
+ sf->always_this_block_size);
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1, cpi->pc_root);
+ } else if (cpi->partition_search_skippable_frame) {
+ BLOCK_SIZE bsize;
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
+ set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1, cpi->pc_root);
+ } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
+ cm->frame_type != KEY_FRAME ) {
+ choose_partitioning(cpi, tile, mi_row, mi_col);
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1, cpi->pc_root);
+ } else if (sf->partition_search_type == SEARCH_PARTITION &&
+ sf->use_lastframe_partitioning &&
+ (cpi->rc.frames_since_key %
+ sf->last_partitioning_redo_frequency) &&
+ cm->prev_mi &&
+ cm->show_frame &&
+ cm->frame_type != KEY_FRAME &&
+ !cpi->rc.is_src_frame_alt_ref &&
+ ((sf->use_lastframe_partitioning !=
+ LAST_FRAME_PARTITION_LOW_MOTION) ||
+ !sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
+ if (sf->constrain_copy_partition &&
+ sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
+ constrain_copy_partitioning(cpi, tile, mi, prev_mi,
+ mi_row, mi_col, BLOCK_16X16);
+ else
+ copy_partitioning(cm, mi, prev_mi);
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
@@ -2551,7 +2623,7 @@
&sf->max_partition_size);
}
rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, INT64_MAX, cpi->pc_root);
+ &dummy_rdc, INT64_MAX, cpi->pc_root);
}
}
}
@@ -2652,7 +2724,7 @@
BLOCK_SIZE bsize, BLOCK_SIZE subsize,
PC_TREE *pc_tree) {
MACROBLOCKD *xd = &x->e_mbd;
- int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+ int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
PARTITION_TYPE partition = pc_tree->partitioning;
assert(bsize >= BLOCK_8X8);
@@ -2771,9 +2843,14 @@
this_rate += cpi->partition_cost[pl][PARTITION_NONE];
sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
if (sum_rd < best_rd) {
- int64_t stop_thresh = 4096;
- int64_t stop_thresh_rd;
+ int dist_breakout_thr = sf->partition_search_breakout_dist_thr;
+ int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr;
+ dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+
best_rate = this_rate;
best_dist = this_dist;
best_rd = sum_rd;
@@ -2780,14 +2857,9 @@
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
- // Adjust threshold according to partition size.
- stop_thresh >>= 8 - (b_width_log2(bsize) +
- b_height_log2(bsize));
-
- stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
- // If obtained distortion is very small, choose current partition
- // and stop splitting.
- if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+ if (!x->e_mbd.lossless &&
+ this_rate < rate_breakout_thr &&
+ this_dist < dist_breakout_thr) {
do_split = 0;
do_rect = 0;
}
@@ -2974,7 +3046,7 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
const int mis = cm->mi_stride;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
@@ -3095,7 +3167,6 @@
int64_t dummy_dist = 0;
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO *mi = cm->mi + idx_str;
- MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
BLOCK_SIZE bsize;
x->in_static_area = 0;
x->source_variance = UINT_MAX;
@@ -3113,7 +3184,6 @@
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist, cpi->pc_root);
break;
- case VAR_BASED_FIXED_PARTITION:
case FIXED_PARTITION:
bsize = sf->partition_search_type == FIXED_PARTITION ?
sf->always_this_block_size :
@@ -3133,7 +3203,7 @@
&dummy_rate, &dummy_dist, 1, INT64_MAX,
cpi->pc_root);
} else {
- copy_partitioning(cm, mi, prev_mi);
+ choose_partitioning(cpi, tile, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
cpi->pc_root);
@@ -3170,9 +3240,34 @@
for (i = 0; i < cm->mb_rows; i++) {
for (j = 0; j < cm->mb_cols; j++) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ vp9_highbd_get16x16var(src, src_stride, last_src, last_stride,
+ &var16->sse, &var16->sum);
+ break;
+ case VPX_BITS_10:
+ vp9_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
+ &var16->sse, &var16->sum);
+ break;
+ case VPX_BITS_12:
+ vp9_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
+ &var16->sse, &var16->sum);
+ break;
+ default:
+ assert(0 && "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
+ " or VPX_BITS_12");
+ return -1;
+ }
+ } else {
+ vp9_get16x16var(src, src_stride, last_src, last_stride,
+ &var16->sse, &var16->sum);
+ }
+#else
vp9_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
-
+#endif // CONFIG_VP9_HIGHBITDEPTH
var16->var = var16->sse -
(((uint32_t)var16->sum * var16->sum) >> 8);
@@ -3252,25 +3347,39 @@
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
+
int tile_col, tile_row;
- TOKENEXTRA *tok = cpi->tok;
+ TileInfo tile[4][1 << 6];
+ TOKENEXTRA *tok[4][1 << 6];
+ TOKENEXTRA *pre_tok = cpi->tok;
+ int tile_tok = 0;
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo tile;
- TOKENEXTRA *old_tok = tok;
+ vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
+
+ tok[tile_row][tile_col] = pre_tok + tile_tok;
+ pre_tok = tok[tile_row][tile_col];
+ tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ }
+ }
+
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ const TileInfo * const ptile = &tile[tile_row][tile_col];
+ TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
int mi_row;
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
+ for (mi_row = ptile->mi_row_start; mi_row < ptile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
- encode_nonrd_sb_row(cpi, &tile, mi_row, &tok);
+ encode_nonrd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
else
- encode_rd_sb_row(cpi, &tile, mi_row, &tok);
+ encode_rd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
}
- cpi->tok_count[tile_row][tile_col] = (unsigned int)(tok - old_tok);
- assert(tok - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+ cpi->tok_count[tile_row][tile_col] =
+ (unsigned int)(tok[tile_row][tile_col] - old_tok);
+ assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*ptile));
}
}
}
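
For context on the tile-loop restructuring above: tokens for all tiles still live in the single cpi->tok buffer, but a first pass now records a start pointer per tile, advancing by that tile's allocated_tokens() capacity, so the second pass can encode every tile into its own disjoint slice and check the per-tile token count against the same capacity. A simplified sketch of the carving step follows; TokenSketch and the capacity array are stand-ins for TOKENEXTRA and allocated_tokens().

typedef struct TokenSketch { int dummy; } TokenSketch;

/* Hand each of tile_rows x tile_cols tiles a disjoint slice of one
 * contiguous token buffer (sketch only). */
static void carve_token_buffer(TokenSketch *base,
                               TokenSketch *start[4][64],
                               const int capacity[4][64],
                               int tile_rows, int tile_cols) {
  int r, c;
  TokenSketch *pre = base;
  for (r = 0; r < tile_rows; ++r) {
    for (c = 0; c < tile_cols; ++c) {
      start[r][c] = pre;      /* where this tile's tokens will be written */
      pre += capacity[r][c];  /* skip past its maximum possible size */
    }
  }
}
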
@@ -3314,7 +3423,16 @@
cm->tx_mode = select_tx_mode(cpi);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ x->fwd_txm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4;
+ else
+ x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+ x->highbd_itxm_add = xd->lossless ? vp9_highbd_iwht4x4_add :
+ vp9_highbd_idct4x4_add;
+#else
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
+#endif // CONFIG_VP9_HIGHBITDEPTH
x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
if (xd->lossless) {