Index: source/libvpx/vp9/encoder/vp9_mcomp.c |
=================================================================== |
--- source/libvpx/vp9/encoder/vp9_mcomp.c (revision 291857) |
+++ source/libvpx/vp9/encoder/vp9_mcomp.c (working copy) |
@@ -256,6 +256,137 @@ |
} \ |
} |
+#define SETUP_SUBPEL_SEARCH \ |
+ const uint8_t *const z = x->plane[0].src.buf; \ |
+ const int src_stride = x->plane[0].src.stride; \ |
+ const MACROBLOCKD *xd = &x->e_mbd; \ |
+ unsigned int besterr = INT_MAX; \ |
+ unsigned int sse; \ |
+ unsigned int whichdir; \ |
+ int thismse; \ |
+ const unsigned int halfiters = iters_per_step; \ |
+ const unsigned int quarteriters = iters_per_step; \ |
+ const unsigned int eighthiters = iters_per_step; \ |
+ const int y_stride = xd->plane[0].pre[0].stride; \ |
+ const int offset = bestmv->row * y_stride + bestmv->col; \ |
+ const uint8_t *const y = xd->plane[0].pre[0].buf; \ |
+ \ |
+ int rr = ref_mv->row; \ |
+ int rc = ref_mv->col; \ |
+ int br = bestmv->row * 8; \ |
+ int bc = bestmv->col * 8; \ |
+ int hstep = 4; \ |
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \ |
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \ |
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \ |
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \ |
+ int tr = br; \ |
+ int tc = bc; \ |
+ \ |
+ bestmv->row *= 8; \ |
+ bestmv->col *= 8; \ |
+ if (second_pred != NULL) { \ |
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \ |
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \ |
+ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \ |
+ } else { \ |
+ besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \ |
+ } \ |
+ *distortion = besterr; \ |
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
+ |
+int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, |
+ MV *bestmv, const MV *ref_mv, |
+ int allow_hp, |
+ int error_per_bit, |
+ const vp9_variance_fn_ptr_t *vfp, |
+ int forced_stop, |
+ int iters_per_step, |
+ int *sad_list, |
+ int *mvjcost, int *mvcost[2], |
+ int *distortion, |
+ unsigned int *sse1, |
+ const uint8_t *second_pred, |
+ int w, int h) { |
+ SETUP_SUBPEL_SEARCH; |
+ |
+ if (sad_list && |
+ sad_list[0] != INT_MAX && sad_list[1] != INT_MAX && |
+ sad_list[2] != INT_MAX && sad_list[3] != INT_MAX && |
+ sad_list[4] != INT_MAX) { |
+ unsigned int left, right, up, down, diag; |
+ whichdir = (sad_list[1] < sad_list[3] ? 0 : 1) + |
+ (sad_list[2] < sad_list[4] ? 0 : 2); |
+ switch (whichdir) { |
+ case 0: |
+ CHECK_BETTER(left, tr, tc - hstep); |
+ CHECK_BETTER(up, tr - hstep, tc); |
+ CHECK_BETTER(diag, tr - hstep, tc - hstep); |
+ break; |
+ case 1: |
+ CHECK_BETTER(right, tr, tc + hstep); |
+ CHECK_BETTER(up, tr - hstep, tc); |
+ CHECK_BETTER(diag, tr - hstep, tc + hstep); |
+ break; |
+ case 2: |
+ CHECK_BETTER(left, tr, tc - hstep); |
+ CHECK_BETTER(down, tr + hstep, tc); |
+ CHECK_BETTER(diag, tr + hstep, tc - hstep); |
+ break; |
+ case 3: |
+ CHECK_BETTER(right, tr, tc + hstep); |
+ CHECK_BETTER(down, tr + hstep, tc); |
+ CHECK_BETTER(diag, tr + hstep, tc + hstep); |
+ break; |
+ } |
+ } else { |
+ FIRST_LEVEL_CHECKS; |
+ if (halfiters > 1) { |
+ SECOND_LEVEL_CHECKS; |
+ } |
+ } |
+ |
+ tr = br; |
+ tc = bc; |
+ |
+ // Each subsequent iteration checks at least one point in common with |
+ // the last iteration could be 2 ( if diag selected) 1/4 pel |
+ |
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
+ if (forced_stop != 2) { |
+ hstep >>= 1; |
+ FIRST_LEVEL_CHECKS; |
+ if (quarteriters > 1) { |
+ SECOND_LEVEL_CHECKS; |
+ } |
+ tr = br; |
+ tc = bc; |
+ } |
+ |
+ if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
+ hstep >>= 1; |
+ FIRST_LEVEL_CHECKS; |
+ if (eighthiters > 1) { |
+ SECOND_LEVEL_CHECKS; |
+ } |
+ tr = br; |
+ tc = bc; |
+ } |
+ // These lines insure static analysis doesn't warn that |
+ // tr and tc aren't used after the above point. |
+ (void) tr; |
+ (void) tc; |
+ |
+ bestmv->row = br; |
+ bestmv->col = bc; |
+ |
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
+ return INT_MAX; |
+ |
+ return besterr; |
+} |
+ |
int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, |
MV *bestmv, const MV *ref_mv, |
int allow_hp, |
@@ -263,56 +394,15 @@ |
const vp9_variance_fn_ptr_t *vfp, |
int forced_stop, |
int iters_per_step, |
+ int *sad_list, |
int *mvjcost, int *mvcost[2], |
int *distortion, |
unsigned int *sse1, |
const uint8_t *second_pred, |
int w, int h) { |
- const uint8_t *const z = x->plane[0].src.buf; |
- const int src_stride = x->plane[0].src.stride; |
- const MACROBLOCKD *xd = &x->e_mbd; |
- unsigned int besterr = INT_MAX; |
- unsigned int sse; |
- unsigned int whichdir; |
- int thismse; |
- const unsigned int halfiters = iters_per_step; |
- const unsigned int quarteriters = iters_per_step; |
- const unsigned int eighthiters = iters_per_step; |
+ SETUP_SUBPEL_SEARCH; |
+ (void) sad_list; // to silence compiler warning |
- const int y_stride = xd->plane[0].pre[0].stride; |
- const int offset = bestmv->row * y_stride + bestmv->col; |
- const uint8_t *const y = xd->plane[0].pre[0].buf; |
- |
- int rr = ref_mv->row; |
- int rc = ref_mv->col; |
- int br = bestmv->row * 8; |
- int bc = bestmv->col * 8; |
- int hstep = 4; |
- const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
- const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
- |
- int tr = br; |
- int tc = bc; |
- |
- // central mv |
- bestmv->row *= 8; |
- bestmv->col *= 8; |
- |
- // calculate central point error |
- // TODO(yunqingwang): central pointer error was already calculated in full- |
- // pixel search, and can be passed in this function. |
- if (second_pred != NULL) { |
- DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); |
- besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); |
- } else { |
- besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); |
- } |
- *distortion = besterr; |
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
- |
// Each subsequent iteration checks at least one point in |
// common with the last iteration could be 2 ( if diag selected) |
// 1/2 pel |
@@ -398,14 +488,17 @@ |
// Each scale can have a different number of candidates and shape of |
// candidates as indicated in the num_candidates and candidates arrays |
// passed into this function |
+// |
static int vp9_pattern_search(const MACROBLOCK *x, |
MV *ref_mv, |
int search_param, |
int sad_per_bit, |
- int do_init_search, int do_refine, |
+ int do_init_search, |
+ int *sad_list, |
const vp9_variance_fn_ptr_t *vfp, |
int use_mvcost, |
- const MV *center_mv, MV *best_mv, |
+ const MV *center_mv, |
+ MV *best_mv, |
const int num_candidates[MAX_PATTERN_SCALES], |
const MV candidates[MAX_PATTERN_SCALES] |
[MAX_PATTERN_CANDIDATES]) { |
@@ -413,7 +506,7 @@ |
static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { |
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
}; |
- int i, j, s, t; |
+ int i, s, t; |
const struct buf_2d *const what = &x->plane[0].src; |
const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
int br, bc; |
@@ -552,47 +645,38 @@ |
} while (s--); |
} |
- // Check 4 1-away neighbors if do_refine is true. |
- // For most well-designed schemes do_refine will not be necessary. |
- if (do_refine) { |
- static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; |
- |
- for (j = 0; j < 16; j++) { |
- int best_site = -1; |
- if (check_bounds(x, br, bc, 1)) { |
- for (i = 0; i < 4; i++) { |
- const MV this_mv = {br + neighbors[i].row, |
- bc + neighbors[i].col}; |
- thissad = vfp->sdf(what->buf, what->stride, |
- get_buf_from_mv(in_what, &this_mv), |
- in_what->stride); |
- CHECK_BETTER |
- } |
- } else { |
- for (i = 0; i < 4; i++) { |
- const MV this_mv = {br + neighbors[i].row, |
- bc + neighbors[i].col}; |
- if (!is_mv_in(x, &this_mv)) |
- continue; |
- thissad = vfp->sdf(what->buf, what->stride, |
- get_buf_from_mv(in_what, &this_mv), |
- in_what->stride); |
- CHECK_BETTER |
- } |
+ // Returns the one-away integer pel sad values around the best as follows: |
+ // sad_list[0]: sad at the best integer pel |
+ // sad_list[1]: sad at delta {0, -1} (left) from the best integer pel |
+ // sad_list[2]: sad at delta {-1, 0} (top) from the best integer pel |
+ // sad_list[3]: sad at delta { 0, 1} (right) from the best integer pel |
+ // sad_list[4]: sad at delta { 1, 0} (bottom) from the best integer pel |
+ if (sad_list) { |
+ static const MV neighbors[4] = {{0, -1}, {-1, 0}, {0, 1}, {1, 0}}; |
+ sad_list[0] = bestsad; |
+ if (check_bounds(x, br, bc, 1)) { |
+ for (i = 0; i < 4; i++) { |
+ const MV this_mv = {br + neighbors[i].row, |
+ bc + neighbors[i].col}; |
+ sad_list[i + 1] = vfp->sdf(what->buf, what->stride, |
+ get_buf_from_mv(in_what, &this_mv), |
+ in_what->stride); |
} |
- |
- if (best_site == -1) { |
- break; |
- } else { |
- br += neighbors[best_site].row; |
- bc += neighbors[best_site].col; |
+ } else { |
+ for (i = 0; i < 4; i++) { |
+ const MV this_mv = {br + neighbors[i].row, |
+ bc + neighbors[i].col}; |
+ if (!is_mv_in(x, &this_mv)) |
+ sad_list[i + 1] = INT_MAX; |
+ else |
+ sad_list[i + 1] = vfp->sdf(what->buf, what->stride, |
+ get_buf_from_mv(in_what, &this_mv), |
+ in_what->stride); |
} |
} |
} |
- |
best_mv->row = br; |
best_mv->col = bc; |
- |
return bestsad; |
} |
@@ -634,6 +718,7 @@ |
int search_param, |
int sad_per_bit, |
int do_init_search, |
+ int *sad_list, |
const vp9_variance_fn_ptr_t *vfp, |
int use_mvcost, |
const MV *center_mv, MV *best_mv) { |
@@ -658,7 +743,7 @@ |
{ -1024, 0}}, |
}; |
return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
- do_init_search, 0, vfp, use_mvcost, |
+ do_init_search, sad_list, vfp, use_mvcost, |
center_mv, best_mv, |
hex_num_candidates, hex_candidates); |
} |
@@ -668,6 +753,7 @@ |
int search_param, |
int sad_per_bit, |
int do_init_search, |
+ int *sad_list, |
const vp9_variance_fn_ptr_t *vfp, |
int use_mvcost, |
const MV *center_mv, |
@@ -699,7 +785,7 @@ |
{-512, 512}, {-1024, 0}}, |
}; |
return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
- do_init_search, 0, vfp, use_mvcost, |
+ do_init_search, sad_list, vfp, use_mvcost, |
center_mv, best_mv, |
bigdia_num_candidates, bigdia_candidates); |
} |
@@ -709,6 +795,7 @@ |
int search_param, |
int sad_per_bit, |
int do_init_search, |
+ int *sad_list, |
const vp9_variance_fn_ptr_t *vfp, |
int use_mvcost, |
const MV *center_mv, |
@@ -740,7 +827,7 @@ |
{0, 1024}, {-1024, 1024}, {-1024, 0}}, |
}; |
return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
- do_init_search, 0, vfp, use_mvcost, |
+ do_init_search, sad_list, vfp, use_mvcost, |
center_mv, best_mv, |
square_num_candidates, square_candidates); |
} |
@@ -750,12 +837,13 @@ |
int search_param, |
int sad_per_bit, |
int do_init_search, // must be zero for fast_hex |
+ int *sad_list, |
const vp9_variance_fn_ptr_t *vfp, |
int use_mvcost, |
const MV *center_mv, |
MV *best_mv) { |
return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), |
- sad_per_bit, do_init_search, vfp, use_mvcost, |
+ sad_per_bit, do_init_search, sad_list, vfp, use_mvcost, |
center_mv, best_mv); |
} |
@@ -764,13 +852,14 @@ |
int search_param, |
int sad_per_bit, |
int do_init_search, |
+ int *sad_list, |
const vp9_variance_fn_ptr_t *vfp, |
int use_mvcost, |
const MV *center_mv, |
MV *best_mv) { |
return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), |
- sad_per_bit, do_init_search, vfp, use_mvcost, |
- center_mv, best_mv); |
+ sad_per_bit, do_init_search, sad_list, vfp, |
+ use_mvcost, center_mv, best_mv); |
} |
#undef CHECK_BETTER |
@@ -1368,33 +1457,41 @@ |
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, |
BLOCK_SIZE bsize, MV *mvp_full, |
int step_param, int error_per_bit, |
+ int *sad_list, |
const MV *ref_mv, MV *tmp_mv, |
int var_max, int rd) { |
const SPEED_FEATURES *const sf = &cpi->sf; |
const SEARCH_METHODS method = sf->mv.search_method; |
vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; |
int var = 0; |
+ if (sad_list) { |
+ sad_list[0] = INT_MAX; |
+ sad_list[1] = INT_MAX; |
+ sad_list[2] = INT_MAX; |
+ sad_list[3] = INT_MAX; |
+ sad_list[4] = INT_MAX; |
+ } |
switch (method) { |
case FAST_DIAMOND: |
var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, |
- fn_ptr, 1, ref_mv, tmp_mv); |
+ sad_list, fn_ptr, 1, ref_mv, tmp_mv); |
break; |
case FAST_HEX: |
var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, |
- fn_ptr, 1, ref_mv, tmp_mv); |
+ sad_list, fn_ptr, 1, ref_mv, tmp_mv); |
break; |
case HEX: |
var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, |
- fn_ptr, 1, ref_mv, tmp_mv); |
+ sad_list, fn_ptr, 1, ref_mv, tmp_mv); |
break; |
case SQUARE: |
var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, |
- fn_ptr, 1, ref_mv, tmp_mv); |
+ sad_list, fn_ptr, 1, ref_mv, tmp_mv); |
break; |
case BIGDIA: |
var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, |
- fn_ptr, 1, ref_mv, tmp_mv); |
+ sad_list, fn_ptr, 1, ref_mv, tmp_mv); |
break; |
case NSTEP: |
var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, |