Index: source/libvpx/vp9/encoder/vp9_mcomp.c |
=================================================================== |
--- source/libvpx/vp9/encoder/vp9_mcomp.c (revision 293081) |
+++ source/libvpx/vp9/encoder/vp9_mcomp.c (working copy) |
@@ -590,6 +590,13 @@ |
return besterr; |
} |
+const MV search_step_table[12] = { |
+ // {row, col} steps in 1/8-pel units: left, right, up, down per step size |
+ {0, -4}, {0, 4}, {-4, 0}, {4, 0}, |
+ {0, -2}, {0, 2}, {-2, 0}, {2, 0}, |
+ {0, -1}, {0, 1}, {-1, 0}, {1, 0} |
+}; |
+ |
int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, |
MV *bestmv, const MV *ref_mv, |
int allow_hp, |
@@ -603,43 +610,134 @@ |
unsigned int *sse1, |
const uint8_t *second_pred, |
int w, int h) { |
- SETUP_SUBPEL_SEARCH; |
- SETUP_CENTER_ERROR; |
- (void) cost_list; // to silence compiler warning |
+ const uint8_t *const z = x->plane[0].src.buf; |
+ const uint8_t *const src_address = z; |
+ const int src_stride = x->plane[0].src.stride; |
+ const MACROBLOCKD *xd = &x->e_mbd; |
+ unsigned int besterr = INT_MAX; |
+ unsigned int sse; |
+ unsigned int whichdir = 0; |
+ int thismse; |
+ const int y_stride = xd->plane[0].pre[0].stride; |
+ const int offset = bestmv->row * y_stride + bestmv->col; |
+ const uint8_t *const y = xd->plane[0].pre[0].buf; |
- // Each subsequent iteration checks at least one point in |
- // common with the last iteration could be 2 ( if diag selected) |
- // 1/2 pel |
- FIRST_LEVEL_CHECKS; |
- if (halfiters > 1) { |
- SECOND_LEVEL_CHECKS; |
+ int rr = ref_mv->row; |
+ int rc = ref_mv->col; |
+ int br = bestmv->row * 8; |
+ int bc = bestmv->col * 8; |
+ int hstep = 4; |
+ int iter, round = 3 - forced_stop; |
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
+ int tr = br; |
+ int tc = bc; |
+ const MV *search_step = search_step_table; |
+ int idx, best_idx = -1; |
+ unsigned int cost_array[5]; |
+ |
+ if (!(allow_hp && vp9_use_mv_hp(ref_mv))) |
+ if (round == 3) |
+ round = 2; |
+ |
+ bestmv->row *= 8; |
+ bestmv->col *= 8; |
+ |
+ if (second_pred != NULL) { |
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); |
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse1); |
+ } else { |
+ besterr = vfp->vf(y + offset, y_stride, src_address, src_stride, sse1); |
} |
- tr = br; |
- tc = bc; |
+ *distortion = besterr; |
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
- // Each subsequent iteration checks at least one point in common with |
- // the last iteration could be 2 ( if diag selected) 1/4 pel |
+ (void) cost_list; // to silence compiler warning |
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
- if (forced_stop != 2) { |
- hstep >>= 1; |
- FIRST_LEVEL_CHECKS; |
- if (quarteriters > 1) { |
+ for (iter = 0; iter < round; ++iter) { |
+ // Check vertical and horizontal sub-pixel positions. |
+ for (idx = 0; idx < 4; ++idx) { |
+ tr = br + search_step[idx].row; |
+ tc = bc + search_step[idx].col; |
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { |
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); |
+ int row_offset = (tr & 0x07) << 1; |
+ int col_offset = (tc & 0x07) << 1; |
+ MV this_mv; |
+ this_mv.row = tr; |
+ this_mv.col = tc; |
+ if (second_pred == NULL) |
+ thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, |
+ src_address, src_stride, &sse); |
+ else |
+ thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, |
+ src_address, src_stride, &sse, second_pred); |
+ cost_array[idx] = thismse + |
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); |
+ |
+ if (cost_array[idx] < besterr) { |
+ best_idx = idx; |
+ besterr = cost_array[idx]; |
+ *distortion = thismse; |
+ *sse1 = sse; |
+ } |
+ } else { |
+ cost_array[idx] = INT_MAX; |
+ } |
+ } |
+ |
+ // Check diagonal sub-pixel position |
+ tc = bc + (cost_array[0] < cost_array[1] ? -hstep : hstep); |
+ tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep); |
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { |
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); |
+ int row_offset = (tr & 0x07) << 1; |
+ int col_offset = (tc & 0x07) << 1; |
+ MV this_mv = {tr, tc}; |
+ if (second_pred == NULL) |
+ thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset, |
+ src_address, src_stride, &sse); |
+ else |
+ thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset, |
+ src_address, src_stride, &sse, second_pred); |
+ cost_array[4] = thismse + |
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); |
+ |
+ if (cost_array[4] < besterr) { |
+ best_idx = 4; |
+ besterr = cost_array[4]; |
+ *distortion = thismse; |
+ *sse1 = sse; |
+ } |
+ } else { |
+ cost_array[idx] = INT_MAX; |
+ } |
+ |
+ if (best_idx < 4 && best_idx >= 0) { |
+ br += search_step[best_idx].row; |
+ bc += search_step[best_idx].col; |
+ } else if (best_idx == 4) { |
+ br = tr; |
+ bc = tc; |
+ } |
+ |
+ if (iters_per_step > 1) |
SECOND_LEVEL_CHECKS; |
- } |
+ |
tr = br; |
tc = bc; |
- } |
- if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
+ search_step += 4; |
hstep >>= 1; |
- FIRST_LEVEL_CHECKS; |
- if (eighthiters > 1) { |
- SECOND_LEVEL_CHECKS; |
- } |
- tr = br; |
- tc = bc; |
+ best_idx = -1; |
} |
+ |
+ // Each iteration checks at least one point in common with the previous |
+ // iteration (2 if the diagonal was selected); the step halves each time. |
+ |
// These lines insure static analysis doesn't warn that |
// tr and tc aren't used after the above point. |
(void) tr; |
@@ -1654,7 +1752,7 @@ |
if (do_refine) { |
const int search_range = 8; |
MV best_mv = *dst_mv; |
- thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, |
+ thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, |
fn_ptr, ref_mv); |
if (thissme < INT_MAX) |
thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); |
@@ -1858,11 +1956,11 @@ |
return best_sad; |
} |
-int vp9_refining_search_sad_c(const MACROBLOCK *x, |
- MV *ref_mv, int error_per_bit, |
- int search_range, |
- const vp9_variance_fn_ptr_t *fn_ptr, |
- const MV *center_mv) { |
+int vp9_refining_search_sad(const MACROBLOCK *x, |
+ MV *ref_mv, int error_per_bit, |
+ int search_range, |
+ const vp9_variance_fn_ptr_t *fn_ptr, |
+ const MV *center_mv) { |
const MACROBLOCKD *const xd = &x->e_mbd; |
const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
const struct buf_2d *const what = &x->plane[0].src; |