OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 |
| 12 #include "vp9/encoder/vp9_onyx_int.h" |
| 13 #include "vp9/encoder/vp9_mcomp.h" |
| 14 #include "vpx_mem/vpx_mem.h" |
| 15 #include "vpx_ports/config.h" |
| 16 #include <stdio.h> |
| 17 #include <limits.h> |
| 18 #include <math.h> |
| 19 #include "vp9/common/vp9_findnearmv.h" |
| 20 |
| 21 #ifdef ENTROPY_STATS |
| 22 static int mv_ref_ct [31] [4] [2]; |
| 23 static int mv_mode_cts [4] [2]; |
| 24 #endif |
| 25 |
| 26 void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) { |
| 27 int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL + |
| 28 ((ref_mv->as_mv.col & 7) ? 1 : 0); |
| 29 int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL + |
| 30 ((ref_mv->as_mv.row & 7) ? 1 : 0); |
| 31 int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL; |
| 32 int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL; |
| 33 |
| 34 /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */ |
| 35 if (x->mv_col_min < col_min) |
| 36 x->mv_col_min = col_min; |
| 37 if (x->mv_col_max > col_max) |
| 38 x->mv_col_max = col_max; |
| 39 if (x->mv_row_min < row_min) |
| 40 x->mv_row_min = row_min; |
| 41 if (x->mv_row_max > row_max) |
| 42 x->mv_row_max = row_max; |
| 43 } |
| 44 |
| 45 int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], |
| 46 int Weight, int ishp) { |
| 47 MV v; |
| 48 v.row = (mv->as_mv.row - ref->as_mv.row); |
| 49 v.col = (mv->as_mv.col - ref->as_mv.col); |
| 50 return ((mvjcost[vp9_get_mv_joint(v)] + |
| 51 mvcost[0][v.row] + mvcost[1][v.col]) * |
| 52 Weight) >> 7; |
| 53 } |
| 54 |
| 55 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], |
| 56 int error_per_bit, int ishp) { |
| 57 if (mvcost) { |
| 58 MV v; |
| 59 v.row = (mv->as_mv.row - ref->as_mv.row); |
| 60 v.col = (mv->as_mv.col - ref->as_mv.col); |
| 61 return ((mvjcost[vp9_get_mv_joint(v)] + |
| 62 mvcost[0][v.row] + mvcost[1][v.col]) * |
| 63 error_per_bit + 128) >> 8; |
| 64 } |
| 65 return 0; |
| 66 } |
| 67 |
| 68 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost, |
| 69 int *mvsadcost[2], int error_per_bit) { |
| 70 |
| 71 if (mvsadcost) { |
| 72 MV v; |
| 73 v.row = (mv->as_mv.row - ref->as_mv.row); |
| 74 v.col = (mv->as_mv.col - ref->as_mv.col); |
| 75 return ((mvjsadcost[vp9_get_mv_joint(v)] + |
| 76 mvsadcost[0][v.row] + mvsadcost[1][v.col]) * |
| 77 error_per_bit + 128) >> 8; |
| 78 } |
| 79 return 0; |
| 80 } |
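
The three cost helpers above share one fixed-point weighting scheme: an estimated rate (a joint-class lookup plus two per-component lookups on the MV difference) is multiplied by a per-bit cost and scaled back down, with "+ 128 >> 8" acting as a rounded Q8 divide (vp9_mv_bit_cost uses ">> 7" with its own Weight instead). A minimal standalone sketch of that arithmetic, using hypothetical toy cost tables rather than the encoder's real ones:

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for the encoder's cost tables (illustrative values only). */
static const int joint_cost[4] = { 10, 60, 60, 120 }; /* zero / col-only / row-only / both */

static int comp_cost(int d) {          /* hypothetical per-component rate for a diff of d */
  return 20 + 4 * abs(d);
}

/* Same shape as mv_err_cost(): rate * error_per_bit with Q8 rounding. */
static int mv_err_cost_sketch(int drow, int dcol, int error_per_bit) {
  const int joint = (drow != 0) * 2 + (dcol != 0);   /* mirrors the MVC joint index */
  const int rate = joint_cost[joint] + comp_cost(drow) + comp_cost(dcol);
  return (rate * error_per_bit + 128) >> 8;
}

int main(void) {
  /* e.g. a (3, -2) 1/8-pel difference from the reference at error_per_bit = 64 */
  printf("cost = %d\n", mv_err_cost_sketch(3, -2, 64));
  return 0;
}
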
| 81 |
| 82 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { |
| 83 int Len; |
| 84 int search_site_count = 0; |
| 85 |
| 86 |
| 87 // Generate offsets for 4 search sites per step. |
| 88 Len = MAX_FIRST_STEP; |
| 89 x->ss[search_site_count].mv.col = 0; |
| 90 x->ss[search_site_count].mv.row = 0; |
| 91 x->ss[search_site_count].offset = 0; |
| 92 search_site_count++; |
| 93 |
| 94 while (Len > 0) { |
| 95 |
| 96 // Compute offsets for search sites. |
| 97 x->ss[search_site_count].mv.col = 0; |
| 98 x->ss[search_site_count].mv.row = -Len; |
| 99 x->ss[search_site_count].offset = -Len * stride; |
| 100 search_site_count++; |
| 101 |
| 102 // Compute offsets for search sites. |
| 103 x->ss[search_site_count].mv.col = 0; |
| 104 x->ss[search_site_count].mv.row = Len; |
| 105 x->ss[search_site_count].offset = Len * stride; |
| 106 search_site_count++; |
| 107 |
| 108 // Compute offsets for search sites. |
| 109 x->ss[search_site_count].mv.col = -Len; |
| 110 x->ss[search_site_count].mv.row = 0; |
| 111 x->ss[search_site_count].offset = -Len; |
| 112 search_site_count++; |
| 113 |
| 114 // Compute offsets for search sites. |
| 115 x->ss[search_site_count].mv.col = Len; |
| 116 x->ss[search_site_count].mv.row = 0; |
| 117 x->ss[search_site_count].offset = Len; |
| 118 search_site_count++; |
| 119 |
| 120 // Contract. |
| 121 Len /= 2; |
| 122 } |
| 123 |
| 124 x->ss_count = search_site_count; |
| 125 x->searches_per_step = 4; |
| 126 } |
| 127 |
| 128 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { |
| 129 int Len; |
| 130 int search_site_count = 0; |
| 131 |
| 132 // Generate offsets for 8 search sites per step. |
| 133 Len = MAX_FIRST_STEP; |
| 134 x->ss[search_site_count].mv.col = 0; |
| 135 x->ss[search_site_count].mv.row = 0; |
| 136 x->ss[search_site_count].offset = 0; |
| 137 search_site_count++; |
| 138 |
| 139 while (Len > 0) { |
| 140 |
| 141 // Compute offsets for search sites. |
| 142 x->ss[search_site_count].mv.col = 0; |
| 143 x->ss[search_site_count].mv.row = -Len; |
| 144 x->ss[search_site_count].offset = -Len * stride; |
| 145 search_site_count++; |
| 146 |
| 147 // Compute offsets for search sites. |
| 148 x->ss[search_site_count].mv.col = 0; |
| 149 x->ss[search_site_count].mv.row = Len; |
| 150 x->ss[search_site_count].offset = Len * stride; |
| 151 search_site_count++; |
| 152 |
| 153 // Compute offsets for search sites. |
| 154 x->ss[search_site_count].mv.col = -Len; |
| 155 x->ss[search_site_count].mv.row = 0; |
| 156 x->ss[search_site_count].offset = -Len; |
| 157 search_site_count++; |
| 158 |
| 159 // Compute offsets for search sites. |
| 160 x->ss[search_site_count].mv.col = Len; |
| 161 x->ss[search_site_count].mv.row = 0; |
| 162 x->ss[search_site_count].offset = Len; |
| 163 search_site_count++; |
| 164 |
| 165 // Compute offsets for search sites. |
| 166 x->ss[search_site_count].mv.col = -Len; |
| 167 x->ss[search_site_count].mv.row = -Len; |
| 168 x->ss[search_site_count].offset = -Len * stride - Len; |
| 169 search_site_count++; |
| 170 |
| 171 // Compute offsets for search sites. |
| 172 x->ss[search_site_count].mv.col = Len; |
| 173 x->ss[search_site_count].mv.row = -Len; |
| 174 x->ss[search_site_count].offset = -Len * stride + Len; |
| 175 search_site_count++; |
| 176 |
| 177 // Compute offsets for search sites. |
| 178 x->ss[search_site_count].mv.col = -Len; |
| 179 x->ss[search_site_count].mv.row = Len; |
| 180 x->ss[search_site_count].offset = Len * stride - Len; |
| 181 search_site_count++; |
| 182 |
| 183 // Compute offsets for search sites. |
| 184 x->ss[search_site_count].mv.col = Len; |
| 185 x->ss[search_site_count].mv.row = Len; |
| 186 x->ss[search_site_count].offset = Len * stride + Len; |
| 187 search_site_count++; |
| 188 |
| 189 // Contract. |
| 190 Len /= 2; |
| 191 } |
| 192 |
| 193 x->ss_count = search_site_count; |
| 194 x->searches_per_step = 8; |
| 195 } |
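
Both init routines above fill x->ss with (mv, offset) pairs and halve the radius after every step. The sketch below builds the 8-point table the same way, standing MAX_FIRST_STEP in with a hypothetical value, and prints it so the offset = row * stride + col relationship is easy to see:

#include <stdio.h>

#define SKETCH_MAX_FIRST_STEP 16   /* assumption: stands in for MAX_FIRST_STEP */

typedef struct { int row, col, offset; } site_t;

/* Same construction as vp9_init3smotion_compensation: the 4 axial and 4 diagonal
 * points at distance len, repeated while len halves down to 1. */
static int build_sites(site_t *ss, int stride) {
  int n = 0, len = SKETCH_MAX_FIRST_STEP;
  ss[n].row = 0; ss[n].col = 0; ss[n].offset = 0; n++;        /* centre */
  while (len > 0) {
    const int dr[8] = { -1, 1, 0, 0, -1, -1, 1, 1 };
    const int dc[8] = {  0, 0, -1, 1, -1, 1, -1, 1 };
    int i;
    for (i = 0; i < 8; i++) {
      ss[n].row = dr[i] * len;
      ss[n].col = dc[i] * len;
      ss[n].offset = ss[n].row * stride + ss[n].col;          /* buffer offset */
      n++;
    }
    len /= 2;                                                 /* contract */
  }
  return n;
}

int main(void) {
  site_t ss[1 + 8 * 5];
  int i, n = build_sites(ss, 32);
  for (i = 0; i < n; i++)
    printf("site %2d: mv=(%3d,%3d) offset=%d\n", i, ss[i].row, ss[i].col, ss[i].offset);
  return 0;
}
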
| 196 |
| 197 /* |
| 198 * To avoid the penalty of cache-line-crossing reads, preload the reference |
| 199 * area into a small aligned buffer so that reads from that buffer never |
| 200 * cross a cache line. This reduces the CPU cycles spent reading ref data in |
| 201 * the sub-pixel filter functions. |
| 202 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a |
| 203 * 22-row x 32-col area, which is enough for a 16x16 macroblock. Later, for |
| 204 * SPLITMV, we could reduce the area. |
| 205 */ |
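
A minimal sketch of the copy described above: pull the block plus its interpolation margins out of the reference frame into a fixed 32-column buffer, so the later sub-pixel reads all land inside that buffer. The width, height and margins here are illustrative, not the encoder's exact values; in the functions below this job is done by the vfp->copymem hook.

#include <string.h>

/* dst is assumed to be a 32-column (dst_stride = 32) scratch buffer large
 * enough for the block rows plus the top/bottom interpolation margins. */
static void copy_ref_area(unsigned char *dst, int dst_stride,
                          const unsigned char *src, int src_stride,
                          int width, int height) {
  int r;
  for (r = 0; r < height; r++)
    memcpy(dst + r * dst_stride, src + r * src_stride, (size_t)width);
}
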
| 206 |
| 207 /* estimated cost of a motion vector (r,c) */ |
| 208 #define MVC(r, c) \ |
| 209 (mvcost ? \ |
| 210 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ |
| 211 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ |
| 212 error_per_bit + 128) >> 8 : 0) |
| 213 |
| 214 #define SP(x) (((x) & 7) << 1) // convert motion vector component to offset |
| 215 // for svf calc |
| 216 |
| 217 #define IFMVCV(r, c, s, e) \ |
| 218 if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ |
| 219 s \ |
| 220 else \ |
| 221 e; |
| 222 |
| 223 /* pointer to predictor base of a motionvector */ |
| 224 #define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) |
| 225 |
| 226 /* returns subpixel variance error function */ |
| 227 #define DIST(r, c) \ |
| 228 vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) |
| 229 |
| 230 /* checks if (r, c) has better score than previous best */ |
| 231 #define CHECK_BETTER(v, r, c) \ |
| 232 IFMVCV(r, c, { \ |
| 233 thismse = (DIST(r, c)); \ |
| 234 if ((v = MVC(r, c) + thismse) < besterr) { \ |
| 235 besterr = v; \ |
| 236 br = r; \ |
| 237 bc = c; \ |
| 238 *distortion = thismse; \ |
| 239 *sse1 = sse; \ |
| 240 } \ |
| 241 }, \ |
| 242 v = INT_MAX;) |
| 243 |
| 244 #define MIN(x,y) (((x)<(y))?(x):(y)) |
| 245 #define MAX(x,y) (((x)>(y))?(x):(y)) |
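
Written out as a plain function, CHECK_BETTER above does roughly the following: reject candidates outside [minr, maxr] x [minc, maxc] (the IFMVCV guard), otherwise score them as sub-pixel distortion (DIST) plus motion-vector rate (MVC) and keep the best so far. The types and callbacks below are simplified stand-ins, not the encoder's:

/* De-macro-ified shape of CHECK_BETTER (the rejected case, which the macro
 * marks with v = INT_MAX, is simply an early return here). */
typedef unsigned int (*dist_fn)(int r, int c, unsigned int *sse);
typedef int (*mvcost_fn)(int r, int c);

static void check_better_sketch(int r, int c,
                                int minr, int maxr, int minc, int maxc,
                                dist_fn dist, mvcost_fn mv_cost,
                                unsigned int *besterr, int *br, int *bc,
                                int *distortion, unsigned int *sse1) {
  unsigned int sse, thismse, v;
  if (c < minc || c > maxc || r < minr || r > maxr)
    return;                                  /* IFMVCV: outside the search window */
  thismse = dist(r, c, &sse);                /* DIST: sub-pixel variance */
  v = (unsigned int)mv_cost(r, c) + thismse; /* MVC: rate term */
  if (v < *besterr) {                        /* keep the new best candidate */
    *besterr = v;
    *br = r;
    *bc = c;
    *distortion = (int)thismse;
    *sse1 = sse;
  }
}
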
| 246 |
| 247 int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 248 int_mv *bestmv, int_mv *ref_mv, |
| 249 int error_per_bit, |
| 250 const vp9_variance_fn_ptr_t *vfp, |
| 251 int *mvjcost, int *mvcost[2], |
| 252 int *distortion, |
| 253 unsigned int *sse1) { |
| 254 unsigned char *z = (*(b->base_src) + b->src); |
| 255 MACROBLOCKD *xd = &x->e_mbd; |
| 256 |
| 257 int rr, rc, br, bc, hstep; |
| 258 int tr, tc; |
| 259 unsigned int besterr = INT_MAX; |
| 260 unsigned int left, right, up, down, diag; |
| 261 unsigned int sse; |
| 262 unsigned int whichdir; |
| 263 unsigned int halfiters = 4; |
| 264 unsigned int quarteriters = 4; |
| 265 unsigned int eighthiters = 4; |
| 266 int thismse; |
| 267 int maxc, minc, maxr, minr; |
| 268 int y_stride; |
| 269 int offset; |
| 270 int usehp = xd->allow_high_precision_mv; |
| 271 |
| 272 #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) |
| 273 unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; |
| 274 unsigned char *y; |
| 275 int buf_r1, buf_r2, buf_c1, buf_c2; |
| 276 |
| 277 // Clamping to avoid out-of-range data access |
| 278 buf_r1 = ((bestmv->as_mv.row - VP9_INTERP_EXTEND) < x->mv_row_min) ? |
| 279 (bestmv->as_mv.row - x->mv_row_min) : VP9_INTERP_EXTEND - 1; |
| 280 buf_r2 = ((bestmv->as_mv.row + VP9_INTERP_EXTEND) > x->mv_row_max) ? |
| 281 (x->mv_row_max - bestmv->as_mv.row) : VP9_INTERP_EXTEND - 1; |
| 282 buf_c1 = ((bestmv->as_mv.col - VP9_INTERP_EXTEND) < x->mv_col_min) ? |
| 283 (bestmv->as_mv.col - x->mv_col_min) : VP9_INTERP_EXTEND - 1; |
| 284 buf_c2 = ((bestmv->as_mv.col + VP9_INTERP_EXTEND) > x->mv_col_max) ? |
| 285 (x->mv_col_max - bestmv->as_mv.col) : VP9_INTERP_EXTEND - 1; |
| 286 y_stride = 32; |
| 287 |
| 288 /* Copy to intermediate buffer before searching. */ |
| 289 vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2); |
| 290 y = xd->y_buf + y_stride * buf_r1 + buf_c1; |
| 291 #else |
| 292 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; |
| 293 y_stride = d->pre_stride; |
| 294 #endif |
| 295 |
| 296 rr = ref_mv->as_mv.row; |
| 297 rc = ref_mv->as_mv.col; |
| 298 br = bestmv->as_mv.row << 3; |
| 299 bc = bestmv->as_mv.col << 3; |
| 300 hstep = 4; |
| 301 minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)); |
| 302 maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)); |
| 303 minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)); |
| 304 maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)); |
| 305 |
| 306 tr = br; |
| 307 tc = bc; |
| 308 |
| 309 |
| 310 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; |
| 311 |
| 312 // central mv |
| 313 bestmv->as_mv.row <<= 3; |
| 314 bestmv->as_mv.col <<= 3; |
| 315 |
| 316 // calculate central point error |
| 317 besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
| 318 *distortion = besterr; |
| 319 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, |
| 320 error_per_bit, xd->allow_high_precision_mv); |
| 321 |
| 322 // TODO: Each subsequent iteration checks at least one point in common |
| 323 // with the last iteration (could be two if the diagonal was selected). |
| 324 while (--halfiters) { |
| 325 // 1/2 pel |
| 326 CHECK_BETTER(left, tr, tc - hstep); |
| 327 CHECK_BETTER(right, tr, tc + hstep); |
| 328 CHECK_BETTER(up, tr - hstep, tc); |
| 329 CHECK_BETTER(down, tr + hstep, tc); |
| 330 |
| 331 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 332 |
| 333 switch (whichdir) { |
| 334 case 0: |
| 335 CHECK_BETTER(diag, tr - hstep, tc - hstep); |
| 336 break; |
| 337 case 1: |
| 338 CHECK_BETTER(diag, tr - hstep, tc + hstep); |
| 339 break; |
| 340 case 2: |
| 341 CHECK_BETTER(diag, tr + hstep, tc - hstep); |
| 342 break; |
| 343 case 3: |
| 344 CHECK_BETTER(diag, tr + hstep, tc + hstep); |
| 345 break; |
| 346 } |
| 347 |
| 348 // no reason to check the same one again. |
| 349 if (tr == br && tc == bc) |
| 350 break; |
| 351 |
| 352 tr = br; |
| 353 tc = bc; |
| 354 } |
| 355 |
| 356 // TODO: Each subsequent iteration checks at least one point in common with |
| 357 // the last iteration (could be two if the diagonal was selected). 1/4 pel. |
| 358 hstep >>= 1; |
| 359 while (--quarteriters) { |
| 360 CHECK_BETTER(left, tr, tc - hstep); |
| 361 CHECK_BETTER(right, tr, tc + hstep); |
| 362 CHECK_BETTER(up, tr - hstep, tc); |
| 363 CHECK_BETTER(down, tr + hstep, tc); |
| 364 |
| 365 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 366 |
| 367 switch (whichdir) { |
| 368 case 0: |
| 369 CHECK_BETTER(diag, tr - hstep, tc - hstep); |
| 370 break; |
| 371 case 1: |
| 372 CHECK_BETTER(diag, tr - hstep, tc + hstep); |
| 373 break; |
| 374 case 2: |
| 375 CHECK_BETTER(diag, tr + hstep, tc - hstep); |
| 376 break; |
| 377 case 3: |
| 378 CHECK_BETTER(diag, tr + hstep, tc + hstep); |
| 379 break; |
| 380 } |
| 381 |
| 382 // no reason to check the same one again. |
| 383 if (tr == br && tc == bc) |
| 384 break; |
| 385 |
| 386 tr = br; |
| 387 tc = bc; |
| 388 } |
| 389 |
| 390 if (xd->allow_high_precision_mv) { |
| 391 usehp = vp9_use_nmv_hp(&ref_mv->as_mv); |
| 392 } else { |
| 393 usehp = 0; |
| 394 } |
| 395 |
| 396 if (usehp) { |
| 397 hstep >>= 1; |
| 398 while (--eighthiters) { |
| 399 CHECK_BETTER(left, tr, tc - hstep); |
| 400 CHECK_BETTER(right, tr, tc + hstep); |
| 401 CHECK_BETTER(up, tr - hstep, tc); |
| 402 CHECK_BETTER(down, tr + hstep, tc); |
| 403 |
| 404 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 405 |
| 406 switch (whichdir) { |
| 407 case 0: |
| 408 CHECK_BETTER(diag, tr - hstep, tc - hstep); |
| 409 break; |
| 410 case 1: |
| 411 CHECK_BETTER(diag, tr - hstep, tc + hstep); |
| 412 break; |
| 413 case 2: |
| 414 CHECK_BETTER(diag, tr + hstep, tc - hstep); |
| 415 break; |
| 416 case 3: |
| 417 CHECK_BETTER(diag, tr + hstep, tc + hstep); |
| 418 break; |
| 419 } |
| 420 |
| 421 // no reason to check the same one again. |
| 422 if (tr == br && tc == bc) |
| 423 break; |
| 424 |
| 425 tr = br; |
| 426 tc = bc; |
| 427 } |
| 428 } |
| 429 bestmv->as_mv.row = br; |
| 430 bestmv->as_mv.col = bc; |
| 431 |
| 432 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || |
| 433 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) |
| 434 return INT_MAX; |
| 435 |
| 436 return besterr; |
| 437 } |
| 438 #undef MVC |
| 439 #undef PRE |
| 440 #undef DIST |
| 441 #undef IFMVCV |
| 442 #undef CHECK_BETTER |
| 443 #undef MIN |
| 444 #undef MAX |
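
Stripped of the buffer handling, the routine above has a simple shape: start from the full-pel best expressed in 1/8-pel units, probe four axial neighbours and one diagonal at the current step size, and halve the step for each precision level (1/2, 1/4 and, when high-precision MVs are allowed, 1/8 pel). A self-contained sketch against a toy error surface; the iteration counts and the error model are illustrative only:

#include <stdio.h>

typedef unsigned int (*err_fn)(int r, int c);

/* Skeleton of the half -> quarter -> eighth pel loops, with rate + distortion
 * folded into err(). hstep is in 1/8-pel units, so 4 means half pel. */
static unsigned int refine_subpel(int *br, int *bc, int allow_eighth_pel, err_fn err) {
  unsigned int besterr = err(*br, *bc);
  int hstep = 4;
  int levels = allow_eighth_pel ? 3 : 2;
  int level, iter, i;
  for (level = 0; level < levels; level++, hstep >>= 1) {
    for (iter = 0; iter < 3; iter++) {              /* mirrors --halfiters etc. */
      const int tr = *br, tc = *bc;
      unsigned int left  = err(tr, tc - hstep);
      unsigned int right = err(tr, tc + hstep);
      unsigned int up    = err(tr - hstep, tc);
      unsigned int down  = err(tr + hstep, tc);
      /* one diagonal, in the quadrant of the cheaper horizontal/vertical move */
      int dc = (left < right) ? -hstep : hstep;
      int dr = (up < down) ? -hstep : hstep;
      unsigned int diag = err(tr + dr, tc + dc);
      unsigned int cand[5] = { left, right, up, down, diag };
      int cr[5] = { tr, tr, tr - hstep, tr + hstep, tr + dr };
      int cc[5] = { tc - hstep, tc + hstep, tc, tc, tc + dc };
      for (i = 0; i < 5; i++)
        if (cand[i] < besterr) { besterr = cand[i]; *br = cr[i]; *bc = cc[i]; }
      if (*br == tr && *bc == tc) break;            /* no improvement at this step */
    }
  }
  return besterr;
}

/* Toy error surface: a paraboloid centred at (5, -3) in 1/8-pel units. */
static unsigned int toy_err(int r, int c) {
  return (unsigned int)((r - 5) * (r - 5) + (c + 3) * (c + 3));
}

int main(void) {
  int br = 0, bc = 0;                               /* full-pel best, in 1/8-pel units */
  unsigned int e = refine_subpel(&br, &bc, 1, toy_err);
  printf("best = (%d, %d), err = %u\n", br, bc, e);
  return 0;
}
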
| 445 |
| 446 int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 447 int_mv *bestmv, int_mv *ref_mv, |
| 448 int error_per_bit, |
| 449 const vp9_variance_fn_ptr_t *vfp, |
| 450 int *mvjcost, int *mvcost[2], int *distortion, |
| 451 unsigned int *sse1) { |
| 452 int bestmse = INT_MAX; |
| 453 int_mv startmv; |
| 454 int_mv this_mv; |
| 455 int_mv orig_mv; |
| 456 int yrow_movedback = 0, ycol_movedback = 0; |
| 457 unsigned char *z = (*(b->base_src) + b->src); |
| 458 int left, right, up, down, diag; |
| 459 unsigned int sse; |
| 460 int whichdir; |
| 461 int thismse; |
| 462 int y_stride; |
| 463 MACROBLOCKD *xd = &x->e_mbd; |
| 464 int usehp = xd->allow_high_precision_mv; |
| 465 |
| 466 #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) |
| 467 unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; |
| 468 unsigned char *y; |
| 469 |
| 470 y_stride = 32; |
| 471 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ |
| 472 vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18); |
| 473 y = xd->y_buf + y_stride + 1; |
| 474 #else |
| 475 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; |
| 476 y_stride = d->pre_stride; |
| 477 #endif |
| 478 |
| 479 // central mv |
| 480 bestmv->as_mv.row <<= 3; |
| 481 bestmv->as_mv.col <<= 3; |
| 482 startmv = *bestmv; |
| 483 orig_mv = *bestmv; |
| 484 |
| 485 // calculate central point error |
| 486 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
| 487 *distortion = bestmse; |
| 488 bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 489 xd->allow_high_precision_mv); |
| 490 |
| 491 // go left then right and check error |
| 492 this_mv.as_mv.row = startmv.as_mv.row; |
| 493 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); |
| 494 thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); |
| 495 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 496 xd->allow_high_precision_mv); |
| 497 |
| 498 if (left < bestmse) { |
| 499 *bestmv = this_mv; |
| 500 bestmse = left; |
| 501 *distortion = thismse; |
| 502 *sse1 = sse; |
| 503 } |
| 504 |
| 505 this_mv.as_mv.col += 8; |
| 506 thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); |
| 507 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, |
| 508 error_per_bit, xd->allow_high_precision_mv); |
| 509 |
| 510 if (right < bestmse) { |
| 511 *bestmv = this_mv; |
| 512 bestmse = right; |
| 513 *distortion = thismse; |
| 514 *sse1 = sse; |
| 515 } |
| 516 |
| 517 // go up then down and check error |
| 518 this_mv.as_mv.col = startmv.as_mv.col; |
| 519 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); |
| 520 thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); |
| 521 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 522 xd->allow_high_precision_mv); |
| 523 |
| 524 if (up < bestmse) { |
| 525 *bestmv = this_mv; |
| 526 bestmse = up; |
| 527 *distortion = thismse; |
| 528 *sse1 = sse; |
| 529 } |
| 530 |
| 531 this_mv.as_mv.row += 8; |
| 532 thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); |
| 533 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 534 xd->allow_high_precision_mv); |
| 535 |
| 536 if (down < bestmse) { |
| 537 *bestmv = this_mv; |
| 538 bestmse = down; |
| 539 *distortion = thismse; |
| 540 *sse1 = sse; |
| 541 } |
| 542 |
| 543 |
| 544 // now check 1 more diagonal |
| 545 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 546 // for(whichdir =0;whichdir<4;whichdir++) |
| 547 // { |
| 548 this_mv = startmv; |
| 549 |
| 550 switch (whichdir) { |
| 551 case 0: |
| 552 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
| 553 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
| 554 thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); |
| 555 break; |
| 556 case 1: |
| 557 this_mv.as_mv.col += 4; |
| 558 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
| 559 thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); |
| 560 break; |
| 561 case 2: |
| 562 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
| 563 this_mv.as_mv.row += 4; |
| 564 thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); |
| 565 break; |
| 566 case 3: |
| 567 default: |
| 568 this_mv.as_mv.col += 4; |
| 569 this_mv.as_mv.row += 4; |
| 570 thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); |
| 571 break; |
| 572 } |
| 573 |
| 574 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 575 xd->allow_high_precision_mv); |
| 576 |
| 577 if (diag < bestmse) { |
| 578 *bestmv = this_mv; |
| 579 bestmse = diag; |
| 580 *distortion = thismse; |
| 581 *sse1 = sse; |
| 582 } |
| 583 |
| 584 // } |
| 585 |
| 586 |
| 587 // time to check quarter pels. |
| 588 if (bestmv->as_mv.row < startmv.as_mv.row) { |
| 589 y -= y_stride; |
| 590 yrow_movedback = 1; |
| 591 } |
| 592 |
| 593 if (bestmv->as_mv.col < startmv.as_mv.col) { |
| 594 y--; |
| 595 ycol_movedback = 1; |
| 596 } |
| 597 |
| 598 startmv = *bestmv; |
| 599 |
| 600 |
| 601 |
| 602 // go left then right and check error |
| 603 this_mv.as_mv.row = startmv.as_mv.row; |
| 604 |
| 605 if (startmv.as_mv.col & 7) { |
| 606 this_mv.as_mv.col = startmv.as_mv.col - 2; |
| 607 thismse = vfp->svf(y, y_stride, |
| 608 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 609 z, b->src_stride, &sse); |
| 610 } else { |
| 611 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
| 612 thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, |
| 613 b->src_stride, &sse); |
| 614 } |
| 615 |
| 616 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 617 xd->allow_high_precision_mv); |
| 618 |
| 619 if (left < bestmse) { |
| 620 *bestmv = this_mv; |
| 621 bestmse = left; |
| 622 *distortion = thismse; |
| 623 *sse1 = sse; |
| 624 } |
| 625 |
| 626 this_mv.as_mv.col += 4; |
| 627 thismse = vfp->svf(y, y_stride, |
| 628 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 629 z, b->src_stride, &sse); |
| 630 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, |
| 631 error_per_bit, xd->allow_high_precision_mv); |
| 632 |
| 633 if (right < bestmse) { |
| 634 *bestmv = this_mv; |
| 635 bestmse = right; |
| 636 *distortion = thismse; |
| 637 *sse1 = sse; |
| 638 } |
| 639 |
| 640 // go up then down and check error |
| 641 this_mv.as_mv.col = startmv.as_mv.col; |
| 642 |
| 643 if (startmv.as_mv.row & 7) { |
| 644 this_mv.as_mv.row = startmv.as_mv.row - 2; |
| 645 thismse = vfp->svf(y, y_stride, |
| 646 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 647 z, b->src_stride, &sse); |
| 648 } else { |
| 649 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
| 650 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), |
| 651 z, b->src_stride, &sse); |
| 652 } |
| 653 |
| 654 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 655 xd->allow_high_precision_mv); |
| 656 |
| 657 if (up < bestmse) { |
| 658 *bestmv = this_mv; |
| 659 bestmse = up; |
| 660 *distortion = thismse; |
| 661 *sse1 = sse; |
| 662 } |
| 663 |
| 664 this_mv.as_mv.row += 4; |
| 665 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 666 z, b->src_stride, &sse); |
| 667 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 668 xd->allow_high_precision_mv); |
| 669 |
| 670 if (down < bestmse) { |
| 671 *bestmv = this_mv; |
| 672 bestmse = down; |
| 673 *distortion = thismse; |
| 674 *sse1 = sse; |
| 675 } |
| 676 |
| 677 |
| 678 // now check 1 more diagonal |
| 679 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 680 |
| 681 // for(whichdir=0;whichdir<4;whichdir++) |
| 682 // { |
| 683 this_mv = startmv; |
| 684 |
| 685 switch (whichdir) { |
| 686 case 0: |
| 687 |
| 688 if (startmv.as_mv.row & 7) { |
| 689 this_mv.as_mv.row -= 2; |
| 690 |
| 691 if (startmv.as_mv.col & 7) { |
| 692 this_mv.as_mv.col -= 2; |
| 693 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 694 } else { |
| 695 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
| 696 thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 697 } |
| 698 } else { |
| 699 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
| 700 |
| 701 if (startmv.as_mv.col & 7) { |
| 702 this_mv.as_mv.col -= 2; |
| 703 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse); |
| 704 } else { |
| 705 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
| 706 thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src_stride, &sse); |
| 707 } |
| 708 } |
| 709 |
| 710 break; |
| 711 case 1: |
| 712 this_mv.as_mv.col += 2; |
| 713 |
| 714 if (startmv.as_mv.row & 7) { |
| 715 this_mv.as_mv.row -= 2; |
| 716 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 717 } else { |
| 718 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
| 719 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse); |
| 720 } |
| 721 |
| 722 break; |
| 723 case 2: |
| 724 this_mv.as_mv.row += 2; |
| 725 |
| 726 if (startmv.as_mv.col & 7) { |
| 727 this_mv.as_mv.col -= 2; |
| 728 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 729 z, b->src_stride, &sse); |
| 730 } else { |
| 731 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
| 732 thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, |
| 733 b->src_stride, &sse); |
| 734 } |
| 735 |
| 736 break; |
| 737 case 3: |
| 738 this_mv.as_mv.col += 2; |
| 739 this_mv.as_mv.row += 2; |
| 740 thismse = vfp->svf(y, y_stride, |
| 741 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 742 z, b->src_stride, &sse); |
| 743 break; |
| 744 } |
| 745 |
| 746 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 747 xd->allow_high_precision_mv); |
| 748 |
| 749 if (diag < bestmse) { |
| 750 *bestmv = this_mv; |
| 751 bestmse = diag; |
| 752 *distortion = thismse; |
| 753 *sse1 = sse; |
| 754 } |
| 755 |
| 756 if (x->e_mbd.allow_high_precision_mv) { |
| 757 usehp = vp9_use_nmv_hp(&ref_mv->as_mv); |
| 758 } else { |
| 759 usehp = 0; |
| 760 } |
| 761 if (!usehp) |
| 762 return bestmse; |
| 763 |
| 764 /* Now do 1/8th pixel */ |
| 765 if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) { |
| 766 y -= y_stride; |
| 767 yrow_movedback = 1; |
| 768 } |
| 769 |
| 770 if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) { |
| 771 y--; |
| 772 ycol_movedback = 1; |
| 773 } |
| 774 |
| 775 startmv = *bestmv; |
| 776 |
| 777 // go left then right and check error |
| 778 this_mv.as_mv.row = startmv.as_mv.row; |
| 779 |
| 780 if (startmv.as_mv.col & 7) { |
| 781 this_mv.as_mv.col = startmv.as_mv.col - 1; |
| 782 thismse = vfp->svf(y, y_stride, |
| 783 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 784 z, b->src_stride, &sse); |
| 785 } else { |
| 786 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; |
| 787 thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), |
| 788 z, b->src_stride, &sse); |
| 789 } |
| 790 |
| 791 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 792 xd->allow_high_precision_mv); |
| 793 |
| 794 if (left < bestmse) { |
| 795 *bestmv = this_mv; |
| 796 bestmse = left; |
| 797 *distortion = thismse; |
| 798 *sse1 = sse; |
| 799 } |
| 800 |
| 801 this_mv.as_mv.col += 2; |
| 802 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), |
| 803 z, b->src_stride, &sse); |
| 804 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, |
| 805 error_per_bit, xd->allow_high_precision_mv); |
| 806 |
| 807 if (right < bestmse) { |
| 808 *bestmv = this_mv; |
| 809 bestmse = right; |
| 810 *distortion = thismse; |
| 811 *sse1 = sse; |
| 812 } |
| 813 |
| 814 // go up then down and check error |
| 815 this_mv.as_mv.col = startmv.as_mv.col; |
| 816 |
| 817 if (startmv.as_mv.row & 7) { |
| 818 this_mv.as_mv.row = startmv.as_mv.row - 1; |
| 819 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 820 } else { |
| 821 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; |
| 822 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); |
| 823 } |
| 824 |
| 825 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 826 xd->allow_high_precision_mv); |
| 827 |
| 828 if (up < bestmse) { |
| 829 *bestmv = this_mv; |
| 830 bestmse = up; |
| 831 *distortion = thismse; |
| 832 *sse1 = sse; |
| 833 } |
| 834 |
| 835 this_mv.as_mv.row += 2; |
| 836 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 837 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 838 xd->allow_high_precision_mv); |
| 839 |
| 840 if (down < bestmse) { |
| 841 *bestmv = this_mv; |
| 842 bestmse = down; |
| 843 *distortion = thismse; |
| 844 *sse1 = sse; |
| 845 } |
| 846 |
| 847 // now check 1 more diagonal |
| 848 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 849 |
| 850 // for(whichdir=0;whichdir<4;whichdir++) |
| 851 // { |
| 852 this_mv = startmv; |
| 853 |
| 854 switch (whichdir) { |
| 855 case 0: |
| 856 |
| 857 if (startmv.as_mv.row & 7) { |
| 858 this_mv.as_mv.row -= 1; |
| 859 |
| 860 if (startmv.as_mv.col & 7) { |
| 861 this_mv.as_mv.col -= 1; |
| 862 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 863 } else { |
| 864 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; |
| 865 thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 866 } |
| 867 } else { |
| 868 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; |
| 869 |
| 870 if (startmv.as_mv.col & 7) { |
| 871 this_mv.as_mv.col -= 1; |
| 872 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); |
| 873 } else { |
| 874 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; |
| 875 thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src_stride, &sse); |
| 876 } |
| 877 } |
| 878 |
| 879 break; |
| 880 case 1: |
| 881 this_mv.as_mv.col += 1; |
| 882 |
| 883 if (startmv.as_mv.row & 7) { |
| 884 this_mv.as_mv.row -= 1; |
| 885 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 886 } else { |
| 887 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7; |
| 888 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse); |
| 889 } |
| 890 |
| 891 break; |
| 892 case 2: |
| 893 this_mv.as_mv.row += 1; |
| 894 |
| 895 if (startmv.as_mv.col & 7) { |
| 896 this_mv.as_mv.col -= 1; |
| 897 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 898 } else { |
| 899 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7; |
| 900 thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 901 } |
| 902 |
| 903 break; |
| 904 case 3: |
| 905 this_mv.as_mv.col += 1; |
| 906 this_mv.as_mv.row += 1; |
| 907 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse); |
| 908 break; |
| 909 } |
| 910 |
| 911 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 912 xd->allow_high_precision_mv); |
| 913 |
| 914 if (diag < bestmse) { |
| 915 *bestmv = this_mv; |
| 916 bestmse = diag; |
| 917 *distortion = thismse; |
| 918 *sse1 = sse; |
| 919 } |
| 920 |
| 921 return bestmse; |
| 922 } |
| 923 |
| 924 #undef SP |
| 925 |
| 926 int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 927 int_mv *bestmv, int_mv *ref_mv, |
| 928 int error_per_bit, |
| 929 const vp9_variance_fn_ptr_t *vfp, |
| 930 int *mvjcost, int *mvcost[2], |
| 931 int *distortion, |
| 932 unsigned int *sse1) { |
| 933 int bestmse = INT_MAX; |
| 934 int_mv startmv; |
| 935 int_mv this_mv; |
| 936 unsigned char *z = (*(b->base_src) + b->src); |
| 937 int left, right, up, down, diag; |
| 938 unsigned int sse; |
| 939 int whichdir; |
| 940 int thismse; |
| 941 int y_stride; |
| 942 MACROBLOCKD *xd = &x->e_mbd; |
| 943 |
| 944 #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64) |
| 945 unsigned char *y0 = *(d->base_pre) + d->pre + |
| 946 (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; |
| 947 unsigned char *y; |
| 948 |
| 949 y_stride = 32; |
| 950 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ |
| 951 vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18); |
| 952 y = xd->y_buf + y_stride + 1; |
| 953 #else |
| 954 unsigned char *y = *(d->base_pre) + d->pre + |
| 955 (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col; |
| 956 y_stride = d->pre_stride; |
| 957 #endif |
| 958 |
| 959 // central mv |
| 960 bestmv->as_mv.row <<= 3; |
| 961 bestmv->as_mv.col <<= 3; |
| 962 startmv = *bestmv; |
| 963 |
| 964 // calculate central point error |
| 965 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
| 966 *distortion = bestmse; |
| 967 bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 968 xd->allow_high_precision_mv); |
| 969 |
| 970 // go left then right and check error |
| 971 this_mv.as_mv.row = startmv.as_mv.row; |
| 972 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); |
| 973 thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); |
| 974 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 975 xd->allow_high_precision_mv); |
| 976 |
| 977 if (left < bestmse) { |
| 978 *bestmv = this_mv; |
| 979 bestmse = left; |
| 980 *distortion = thismse; |
| 981 *sse1 = sse; |
| 982 } |
| 983 |
| 984 this_mv.as_mv.col += 8; |
| 985 thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); |
| 986 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, |
| 987 error_per_bit, xd->allow_high_precision_mv); |
| 988 |
| 989 if (right < bestmse) { |
| 990 *bestmv = this_mv; |
| 991 bestmse = right; |
| 992 *distortion = thismse; |
| 993 *sse1 = sse; |
| 994 } |
| 995 |
| 996 // go up then down and check error |
| 997 this_mv.as_mv.col = startmv.as_mv.col; |
| 998 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); |
| 999 thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); |
| 1000 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 1001 xd->allow_high_precision_mv); |
| 1002 |
| 1003 if (up < bestmse) { |
| 1004 *bestmv = this_mv; |
| 1005 bestmse = up; |
| 1006 *distortion = thismse; |
| 1007 *sse1 = sse; |
| 1008 } |
| 1009 |
| 1010 this_mv.as_mv.row += 8; |
| 1011 thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); |
| 1012 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 1013 xd->allow_high_precision_mv); |
| 1014 |
| 1015 if (down < bestmse) { |
| 1016 *bestmv = this_mv; |
| 1017 bestmse = down; |
| 1018 *distortion = thismse; |
| 1019 *sse1 = sse; |
| 1020 } |
| 1021 |
| 1022 // now check 1 more diagonal - |
| 1023 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
| 1024 this_mv = startmv; |
| 1025 |
| 1026 switch (whichdir) { |
| 1027 case 0: |
| 1028 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
| 1029 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
| 1030 thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); |
| 1031 break; |
| 1032 case 1: |
| 1033 this_mv.as_mv.col += 4; |
| 1034 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
| 1035 thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); |
| 1036 break; |
| 1037 case 2: |
| 1038 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
| 1039 this_mv.as_mv.row += 4; |
| 1040 thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); |
| 1041 break; |
| 1042 case 3: |
| 1043 default: |
| 1044 this_mv.as_mv.col += 4; |
| 1045 this_mv.as_mv.row += 4; |
| 1046 thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); |
| 1047 break; |
| 1048 } |
| 1049 |
| 1050 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, |
| 1051 xd->allow_high_precision_mv); |
| 1052 |
| 1053 if (diag < bestmse) { |
| 1054 *bestmv = this_mv; |
| 1055 bestmse = diag; |
| 1056 *distortion = thismse; |
| 1057 *sse1 = sse; |
| 1058 } |
| 1059 |
| 1060 return bestmse; |
| 1061 } |
| 1062 |
| 1063 #define CHECK_BOUNDS(range) \ |
| 1064 {\ |
| 1065 all_in = 1;\ |
| 1066 all_in &= ((br-range) >= x->mv_row_min);\ |
| 1067 all_in &= ((br+range) <= x->mv_row_max);\ |
| 1068 all_in &= ((bc-range) >= x->mv_col_min);\ |
| 1069 all_in &= ((bc+range) <= x->mv_col_max);\ |
| 1070 } |
| 1071 |
| 1072 #define CHECK_POINT \ |
| 1073 {\ |
| 1074 if (this_mv.as_mv.col < x->mv_col_min) continue;\ |
| 1075 if (this_mv.as_mv.col > x->mv_col_max) continue;\ |
| 1076 if (this_mv.as_mv.row < x->mv_row_min) continue;\ |
| 1077 if (this_mv.as_mv.row > x->mv_row_max) continue;\ |
| 1078 } |
| 1079 |
| 1080 #define CHECK_BETTER \ |
| 1081 {\ |
| 1082 if (thissad < bestsad)\ |
| 1083 {\ |
| 1084 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, \ |
| 1085 sad_per_bit);\ |
| 1086 if (thissad < bestsad)\ |
| 1087 {\ |
| 1088 bestsad = thissad;\ |
| 1089 best_site = i;\ |
| 1090 }\ |
| 1091 }\ |
| 1092 } |
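
The CHECK_BETTER variant above (used by the integer-pel searches that follow) is a two-stage compare: the raw SAD is tested first, and the motion-vector rate term is only added and re-tested when the SAD alone already beats the current best, so the cost lookup is skipped for hopeless candidates. The same logic as a plain function, with simplified parameters:

/* Two-stage accept test, as in the CHECK_BETTER macro above. The mv_sad_cost
 * callback stands in for mvsad_err_cost(&this_mv, &fcenter_mv, ...). */
static void check_better_sad_sketch(unsigned int thissad, int row, int col, int site,
                                    unsigned int (*mv_sad_cost)(int row, int col),
                                    unsigned int *bestsad, int *best_site) {
  if (thissad < *bestsad) {               /* cheap test first: raw SAD only */
    thissad += mv_sad_cost(row, col);     /* then add the MV rate term */
    if (thissad < *bestsad) {             /* still better once rate is included */
      *bestsad = thissad;
      *best_site = site;
    }
  }
}
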
| 1093 |
| 1094 static const MV next_chkpts[6][3] = { |
| 1095 {{ -2, 0}, { -1, -2}, {1, -2}}, |
| 1096 {{ -1, -2}, {1, -2}, {2, 0}}, |
| 1097 {{1, -2}, {2, 0}, {1, 2}}, |
| 1098 {{2, 0}, {1, 2}, { -1, 2}}, |
| 1099 {{1, 2}, { -1, 2}, { -2, 0}}, |
| 1100 {{ -1, 2}, { -2, 0}, { -1, -2}} |
| 1101 }; |
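
next_chkpts encodes which three hexagon points are new after the search centre moves in direction k: row k is { hex[(k+5)%6], hex[k], hex[(k+1)%6] }, and the "k += 5 + best_site" wrap-around in vp9_hex_search below is the matching modulo-6 update. A small self-check of that relationship (illustrative, not part of the encoder):

#include <stdio.h>

typedef struct { int row, col; } mv_t;

int main(void) {
  const mv_t hex[6] = { {-1,-2}, {1,-2}, {2,0}, {1,2}, {-1,2}, {-2,0} };
  const mv_t next_chkpts[6][3] = {
    {{-2,0},{-1,-2},{1,-2}}, {{-1,-2},{1,-2},{2,0}}, {{1,-2},{2,0},{1,2}},
    {{2,0},{1,2},{-1,2}},    {{1,2},{-1,2},{-2,0}},  {{-1,2},{-2,0},{-1,-2}}
  };
  int k, i, ok = 1;
  for (k = 0; k < 6; k++) {
    const int idx[3] = { (k + 5) % 6, k, (k + 1) % 6 };
    for (i = 0; i < 3; i++)
      ok &= next_chkpts[k][i].row == hex[idx[i]].row &&
            next_chkpts[k][i].col == hex[idx[i]].col;
  }
  printf("next_chkpts matches the hex-neighbour rule: %s\n", ok ? "yes" : "no");
  return 0;
}
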
| 1102 |
| 1103 int vp9_hex_search |
| 1104 ( |
| 1105 MACROBLOCK *x, |
| 1106 BLOCK *b, |
| 1107 BLOCKD *d, |
| 1108 int_mv *ref_mv, |
| 1109 int_mv *best_mv, |
| 1110 int search_param, |
| 1111 int sad_per_bit, |
| 1112 const vp9_variance_fn_ptr_t *vfp, |
| 1113 int *mvjsadcost, int *mvsadcost[2], |
| 1114 int *mvjcost, int *mvcost[2], |
| 1115 int_mv *center_mv |
| 1116 ) { |
| 1117 MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} }; |
| 1118 MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; |
| 1119 int i, j; |
| 1120 |
| 1121 unsigned char *what = (*(b->base_src) + b->src); |
| 1122 int what_stride = b->src_stride; |
| 1123 int in_what_stride = d->pre_stride; |
| 1124 int br, bc; |
| 1125 int_mv this_mv; |
| 1126 unsigned int bestsad = 0x7fffffff; |
| 1127 unsigned int thissad; |
| 1128 unsigned char *base_offset; |
| 1129 unsigned char *this_offset; |
| 1130 int k = -1; |
| 1131 int all_in; |
| 1132 int best_site = -1; |
| 1133 |
| 1134 int_mv fcenter_mv; |
| 1135 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1136 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1137 |
| 1138 // adjust ref_mv to make sure it is within MV range |
| 1139 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
| 1140 br = ref_mv->as_mv.row; |
| 1141 bc = ref_mv->as_mv.col; |
| 1142 |
| 1143 // Work out the start point for the search |
| 1144 base_offset = (unsigned char *)(*(d->base_pre) + d->pre); |
| 1145 this_offset = base_offset + (br * (d->pre_stride)) + bc; |
| 1146 this_mv.as_mv.row = br; |
| 1147 this_mv.as_mv.col = bc; |
| 1148 bestsad = vfp->sdf(what, what_stride, this_offset, |
| 1149 in_what_stride, 0x7fffffff) |
| 1150 + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, |
| 1151 sad_per_bit); |
| 1152 |
| 1153 // hex search |
| 1154 // j=0 |
| 1155 CHECK_BOUNDS(2) |
| 1156 |
| 1157 if (all_in) { |
| 1158 for (i = 0; i < 6; i++) { |
| 1159 this_mv.as_mv.row = br + hex[i].row; |
| 1160 this_mv.as_mv.col = bc + hex[i].col; |
| 1161 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; |
| 1162 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); |
| 1163 CHECK_BETTER |
| 1164 } |
| 1165 } else { |
| 1166 for (i = 0; i < 6; i++) { |
| 1167 this_mv.as_mv.row = br + hex[i].row; |
| 1168 this_mv.as_mv.col = bc + hex[i].col; |
| 1169 CHECK_POINT |
| 1170 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; |
| 1171 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); |
| 1172 CHECK_BETTER |
| 1173 } |
| 1174 } |
| 1175 |
| 1176 if (best_site == -1) |
| 1177 goto cal_neighbors; |
| 1178 else { |
| 1179 br += hex[best_site].row; |
| 1180 bc += hex[best_site].col; |
| 1181 k = best_site; |
| 1182 } |
| 1183 |
| 1184 for (j = 1; j < 127; j++) { |
| 1185 best_site = -1; |
| 1186 CHECK_BOUNDS(2) |
| 1187 |
| 1188 if (all_in) { |
| 1189 for (i = 0; i < 3; i++) { |
| 1190 this_mv.as_mv.row = br + next_chkpts[k][i].row; |
| 1191 this_mv.as_mv.col = bc + next_chkpts[k][i].col; |
| 1192 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; |
| 1193 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); |
| 1194 CHECK_BETTER |
| 1195 } |
| 1196 } else { |
| 1197 for (i = 0; i < 3; i++) { |
| 1198 this_mv.as_mv.row = br + next_chkpts[k][i].row; |
| 1199 this_mv.as_mv.col = bc + next_chkpts[k][i].col; |
| 1200 CHECK_POINT |
| 1201 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; |
| 1202 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); |
| 1203 CHECK_BETTER |
| 1204 } |
| 1205 } |
| 1206 |
| 1207 if (best_site == -1) |
| 1208 break; |
| 1209 else { |
| 1210 br += next_chkpts[k][best_site].row; |
| 1211 bc += next_chkpts[k][best_site].col; |
| 1212 k += 5 + best_site; |
| 1213 if (k >= 12) k -= 12; |
| 1214 else if (k >= 6) k -= 6; |
| 1215 } |
| 1216 } |
| 1217 |
| 1218 // check 4 1-away neighbors |
| 1219 cal_neighbors: |
| 1220 for (j = 0; j < 32; j++) { |
| 1221 best_site = -1; |
| 1222 CHECK_BOUNDS(1) |
| 1223 |
| 1224 if (all_in) { |
| 1225 for (i = 0; i < 4; i++) { |
| 1226 this_mv.as_mv.row = br + neighbors[i].row; |
| 1227 this_mv.as_mv.col = bc + neighbors[i].col; |
| 1228 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; |
| 1229 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); |
| 1230 CHECK_BETTER |
| 1231 } |
| 1232 } else { |
| 1233 for (i = 0; i < 4; i++) { |
| 1234 this_mv.as_mv.row = br + neighbors[i].row; |
| 1235 this_mv.as_mv.col = bc + neighbors[i].col; |
| 1236 CHECK_POINT |
| 1237 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; |
| 1238 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); |
| 1239 CHECK_BETTER |
| 1240 } |
| 1241 } |
| 1242 |
| 1243 if (best_site == -1) |
| 1244 break; |
| 1245 else { |
| 1246 br += neighbors[best_site].row; |
| 1247 bc += neighbors[best_site].col; |
| 1248 } |
| 1249 } |
| 1250 |
| 1251 best_mv->as_mv.row = br; |
| 1252 best_mv->as_mv.col = bc; |
| 1253 |
| 1254 return bestsad; |
| 1255 } |
| 1256 #undef CHECK_BOUNDS |
| 1257 #undef CHECK_POINT |
| 1258 #undef CHECK_BETTER |
| 1259 |
| 1260 int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 1261 int_mv *ref_mv, int_mv *best_mv, |
| 1262 int search_param, int sad_per_bit, int *num00, |
| 1263 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
| 1264 int *mvcost[2], int_mv *center_mv) { |
| 1265 int i, j, step; |
| 1266 |
| 1267 unsigned char *what = (*(b->base_src) + b->src); |
| 1268 int what_stride = b->src_stride; |
| 1269 unsigned char *in_what; |
| 1270 int in_what_stride = d->pre_stride; |
| 1271 unsigned char *best_address; |
| 1272 |
| 1273 int tot_steps; |
| 1274 int_mv this_mv; |
| 1275 |
| 1276 int bestsad = INT_MAX; |
| 1277 int best_site = 0; |
| 1278 int last_site = 0; |
| 1279 |
| 1280 int ref_row, ref_col; |
| 1281 int this_row_offset, this_col_offset; |
| 1282 search_site *ss; |
| 1283 |
| 1284 unsigned char *check_here; |
| 1285 int thissad; |
| 1286 MACROBLOCKD *xd = &x->e_mbd; |
| 1287 int_mv fcenter_mv; |
| 1288 |
| 1289 int *mvjsadcost = x->nmvjointsadcost; |
| 1290 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 1291 |
| 1292 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1293 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1294 |
| 1295 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
| 1296 ref_row = ref_mv->as_mv.row; |
| 1297 ref_col = ref_mv->as_mv.col; |
| 1298 *num00 = 0; |
| 1299 best_mv->as_mv.row = ref_row; |
| 1300 best_mv->as_mv.col = ref_col; |
| 1301 |
| 1302 // Work out the start point for the search |
| 1303 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); |
| 1304 best_address = in_what; |
| 1305 |
| 1306 // Check the starting position |
| 1307 bestsad = fn_ptr->sdf(what, what_stride, in_what, |
| 1308 in_what_stride, 0x7fffffff) |
| 1309 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, |
| 1310 sad_per_bit); |
| 1311 |
| 1312 // search_param determines the length of the initial step and hence the number of iterations |
| 1313 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. |
| 1314 ss = &x->ss[search_param * x->searches_per_step]; |
| 1315 tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
| 1316 |
| 1317 i = 1; |
| 1318 |
| 1319 for (step = 0; step < tot_steps; step++) { |
| 1320 for (j = 0; j < x->searches_per_step; j++) { |
| 1321 // Trap illegal vectors |
| 1322 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
| 1323 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
| 1324 |
| 1325 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && |
| 1326 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) |
| 1327 |
| 1328 { |
| 1329 check_here = ss[i].offset + best_address; |
| 1330 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); |
| 1331 |
| 1332 if (thissad < bestsad) { |
| 1333 this_mv.as_mv.row = this_row_offset; |
| 1334 this_mv.as_mv.col = this_col_offset; |
| 1335 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1336 mvjsadcost, mvsadcost, sad_per_bit); |
| 1337 |
| 1338 if (thissad < bestsad) { |
| 1339 bestsad = thissad; |
| 1340 best_site = i; |
| 1341 } |
| 1342 } |
| 1343 } |
| 1344 |
| 1345 i++; |
| 1346 } |
| 1347 |
| 1348 if (best_site != last_site) { |
| 1349 best_mv->as_mv.row += ss[best_site].mv.row; |
| 1350 best_mv->as_mv.col += ss[best_site].mv.col; |
| 1351 best_address += ss[best_site].offset; |
| 1352 last_site = best_site; |
| 1353 } else if (best_address == in_what) |
| 1354 (*num00)++; |
| 1355 } |
| 1356 |
| 1357 this_mv.as_mv.row = best_mv->as_mv.row << 3; |
| 1358 this_mv.as_mv.col = best_mv->as_mv.col << 3; |
| 1359 |
| 1360 if (bestsad == INT_MAX) |
| 1361 return INT_MAX; |
| 1362 |
| 1363 return |
| 1364 fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
| 1365 (unsigned int *)(&thissad)) + |
| 1366 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 1367 xd->allow_high_precision_mv); |
| 1368 } |
| 1369 |
| 1370 int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 1371 int_mv *ref_mv, int_mv *best_mv, int search_param, |
| 1372 int sad_per_bit, int *num00, |
| 1373 vp9_variance_fn_ptr_t *fn_ptr, |
| 1374 int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
| 1375 int i, j, step; |
| 1376 |
| 1377 unsigned char *what = (*(b->base_src) + b->src); |
| 1378 int what_stride = b->src_stride; |
| 1379 unsigned char *in_what; |
| 1380 int in_what_stride = d->pre_stride; |
| 1381 unsigned char *best_address; |
| 1382 |
| 1383 int tot_steps; |
| 1384 int_mv this_mv; |
| 1385 |
| 1386 unsigned int bestsad = INT_MAX; |
| 1387 int best_site = 0; |
| 1388 int last_site = 0; |
| 1389 |
| 1390 int ref_row; |
| 1391 int ref_col; |
| 1392 int this_row_offset; |
| 1393 int this_col_offset; |
| 1394 search_site *ss; |
| 1395 |
| 1396 unsigned char *check_here; |
| 1397 unsigned int thissad; |
| 1398 MACROBLOCKD *xd = &x->e_mbd; |
| 1399 int_mv fcenter_mv; |
| 1400 |
| 1401 int *mvjsadcost = x->nmvjointsadcost; |
| 1402 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 1403 |
| 1404 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1405 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1406 |
| 1407 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
| 1408 ref_row = ref_mv->as_mv.row; |
| 1409 ref_col = ref_mv->as_mv.col; |
| 1410 *num00 = 0; |
| 1411 best_mv->as_mv.row = ref_row; |
| 1412 best_mv->as_mv.col = ref_col; |
| 1413 |
| 1414 // Work out the start point for the search |
| 1415 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); |
| 1416 best_address = in_what; |
| 1417 |
| 1418 // Check the starting position |
| 1419 bestsad = fn_ptr->sdf(what, what_stride, |
| 1420 in_what, in_what_stride, 0x7fffffff) |
| 1421 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, |
| 1422 sad_per_bit); |
| 1423 |
| 1424 // search_param determines the length of the initial step and hence the number of iterations |
| 1425 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. |
| 1426 ss = &x->ss[search_param * x->searches_per_step]; |
| 1427 tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
| 1428 |
| 1429 i = 1; |
| 1430 |
| 1431 for (step = 0; step < tot_steps; step++) { |
| 1432 int all_in = 1, t; |
| 1433 |
| 1434 // To know whether all neighbor points are within the bounds, checking these 4 |
| 1435 // bounds is enough, instead of checking 4 bounds for each point. |
| 1436 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); |
| 1437 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); |
| 1438 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); |
| 1439 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); |
| 1440 |
| 1441 if (all_in) { |
| 1442 unsigned int sad_array[4]; |
| 1443 |
| 1444 for (j = 0; j < x->searches_per_step; j += 4) { |
| 1445 unsigned char const *block_offset[4]; |
| 1446 |
| 1447 for (t = 0; t < 4; t++) |
| 1448 block_offset[t] = ss[i + t].offset + best_address; |
| 1449 |
| 1450 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
| 1451 sad_array); |
| 1452 |
| 1453 for (t = 0; t < 4; t++, i++) { |
| 1454 if (sad_array[t] < bestsad) { |
| 1455 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; |
| 1456 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; |
| 1457 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1458 mvjsadcost, mvsadcost, sad_per_bit); |
| 1459 |
| 1460 if (sad_array[t] < bestsad) { |
| 1461 bestsad = sad_array[t]; |
| 1462 best_site = i; |
| 1463 } |
| 1464 } |
| 1465 } |
| 1466 } |
| 1467 } else { |
| 1468 for (j = 0; j < x->searches_per_step; j++) { |
| 1469 // Trap illegal vectors |
| 1470 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
| 1471 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
| 1472 |
| 1473 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && |
| 1474 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { |
| 1475 check_here = ss[i].offset + best_address; |
| 1476 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); |
| 1477 |
| 1478 if (thissad < bestsad) { |
| 1479 this_mv.as_mv.row = this_row_offset; |
| 1480 this_mv.as_mv.col = this_col_offset; |
| 1481 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1482 mvjsadcost, mvsadcost, sad_per_bit); |
| 1483 |
| 1484 if (thissad < bestsad) { |
| 1485 bestsad = thissad; |
| 1486 best_site = i; |
| 1487 } |
| 1488 } |
| 1489 } |
| 1490 i++; |
| 1491 } |
| 1492 } |
| 1493 |
| 1494 if (best_site != last_site) { |
| 1495 best_mv->as_mv.row += ss[best_site].mv.row; |
| 1496 best_mv->as_mv.col += ss[best_site].mv.col; |
| 1497 best_address += ss[best_site].offset; |
| 1498 last_site = best_site; |
| 1499 } else if (best_address == in_what) |
| 1500 (*num00)++; |
| 1501 } |
| 1502 |
| 1503 this_mv.as_mv.row = best_mv->as_mv.row << 3; |
| 1504 this_mv.as_mv.col = best_mv->as_mv.col << 3; |
| 1505 |
| 1506 if (bestsad == INT_MAX) |
| 1507 return INT_MAX; |
| 1508 |
| 1509 return |
| 1510 fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
| 1511 (unsigned int *)(&thissad)) + |
| 1512 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 1513 xd->allow_high_precision_mv); |
| 1514 } |
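
vp9_diamond_search_sadx4 differs from the C version above mainly in batching: when all four points of a group are inside the search window, it calls fn_ptr->sdx4df once to get four SADs instead of issuing four sdf calls. A scalar sketch of what such a hook computes, with a 16x16 block size assumed here (real builds plug in SIMD implementations):

#include <stdlib.h>

static unsigned int sad_one(const unsigned char *a, int a_stride,
                            const unsigned char *b, int b_stride,
                            int w, int h) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < h; r++)
    for (c = 0; c < w; c++)
      sad += (unsigned int)abs(a[r * a_stride + c] - b[r * b_stride + c]);
  return sad;
}

/* One source block against four reference candidates, results in sad_array[0..3],
 * which is how the search above uses the sdx4df hook. */
static void sdx4df_sketch(const unsigned char *src, int src_stride,
                          const unsigned char *const ref[4], int ref_stride,
                          unsigned int sad_array[4]) {
  int t;
  for (t = 0; t < 4; t++)
    sad_array[t] = sad_one(src, src_stride, ref[t], ref_stride, 16, 16);
}
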
| 1515 |
| 1516 /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
| 1517 point as the best match, we will do a final 1-away diamond |
| 1518 refining search */ |
| 1519 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b, |
| 1520 BLOCKD *d, int_mv *mvp_full, int step_param, |
| 1521 int sadpb, int further_steps, |
| 1522 int do_refine, vp9_variance_fn_ptr_t *fn_ptr, |
| 1523 int_mv *ref_mv, int_mv *dst_mv) { |
| 1524 int_mv temp_mv; |
| 1525 int thissme, n, num00; |
| 1526 int bestsme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv, |
| 1527 step_param, sadpb, &num00, |
| 1528 fn_ptr, x->nmvjointcost, |
| 1529 x->mvcost, ref_mv); |
| 1530 dst_mv->as_int = temp_mv.as_int; |
| 1531 |
| 1532 n = num00; |
| 1533 num00 = 0; |
| 1534 |
| 1535 /* If there won't be more n-step search, check to see if refining search is needed. */ |
| 1536 if (n > further_steps) |
| 1537 do_refine = 0; |
| 1538 |
| 1539 while (n < further_steps) { |
| 1540 n++; |
| 1541 |
| 1542 if (num00) |
| 1543 num00--; |
| 1544 else { |
| 1545 thissme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv, |
| 1546 step_param + n, sadpb, &num00, |
| 1547 fn_ptr, x->nmvjointcost, x->mvcost, |
| 1548 ref_mv); |
| 1549 |
| 1550 /* check to see if refining search is needed. */ |
| 1551 if (num00 > (further_steps - n)) |
| 1552 do_refine = 0; |
| 1553 |
| 1554 if (thissme < bestsme) { |
| 1555 bestsme = thissme; |
| 1556 dst_mv->as_int = temp_mv.as_int; |
| 1557 } |
| 1558 } |
| 1559 } |
| 1560 |
| 1561 /* final 1-away diamond refining search */ |
| 1562 if (do_refine == 1) { |
| 1563 int search_range = 8; |
| 1564 int_mv best_mv; |
| 1565 best_mv.as_int = dst_mv->as_int; |
| 1566 thissme = cpi->refining_search_sad(x, b, d, &best_mv, sadpb, search_range, |
| 1567 fn_ptr, x->nmvjointcost, x->mvcost, |
| 1568 ref_mv); |
| 1569 |
| 1570 if (thissme < bestsme) { |
| 1571 bestsme = thissme; |
| 1572 dst_mv->as_int = best_mv.as_int; |
| 1573 } |
| 1574 } |
| 1575 return bestsme; |
| 1576 } |
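
The driver above reads more easily with the encoder state abstracted away: run the diamond search once, re-run it with a larger step_param for each remaining step (skipping the ones num00 reports as having already converged on the centre), and optionally finish with a 1-away refining search. A simplified control-flow sketch, with the motion vector packed into a plain int and the two searches reduced to callbacks; it mirrors the structure of vp9_full_pixel_diamond but is not a drop-in replacement:

typedef int (*diamond_fn)(int step_param, int *num00, int *mv_out);
typedef int (*refine_fn)(int search_range, int *mv_inout);

static int full_pixel_diamond_sketch(diamond_fn diamond, refine_fn refine,
                                     int step_param, int further_steps,
                                     int do_refine, int *dst_mv) {
  int num00 = 0, tmp_mv = 0;
  int bestsme = diamond(step_param, &num00, &tmp_mv);
  int n = num00;
  *dst_mv = tmp_mv;
  num00 = 0;
  if (n > further_steps)
    do_refine = 0;
  while (n < further_steps) {
    n++;
    if (num00) {
      num00--;                        /* this step size already converged: skip it */
    } else {
      int thissme = diamond(step_param + n, &num00, &tmp_mv);
      if (num00 > further_steps - n)
        do_refine = 0;
      if (thissme < bestsme) { bestsme = thissme; *dst_mv = tmp_mv; }
    }
  }
  if (do_refine) {
    int mv = *dst_mv;
    int thissme = refine(8, &mv);     /* final 1-away refinement around the best MV */
    if (thissme < bestsme) { bestsme = thissme; *dst_mv = mv; }
  }
  return bestsme;
}
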
| 1577 |
| 1578 int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
| 1579 int sad_per_bit, int distance, |
| 1580 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
| 1581 int *mvcost[2], |
| 1582 int_mv *center_mv) { |
| 1583 unsigned char *what = (*(b->base_src) + b->src); |
| 1584 int what_stride = b->src_stride; |
| 1585 unsigned char *in_what; |
| 1586 int in_what_stride = d->pre_stride; |
| 1587 int mv_stride = d->pre_stride; |
| 1588 unsigned char *bestaddress; |
| 1589 int_mv *best_mv = &d->bmi.as_mv.first; |
| 1590 int_mv this_mv; |
| 1591 int bestsad = INT_MAX; |
| 1592 int r, c; |
| 1593 |
| 1594 unsigned char *check_here; |
| 1595 int thissad; |
| 1596 MACROBLOCKD *xd = &x->e_mbd; |
| 1597 |
| 1598 int ref_row = ref_mv->as_mv.row; |
| 1599 int ref_col = ref_mv->as_mv.col; |
| 1600 |
| 1601 int row_min = ref_row - distance; |
| 1602 int row_max = ref_row + distance; |
| 1603 int col_min = ref_col - distance; |
| 1604 int col_max = ref_col + distance; |
| 1605 int_mv fcenter_mv; |
| 1606 |
| 1607 int *mvjsadcost = x->nmvjointsadcost; |
| 1608 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 1609 |
| 1610 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1611 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1612 |
| 1613 // Work out the mid point for the search |
| 1614 in_what = *(d->base_pre) + d->pre; |
| 1615 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; |
| 1616 |
| 1617 best_mv->as_mv.row = ref_row; |
| 1618 best_mv->as_mv.col = ref_col; |
| 1619 |
| 1620 // Baseline value at the centre |
| 1621 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, |
| 1622 in_what_stride, 0x7fffffff) |
| 1623 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, |
| 1624 sad_per_bit); |
| 1625 |
| 1626 // Apply further limits to prevent us from using vectors that stretch beyond the UMV border |
| 1627 if (col_min < x->mv_col_min) |
| 1628 col_min = x->mv_col_min; |
| 1629 |
| 1630 if (col_max > x->mv_col_max) |
| 1631 col_max = x->mv_col_max; |
| 1632 |
| 1633 if (row_min < x->mv_row_min) |
| 1634 row_min = x->mv_row_min; |
| 1635 |
| 1636 if (row_max > x->mv_row_max) |
| 1637 row_max = x->mv_row_max; |
| 1638 |
| 1639 for (r = row_min; r < row_max; r++) { |
| 1640 this_mv.as_mv.row = r; |
| 1641 check_here = r * mv_stride + in_what + col_min; |
| 1642 |
| 1643 for (c = col_min; c < col_max; c++) { |
| 1644 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); |
| 1645 |
| 1646 this_mv.as_mv.col = c; |
| 1647 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1648 mvjsadcost, mvsadcost, sad_per_bit); |
| 1649 |
| 1650 if (thissad < bestsad) { |
| 1651 bestsad = thissad; |
| 1652 best_mv->as_mv.row = r; |
| 1653 best_mv->as_mv.col = c; |
| 1654 bestaddress = check_here; |
| 1655 } |
| 1656 |
| 1657 check_here++; |
| 1658 } |
| 1659 } |
| 1660 |
| 1661 this_mv.as_mv.row = best_mv->as_mv.row << 3; |
| 1662 this_mv.as_mv.col = best_mv->as_mv.col << 3; |
| 1663 |
| 1664 if (bestsad < INT_MAX) |
| 1665 return |
| 1666 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
| 1667 (unsigned int *)(&thissad)) + |
| 1668 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 1669 xd->allow_high_precision_mv); |
| 1670 else |
| 1671 return INT_MAX; |
| 1672 } |
| 1673 |
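| /* Variant of vp9_full_search_sad_c that scores three adjacent columns per
|  * sdx3f call, then falls back to single-point SADs for the remaining columns
|  * of each row. */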
| 1674 int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
| 1675 int sad_per_bit, int distance, |
| 1676 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
| 1677 int *mvcost[2], int_mv *center_mv) { |
| 1678 unsigned char *what = (*(b->base_src) + b->src); |
| 1679 int what_stride = b->src_stride; |
| 1680 unsigned char *in_what; |
| 1681 int in_what_stride = d->pre_stride; |
| 1682 int mv_stride = d->pre_stride; |
| 1683 unsigned char *bestaddress; |
| 1684 int_mv *best_mv = &d->bmi.as_mv.first; |
| 1685 int_mv this_mv; |
| 1686 unsigned int bestsad = INT_MAX; |
| 1687 int r, c; |
| 1688 |
| 1689 unsigned char *check_here; |
| 1690 unsigned int thissad; |
| 1691 MACROBLOCKD *xd = &x->e_mbd; |
| 1692 |
| 1693 int ref_row = ref_mv->as_mv.row; |
| 1694 int ref_col = ref_mv->as_mv.col; |
| 1695 |
| 1696 int row_min = ref_row - distance; |
| 1697 int row_max = ref_row + distance; |
| 1698 int col_min = ref_col - distance; |
| 1699 int col_max = ref_col + distance; |
| 1700 |
| 1701 unsigned int sad_array[3]; |
| 1702 int_mv fcenter_mv; |
| 1703 |
| 1704 int *mvjsadcost = x->nmvjointsadcost; |
| 1705 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 1706 |
| 1707 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1708 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1709 |
| 1710 // Work out the mid point for the search |
| 1711 in_what = *(d->base_pre) + d->pre; |
| 1712 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; |
| 1713 |
| 1714 best_mv->as_mv.row = ref_row; |
| 1715 best_mv->as_mv.col = ref_col; |
| 1716 |
| 1717 // Baseline value at the centre |
| 1718 bestsad = fn_ptr->sdf(what, what_stride, |
| 1719 bestaddress, in_what_stride, 0x7fffffff) |
| 1720 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, |
| 1721 sad_per_bit); |
| 1722 |
| 1723 // Apply further limits to prevent the search from using vectors that stretch beyond the UMV border.
| 1724 if (col_min < x->mv_col_min) |
| 1725 col_min = x->mv_col_min; |
| 1726 |
| 1727 if (col_max > x->mv_col_max) |
| 1728 col_max = x->mv_col_max; |
| 1729 |
| 1730 if (row_min < x->mv_row_min) |
| 1731 row_min = x->mv_row_min; |
| 1732 |
| 1733 if (row_max > x->mv_row_max) |
| 1734 row_max = x->mv_row_max; |
| 1735 |
| 1736 for (r = row_min; r < row_max; r++) { |
| 1737 this_mv.as_mv.row = r; |
| 1738 check_here = r * mv_stride + in_what + col_min; |
| 1739 c = col_min; |
| 1740 |
| 1741 while ((c + 2) < col_max) { |
| 1742 int i; |
| 1743 |
| 1744 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
| 1745 |
| 1746 for (i = 0; i < 3; i++) { |
| 1747 thissad = sad_array[i]; |
| 1748 |
| 1749 if (thissad < bestsad) { |
| 1750 this_mv.as_mv.col = c; |
| 1751 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1752 mvjsadcost, mvsadcost, sad_per_bit); |
| 1753 |
| 1754 if (thissad < bestsad) { |
| 1755 bestsad = thissad; |
| 1756 best_mv->as_mv.row = r; |
| 1757 best_mv->as_mv.col = c; |
| 1758 bestaddress = check_here; |
| 1759 } |
| 1760 } |
| 1761 |
| 1762 check_here++; |
| 1763 c++; |
| 1764 } |
| 1765 } |
| 1766 |
| 1767 while (c < col_max) { |
| 1768 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
| 1769 |
| 1770 if (thissad < bestsad) { |
| 1771 this_mv.as_mv.col = c; |
| 1772 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1773 mvjsadcost, mvsadcost, sad_per_bit); |
| 1774 |
| 1775 if (thissad < bestsad) { |
| 1776 bestsad = thissad; |
| 1777 best_mv->as_mv.row = r; |
| 1778 best_mv->as_mv.col = c; |
| 1779 bestaddress = check_here; |
| 1780 } |
| 1781 } |
| 1782 |
| 1783 check_here++; |
| 1784 c++; |
| 1785 } |
| 1786 |
| 1787 } |
| 1788 |
| 1789 this_mv.as_mv.row = best_mv->as_mv.row << 3; |
| 1790 this_mv.as_mv.col = best_mv->as_mv.col << 3; |
| 1791 |
| 1792 if (bestsad < INT_MAX) |
| 1793 return |
| 1794 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
| 1795 (unsigned int *)(&thissad)) + |
| 1796 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 1797 xd->allow_high_precision_mv); |
| 1798 else |
| 1799 return INT_MAX; |
| 1800 } |
| 1801 |
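| /* Variant of vp9_full_search_sad_c that scores eight adjacent columns per
|  * sdx8f call, then three per sdx3f call, then the leftover columns of each
|  * row one at a time. */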
| 1802 int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
| 1803 int sad_per_bit, int distance, |
| 1804 vp9_variance_fn_ptr_t *fn_ptr, |
| 1805 int *mvjcost, int *mvcost[2], |
| 1806 int_mv *center_mv) { |
| 1807 unsigned char *what = (*(b->base_src) + b->src); |
| 1808 int what_stride = b->src_stride; |
| 1809 unsigned char *in_what; |
| 1810 int in_what_stride = d->pre_stride; |
| 1811 int mv_stride = d->pre_stride; |
| 1812 unsigned char *bestaddress; |
| 1813 int_mv *best_mv = &d->bmi.as_mv.first; |
| 1814 int_mv this_mv; |
| 1815 unsigned int bestsad = INT_MAX; |
| 1816 int r, c; |
| 1817 |
| 1818 unsigned char *check_here; |
| 1819 unsigned int thissad; |
| 1820 MACROBLOCKD *xd = &x->e_mbd; |
| 1821 |
| 1822 int ref_row = ref_mv->as_mv.row; |
| 1823 int ref_col = ref_mv->as_mv.col; |
| 1824 |
| 1825 int row_min = ref_row - distance; |
| 1826 int row_max = ref_row + distance; |
| 1827 int col_min = ref_col - distance; |
| 1828 int col_max = ref_col + distance; |
| 1829 |
| 1830 DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); |
| 1831 unsigned int sad_array[3]; |
| 1832 int_mv fcenter_mv; |
| 1833 |
| 1834 int *mvjsadcost = x->nmvjointsadcost; |
| 1835 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 1836 |
| 1837 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1838 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1839 |
| 1840 // Work out the mid point for the search |
| 1841 in_what = *(d->base_pre) + d->pre; |
| 1842 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; |
| 1843 |
| 1844 best_mv->as_mv.row = ref_row; |
| 1845 best_mv->as_mv.col = ref_col; |
| 1846 |
| 1847 // Baseline value at the centre |
| 1848 bestsad = fn_ptr->sdf(what, what_stride, |
| 1849 bestaddress, in_what_stride, 0x7fffffff) |
| 1850 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, |
| 1851 sad_per_bit); |
| 1852 |
| 1853 // Apply further limits to prevent the search from using vectors that stretch beyond the UMV border.
| 1854 if (col_min < x->mv_col_min) |
| 1855 col_min = x->mv_col_min; |
| 1856 |
| 1857 if (col_max > x->mv_col_max) |
| 1858 col_max = x->mv_col_max; |
| 1859 |
| 1860 if (row_min < x->mv_row_min) |
| 1861 row_min = x->mv_row_min; |
| 1862 |
| 1863 if (row_max > x->mv_row_max) |
| 1864 row_max = x->mv_row_max; |
| 1865 |
| 1866 for (r = row_min; r < row_max; r++) { |
| 1867 this_mv.as_mv.row = r; |
| 1868 check_here = r * mv_stride + in_what + col_min; |
| 1869 c = col_min; |
| 1870 |
| 1871 while ((c + 7) < col_max) { |
| 1872 int i; |
| 1873 |
| 1874 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); |
| 1875 |
| 1876 for (i = 0; i < 8; i++) { |
| 1877 thissad = (unsigned int)sad_array8[i]; |
| 1878 |
| 1879 if (thissad < bestsad) { |
| 1880 this_mv.as_mv.col = c; |
| 1881 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1882 mvjsadcost, mvsadcost, sad_per_bit); |
| 1883 |
| 1884 if (thissad < bestsad) { |
| 1885 bestsad = thissad; |
| 1886 best_mv->as_mv.row = r; |
| 1887 best_mv->as_mv.col = c; |
| 1888 bestaddress = check_here; |
| 1889 } |
| 1890 } |
| 1891 |
| 1892 check_here++; |
| 1893 c++; |
| 1894 } |
| 1895 } |
| 1896 |
| 1897 while ((c + 2) < col_max) { |
| 1898 int i; |
| 1899 |
| 1900 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
| 1901 |
| 1902 for (i = 0; i < 3; i++) { |
| 1903 thissad = sad_array[i]; |
| 1904 |
| 1905 if (thissad < bestsad) { |
| 1906 this_mv.as_mv.col = c; |
| 1907 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1908 mvjsadcost, mvsadcost, sad_per_bit); |
| 1909 |
| 1910 if (thissad < bestsad) { |
| 1911 bestsad = thissad; |
| 1912 best_mv->as_mv.row = r; |
| 1913 best_mv->as_mv.col = c; |
| 1914 bestaddress = check_here; |
| 1915 } |
| 1916 } |
| 1917 |
| 1918 check_here++; |
| 1919 c++; |
| 1920 } |
| 1921 } |
| 1922 |
| 1923 while (c < col_max) { |
| 1924 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
| 1925 |
| 1926 if (thissad < bestsad) { |
| 1927 this_mv.as_mv.col = c; |
| 1928 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
| 1929 mvjsadcost, mvsadcost, sad_per_bit); |
| 1930 |
| 1931 if (thissad < bestsad) { |
| 1932 bestsad = thissad; |
| 1933 best_mv->as_mv.row = r; |
| 1934 best_mv->as_mv.col = c; |
| 1935 bestaddress = check_here; |
| 1936 } |
| 1937 } |
| 1938 |
| 1939 check_here++; |
| 1940 c++; |
| 1941 } |
| 1942 } |
| 1943 |
| 1944 this_mv.as_mv.row = best_mv->as_mv.row << 3; |
| 1945 this_mv.as_mv.col = best_mv->as_mv.col << 3; |
| 1946 |
| 1947 if (bestsad < INT_MAX) |
| 1948 return |
| 1949 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
| 1950 (unsigned int *)(&thissad)) + |
| 1951 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 1952 xd->allow_high_precision_mv); |
| 1953 else |
| 1954 return INT_MAX; |
| 1955 } |
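|
| /* Iterative refinement: starting from ref_mv, test the four 1-pel neighbours
|  * (up, left, right, down) and move to the best one, repeating for at most
|  * search_range steps or until no neighbour improves the cost. */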
| 1956 int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 1957 int_mv *ref_mv, int error_per_bit, |
| 1958 int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
| 1959 int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
| 1960 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
| 1961 int i, j; |
| 1962 short this_row_offset, this_col_offset; |
| 1963 |
| 1964 int what_stride = b->src_stride; |
| 1965 int in_what_stride = d->pre_stride; |
| 1966 unsigned char *what = (*(b->base_src) + b->src); |
| 1967 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + |
| 1968 (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
| 1969 unsigned char *check_here; |
| 1970 unsigned int thissad; |
| 1971 int_mv this_mv; |
| 1972 unsigned int bestsad = INT_MAX; |
| 1973 MACROBLOCKD *xd = &x->e_mbd; |
| 1974 int_mv fcenter_mv; |
| 1975 |
| 1976 int *mvjsadcost = x->nmvjointsadcost; |
| 1977 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 1978 |
| 1979 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 1980 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 1981 |
| 1982 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
| 1983 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); |
| 1984 |
| 1985 for (i = 0; i < search_range; i++) { |
| 1986 int best_site = -1; |
| 1987 |
| 1988 for (j = 0; j < 4; j++) { |
| 1989 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
| 1990 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
| 1991 |
| 1992 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
| 1993 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
| 1994 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
| 1995 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
| 1996 |
| 1997 if (thissad < bestsad) { |
| 1998 this_mv.as_mv.row = this_row_offset; |
| 1999 this_mv.as_mv.col = this_col_offset; |
| 2000 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, |
| 2001 mvsadcost, error_per_bit); |
| 2002 |
| 2003 if (thissad < bestsad) { |
| 2004 bestsad = thissad; |
| 2005 best_site = j; |
| 2006 } |
| 2007 } |
| 2008 } |
| 2009 } |
| 2010 |
| 2011 if (best_site == -1) |
| 2012 break; |
| 2013 else { |
| 2014 ref_mv->as_mv.row += neighbors[best_site].row; |
| 2015 ref_mv->as_mv.col += neighbors[best_site].col; |
| 2016 best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
| 2017 } |
| 2018 } |
| 2019 |
| 2020 this_mv.as_mv.row = ref_mv->as_mv.row << 3; |
| 2021 this_mv.as_mv.col = ref_mv->as_mv.col << 3; |
| 2022 |
| 2023 if (bestsad < INT_MAX) |
| 2024 return |
| 2025 fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
| 2026 (unsigned int *)(&thissad)) + |
| 2027 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 2028 xd->allow_high_precision_mv); |
| 2029 else |
| 2030 return INT_MAX; |
| 2031 } |
| 2032 |
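| /* Refinement variant that scores all four 1-pel neighbours with a single
|  * sdx4df call when they all lie inside the MV limits, otherwise falls back
|  * to per-point SADs as in vp9_refining_search_sad_c. */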
| 2033 int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
| 2034 int_mv *ref_mv, int error_per_bit, |
| 2035 int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
| 2036 int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
| 2037 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
| 2038 int i, j; |
| 2039 short this_row_offset, this_col_offset; |
| 2040 |
| 2041 int what_stride = b->src_stride; |
| 2042 int in_what_stride = d->pre_stride; |
| 2043 unsigned char *what = (*(b->base_src) + b->src); |
| 2044 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + |
| 2045 (ref_mv->as_mv.row * (d->pre_stride)) + ref_mv->as_mv.col);
| 2046 unsigned char *check_here; |
| 2047 unsigned int thissad; |
| 2048 int_mv this_mv; |
| 2049 unsigned int bestsad = INT_MAX; |
| 2050 MACROBLOCKD *xd = &x->e_mbd; |
| 2051 int_mv fcenter_mv; |
| 2052 |
| 2053 int *mvjsadcost = x->nmvjointsadcost; |
| 2054 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 2055 |
| 2056 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
| 2057 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
| 2058 |
| 2059 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
| 2060 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); |
| 2061 |
| 2062 for (i = 0; i < search_range; i++) { |
| 2063 int best_site = -1; |
| 2064 int all_in = 1; |
| 2065 |
| 2066 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); |
| 2067 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); |
| 2068 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); |
| 2069 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); |
| 2070 |
| 2071 if (all_in) { |
| 2072 unsigned int sad_array[4]; |
| 2073 unsigned char const *block_offset[4]; |
| 2074 block_offset[0] = best_address - in_what_stride; |
| 2075 block_offset[1] = best_address - 1; |
| 2076 block_offset[2] = best_address + 1; |
| 2077 block_offset[3] = best_address + in_what_stride; |
| 2078 |
| 2079 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
| 2080 |
| 2081 for (j = 0; j < 4; j++) { |
| 2082 if (sad_array[j] < bestsad) { |
| 2083 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; |
| 2084 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; |
| 2085 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, |
| 2086 mvsadcost, error_per_bit); |
| 2087 |
| 2088 if (sad_array[j] < bestsad) { |
| 2089 bestsad = sad_array[j]; |
| 2090 best_site = j; |
| 2091 } |
| 2092 } |
| 2093 } |
| 2094 } else { |
| 2095 for (j = 0; j < 4; j++) { |
| 2096 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
| 2097 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
| 2098 |
| 2099 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
| 2100 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
| 2101 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
| 2102 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
| 2103 |
| 2104 if (thissad < bestsad) { |
| 2105 this_mv.as_mv.row = this_row_offset; |
| 2106 this_mv.as_mv.col = this_col_offset; |
| 2107 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, |
| 2108 mvsadcost, error_per_bit); |
| 2109 |
| 2110 if (thissad < bestsad) { |
| 2111 bestsad = thissad; |
| 2112 best_site = j; |
| 2113 } |
| 2114 } |
| 2115 } |
| 2116 } |
| 2117 } |
| 2118 |
| 2119 if (best_site == -1) |
| 2120 break; |
| 2121 else { |
| 2122 ref_mv->as_mv.row += neighbors[best_site].row; |
| 2123 ref_mv->as_mv.col += neighbors[best_site].col; |
| 2124 best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
| 2125 } |
| 2126 } |
| 2127 |
| 2128 this_mv.as_mv.row = ref_mv->as_mv.row << 3; |
| 2129 this_mv.as_mv.col = ref_mv->as_mv.col << 3; |
| 2130 |
| 2131 if (bestsad < INT_MAX) |
| 2132 return |
| 2133 fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
| 2134 (unsigned int *)(&thissad)) + |
| 2135 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit, |
| 2136 xd->allow_high_precision_mv); |
| 2137 else |
| 2138 return INT_MAX; |
| 2139 } |
| 2140 |
| 2141 |
| 2142 |
| 2143 #ifdef ENTROPY_STATS |
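| /* Convert the accumulated MV reference counts into per-context probabilities
|  * and append them as the vp9_mode_contexts table to vp9_modecont.c. */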
| 2144 void print_mode_context(void) { |
| 2145 FILE *f = fopen("vp9_modecont.c", "a"); |
| 2146 int i, j; |
| 2147 |
| 2148 fprintf(f, "#include \"vp9_entropy.h\"\n"); |
| 2149 fprintf(f, "const int vp9_mode_contexts[6][4] ="); |
| 2150 fprintf(f, "{\n"); |
| 2151 for (j = 0; j < 6; j++) { |
| 2152 fprintf(f, " {/* %d */ ", j); |
| 2153 fprintf(f, " "); |
| 2154 for (i = 0; i < 4; i++) { |
| 2155 int this_prob; |
| 2156 int count; |
| 2157 |
| 2158 // context probs |
| 2159 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; |
| 2160 if (count) |
| 2161 this_prob = 256 * mv_ref_ct[j][i][0] / count; |
| 2162 else |
| 2163 this_prob = 128; |
| 2164 |
| 2165 if (this_prob == 0) |
| 2166 this_prob = 1; |
| 2167 fprintf(f, "%5d, ", this_prob); |
| 2168 } |
| 2169 fprintf(f, " },\n"); |
| 2170 } |
| 2171 |
| 2172 fprintf(f, "};\n"); |
| 2173 fclose(f); |
| 2174 } |
| 2175 |
| 2176 /* MV ref count ENTROPY_STATS stats code */ |
| 2177 void init_mv_ref_counts() { |
| 2178 vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); |
| 2179 vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); |
| 2180 } |
| 2181 |
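| /* Update the MV mode / reference counts for one macroblock, walking the
|  * ZEROMV -> NEARESTMV -> NEARMV -> NEWMV decision tree. */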
| 2182 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) { |
| 2183 if (m == ZEROMV) { |
| 2184 ++mv_ref_ct [ct[0]] [0] [0]; |
| 2185 ++mv_mode_cts[0][0]; |
| 2186 } else { |
| 2187 ++mv_ref_ct [ct[0]] [0] [1]; |
| 2188 ++mv_mode_cts[0][1]; |
| 2189 |
| 2190 if (m == NEARESTMV) { |
| 2191 ++mv_ref_ct [ct[1]] [1] [0]; |
| 2192 ++mv_mode_cts[1][0]; |
| 2193 } else { |
| 2194 ++mv_ref_ct [ct[1]] [1] [1]; |
| 2195 ++mv_mode_cts[1][1]; |
| 2196 |
| 2197 if (m == NEARMV) { |
| 2198 ++mv_ref_ct [ct[2]] [2] [0]; |
| 2199 ++mv_mode_cts[2][0]; |
| 2200 } else { |
| 2201 ++mv_ref_ct [ct[2]] [2] [1]; |
| 2202 ++mv_mode_cts[2][1]; |
| 2203 |
| 2204 if (m == NEWMV) { |
| 2205 ++mv_ref_ct [ct[3]] [3] [0]; |
| 2206 ++mv_mode_cts[3][0]; |
| 2207 } else { |
| 2208 ++mv_ref_ct [ct[3]] [3] [1]; |
| 2209 ++mv_mode_cts[3][1]; |
| 2210 } |
| 2211 } |
| 2212 } |
| 2213 } |
| 2214 } |
| 2215 |
| 2216 #endif  /* END MV ref count ENTROPY_STATS stats code */