OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <limits.h> | 11 #include <limits.h> |
12 #include <math.h> | 12 #include <math.h> |
13 #include <stdio.h> | 13 #include <stdio.h> |
14 | 14 |
15 #include "./vpx_config.h" | 15 #include "./vpx_config.h" |
16 | 16 |
17 #include "vpx_mem/vpx_mem.h" | 17 #include "vpx_mem/vpx_mem.h" |
18 | 18 |
19 #include "vp9/common/vp9_common.h" | 19 #include "vp9/common/vp9_common.h" |
20 | 20 |
21 #include "vp9/encoder/vp9_onyx_int.h" | 21 #include "vp9/encoder/vp9_onyx_int.h" |
22 #include "vp9/encoder/vp9_mcomp.h" | 22 #include "vp9/encoder/vp9_mcomp.h" |
23 | 23 |
24 // #define NEW_DIAMOND_SEARCH | 24 // #define NEW_DIAMOND_SEARCH |
25 | 25 |
// get_buf_from_mv (present on the NEW side only).
// Returns the address of the sample in `buf` that lies `mv->row` rows and
// `mv->col` columns away from buf->buf, i.e.
//   buf->buf + mv->row * buf->stride + mv->col.
// The MV components are used directly as sample offsets; this helper applies
// no sub-pel scaling and no bounds checking.
| 26 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, |
| 27 const MV *mv) { |
| 28 return &buf->buf[mv->row * buf->stride + mv->col]; |
| 29 } |
| 30 |
26 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { | 31 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { |
27 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); | 32 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); |
28 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); | 33 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); |
29 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; | 34 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; |
30 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; | 35 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; |
31 | 36 |
32 col_min = MAX(col_min, (MV_LOW >> 3) + 1); | 37 col_min = MAX(col_min, (MV_LOW >> 3) + 1); |
33 row_min = MAX(row_min, (MV_LOW >> 3) + 1); | 38 row_min = MAX(row_min, (MV_LOW >> 3) + 1); |
34 col_max = MIN(col_max, (MV_UPP >> 3) - 1); | 39 col_max = MIN(col_max, (MV_UPP >> 3) - 1); |
35 row_max = MIN(row_max, (MV_UPP >> 3) - 1); | 40 row_max = MIN(row_max, (MV_UPP >> 3) - 1); |
(...skipping 55 matching lines...)
91 if (mvsadcost) { | 96 if (mvsadcost) { |
92 const MV diff = { mv->row - ref->row, | 97 const MV diff = { mv->row - ref->row, |
93 mv->col - ref->col }; | 98 mv->col - ref->col }; |
94 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * | 99 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * |
95 error_per_bit, 8); | 100 error_per_bit, 8); |
96 } | 101 } |
97 return 0; | 102 return 0; |
98 } | 103 } |
99 | 104 |
// vp9_init_dsmotion_compensation: fills the search-site table x->ss.
//   - Entry 0 is the zero displacement with offset 0.
//   - For each step length len = MAX_FIRST_STEP, halving while len > 0, four
//     sites are appended in this order: row -len, row +len, col -len, col +len.
//     Each site's offset is mv.row * stride + mv.col.
//   - x->ss_count receives the number of entries written and
//     x->searches_per_step is set to 4.
// Review note: the NEW side replaces the four hand-unrolled assignments with a
// loop over a local table. The table uses positional initializers, so it
// matches the OLD order only if MV declares `row` before `col` -- the other
// positional MV initializers in this file imply that, but confirm against the
// MV definition.
100 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { | 105 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { |
101 int len; | 106 int len, ss_count = 1; |
102 int search_site_count = 0; | |
103 | 107 |
104 // Generate offsets for 4 search sites per step. | 108 x->ss[0].mv.col = x->ss[0].mv.row = 0; |
105 x->ss[search_site_count].mv.col = 0; | 109 x->ss[0].offset = 0; |
106 x->ss[search_site_count].mv.row = 0; | |
107 x->ss[search_site_count].offset = 0; | |
108 search_site_count++; | |
109 | 110 |
110 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { | 111 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
111 // Compute offsets for search sites. | 112 // Generate offsets for 4 search sites per step. |
112 x->ss[search_site_count].mv.col = 0; | 113 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; |
113 x->ss[search_site_count].mv.row = -len; | 114 int i; |
114 x->ss[search_site_count].offset = -len * stride; | 115 for (i = 0; i < 4; ++i) { |
115 search_site_count++; | 116 search_site *const ss = &x->ss[ss_count++]; |
116 | 117 ss->mv = ss_mvs[i]; |
117 // Compute offsets for search sites. | 118 ss->offset = ss->mv.row * stride + ss->mv.col; |
118 x->ss[search_site_count].mv.col = 0; | 119 } |
119 x->ss[search_site_count].mv.row = len; | |
120 x->ss[search_site_count].offset = len * stride; | |
121 search_site_count++; | |
122 | |
123 // Compute offsets for search sites. | |
124 x->ss[search_site_count].mv.col = -len; | |
125 x->ss[search_site_count].mv.row = 0; | |
126 x->ss[search_site_count].offset = -len; | |
127 search_site_count++; | |
128 | |
129 // Compute offsets for search sites. | |
130 x->ss[search_site_count].mv.col = len; | |
131 x->ss[search_site_count].mv.row = 0; | |
132 x->ss[search_site_count].offset = len; | |
133 search_site_count++; | |
134 } | 120 } |
135 | 121 |
136 x->ss_count = search_site_count; | 122 x->ss_count = ss_count; |
137 x->searches_per_step = 4; | 123 x->searches_per_step = 4; |
138 } | 124 } |
139 | 125 |
140 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { | 126 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { |
141 int len, ss_count = 1; | 127 int len, ss_count = 1; |
142 | 128 |
143 x->ss[0].mv.col = x->ss[0].mv.row = 0; | 129 x->ss[0].mv.col = x->ss[0].mv.row = 0; |
144 x->ss[0].offset = 0; | 130 x->ss[0].offset = 0; |
145 | 131 |
146 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { | 132 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
(...skipping 235 matching lines...)
382 unsigned int *sse1, | 368 unsigned int *sse1, |
383 const uint8_t *second_pred, | 369 const uint8_t *second_pred, |
384 int w, int h) { | 370 int w, int h) { |
385 const uint8_t *z = x->plane[0].src.buf; | 371 const uint8_t *z = x->plane[0].src.buf; |
386 const int src_stride = x->plane[0].src.stride; | 372 const int src_stride = x->plane[0].src.stride; |
387 const MACROBLOCKD *xd = &x->e_mbd; | 373 const MACROBLOCKD *xd = &x->e_mbd; |
388 unsigned int besterr = INT_MAX; | 374 unsigned int besterr = INT_MAX; |
389 unsigned int sse; | 375 unsigned int sse; |
390 unsigned int whichdir; | 376 unsigned int whichdir; |
391 int thismse; | 377 int thismse; |
392 unsigned int halfiters = iters_per_step; | 378 const unsigned int halfiters = iters_per_step; |
393 unsigned int quarteriters = iters_per_step; | 379 const unsigned int quarteriters = iters_per_step; |
394 unsigned int eighthiters = iters_per_step; | 380 const unsigned int eighthiters = iters_per_step; |
395 | 381 |
396 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); | 382 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
397 const int y_stride = xd->plane[0].pre[0].stride; | 383 const int y_stride = xd->plane[0].pre[0].stride; |
398 const int offset = bestmv->row * y_stride + bestmv->col; | 384 const int offset = bestmv->row * y_stride + bestmv->col; |
399 const uint8_t *y = xd->plane[0].pre[0].buf + offset; | 385 const uint8_t *y = xd->plane[0].pre[0].buf + offset; |
400 | 386 |
401 int rr = ref_mv->row; | 387 int rr = ref_mv->row; |
402 int rc = ref_mv->col; | 388 int rc = ref_mv->col; |
403 int br = bestmv->row * 8; | 389 int br = bestmv->row * 8; |
404 int bc = bestmv->col * 8; | 390 int bc = bestmv->col * 8; |
405 int hstep = 4; | 391 int hstep = 4; |
406 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); | 392 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
407 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); | 393 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
408 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); | 394 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
409 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); | 395 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
410 | 396 |
411 int tr = br; | 397 int tr = br; |
412 int tc = bc; | 398 int tc = bc; |
413 | 399 |
414 // central mv | 400 // central mv |
415 bestmv->row *= 8; | 401 bestmv->row *= 8; |
416 bestmv->col *= 8; | 402 bestmv->col *= 8; |
417 | 403 |
418 // calculate central point error | 404 // calculate central point error |
419 // TODO(yunqingwang): central pointer error was already calculated in full- | 405 // TODO(yunqingwang): central pointer error was already calculated in full- |
420 // pixel search, and can be passed in this function. | 406 // pixel search, and can be passed in this function. |
421 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); | 407 vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); |
422 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); | 408 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); |
423 *distortion = besterr; | 409 *distortion = besterr; |
424 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); | 410 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
425 | 411 |
426 // Each subsequent iteration checks at least one point in | 412 // Each subsequent iteration checks at least one point in |
427 // common with the last iteration could be 2 ( if diag selected) | 413 // common with the last iteration could be 2 ( if diag selected) |
428 // 1/2 pel | 414 // 1/2 pel |
429 FIRST_LEVEL_CHECKS; | 415 FIRST_LEVEL_CHECKS; |
430 if (halfiters > 1) { | 416 if (halfiters > 1) { |
431 SECOND_LEVEL_CHECKS; | 417 SECOND_LEVEL_CHECKS; |
(...skipping 75 matching lines...)
507 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates | 493 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates |
508 | 494 |
509 // Generic pattern search function that searches over multiple scales. | 495 // Generic pattern search function that searches over multiple scales. |
510 // Each scale can have a different number of candidates and shape of | 496 // Each scale can have a different number of candidates and shape of |
511 // candidates as indicated in the num_candidates and candidates arrays | 497 // candidates as indicated in the num_candidates and candidates arrays |
512 // passed into this function | 498 // passed into this function |
// vp9_pattern_search: multi-scale pattern search around ref_mv.
//   x              - macroblock state; supplies the source plane, the
//                    prediction plane (x->e_mbd), MV limits and SAD cost tables.
//   ref_mv         - starting MV; clamped IN PLACE to the x->mv_* limits.
//   search_param   - index into search_param_to_steps, selecting the largest
//                    scale to try (10 for 0 ... 0 for 10).
//   sad_per_bit    - weight passed to mvsad_err_cost.
//   do_init_search - if non-zero, first probe every scale 0..s around the
//                    start point and begin from the best one.
//   do_refine      - if non-zero, finish with up to 16 rounds over the four
//                    1-away neighbours.
//   vfp            - function table; vfp->sdf is used for every SAD here.
//   use_mvcost     - used only by the OLD final return in the visible code.
//                    NOTE(review): CHECK_BETTER is defined outside this view
//                    and may also read it -- confirm.
//   center_mv      - MV that costs are measured against; shifted right by 3
//                    into fcenter_mv for mvsad_err_cost.
//   best_mv        - output: receives the final (br, bc).
//   num_candidates / candidates - per-scale candidate count and offsets.
// Review notes on the NEW side:
//   * Buffers are carried as `struct buf_2d` pointers and each candidate
//     address is formed by get_buf_from_mv(), replacing the repeated
//     base_offset + row * stride + col arithmetic.
//   * this_mv and best_site are now declared in the scopes that use them.
//   * The function now returns bestsad. The variance-based cost the OLD tail
//     attempted is provided by vp9_get_mvpred_var on the NEW side.
//   * NOTE(review): CHECK_BETTER is presumably what updates bestsad and
//     best_site from thissad, this_mv and i, since both are read right after
//     each use -- confirm against the macro definition.
513 static int vp9_pattern_search(const MACROBLOCK *x, | 499 static int vp9_pattern_search(const MACROBLOCK *x, |
514 MV *ref_mv, | 500 MV *ref_mv, |
515 int search_param, | 501 int search_param, |
516 int sad_per_bit, | 502 int sad_per_bit, |
517 int do_init_search, | 503 int do_init_search, int do_refine, |
518 int do_refine, | |
519 const vp9_variance_fn_ptr_t *vfp, | 504 const vp9_variance_fn_ptr_t *vfp, |
520 int use_mvcost, | 505 int use_mvcost, |
521 const MV *center_mv, MV *best_mv, | 506 const MV *center_mv, MV *best_mv, |
522 const int num_candidates[MAX_PATTERN_SCALES], | 507 const int num_candidates[MAX_PATTERN_SCALES], |
523 const MV candidates[MAX_PATTERN_SCALES] | 508 const MV candidates[MAX_PATTERN_SCALES] |
524 [MAX_PATTERN_CANDIDATES]) { | 509 [MAX_PATTERN_CANDIDATES]) { |
525 const MACROBLOCKD *const xd = &x->e_mbd; | 510 const MACROBLOCKD *const xd = &x->e_mbd; |
526 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { | 511 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { |
527 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | 512 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
528 }; | 513 }; |
529 int i, j, s, t; | 514 int i, j, s, t; |
530 const uint8_t *what = x->plane[0].src.buf; | 515 const struct buf_2d *const what = &x->plane[0].src; |
531 const int what_stride = x->plane[0].src.stride; | 516 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
532 const int in_what_stride = xd->plane[0].pre[0].stride; | |
533 int br, bc; | 517 int br, bc; |
534 MV this_mv; | |
535 int bestsad = INT_MAX; | 518 int bestsad = INT_MAX; |
536 int thissad; | 519 int thissad; |
537 const uint8_t *base_offset; | |
538 const uint8_t *this_offset; | |
539 int k = -1; | 520 int k = -1; |
540 int best_site = -1; | |
541 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 521 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
542 int best_init_s = search_param_to_steps[search_param]; | 522 int best_init_s = search_param_to_steps[search_param]; |
543 const int *mvjsadcost = x->nmvjointsadcost; | 523 const int *const mvjsadcost = x->nmvjointsadcost; |
544 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 524 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
545 | 525 |
546 // adjust ref_mv to make sure it is within MV range | 526 // adjust ref_mv to make sure it is within MV range |
547 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); | 527 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
548 br = ref_mv->row; | 528 br = ref_mv->row; |
549 bc = ref_mv->col; | 529 bc = ref_mv->col; |
550 | 530 |
551 // Work out the start point for the search | 531 // Work out the start point for the search |
552 base_offset = xd->plane[0].pre[0].buf; | 532 bestsad = vfp->sdf(what->buf, what->stride, |
553 this_offset = base_offset + (br * in_what_stride) + bc; | 533 get_buf_from_mv(in_what, ref_mv), in_what->stride, |
554 this_mv.row = br; | 534 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, |
555 this_mv.col = bc; | 535 mvjsadcost, mvsadcost, sad_per_bit); |
556 bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) | |
557 + mvsad_err_cost(&this_mv, &fcenter_mv, | |
558 mvjsadcost, mvsadcost, sad_per_bit); | |
559 | 536 |
560 // Search all possible scales upto the search param around the center point | 537 // Search all possible scales upto the search param around the center point |
561 // pick the scale of the point that is best as the starting scale of | 538 // pick the scale of the point that is best as the starting scale of |
562 // further steps around it. | 539 // further steps around it. |
563 if (do_init_search) { | 540 if (do_init_search) { |
564 s = best_init_s; | 541 s = best_init_s; |
565 best_init_s = -1; | 542 best_init_s = -1; |
566 for (t = 0; t <= s; ++t) { | 543 for (t = 0; t <= s; ++t) { |
567 best_site = -1; | 544 int best_site = -1; |
568 if (check_bounds(x, br, bc, 1 << t)) { | 545 if (check_bounds(x, br, bc, 1 << t)) { |
569 for (i = 0; i < num_candidates[t]; i++) { | 546 for (i = 0; i < num_candidates[t]; i++) { |
570 this_mv.row = br + candidates[t][i].row; | 547 const MV this_mv = {br + candidates[t][i].row, |
571 this_mv.col = bc + candidates[t][i].col; | 548 bc + candidates[t][i].col}; |
572 this_offset = base_offset + (this_mv.row * in_what_stride) + | 549 thissad = vfp->sdf(what->buf, what->stride, |
573 this_mv.col; | 550 get_buf_from_mv(in_what, &this_mv), |
574 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 551 in_what->stride, bestsad); |
575 bestsad); | |
576 CHECK_BETTER | 552 CHECK_BETTER |
577 } | 553 } |
578 } else { | 554 } else { |
579 for (i = 0; i < num_candidates[t]; i++) { | 555 for (i = 0; i < num_candidates[t]; i++) { |
580 this_mv.row = br + candidates[t][i].row; | 556 const MV this_mv = {br + candidates[t][i].row, |
581 this_mv.col = bc + candidates[t][i].col; | 557 bc + candidates[t][i].col}; |
582 if (!is_mv_in(x, &this_mv)) | 558 if (!is_mv_in(x, &this_mv)) |
583 continue; | 559 continue; |
584 this_offset = base_offset + (this_mv.row * in_what_stride) + | 560 thissad = vfp->sdf(what->buf, what->stride, |
585 this_mv.col; | 561 get_buf_from_mv(in_what, &this_mv), |
586 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 562 in_what->stride, bestsad); |
587 bestsad); | |
588 CHECK_BETTER | 563 CHECK_BETTER |
589 } | 564 } |
590 } | 565 } |
591 if (best_site == -1) { | 566 if (best_site == -1) { |
592 continue; | 567 continue; |
593 } else { | 568 } else { |
594 best_init_s = t; | 569 best_init_s = t; |
595 k = best_site; | 570 k = best_site; |
596 } | 571 } |
597 } | 572 } |
598 if (best_init_s != -1) { | 573 if (best_init_s != -1) { |
599 br += candidates[best_init_s][k].row; | 574 br += candidates[best_init_s][k].row; |
600 bc += candidates[best_init_s][k].col; | 575 bc += candidates[best_init_s][k].col; |
601 } | 576 } |
602 } | 577 } |
603 | 578 |
604 // If the center point is still the best, just skip this and move to | 579 // If the center point is still the best, just skip this and move to |
605 // the refinement step. | 580 // the refinement step. |
606 if (best_init_s != -1) { | 581 if (best_init_s != -1) { |
| 582 int best_site = -1; |
607 s = best_init_s; | 583 s = best_init_s; |
608 best_site = -1; | 584 |
609 do { | 585 do { |
610 // No need to search all 6 points the 1st time if initial search was used | 586 // No need to search all 6 points the 1st time if initial search was used |
611 if (!do_init_search || s != best_init_s) { | 587 if (!do_init_search || s != best_init_s) { |
612 if (check_bounds(x, br, bc, 1 << s)) { | 588 if (check_bounds(x, br, bc, 1 << s)) { |
613 for (i = 0; i < num_candidates[s]; i++) { | 589 for (i = 0; i < num_candidates[s]; i++) { |
614 this_mv.row = br + candidates[s][i].row; | 590 const MV this_mv = {br + candidates[s][i].row, |
615 this_mv.col = bc + candidates[s][i].col; | 591 bc + candidates[s][i].col}; |
616 this_offset = base_offset + (this_mv.row * in_what_stride) + | 592 thissad = vfp->sdf(what->buf, what->stride, |
617 this_mv.col; | 593 get_buf_from_mv(in_what, &this_mv), |
618 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 594 in_what->stride, bestsad); |
619 bestsad); | |
620 CHECK_BETTER | 595 CHECK_BETTER |
621 } | 596 } |
622 } else { | 597 } else { |
623 for (i = 0; i < num_candidates[s]; i++) { | 598 for (i = 0; i < num_candidates[s]; i++) { |
624 this_mv.row = br + candidates[s][i].row; | 599 const MV this_mv = {br + candidates[s][i].row, |
625 this_mv.col = bc + candidates[s][i].col; | 600 bc + candidates[s][i].col}; |
626 if (!is_mv_in(x, &this_mv)) | 601 if (!is_mv_in(x, &this_mv)) |
627 continue; | 602 continue; |
628 this_offset = base_offset + (this_mv.row * in_what_stride) + | 603 thissad = vfp->sdf(what->buf, what->stride, |
629 this_mv.col; | 604 get_buf_from_mv(in_what, &this_mv), |
630 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 605 in_what->stride, bestsad); |
631 bestsad); | |
632 CHECK_BETTER | 606 CHECK_BETTER |
633 } | 607 } |
634 } | 608 } |
635 | 609 |
636 if (best_site == -1) { | 610 if (best_site == -1) { |
637 continue; | 611 continue; |
638 } else { | 612 } else { |
639 br += candidates[s][best_site].row; | 613 br += candidates[s][best_site].row; |
640 bc += candidates[s][best_site].col; | 614 bc += candidates[s][best_site].col; |
641 k = best_site; | 615 k = best_site; |
642 } | 616 } |
643 } | 617 } |
644 | 618 |
645 do { | 619 do { |
646 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; | 620 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; |
647 best_site = -1; | 621 best_site = -1; |
648 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; | 622 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; |
649 next_chkpts_indices[1] = k; | 623 next_chkpts_indices[1] = k; |
650 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; | 624 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; |
651 | 625 |
652 if (check_bounds(x, br, bc, 1 << s)) { | 626 if (check_bounds(x, br, bc, 1 << s)) { |
653 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { | 627 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
654 this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; | 628 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, |
655 this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; | 629 bc + candidates[s][next_chkpts_indices[i]].col}; |
656 this_offset = base_offset + (this_mv.row * (in_what_stride)) + | 630 thissad = vfp->sdf(what->buf, what->stride, |
657 this_mv.col; | 631 get_buf_from_mv(in_what, &this_mv), |
658 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 632 in_what->stride, bestsad); |
659 bestsad); | |
660 CHECK_BETTER | 633 CHECK_BETTER |
661 } | 634 } |
662 } else { | 635 } else { |
663 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { | 636 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
664 this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; | 637 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, |
665 this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; | 638 bc + candidates[s][next_chkpts_indices[i]].col}; |
666 if (!is_mv_in(x, &this_mv)) | 639 if (!is_mv_in(x, &this_mv)) |
667 continue; | 640 continue; |
668 this_offset = base_offset + (this_mv.row * (in_what_stride)) + | 641 thissad = vfp->sdf(what->buf, what->stride, |
669 this_mv.col; | 642 get_buf_from_mv(in_what, &this_mv), |
670 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 643 in_what->stride, bestsad); |
671 bestsad); | |
672 CHECK_BETTER | 644 CHECK_BETTER |
673 } | 645 } |
674 } | 646 } |
675 | 647 |
676 if (best_site != -1) { | 648 if (best_site != -1) { |
677 k = next_chkpts_indices[best_site]; | 649 k = next_chkpts_indices[best_site]; |
678 br += candidates[s][k].row; | 650 br += candidates[s][k].row; |
679 bc += candidates[s][k].col; | 651 bc += candidates[s][k].col; |
680 } | 652 } |
681 } while (best_site != -1); | 653 } while (best_site != -1); |
682 } while (s--); | 654 } while (s--); |
683 } | 655 } |
684 | 656 |
685 // Check 4 1-away neighbors if do_refine is true. | 657 // Check 4 1-away neighbors if do_refine is true. |
686 // For most well-designed schemes do_refine will not be necessary. | 658 // For most well-designed schemes do_refine will not be necessary. |
687 if (do_refine) { | 659 if (do_refine) { |
688 static const MV neighbors[4] = { {0, -1}, { -1, 0}, {1, 0}, {0, 1} }; | 660 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; |
| 661 |
689 for (j = 0; j < 16; j++) { | 662 for (j = 0; j < 16; j++) { |
690 best_site = -1; | 663 int best_site = -1; |
691 if (check_bounds(x, br, bc, 1)) { | 664 if (check_bounds(x, br, bc, 1)) { |
692 for (i = 0; i < 4; i++) { | 665 for (i = 0; i < 4; i++) { |
693 this_mv.row = br + neighbors[i].row; | 666 const MV this_mv = {br + neighbors[i].row, |
694 this_mv.col = bc + neighbors[i].col; | 667 bc + neighbors[i].col}; |
695 this_offset = base_offset + this_mv.row * in_what_stride + | 668 thissad = vfp->sdf(what->buf, what->stride, |
696 this_mv.col; | 669 get_buf_from_mv(in_what, &this_mv), |
697 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 670 in_what->stride, bestsad); |
698 bestsad); | |
699 CHECK_BETTER | 671 CHECK_BETTER |
700 } | 672 } |
701 } else { | 673 } else { |
702 for (i = 0; i < 4; i++) { | 674 for (i = 0; i < 4; i++) { |
703 this_mv.row = br + neighbors[i].row; | 675 const MV this_mv = {br + neighbors[i].row, |
704 this_mv.col = bc + neighbors[i].col; | 676 bc + neighbors[i].col}; |
705 if (!is_mv_in(x, &this_mv)) | 677 if (!is_mv_in(x, &this_mv)) |
706 continue; | 678 continue; |
707 this_offset = base_offset + this_mv.row * in_what_stride + | 679 thissad = vfp->sdf(what->buf, what->stride, |
708 this_mv.col; | 680 get_buf_from_mv(in_what, &this_mv), |
709 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | 681 in_what->stride, bestsad); |
710 bestsad); | |
711 CHECK_BETTER | 682 CHECK_BETTER |
712 } | 683 } |
713 } | 684 } |
714 | 685 |
715 if (best_site == -1) { | 686 if (best_site == -1) { |
716 break; | 687 break; |
717 } else { | 688 } else { |
718 br += neighbors[best_site].row; | 689 br += neighbors[best_site].row; |
719 bc += neighbors[best_site].col; | 690 bc += neighbors[best_site].col; |
720 } | 691 } |
721 } | 692 } |
722 } | 693 } |
723 | 694 |
724 best_mv->row = br; | 695 best_mv->row = br; |
725 best_mv->col = bc; | 696 best_mv->col = bc; |
726 | 697 |
// Review note on the OLD tail below: in C, binary `+` binds tighter than `?:`,
// so `vfp->vf(...) + use_mvcost ? mv_err_cost(...) : 0` parses as
// `(vfp->vf(...) + use_mvcost) ? mv_err_cost(...) : 0`. The OLD code therefore
// returned either the MV cost or 0, never variance plus cost. The NEW side
// drops this tail and returns bestsad.
727 this_offset = base_offset + (best_mv->row * in_what_stride) + | 698 return bestsad; |
728 best_mv->col; | |
729 this_mv.row = best_mv->row * 8; | |
730 this_mv.col = best_mv->col * 8; | |
731 if (bestsad == INT_MAX) | |
732 return INT_MAX; | |
733 | |
734 return vfp->vf(what, what_stride, this_offset, in_what_stride, | |
735 (unsigned int *)&bestsad) + | |
736 use_mvcost ? mv_err_cost(&this_mv, center_mv, | |
737 x->nmvjointcost, x->mvcost, x->errorperbit) | |
738 : 0; | |
739 } | 699 } |
740 | 700 |
// vp9_get_mvpred_var (present on the NEW side only).
// Returns vfp->vf() of the plane-0 source block against the plane-0
// prediction block addressed by best_mv, plus -- only when use_mvcost is
// non-zero -- mv_err_cost() of best_mv relative to center_mv.
//   x          - supplies the source and prediction buffers and the MV cost
//                tables (nmvjointcost, mvcost, errorperbit).
//   best_mv    - used as-is to address the prediction buffer and multiplied
//                by 8 for costing. NOTE(review): presumably a full-pel to
//                1/8-pel conversion; confirm against the MV unit conventions.
//   center_mv  - MV the cost is measured against.
//   vfp        - function table providing vf.
// The SSE written by vf goes to a local and is discarded. The conditional is
// parenthesized, so the sum is variance + (cost or 0).
| 701 int vp9_get_mvpred_var(const MACROBLOCK *x, |
| 702 const MV *best_mv, const MV *center_mv, |
| 703 const vp9_variance_fn_ptr_t *vfp, |
| 704 int use_mvcost) { |
| 705 const MACROBLOCKD *const xd = &x->e_mbd; |
| 706 const struct buf_2d *const what = &x->plane[0].src; |
| 707 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
| 708 const MV mv = {best_mv->row * 8, best_mv->col * 8}; |
| 709 unsigned int unused; |
| 710 |
| 711 return vfp->vf(what->buf, what->stride, |
| 712 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + |
| 713 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, |
| 714 x->mvcost, x->errorperbit) : 0); |
| 715 } |
| 716 |
// vp9_get_mvpred_av_var (present on the NEW side only).
// Same shape as vp9_get_mvpred_var, but the distortion term comes from
// vfp->svaf(), which additionally takes second_pred.
//   - svaf is called with the prediction block at best_mv first, the two
//     integer arguments after its stride both 0, then the source block, an
//     SSE output that is discarded, and second_pred.
//     NOTE(review): the two zeros are presumably sub-pel x/y offsets and svaf
//     presumably averages the prediction with second_pred -- confirm against
//     the vp9_variance_fn_ptr_t definition.
//   - When use_mvcost is non-zero, mv_err_cost() of (best_mv * 8) relative to
//     center_mv is added; otherwise 0 is added.
| 717 int vp9_get_mvpred_av_var(const MACROBLOCK *x, |
| 718 const MV *best_mv, const MV *center_mv, |
| 719 const uint8_t *second_pred, |
| 720 const vp9_variance_fn_ptr_t *vfp, |
| 721 int use_mvcost) { |
| 722 const MACROBLOCKD *const xd = &x->e_mbd; |
| 723 const struct buf_2d *const what = &x->plane[0].src; |
| 724 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
| 725 const MV mv = {best_mv->row * 8, best_mv->col * 8}; |
| 726 unsigned int unused; |
| 727 |
| 728 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, |
| 729 what->buf, what->stride, &unused, second_pred) + |
| 730 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, |
| 731 x->mvcost, x->errorperbit) : 0); |
| 732 } |
741 | 733 |
742 int vp9_hex_search(const MACROBLOCK *x, | 734 int vp9_hex_search(const MACROBLOCK *x, |
743 MV *ref_mv, | 735 MV *ref_mv, |
744 int search_param, | 736 int search_param, |
745 int sad_per_bit, | 737 int sad_per_bit, |
746 int do_init_search, | 738 int do_init_search, |
747 const vp9_variance_fn_ptr_t *vfp, | 739 const vp9_variance_fn_ptr_t *vfp, |
748 int use_mvcost, | 740 int use_mvcost, |
749 const MV *center_mv, MV *best_mv) { | 741 const MV *center_mv, MV *best_mv) { |
750 // First scale has 8-closest points, the rest have 6 points in hex shape | 742 // First scale has 8-closest points, the rest have 6 points in hex shape |
(...skipping 95 matching lines...)
846 {-256, 256}, {-256, 0}}, | 838 {-256, 256}, {-256, 0}}, |
847 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, | 839 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, |
848 {-512, 512}, {-512, 0}}, | 840 {-512, 512}, {-512, 0}}, |
849 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, | 841 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, |
850 {0, 1024}, {-1024, 1024}, {-1024, 0}}, | 842 {0, 1024}, {-1024, 1024}, {-1024, 0}}, |
851 }; | 843 }; |
852 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, | 844 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
853 do_init_search, 0, vfp, use_mvcost, | 845 do_init_search, 0, vfp, use_mvcost, |
854 center_mv, best_mv, | 846 center_mv, best_mv, |
855 square_num_candidates, square_candidates); | 847 square_num_candidates, square_candidates); |
856 }; | 848 } |
857 | |
858 // Number of candidates in first hex search | |
859 #define FIRST_HEX_CANDIDATES 6 | |
860 // Index of previous hex search's best match | |
861 #define PRE_BEST_CANDIDATE 6 | |
862 // Number of candidates in following hex search | |
863 #define NEXT_HEX_CANDIDATES 3 | |
864 // Number of candidates in refining search | |
865 #define REFINE_CANDIDATES 4 | |
866 | 849 |
867 int vp9_fast_hex_search(const MACROBLOCK *x, | 850 int vp9_fast_hex_search(const MACROBLOCK *x, |
868 MV *ref_mv, | 851 MV *ref_mv, |
869 int search_param, | 852 int search_param, |
870 int sad_per_bit, | 853 int sad_per_bit, |
| 854 int do_init_search, // must be zero for fast_hex |
871 const vp9_variance_fn_ptr_t *vfp, | 855 const vp9_variance_fn_ptr_t *vfp, |
872 int use_mvcost, | 856 int use_mvcost, |
873 const MV *center_mv, | 857 const MV *center_mv, |
874 MV *best_mv) { | 858 MV *best_mv) { |
875 const MACROBLOCKD* const xd = &x->e_mbd; | 859 return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), |
876 static const MV hex[FIRST_HEX_CANDIDATES] = { | 860 sad_per_bit, do_init_search, vfp, use_mvcost, |
877 { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} | 861 center_mv, best_mv); |
878 }; | 862 } |
879 static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = { | |
880 {{ -2, 0}, { -1, -2}, {1, -2}}, | |
881 {{ -1, -2}, {1, -2}, {2, 0}}, | |
882 {{1, -2}, {2, 0}, {1, 2}}, | |
883 {{2, 0}, {1, 2}, { -1, 2}}, | |
884 {{1, 2}, { -1, 2}, { -2, 0}}, | |
885 {{ -1, 2}, { -2, 0}, { -1, -2}} | |
886 }; | |
887 static const MV neighbors[REFINE_CANDIDATES] = { | |
888 {0, -1}, { -1, 0}, {1, 0}, {0, 1} | |
889 }; | |
890 int i, j; | |
891 | 863 |
892 const uint8_t *what = x->plane[0].src.buf; | 864 int vp9_fast_dia_search(const MACROBLOCK *x, |
893 const int what_stride = x->plane[0].src.stride; | 865 MV *ref_mv, |
894 const int in_what_stride = xd->plane[0].pre[0].stride; | 866 int search_param, |
895 int br, bc; | 867 int sad_per_bit, |
896 MV this_mv; | 868 int do_init_search, |
897 unsigned int bestsad = 0x7fffffff; | 869 const vp9_variance_fn_ptr_t *vfp, |
898 unsigned int thissad; | 870 int use_mvcost, |
899 const uint8_t *base_offset; | 871 const MV *center_mv, |
900 const uint8_t *this_offset; | 872 MV *best_mv) { |
901 int k = -1; | 873 return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), |
902 int best_site = -1; | 874 sad_per_bit, do_init_search, vfp, use_mvcost, |
903 const int max_hex_search = 512; | 875 center_mv, best_mv); |
904 const int max_dia_search = 32; | |
905 | |
906 const int *mvjsadcost = x->nmvjointsadcost; | |
907 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | |
908 | |
909 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | |
910 | |
911 // Adjust ref_mv to make sure it is within MV range | |
912 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); | |
913 br = ref_mv->row; | |
914 bc = ref_mv->col; | |
915 | |
916 // Check the start point | |
917 base_offset = xd->plane[0].pre[0].buf; | |
918 this_offset = base_offset + (br * in_what_stride) + bc; | |
919 this_mv.row = br; | |
920 this_mv.col = bc; | |
921 bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) | |
922 + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, | |
923 sad_per_bit); | |
924 | |
925 // Initial 6-point hex search | |
926 if (check_bounds(x, br, bc, 2)) { | |
927 for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { | |
928 this_mv.row = br + hex[i].row; | |
929 this_mv.col = bc + hex[i].col; | |
930 this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; | |
931 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | |
932 bestsad); | |
933 CHECK_BETTER | |
934 } | |
935 } else { | |
936 for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { | |
937 this_mv.row = br + hex[i].row; | |
938 this_mv.col = bc + hex[i].col; | |
939 if (!is_mv_in(x, &this_mv)) | |
940 continue; | |
941 this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; | |
942 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | |
943 bestsad); | |
944 CHECK_BETTER | |
945 } | |
946 } | |
947 | |
948 // Continue hex search if we find a better match in first round | |
949 if (best_site != -1) { | |
950 br += hex[best_site].row; | |
951 bc += hex[best_site].col; | |
952 k = best_site; | |
953 | |
954 // Allow search covering maximum MV range | |
955 for (j = 1; j < max_hex_search; j++) { | |
956 best_site = -1; | |
957 | |
958 if (check_bounds(x, br, bc, 2)) { | |
959 for (i = 0; i < 3; i++) { | |
960 this_mv.row = br + next_chkpts[k][i].row; | |
961 this_mv.col = bc + next_chkpts[k][i].col; | |
962 this_offset = base_offset + (this_mv.row * in_what_stride) + | |
963 this_mv.col; | |
964 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | |
965 bestsad); | |
966 CHECK_BETTER | |
967 } | |
968 } else { | |
969 for (i = 0; i < 3; i++) { | |
970 this_mv.row = br + next_chkpts[k][i].row; | |
971 this_mv.col = bc + next_chkpts[k][i].col; | |
972 if (!is_mv_in(x, &this_mv)) | |
973 continue; | |
974 this_offset = base_offset + (this_mv.row * in_what_stride) + | |
975 this_mv.col; | |
976 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | |
977 bestsad); | |
978 CHECK_BETTER | |
979 } | |
980 } | |
981 | |
982 if (best_site == -1) { | |
983 break; | |
984 } else { | |
985 br += next_chkpts[k][best_site].row; | |
986 bc += next_chkpts[k][best_site].col; | |
987 k += 5 + best_site; | |
988 if (k >= 12) k -= 12; | |
989 else if (k >= 6) k -= 6; | |
990 } | |
991 } | |
992 } | |
993 | |
994 // Check 4 1-away neighbors | |
995 for (j = 0; j < max_dia_search; j++) { | |
996 best_site = -1; | |
997 | |
998 if (check_bounds(x, br, bc, 1)) { | |
999 for (i = 0; i < REFINE_CANDIDATES; i++) { | |
1000 this_mv.row = br + neighbors[i].row; | |
1001 this_mv.col = bc + neighbors[i].col; | |
1002 this_offset = base_offset + (this_mv.row * in_what_stride) + | |
1003 this_mv.col; | |
1004 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | |
1005 bestsad); | |
1006 CHECK_BETTER | |
1007 } | |
1008 } else { | |
1009 for (i = 0; i < REFINE_CANDIDATES; i++) { | |
1010 this_mv.row = br + neighbors[i].row; | |
1011 this_mv.col = bc + neighbors[i].col; | |
1012 if (!is_mv_in(x, &this_mv)) | |
1013 continue; | |
1014 this_offset = base_offset + (this_mv.row * in_what_stride) + | |
1015 this_mv.col; | |
1016 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, | |
1017 bestsad); | |
1018 CHECK_BETTER | |
1019 } | |
1020 } | |
1021 | |
1022 if (best_site == -1) { | |
1023 break; | |
1024 } else { | |
1025 br += neighbors[best_site].row; | |
1026 bc += neighbors[best_site].col; | |
1027 } | |
1028 } | |
1029 | |
1030 best_mv->row = br; | |
1031 best_mv->col = bc; | |
1032 | |
1033 return bestsad; | |
1034 } | 876 } |
1035 | 877 |
1036 #undef CHECK_BETTER | 878 #undef CHECK_BETTER |
1037 | 879 |
1038 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, | 880 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, |
1039 int search_param, int sad_per_bit, int *num00, | 881 int search_param, int sad_per_bit, int *num00, |
1040 const vp9_variance_fn_ptr_t *fn_ptr, | 882 const vp9_variance_fn_ptr_t *fn_ptr, |
1041 int *mvjcost, int *mvcost[2], | 883 int *mvjcost, int *mvcost[2], |
1042 const MV *center_mv) { | 884 const MV *center_mv) { |
1043 const MACROBLOCKD *const xd = &x->e_mbd; | 885 const MACROBLOCKD *const xd = &x->e_mbd; |
1044 const uint8_t *what = x->plane[0].src.buf; | 886 const uint8_t *what = x->plane[0].src.buf; |
1045 const int what_stride = x->plane[0].src.stride; | 887 const int what_stride = x->plane[0].src.stride; |
1046 const uint8_t *in_what; | 888 const uint8_t *in_what; |
1047 const int in_what_stride = xd->plane[0].pre[0].stride; | 889 const int in_what_stride = xd->plane[0].pre[0].stride; |
1048 const uint8_t *best_address; | |
1049 | |
1050 MV this_mv; | |
1051 | 890 |
1052 unsigned int bestsad = INT_MAX; | 891 unsigned int bestsad = INT_MAX; |
1053 int ref_row, ref_col; | 892 int ref_row, ref_col; |
1054 | 893 |
1055 unsigned int thissad; | 894 unsigned int thissad; |
1056 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 895 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
1057 | 896 |
1058 const int *mvjsadcost = x->nmvjointsadcost; | 897 const int *mvjsadcost = x->nmvjointsadcost; |
1059 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 898 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
1060 | 899 |
1061 int tr, tc; | 900 int tr, tc; |
1062 int best_tr = 0; | 901 int best_tr = 0; |
1063 int best_tc = 0; | 902 int best_tc = 0; |
1064 int range = 64; | 903 int range = 64; |
1065 | 904 |
1066 int start_col, end_col; | 905 int start_col, end_col; |
1067 int start_row, end_row; | 906 int start_row, end_row; |
1068 int i; | 907 int i; |
1069 | 908 |
1070 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); | 909 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
1071 ref_row = ref_mv->row; | 910 ref_row = ref_mv->row; |
1072 ref_col = ref_mv->col; | 911 ref_col = ref_mv->col; |
1073 *num00 = 11; | 912 *num00 = 11; |
1074 best_mv->row = ref_row; | 913 best_mv->row = ref_row; |
1075 best_mv->col = ref_col; | 914 best_mv->col = ref_col; |
1076 | 915 |
1077 // Work out the start point for the search | 916 // Work out the start point for the search |
1078 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; | 917 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; |
1079 best_address = in_what; | |
1080 | 918 |
1081 // Check the starting position | 919 // Check the starting position |
1082 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) | 920 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) |
1083 + mvsad_err_cost(best_mv, &fcenter_mv, | 921 + mvsad_err_cost(best_mv, &fcenter_mv, |
1084 mvjsadcost, mvsadcost, sad_per_bit); | 922 mvjsadcost, mvsadcost, sad_per_bit); |
1085 | 923 |
1086 start_row = MAX(-range, x->mv_row_min - ref_row); | 924 start_row = MAX(-range, x->mv_row_min - ref_row); |
1087 start_col = MAX(-range, x->mv_col_min - ref_col); | 925 start_col = MAX(-range, x->mv_col_min - ref_col); |
1088 end_row = MIN(range, x->mv_row_max - ref_row); | 926 end_row = MIN(range, x->mv_row_max - ref_row); |
1089 end_col = MIN(range, x->mv_col_max - ref_col); | 927 end_col = MIN(range, x->mv_col_max - ref_col); |
1090 | 928 |
1091 for (tr = start_row; tr <= end_row; ++tr) { | 929 for (tr = start_row; tr <= end_row; ++tr) { |
1092 for (tc = start_col; tc <= end_col; tc += 4) { | 930 for (tc = start_col; tc <= end_col; tc += 4) { |
1093 if ((tc + 3) <= end_col) { | 931 if ((tc + 3) <= end_col) { |
1094 unsigned int sad_array[4]; | 932 unsigned int sad_array[4]; |
1095 unsigned char const *addr_ref[4]; | 933 unsigned char const *addr_ref[4]; |
1096 for (i = 0; i < 4; ++i) | 934 for (i = 0; i < 4; ++i) |
1097 addr_ref[i] = in_what + tr * in_what_stride + tc + i; | 935 addr_ref[i] = in_what + tr * in_what_stride + tc + i; |
1098 | 936 |
1099 fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array); | 937 fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array); |
1100 | 938 |
1101 for (i = 0; i < 4; ++i) { | 939 for (i = 0; i < 4; ++i) { |
1102 if (sad_array[i] < bestsad) { | 940 if (sad_array[i] < bestsad) { |
1103 this_mv.row = ref_row + tr; | 941 const MV this_mv = {ref_row + tr, ref_col + tc + i}; |
1104 this_mv.col = ref_col + tc + i; | |
1105 thissad = sad_array[i] + | 942 thissad = sad_array[i] + |
1106 mvsad_err_cost(&this_mv, &fcenter_mv, | 943 mvsad_err_cost(&this_mv, &fcenter_mv, |
1107 mvjsadcost, mvsadcost, sad_per_bit); | 944 mvjsadcost, mvsadcost, sad_per_bit); |
1108 if (thissad < bestsad) { | 945 if (thissad < bestsad) { |
1109 bestsad = thissad; | 946 bestsad = thissad; |
1110 best_tr = tr; | 947 best_tr = tr; |
1111 best_tc = tc + i; | 948 best_tc = tc + i; |
1112 } | 949 } |
1113 } | 950 } |
1114 } | 951 } |
1115 } else { | 952 } else { |
1116 for (i = 0; i < end_col - tc; ++i) { | 953 for (i = 0; i < end_col - tc; ++i) { |
1117 const uint8_t *check_here = in_what + tr * in_what_stride + tc + i; | 954 const uint8_t *check_here = in_what + tr * in_what_stride + tc + i; |
1118 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 955 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
1119 bestsad); | 956 bestsad); |
1120 | 957 |
1121 if (thissad < bestsad) { | 958 if (thissad < bestsad) { |
1122 this_mv.row = ref_row + tr; | 959 const MV this_mv = {ref_row + tr, ref_col + tc + i}; |
1123 this_mv.col = ref_col + tc + i; | |
1124 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 960 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1125 mvjsadcost, mvsadcost, sad_per_bit); | 961 mvjsadcost, mvsadcost, sad_per_bit); |
1126 | 962 |
1127 if (thissad < bestsad) { | 963 if (thissad < bestsad) { |
1128 bestsad = thissad; | 964 bestsad = thissad; |
1129 best_tr = tr; | 965 best_tr = tr; |
1130 best_tc = tc + i; | 966 best_tc = tc + i; |
1131 } | 967 } |
1132 } | 968 } |
1133 } | 969 } |
1134 } | 970 } |
1135 } | 971 } |
1136 } | 972 } |
1137 | |
1138 best_mv->row += best_tr; | 973 best_mv->row += best_tr; |
1139 best_mv->col += best_tc; | 974 best_mv->col += best_tc; |
1140 | 975 return bestsad; |
1141 this_mv.row = best_mv->row * 8; | |
1142 this_mv.col = best_mv->col * 8; | |
1143 | |
1144 if (bestsad == INT_MAX) | |
1145 return INT_MAX; | |
1146 | |
1147 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, | |
1148 (unsigned int *)(&thissad)) + | |
1149 mv_err_cost(&this_mv, center_mv, | |
1150 mvjcost, mvcost, x->errorperbit); | |
1151 } | 976 } |
1152 | 977 |
1153 int vp9_diamond_search_sad_c(const MACROBLOCK *x, | 978 int vp9_diamond_search_sad_c(const MACROBLOCK *x, |
1154 MV *ref_mv, MV *best_mv, | 979 MV *ref_mv, MV *best_mv, |
1155 int search_param, int sad_per_bit, int *num00, | 980 int search_param, int sad_per_bit, int *num00, |
1156 const vp9_variance_fn_ptr_t *fn_ptr, | 981 const vp9_variance_fn_ptr_t *fn_ptr, |
1157 int *mvjcost, int *mvcost[2], | 982 int *mvjcost, int *mvcost[2], |
1158 const MV *center_mv) { | 983 const MV *center_mv) { |
1159 int i, j, step; | |
1160 | |
1161 const MACROBLOCKD *const xd = &x->e_mbd; | 984 const MACROBLOCKD *const xd = &x->e_mbd; |
1162 const uint8_t *what = x->plane[0].src.buf; | 985 const struct buf_2d *const what = &x->plane[0].src; |
1163 const int what_stride = x->plane[0].src.stride; | 986 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1164 const uint8_t *in_what; | |
1165 const int in_what_stride = xd->plane[0].pre[0].stride; | |
1166 const uint8_t *best_address; | |
1167 | |
1168 MV this_mv; | |
1169 | |
1170 int bestsad = INT_MAX; | |
1171 int best_site = 0; | |
1172 int last_site = 0; | |
1173 | |
1174 int ref_row, ref_col; | |
1175 int this_row_offset, this_col_offset; | |
1176 | |
1177 // search_param determines the length of the initial step and hence the number | 987 // search_param determines the length of the initial step and hence the number |
1178 // of iterations | 988 // of iterations |
1179 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = | 989 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = |
1180 // (MAX_FIRST_STEP/4) pel... etc. | 990 // (MAX_FIRST_STEP/4) pel... etc. |
1181 const search_site *const ss = &x->ss[search_param * x->searches_per_step]; | 991 const search_site *const ss = &x->ss[search_param * x->searches_per_step]; |
1182 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; | 992 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
1183 | |
1184 int thissad; | |
1185 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 993 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
1186 | |
1187 const int *mvjsadcost = x->nmvjointsadcost; | 994 const int *mvjsadcost = x->nmvjointsadcost; |
1188 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 995 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
| 996 const uint8_t *best_address; |
| 997 int best_sad = INT_MAX; |
| 998 int best_site = 0; |
| 999 int last_site = 0; |
| 1000 int i, j, step; |
1189 | 1001 |
1190 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); | 1002 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
1191 ref_row = ref_mv->row; | 1003 best_address = get_buf_from_mv(in_what, ref_mv); |
1192 ref_col = ref_mv->col; | |
1193 *num00 = 0; | 1004 *num00 = 0; |
1194 best_mv->row = ref_row; | 1005 *best_mv = *ref_mv; |
1195 best_mv->col = ref_col; | |
1196 | |
1197 // Work out the start point for the search | |
1198 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; | |
1199 best_address = in_what; | |
1200 | 1006 |
1201 // Check the starting position | 1007 // Check the starting position |
1202 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) | 1008 best_sad = fn_ptr->sdf(what->buf, what->stride, |
1203 + mvsad_err_cost(best_mv, &fcenter_mv, | 1009 in_what->buf, in_what->stride, 0x7fffffff) + |
1204 mvjsadcost, mvsadcost, sad_per_bit); | 1010 mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); |
1205 | 1011 |
1206 i = 1; | 1012 i = 1; |
1207 | 1013 |
1208 for (step = 0; step < tot_steps; step++) { | 1014 for (step = 0; step < tot_steps; step++) { |
1209 for (j = 0; j < x->searches_per_step; j++) { | 1015 for (j = 0; j < x->searches_per_step; j++) { |
1210 // Trap illegal vectors | 1016 const MV mv = {best_mv->row + ss[i].mv.row, |
1211 this_row_offset = best_mv->row + ss[i].mv.row; | 1017 best_mv->col + ss[i].mv.col}; |
1212 this_col_offset = best_mv->col + ss[i].mv.col; | 1018 if (is_mv_in(x, &mv)) { |
1213 | 1019 int sad = fn_ptr->sdf(what->buf, what->stride, |
1214 if ((this_col_offset > x->mv_col_min) && | 1020 best_address + ss[i].offset, in_what->stride, |
1215 (this_col_offset < x->mv_col_max) && | 1021 best_sad); |
1216 (this_row_offset > x->mv_row_min) && | 1022 if (sad < best_sad) { |
1217 (this_row_offset < x->mv_row_max)) { | 1023 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, |
1218 const uint8_t *const check_here = ss[i].offset + best_address; | 1024 sad_per_bit); |
1219 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1025 if (sad < best_sad) { |
1220 bestsad); | 1026 best_sad = sad; |
1221 | |
1222 if (thissad < bestsad) { | |
1223 this_mv.row = this_row_offset; | |
1224 this_mv.col = this_col_offset; | |
1225 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | |
1226 mvjsadcost, mvsadcost, sad_per_bit); | |
1227 | |
1228 if (thissad < bestsad) { | |
1229 bestsad = thissad; | |
1230 best_site = i; | 1027 best_site = i; |
1231 } | 1028 } |
1232 } | 1029 } |
1233 } | 1030 } |
1234 | 1031 |
1235 i++; | 1032 i++; |
1236 } | 1033 } |
1237 | 1034 |
1238 if (best_site != last_site) { | 1035 if (best_site != last_site) { |
1239 best_mv->row += ss[best_site].mv.row; | 1036 best_mv->row += ss[best_site].mv.row; |
1240 best_mv->col += ss[best_site].mv.col; | 1037 best_mv->col += ss[best_site].mv.col; |
1241 best_address += ss[best_site].offset; | 1038 best_address += ss[best_site].offset; |
1242 last_site = best_site; | 1039 last_site = best_site; |
1243 #if defined(NEW_DIAMOND_SEARCH) | 1040 #if defined(NEW_DIAMOND_SEARCH) |
1244 while (1) { | 1041 while (1) { |
1245 this_row_offset = best_mv->row + ss[best_site].mv.row; | 1042 const MV this_mv = {best_mv->row + ss[best_site].mv.row, |
1246 this_col_offset = best_mv->col + ss[best_site].mv.col; | 1043 best_mv->col + ss[best_site].mv.col}; |
1247 if ((this_col_offset > x->mv_col_min) && | 1044 if (is_mv_in(x, &this_mv)) { |
1248 (this_col_offset < x->mv_col_max) && | 1045 int sad = fn_ptr->sdf(what->buf, what->stride, |
1249 (this_row_offset > x->mv_row_min) && | 1046 best_address + ss[best_site].offset, |
1250 (this_row_offset < x->mv_row_max)) { | 1047 in_what->stride, best_sad); |
1251 check_here = ss[best_site].offset + best_address; | 1048 if (sad < best_sad) { |
1252 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1049 sad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1253 bestsad); | 1050 mvjsadcost, mvsadcost, sad_per_bit); |
1254 if (thissad < bestsad) { | 1051 if (sad < best_sad) { |
1255 this_mv.row = this_row_offset; | 1052 best_sad = sad; |
1256 this_mv.col = this_col_offset; | |
1257 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | |
1258 mvjsadcost, mvsadcost, sad_per_bit); | |
1259 if (thissad < bestsad) { | |
1260 bestsad = thissad; | |
1261 best_mv->row += ss[best_site].mv.row; | 1053 best_mv->row += ss[best_site].mv.row; |
1262 best_mv->col += ss[best_site].mv.col; | 1054 best_mv->col += ss[best_site].mv.col; |
1263 best_address += ss[best_site].offset; | 1055 best_address += ss[best_site].offset; |
1264 continue; | 1056 continue; |
1265 } | 1057 } |
1266 } | 1058 } |
1267 } | 1059 } |
1268 break; | 1060 break; |
1269 }; | 1061 }; |
1270 #endif | 1062 #endif |
1271 } else if (best_address == in_what) { | 1063 } else if (best_address == in_what->buf) { |
1272 (*num00)++; | 1064 (*num00)++; |
1273 } | 1065 } |
1274 } | 1066 } |
1275 | 1067 return best_sad; |
1276 this_mv.row = best_mv->row * 8; | |
1277 this_mv.col = best_mv->col * 8; | |
1278 | |
1279 if (bestsad == INT_MAX) | |
1280 return INT_MAX; | |
1281 | |
1282 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, | |
1283 (unsigned int *)(&thissad)) + | |
1284 mv_err_cost(&this_mv, center_mv, | |
1285 mvjcost, mvcost, x->errorperbit); | |
1286 } | 1068 } |
1287 | 1069 |
1288 int vp9_diamond_search_sadx4(const MACROBLOCK *x, | 1070 int vp9_diamond_search_sadx4(const MACROBLOCK *x, |
1289 MV *ref_mv, MV *best_mv, int search_param, | 1071 MV *ref_mv, MV *best_mv, int search_param, |
1290 int sad_per_bit, int *num00, | 1072 int sad_per_bit, int *num00, |
1291 const vp9_variance_fn_ptr_t *fn_ptr, | 1073 const vp9_variance_fn_ptr_t *fn_ptr, |
1292 int *mvjcost, int *mvcost[2], | 1074 int *mvjcost, int *mvcost[2], |
1293 const MV *center_mv) { | 1075 const MV *center_mv) { |
1294 int i, j, step; | 1076 int i, j, step; |
1295 | 1077 |
1296 const MACROBLOCKD *const xd = &x->e_mbd; | 1078 const MACROBLOCKD *const xd = &x->e_mbd; |
1297 uint8_t *what = x->plane[0].src.buf; | 1079 uint8_t *what = x->plane[0].src.buf; |
1298 const int what_stride = x->plane[0].src.stride; | 1080 const int what_stride = x->plane[0].src.stride; |
1299 const uint8_t *in_what; | 1081 const uint8_t *in_what; |
1300 const int in_what_stride = xd->plane[0].pre[0].stride; | 1082 const int in_what_stride = xd->plane[0].pre[0].stride; |
1301 const uint8_t *best_address; | 1083 const uint8_t *best_address; |
1302 | 1084 |
1303 MV this_mv; | |
1304 | |
1305 unsigned int bestsad = INT_MAX; | 1085 unsigned int bestsad = INT_MAX; |
1306 int best_site = 0; | 1086 int best_site = 0; |
1307 int last_site = 0; | 1087 int last_site = 0; |
1308 | 1088 |
1309 int ref_row; | 1089 int ref_row; |
1310 int ref_col; | 1090 int ref_col; |
1311 int this_row_offset; | |
1312 int this_col_offset; | |
1313 | 1091 |
1314 // search_param determines the length of the initial step and hence the number | 1092 // search_param determines the length of the initial step and hence the number |
1315 // of iterations. | 1093 // of iterations. |
1316 // 0 = initial step (MAX_FIRST_STEP) pel | 1094 // 0 = initial step (MAX_FIRST_STEP) pel |
1317 // 1 = (MAX_FIRST_STEP/2) pel, | 1095 // 1 = (MAX_FIRST_STEP/2) pel, |
1318 // 2 = (MAX_FIRST_STEP/4) pel... | 1096 // 2 = (MAX_FIRST_STEP/4) pel... |
1319 const search_site *ss = &x->ss[search_param * x->searches_per_step]; | 1097 const search_site *ss = &x->ss[search_param * x->searches_per_step]; |
1320 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; | 1098 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
1321 | 1099 |
1322 unsigned int thissad; | |
1323 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 1100 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
1324 | 1101 |
1325 const int *mvjsadcost = x->nmvjointsadcost; | 1102 const int *mvjsadcost = x->nmvjointsadcost; |
1326 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 1103 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
1327 | 1104 |
1328 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); | 1105 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
1329 ref_row = ref_mv->row; | 1106 ref_row = ref_mv->row; |
1330 ref_col = ref_mv->col; | 1107 ref_col = ref_mv->col; |
1331 *num00 = 0; | 1108 *num00 = 0; |
1332 best_mv->row = ref_row; | 1109 best_mv->row = ref_row; |
(...skipping 30 matching lines...) Expand all Loading... |
1363 unsigned char const *block_offset[4]; | 1140 unsigned char const *block_offset[4]; |
1364 | 1141 |
1365 for (t = 0; t < 4; t++) | 1142 for (t = 0; t < 4; t++) |
1366 block_offset[t] = ss[i + t].offset + best_address; | 1143 block_offset[t] = ss[i + t].offset + best_address; |
1367 | 1144 |
1368 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, | 1145 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
1369 sad_array); | 1146 sad_array); |
1370 | 1147 |
1371 for (t = 0; t < 4; t++, i++) { | 1148 for (t = 0; t < 4; t++, i++) { |
1372 if (sad_array[t] < bestsad) { | 1149 if (sad_array[t] < bestsad) { |
1373 this_mv.row = best_mv->row + ss[i].mv.row; | 1150 const MV this_mv = {best_mv->row + ss[i].mv.row, |
1374 this_mv.col = best_mv->col + ss[i].mv.col; | 1151 best_mv->col + ss[i].mv.col}; |
1375 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, | 1152 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, |
1376 mvjsadcost, mvsadcost, sad_per_bit); | 1153 mvjsadcost, mvsadcost, sad_per_bit); |
1377 | 1154 |
1378 if (sad_array[t] < bestsad) { | 1155 if (sad_array[t] < bestsad) { |
1379 bestsad = sad_array[t]; | 1156 bestsad = sad_array[t]; |
1380 best_site = i; | 1157 best_site = i; |
1381 } | 1158 } |
1382 } | 1159 } |
1383 } | 1160 } |
1384 } | 1161 } |
1385 } else { | 1162 } else { |
1386 for (j = 0; j < x->searches_per_step; j++) { | 1163 for (j = 0; j < x->searches_per_step; j++) { |
1387 // Trap illegal vectors | 1164 // Trap illegal vectors |
1388 this_row_offset = best_mv->row + ss[i].mv.row; | 1165 const MV this_mv = {best_mv->row + ss[i].mv.row, |
1389 this_col_offset = best_mv->col + ss[i].mv.col; | 1166 best_mv->col + ss[i].mv.col}; |
1390 | 1167 |
1391 if ((this_col_offset > x->mv_col_min) && | 1168 if (is_mv_in(x, &this_mv)) { |
1392 (this_col_offset < x->mv_col_max) && | |
1393 (this_row_offset > x->mv_row_min) && | |
1394 (this_row_offset < x->mv_row_max)) { | |
1395 const uint8_t *const check_here = ss[i].offset + best_address; | 1169 const uint8_t *const check_here = ss[i].offset + best_address; |
1396 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1170 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, |
1397 bestsad); | 1171 in_what_stride, bestsad); |
1398 | 1172 |
1399 if (thissad < bestsad) { | 1173 if (thissad < bestsad) { |
1400 this_mv.row = this_row_offset; | |
1401 this_mv.col = this_col_offset; | |
1402 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1174 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1403 mvjsadcost, mvsadcost, sad_per_bit); | 1175 mvjsadcost, mvsadcost, sad_per_bit); |
1404 | 1176 |
1405 if (thissad < bestsad) { | 1177 if (thissad < bestsad) { |
1406 bestsad = thissad; | 1178 bestsad = thissad; |
1407 best_site = i; | 1179 best_site = i; |
1408 } | 1180 } |
1409 } | 1181 } |
1410 } | 1182 } |
1411 i++; | 1183 i++; |
1412 } | 1184 } |
1413 } | 1185 } |
1414 if (best_site != last_site) { | 1186 if (best_site != last_site) { |
1415 best_mv->row += ss[best_site].mv.row; | 1187 best_mv->row += ss[best_site].mv.row; |
1416 best_mv->col += ss[best_site].mv.col; | 1188 best_mv->col += ss[best_site].mv.col; |
1417 best_address += ss[best_site].offset; | 1189 best_address += ss[best_site].offset; |
1418 last_site = best_site; | 1190 last_site = best_site; |
1419 #if defined(NEW_DIAMOND_SEARCH) | 1191 #if defined(NEW_DIAMOND_SEARCH) |
1420 while (1) { | 1192 while (1) { |
1421 this_row_offset = best_mv->row + ss[best_site].mv.row; | 1193 const MV this_mv = {best_mv->row + ss[best_site].mv.row, |
1422 this_col_offset = best_mv->col + ss[best_site].mv.col; | 1194 best_mv->col + ss[best_site].mv.col}; |
1423 if ((this_col_offset > x->mv_col_min) && | 1195 if (is_mv_in(x, &this_mv)) { |
1424 (this_col_offset < x->mv_col_max) && | 1196 const uint8_t *const check_here = ss[best_site].offset + best_address; |
1425 (this_row_offset > x->mv_row_min) && | 1197 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, |
1426 (this_row_offset < x->mv_row_max)) { | 1198 in_what_stride, bestsad); |
1427 check_here = ss[best_site].offset + best_address; | |
1428 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | |
1429 bestsad); | |
1430 if (thissad < bestsad) { | 1199 if (thissad < bestsad) { |
1431 this_mv.row = this_row_offset; | |
1432 this_mv.col = this_col_offset; | |
1433 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1200 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1434 mvjsadcost, mvsadcost, sad_per_bit); | 1201 mvjsadcost, mvsadcost, sad_per_bit); |
1435 if (thissad < bestsad) { | 1202 if (thissad < bestsad) { |
1436 bestsad = thissad; | 1203 bestsad = thissad; |
1437 best_mv->row += ss[best_site].mv.row; | 1204 best_mv->row += ss[best_site].mv.row; |
1438 best_mv->col += ss[best_site].mv.col; | 1205 best_mv->col += ss[best_site].mv.col; |
1439 best_address += ss[best_site].offset; | 1206 best_address += ss[best_site].offset; |
1440 continue; | 1207 continue; |
1441 } | 1208 } |
1442 } | 1209 } |
1443 } | 1210 } |
1444 break; | 1211 break; |
1445 }; | 1212 }; |
1446 #endif | 1213 #endif |
1447 } else if (best_address == in_what) { | 1214 } else if (best_address == in_what) { |
1448 (*num00)++; | 1215 (*num00)++; |
1449 } | 1216 } |
1450 } | 1217 } |
1451 | 1218 return bestsad; |
1452 this_mv.row = best_mv->row * 8; | |
1453 this_mv.col = best_mv->col * 8; | |
1454 | |
1455 if (bestsad == INT_MAX) | |
1456 return INT_MAX; | |
1457 | |
1458 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, | |
1459 (unsigned int *)(&thissad)) + | |
1460 mv_err_cost(&this_mv, center_mv, | |
1461 mvjcost, mvcost, x->errorperbit); | |
1462 } | 1219 } |
1463 | 1220 |
1464 /* do_refine: If last step (1-away) of n-step search doesn't pick the center | 1221 /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
1465 point as the best match, we will do a final 1-away diamond | 1222 point as the best match, we will do a final 1-away diamond |
1466 refining search */ | 1223 refining search */ |
1467 | 1224 |
1468 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, | 1225 int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, |
1469 MV *mvp_full, int step_param, | 1226 MV *mvp_full, int step_param, |
1470 int sadpb, int further_steps, int do_refine, | 1227 int sadpb, int further_steps, int do_refine, |
1471 const vp9_variance_fn_ptr_t *fn_ptr, | 1228 const vp9_variance_fn_ptr_t *fn_ptr, |
1472 const MV *ref_mv, MV *dst_mv) { | 1229 const MV *ref_mv, MV *dst_mv) { |
1473 MV temp_mv; | 1230 MV temp_mv; |
1474 int thissme, n, num00 = 0; | 1231 int thissme, n, num00 = 0; |
1475 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, | 1232 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, |
1476 step_param, sadpb, &n, | 1233 step_param, sadpb, &n, |
1477 fn_ptr, x->nmvjointcost, | 1234 fn_ptr, x->nmvjointcost, |
1478 x->mvcost, ref_mv); | 1235 x->mvcost, ref_mv); |
| 1236 if (bestsme < INT_MAX) |
| 1237 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
1479 *dst_mv = temp_mv; | 1238 *dst_mv = temp_mv; |
1480 | 1239 |
1481 // If there won't be more n-step search, check to see if refining search is | 1240 // If there won't be more n-step search, check to see if refining search is |
1482 // needed. | 1241 // needed. |
1483 if (n > further_steps) | 1242 if (n > further_steps) |
1484 do_refine = 0; | 1243 do_refine = 0; |
1485 | 1244 |
1486 while (n < further_steps) { | 1245 while (n < further_steps) { |
1487 ++n; | 1246 ++n; |
1488 | 1247 |
1489 if (num00) { | 1248 if (num00) { |
1490 num00--; | 1249 num00--; |
1491 } else { | 1250 } else { |
1492 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, | 1251 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, |
1493 step_param + n, sadpb, &num00, | 1252 step_param + n, sadpb, &num00, |
1494 fn_ptr, x->nmvjointcost, x->mvcost, | 1253 fn_ptr, x->nmvjointcost, x->mvcost, |
1495 ref_mv); | 1254 ref_mv); |
| 1255 if (thissme < INT_MAX) |
| 1256 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
1496 | 1257 |
1497 // check to see if refining search is needed. | 1258 // check to see if refining search is needed. |
1498 if (num00 > further_steps - n) | 1259 if (num00 > further_steps - n) |
1499 do_refine = 0; | 1260 do_refine = 0; |
1500 | 1261 |
1501 if (thissme < bestsme) { | 1262 if (thissme < bestsme) { |
1502 bestsme = thissme; | 1263 bestsme = thissme; |
1503 *dst_mv = temp_mv; | 1264 *dst_mv = temp_mv; |
1504 } | 1265 } |
1505 } | 1266 } |
1506 } | 1267 } |
1507 | 1268 |
1508 // final 1-away diamond refining search | 1269 // final 1-away diamond refining search |
1509 if (do_refine) { | 1270 if (do_refine) { |
1510 const int search_range = 8; | 1271 const int search_range = 8; |
1511 MV best_mv = *dst_mv; | 1272 MV best_mv = *dst_mv; |
1512 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, | 1273 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, |
1513 fn_ptr, x->nmvjointcost, x->mvcost, | 1274 fn_ptr, x->nmvjointcost, x->mvcost, |
1514 ref_mv); | 1275 ref_mv); |
| 1276 if (thissme < INT_MAX) |
| 1277 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); |
1515 if (thissme < bestsme) { | 1278 if (thissme < bestsme) { |
1516 bestsme = thissme; | 1279 bestsme = thissme; |
1517 *dst_mv = best_mv; | 1280 *dst_mv = best_mv; |
1518 } | 1281 } |
1519 } | 1282 } |
1520 | |
1521 return bestsme; | 1283 return bestsme; |
1522 } | 1284 } |
1523 | 1285 |
1524 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, | 1286 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, |
1525 int sad_per_bit, int distance, | 1287 int sad_per_bit, int distance, |
1526 const vp9_variance_fn_ptr_t *fn_ptr, | 1288 const vp9_variance_fn_ptr_t *fn_ptr, |
1527 int *mvjcost, int *mvcost[2], | 1289 int *mvjcost, int *mvcost[2], |
1528 const MV *center_mv, MV *best_mv) { | 1290 const MV *center_mv, MV *best_mv) { |
1529 int r, c; | 1291 int r, c; |
1530 const MACROBLOCKD *const xd = &x->e_mbd; | 1292 const MACROBLOCKD *const xd = &x->e_mbd; |
1531 const uint8_t *const what = x->plane[0].src.buf; | 1293 const struct buf_2d *const what = &x->plane[0].src; |
1532 const int what_stride = x->plane[0].src.stride; | 1294 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1533 const uint8_t *const in_what = xd->plane[0].pre[0].buf; | |
1534 const int in_what_stride = xd->plane[0].pre[0].stride; | |
1535 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); | 1295 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); |
1536 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); | 1296 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); |
1537 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); | 1297 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); |
1538 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); | 1298 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); |
1539 const int *mvjsadcost = x->nmvjointsadcost; | 1299 const int *mvjsadcost = x->nmvjointsadcost; |
1540 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 1300 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
1541 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 1301 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
1542 const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + | 1302 int best_sad = fn_ptr->sdf(what->buf, what->stride, |
1543 ref_mv->col]; | 1303 get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + |
1544 int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, | |
1545 0x7fffffff) + | |
1546 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); | 1304 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); |
1547 *best_mv = *ref_mv; | 1305 *best_mv = *ref_mv; |
1548 | 1306 |
1549 for (r = row_min; r < row_max; ++r) { | 1307 for (r = row_min; r < row_max; ++r) { |
1550 for (c = col_min; c < col_max; ++c) { | 1308 for (c = col_min; c < col_max; ++c) { |
1551 const MV this_mv = {r, c}; | 1309 const MV mv = {r, c}; |
1552 const uint8_t *check_here = &in_what[r * in_what_stride + c]; | 1310 const int sad = fn_ptr->sdf(what->buf, what->stride, |
1553 const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1311 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + |
1554 best_sad) + | 1312 mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, |
1555 mvsad_err_cost(&this_mv, &fcenter_mv, | 1313 sad_per_bit); |
1556 mvjsadcost, mvsadcost, sad_per_bit); | |
1557 | 1314 |
1558 if (sad < best_sad) { | 1315 if (sad < best_sad) { |
1559 best_sad = sad; | 1316 best_sad = sad; |
1560 *best_mv = this_mv; | 1317 *best_mv = mv; |
1561 best_address = check_here; | |
1562 } | 1318 } |
1563 } | 1319 } |
1564 } | 1320 } |
1565 | 1321 return best_sad; |
1566 if (best_sad < INT_MAX) { | |
1567 unsigned int unused; | |
1568 const MV mv = {best_mv->row * 8, best_mv->col * 8}; | |
1569 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &unused) | |
1570 + mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit); | |
1571 } else { | |
1572 return INT_MAX; | |
1573 } | |
1574 } | 1322 } |
1575 | 1323 |
1576 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, | 1324 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, |
1577 int sad_per_bit, int distance, | 1325 int sad_per_bit, int distance, |
1578 const vp9_variance_fn_ptr_t *fn_ptr, | 1326 const vp9_variance_fn_ptr_t *fn_ptr, |
1579 int *mvjcost, int *mvcost[2], | 1327 int *mvjcost, int *mvcost[2], |
1580 const MV *center_mv, MV *best_mv) { | 1328 const MV *center_mv, MV *best_mv) { |
1581 const MACROBLOCKD *const xd = &x->e_mbd; | 1329 const MACROBLOCKD *const xd = &x->e_mbd; |
1582 const uint8_t *const what = x->plane[0].src.buf; | 1330 const uint8_t *const what = x->plane[0].src.buf; |
1583 const int what_stride = x->plane[0].src.stride; | 1331 const int what_stride = x->plane[0].src.stride; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1628 | 1376 |
1629 if (thissad < bestsad) { | 1377 if (thissad < bestsad) { |
1630 this_mv.col = c; | 1378 this_mv.col = c; |
1631 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1379 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1632 mvjsadcost, mvsadcost, sad_per_bit); | 1380 mvjsadcost, mvsadcost, sad_per_bit); |
1633 | 1381 |
1634 if (thissad < bestsad) { | 1382 if (thissad < bestsad) { |
1635 bestsad = thissad; | 1383 bestsad = thissad; |
1636 best_mv->row = r; | 1384 best_mv->row = r; |
1637 best_mv->col = c; | 1385 best_mv->col = c; |
1638 bestaddress = check_here; | |
1639 } | 1386 } |
1640 } | 1387 } |
1641 | |
1642 check_here++; | 1388 check_here++; |
1643 c++; | 1389 c++; |
1644 } | 1390 } |
1645 } | 1391 } |
1646 | 1392 |
1647 while (c < col_max) { | 1393 while (c < col_max) { |
1648 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1394 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
1649 bestsad); | 1395 bestsad); |
1650 | 1396 |
1651 if (thissad < bestsad) { | 1397 if (thissad < bestsad) { |
1652 this_mv.col = c; | 1398 this_mv.col = c; |
1653 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1399 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1654 mvjsadcost, mvsadcost, sad_per_bit); | 1400 mvjsadcost, mvsadcost, sad_per_bit); |
1655 | 1401 |
1656 if (thissad < bestsad) { | 1402 if (thissad < bestsad) { |
1657 bestsad = thissad; | 1403 bestsad = thissad; |
1658 best_mv->row = r; | 1404 best_mv->row = r; |
1659 best_mv->col = c; | 1405 best_mv->col = c; |
1660 bestaddress = check_here; | |
1661 } | 1406 } |
1662 } | 1407 } |
1663 | 1408 |
1664 check_here++; | 1409 check_here++; |
1665 c++; | 1410 c++; |
1666 } | 1411 } |
1667 } | 1412 } |
1668 | 1413 return bestsad; |
1669 this_mv.row = best_mv->row * 8; | |
1670 this_mv.col = best_mv->col * 8; | |
1671 | |
1672 if (bestsad < INT_MAX) | |
1673 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, | |
1674 (unsigned int *)(&thissad)) + | |
1675 mv_err_cost(&this_mv, center_mv, | |
1676 mvjcost, mvcost, x->errorperbit); | |
1677 else | |
1678 return INT_MAX; | |
1679 } | 1414 } |
1680 | 1415 |
1681 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, | 1416 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, |
1682 int sad_per_bit, int distance, | 1417 int sad_per_bit, int distance, |
1683 const vp9_variance_fn_ptr_t *fn_ptr, | 1418 const vp9_variance_fn_ptr_t *fn_ptr, |
1684 int *mvjcost, int *mvcost[2], | 1419 int *mvjcost, int *mvcost[2], |
1685 const MV *center_mv, MV *best_mv) { | 1420 const MV *center_mv, MV *best_mv) { |
1686 const MACROBLOCKD *const xd = &x->e_mbd; | 1421 const MACROBLOCKD *const xd = &x->e_mbd; |
1687 const uint8_t *const what = x->plane[0].src.buf; | 1422 const uint8_t *const what = x->plane[0].src.buf; |
1688 const int what_stride = x->plane[0].src.stride; | 1423 const int what_stride = x->plane[0].src.stride; |
1689 const uint8_t *const in_what = xd->plane[0].pre[0].buf; | 1424 const uint8_t *const in_what = xd->plane[0].pre[0].buf; |
1690 const int in_what_stride = xd->plane[0].pre[0].stride; | 1425 const int in_what_stride = xd->plane[0].pre[0].stride; |
1691 MV this_mv; | 1426 MV this_mv; |
1692 unsigned int bestsad = INT_MAX; | 1427 unsigned int bestsad = INT_MAX; |
1693 int r, c; | 1428 int r, c; |
1694 unsigned int thissad; | |
1695 int ref_row = ref_mv->row; | 1429 int ref_row = ref_mv->row; |
1696 int ref_col = ref_mv->col; | 1430 int ref_col = ref_mv->col; |
1697 | 1431 |
1698 // Apply further limits to prevent us looking using vectors that stretch | 1432 // Apply further limits to prevent us looking using vectors that stretch |
1699 // beyond the UMV border | 1433 // beyond the UMV border |
1700 const int row_min = MAX(ref_row - distance, x->mv_row_min); | 1434 const int row_min = MAX(ref_row - distance, x->mv_row_min); |
1701 const int row_max = MIN(ref_row + distance, x->mv_row_max); | 1435 const int row_max = MIN(ref_row + distance, x->mv_row_max); |
1702 const int col_min = MAX(ref_col - distance, x->mv_col_min); | 1436 const int col_min = MAX(ref_col - distance, x->mv_col_min); |
1703 const int col_max = MIN(ref_col + distance, x->mv_col_max); | 1437 const int col_max = MIN(ref_col + distance, x->mv_col_max); |
1704 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); | 1438 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); |
(...skipping 19 matching lines...) Expand all Loading... |
1724 const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; | 1458 const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; |
1725 this_mv.row = r; | 1459 this_mv.row = r; |
1726 c = col_min; | 1460 c = col_min; |
1727 | 1461 |
1728 while ((c + 7) < col_max) { | 1462 while ((c + 7) < col_max) { |
1729 int i; | 1463 int i; |
1730 | 1464 |
1731 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); | 1465 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); |
1732 | 1466 |
1733 for (i = 0; i < 8; i++) { | 1467 for (i = 0; i < 8; i++) { |
1734 thissad = (unsigned int)sad_array8[i]; | 1468 unsigned int thissad = (unsigned int)sad_array8[i]; |
1735 | 1469 |
1736 if (thissad < bestsad) { | 1470 if (thissad < bestsad) { |
1737 this_mv.col = c; | 1471 this_mv.col = c; |
1738 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1472 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1739 mvjsadcost, mvsadcost, sad_per_bit); | 1473 mvjsadcost, mvsadcost, sad_per_bit); |
1740 | 1474 |
1741 if (thissad < bestsad) { | 1475 if (thissad < bestsad) { |
1742 bestsad = thissad; | 1476 bestsad = thissad; |
1743 best_mv->row = r; | 1477 best_mv->row = r; |
1744 best_mv->col = c; | 1478 best_mv->col = c; |
1745 bestaddress = check_here; | |
1746 } | 1479 } |
1747 } | 1480 } |
1748 | 1481 |
1749 check_here++; | 1482 check_here++; |
1750 c++; | 1483 c++; |
1751 } | 1484 } |
1752 } | 1485 } |
1753 | 1486 |
1754 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { | 1487 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { |
1755 int i; | 1488 int i; |
1756 | 1489 |
1757 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); | 1490 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
1758 | 1491 |
1759 for (i = 0; i < 3; i++) { | 1492 for (i = 0; i < 3; i++) { |
1760 thissad = sad_array[i]; | 1493 unsigned int thissad = sad_array[i]; |
1761 | 1494 |
1762 if (thissad < bestsad) { | 1495 if (thissad < bestsad) { |
1763 this_mv.col = c; | 1496 this_mv.col = c; |
1764 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1497 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1765 mvjsadcost, mvsadcost, sad_per_bit); | 1498 mvjsadcost, mvsadcost, sad_per_bit); |
1766 | 1499 |
1767 if (thissad < bestsad) { | 1500 if (thissad < bestsad) { |
1768 bestsad = thissad; | 1501 bestsad = thissad; |
1769 best_mv->row = r; | 1502 best_mv->row = r; |
1770 best_mv->col = c; | 1503 best_mv->col = c; |
1771 bestaddress = check_here; | |
1772 } | 1504 } |
1773 } | 1505 } |
1774 | 1506 |
1775 check_here++; | 1507 check_here++; |
1776 c++; | 1508 c++; |
1777 } | 1509 } |
1778 } | 1510 } |
1779 | 1511 |
1780 while (c < col_max) { | 1512 while (c < col_max) { |
1781 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1513 unsigned int thissad = fn_ptr->sdf(what, what_stride, |
1782 bestsad); | 1514 check_here, in_what_stride, bestsad); |
1783 | 1515 |
1784 if (thissad < bestsad) { | 1516 if (thissad < bestsad) { |
1785 this_mv.col = c; | 1517 this_mv.col = c; |
1786 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1518 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, |
1787 mvjsadcost, mvsadcost, sad_per_bit); | 1519 mvjsadcost, mvsadcost, sad_per_bit); |
1788 | 1520 |
1789 if (thissad < bestsad) { | 1521 if (thissad < bestsad) { |
1790 bestsad = thissad; | 1522 bestsad = thissad; |
1791 best_mv->row = r; | 1523 best_mv->row = r; |
1792 best_mv->col = c; | 1524 best_mv->col = c; |
1793 bestaddress = check_here; | |
1794 } | 1525 } |
1795 } | 1526 } |
1796 | 1527 |
1797 check_here++; | 1528 check_here++; |
1798 c++; | 1529 c++; |
1799 } | 1530 } |
1800 } | 1531 } |
1801 | 1532 return bestsad; |
1802 this_mv.row = best_mv->row * 8; | |
1803 this_mv.col = best_mv->col * 8; | |
1804 | |
1805 if (bestsad < INT_MAX) | |
1806 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, | |
1807 (unsigned int *)(&thissad)) + | |
1808 mv_err_cost(&this_mv, center_mv, | |
1809 mvjcost, mvcost, x->errorperbit); | |
1810 else | |
1811 return INT_MAX; | |
1812 } | 1533 } |
1813 | 1534 |
1814 int vp9_refining_search_sad_c(const MACROBLOCK *x, | 1535 int vp9_refining_search_sad_c(const MACROBLOCK *x, |
1815 MV *ref_mv, int error_per_bit, | 1536 MV *ref_mv, int error_per_bit, |
1816 int search_range, | 1537 int search_range, |
1817 const vp9_variance_fn_ptr_t *fn_ptr, | 1538 const vp9_variance_fn_ptr_t *fn_ptr, |
1818 int *mvjcost, int *mvcost[2], | 1539 int *mvjcost, int *mvcost[2], |
1819 const MV *center_mv) { | 1540 const MV *center_mv) { |
| 1541 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
1820 const MACROBLOCKD *const xd = &x->e_mbd; | 1542 const MACROBLOCKD *const xd = &x->e_mbd; |
1821 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; | 1543 const struct buf_2d *const what = &x->plane[0].src; |
1822 int i, j; | 1544 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1823 | |
1824 const int what_stride = x->plane[0].src.stride; | |
1825 const uint8_t *const what = x->plane[0].src.buf; | |
1826 const int in_what_stride = xd->plane[0].pre[0].stride; | |
1827 const uint8_t *const in_what = xd->plane[0].pre[0].buf; | |
1828 const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + | |
1829 ref_mv->col]; | |
1830 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 1545 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
1831 const int *mvjsadcost = x->nmvjointsadcost; | 1546 const int *mvjsadcost = x->nmvjointsadcost; |
1832 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 1547 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
1833 | 1548 |
1834 unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, | 1549 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, |
1835 in_what_stride, 0x7fffffff) + | 1550 get_buf_from_mv(in_what, ref_mv), |
| 1551 in_what->stride, 0x7fffffff) + |
1836 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); | 1552 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); |
| 1553 int i, j; |
1837 | 1554 |
1838 for (i = 0; i < search_range; i++) { | 1555 for (i = 0; i < search_range; i++) { |
1839 int best_site = -1; | 1556 int best_site = -1; |
1840 | 1557 |
1841 for (j = 0; j < 4; j++) { | 1558 for (j = 0; j < 4; j++) { |
1842 const MV this_mv = {ref_mv->row + neighbors[j].row, | 1559 const MV mv = {ref_mv->row + neighbors[j].row, |
1843 ref_mv->col + neighbors[j].col}; | 1560 ref_mv->col + neighbors[j].col}; |
1844 if (is_mv_in(x, &this_mv)) { | 1561 if (is_mv_in(x, &mv)) { |
1845 const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + | 1562 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, |
1846 this_mv.col]; | 1563 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); |
1847 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, | 1564 if (sad < best_sad) { |
1848 in_what_stride, bestsad); | 1565 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, |
1849 if (thissad < bestsad) { | 1566 error_per_bit); |
1850 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | 1567 if (sad < best_sad) { |
1851 mvjsadcost, mvsadcost, error_per_bit); | 1568 best_sad = sad; |
1852 | |
1853 if (thissad < bestsad) { | |
1854 bestsad = thissad; | |
1855 best_site = j; | 1569 best_site = j; |
1856 } | 1570 } |
1857 } | 1571 } |
1858 } | 1572 } |
1859 } | 1573 } |
1860 | 1574 |
1861 if (best_site == -1) { | 1575 if (best_site == -1) { |
1862 break; | 1576 break; |
1863 } else { | 1577 } else { |
1864 ref_mv->row += neighbors[best_site].row; | 1578 ref_mv->row += neighbors[best_site].row; |
1865 ref_mv->col += neighbors[best_site].col; | 1579 ref_mv->col += neighbors[best_site].col; |
1866 best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col]; | |
1867 } | 1580 } |
1868 } | 1581 } |
1869 | 1582 return best_sad; |
1870 if (bestsad < INT_MAX) { | |
1871 unsigned int unused; | |
1872 const MV mv = {ref_mv->row * 8, ref_mv->col * 8}; | |
1873 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, | |
1874 &unused) + | |
1875 mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit); | |
1876 } else { | |
1877 return INT_MAX; | |
1878 } | |
1879 } | 1583 } |
1880 | 1584 |
1881 int vp9_refining_search_sadx4(const MACROBLOCK *x, | 1585 int vp9_refining_search_sadx4(const MACROBLOCK *x, |
1882 MV *ref_mv, int error_per_bit, | 1586 MV *ref_mv, int error_per_bit, |
1883 int search_range, | 1587 int search_range, |
1884 const vp9_variance_fn_ptr_t *fn_ptr, | 1588 const vp9_variance_fn_ptr_t *fn_ptr, |
1885 int *mvjcost, int *mvcost[2], | 1589 int *mvjcost, int *mvcost[2], |
1886 const MV *center_mv) { | 1590 const MV *center_mv) { |
1887 const MACROBLOCKD *const xd = &x->e_mbd; | 1591 const MACROBLOCKD *const xd = &x->e_mbd; |
1888 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; | 1592 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
1889 int i, j; | 1593 const struct buf_2d *const what = &x->plane[0].src; |
1890 int this_row_offset, this_col_offset; | 1594 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1891 | |
1892 const int what_stride = x->plane[0].src.stride; | |
1893 const int in_what_stride = xd->plane[0].pre[0].stride; | |
1894 const uint8_t *what = x->plane[0].src.buf; | |
1895 const uint8_t *best_address = xd->plane[0].pre[0].buf + | |
1896 (ref_mv->row * xd->plane[0].pre[0].stride) + | |
1897 ref_mv->col; | |
1898 unsigned int thissad; | |
1899 MV this_mv; | |
1900 | |
1901 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 1595 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
1902 | |
1903 const int *mvjsadcost = x->nmvjointsadcost; | 1596 const int *mvjsadcost = x->nmvjointsadcost; |
1904 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 1597 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
1905 | 1598 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); |
1906 unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, | 1599 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, |
1907 in_what_stride, 0x7fffffff) + | 1600 in_what->stride, 0x7fffffff) + |
1908 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); | 1601 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); |
| 1602 int i, j; |
1909 | 1603 |
1910 for (i = 0; i < search_range; i++) { | 1604 for (i = 0; i < search_range; i++) { |
1911 int best_site = -1; | 1605 int best_site = -1; |
1912 int all_in = ((ref_mv->row - 1) > x->mv_row_min) & | 1606 const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & |
1913 ((ref_mv->row + 1) < x->mv_row_max) & | 1607 ((ref_mv->row + 1) < x->mv_row_max) & |
1914 ((ref_mv->col - 1) > x->mv_col_min) & | 1608 ((ref_mv->col - 1) > x->mv_col_min) & |
1915 ((ref_mv->col + 1) < x->mv_col_max); | 1609 ((ref_mv->col + 1) < x->mv_col_max); |
1916 | 1610 |
1917 if (all_in) { | 1611 if (all_in) { |
1918 unsigned int sad_array[4]; | 1612 unsigned int sads[4]; |
1919 uint8_t const *block_offset[4] = { | 1613 const uint8_t *const positions[4] = { |
1920 best_address - in_what_stride, | 1614 best_address - in_what->stride, |
1921 best_address - 1, | 1615 best_address - 1, |
1922 best_address + 1, | 1616 best_address + 1, |
1923 best_address + in_what_stride | 1617 best_address + in_what->stride |
1924 }; | 1618 }; |
1925 | 1619 |
1926 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, | 1620 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); |
1927 sad_array); | |
1928 | 1621 |
1929 for (j = 0; j < 4; j++) { | 1622 for (j = 0; j < 4; ++j) { |
1930 if (sad_array[j] < bestsad) { | 1623 if (sads[j] < best_sad) { |
1931 this_mv.row = ref_mv->row + neighbors[j].row; | 1624 const MV mv = {ref_mv->row + neighbors[j].row, |
1932 this_mv.col = ref_mv->col + neighbors[j].col; | 1625 ref_mv->col + neighbors[j].col}; |
1933 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, | 1626 sads[j] += mvsad_err_cost(&mv, &fcenter_mv, |
1934 mvjsadcost, mvsadcost, error_per_bit); | 1627 mvjsadcost, mvsadcost, error_per_bit); |
1935 | 1628 |
1936 if (sad_array[j] < bestsad) { | 1629 if (sads[j] < best_sad) { |
1937 bestsad = sad_array[j]; | 1630 best_sad = sads[j]; |
1938 best_site = j; | 1631 best_site = j; |
1939 } | 1632 } |
1940 } | 1633 } |
1941 } | 1634 } |
1942 } else { | 1635 } else { |
1943 for (j = 0; j < 4; j++) { | 1636 for (j = 0; j < 4; ++j) { |
1944 this_row_offset = ref_mv->row + neighbors[j].row; | 1637 const MV mv = {ref_mv->row + neighbors[j].row, |
1945 this_col_offset = ref_mv->col + neighbors[j].col; | 1638 ref_mv->col + neighbors[j].col}; |
1946 | 1639 |
1947 if ((this_col_offset > x->mv_col_min) && | 1640 if (is_mv_in(x, &mv)) { |
1948 (this_col_offset < x->mv_col_max) && | 1641 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, |
1949 (this_row_offset > x->mv_row_min) && | 1642 get_buf_from_mv(in_what, &mv), |
1950 (this_row_offset < x->mv_row_max)) { | 1643 in_what->stride, best_sad); |
1951 const uint8_t *check_here = neighbors[j].row * in_what_stride + | 1644 if (sad < best_sad) { |
1952 neighbors[j].col + best_address; | 1645 sad += mvsad_err_cost(&mv, &fcenter_mv, |
1953 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, | 1646 mvjsadcost, mvsadcost, error_per_bit); |
1954 bestsad); | |
1955 | 1647 |
1956 if (thissad < bestsad) { | 1648 if (sad < best_sad) { |
1957 this_mv.row = this_row_offset; | 1649 best_sad = sad; |
1958 this_mv.col = this_col_offset; | |
1959 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | |
1960 mvjsadcost, mvsadcost, error_per_bit); | |
1961 | |
1962 if (thissad < bestsad) { | |
1963 bestsad = thissad; | |
1964 best_site = j; | 1650 best_site = j; |
1965 } | 1651 } |
1966 } | 1652 } |
1967 } | 1653 } |
1968 } | 1654 } |
1969 } | 1655 } |
1970 | 1656 |
1971 if (best_site == -1) { | 1657 if (best_site == -1) { |
1972 break; | 1658 break; |
1973 } else { | 1659 } else { |
1974 ref_mv->row += neighbors[best_site].row; | 1660 ref_mv->row += neighbors[best_site].row; |
1975 ref_mv->col += neighbors[best_site].col; | 1661 ref_mv->col += neighbors[best_site].col; |
1976 best_address += (neighbors[best_site].row) * in_what_stride + | 1662 best_address = get_buf_from_mv(in_what, ref_mv); |
1977 neighbors[best_site].col; | |
1978 } | 1663 } |
1979 } | 1664 } |
1980 | 1665 |
1981 this_mv.row = ref_mv->row * 8; | 1666 return best_sad; |
1982 this_mv.col = ref_mv->col * 8; | |
1983 | |
1984 if (bestsad < INT_MAX) | |
1985 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, | |
1986 (unsigned int *)(&thissad)) + | |
1987 mv_err_cost(&this_mv, center_mv, | |
1988 mvjcost, mvcost, x->errorperbit); | |
1989 else | |
1990 return INT_MAX; | |
1991 } | 1667 } |
1992 | 1668 |
1993 // This function is called when we do joint motion search in comp_inter_inter | 1669 // This function is called when we do joint motion search in comp_inter_inter |
1994 // mode. | 1670 // mode. |
1995 int vp9_refining_search_8p_c(const MACROBLOCK *x, | 1671 int vp9_refining_search_8p_c(const MACROBLOCK *x, |
1996 MV *ref_mv, int error_per_bit, | 1672 MV *ref_mv, int error_per_bit, |
1997 int search_range, | 1673 int search_range, |
1998 const vp9_variance_fn_ptr_t *fn_ptr, | 1674 const vp9_variance_fn_ptr_t *fn_ptr, |
1999 int *mvjcost, int *mvcost[2], | 1675 int *mvjcost, int *mvcost[2], |
2000 const MV *center_mv, | 1676 const MV *center_mv, |
2001 const uint8_t *second_pred, int w, int h) { | 1677 const uint8_t *second_pred, int w, int h) { |
2002 const MACROBLOCKD *const xd = &x->e_mbd; | |
2003 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, | 1678 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, |
2004 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; | 1679 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; |
2005 int i, j; | 1680 const MACROBLOCKD *const xd = &x->e_mbd; |
2006 | 1681 const struct buf_2d *const what = &x->plane[0].src; |
2007 const uint8_t *what = x->plane[0].src.buf; | 1682 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
2008 const int what_stride = x->plane[0].src.stride; | |
2009 const uint8_t *in_what = xd->plane[0].pre[0].buf; | |
2010 const int in_what_stride = xd->plane[0].pre[0].stride; | |
2011 const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + | |
2012 ref_mv->col]; | |
2013 unsigned int thissad; | |
2014 MV this_mv; | |
2015 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; | 1683 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; |
2016 | |
2017 const int *mvjsadcost = x->nmvjointsadcost; | 1684 const int *mvjsadcost = x->nmvjointsadcost; |
2018 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; | 1685 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
2019 | 1686 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, |
2020 /* Get compound pred by averaging two pred blocks. */ | 1687 get_buf_from_mv(in_what, ref_mv), in_what->stride, |
2021 unsigned int bestsad = fn_ptr->sdaf(what, what_stride, | 1688 second_pred, 0x7fffffff) + |
2022 best_address, in_what_stride, | |
2023 second_pred, 0x7fffffff) + | |
2024 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); | 1689 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); |
| 1690 int i, j; |
2025 | 1691 |
2026 for (i = 0; i < search_range; ++i) { | 1692 for (i = 0; i < search_range; ++i) { |
2027 int best_site = -1; | 1693 int best_site = -1; |
2028 | 1694 |
2029 for (j = 0; j < 8; j++) { | 1695 for (j = 0; j < 8; ++j) { |
2030 this_mv.row = ref_mv->row + neighbors[j].row; | 1696 const MV mv = {ref_mv->row + neighbors[j].row, |
2031 this_mv.col = ref_mv->col + neighbors[j].col; | 1697 ref_mv->col + neighbors[j].col}; |
2032 | 1698 |
2033 if (is_mv_in(x, &this_mv)) { | 1699 if (is_mv_in(x, &mv)) { |
2034 const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + | 1700 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, |
2035 this_mv.col]; | 1701 get_buf_from_mv(in_what, &mv), in_what->stride, |
2036 | 1702 second_pred, best_sad); |
2037 thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, | 1703 if (sad < best_sad) { |
2038 second_pred, bestsad); | 1704 sad += mvsad_err_cost(&mv, &fcenter_mv, |
2039 if (thissad < bestsad) { | |
2040 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, | |
2041 mvjsadcost, mvsadcost, error_per_bit); | 1705 mvjsadcost, mvsadcost, error_per_bit); |
2042 if (thissad < bestsad) { | 1706 if (sad < best_sad) { |
2043 bestsad = thissad; | 1707 best_sad = sad; |
2044 best_site = j; | 1708 best_site = j; |
2045 } | 1709 } |
2046 } | 1710 } |
2047 } | 1711 } |
2048 } | 1712 } |
2049 | 1713 |
2050 if (best_site == -1) { | 1714 if (best_site == -1) { |
2051 break; | 1715 break; |
2052 } else { | 1716 } else { |
2053 ref_mv->row += neighbors[best_site].row; | 1717 ref_mv->row += neighbors[best_site].row; |
2054 ref_mv->col += neighbors[best_site].col; | 1718 ref_mv->col += neighbors[best_site].col; |
2055 best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col]; | |
2056 } | 1719 } |
2057 } | 1720 } |
2058 | 1721 return best_sad; |
2059 this_mv.row = ref_mv->row * 8; | |
2060 this_mv.col = ref_mv->col * 8; | |
2061 | |
2062 if (bestsad < INT_MAX) { | |
2063 // FIXME(rbultje, yunqing): add full-pixel averaging variance functions | |
2064 // so we don't have to use the subpixel with xoff=0,yoff=0 here. | |
2065 return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, | |
2066 (unsigned int *)(&thissad), second_pred) + | |
2067 mv_err_cost(&this_mv, center_mv, | |
2068 mvjcost, mvcost, x->errorperbit); | |
2069 } else { | |
2070 return INT_MAX; | |
2071 } | |
2072 } | 1722 } |
OLD | NEW |