Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(138)

Side by Side Diff: source/libvpx/vp9/encoder/vp9_mcomp.c

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "vp9/encoder/vp9_onyx_int.h"
13 #include "vp9/encoder/vp9_mcomp.h"
14 #include "vpx_mem/vpx_mem.h"
15 #include "vpx_ports/config.h"
16 #include <stdio.h>
17 #include <limits.h>
18 #include <math.h>
19 #include "vp9/common/vp9_findnearmv.h"
20
#ifdef ENTROPY_STATS
/* Counters used to gather motion-vector entropy statistics in
 * instrumented (ENTROPY_STATS) builds only. */
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
26 void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
27 int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
28 ((ref_mv->as_mv.col & 7) ? 1 : 0);
29 int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL +
30 ((ref_mv->as_mv.row & 7) ? 1 : 0);
31 int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
32 int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
33
34 /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
35 if (x->mv_col_min < col_min)
36 x->mv_col_min = col_min;
37 if (x->mv_col_max > col_max)
38 x->mv_col_max = col_max;
39 if (x->mv_row_min < row_min)
40 x->mv_row_min = row_min;
41 if (x->mv_row_max > row_max)
42 x->mv_row_max = row_max;
43 }
44
45 int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
46 int Weight, int ishp) {
47 MV v;
48 v.row = (mv->as_mv.row - ref->as_mv.row);
49 v.col = (mv->as_mv.col - ref->as_mv.col);
50 return ((mvjcost[vp9_get_mv_joint(v)] +
51 mvcost[0][v.row] + mvcost[1][v.col]) *
52 Weight) >> 7;
53 }
54
55 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
56 int error_per_bit, int ishp) {
57 if (mvcost) {
58 MV v;
59 v.row = (mv->as_mv.row - ref->as_mv.row);
60 v.col = (mv->as_mv.col - ref->as_mv.col);
61 return ((mvjcost[vp9_get_mv_joint(v)] +
62 mvcost[0][v.row] + mvcost[1][v.col]) *
63 error_per_bit + 128) >> 8;
64 }
65 return 0;
66 }
67
68 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
69 int *mvsadcost[2], int error_per_bit) {
70
71 if (mvsadcost) {
72 MV v;
73 v.row = (mv->as_mv.row - ref->as_mv.row);
74 v.col = (mv->as_mv.col - ref->as_mv.col);
75 return ((mvjsadcost[vp9_get_mv_joint(v)] +
76 mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
77 error_per_bit + 128) >> 8;
78 }
79 return 0;
80 }
81
82 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
83 int Len;
84 int search_site_count = 0;
85
86
87 // Generate offsets for 4 search sites per step.
88 Len = MAX_FIRST_STEP;
89 x->ss[search_site_count].mv.col = 0;
90 x->ss[search_site_count].mv.row = 0;
91 x->ss[search_site_count].offset = 0;
92 search_site_count++;
93
94 while (Len > 0) {
95
96 // Compute offsets for search sites.
97 x->ss[search_site_count].mv.col = 0;
98 x->ss[search_site_count].mv.row = -Len;
99 x->ss[search_site_count].offset = -Len * stride;
100 search_site_count++;
101
102 // Compute offsets for search sites.
103 x->ss[search_site_count].mv.col = 0;
104 x->ss[search_site_count].mv.row = Len;
105 x->ss[search_site_count].offset = Len * stride;
106 search_site_count++;
107
108 // Compute offsets for search sites.
109 x->ss[search_site_count].mv.col = -Len;
110 x->ss[search_site_count].mv.row = 0;
111 x->ss[search_site_count].offset = -Len;
112 search_site_count++;
113
114 // Compute offsets for search sites.
115 x->ss[search_site_count].mv.col = Len;
116 x->ss[search_site_count].mv.row = 0;
117 x->ss[search_site_count].offset = Len;
118 search_site_count++;
119
120 // Contract.
121 Len /= 2;
122 }
123
124 x->ss_count = search_site_count;
125 x->searches_per_step = 4;
126 }
127
128 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
129 int Len;
130 int search_site_count = 0;
131
132 // Generate offsets for 8 search sites per step.
133 Len = MAX_FIRST_STEP;
134 x->ss[search_site_count].mv.col = 0;
135 x->ss[search_site_count].mv.row = 0;
136 x->ss[search_site_count].offset = 0;
137 search_site_count++;
138
139 while (Len > 0) {
140
141 // Compute offsets for search sites.
142 x->ss[search_site_count].mv.col = 0;
143 x->ss[search_site_count].mv.row = -Len;
144 x->ss[search_site_count].offset = -Len * stride;
145 search_site_count++;
146
147 // Compute offsets for search sites.
148 x->ss[search_site_count].mv.col = 0;
149 x->ss[search_site_count].mv.row = Len;
150 x->ss[search_site_count].offset = Len * stride;
151 search_site_count++;
152
153 // Compute offsets for search sites.
154 x->ss[search_site_count].mv.col = -Len;
155 x->ss[search_site_count].mv.row = 0;
156 x->ss[search_site_count].offset = -Len;
157 search_site_count++;
158
159 // Compute offsets for search sites.
160 x->ss[search_site_count].mv.col = Len;
161 x->ss[search_site_count].mv.row = 0;
162 x->ss[search_site_count].offset = Len;
163 search_site_count++;
164
165 // Compute offsets for search sites.
166 x->ss[search_site_count].mv.col = -Len;
167 x->ss[search_site_count].mv.row = -Len;
168 x->ss[search_site_count].offset = -Len * stride - Len;
169 search_site_count++;
170
171 // Compute offsets for search sites.
172 x->ss[search_site_count].mv.col = Len;
173 x->ss[search_site_count].mv.row = -Len;
174 x->ss[search_site_count].offset = -Len * stride + Len;
175 search_site_count++;
176
177 // Compute offsets for search sites.
178 x->ss[search_site_count].mv.col = -Len;
179 x->ss[search_site_count].mv.row = Len;
180 x->ss[search_site_count].offset = Len * stride - Len;
181 search_site_count++;
182
183 // Compute offsets for search sites.
184 x->ss[search_site_count].mv.col = Len;
185 x->ss[search_site_count].mv.row = Len;
186 x->ss[search_site_count].offset = Len * stride + Len;
187 search_site_count++;
188
189 // Contract.
190 Len /= 2;
191 }
192
193 x->ss_count = search_site_count;
194 x->searches_per_step = 8;
195 }
196
/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */

/* estimated cost of a motion vector (r,c) relative to (rr,rc);
 * expands to 0 when the mvcost tables are NULL */
#define MVC(r, c) \
  (mvcost ? \
   ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
     mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
    error_per_bit + 128) >> 8 : 0)

#define SP(x) (((x) & 7) << 1)  // convert motion vector component to offset
                                // for svf calc

/* run statement s if (r,c) is inside [minr,maxr]x[minc,maxc], else e */
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
    s \
  else \
    e;

/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) - (offset)))

/* returns subpixel variance error function */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)

/* checks if (r, c) has better score than previous best; on improvement
 * updates besterr/br/bc/*distortion/*sse1, else sets v to INT_MAX so the
 * direction comparison below never selects an out-of-range point */
#define CHECK_BETTER(v, r, c) \
  IFMVCV(r, c, { \
    thismse = (DIST(r, c)); \
    if ((v = MVC(r, c) + thismse) < besterr) { \
      besterr = v; \
      br = r; \
      bc = c; \
      *distortion = thismse; \
      *sse1 = sse; \
    } \
  }, \
  v = INT_MAX;)

/* NOTE: these evaluate their arguments twice -- only use with side-effect-free
 * expressions (they are #undef'd after the function below). */
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
246
247 int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
248 int_mv *bestmv, int_mv *ref_mv,
249 int error_per_bit,
250 const vp9_variance_fn_ptr_t *vfp,
251 int *mvjcost, int *mvcost[2],
252 int *distortion,
253 unsigned int *sse1) {
254 unsigned char *z = (*(b->base_src) + b->src);
255 MACROBLOCKD *xd = &x->e_mbd;
256
257 int rr, rc, br, bc, hstep;
258 int tr, tc;
259 unsigned int besterr = INT_MAX;
260 unsigned int left, right, up, down, diag;
261 unsigned int sse;
262 unsigned int whichdir;
263 unsigned int halfiters = 4;
264 unsigned int quarteriters = 4;
265 unsigned int eighthiters = 4;
266 int thismse;
267 int maxc, minc, maxr, minr;
268 int y_stride;
269 int offset;
270 int usehp = xd->allow_high_precision_mv;
271
272 #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
273 unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_str ide + bestmv->as_mv.col;
274 unsigned char *y;
275 int buf_r1, buf_r2, buf_c1, buf_c2;
276
277 // Clamping to avoid out-of-range data access
278 buf_r1 = ((bestmv->as_mv.row - VP9_INTERP_EXTEND) < x->mv_row_min) ?
279 (bestmv->as_mv.row - x->mv_row_min) : VP9_INTERP_EXTEND - 1;
280 buf_r2 = ((bestmv->as_mv.row + VP9_INTERP_EXTEND) > x->mv_row_max) ?
281 (x->mv_row_max - bestmv->as_mv.row) : VP9_INTERP_EXTEND - 1;
282 buf_c1 = ((bestmv->as_mv.col - VP9_INTERP_EXTEND) < x->mv_col_min) ?
283 (bestmv->as_mv.col - x->mv_col_min) : VP9_INTERP_EXTEND - 1;
284 buf_c2 = ((bestmv->as_mv.col + VP9_INTERP_EXTEND) > x->mv_col_max) ?
285 (x->mv_col_max - bestmv->as_mv.col) : VP9_INTERP_EXTEND - 1;
286 y_stride = 32;
287
288 /* Copy to intermediate buffer before searching. */
289 vfp->copymem(y0 - buf_c1 - d->pre_stride * buf_r1, d->pre_stride, xd->y_buf, y _stride, 16 + buf_r1 + buf_r2);
290 y = xd->y_buf + y_stride * buf_r1 + buf_c1;
291 #else
292 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stri de + bestmv->as_mv.col;
293 y_stride = d->pre_stride;
294 #endif
295
296 rr = ref_mv->as_mv.row;
297 rc = ref_mv->as_mv.col;
298 br = bestmv->as_mv.row << 3;
299 bc = bestmv->as_mv.col << 3;
300 hstep = 4;
301 minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1)) ;
302 maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1)) ;
303 minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1)) ;
304 maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1)) ;
305
306 tr = br;
307 tc = bc;
308
309
310 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
311
312 // central mv
313 bestmv->as_mv.row <<= 3;
314 bestmv->as_mv.col <<= 3;
315
316 // calculate central point error
317 besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
318 *distortion = besterr;
319 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
320 error_per_bit, xd->allow_high_precision_mv);
321
322 // TODO: Each subsequent iteration checks at least one point in
323 // common with the last iteration could be 2 ( if diag selected)
324 while (--halfiters) {
325 // 1/2 pel
326 CHECK_BETTER(left, tr, tc - hstep);
327 CHECK_BETTER(right, tr, tc + hstep);
328 CHECK_BETTER(up, tr - hstep, tc);
329 CHECK_BETTER(down, tr + hstep, tc);
330
331 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
332
333 switch (whichdir) {
334 case 0:
335 CHECK_BETTER(diag, tr - hstep, tc - hstep);
336 break;
337 case 1:
338 CHECK_BETTER(diag, tr - hstep, tc + hstep);
339 break;
340 case 2:
341 CHECK_BETTER(diag, tr + hstep, tc - hstep);
342 break;
343 case 3:
344 CHECK_BETTER(diag, tr + hstep, tc + hstep);
345 break;
346 }
347
348 // no reason to check the same one again.
349 if (tr == br && tc == bc)
350 break;
351
352 tr = br;
353 tc = bc;
354 }
355
356 // TODO: Each subsequent iteration checks at least one point in common with
357 // the last iteration could be 2 ( if diag selected) 1/4 pel
358 hstep >>= 1;
359 while (--quarteriters) {
360 CHECK_BETTER(left, tr, tc - hstep);
361 CHECK_BETTER(right, tr, tc + hstep);
362 CHECK_BETTER(up, tr - hstep, tc);
363 CHECK_BETTER(down, tr + hstep, tc);
364
365 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
366
367 switch (whichdir) {
368 case 0:
369 CHECK_BETTER(diag, tr - hstep, tc - hstep);
370 break;
371 case 1:
372 CHECK_BETTER(diag, tr - hstep, tc + hstep);
373 break;
374 case 2:
375 CHECK_BETTER(diag, tr + hstep, tc - hstep);
376 break;
377 case 3:
378 CHECK_BETTER(diag, tr + hstep, tc + hstep);
379 break;
380 }
381
382 // no reason to check the same one again.
383 if (tr == br && tc == bc)
384 break;
385
386 tr = br;
387 tc = bc;
388 }
389
390 if (xd->allow_high_precision_mv) {
391 usehp = vp9_use_nmv_hp(&ref_mv->as_mv);
392 } else {
393 usehp = 0;
394 }
395
396 if (usehp) {
397 hstep >>= 1;
398 while (--eighthiters) {
399 CHECK_BETTER(left, tr, tc - hstep);
400 CHECK_BETTER(right, tr, tc + hstep);
401 CHECK_BETTER(up, tr - hstep, tc);
402 CHECK_BETTER(down, tr + hstep, tc);
403
404 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
405
406 switch (whichdir) {
407 case 0:
408 CHECK_BETTER(diag, tr - hstep, tc - hstep);
409 break;
410 case 1:
411 CHECK_BETTER(diag, tr - hstep, tc + hstep);
412 break;
413 case 2:
414 CHECK_BETTER(diag, tr + hstep, tc - hstep);
415 break;
416 case 3:
417 CHECK_BETTER(diag, tr + hstep, tc + hstep);
418 break;
419 }
420
421 // no reason to check the same one again.
422 if (tr == br && tc == bc)
423 break;
424
425 tr = br;
426 tc = bc;
427 }
428 }
429 bestmv->as_mv.row = br;
430 bestmv->as_mv.col = bc;
431
432 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
433 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
434 return INT_MAX;
435
436 return besterr;
437 }
438 #undef MVC
439 #undef PRE
440 #undef DIST
441 #undef IFMVCV
442 #undef CHECK_BETTER
443 #undef MIN
444 #undef MAX
445
446 int vp9_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
447 int_mv *bestmv, int_mv *ref_mv,
448 int error_per_bit,
449 const vp9_variance_fn_ptr_t *vfp,
450 int *mvjcost, int *mvcost[2], int *distortion,
451 unsigned int *sse1) {
452 int bestmse = INT_MAX;
453 int_mv startmv;
454 int_mv this_mv;
455 int_mv orig_mv;
456 int yrow_movedback = 0, ycol_movedback = 0;
457 unsigned char *z = (*(b->base_src) + b->src);
458 int left, right, up, down, diag;
459 unsigned int sse;
460 int whichdir;
461 int thismse;
462 int y_stride;
463 MACROBLOCKD *xd = &x->e_mbd;
464 int usehp = xd->allow_high_precision_mv;
465
466 #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
467 unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_str ide + bestmv->as_mv.col;
468 unsigned char *y;
469
470 y_stride = 32;
471 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
472 vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
473 y = xd->y_buf + y_stride + 1;
474 #else
475 unsigned char *y = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stri de + bestmv->as_mv.col;
476 y_stride = d->pre_stride;
477 #endif
478
479 // central mv
480 bestmv->as_mv.row <<= 3;
481 bestmv->as_mv.col <<= 3;
482 startmv = *bestmv;
483 orig_mv = *bestmv;
484
485 // calculate central point error
486 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
487 *distortion = bestmse;
488 bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
489 xd->allow_high_precision_mv);
490
491 // go left then right and check error
492 this_mv.as_mv.row = startmv.as_mv.row;
493 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
494 thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
495 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
496 xd->allow_high_precision_mv);
497
498 if (left < bestmse) {
499 *bestmv = this_mv;
500 bestmse = left;
501 *distortion = thismse;
502 *sse1 = sse;
503 }
504
505 this_mv.as_mv.col += 8;
506 thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
507 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
508 error_per_bit, xd->allow_high_precision_mv);
509
510 if (right < bestmse) {
511 *bestmv = this_mv;
512 bestmse = right;
513 *distortion = thismse;
514 *sse1 = sse;
515 }
516
517 // go up then down and check error
518 this_mv.as_mv.col = startmv.as_mv.col;
519 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
520 thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
521 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
522 xd->allow_high_precision_mv);
523
524 if (up < bestmse) {
525 *bestmv = this_mv;
526 bestmse = up;
527 *distortion = thismse;
528 *sse1 = sse;
529 }
530
531 this_mv.as_mv.row += 8;
532 thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
533 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
534 xd->allow_high_precision_mv);
535
536 if (down < bestmse) {
537 *bestmv = this_mv;
538 bestmse = down;
539 *distortion = thismse;
540 *sse1 = sse;
541 }
542
543
544 // now check 1 more diagonal
545 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
546 // for(whichdir =0;whichdir<4;whichdir++)
547 // {
548 this_mv = startmv;
549
550 switch (whichdir) {
551 case 0:
552 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
553 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
554 thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride , &sse);
555 break;
556 case 1:
557 this_mv.as_mv.col += 4;
558 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
559 thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &s se);
560 break;
561 case 2:
562 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
563 this_mv.as_mv.row += 4;
564 thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
565 break;
566 case 3:
567 default:
568 this_mv.as_mv.col += 4;
569 this_mv.as_mv.row += 4;
570 thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
571 break;
572 }
573
574 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
575 xd->allow_high_precision_mv);
576
577 if (diag < bestmse) {
578 *bestmv = this_mv;
579 bestmse = diag;
580 *distortion = thismse;
581 *sse1 = sse;
582 }
583
584 // }
585
586
587 // time to check quarter pels.
588 if (bestmv->as_mv.row < startmv.as_mv.row) {
589 y -= y_stride;
590 yrow_movedback = 1;
591 }
592
593 if (bestmv->as_mv.col < startmv.as_mv.col) {
594 y--;
595 ycol_movedback = 1;
596 }
597
598 startmv = *bestmv;
599
600
601
602 // go left then right and check error
603 this_mv.as_mv.row = startmv.as_mv.row;
604
605 if (startmv.as_mv.col & 7) {
606 this_mv.as_mv.col = startmv.as_mv.col - 2;
607 thismse = vfp->svf(y, y_stride,
608 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
609 z, b->src_stride, &sse);
610 } else {
611 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
612 thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
613 b->src_stride, &sse);
614 }
615
616 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
617 xd->allow_high_precision_mv);
618
619 if (left < bestmse) {
620 *bestmv = this_mv;
621 bestmse = left;
622 *distortion = thismse;
623 *sse1 = sse;
624 }
625
626 this_mv.as_mv.col += 4;
627 thismse = vfp->svf(y, y_stride,
628 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
629 z, b->src_stride, &sse);
630 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
631 error_per_bit, xd->allow_high_precision_mv);
632
633 if (right < bestmse) {
634 *bestmv = this_mv;
635 bestmse = right;
636 *distortion = thismse;
637 *sse1 = sse;
638 }
639
640 // go up then down and check error
641 this_mv.as_mv.col = startmv.as_mv.col;
642
643 if (startmv.as_mv.row & 7) {
644 this_mv.as_mv.row = startmv.as_mv.row - 2;
645 thismse = vfp->svf(y, y_stride,
646 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
647 z, b->src_stride, &sse);
648 } else {
649 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
650 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6),
651 z, b->src_stride, &sse);
652 }
653
654 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
655 xd->allow_high_precision_mv);
656
657 if (up < bestmse) {
658 *bestmv = this_mv;
659 bestmse = up;
660 *distortion = thismse;
661 *sse1 = sse;
662 }
663
664 this_mv.as_mv.row += 4;
665 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
666 z, b->src_stride, &sse);
667 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
668 xd->allow_high_precision_mv);
669
670 if (down < bestmse) {
671 *bestmv = this_mv;
672 bestmse = down;
673 *distortion = thismse;
674 *sse1 = sse;
675 }
676
677
678 // now check 1 more diagonal
679 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
680
681 // for(whichdir=0;whichdir<4;whichdir++)
682 // {
683 this_mv = startmv;
684
685 switch (whichdir) {
686 case 0:
687
688 if (startmv.as_mv.row & 7) {
689 this_mv.as_mv.row -= 2;
690
691 if (startmv.as_mv.col & 7) {
692 this_mv.as_mv.col -= 2;
693 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_m v.row), z, b->src_stride, &sse);
694 } else {
695 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
696 thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z, b ->src_stride, &sse);;
697 }
698 } else {
699 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
700
701 if (startmv.as_mv.col & 7) {
702 this_mv.as_mv.col -= 2;
703 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6 ), z, b->src_stride, &sse);
704 } else {
705 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
706 thismse = vfp->svf(y - y_stride - 1, y_stride, SP(6), SP(6), z, b->src _stride, &sse);
707 }
708 }
709
710 break;
711 case 1:
712 this_mv.as_mv.col += 2;
713
714 if (startmv.as_mv.row & 7) {
715 this_mv.as_mv.row -= 2;
716 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv. row), z, b->src_stride, &sse);
717 } else {
718 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
719 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6), z, b->src_stride, &sse);
720 }
721
722 break;
723 case 2:
724 this_mv.as_mv.row += 2;
725
726 if (startmv.as_mv.col & 7) {
727 this_mv.as_mv.col -= 2;
728 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv. row),
729 z, b->src_stride, &sse);
730 } else {
731 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
732 thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
733 b->src_stride, &sse);
734 }
735
736 break;
737 case 3:
738 this_mv.as_mv.col += 2;
739 this_mv.as_mv.row += 2;
740 thismse = vfp->svf(y, y_stride,
741 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
742 z, b->src_stride, &sse);
743 break;
744 }
745
746 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
747 xd->allow_high_precision_mv);
748
749 if (diag < bestmse) {
750 *bestmv = this_mv;
751 bestmse = diag;
752 *distortion = thismse;
753 *sse1 = sse;
754 }
755
756 if (x->e_mbd.allow_high_precision_mv) {
757 usehp = vp9_use_nmv_hp(&ref_mv->as_mv);
758 } else {
759 usehp = 0;
760 }
761 if (!usehp)
762 return bestmse;
763
764 /* Now do 1/8th pixel */
765 if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) {
766 y -= y_stride;
767 yrow_movedback = 1;
768 }
769
770 if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) {
771 y--;
772 ycol_movedback = 1;
773 }
774
775 startmv = *bestmv;
776
777 // go left then right and check error
778 this_mv.as_mv.row = startmv.as_mv.row;
779
780 if (startmv.as_mv.col & 7) {
781 this_mv.as_mv.col = startmv.as_mv.col - 1;
782 thismse = vfp->svf(y, y_stride,
783 SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
784 z, b->src_stride, &sse);
785 } else {
786 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
787 thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row),
788 z, b->src_stride, &sse);
789 }
790
791 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
792 xd->allow_high_precision_mv);
793
794 if (left < bestmse) {
795 *bestmv = this_mv;
796 bestmse = left;
797 *distortion = thismse;
798 *sse1 = sse;
799 }
800
801 this_mv.as_mv.col += 2;
802 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
803 z, b->src_stride, &sse);
804 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
805 error_per_bit, xd->allow_high_precision_mv);
806
807 if (right < bestmse) {
808 *bestmv = this_mv;
809 bestmse = right;
810 *distortion = thismse;
811 *sse1 = sse;
812 }
813
814 // go up then down and check error
815 this_mv.as_mv.col = startmv.as_mv.col;
816
817 if (startmv.as_mv.row & 7) {
818 this_mv.as_mv.row = startmv.as_mv.row - 1;
819 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row) , z, b->src_stride, &sse);
820 } else {
821 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
822 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
823 }
824
825 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
826 xd->allow_high_precision_mv);
827
828 if (up < bestmse) {
829 *bestmv = this_mv;
830 bestmse = up;
831 *distortion = thismse;
832 *sse1 = sse;
833 }
834
835 this_mv.as_mv.row += 2;
836 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
837 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
838 xd->allow_high_precision_mv);
839
840 if (down < bestmse) {
841 *bestmv = this_mv;
842 bestmse = down;
843 *distortion = thismse;
844 *sse1 = sse;
845 }
846
847 // now check 1 more diagonal
848 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
849
850 // for(whichdir=0;whichdir<4;whichdir++)
851 // {
852 this_mv = startmv;
853
854 switch (whichdir) {
855 case 0:
856
857 if (startmv.as_mv.row & 7) {
858 this_mv.as_mv.row -= 1;
859
860 if (startmv.as_mv.col & 7) {
861 this_mv.as_mv.col -= 1;
862 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_m v.row), z, b->src_stride, &sse);
863 } else {
864 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
865 thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b ->src_stride, &sse);;
866 }
867 } else {
868 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
869
870 if (startmv.as_mv.col & 7) {
871 this_mv.as_mv.col -= 1;
872 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7 ), z, b->src_stride, &sse);
873 } else {
874 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
875 thismse = vfp->svf(y - y_stride - 1, y_stride, SP(7), SP(7), z, b->src _stride, &sse);
876 }
877 }
878
879 break;
880 case 1:
881 this_mv.as_mv.col += 1;
882
883 if (startmv.as_mv.row & 7) {
884 this_mv.as_mv.row -= 1;
885 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv. row), z, b->src_stride, &sse);
886 } else {
887 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
888 thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
889 }
890
891 break;
892 case 2:
893 this_mv.as_mv.row += 1;
894
895 if (startmv.as_mv.col & 7) {
896 this_mv.as_mv.col -= 1;
897 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv. row), z, b->src_stride, &sse);
898 } else {
899 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
900 thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row), z, b-> src_stride, &sse);
901 }
902
903 break;
904 case 3:
905 this_mv.as_mv.col += 1;
906 this_mv.as_mv.row += 1;
907 thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.r ow), z, b->src_stride, &sse);
908 break;
909 }
910
911 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
912 xd->allow_high_precision_mv);
913
914 if (diag < bestmse) {
915 *bestmv = this_mv;
916 bestmse = diag;
917 *distortion = thismse;
918 *sse1 = sse;
919 }
920
921 return bestmse;
922 }
923
924 #undef SP
925
926 int vp9_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
927 int_mv *bestmv, int_mv *ref_mv,
928 int error_per_bit,
929 const vp9_variance_fn_ptr_t *vfp,
930 int *mvjcost, int *mvcost[2],
931 int *distortion,
932 unsigned int *sse1) {
933 int bestmse = INT_MAX;
934 int_mv startmv;
935 int_mv this_mv;
936 unsigned char *z = (*(b->base_src) + b->src);
937 int left, right, up, down, diag;
938 unsigned int sse;
939 int whichdir;
940 int thismse;
941 int y_stride;
942 MACROBLOCKD *xd = &x->e_mbd;
943
944 #if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
945 unsigned char *y0 = *(d->base_pre) + d->pre +
946 (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
947 unsigned char *y;
948
949 y_stride = 32;
950 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
951 vfp->copymem(y0 - 1 - d->pre_stride, d->pre_stride, xd->y_buf, y_stride, 18);
952 y = xd->y_buf + y_stride + 1;
953 #else
954 unsigned char *y = *(d->base_pre) + d->pre +
955 (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
956 y_stride = d->pre_stride;
957 #endif
958
959 // central mv
960 bestmv->as_mv.row <<= 3;
961 bestmv->as_mv.col <<= 3;
962 startmv = *bestmv;
963
964 // calculate central point error
965 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
966 *distortion = bestmse;
967 bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
968 xd->allow_high_precision_mv);
969
970 // go left then right and check error
971 this_mv.as_mv.row = startmv.as_mv.row;
972 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
973 thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
974 left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
975 xd->allow_high_precision_mv);
976
977 if (left < bestmse) {
978 *bestmv = this_mv;
979 bestmse = left;
980 *distortion = thismse;
981 *sse1 = sse;
982 }
983
984 this_mv.as_mv.col += 8;
985 thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
986 right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
987 error_per_bit, xd->allow_high_precision_mv);
988
989 if (right < bestmse) {
990 *bestmv = this_mv;
991 bestmse = right;
992 *distortion = thismse;
993 *sse1 = sse;
994 }
995
996 // go up then down and check error
997 this_mv.as_mv.col = startmv.as_mv.col;
998 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
999 thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
1000 up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1001 xd->allow_high_precision_mv);
1002
1003 if (up < bestmse) {
1004 *bestmv = this_mv;
1005 bestmse = up;
1006 *distortion = thismse;
1007 *sse1 = sse;
1008 }
1009
1010 this_mv.as_mv.row += 8;
1011 thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
1012 down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1013 xd->allow_high_precision_mv);
1014
1015 if (down < bestmse) {
1016 *bestmv = this_mv;
1017 bestmse = down;
1018 *distortion = thismse;
1019 *sse1 = sse;
1020 }
1021
1022 // now check 1 more diagonal -
1023 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
1024 this_mv = startmv;
1025
1026 switch (whichdir) {
1027 case 0:
1028 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
1029 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
1030 thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride , &sse);
1031 break;
1032 case 1:
1033 this_mv.as_mv.col += 4;
1034 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
1035 thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &s se);
1036 break;
1037 case 2:
1038 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
1039 this_mv.as_mv.row += 4;
1040 thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
1041 break;
1042 case 3:
1043 default:
1044 this_mv.as_mv.col += 4;
1045 this_mv.as_mv.row += 4;
1046 thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
1047 break;
1048 }
1049
1050 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1051 xd->allow_high_precision_mv);
1052
1053 if (diag < bestmse) {
1054 *bestmv = this_mv;
1055 bestmse = diag;
1056 *distortion = thismse;
1057 *sse1 = sse;
1058 }
1059
1060 return bestmse;
1061 }
1062
// Sets 'all_in' to 1 iff every point within +/- 'range' of the current best
// position (br, bc) lies inside the valid MV window, so the per-point bounds
// test (CHECK_POINT) can be skipped for the whole pattern.
#define CHECK_BOUNDS(range) \
  {\
    all_in = 1;\
    all_in &= ((br-range) >= x->mv_row_min);\
    all_in &= ((br+range) <= x->mv_row_max);\
    all_in &= ((bc-range) >= x->mv_col_min);\
    all_in &= ((bc+range) <= x->mv_col_max);\
  }

// Skips (via 'continue') a candidate vector that falls outside the valid
// MV window. Only usable directly inside a loop body.
#define CHECK_POINT \
  {\
    if (this_mv.as_mv.col < x->mv_col_min) continue;\
    if (this_mv.as_mv.col > x->mv_col_max) continue;\
    if (this_mv.as_mv.row < x->mv_row_min) continue;\
    if (this_mv.as_mv.row > x->mv_row_max) continue;\
  }

// Updates the running best (bestsad, best_site) when the candidate improves.
// The raw SAD is compared first so the mv-cost term is only computed for
// candidates that could possibly win.
#define CHECK_BETTER \
  {\
    if (thissad < bestsad)\
    {\
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, \
                                sad_per_bit);\
      if (thissad < bestsad)\
      {\
        bestsad = thissad;\
        best_site = i;\
      }\
    }\
  }
1093
1094 static const MV next_chkpts[6][3] = {
1095 {{ -2, 0}, { -1, -2}, {1, -2}},
1096 {{ -1, -2}, {1, -2}, {2, 0}},
1097 {{1, -2}, {2, 0}, {1, 2}},
1098 {{2, 0}, {1, 2}, { -1, 2}},
1099 {{1, 2}, { -1, 2}, { -2, 0}},
1100 {{ -1, 2}, { -2, 0}, { -1, -2}}
1101 };
1102
1103 int vp9_hex_search
1104 (
1105 MACROBLOCK *x,
1106 BLOCK *b,
1107 BLOCKD *d,
1108 int_mv *ref_mv,
1109 int_mv *best_mv,
1110 int search_param,
1111 int sad_per_bit,
1112 const vp9_variance_fn_ptr_t *vfp,
1113 int *mvjsadcost, int *mvsadcost[2],
1114 int *mvjcost, int *mvcost[2],
1115 int_mv *center_mv
1116 ) {
1117 MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} };
1118 MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
1119 int i, j;
1120
1121 unsigned char *what = (*(b->base_src) + b->src);
1122 int what_stride = b->src_stride;
1123 int in_what_stride = d->pre_stride;
1124 int br, bc;
1125 int_mv this_mv;
1126 unsigned int bestsad = 0x7fffffff;
1127 unsigned int thissad;
1128 unsigned char *base_offset;
1129 unsigned char *this_offset;
1130 int k = -1;
1131 int all_in;
1132 int best_site = -1;
1133
1134 int_mv fcenter_mv;
1135 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1136 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1137
1138 // adjust ref_mv to make sure it is within MV range
1139 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1140 br = ref_mv->as_mv.row;
1141 bc = ref_mv->as_mv.col;
1142
1143 // Work out the start point for the search
1144 base_offset = (unsigned char *)(*(d->base_pre) + d->pre);
1145 this_offset = base_offset + (br * (d->pre_stride)) + bc;
1146 this_mv.as_mv.row = br;
1147 this_mv.as_mv.col = bc;
1148 bestsad = vfp->sdf(what, what_stride, this_offset,
1149 in_what_stride, 0x7fffffff)
1150 + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1151 sad_per_bit);
1152
1153 // hex search
1154 // j=0
1155 CHECK_BOUNDS(2)
1156
1157 if (all_in) {
1158 for (i = 0; i < 6; i++) {
1159 this_mv.as_mv.row = br + hex[i].row;
1160 this_mv.as_mv.col = bc + hex[i].col;
1161 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv .as_mv.col;
1162 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad );
1163 CHECK_BETTER
1164 }
1165 } else {
1166 for (i = 0; i < 6; i++) {
1167 this_mv.as_mv.row = br + hex[i].row;
1168 this_mv.as_mv.col = bc + hex[i].col;
1169 CHECK_POINT
1170 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv .as_mv.col;
1171 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad );
1172 CHECK_BETTER
1173 }
1174 }
1175
1176 if (best_site == -1)
1177 goto cal_neighbors;
1178 else {
1179 br += hex[best_site].row;
1180 bc += hex[best_site].col;
1181 k = best_site;
1182 }
1183
1184 for (j = 1; j < 127; j++) {
1185 best_site = -1;
1186 CHECK_BOUNDS(2)
1187
1188 if (all_in) {
1189 for (i = 0; i < 3; i++) {
1190 this_mv.as_mv.row = br + next_chkpts[k][i].row;
1191 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
1192 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + thi s_mv.as_mv.col;
1193 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bests ad);
1194 CHECK_BETTER
1195 }
1196 } else {
1197 for (i = 0; i < 3; i++) {
1198 this_mv.as_mv.row = br + next_chkpts[k][i].row;
1199 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
1200 CHECK_POINT
1201 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + thi s_mv.as_mv.col;
1202 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bests ad);
1203 CHECK_BETTER
1204 }
1205 }
1206
1207 if (best_site == -1)
1208 break;
1209 else {
1210 br += next_chkpts[k][best_site].row;
1211 bc += next_chkpts[k][best_site].col;
1212 k += 5 + best_site;
1213 if (k >= 12) k -= 12;
1214 else if (k >= 6) k -= 6;
1215 }
1216 }
1217
1218 // check 4 1-away neighbors
1219 cal_neighbors:
1220 for (j = 0; j < 32; j++) {
1221 best_site = -1;
1222 CHECK_BOUNDS(1)
1223
1224 if (all_in) {
1225 for (i = 0; i < 4; i++) {
1226 this_mv.as_mv.row = br + neighbors[i].row;
1227 this_mv.as_mv.col = bc + neighbors[i].col;
1228 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + thi s_mv.as_mv.col;
1229 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bests ad);
1230 CHECK_BETTER
1231 }
1232 } else {
1233 for (i = 0; i < 4; i++) {
1234 this_mv.as_mv.row = br + neighbors[i].row;
1235 this_mv.as_mv.col = bc + neighbors[i].col;
1236 CHECK_POINT
1237 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + thi s_mv.as_mv.col;
1238 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bests ad);
1239 CHECK_BETTER
1240 }
1241 }
1242
1243 if (best_site == -1)
1244 break;
1245 else {
1246 br += neighbors[best_site].row;
1247 bc += neighbors[best_site].col;
1248 }
1249 }
1250
1251 best_mv->as_mv.row = br;
1252 best_mv->as_mv.col = bc;
1253
1254 return bestsad;
1255 }
1256 #undef CHECK_BOUNDS
1257 #undef CHECK_POINT
1258 #undef CHECK_BETTER
1259
1260 int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1261 int_mv *ref_mv, int_mv *best_mv,
1262 int search_param, int sad_per_bit, int *num00,
1263 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1264 int *mvcost[2], int_mv *center_mv) {
1265 int i, j, step;
1266
1267 unsigned char *what = (*(b->base_src) + b->src);
1268 int what_stride = b->src_stride;
1269 unsigned char *in_what;
1270 int in_what_stride = d->pre_stride;
1271 unsigned char *best_address;
1272
1273 int tot_steps;
1274 int_mv this_mv;
1275
1276 int bestsad = INT_MAX;
1277 int best_site = 0;
1278 int last_site = 0;
1279
1280 int ref_row, ref_col;
1281 int this_row_offset, this_col_offset;
1282 search_site *ss;
1283
1284 unsigned char *check_here;
1285 int thissad;
1286 MACROBLOCKD *xd = &x->e_mbd;
1287 int_mv fcenter_mv;
1288
1289 int *mvjsadcost = x->nmvjointsadcost;
1290 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1291
1292 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1293 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1294
1295 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1296 ref_row = ref_mv->as_mv.row;
1297 ref_col = ref_mv->as_mv.col;
1298 *num00 = 0;
1299 best_mv->as_mv.row = ref_row;
1300 best_mv->as_mv.col = ref_col;
1301
1302 // Work out the start point for the search
1303 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_strid e)) + ref_col);
1304 best_address = in_what;
1305
1306 // Check the starting position
1307 bestsad = fn_ptr->sdf(what, what_stride, in_what,
1308 in_what_stride, 0x7fffffff)
1309 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1310 sad_per_bit);
1311
1312 // search_param determines the length of the initial step and hence the number of iterations
1313 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MA X_FIRST_STEP/4) pel... etc.
1314 ss = &x->ss[search_param * x->searches_per_step];
1315 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1316
1317 i = 1;
1318
1319 for (step = 0; step < tot_steps; step++) {
1320 for (j = 0; j < x->searches_per_step; j++) {
1321 // Trap illegal vectors
1322 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1323 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1324
1325 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1326 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max) )
1327
1328 {
1329 check_here = ss[i].offset + best_address;
1330 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bes tsad);
1331
1332 if (thissad < bestsad) {
1333 this_mv.as_mv.row = this_row_offset;
1334 this_mv.as_mv.col = this_col_offset;
1335 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1336 mvjsadcost, mvsadcost, sad_per_bit);
1337
1338 if (thissad < bestsad) {
1339 bestsad = thissad;
1340 best_site = i;
1341 }
1342 }
1343 }
1344
1345 i++;
1346 }
1347
1348 if (best_site != last_site) {
1349 best_mv->as_mv.row += ss[best_site].mv.row;
1350 best_mv->as_mv.col += ss[best_site].mv.col;
1351 best_address += ss[best_site].offset;
1352 last_site = best_site;
1353 } else if (best_address == in_what)
1354 (*num00)++;
1355 }
1356
1357 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1358 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1359
1360 if (bestsad == INT_MAX)
1361 return INT_MAX;
1362
1363 return
1364 fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1365 (unsigned int *)(&thissad)) +
1366 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1367 xd->allow_high_precision_mv);
1368 }
1369
1370 int vp9_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1371 int_mv *ref_mv, int_mv *best_mv, int search_param,
1372 int sad_per_bit, int *num00,
1373 vp9_variance_fn_ptr_t *fn_ptr,
1374 int *mvjcost, int *mvcost[2], int_mv *center_mv) {
1375 int i, j, step;
1376
1377 unsigned char *what = (*(b->base_src) + b->src);
1378 int what_stride = b->src_stride;
1379 unsigned char *in_what;
1380 int in_what_stride = d->pre_stride;
1381 unsigned char *best_address;
1382
1383 int tot_steps;
1384 int_mv this_mv;
1385
1386 unsigned int bestsad = INT_MAX;
1387 int best_site = 0;
1388 int last_site = 0;
1389
1390 int ref_row;
1391 int ref_col;
1392 int this_row_offset;
1393 int this_col_offset;
1394 search_site *ss;
1395
1396 unsigned char *check_here;
1397 unsigned int thissad;
1398 MACROBLOCKD *xd = &x->e_mbd;
1399 int_mv fcenter_mv;
1400
1401 int *mvjsadcost = x->nmvjointsadcost;
1402 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1403
1404 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1405 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1406
1407 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1408 ref_row = ref_mv->as_mv.row;
1409 ref_col = ref_mv->as_mv.col;
1410 *num00 = 0;
1411 best_mv->as_mv.row = ref_row;
1412 best_mv->as_mv.col = ref_col;
1413
1414 // Work out the start point for the search
1415 in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_strid e)) + ref_col);
1416 best_address = in_what;
1417
1418 // Check the starting position
1419 bestsad = fn_ptr->sdf(what, what_stride,
1420 in_what, in_what_stride, 0x7fffffff)
1421 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1422 sad_per_bit);
1423
1424 // search_param determines the length of the initial step and hence the number of iterations
1425 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MA X_FIRST_STEP/4) pel... etc.
1426 ss = &x->ss[search_param * x->searches_per_step];
1427 tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1428
1429 i = 1;
1430
1431 for (step = 0; step < tot_steps; step++) {
1432 int all_in = 1, t;
1433
1434 // To know if all neighbor points are within the bounds, 4 bounds checking a re enough instead of
1435 // checking 4 bounds for each points.
1436 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1437 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1438 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1439 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
1440
1441 if (all_in) {
1442 unsigned int sad_array[4];
1443
1444 for (j = 0; j < x->searches_per_step; j += 4) {
1445 unsigned char const *block_offset[4];
1446
1447 for (t = 0; t < 4; t++)
1448 block_offset[t] = ss[i + t].offset + best_address;
1449
1450 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1451 sad_array);
1452
1453 for (t = 0; t < 4; t++, i++) {
1454 if (sad_array[t] < bestsad) {
1455 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1456 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1457 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1458 mvjsadcost, mvsadcost, sad_per_bit);
1459
1460 if (sad_array[t] < bestsad) {
1461 bestsad = sad_array[t];
1462 best_site = i;
1463 }
1464 }
1465 }
1466 }
1467 } else {
1468 for (j = 0; j < x->searches_per_step; j++) {
1469 // Trap illegal vectors
1470 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1471 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1472
1473 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_ma x) &&
1474 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_ma x)) {
1475 check_here = ss[i].offset + best_address;
1476 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, b estsad);
1477
1478 if (thissad < bestsad) {
1479 this_mv.as_mv.row = this_row_offset;
1480 this_mv.as_mv.col = this_col_offset;
1481 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1482 mvjsadcost, mvsadcost, sad_per_bit);
1483
1484 if (thissad < bestsad) {
1485 bestsad = thissad;
1486 best_site = i;
1487 }
1488 }
1489 }
1490 i++;
1491 }
1492 }
1493
1494 if (best_site != last_site) {
1495 best_mv->as_mv.row += ss[best_site].mv.row;
1496 best_mv->as_mv.col += ss[best_site].mv.col;
1497 best_address += ss[best_site].offset;
1498 last_site = best_site;
1499 } else if (best_address == in_what)
1500 (*num00)++;
1501 }
1502
1503 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1504 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1505
1506 if (bestsad == INT_MAX)
1507 return INT_MAX;
1508
1509 return
1510 fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1511 (unsigned int *)(&thissad)) +
1512 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1513 xd->allow_high_precision_mv);
1514 }
1515
1516 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
1517 point as the best match, we will do a final 1-away diamond
1518 refining search */
1519 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
1520 BLOCKD *d, int_mv *mvp_full, int step_param,
1521 int sadpb, int further_steps,
1522 int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
1523 int_mv *ref_mv, int_mv *dst_mv) {
1524 int_mv temp_mv;
1525 int thissme, n, num00;
1526 int bestsme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv,
1527 step_param, sadpb, &num00,
1528 fn_ptr, x->nmvjointcost,
1529 x->mvcost, ref_mv);
1530 dst_mv->as_int = temp_mv.as_int;
1531
1532 n = num00;
1533 num00 = 0;
1534
1535 /* If there won't be more n-step search, check to see if refining search is ne eded. */
1536 if (n > further_steps)
1537 do_refine = 0;
1538
1539 while (n < further_steps) {
1540 n++;
1541
1542 if (num00)
1543 num00--;
1544 else {
1545 thissme = cpi->diamond_search_sad(x, b, d, mvp_full, &temp_mv,
1546 step_param + n, sadpb, &num00,
1547 fn_ptr, x->nmvjointcost, x->mvcost,
1548 ref_mv);
1549
1550 /* check to see if refining search is needed. */
1551 if (num00 > (further_steps - n))
1552 do_refine = 0;
1553
1554 if (thissme < bestsme) {
1555 bestsme = thissme;
1556 dst_mv->as_int = temp_mv.as_int;
1557 }
1558 }
1559 }
1560
1561 /* final 1-away diamond refining search */
1562 if (do_refine == 1) {
1563 int search_range = 8;
1564 int_mv best_mv;
1565 best_mv.as_int = dst_mv->as_int;
1566 thissme = cpi->refining_search_sad(x, b, d, &best_mv, sadpb, search_range,
1567 fn_ptr, x->nmvjointcost, x->mvcost,
1568 ref_mv);
1569
1570 if (thissme < bestsme) {
1571 bestsme = thissme;
1572 dst_mv->as_int = best_mv.as_int;
1573 }
1574 }
1575 return bestsme;
1576 }
1577
1578 int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1579 int sad_per_bit, int distance,
1580 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1581 int *mvcost[2],
1582 int_mv *center_mv) {
1583 unsigned char *what = (*(b->base_src) + b->src);
1584 int what_stride = b->src_stride;
1585 unsigned char *in_what;
1586 int in_what_stride = d->pre_stride;
1587 int mv_stride = d->pre_stride;
1588 unsigned char *bestaddress;
1589 int_mv *best_mv = &d->bmi.as_mv.first;
1590 int_mv this_mv;
1591 int bestsad = INT_MAX;
1592 int r, c;
1593
1594 unsigned char *check_here;
1595 int thissad;
1596 MACROBLOCKD *xd = &x->e_mbd;
1597
1598 int ref_row = ref_mv->as_mv.row;
1599 int ref_col = ref_mv->as_mv.col;
1600
1601 int row_min = ref_row - distance;
1602 int row_max = ref_row + distance;
1603 int col_min = ref_col - distance;
1604 int col_max = ref_col + distance;
1605 int_mv fcenter_mv;
1606
1607 int *mvjsadcost = x->nmvjointsadcost;
1608 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1609
1610 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1611 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1612
1613 // Work out the mid point for the search
1614 in_what = *(d->base_pre) + d->pre;
1615 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1616
1617 best_mv->as_mv.row = ref_row;
1618 best_mv->as_mv.col = ref_col;
1619
1620 // Baseline value at the centre
1621 bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
1622 in_what_stride, 0x7fffffff)
1623 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1624 sad_per_bit);
1625
1626 // Apply further limits to prevent us looking using vectors that stretch beyio nd the UMV border
1627 if (col_min < x->mv_col_min)
1628 col_min = x->mv_col_min;
1629
1630 if (col_max > x->mv_col_max)
1631 col_max = x->mv_col_max;
1632
1633 if (row_min < x->mv_row_min)
1634 row_min = x->mv_row_min;
1635
1636 if (row_max > x->mv_row_max)
1637 row_max = x->mv_row_max;
1638
1639 for (r = row_min; r < row_max; r++) {
1640 this_mv.as_mv.row = r;
1641 check_here = r * mv_stride + in_what + col_min;
1642
1643 for (c = col_min; c < col_max; c++) {
1644 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bests ad);
1645
1646 this_mv.as_mv.col = c;
1647 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1648 mvjsadcost, mvsadcost, sad_per_bit);
1649
1650 if (thissad < bestsad) {
1651 bestsad = thissad;
1652 best_mv->as_mv.row = r;
1653 best_mv->as_mv.col = c;
1654 bestaddress = check_here;
1655 }
1656
1657 check_here++;
1658 }
1659 }
1660
1661 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1662 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1663
1664 if (bestsad < INT_MAX)
1665 return
1666 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1667 (unsigned int *)(&thissad)) +
1668 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1669 xd->allow_high_precision_mv);
1670 else
1671 return INT_MAX;
1672 }
1673
1674 int vp9_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1675 int sad_per_bit, int distance,
1676 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1677 int *mvcost[2], int_mv *center_mv) {
1678 unsigned char *what = (*(b->base_src) + b->src);
1679 int what_stride = b->src_stride;
1680 unsigned char *in_what;
1681 int in_what_stride = d->pre_stride;
1682 int mv_stride = d->pre_stride;
1683 unsigned char *bestaddress;
1684 int_mv *best_mv = &d->bmi.as_mv.first;
1685 int_mv this_mv;
1686 unsigned int bestsad = INT_MAX;
1687 int r, c;
1688
1689 unsigned char *check_here;
1690 unsigned int thissad;
1691 MACROBLOCKD *xd = &x->e_mbd;
1692
1693 int ref_row = ref_mv->as_mv.row;
1694 int ref_col = ref_mv->as_mv.col;
1695
1696 int row_min = ref_row - distance;
1697 int row_max = ref_row + distance;
1698 int col_min = ref_col - distance;
1699 int col_max = ref_col + distance;
1700
1701 unsigned int sad_array[3];
1702 int_mv fcenter_mv;
1703
1704 int *mvjsadcost = x->nmvjointsadcost;
1705 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1706
1707 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1708 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1709
1710 // Work out the mid point for the search
1711 in_what = *(d->base_pre) + d->pre;
1712 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1713
1714 best_mv->as_mv.row = ref_row;
1715 best_mv->as_mv.col = ref_col;
1716
1717 // Baseline value at the centre
1718 bestsad = fn_ptr->sdf(what, what_stride,
1719 bestaddress, in_what_stride, 0x7fffffff)
1720 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1721 sad_per_bit);
1722
1723 // Apply further limits to prevent us looking using vectors that stretch beyio nd the UMV border
1724 if (col_min < x->mv_col_min)
1725 col_min = x->mv_col_min;
1726
1727 if (col_max > x->mv_col_max)
1728 col_max = x->mv_col_max;
1729
1730 if (row_min < x->mv_row_min)
1731 row_min = x->mv_row_min;
1732
1733 if (row_max > x->mv_row_max)
1734 row_max = x->mv_row_max;
1735
1736 for (r = row_min; r < row_max; r++) {
1737 this_mv.as_mv.row = r;
1738 check_here = r * mv_stride + in_what + col_min;
1739 c = col_min;
1740
1741 while ((c + 2) < col_max) {
1742 int i;
1743
1744 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1745
1746 for (i = 0; i < 3; i++) {
1747 thissad = sad_array[i];
1748
1749 if (thissad < bestsad) {
1750 this_mv.as_mv.col = c;
1751 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1752 mvjsadcost, mvsadcost, sad_per_bit);
1753
1754 if (thissad < bestsad) {
1755 bestsad = thissad;
1756 best_mv->as_mv.row = r;
1757 best_mv->as_mv.col = c;
1758 bestaddress = check_here;
1759 }
1760 }
1761
1762 check_here++;
1763 c++;
1764 }
1765 }
1766
1767 while (c < col_max) {
1768 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bests ad);
1769
1770 if (thissad < bestsad) {
1771 this_mv.as_mv.col = c;
1772 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1773 mvjsadcost, mvsadcost, sad_per_bit);
1774
1775 if (thissad < bestsad) {
1776 bestsad = thissad;
1777 best_mv->as_mv.row = r;
1778 best_mv->as_mv.col = c;
1779 bestaddress = check_here;
1780 }
1781 }
1782
1783 check_here++;
1784 c++;
1785 }
1786
1787 }
1788
1789 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1790 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1791
1792 if (bestsad < INT_MAX)
1793 return
1794 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1795 (unsigned int *)(&thissad)) +
1796 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1797 xd->allow_high_precision_mv);
1798 else
1799 return INT_MAX;
1800 }
1801
1802 int vp9_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1803 int sad_per_bit, int distance,
1804 vp9_variance_fn_ptr_t *fn_ptr,
1805 int *mvjcost, int *mvcost[2],
1806 int_mv *center_mv) {
1807 unsigned char *what = (*(b->base_src) + b->src);
1808 int what_stride = b->src_stride;
1809 unsigned char *in_what;
1810 int in_what_stride = d->pre_stride;
1811 int mv_stride = d->pre_stride;
1812 unsigned char *bestaddress;
1813 int_mv *best_mv = &d->bmi.as_mv.first;
1814 int_mv this_mv;
1815 unsigned int bestsad = INT_MAX;
1816 int r, c;
1817
1818 unsigned char *check_here;
1819 unsigned int thissad;
1820 MACROBLOCKD *xd = &x->e_mbd;
1821
1822 int ref_row = ref_mv->as_mv.row;
1823 int ref_col = ref_mv->as_mv.col;
1824
1825 int row_min = ref_row - distance;
1826 int row_max = ref_row + distance;
1827 int col_min = ref_col - distance;
1828 int col_max = ref_col + distance;
1829
1830 DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
1831 unsigned int sad_array[3];
1832 int_mv fcenter_mv;
1833
1834 int *mvjsadcost = x->nmvjointsadcost;
1835 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1836
1837 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1838 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1839
1840 // Work out the mid point for the search
1841 in_what = *(d->base_pre) + d->pre;
1842 bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1843
1844 best_mv->as_mv.row = ref_row;
1845 best_mv->as_mv.col = ref_col;
1846
1847 // Baseline value at the centre
1848 bestsad = fn_ptr->sdf(what, what_stride,
1849 bestaddress, in_what_stride, 0x7fffffff)
1850 + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1851 sad_per_bit);
1852
1853 // Apply further limits to prevent us looking using vectors that stretch beyio nd the UMV border
1854 if (col_min < x->mv_col_min)
1855 col_min = x->mv_col_min;
1856
1857 if (col_max > x->mv_col_max)
1858 col_max = x->mv_col_max;
1859
1860 if (row_min < x->mv_row_min)
1861 row_min = x->mv_row_min;
1862
1863 if (row_max > x->mv_row_max)
1864 row_max = x->mv_row_max;
1865
1866 for (r = row_min; r < row_max; r++) {
1867 this_mv.as_mv.row = r;
1868 check_here = r * mv_stride + in_what + col_min;
1869 c = col_min;
1870
1871 while ((c + 7) < col_max) {
1872 int i;
1873
1874 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1875
1876 for (i = 0; i < 8; i++) {
1877 thissad = (unsigned int)sad_array8[i];
1878
1879 if (thissad < bestsad) {
1880 this_mv.as_mv.col = c;
1881 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1882 mvjsadcost, mvsadcost, sad_per_bit);
1883
1884 if (thissad < bestsad) {
1885 bestsad = thissad;
1886 best_mv->as_mv.row = r;
1887 best_mv->as_mv.col = c;
1888 bestaddress = check_here;
1889 }
1890 }
1891
1892 check_here++;
1893 c++;
1894 }
1895 }
1896
1897 while ((c + 2) < col_max) {
1898 int i;
1899
1900 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1901
1902 for (i = 0; i < 3; i++) {
1903 thissad = sad_array[i];
1904
1905 if (thissad < bestsad) {
1906 this_mv.as_mv.col = c;
1907 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1908 mvjsadcost, mvsadcost, sad_per_bit);
1909
1910 if (thissad < bestsad) {
1911 bestsad = thissad;
1912 best_mv->as_mv.row = r;
1913 best_mv->as_mv.col = c;
1914 bestaddress = check_here;
1915 }
1916 }
1917
1918 check_here++;
1919 c++;
1920 }
1921 }
1922
1923 while (c < col_max) {
1924 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bests ad);
1925
1926 if (thissad < bestsad) {
1927 this_mv.as_mv.col = c;
1928 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1929 mvjsadcost, mvsadcost, sad_per_bit);
1930
1931 if (thissad < bestsad) {
1932 bestsad = thissad;
1933 best_mv->as_mv.row = r;
1934 best_mv->as_mv.col = c;
1935 bestaddress = check_here;
1936 }
1937 }
1938
1939 check_here++;
1940 c++;
1941 }
1942 }
1943
1944 this_mv.as_mv.row = best_mv->as_mv.row << 3;
1945 this_mv.as_mv.col = best_mv->as_mv.col << 3;
1946
1947 if (bestsad < INT_MAX)
1948 return
1949 fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1950 (unsigned int *)(&thissad)) +
1951 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1952 xd->allow_high_precision_mv);
1953 else
1954 return INT_MAX;
1955 }
1956 int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1957 int_mv *ref_mv, int error_per_bit,
1958 int search_range, vp9_variance_fn_ptr_t *fn_ptr,
1959 int *mvjcost, int *mvcost[2], int_mv *center_mv) {
1960 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1961 int i, j;
1962 short this_row_offset, this_col_offset;
1963
1964 int what_stride = b->src_stride;
1965 int in_what_stride = d->pre_stride;
1966 unsigned char *what = (*(b->base_src) + b->src);
1967 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
1968 (ref_mv->as_mv.row * (d->pre_s tride)) + ref_mv->as_mv.col);
1969 unsigned char *check_here;
1970 unsigned int thissad;
1971 int_mv this_mv;
1972 unsigned int bestsad = INT_MAX;
1973 MACROBLOCKD *xd = &x->e_mbd;
1974 int_mv fcenter_mv;
1975
1976 int *mvjsadcost = x->nmvjointsadcost;
1977 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1978
1979 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1980 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1981
1982 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7ffff fff) +
1983 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1984
1985 for (i = 0; i < search_range; i++) {
1986 int best_site = -1;
1987
1988 for (j = 0; j < 4; j++) {
1989 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1990 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1991
1992 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1993 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max) ) {
1994 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + be st_address;
1995 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bes tsad);
1996
1997 if (thissad < bestsad) {
1998 this_mv.as_mv.row = this_row_offset;
1999 this_mv.as_mv.col = this_col_offset;
2000 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2001 mvsadcost, error_per_bit);
2002
2003 if (thissad < bestsad) {
2004 bestsad = thissad;
2005 best_site = j;
2006 }
2007 }
2008 }
2009 }
2010
2011 if (best_site == -1)
2012 break;
2013 else {
2014 ref_mv->as_mv.row += neighbors[best_site].row;
2015 ref_mv->as_mv.col += neighbors[best_site].col;
2016 best_address += (neighbors[best_site].row) * in_what_stride + neighbors[be st_site].col;
2017 }
2018 }
2019
2020 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2021 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2022
2023 if (bestsad < INT_MAX)
2024 return
2025 fn_ptr->vf(what, what_stride, best_address, in_what_stride,
2026 (unsigned int *)(&thissad)) +
2027 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2028 xd->allow_high_precision_mv);
2029 else
2030 return INT_MAX;
2031 }
2032
2033 int vp9_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
2034 int_mv *ref_mv, int error_per_bit,
2035 int search_range, vp9_variance_fn_ptr_t *fn_ptr,
2036 int *mvjcost, int *mvcost[2], int_mv *center_mv) {
2037 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
2038 int i, j;
2039 short this_row_offset, this_col_offset;
2040
2041 int what_stride = b->src_stride;
2042 int in_what_stride = d->pre_stride;
2043 unsigned char *what = (*(b->base_src) + b->src);
2044 unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre +
2045 (ref_mv->as_mv.row * (d->pre_s tride)) + ref_mv->as_mv.col);
2046 unsigned char *check_here;
2047 unsigned int thissad;
2048 int_mv this_mv;
2049 unsigned int bestsad = INT_MAX;
2050 MACROBLOCKD *xd = &x->e_mbd;
2051 int_mv fcenter_mv;
2052
2053 int *mvjsadcost = x->nmvjointsadcost;
2054 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
2055
2056 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
2057 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
2058
2059 bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7ffff fff) +
2060 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
2061
2062 for (i = 0; i < search_range; i++) {
2063 int best_site = -1;
2064 int all_in = 1;
2065
2066 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
2067 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
2068 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
2069 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
2070
2071 if (all_in) {
2072 unsigned int sad_array[4];
2073 unsigned char const *block_offset[4];
2074 block_offset[0] = best_address - in_what_stride;
2075 block_offset[1] = best_address - 1;
2076 block_offset[2] = best_address + 1;
2077 block_offset[3] = best_address + in_what_stride;
2078
2079 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array) ;
2080
2081 for (j = 0; j < 4; j++) {
2082 if (sad_array[j] < bestsad) {
2083 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
2084 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
2085 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2086 mvsadcost, error_per_bit);
2087
2088 if (sad_array[j] < bestsad) {
2089 bestsad = sad_array[j];
2090 best_site = j;
2091 }
2092 }
2093 }
2094 } else {
2095 for (j = 0; j < 4; j++) {
2096 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
2097 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2098
2099 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_ma x) &&
2100 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_ma x)) {
2101 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
2102 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, b estsad);
2103
2104 if (thissad < bestsad) {
2105 this_mv.as_mv.row = this_row_offset;
2106 this_mv.as_mv.col = this_col_offset;
2107 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2108 mvsadcost, error_per_bit);
2109
2110 if (thissad < bestsad) {
2111 bestsad = thissad;
2112 best_site = j;
2113 }
2114 }
2115 }
2116 }
2117 }
2118
2119 if (best_site == -1)
2120 break;
2121 else {
2122 ref_mv->as_mv.row += neighbors[best_site].row;
2123 ref_mv->as_mv.col += neighbors[best_site].col;
2124 best_address += (neighbors[best_site].row) * in_what_stride + neighbors[be st_site].col;
2125 }
2126 }
2127
2128 this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2129 this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2130
2131 if (bestsad < INT_MAX)
2132 return
2133 fn_ptr->vf(what, what_stride, best_address, in_what_stride,
2134 (unsigned int *)(&thissad)) +
2135 mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2136 xd->allow_high_precision_mv);
2137 else
2138 return INT_MAX;
2139 }
2140
2141
2142
2143 #ifdef ENTROPY_STATS
2144 void print_mode_context(void) {
2145 FILE *f = fopen("vp9_modecont.c", "a");
2146 int i, j;
2147
2148 fprintf(f, "#include \"vp9_entropy.h\"\n");
2149 fprintf(f, "const int vp9_mode_contexts[6][4] =");
2150 fprintf(f, "{\n");
2151 for (j = 0; j < 6; j++) {
2152 fprintf(f, " {/* %d */ ", j);
2153 fprintf(f, " ");
2154 for (i = 0; i < 4; i++) {
2155 int this_prob;
2156 int count;
2157
2158 // context probs
2159 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
2160 if (count)
2161 this_prob = 256 * mv_ref_ct[j][i][0] / count;
2162 else
2163 this_prob = 128;
2164
2165 if (this_prob == 0)
2166 this_prob = 1;
2167 fprintf(f, "%5d, ", this_prob);
2168 }
2169 fprintf(f, " },\n");
2170 }
2171
2172 fprintf(f, "};\n");
2173 fclose(f);
2174 }
2175
2176 /* MV ref count ENTROPY_STATS stats code */
2177 void init_mv_ref_counts() {
2178 vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
2179 vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
2180 }
2181
2182 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
2183 if (m == ZEROMV) {
2184 ++mv_ref_ct [ct[0]] [0] [0];
2185 ++mv_mode_cts[0][0];
2186 } else {
2187 ++mv_ref_ct [ct[0]] [0] [1];
2188 ++mv_mode_cts[0][1];
2189
2190 if (m == NEARESTMV) {
2191 ++mv_ref_ct [ct[1]] [1] [0];
2192 ++mv_mode_cts[1][0];
2193 } else {
2194 ++mv_ref_ct [ct[1]] [1] [1];
2195 ++mv_mode_cts[1][1];
2196
2197 if (m == NEARMV) {
2198 ++mv_ref_ct [ct[2]] [2] [0];
2199 ++mv_mode_cts[2][0];
2200 } else {
2201 ++mv_ref_ct [ct[2]] [2] [1];
2202 ++mv_mode_cts[2][1];
2203
2204 if (m == NEWMV) {
2205 ++mv_ref_ct [ct[3]] [3] [0];
2206 ++mv_mode_cts[3][0];
2207 } else {
2208 ++mv_ref_ct [ct[3]] [3] [1];
2209 ++mv_mode_cts[3][1];
2210 }
2211 }
2212 }
2213 }
2214 }
2215
2216 #endif/* END MV ref count ENTROPY_STATS stats code */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698