Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(303)

Side by Side Diff: source/libvpx/vp9/encoder/vp9_rdopt.c

Issue 23600008: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_segmentation.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11
12 #include <stdio.h> 11 #include <stdio.h>
13 #include <math.h> 12 #include <math.h>
14 #include <limits.h> 13 #include <limits.h>
15 #include <assert.h> 14 #include <assert.h>
16 15
17 #include "vp9/common/vp9_pragmas.h" 16 #include "vp9/common/vp9_pragmas.h"
18 #include "vp9/encoder/vp9_tokenize.h" 17 #include "vp9/encoder/vp9_tokenize.h"
19 #include "vp9/encoder/vp9_treewriter.h" 18 #include "vp9/encoder/vp9_treewriter.h"
20 #include "vp9/encoder/vp9_onyx_int.h" 19 #include "vp9/encoder/vp9_onyx_int.h"
21 #include "vp9/encoder/vp9_modecosts.h" 20 #include "vp9/encoder/vp9_modecosts.h"
(...skipping 25 matching lines...) Expand all
47 #define SWITCHABLE_INTERP_RATE_FACTOR 1 46 #define SWITCHABLE_INTERP_RATE_FACTOR 1
48 47
49 DECLARE_ALIGNED(16, extern const uint8_t, 48 DECLARE_ALIGNED(16, extern const uint8_t,
50 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); 49 vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
51 50
52 #define I4X4_PRED 0x8000 51 #define I4X4_PRED 0x8000
53 #define SPLITMV 0x10000 52 #define SPLITMV 0x10000
54 53
55 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { 54 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56 {NEARESTMV, LAST_FRAME, NONE}, 55 {NEARESTMV, LAST_FRAME, NONE},
56 {DC_PRED, INTRA_FRAME, NONE},
57
57 {NEARESTMV, ALTREF_FRAME, NONE}, 58 {NEARESTMV, ALTREF_FRAME, NONE},
58 {NEARESTMV, GOLDEN_FRAME, NONE}, 59 {NEARESTMV, GOLDEN_FRAME, NONE},
59 {NEWMV, LAST_FRAME, NONE}, 60 {NEWMV, LAST_FRAME, NONE},
60 {NEARESTMV, LAST_FRAME, ALTREF_FRAME}, 61 {NEARESTMV, LAST_FRAME, ALTREF_FRAME},
61 {NEARMV, LAST_FRAME, NONE}, 62 {NEARMV, LAST_FRAME, NONE},
62 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME}, 63 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
63 64
64 {DC_PRED, INTRA_FRAME, NONE},
65
66 {NEWMV, GOLDEN_FRAME, NONE}, 65 {NEWMV, GOLDEN_FRAME, NONE},
67 {NEWMV, ALTREF_FRAME, NONE}, 66 {NEWMV, ALTREF_FRAME, NONE},
68 {NEARMV, ALTREF_FRAME, NONE}, 67 {NEARMV, ALTREF_FRAME, NONE},
69 68
70 {TM_PRED, INTRA_FRAME, NONE}, 69 {TM_PRED, INTRA_FRAME, NONE},
71 70
72 {NEARMV, LAST_FRAME, ALTREF_FRAME}, 71 {NEARMV, LAST_FRAME, ALTREF_FRAME},
73 {NEWMV, LAST_FRAME, ALTREF_FRAME}, 72 {NEWMV, LAST_FRAME, ALTREF_FRAME},
74 {NEARMV, GOLDEN_FRAME, NONE}, 73 {NEARMV, GOLDEN_FRAME, NONE},
75 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME}, 74 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
76 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME}, 75 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
77 76
78 {SPLITMV, LAST_FRAME, NONE}, 77 {SPLITMV, LAST_FRAME, NONE},
79 {SPLITMV, GOLDEN_FRAME, NONE}, 78 {SPLITMV, GOLDEN_FRAME, NONE},
80 {SPLITMV, ALTREF_FRAME, NONE}, 79 {SPLITMV, ALTREF_FRAME, NONE},
81 {SPLITMV, LAST_FRAME, ALTREF_FRAME}, 80 {SPLITMV, LAST_FRAME, ALTREF_FRAME},
82 {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME}, 81 {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
83 82
84 {ZEROMV, LAST_FRAME, NONE}, 83 {ZEROMV, LAST_FRAME, NONE},
85 {ZEROMV, GOLDEN_FRAME, NONE}, 84 {ZEROMV, GOLDEN_FRAME, NONE},
86 {ZEROMV, ALTREF_FRAME, NONE}, 85 {ZEROMV, ALTREF_FRAME, NONE},
87 {ZEROMV, LAST_FRAME, ALTREF_FRAME}, 86 {ZEROMV, LAST_FRAME, ALTREF_FRAME},
88 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME}, 87 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
89 88
90 {I4X4_PRED, INTRA_FRAME, NONE}, 89 {I4X4_PRED, INTRA_FRAME, NONE},
91 {H_PRED, INTRA_FRAME, NONE}, 90 {H_PRED, INTRA_FRAME, NONE},
92 {V_PRED, INTRA_FRAME, NONE}, 91 {V_PRED, INTRA_FRAME, NONE},
93 {D135_PRED, INTRA_FRAME, NONE}, 92 {D135_PRED, INTRA_FRAME, NONE},
94 {D27_PRED, INTRA_FRAME, NONE}, 93 {D207_PRED, INTRA_FRAME, NONE},
95 {D153_PRED, INTRA_FRAME, NONE}, 94 {D153_PRED, INTRA_FRAME, NONE},
96 {D63_PRED, INTRA_FRAME, NONE}, 95 {D63_PRED, INTRA_FRAME, NONE},
97 {D117_PRED, INTRA_FRAME, NONE}, 96 {D117_PRED, INTRA_FRAME, NONE},
98 {D45_PRED, INTRA_FRAME, NONE}, 97 {D45_PRED, INTRA_FRAME, NONE},
99 }; 98 };
100 99
101 // The baseline rd thresholds for breaking out of the rd loop for 100 // The baseline rd thresholds for breaking out of the rd loop for
102 // certain modes are assumed to be based on 8x8 blocks. 101 // certain modes are assumed to be based on 8x8 blocks.
103 // This table is used to correct for block size. 102 // This table is used to correct for block size.
104 // The factors here are scaled by 4, i.e. << 2 (2 = x0.5, 32 = x8, etc.). 103 // The factors here are scaled by 4, i.e. << 2 (2 = x0.5, 32 = x8, etc.).
105 static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = 104 static int rd_thresh_block_size_factor[BLOCK_SIZES] =
106 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; 105 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
107 106
108 #define BASE_RD_THRESH_FREQ_FACT 16 107 #define MAX_RD_THRESH_FACT 64
109 #define MAX_RD_THRESH_FREQ_FACT 32 108 #define RD_THRESH_INC 1
110 #define MAX_RD_THRESH_FREQ_INC 1
111 109
112 static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2], 110 static void fill_token_costs(vp9_coeff_cost *c,
113 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) { 111 vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114 int i, j, k, l; 112 int i, j, k, l;
115 TX_SIZE t; 113 TX_SIZE t;
116 for (t = TX_4X4; t <= TX_32X32; t++) 114 for (t = TX_4X4; t <= TX_32X32; t++)
117 for (i = 0; i < BLOCK_TYPES; i++) 115 for (i = 0; i < BLOCK_TYPES; i++)
118 for (j = 0; j < REF_TYPES; j++) 116 for (j = 0; j < REF_TYPES; j++)
119 for (k = 0; k < COEF_BANDS; k++) 117 for (k = 0; k < COEF_BANDS; k++)
120 for (l = 0; l < PREV_COEF_CONTEXTS; l++) { 118 for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
121 vp9_prob probs[ENTROPY_NODES]; 119 vp9_prob probs[ENTROPY_NODES];
122 vp9_model_to_full_probs(p[t][i][j][k][l], probs); 120 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123 vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs, 121 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
124 vp9_coef_tree); 122 vp9_coef_tree);
125 vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs, 123 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
126 vp9_coef_tree); 124 vp9_coef_tree);
127 assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] == 125 assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
128 c[t][i][j][1][k][l][DCT_EOB_TOKEN]); 126 c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
129 } 127 }
130 } 128 }
131 129
132 static const int rd_iifactor[32] = { 130 static const int rd_iifactor[32] = {
133 4, 4, 3, 2, 1, 0, 0, 0, 131 4, 4, 3, 2, 1, 0, 0, 0,
134 0, 0, 0, 0, 0, 0, 0, 0, 132 0, 0, 0, 0, 0, 0, 0, 0,
135 0, 0, 0, 0, 0, 0, 0, 0, 133 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, 0, 0, 0, 0, 134 0, 0, 0, 0, 0, 0, 0, 0,
137 }; 135 };
138 136
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
192 190
193 q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25); 191 q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
194 q <<= 2; 192 q <<= 2;
195 if (q < 8) 193 if (q < 8)
196 q = 8; 194 q = 8;
197 195
198 if (cpi->RDMULT > 1000) { 196 if (cpi->RDMULT > 1000) {
199 cpi->RDDIV = 1; 197 cpi->RDDIV = 1;
200 cpi->RDMULT /= 100; 198 cpi->RDMULT /= 100;
201 199
202 for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) { 200 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
203 for (i = 0; i < MAX_MODES; ++i) { 201 for (i = 0; i < MAX_MODES; ++i) {
204 // The threshold here seems unnecessarily harsh, but is fine given the actual 202 // The threshold here seems unnecessarily harsh, but is fine given the actual
205 // range of values used for cpi->sf.thresh_mult[]. 203 // range of values used for cpi->sf.thresh_mult[].
206 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); 204 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
207 205
208 // *4 relates to the scaling of rd_thresh_block_size_factor[] 206 // *4 relates to the scaling of rd_thresh_block_size_factor[]
209 if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) { 207 if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
210 cpi->rd_threshes[bsize][i] = 208 cpi->rd_threshes[bsize][i] =
211 cpi->sf.thresh_mult[i] * q * 209 cpi->sf.thresh_mult[i] * q *
212 rd_thresh_block_size_factor[bsize] / (4 * 100); 210 rd_thresh_block_size_factor[bsize] / (4 * 100);
213 } else { 211 } else {
214 cpi->rd_threshes[bsize][i] = INT_MAX; 212 cpi->rd_threshes[bsize][i] = INT_MAX;
215 } 213 }
216 cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
217
218 if (cpi->sf.adaptive_rd_thresh)
219 cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
220 else
221 cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
222 } 214 }
223 } 215 }
224 } else { 216 } else {
225 cpi->RDDIV = 100; 217 cpi->RDDIV = 100;
226 218
227 for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) { 219 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
228 for (i = 0; i < MAX_MODES; i++) { 220 for (i = 0; i < MAX_MODES; i++) {
229 // The threshold here seems unnecessarily harsh, but is fine given the actual 221 // The threshold here seems unnecessarily harsh, but is fine given the actual
230 // range of values used for cpi->sf.thresh_mult[]. 222 // range of values used for cpi->sf.thresh_mult[].
231 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); 223 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
232 224
233 if (cpi->sf.thresh_mult[i] < thresh_max) { 225 if (cpi->sf.thresh_mult[i] < thresh_max) {
234 cpi->rd_threshes[bsize][i] = 226 cpi->rd_threshes[bsize][i] =
235 cpi->sf.thresh_mult[i] * q * 227 cpi->sf.thresh_mult[i] * q *
236 rd_thresh_block_size_factor[bsize] / 4; 228 rd_thresh_block_size_factor[bsize] / 4;
237 } else { 229 } else {
238 cpi->rd_threshes[bsize][i] = INT_MAX; 230 cpi->rd_threshes[bsize][i] = INT_MAX;
239 } 231 }
240 cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
241
242 if (cpi->sf.adaptive_rd_thresh)
243 cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
244 else
245 cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
246 } 232 }
247 } 233 }
248 } 234 }
249 235
250 fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs); 236 fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);
251 237
252 for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) 238 for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
253 vp9_cost_tokens(cpi->mb.partition_cost[i], 239 vp9_cost_tokens(cpi->mb.partition_cost[i],
254 cpi->common.fc.partition_prob[cpi->common.frame_type][i], 240 cpi->common.fc.partition_prob[cpi->common.frame_type][i],
255 vp9_partition_tree); 241 vp9_partition_tree);
(...skipping 14 matching lines...) Expand all
270 256
271 for (m = NEARESTMV; m < MB_MODE_COUNT; m++) 257 for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
272 cpi->mb.inter_mode_cost[i][m - NEARESTMV] = 258 cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
273 cost_token(vp9_inter_mode_tree, 259 cost_token(vp9_inter_mode_tree,
274 cpi->common.fc.inter_mode_probs[i], 260 cpi->common.fc.inter_mode_probs[i],
275 vp9_inter_mode_encodings - NEARESTMV + m); 261 vp9_inter_mode_encodings - NEARESTMV + m);
276 } 262 }
277 } 263 }
278 } 264 }
279 265
280 static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
281 return bsize_from_dim_lookup[bwl][bhl];
282 }
283
284 static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
285 struct macroblockd_plane *pd) {
286 return get_block_size(plane_block_width_log2by4(bsize, pd),
287 plane_block_height_log2by4(bsize, pd));
288 }
289
290 static INLINE void linear_interpolate2(double x, int ntab, int inv_step, 266 static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
291 const double *tab1, const double *tab2, 267 const double *tab1, const double *tab2,
292 double *v1, double *v2) { 268 double *v1, double *v2) {
293 double y = x * inv_step; 269 double y = x * inv_step;
294 int d = (int) y; 270 int d = (int) y;
295 if (d >= ntab - 1) { 271 if (d >= ntab - 1) {
296 *v1 = tab1[ntab - 1]; 272 *v1 = tab1[ntab - 1];
297 *v2 = tab2[ntab - 1]; 273 *v2 = tab2[ntab - 1];
298 } else { 274 } else {
299 double a = y - d; 275 double a = y - d;
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
381 double D, R; 357 double D, R;
382 double s2 = (double) var / n; 358 double s2 = (double) var / n;
383 double x = qstep / sqrt(s2); 359 double x = qstep / sqrt(s2);
384 model_rd_norm(x, &R, &D); 360 model_rd_norm(x, &R, &D);
385 *rate = ((n << 8) * R + 0.5); 361 *rate = ((n << 8) * R + 0.5);
386 *dist = (var * D + 0.5); 362 *dist = (var * D + 0.5);
387 } 363 }
388 vp9_clear_system_state(); 364 vp9_clear_system_state();
389 } 365 }
390 366
391 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, 367 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
392 MACROBLOCK *x, MACROBLOCKD *xd, 368 MACROBLOCK *x, MACROBLOCKD *xd,
393 int *out_rate_sum, int64_t *out_dist_sum) { 369 int *out_rate_sum, int64_t *out_dist_sum) {
394 // Note our transform coeffs are 8 times an orthogonal transform. 370 // Note our transform coeffs are 8 times an orthogonal transform.
395 // Hence quantizer step is also 8 times. To get effective quantizer 371 // Hence quantizer step is also 8 times. To get effective quantizer
396 // we need to divide by 8 before sending to modeling function. 372 // we need to divide by 8 before sending to modeling function.
397 int i, rate_sum = 0, dist_sum = 0; 373 int i, rate_sum = 0, dist_sum = 0;
398 374
399 for (i = 0; i < MAX_MB_PLANE; ++i) { 375 for (i = 0; i < MAX_MB_PLANE; ++i) {
400 struct macroblock_plane *const p = &x->plane[i]; 376 struct macroblock_plane *const p = &x->plane[i];
401 struct macroblockd_plane *const pd = &xd->plane[i]; 377 struct macroblockd_plane *const pd = &xd->plane[i];
402 378 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
403 // TODO(dkovalev) the same code in get_plane_block_size
404 const int bwl = plane_block_width_log2by4(bsize, pd);
405 const int bhl = plane_block_height_log2by4(bsize, pd);
406 const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
407 unsigned int sse; 379 unsigned int sse;
408 int rate; 380 int rate;
409 int64_t dist; 381 int64_t dist;
410 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, 382 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
411 pd->dst.buf, pd->dst.stride, &sse); 383 pd->dst.buf, pd->dst.stride, &sse);
412 // sse works better than var, since there is no dc prediction used 384 // sse works better than var, since there is no dc prediction used
413 model_rd_from_var_lapndz(sse, 16 << (bwl + bhl), 385 model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
414 pd->dequant[1] >> 3, &rate, &dist); 386 pd->dequant[1] >> 3, &rate, &dist);
415 387
416 rate_sum += rate; 388 rate_sum += rate;
417 dist_sum += dist; 389 dist_sum += dist;
418 } 390 }
419 391
420 *out_rate_sum = rate_sum; 392 *out_rate_sum = rate_sum;
421 *out_dist_sum = dist_sum << 4; 393 *out_dist_sum = dist_sum << 4;
422 } 394 }
423 395
424 static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, 396 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
425 MACROBLOCK *x, MACROBLOCKD *xd,
426 int *out_rate_sum, int64_t *out_dist_sum) {
427 // Note our transform coeffs are 8 times an orthogonal transform.
428 // Hence quantizer step is also 8 times. To get effective quantizer
429 // we need to divide by 8 before sending to modeling function.
430 struct macroblock_plane *const p = &x->plane[0];
431 struct macroblockd_plane *const pd = &xd->plane[0];
432
433 // TODO(dkovalev) the same code in get_plane_block_size
434 const int bwl = plane_block_width_log2by4(bsize, pd);
435 const int bhl = plane_block_height_log2by4(bsize, pd);
436 const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
437 unsigned int sse;
438 int rate;
439 int64_t dist;
440 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
441 pd->dst.buf, pd->dst.stride, &sse);
442 // sse works better than var, since there is no dc prediction used
443 model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
444 pd->dequant[1] >> 3, &rate, &dist);
445
446 *out_rate_sum = rate;
447 *out_dist_sum = dist << 4;
448 }
449
450 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
451 TX_SIZE tx_size, 397 TX_SIZE tx_size,
452 MACROBLOCK *x, MACROBLOCKD *xd, 398 MACROBLOCK *x, MACROBLOCKD *xd,
453 int *out_rate_sum, int64_t *out_dist_sum, 399 int *out_rate_sum, int64_t *out_dist_sum,
454 int *out_skip) { 400 int *out_skip) {
455 int t = 4, j, k; 401 int j, k;
456 BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4; 402 BLOCK_SIZE bs;
457 struct macroblock_plane *const p = &x->plane[0]; 403 struct macroblock_plane *const p = &x->plane[0];
458 struct macroblockd_plane *const pd = &xd->plane[0]; 404 struct macroblockd_plane *const pd = &xd->plane[0];
459 const int width = plane_block_width(bsize, pd); 405 const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
460 const int height = plane_block_height(bsize, pd); 406 const int height = 4 << num_4x4_blocks_high_lookup[bsize];
461 int rate_sum = 0; 407 int rate_sum = 0;
462 int64_t dist_sum = 0; 408 int64_t dist_sum = 0;
409 const int t = 4 << tx_size;
463 410
464 if (tx_size == TX_4X4) { 411 if (tx_size == TX_4X4) {
465 bs = BLOCK_4X4; 412 bs = BLOCK_4X4;
466 t = 4;
467 } else if (tx_size == TX_8X8) { 413 } else if (tx_size == TX_8X8) {
468 bs = BLOCK_8X8; 414 bs = BLOCK_8X8;
469 t = 8;
470 } else if (tx_size == TX_16X16) { 415 } else if (tx_size == TX_16X16) {
471 bs = BLOCK_16X16; 416 bs = BLOCK_16X16;
472 t = 16;
473 } else if (tx_size == TX_32X32) { 417 } else if (tx_size == TX_32X32) {
474 bs = BLOCK_32X32; 418 bs = BLOCK_32X32;
475 t = 32;
476 } else { 419 } else {
477 assert(0); 420 assert(0);
478 } 421 }
422
479 *out_skip = 1; 423 *out_skip = 1;
480 for (j = 0; j < height; j += t) { 424 for (j = 0; j < height; j += t) {
481 for (k = 0; k < width; k += t) { 425 for (k = 0; k < width; k += t) {
482 int rate; 426 int rate;
483 int64_t dist; 427 int64_t dist;
484 unsigned int sse; 428 unsigned int sse;
485 (void) cpi->fn_ptr[bs].vf(p->src.buf + j * p->src.stride + k, 429 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
486 p->src.stride, 430 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
487 pd->dst.buf + j * pd->dst.stride + k, 431 &sse);
488 pd->dst.stride, &sse);
489 // sse works better than var, since there is no dc prediction used 432 // sse works better than var, since there is no dc prediction used
490 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, 433 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
491 &rate, &dist);
492 rate_sum += rate; 434 rate_sum += rate;
493 dist_sum += dist; 435 dist_sum += dist;
494 *out_skip &= (rate < 1024); 436 *out_skip &= (rate < 1024);
495 } 437 }
496 } 438 }
439
497 *out_rate_sum = rate_sum; 440 *out_rate_sum = rate_sum;
498 *out_dist_sum = (dist_sum << 4); 441 *out_dist_sum = dist_sum << 4;
499 } 442 }
500 443
501 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, 444 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
502 intptr_t block_size, int64_t *ssz) { 445 intptr_t block_size, int64_t *ssz) {
503 int i; 446 int i;
504 int64_t error = 0, sqcoeff = 0; 447 int64_t error = 0, sqcoeff = 0;
505 448
506 for (i = 0; i < block_size; i++) { 449 for (i = 0; i < block_size; i++) {
507 int this_diff = coeff[i] - dqcoeff[i]; 450 int this_diff = coeff[i] - dqcoeff[i];
508 error += (unsigned)this_diff * this_diff; 451 error += (unsigned)this_diff * this_diff;
509 sqcoeff += (unsigned) coeff[i] * coeff[i]; 452 sqcoeff += (unsigned) coeff[i] * coeff[i];
510 } 453 }
511 454
512 *ssz = sqcoeff; 455 *ssz = sqcoeff;
513 return error; 456 return error;
514 } 457 }
515 458
516 static const int16_t band_counts[TX_SIZE_MAX_SB][8] = { 459 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
517 { 1, 2, 3, 4, 3, 16 - 13 }, 460 * decide whether to include cost of a trailing EOB node or not (i.e. we
518 { 1, 2, 3, 4, 11, 64 - 21 }, 461 * can skip this if the last coefficient in this transform block, e.g. the
519 { 1, 2, 3, 4, 11, 256 - 21 }, 462 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
520 { 1, 2, 3, 4, 11, 1024 - 21 }, 463 * was non-zero). */
464 static const int16_t band_counts[TX_SIZES][8] = {
465 { 1, 2, 3, 4, 3, 16 - 13, 0 },
466 { 1, 2, 3, 4, 11, 64 - 21, 0 },
467 { 1, 2, 3, 4, 11, 256 - 21, 0 },
468 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
521 }; 469 };
522 470
523 static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, 471 static INLINE int cost_coeffs(MACROBLOCK *mb,
524 int plane, int block, PLANE_TYPE type, 472 int plane, int block,
525 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, 473 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
526 TX_SIZE tx_size, 474 TX_SIZE tx_size,
527 const int16_t *scan, const int16_t *nb) { 475 const int16_t *scan, const int16_t *nb) {
528 MACROBLOCKD *const xd = &mb->e_mbd; 476 MACROBLOCKD *const xd = &mb->e_mbd;
529 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 477 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
530 int pt, c, cost; 478 struct macroblockd_plane *pd = &xd->plane[plane];
531 const int16_t *band_count = band_counts[tx_size]; 479 const PLANE_TYPE type = pd->plane_type;
532 const int eob = xd->plane[plane].eobs[block]; 480 const int16_t *band_count = &band_counts[tx_size][1];
533 const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); 481 const int eob = pd->eobs[block];
482 const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
534 const int ref = mbmi->ref_frame[0] != INTRA_FRAME; 483 const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
535 unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] 484 unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
536 [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; 485 mb->token_costs[tx_size][type][ref];
537 ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L; 486 const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
538 uint8_t token_cache[1024]; 487 uint8_t token_cache[1024];
488 int pt = combine_entropy_contexts(above_ec, left_ec);
489 int c, cost;
539 490
540 // Check for consistency of tx_size with mode info 491 // Check for consistency of tx_size with mode info
541 assert((!type && !plane) || (type && plane)); 492 assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->txfm_size == tx_size
542 if (type == PLANE_TYPE_Y_WITH_DC) { 493 : get_uv_tx_size(mbmi) == tx_size);
543 assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
544 } else {
545 assert(tx_size == get_uv_tx_size(mbmi));
546 }
547
548 pt = combine_entropy_contexts(above_ec, left_ec);
549 494
550 if (eob == 0) { 495 if (eob == 0) {
551 // single eob token 496 // single eob token
552 cost = token_costs[0][0][pt][DCT_EOB_TOKEN]; 497 cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
553 c = 0; 498 c = 0;
554 } else { 499 } else {
555 int v, prev_t, band = 1, band_left = band_count[1]; 500 int band_left = *band_count++;
556 501
557 // dc token 502 // dc token
558 v = qcoeff_ptr[0]; 503 int v = qcoeff_ptr[0];
559 prev_t = vp9_dct_value_tokens_ptr[v].token; 504 int prev_t = vp9_dct_value_tokens_ptr[v].token;
560 cost = token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; 505 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
561 token_cache[0] = vp9_pt_energy_class[prev_t]; 506 token_cache[0] = vp9_pt_energy_class[prev_t];
507 ++token_costs;
562 508
563 // ac tokens 509 // ac tokens
564 for (c = 1; c < eob; c++) { 510 for (c = 1; c < eob; c++) {
565 const int rc = scan[c]; 511 const int rc = scan[c];
566 int t; 512 int t;
567 513
568 v = qcoeff_ptr[rc]; 514 v = qcoeff_ptr[rc];
569 t = vp9_dct_value_tokens_ptr[v].token; 515 t = vp9_dct_value_tokens_ptr[v].token;
570 pt = get_coef_context(nb, token_cache, c); 516 pt = get_coef_context(nb, token_cache, c);
571 cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v]; 517 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
572 token_cache[rc] = vp9_pt_energy_class[t]; 518 token_cache[rc] = vp9_pt_energy_class[t];
573 prev_t = t; 519 prev_t = t;
574 if (!--band_left) { 520 if (!--band_left) {
575 band_left = band_count[++band]; 521 band_left = *band_count++;
522 ++token_costs;
576 } 523 }
577 } 524 }
578 525
579 // eob token 526 // eob token
580 if (band < 6) { 527 if (band_left) {
581 pt = get_coef_context(nb, token_cache, c); 528 pt = get_coef_context(nb, token_cache, c);
582 cost += token_costs[0][band][pt][DCT_EOB_TOKEN]; 529 cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
583 } 530 }
584 } 531 }
585 532
586 // Both contexts record whether any coefficient was coded (eob > 0). 533 // Both contexts record whether any coefficient was coded (eob > 0).
587 *A = *L = c > 0; 534 *A = *L = (c > 0);
588 535
589 return cost; 536 return cost;
590 } 537 }
591 538
592 struct rdcost_block_args { 539 struct rdcost_block_args {
593 VP9_COMMON *cm;
594 MACROBLOCK *x; 540 MACROBLOCK *x;
595 ENTROPY_CONTEXT t_above[16]; 541 ENTROPY_CONTEXT t_above[16];
596 ENTROPY_CONTEXT t_left[16]; 542 ENTROPY_CONTEXT t_left[16];
597 TX_SIZE tx_size; 543 TX_SIZE tx_size;
598 int bw; 544 int bw;
599 int bh; 545 int bh;
600 int rate; 546 int rate;
601 int64_t dist; 547 int64_t dist;
602 int64_t sse; 548 int64_t sse;
603 int64_t best_rd; 549 int64_t best_rd;
604 int skip; 550 int skip;
605 const int16_t *scan, *nb; 551 const int16_t *scan, *nb;
606 }; 552 };
607 553
608 static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize, 554 static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
609 int ss_txfrm_size, void *arg) { 555 const int ss_txfrm_size = tx_size << 1;
610 struct rdcost_block_args* args = arg; 556 struct rdcost_block_args* args = arg;
611 MACROBLOCK* const x = args->x; 557 MACROBLOCK* const x = args->x;
612 MACROBLOCKD* const xd = &x->e_mbd; 558 MACROBLOCKD* const xd = &x->e_mbd;
613 struct macroblock_plane *const p = &x->plane[0]; 559 struct macroblock_plane *const p = &x->plane[plane];
614 struct macroblockd_plane *const pd = &xd->plane[0]; 560 struct macroblockd_plane *const pd = &xd->plane[plane];
615 int64_t this_sse; 561 int64_t this_sse;
616 int shift = args->tx_size == TX_32X32 ? 0 : 2; 562 int shift = args->tx_size == TX_32X32 ? 0 : 2;
617 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block, 16); 563 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
618 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16); 564 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
619 args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, 565 args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
620 &this_sse) >> shift; 566 &this_sse) >> shift;
621 args->sse += this_sse >> shift; 567 args->sse += this_sse >> shift;
622 568
623 if (x->skip_encode && 569 if (x->skip_encode &&
624 xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) { 570 xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
625 // TODO(jingning): tune the model to better capture the distortion. 571 // TODO(jingning): tune the model to better capture the distortion.
626 int64_t p = (pd->dequant[1] * pd->dequant[1] * 572 int64_t p = (pd->dequant[1] * pd->dequant[1] *
627 (1 << ss_txfrm_size)) >> shift; 573 (1 << ss_txfrm_size)) >> shift;
628 args->dist += p; 574 args->dist += p;
629 args->sse += p; 575 args->sse += p;
630 } 576 }
631 } 577 }
632 578
633 static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize, 579 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
634 int ss_txfrm_size, void *arg) { 580 TX_SIZE tx_size, void *arg) {
635 struct rdcost_block_args* args = arg; 581 struct rdcost_block_args* args = arg;
582
636 int x_idx, y_idx; 583 int x_idx, y_idx;
637 MACROBLOCKD * const xd = &args->x->e_mbd; 584 txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
638 585
639 txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx, 586 args->rate += cost_coeffs(args->x, plane, block,
640 &y_idx); 587 args->t_above + x_idx,
641
642 args->rate += cost_coeffs(args->cm, args->x, plane, block,
643 xd->plane[plane].plane_type, args->t_above + x_idx,
644 args->t_left + y_idx, args->tx_size, 588 args->t_left + y_idx, args->tx_size,
645 args->scan, args->nb); 589 args->scan, args->nb);
646 } 590 }
647 591
648 // FIXME(jingning): need to make the rd test of chroma components consistent 592 static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
649 // with that of luma component. this function should be deprecated afterwards. 593 TX_SIZE tx_size, void *arg) {
650 static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
651 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
652 MACROBLOCKD * const xd = &x->e_mbd;
653 const int bwl = plane_block_width_log2by4(bsize, &xd->plane[plane]);
654 const int bhl = plane_block_height_log2by4(bsize, &xd->plane[plane]);
655 const int bw = 1 << bwl, bh = 1 << bhl;
656 int i;
657 struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
658 0, 0, 0, INT64_MAX, 0 };
659
660 switch (tx_size) {
661 case TX_4X4:
662 vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
663 sizeof(ENTROPY_CONTEXT) * bw);
664 vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
665 sizeof(ENTROPY_CONTEXT) * bh);
666 args.scan = vp9_default_scan_4x4;
667 args.nb = vp9_default_scan_4x4_neighbors;
668 break;
669 case TX_8X8:
670 for (i = 0; i < bw; i += 2)
671 args.t_above[i] = !!*(uint16_t *)&xd->plane[plane].above_context[i];
672 for (i = 0; i < bh; i += 2)
673 args.t_left[i] = !!*(uint16_t *)&xd->plane[plane].left_context[i];
674 args.scan = vp9_default_scan_8x8;
675 args.nb = vp9_default_scan_8x8_neighbors;
676 break;
677 case TX_16X16:
678 for (i = 0; i < bw; i += 4)
679 args.t_above[i] = !!*(uint32_t *)&xd->plane[plane].above_context[i];
680 for (i = 0; i < bh; i += 4)
681 args.t_left[i] = !!*(uint32_t *)&xd->plane[plane].left_context[i];
682 args.scan = vp9_default_scan_16x16;
683 args.nb = vp9_default_scan_16x16_neighbors;
684 break;
685 case TX_32X32:
686 for (i = 0; i < bw; i += 8)
687 args.t_above[i] = !!*(uint64_t *)&xd->plane[plane].above_context[i];
688 for (i = 0; i < bh; i += 8)
689 args.t_left[i] = !!*(uint64_t *)&xd->plane[plane].left_context[i];
690 args.scan = vp9_default_scan_32x32;
691 args.nb = vp9_default_scan_32x32_neighbors;
692 break;
693 default:
694 assert(0);
695 }
696
697 foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
698 return args.rate;
699 }
700
701 static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
702 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
703 int cost = 0, plane;
704
705 for (plane = 1; plane < MAX_MB_PLANE; plane++) {
706 cost += rdcost_plane(cm, x, plane, bsize, tx_size);
707 }
708 return cost;
709 }
710
711 static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
712 int shift, int64_t *sse) {
713 struct macroblockd_plane *p = &x->e_mbd.plane[0];
714 const int bwl = plane_block_width_log2by4(bsize, p);
715 const int bhl = plane_block_height_log2by4(bsize, p);
716 int64_t e = vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
717 16 << (bwl + bhl), sse) >> shift;
718 *sse >>= shift;
719 return e;
720 }
721
722 static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
723 int shift, int64_t *sse) {
724 int64_t sum = 0, this_sse;
725 int plane;
726
727 *sse = 0;
728 for (plane = 1; plane < MAX_MB_PLANE; plane++) {
729 struct macroblockd_plane *p = &x->e_mbd.plane[plane];
730 const int bwl = plane_block_width_log2by4(bsize, p);
731 const int bhl = plane_block_height_log2by4(bsize, p);
732 sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
733 16 << (bwl + bhl), &this_sse);
734 *sse += this_sse;
735 }
736 *sse >>= shift;
737 return sum >> shift;
738 }
739
740 static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
741 int ss_txfrm_size, void *arg) {
742 struct rdcost_block_args *args = arg; 594 struct rdcost_block_args *args = arg;
743 MACROBLOCK *const x = args->x; 595 MACROBLOCK *const x = args->x;
744 MACROBLOCKD *const xd = &x->e_mbd; 596 MACROBLOCKD *const xd = &x->e_mbd;
745 struct encode_b_args encode_args = {args->cm, x, NULL}; 597 struct encode_b_args encode_args = {x, NULL};
746 int64_t rd1, rd2, rd; 598 int64_t rd1, rd2, rd;
747 599
748 if (args->skip) 600 if (args->skip)
749 return; 601 return;
750 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); 602 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
751 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); 603 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
752 rd = MIN(rd1, rd2); 604 rd = MIN(rd1, rd2);
753 if (rd > args->best_rd) { 605 if (rd > args->best_rd) {
754 args->skip = 1; 606 args->skip = 1;
755 args->rate = INT_MAX; 607 args->rate = INT_MAX;
756 args->dist = INT64_MAX; 608 args->dist = INT64_MAX;
757 args->sse = INT64_MAX; 609 args->sse = INT64_MAX;
758 return; 610 return;
759 } 611 }
760 612
761 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) 613 if (!is_inter_block(&xd->mode_info_context->mbmi))
762 encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args); 614 vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
763 else 615 else
764 xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args); 616 vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
765 617
766 dist_block(plane, block, bsize, ss_txfrm_size, args); 618 dist_block(plane, block, tx_size, args);
767 rate_block(plane, block, bsize, ss_txfrm_size, args); 619 rate_block(plane, block, plane_bsize, tx_size, args);
768 } 620 }
769 621
770 static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, 622 static void txfm_rd_in_plane(MACROBLOCK *x,
771 int *rate, int64_t *distortion, 623 int *rate, int64_t *distortion,
772 int *skippable, int64_t *sse, 624 int *skippable, int64_t *sse,
773 int64_t ref_best_rd, 625 int64_t ref_best_rd, int plane,
774 BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { 626 BLOCK_SIZE bsize, TX_SIZE tx_size) {
775 MACROBLOCKD *const xd = &x->e_mbd; 627 MACROBLOCKD *const xd = &x->e_mbd;
776 struct macroblockd_plane *const pd = &xd->plane[0]; 628 struct macroblockd_plane *const pd = &xd->plane[plane];
777 const int bwl = plane_block_width_log2by4(bsize, pd); 629 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
778 const int bhl = plane_block_height_log2by4(bsize, pd); 630 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
779 const int bw = 1 << bwl, bh = 1 << bhl; 631 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
780 int i; 632 int i;
781 struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 633 struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size,
634 num_4x4_blocks_wide, num_4x4_blocks_high,
782 0, 0, 0, ref_best_rd, 0 }; 635 0, 0, 0, ref_best_rd, 0 };
783 xd->mode_info_context->mbmi.txfm_size = tx_size; 636 if (plane == 0)
637 xd->mode_info_context->mbmi.txfm_size = tx_size;
638
784 switch (tx_size) { 639 switch (tx_size) {
785 case TX_4X4: 640 case TX_4X4:
786 vpx_memcpy(&args.t_above, pd->above_context, 641 vpx_memcpy(&args.t_above, pd->above_context,
787 sizeof(ENTROPY_CONTEXT) * bw); 642 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
788 vpx_memcpy(&args.t_left, pd->left_context, 643 vpx_memcpy(&args.t_left, pd->left_context,
789 sizeof(ENTROPY_CONTEXT) * bh); 644 sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
790 get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, 0), 645 get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
791 &args.scan, &args.nb); 646 &args.scan, &args.nb);
792 break; 647 break;
793 case TX_8X8: 648 case TX_8X8:
794 for (i = 0; i < bw; i += 2) 649 for (i = 0; i < num_4x4_blocks_wide; i += 2)
795 args.t_above[i] = !!*(uint16_t *)&pd->above_context[i]; 650 args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
796 for (i = 0; i < bh; i += 2) 651 for (i = 0; i < num_4x4_blocks_high; i += 2)
797 args.t_left[i] = !!*(uint16_t *)&pd->left_context[i]; 652 args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
798 get_scan_nb_8x8(get_tx_type_8x8(PLANE_TYPE_Y_WITH_DC, xd), 653 get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
799 &args.scan, &args.nb); 654 &args.scan, &args.nb);
800 break; 655 break;
801 case TX_16X16: 656 case TX_16X16:
802 for (i = 0; i < bw; i += 4) 657 for (i = 0; i < num_4x4_blocks_wide; i += 4)
803 args.t_above[i] = !!*(uint32_t *)&pd->above_context[i]; 658 args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
804 for (i = 0; i < bh; i += 4) 659 for (i = 0; i < num_4x4_blocks_high; i += 4)
805 args.t_left[i] = !!*(uint32_t *)&pd->left_context[i]; 660 args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
806 get_scan_nb_16x16(get_tx_type_16x16(PLANE_TYPE_Y_WITH_DC, xd), 661 get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
807 &args.scan, &args.nb); 662 &args.scan, &args.nb);
808 break; 663 break;
809 case TX_32X32: 664 case TX_32X32:
810 for (i = 0; i < bw; i += 8) 665 for (i = 0; i < num_4x4_blocks_wide; i += 8)
811 args.t_above[i] = !!*(uint64_t *)&pd->above_context[i]; 666 args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
812 for (i = 0; i < bh; i += 8) 667 for (i = 0; i < num_4x4_blocks_high; i += 8)
813 args.t_left[i] = !!*(uint64_t *)&pd->left_context[i]; 668 args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
814 args.scan = vp9_default_scan_32x32; 669 args.scan = vp9_default_scan_32x32;
815 args.nb = vp9_default_scan_32x32_neighbors; 670 args.nb = vp9_default_scan_32x32_neighbors;
816 break; 671 break;
817 default: 672 default:
818 assert(0); 673 assert(0);
819 } 674 }
820 675
821 foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args); 676 foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
822 *distortion = args.dist; 677 *distortion = args.dist;
823 *rate = args.rate; 678 *rate = args.rate;
824 *sse = args.sse; 679 *sse = args.sse;
825 *skippable = vp9_sby_is_skippable(xd, bsize) && (!args.skip); 680 *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip);
826 } 681 }
827 682
828 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, 683 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
829 int *rate, int64_t *distortion, 684 int *rate, int64_t *distortion,
830 int *skip, int64_t *sse, 685 int *skip, int64_t *sse,
831 int64_t ref_best_rd, 686 int64_t ref_best_rd,
832 BLOCK_SIZE_TYPE bs) { 687 BLOCK_SIZE bs) {
833 const TX_SIZE max_txfm_size = TX_32X32 688 const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
834 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
835 VP9_COMMON *const cm = &cpi->common; 689 VP9_COMMON *const cm = &cpi->common;
836 MACROBLOCKD *const xd = &x->e_mbd; 690 MACROBLOCKD *const xd = &x->e_mbd;
837 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 691 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
838 if (max_txfm_size == TX_32X32 && 692 if (max_txfm_size == TX_32X32 &&
839 (cm->tx_mode == ALLOW_32X32 || 693 (cm->tx_mode == ALLOW_32X32 ||
840 cm->tx_mode == TX_MODE_SELECT)) { 694 cm->tx_mode == TX_MODE_SELECT)) {
841 mbmi->txfm_size = TX_32X32; 695 mbmi->txfm_size = TX_32X32;
842 } else if (max_txfm_size >= TX_16X16 && 696 } else if (max_txfm_size >= TX_16X16 &&
843 (cm->tx_mode == ALLOW_16X16 || 697 (cm->tx_mode == ALLOW_16X16 ||
844 cm->tx_mode == ALLOW_32X32 || 698 cm->tx_mode == ALLOW_32X32 ||
845 cm->tx_mode == TX_MODE_SELECT)) { 699 cm->tx_mode == TX_MODE_SELECT)) {
846 mbmi->txfm_size = TX_16X16; 700 mbmi->txfm_size = TX_16X16;
847 } else if (cm->tx_mode != ONLY_4X4) { 701 } else if (cm->tx_mode != ONLY_4X4) {
848 mbmi->txfm_size = TX_8X8; 702 mbmi->txfm_size = TX_8X8;
849 } else { 703 } else {
850 mbmi->txfm_size = TX_4X4; 704 mbmi->txfm_size = TX_4X4;
851 } 705 }
852 super_block_yrd_for_txfm(cm, x, rate, distortion, skip, 706 txfm_rd_in_plane(x, rate, distortion, skip,
853 &sse[mbmi->txfm_size], ref_best_rd, bs, 707 &sse[mbmi->txfm_size], ref_best_rd, 0, bs,
854 mbmi->txfm_size); 708 mbmi->txfm_size);
855 cpi->txfm_stepdown_count[0]++; 709 cpi->txfm_stepdown_count[0]++;
856 } 710 }
857 711
858 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, 712 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
859 int (*r)[2], int *rate, 713 int (*r)[2], int *rate,
860 int64_t *d, int64_t *distortion, 714 int64_t *d, int64_t *distortion,
861 int *s, int *skip, 715 int *s, int *skip,
862 int64_t txfm_cache[NB_TXFM_MODES], 716 int64_t tx_cache[TX_MODES],
863 BLOCK_SIZE_TYPE bs) { 717 BLOCK_SIZE bs) {
864 const TX_SIZE max_txfm_size = TX_32X32 718 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
865 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
866 VP9_COMMON *const cm = &cpi->common; 719 VP9_COMMON *const cm = &cpi->common;
867 MACROBLOCKD *const xd = &x->e_mbd; 720 MACROBLOCKD *const xd = &x->e_mbd;
868 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 721 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
869 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd); 722 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
870 int64_t rd[TX_SIZE_MAX_SB][2]; 723 int64_t rd[TX_SIZES][2];
871 int n, m; 724 int n, m;
872 int s0, s1; 725 int s0, s1;
873 726
874 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs); 727 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
875 728
876 for (n = TX_4X4; n <= max_txfm_size; n++) { 729 for (n = TX_4X4; n <= max_tx_size; n++) {
877 r[n][1] = r[n][0]; 730 r[n][1] = r[n][0];
878 if (r[n][0] == INT_MAX) 731 if (r[n][0] == INT_MAX)
879 continue; 732 continue;
880 for (m = 0; m <= n - (n == max_txfm_size); m++) { 733 for (m = 0; m <= n - (n == max_tx_size); m++) {
881 if (m == n) 734 if (m == n)
882 r[n][1] += vp9_cost_zero(tx_probs[m]); 735 r[n][1] += vp9_cost_zero(tx_probs[m]);
883 else 736 else
884 r[n][1] += vp9_cost_one(tx_probs[m]); 737 r[n][1] += vp9_cost_one(tx_probs[m]);
885 } 738 }
886 } 739 }
887 740
888 assert(skip_prob > 0); 741 assert(skip_prob > 0);
889 s0 = vp9_cost_bit(skip_prob, 0); 742 s0 = vp9_cost_bit(skip_prob, 0);
890 s1 = vp9_cost_bit(skip_prob, 1); 743 s1 = vp9_cost_bit(skip_prob, 1);
891 744
892 for (n = TX_4X4; n <= max_txfm_size; n++) { 745 for (n = TX_4X4; n <= max_tx_size; n++) {
893 if (d[n] == INT64_MAX) { 746 if (d[n] == INT64_MAX) {
894 rd[n][0] = rd[n][1] = INT64_MAX; 747 rd[n][0] = rd[n][1] = INT64_MAX;
895 continue; 748 continue;
896 } 749 }
897 if (s[n]) { 750 if (s[n]) {
898 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); 751 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
899 } else { 752 } else {
900 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); 753 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
901 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); 754 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
902 } 755 }
903 } 756 }
904 757
905 if (max_txfm_size == TX_32X32 && 758 if (max_tx_size == TX_32X32 &&
906 (cm->tx_mode == ALLOW_32X32 || 759 (cm->tx_mode == ALLOW_32X32 ||
907 (cm->tx_mode == TX_MODE_SELECT && 760 (cm->tx_mode == TX_MODE_SELECT &&
908 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && 761 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
909 rd[TX_32X32][1] < rd[TX_4X4][1]))) { 762 rd[TX_32X32][1] < rd[TX_4X4][1]))) {
910 mbmi->txfm_size = TX_32X32; 763 mbmi->txfm_size = TX_32X32;
911 } else if (max_txfm_size >= TX_16X16 && 764 } else if (max_tx_size >= TX_16X16 &&
912 (cm->tx_mode == ALLOW_16X16 || 765 (cm->tx_mode == ALLOW_16X16 ||
913 cm->tx_mode == ALLOW_32X32 || 766 cm->tx_mode == ALLOW_32X32 ||
914 (cm->tx_mode == TX_MODE_SELECT && 767 (cm->tx_mode == TX_MODE_SELECT &&
915 rd[TX_16X16][1] < rd[TX_8X8][1] && 768 rd[TX_16X16][1] < rd[TX_8X8][1] &&
916 rd[TX_16X16][1] < rd[TX_4X4][1]))) { 769 rd[TX_16X16][1] < rd[TX_4X4][1]))) {
917 mbmi->txfm_size = TX_16X16; 770 mbmi->txfm_size = TX_16X16;
918 } else if (cm->tx_mode == ALLOW_8X8 || 771 } else if (cm->tx_mode == ALLOW_8X8 ||
919 cm->tx_mode == ALLOW_16X16 || 772 cm->tx_mode == ALLOW_16X16 ||
920 cm->tx_mode == ALLOW_32X32 || 773 cm->tx_mode == ALLOW_32X32 ||
921 (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { 774 (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
922 mbmi->txfm_size = TX_8X8; 775 mbmi->txfm_size = TX_8X8;
923 } else { 776 } else {
924 mbmi->txfm_size = TX_4X4; 777 mbmi->txfm_size = TX_4X4;
925 } 778 }
926 779
927 *distortion = d[mbmi->txfm_size]; 780 *distortion = d[mbmi->txfm_size];
928 *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT]; 781 *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
929 *skip = s[mbmi->txfm_size]; 782 *skip = s[mbmi->txfm_size];
930 783
931 txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; 784 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
932 txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; 785 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
933 txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; 786 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
934 txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; 787 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
935 if (max_txfm_size == TX_32X32 && 788 if (max_tx_size == TX_32X32 &&
936 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && 789 rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
937 rd[TX_32X32][1] < rd[TX_4X4][1]) 790 rd[TX_32X32][1] < rd[TX_4X4][1])
938 txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; 791 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
939 else if (max_txfm_size >= TX_16X16 && 792 else if (max_tx_size >= TX_16X16 &&
940 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) 793 rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
941 txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; 794 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
942 else 795 else
943 txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? 796 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
944 rd[TX_4X4][1] : rd[TX_8X8][1]; 797 rd[TX_4X4][1] : rd[TX_8X8][1];
945 798
946 if (max_txfm_size == TX_32X32 && 799 if (max_tx_size == TX_32X32 &&
947 rd[TX_32X32][1] < rd[TX_16X16][1] && 800 rd[TX_32X32][1] < rd[TX_16X16][1] &&
948 rd[TX_32X32][1] < rd[TX_8X8][1] && 801 rd[TX_32X32][1] < rd[TX_8X8][1] &&
949 rd[TX_32X32][1] < rd[TX_4X4][1]) { 802 rd[TX_32X32][1] < rd[TX_4X4][1]) {
950 cpi->txfm_stepdown_count[0]++; 803 cpi->txfm_stepdown_count[0]++;
951 } else if (max_txfm_size >= TX_16X16 && 804 } else if (max_tx_size >= TX_16X16 &&
952 rd[TX_16X16][1] < rd[TX_8X8][1] && 805 rd[TX_16X16][1] < rd[TX_8X8][1] &&
953 rd[TX_16X16][1] < rd[TX_4X4][1]) { 806 rd[TX_16X16][1] < rd[TX_4X4][1]) {
954 cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++; 807 cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
955 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) { 808 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
956 cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++; 809 cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
957 } else { 810 } else {
958 cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++; 811 cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
959 } 812 }
960 } 813 }
961 814
962 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, 815 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
963 int (*r)[2], int *rate, 816 int (*r)[2], int *rate,
964 int64_t *d, int64_t *distortion, 817 int64_t *d, int64_t *distortion,
965 int *s, int *skip, int64_t *sse, 818 int *s, int *skip, int64_t *sse,
966 int64_t ref_best_rd, 819 int64_t ref_best_rd,
967 BLOCK_SIZE_TYPE bs, 820 BLOCK_SIZE bs) {
968 int *model_used) { 821 const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
969 const TX_SIZE max_txfm_size = TX_32X32
970 - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
971 VP9_COMMON *const cm = &cpi->common; 822 VP9_COMMON *const cm = &cpi->common;
972 MACROBLOCKD *const xd = &x->e_mbd; 823 MACROBLOCKD *const xd = &x->e_mbd;
973 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 824 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
974 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd); 825 vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
975 int64_t rd[TX_SIZE_MAX_SB][2]; 826 int64_t rd[TX_SIZES][2];
976 int n, m; 827 int n, m;
977 int s0, s1; 828 int s0, s1;
978 double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00}; 829 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
979 // double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00}; 830 // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
980 831
981 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs); 832 const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs);
982 833
983 // for (n = TX_4X4; n <= max_txfm_size; n++) 834 // for (n = TX_4X4; n <= max_txfm_size; n++)
984 // r[n][0] = (r[n][0] * scale_r[n]); 835 // r[n][0] = (r[n][0] * scale_r[n]);
985 836
986 for (n = TX_4X4; n <= max_txfm_size; n++) { 837 for (n = TX_4X4; n <= max_txfm_size; n++) {
987 r[n][1] = r[n][0]; 838 r[n][1] = r[n][0];
988 for (m = 0; m <= n - (n == max_txfm_size); m++) { 839 for (m = 0; m <= n - (n == max_txfm_size); m++) {
989 if (m == n) 840 if (m == n)
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1027 } else if (cm->tx_mode == ALLOW_8X8 || 878 } else if (cm->tx_mode == ALLOW_8X8 ||
1028 cm->tx_mode == ALLOW_16X16 || 879 cm->tx_mode == ALLOW_16X16 ||
1029 cm->tx_mode == ALLOW_32X32 || 880 cm->tx_mode == ALLOW_32X32 ||
1030 (cm->tx_mode == TX_MODE_SELECT && 881 (cm->tx_mode == TX_MODE_SELECT &&
1031 rd[TX_8X8][1] <= rd[TX_4X4][1])) { 882 rd[TX_8X8][1] <= rd[TX_4X4][1])) {
1032 mbmi->txfm_size = TX_8X8; 883 mbmi->txfm_size = TX_8X8;
1033 } else { 884 } else {
1034 mbmi->txfm_size = TX_4X4; 885 mbmi->txfm_size = TX_4X4;
1035 } 886 }
1036 887
1037 if (model_used[mbmi->txfm_size]) { 888 // Actually encode using the chosen mode if a model was used, but do not
1038 // Actually encode using the chosen mode if a model was used, but do not 889 // update the r, d costs
1039 // update the r, d costs 890 txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->txfm_size],
1040 super_block_yrd_for_txfm(cm, x, rate, distortion, skip, 891 ref_best_rd, 0, bs, mbmi->txfm_size);
1041 &sse[mbmi->txfm_size], ref_best_rd,
1042 bs, mbmi->txfm_size);
1043 } else {
1044 *distortion = d[mbmi->txfm_size];
1045 *rate = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
1046 *skip = s[mbmi->txfm_size];
1047 }
1048 892
1049 if (max_txfm_size == TX_32X32 && 893 if (max_txfm_size == TX_32X32 &&
1050 rd[TX_32X32][1] <= rd[TX_16X16][1] && 894 rd[TX_32X32][1] <= rd[TX_16X16][1] &&
1051 rd[TX_32X32][1] <= rd[TX_8X8][1] && 895 rd[TX_32X32][1] <= rd[TX_8X8][1] &&
1052 rd[TX_32X32][1] <= rd[TX_4X4][1]) { 896 rd[TX_32X32][1] <= rd[TX_4X4][1]) {
1053 cpi->txfm_stepdown_count[0]++; 897 cpi->txfm_stepdown_count[0]++;
1054 } else if (max_txfm_size >= TX_16X16 && 898 } else if (max_txfm_size >= TX_16X16 &&
1055 rd[TX_16X16][1] <= rd[TX_8X8][1] && 899 rd[TX_16X16][1] <= rd[TX_8X8][1] &&
1056 rd[TX_16X16][1] <= rd[TX_4X4][1]) { 900 rd[TX_16X16][1] <= rd[TX_4X4][1]) {
1057 cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++; 901 cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
1058 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) { 902 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
1059 cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++; 903 cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
1060 } else { 904 } else {
1061 cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++; 905 cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
1062 } 906 }
1063 } 907 }
1064 908
1065 static void super_block_yrd(VP9_COMP *cpi, 909 static void super_block_yrd(VP9_COMP *cpi,
1066 MACROBLOCK *x, int *rate, int64_t *distortion, 910 MACROBLOCK *x, int *rate, int64_t *distortion,
1067 int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs, 911 int *skip, int64_t *psse, BLOCK_SIZE bs,
1068 int64_t txfm_cache[NB_TXFM_MODES], 912 int64_t txfm_cache[TX_MODES],
1069 int64_t ref_best_rd) { 913 int64_t ref_best_rd) {
1070 VP9_COMMON *const cm = &cpi->common; 914 int r[TX_SIZES][2], s[TX_SIZES];
1071 int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB]; 915 int64_t d[TX_SIZES], sse[TX_SIZES];
1072 int64_t d[TX_SIZE_MAX_SB], sse[TX_SIZE_MAX_SB];
1073 MACROBLOCKD *xd = &x->e_mbd; 916 MACROBLOCKD *xd = &x->e_mbd;
1074 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 917 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1075 918
1076 assert(bs == mbmi->sb_type); 919 assert(bs == mbmi->sb_type);
1077 if (mbmi->ref_frame[0] > INTRA_FRAME) 920 if (mbmi->ref_frame[0] > INTRA_FRAME)
1078 vp9_subtract_sby(x, bs); 921 vp9_subtract_sby(x, bs);
1079 922
1080 if (cpi->sf.tx_size_search_method == USE_LARGESTALL || 923 if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
1081 (cpi->sf.tx_size_search_method != USE_FULL_RD && 924 (cpi->sf.tx_size_search_method != USE_FULL_RD &&
1082 mbmi->ref_frame[0] == INTRA_FRAME)) { 925 mbmi->ref_frame[0] == INTRA_FRAME)) {
1083 vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t)); 926 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
1084 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, 927 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
1085 ref_best_rd, bs); 928 ref_best_rd, bs);
1086 if (psse) 929 if (psse)
1087 *psse = sse[mbmi->txfm_size]; 930 *psse = sse[mbmi->txfm_size];
1088 return; 931 return;
1089 } 932 }
1090 933
1091 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER && 934 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
1092 mbmi->ref_frame[0] > INTRA_FRAME) { 935 mbmi->ref_frame[0] > INTRA_FRAME) {
1093 int model_used[TX_SIZE_MAX_SB] = {1, 1, 1, 1}; 936 if (bs >= BLOCK_32X32)
1094 if (bs >= BLOCK_SIZE_SB32X32) { 937 model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
1095 if (model_used[TX_32X32]) { 938 &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
1096 model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd, 939 if (bs >= BLOCK_16X16)
1097 &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); 940 model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
1098 } else { 941 &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
1099 super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], 942
1100 &s[TX_32X32], &sse[TX_32X32], INT64_MAX, 943 model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
1101 bs, TX_32X32); 944 &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
1102 } 945
1103 } 946 model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
1104 if (bs >= BLOCK_SIZE_MB16X16) { 947 &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
1105 if (model_used[TX_16X16]) { 948
1106 model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
1107 &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
1108 } else {
1109 super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16],
1110 &s[TX_16X16], &sse[TX_16X16], INT64_MAX,
1111 bs, TX_16X16);
1112 }
1113 }
1114 if (model_used[TX_8X8]) {
1115 model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
1116 &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
1117 } else {
1118 super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
1119 &sse[TX_8X8], INT64_MAX, bs, TX_8X8);
1120 }
1121 if (model_used[TX_4X4]) {
1122 model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
1123 &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
1124 } else {
1125 super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
1126 &sse[TX_4X4], INT64_MAX, bs, TX_4X4);
1127 }
1128 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, 949 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
1129 skip, sse, ref_best_rd, bs, model_used); 950 skip, sse, ref_best_rd, bs);
1130 } else { 951 } else {
1131 if (bs >= BLOCK_SIZE_SB32X32) 952 if (bs >= BLOCK_32X32)
1132 super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], 953 txfm_rd_in_plane(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
1133 &s[TX_32X32], &sse[TX_32X32], ref_best_rd, 954 &sse[TX_32X32], ref_best_rd, 0, bs, TX_32X32);
1134 bs, TX_32X32); 955 if (bs >= BLOCK_16X16)
1135 if (bs >= BLOCK_SIZE_MB16X16) 956 txfm_rd_in_plane(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
1136 super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], 957 &sse[TX_16X16], ref_best_rd, 0, bs, TX_16X16);
1137 &s[TX_16X16], &sse[TX_16X16], ref_best_rd, 958 txfm_rd_in_plane(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
1138 bs, TX_16X16); 959 &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
1139 super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 960 txfm_rd_in_plane(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
1140 &sse[TX_8X8], ref_best_rd, bs, TX_8X8); 961 &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
1141 super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
1142 &sse[TX_4X4], ref_best_rd, bs, TX_4X4);
1143 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, 962 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
1144 skip, txfm_cache, bs); 963 skip, txfm_cache, bs);
1145 } 964 }
1146 if (psse) 965 if (psse)
1147 *psse = sse[mbmi->txfm_size]; 966 *psse = sse[mbmi->txfm_size];
1148 } 967 }
1149 968
1150 static int conditional_skipintra(MB_PREDICTION_MODE mode, 969 static int conditional_skipintra(MB_PREDICTION_MODE mode,
1151 MB_PREDICTION_MODE best_intra_mode) { 970 MB_PREDICTION_MODE best_intra_mode) {
1152 if (mode == D117_PRED && 971 if (mode == D117_PRED &&
1153 best_intra_mode != V_PRED && 972 best_intra_mode != V_PRED &&
1154 best_intra_mode != D135_PRED) 973 best_intra_mode != D135_PRED)
1155 return 1; 974 return 1;
1156 if (mode == D63_PRED && 975 if (mode == D63_PRED &&
1157 best_intra_mode != V_PRED && 976 best_intra_mode != V_PRED &&
1158 best_intra_mode != D45_PRED) 977 best_intra_mode != D45_PRED)
1159 return 1; 978 return 1;
1160 if (mode == D27_PRED && 979 if (mode == D207_PRED &&
1161 best_intra_mode != H_PRED && 980 best_intra_mode != H_PRED &&
1162 best_intra_mode != D45_PRED) 981 best_intra_mode != D45_PRED)
1163 return 1; 982 return 1;
1164 if (mode == D153_PRED && 983 if (mode == D153_PRED &&
1165 best_intra_mode != H_PRED && 984 best_intra_mode != H_PRED &&
1166 best_intra_mode != D135_PRED) 985 best_intra_mode != D135_PRED)
1167 return 1; 986 return 1;
1168 return 0; 987 return 0;
1169 } 988 }
1170 989
1171 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, 990 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
1172 MB_PREDICTION_MODE *best_mode, 991 MB_PREDICTION_MODE *best_mode,
1173 int *bmode_costs, 992 int *bmode_costs,
1174 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, 993 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1175 int *bestrate, int *bestratey, 994 int *bestrate, int *bestratey,
1176 int64_t *bestdistortion, 995 int64_t *bestdistortion,
1177 BLOCK_SIZE_TYPE bsize) { 996 BLOCK_SIZE bsize, int64_t rd_thresh) {
1178 MB_PREDICTION_MODE mode; 997 MB_PREDICTION_MODE mode;
1179 MACROBLOCKD *xd = &x->e_mbd; 998 MACROBLOCKD *xd = &x->e_mbd;
1180 int64_t best_rd = INT64_MAX; 999 int64_t best_rd = rd_thresh;
1181 int rate = 0; 1000 int rate = 0;
1182 int64_t distortion; 1001 int64_t distortion;
1183 VP9_COMMON *const cm = &cpi->common;
1184 struct macroblock_plane *p = &x->plane[0]; 1002 struct macroblock_plane *p = &x->plane[0];
1185 struct macroblockd_plane *pd = &xd->plane[0]; 1003 struct macroblockd_plane *pd = &xd->plane[0];
1186 const int src_stride = p->src.stride; 1004 const int src_stride = p->src.stride;
1187 const int dst_stride = pd->dst.stride; 1005 const int dst_stride = pd->dst.stride;
1188 uint8_t *src, *dst; 1006 uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
1007 p->src.buf, src_stride);
1008 uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
1009 pd->dst.buf, dst_stride);
1189 int16_t *src_diff, *coeff; 1010 int16_t *src_diff, *coeff;
1190 1011
1191 ENTROPY_CONTEXT ta[2], tempa[2]; 1012 ENTROPY_CONTEXT ta[2], tempa[2];
1192 ENTROPY_CONTEXT tl[2], templ[2]; 1013 ENTROPY_CONTEXT tl[2], templ[2];
1193 TX_TYPE tx_type = DCT_DCT; 1014 TX_TYPE tx_type = DCT_DCT;
1194 TX_TYPE best_tx_type = DCT_DCT; 1015 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1195 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 1016 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1196 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1197 int idx, idy, block; 1017 int idx, idy, block;
1198 DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]); 1018 uint8_t best_dst[8 * 8];
1199 1019
1200 assert(ib < 4); 1020 assert(ib < 4);
1201 1021
1202 vpx_memcpy(ta, a, sizeof(ta)); 1022 vpx_memcpy(ta, a, sizeof(ta));
1203 vpx_memcpy(tl, l, sizeof(tl)); 1023 vpx_memcpy(tl, l, sizeof(tl));
1204 xd->mode_info_context->mbmi.txfm_size = TX_4X4; 1024 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
1205 1025
1206 for (mode = DC_PRED; mode <= TM_PRED; ++mode) { 1026 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1207 int64_t this_rd; 1027 int64_t this_rd;
1208 int ratey = 0; 1028 int ratey = 0;
1029
1030 if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
1031 continue;
1032
1209 // Only do the oblique modes if the best so far is 1033 // Only do the oblique modes if the best so far is
1210 // one of the neighboring directional modes 1034 // one of the neighboring directional modes
1211 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { 1035 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1212 if (conditional_skipintra(mode, *best_mode)) 1036 if (conditional_skipintra(mode, *best_mode))
1213 continue; 1037 continue;
1214 } 1038 }
1215 1039
1216 rate = bmode_costs[mode]; 1040 rate = bmode_costs[mode];
1217 distortion = 0; 1041 distortion = 0;
1218 1042
1219 vpx_memcpy(tempa, ta, sizeof(ta)); 1043 vpx_memcpy(tempa, ta, sizeof(ta));
1220 vpx_memcpy(templ, tl, sizeof(tl)); 1044 vpx_memcpy(templ, tl, sizeof(tl));
1221 1045
1222 for (idy = 0; idy < num_4x4_blocks_high; ++idy) { 1046 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1223 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { 1047 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1224 int64_t ssz; 1048 int64_t ssz;
1225 const int16_t *scan; 1049 const int16_t *scan;
1050 uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
1051 uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
1226 1052
1227 block = ib + idy * 2 + idx; 1053 block = ib + idy * 2 + idx;
1228 xd->mode_info_context->bmi[block].as_mode = mode; 1054 xd->mode_info_context->bmi[block].as_mode = mode;
1229 src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block, 1055 src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1230 p->src.buf, src_stride); 1056 coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1231 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block, 1057 vp9_predict_intra_block(xd, block, 1,
1232 p->src_diff);
1233 coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
1234 dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
1235 pd->dst.buf, dst_stride);
1236 vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8),
1237 TX_4X4, mode, 1058 TX_4X4, mode,
1238 x->skip_encode ? src : dst, 1059 x->skip_encode ? src : dst,
1239 x->skip_encode ? src_stride : dst_stride, 1060 x->skip_encode ? src_stride : dst_stride,
1240 dst, dst_stride); 1061 dst, dst_stride);
1241 vp9_subtract_block(4, 4, src_diff, 8, 1062 vp9_subtract_block(4, 4, src_diff, 8,
1242 src, src_stride, 1063 src, src_stride,
1243 dst, dst_stride); 1064 dst, dst_stride);
1244 1065
1245 tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block); 1066 tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
1246 if (tx_type != DCT_DCT) { 1067 if (tx_type != DCT_DCT) {
1247 vp9_short_fht4x4(src_diff, coeff, 8, tx_type); 1068 vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
1248 x->quantize_b_4x4(x, block, tx_type, 16); 1069 x->quantize_b_4x4(x, block, tx_type, 16);
1249 } else { 1070 } else {
1250 x->fwd_txm4x4(src_diff, coeff, 16); 1071 x->fwd_txm4x4(src_diff, coeff, 16);
1251 x->quantize_b_4x4(x, block, tx_type, 16); 1072 x->quantize_b_4x4(x, block, tx_type, 16);
1252 } 1073 }
1253 1074
1254 scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block)); 1075 scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
1255 ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC, 1076 ratey += cost_coeffs(x, 0, block,
1256 tempa + idx, templ + idy, TX_4X4, scan, 1077 tempa + idx, templ + idy, TX_4X4, scan,
1257 vp9_get_coef_neighbors_handle(scan)); 1078 vp9_get_coef_neighbors_handle(scan));
1258 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, 1079 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1259 block, 16),
1260 16, &ssz) >> 2; 1080 16, &ssz) >> 2;
1081 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1082 goto next;
1261 1083
1262 if (tx_type != DCT_DCT) 1084 if (tx_type != DCT_DCT)
1263 vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16), 1085 vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
1264 dst, pd->dst.stride, tx_type); 1086 dst, pd->dst.stride, tx_type);
1265 else 1087 else
1266 xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16), 1088 xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
1267 dst, pd->dst.stride); 1089 dst, pd->dst.stride);
1268 } 1090 }
1269 } 1091 }
1270 1092
1271 rate += ratey; 1093 rate += ratey;
1272 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); 1094 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1273 1095
1274 if (this_rd < best_rd) { 1096 if (this_rd < best_rd) {
1275 *bestrate = rate; 1097 *bestrate = rate;
1276 *bestratey = ratey; 1098 *bestratey = ratey;
1277 *bestdistortion = distortion; 1099 *bestdistortion = distortion;
1278 best_rd = this_rd; 1100 best_rd = this_rd;
1279 *best_mode = mode; 1101 *best_mode = mode;
1280 best_tx_type = tx_type;
1281 vpx_memcpy(a, tempa, sizeof(tempa)); 1102 vpx_memcpy(a, tempa, sizeof(tempa));
1282 vpx_memcpy(l, templ, sizeof(templ)); 1103 vpx_memcpy(l, templ, sizeof(templ));
1283 for (idy = 0; idy < num_4x4_blocks_high; ++idy) { 1104 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1284 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { 1105 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1285 block = ib + idy * 2 + idx; 1106 num_4x4_blocks_wide * 4);
1286 vpx_memcpy(best_dqcoeff[idy * 2 + idx],
1287 BLOCK_OFFSET(pd->dqcoeff, block, 16),
1288 sizeof(best_dqcoeff[0]));
1289 }
1290 }
1291 } 1107 }
1108 next:
1109 {}
1292 } 1110 }
1293 1111
1294 if (x->skip_encode) 1112 if (best_rd >= rd_thresh || x->skip_encode)
1295 return best_rd; 1113 return best_rd;
1296 1114
1297 for (idy = 0; idy < num_4x4_blocks_high; ++idy) { 1115 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1298 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { 1116 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1299 block = ib + idy * 2 + idx; 1117 num_4x4_blocks_wide * 4);
1300 xd->mode_info_context->bmi[block].as_mode = *best_mode;
1301 src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
1302 p->src.buf, src_stride);
1303 dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
1304 pd->dst.buf, dst_stride);
1305
1306 vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4,
1307 *best_mode,
1308 x->skip_encode ? src : dst,
1309 x->skip_encode ? src_stride : dst_stride,
1310 dst, dst_stride);
1311 // inverse transform
1312 if (best_tx_type != DCT_DCT)
1313 vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
1314 dst_stride, best_tx_type);
1315 else
1316 xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
1317 dst_stride);
1318 }
1319 }
1320 1118
1321 return best_rd; 1119 return best_rd;
1322 } 1120 }
1323 1121
1324 static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, 1122 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
1325 int *Rate, int *rate_y, 1123 MACROBLOCK * const mb,
1326 int64_t *Distortion, int64_t best_rd) { 1124 int * const rate,
1125 int * const rate_y,
1126 int64_t * const distortion,
1127 int64_t best_rd) {
1327 int i, j; 1128 int i, j;
1328 MACROBLOCKD *const xd = &mb->e_mbd; 1129 MACROBLOCKD *const xd = &mb->e_mbd;
1329 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; 1130 const BLOCK_SIZE bsize = xd->mode_info_context->mbmi.sb_type;
1330 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 1131 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1331 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 1132 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1332 int idx, idy; 1133 int idx, idy;
1333 int cost = 0; 1134 int cost = 0;
1334 int64_t distortion = 0; 1135 int64_t total_distortion = 0;
1335 int tot_rate_y = 0; 1136 int tot_rate_y = 0;
1336 int64_t total_rd = 0; 1137 int64_t total_rd = 0;
1337 ENTROPY_CONTEXT t_above[4], t_left[4]; 1138 ENTROPY_CONTEXT t_above[4], t_left[4];
1338 int *bmode_costs; 1139 int *bmode_costs;
1339 MODE_INFO *const mic = xd->mode_info_context; 1140 MODE_INFO *const mic = xd->mode_info_context;
1340 1141
1341 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); 1142 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1342 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); 1143 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1343 1144
1344 bmode_costs = mb->mbmode_cost; 1145 bmode_costs = mb->mbmode_cost;
1345 1146
1147 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1346 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { 1148 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1347 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { 1149 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1348 const int mis = xd->mode_info_stride; 1150 const int mis = xd->mode_info_stride;
1349 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); 1151 MB_PREDICTION_MODE best_mode = DC_PRED;
1350 int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); 1152 int r = INT_MAX, ry = INT_MAX;
1351 int64_t UNINITIALIZED_IS_SAFE(d); 1153 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1352 i = idy * 2 + idx; 1154 i = idy * 2 + idx;
1353
1354 if (cpi->common.frame_type == KEY_FRAME) { 1155 if (cpi->common.frame_type == KEY_FRAME) {
1355 const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis); 1156 const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
1356 const MB_PREDICTION_MODE L = (xd->left_available || idx) ? 1157 const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
1357 left_block_mode(mic, i) : DC_PRED; 1158 left_block_mode(mic, i) : DC_PRED;
1358 1159
1359 bmode_costs = mb->y_mode_costs[A][L]; 1160 bmode_costs = mb->y_mode_costs[A][L];
1360 } 1161 }
1361 1162
1362 total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, 1163 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1363 t_above + idx, t_left + idy, 1164 t_above + idx, t_left + idy, &r, &ry, &d,
1364 &r, &ry, &d, bsize); 1165 bsize, best_rd - total_rd);
1166 if (this_rd >= best_rd - total_rd)
1167 return INT64_MAX;
1168
1169 total_rd += this_rd;
1365 cost += r; 1170 cost += r;
1366 distortion += d; 1171 total_distortion += d;
1367 tot_rate_y += ry; 1172 tot_rate_y += ry;
1368 1173
1369 mic->bmi[i].as_mode = best_mode; 1174 mic->bmi[i].as_mode = best_mode;
1370 for (j = 1; j < num_4x4_blocks_high; ++j) 1175 for (j = 1; j < num_4x4_blocks_high; ++j)
1371 mic->bmi[i + j * 2].as_mode = best_mode; 1176 mic->bmi[i + j * 2].as_mode = best_mode;
1372 for (j = 1; j < num_4x4_blocks_wide; ++j) 1177 for (j = 1; j < num_4x4_blocks_wide; ++j)
1373 mic->bmi[i + j].as_mode = best_mode; 1178 mic->bmi[i + j].as_mode = best_mode;
1374 1179
1375 if (total_rd >= best_rd) 1180 if (total_rd >= best_rd)
1376 return INT64_MAX; 1181 return INT64_MAX;
1377 } 1182 }
1378 } 1183 }
1379 1184
1380 *Rate = cost; 1185 *rate = cost;
1381 *rate_y = tot_rate_y; 1186 *rate_y = tot_rate_y;
1382 *Distortion = distortion; 1187 *distortion = total_distortion;
1383 xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode; 1188 xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode;
1384 1189
1385 return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); 1190 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1386 } 1191 }
1387 1192
1388 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, 1193 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1389 int *rate, int *rate_tokenonly, 1194 int *rate, int *rate_tokenonly,
1390 int64_t *distortion, int *skippable, 1195 int64_t *distortion, int *skippable,
1391 BLOCK_SIZE_TYPE bsize, 1196 BLOCK_SIZE bsize,
1392 int64_t txfm_cache[NB_TXFM_MODES], 1197 int64_t tx_cache[TX_MODES],
1393 int64_t best_rd) { 1198 int64_t best_rd) {
1394 MB_PREDICTION_MODE mode; 1199 MB_PREDICTION_MODE mode;
1395 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); 1200 MB_PREDICTION_MODE mode_selected = DC_PRED;
1396 MACROBLOCKD *const xd = &x->e_mbd; 1201 MACROBLOCKD *const xd = &x->e_mbd;
1202 MODE_INFO *const mic = xd->mode_info_context;
1397 int this_rate, this_rate_tokenonly, s; 1203 int this_rate, this_rate_tokenonly, s;
1398 int64_t this_distortion, this_rd; 1204 int64_t this_distortion, this_rd;
1399 TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); 1205 TX_SIZE best_tx = TX_4X4;
1400 int i; 1206 int i;
1401 int *bmode_costs = x->mbmode_cost; 1207 int *bmode_costs = x->mbmode_cost;
1402 1208
1403 if (cpi->sf.tx_size_search_method == USE_FULL_RD) { 1209 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1404 for (i = 0; i < NB_TXFM_MODES; i++) 1210 for (i = 0; i < TX_MODES; i++)
1405 txfm_cache[i] = INT64_MAX; 1211 tx_cache[i] = INT64_MAX;
1406 }
1407 1212
1408 /* Y Search for 32x32 intra prediction mode */ 1213 /* Y Search for intra prediction mode */
1409 for (mode = DC_PRED; mode <= TM_PRED; mode++) { 1214 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1410 int64_t local_txfm_cache[NB_TXFM_MODES]; 1215 int64_t local_tx_cache[TX_MODES];
1411 MODE_INFO *const mic = xd->mode_info_context;
1412 const int mis = xd->mode_info_stride; 1216 const int mis = xd->mode_info_stride;
1413 1217
1218 if (!(cpi->sf.intra_y_mode_mask & (1 << mode)))
1219 continue;
1220
1414 if (cpi->common.frame_type == KEY_FRAME) { 1221 if (cpi->common.frame_type == KEY_FRAME) {
1415 const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); 1222 const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
1416 const MB_PREDICTION_MODE L = xd->left_available ? 1223 const MB_PREDICTION_MODE L = xd->left_available ?
1417 left_block_mode(mic, 0) : DC_PRED; 1224 left_block_mode(mic, 0) : DC_PRED;
1418 1225
1419 bmode_costs = x->y_mode_costs[A][L]; 1226 bmode_costs = x->y_mode_costs[A][L];
1420 } 1227 }
1421 x->e_mbd.mode_info_context->mbmi.mode = mode; 1228 mic->mbmi.mode = mode;
1422 1229
1423 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, 1230 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1424 bsize, local_txfm_cache, best_rd); 1231 bsize, local_tx_cache, best_rd);
1425 1232
1426 if (this_rate_tokenonly == INT_MAX) 1233 if (this_rate_tokenonly == INT_MAX)
1427 continue; 1234 continue;
1428 1235
1429 this_rate = this_rate_tokenonly + bmode_costs[mode]; 1236 this_rate = this_rate_tokenonly + bmode_costs[mode];
1430 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); 1237 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1431 1238
1432 if (this_rd < best_rd) { 1239 if (this_rd < best_rd) {
1433 mode_selected = mode; 1240 mode_selected = mode;
1434 best_rd = this_rd; 1241 best_rd = this_rd;
1435 best_tx = x->e_mbd.mode_info_context->mbmi.txfm_size; 1242 best_tx = mic->mbmi.txfm_size;
1436 *rate = this_rate; 1243 *rate = this_rate;
1437 *rate_tokenonly = this_rate_tokenonly; 1244 *rate_tokenonly = this_rate_tokenonly;
1438 *distortion = this_distortion; 1245 *distortion = this_distortion;
1439 *skippable = s; 1246 *skippable = s;
1440 } 1247 }
1441 1248
1442 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) { 1249 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1443 for (i = 0; i < NB_TXFM_MODES; i++) { 1250 for (i = 0; i < TX_MODES; i++) {
1444 int64_t adj_rd = this_rd + local_txfm_cache[i] - 1251 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1445 local_txfm_cache[cpi->common.tx_mode]; 1252 local_tx_cache[cpi->common.tx_mode];
1446 if (adj_rd < txfm_cache[i]) { 1253 if (adj_rd < tx_cache[i]) {
1447 txfm_cache[i] = adj_rd; 1254 tx_cache[i] = adj_rd;
1448 } 1255 }
1449 } 1256 }
1450 } 1257 }
1451 } 1258 }
1452 1259
1453 x->e_mbd.mode_info_context->mbmi.mode = mode_selected; 1260 mic->mbmi.mode = mode_selected;
1454 x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx; 1261 mic->mbmi.txfm_size = best_tx;
1455 1262
1456 return best_rd; 1263 return best_rd;
1457 } 1264 }
1458 1265
1459 static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
1460 int *rate, int64_t *distortion,
1461 int *skippable, int64_t *sse,
1462 BLOCK_SIZE_TYPE bsize,
1463 TX_SIZE uv_tx_size) {
1464 MACROBLOCKD *const xd = &x->e_mbd;
1465 int64_t dummy;
1466 if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
1467 vp9_encode_intra_block_uv(cm, x, bsize);
1468 else
1469 vp9_xform_quant_sbuv(cm, x, bsize);
1470
1471 *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2,
1472 sse ? sse : &dummy);
1473 *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
1474 *skippable = vp9_sbuv_is_skippable(xd, bsize);
1475 }
1476
1477 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, 1266 static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
1478 int *rate, int64_t *distortion, int *skippable, 1267 int *rate, int64_t *distortion, int *skippable,
1479 int64_t *sse, BLOCK_SIZE_TYPE bsize) { 1268 int64_t *sse, BLOCK_SIZE bsize,
1269 int64_t ref_best_rd) {
1480 MACROBLOCKD *const xd = &x->e_mbd; 1270 MACROBLOCKD *const xd = &x->e_mbd;
1481 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 1271 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
1482 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi); 1272 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1273 int plane;
1274 int pnrate = 0, pnskip = 1;
1275 int64_t pndist = 0, pnsse = 0;
1483 1276
1484 if (mbmi->ref_frame[0] > INTRA_FRAME) 1277 if (ref_best_rd < 0)
1278 goto term;
1279
1280 if (is_inter_block(mbmi))
1485 vp9_subtract_sbuv(x, bsize); 1281 vp9_subtract_sbuv(x, bsize);
1486 1282
1487 super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, sse, bsize, 1283 *rate = 0;
1488 uv_txfm_size); 1284 *distortion = 0;
1285 *sse = 0;
1286 *skippable = 1;
1287
1288 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1289 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1290 ref_best_rd, plane, bsize, uv_txfm_size);
1291 if (pnrate == INT_MAX)
1292 goto term;
1293 *rate += pnrate;
1294 *distortion += pndist;
1295 *sse += pnsse;
1296 *skippable &= pnskip;
1297 }
1298 return;
1299
1300 term:
1301 *rate = INT_MAX;
1302 *distortion = INT64_MAX;
1303 *sse = INT64_MAX;
1304 *skippable = 0;
1305 return;
1489 } 1306 }
1490 1307
1491 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, 1308 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1492 int *rate, int *rate_tokenonly, 1309 int *rate, int *rate_tokenonly,
1493 int64_t *distortion, int *skippable, 1310 int64_t *distortion, int *skippable,
1494 BLOCK_SIZE_TYPE bsize) { 1311 BLOCK_SIZE bsize) {
1495 MB_PREDICTION_MODE mode; 1312 MB_PREDICTION_MODE mode;
1496 MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); 1313 MB_PREDICTION_MODE mode_selected = DC_PRED;
1497 int64_t best_rd = INT64_MAX, this_rd; 1314 int64_t best_rd = INT64_MAX, this_rd;
1498 int this_rate_tokenonly, this_rate, s; 1315 int this_rate_tokenonly, this_rate, s;
1499 int64_t this_distortion; 1316 int64_t this_distortion, this_sse;
1500 1317
1501 MB_PREDICTION_MODE last_mode = bsize <= BLOCK_SIZE_SB8X8 ? 1318 // int mode_mask = (bsize <= BLOCK_8X8)
1502 TM_PRED : cpi->sf.last_chroma_intra_mode; 1319 // ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
1503 1320
1504 for (mode = DC_PRED; mode <= last_mode; mode++) { 1321 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1322 // if (!(mode_mask & (1 << mode)))
1323 if (!(cpi->sf.intra_uv_mode_mask & (1 << mode)))
1324 continue;
1325
1505 x->e_mbd.mode_info_context->mbmi.uv_mode = mode; 1326 x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
1506 super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, 1327 super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
1507 &this_distortion, &s, NULL, bsize); 1328 &this_distortion, &s, &this_sse, bsize, best_rd);
1329 if (this_rate_tokenonly == INT_MAX)
1330 continue;
1508 this_rate = this_rate_tokenonly + 1331 this_rate = this_rate_tokenonly +
1509 x->intra_uv_mode_cost[cpi->common.frame_type][mode]; 1332 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1510 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); 1333 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1511 1334
1512 if (this_rd < best_rd) { 1335 if (this_rd < best_rd) {
1513 mode_selected = mode; 1336 mode_selected = mode;
1514 best_rd = this_rd; 1337 best_rd = this_rd;
1515 *rate = this_rate; 1338 *rate = this_rate;
1516 *rate_tokenonly = this_rate_tokenonly; 1339 *rate_tokenonly = this_rate_tokenonly;
1517 *distortion = this_distortion; 1340 *distortion = this_distortion;
1518 *skippable = s; 1341 *skippable = s;
1519 } 1342 }
1520 } 1343 }
1521 1344
1522 x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; 1345 x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
1523 1346
1524 return best_rd; 1347 return best_rd;
1525 } 1348 }
1526 1349
1527 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x, 1350 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
1528 int *rate, int *rate_tokenonly, 1351 int *rate, int *rate_tokenonly,
1529 int64_t *distortion, int *skippable, 1352 int64_t *distortion, int *skippable,
1530 BLOCK_SIZE_TYPE bsize) { 1353 BLOCK_SIZE bsize) {
1531 int64_t this_rd; 1354 int64_t this_rd;
1355 int64_t this_sse;
1532 1356
1533 x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; 1357 x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
1534 super_block_uvrd(&cpi->common, x, rate_tokenonly, 1358 super_block_uvrd(&cpi->common, x, rate_tokenonly,
1535 distortion, skippable, NULL, bsize); 1359 distortion, skippable, &this_sse, bsize, INT64_MAX);
1536 *rate = *rate_tokenonly + 1360 *rate = *rate_tokenonly +
1537 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED]; 1361 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
1538 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion); 1362 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1539 1363
1540 x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
1541
1542 return this_rd; 1364 return this_rd;
1543 } 1365 }
1544 1366
1545 static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, 1367 static void choose_intra_uv_mode(VP9_COMP *cpi, BLOCK_SIZE bsize,
1546 int *rate_uv, int *rate_uv_tokenonly, 1368 int *rate_uv, int *rate_uv_tokenonly,
1547 int64_t *dist_uv, int *skip_uv, 1369 int64_t *dist_uv, int *skip_uv,
1548 MB_PREDICTION_MODE *mode_uv) { 1370 MB_PREDICTION_MODE *mode_uv) {
1549 MACROBLOCK *const x = &cpi->mb; 1371 MACROBLOCK *const x = &cpi->mb;
1550 1372
1551 // Use an estimated rd for uv_intra based on DC_PRED if the 1373 // Use an estimated rd for uv_intra based on DC_PRED if the
1552 // appropriate speed flag is set. 1374 // appropriate speed flag is set.
1553 if (cpi->sf.use_uv_intra_rd_estimate) { 1375 if (cpi->sf.use_uv_intra_rd_estimate) {
1554 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, 1376 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1555 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : 1377 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1556 bsize);
1557 // Else do a proper rd search for each possible transform size that may 1378 // Else do a proper rd search for each possible transform size that may
1558 // be considered in the main rd loop. 1379 // be considered in the main rd loop.
1559 } else { 1380 } else {
1560 rd_pick_intra_sbuv_mode(cpi, x, 1381 rd_pick_intra_sbuv_mode(cpi, x,
1561 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, 1382 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1562 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 1383 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1563 : bsize);
1564 } 1384 }
1565 *mode_uv = x->e_mbd.mode_info_context->mbmi.uv_mode; 1385 *mode_uv = x->e_mbd.mode_info_context->mbmi.uv_mode;
1566 } 1386 }
1567 1387
1568 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, 1388 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1569 int mode_context) { 1389 int mode_context) {
1570 MACROBLOCK *const x = &cpi->mb; 1390 MACROBLOCK *const x = &cpi->mb;
1571 MACROBLOCKD *const xd = &x->e_mbd; 1391 MACROBLOCKD *const xd = &x->e_mbd;
1572 const int segment_id = xd->mode_info_context->mbmi.segment_id; 1392 const int segment_id = xd->mode_info_context->mbmi.segment_id;
1573 1393
1574 // Don't account for mode here if segment skip is enabled. 1394 // Don't account for mode here if segment skip is enabled.
1575 if (!vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP)) { 1395 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1576 assert(is_inter_mode(mode)); 1396 assert(is_inter_mode(mode));
1577 return x->inter_mode_cost[mode_context][mode - NEARESTMV]; 1397 return x->inter_mode_cost[mode_context][mode - NEARESTMV];
1578 } else { 1398 } else {
1579 return 0; 1399 return 0;
1580 } 1400 }
1581 } 1401 }
1582 1402
1583 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { 1403 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
1584 x->e_mbd.mode_info_context->mbmi.mode = mb; 1404 x->e_mbd.mode_info_context->mbmi.mode = mb;
1585 x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; 1405 x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
1586 } 1406 }
1587 1407
1588 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 1408 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1589 BLOCK_SIZE_TYPE bsize, 1409 BLOCK_SIZE bsize,
1590 int_mv *frame_mv, 1410 int_mv *frame_mv,
1591 int mi_row, int mi_col, 1411 int mi_row, int mi_col,
1592 int_mv single_newmv[MAX_REF_FRAMES], 1412 int_mv single_newmv[MAX_REF_FRAMES],
1593 int *rate_mv); 1413 int *rate_mv);
1594 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 1414 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1595 BLOCK_SIZE_TYPE bsize, 1415 BLOCK_SIZE bsize,
1596 int mi_row, int mi_col, 1416 int mi_row, int mi_col,
1597 int_mv *tmp_mv, int *rate_mv); 1417 int_mv *tmp_mv, int *rate_mv);
1598 1418
1599 static int labels2mode(MACROBLOCK *x, int i, 1419 static int labels2mode(MACROBLOCK *x, int i,
1600 MB_PREDICTION_MODE this_mode, 1420 MB_PREDICTION_MODE this_mode,
1601 int_mv *this_mv, int_mv *this_second_mv, 1421 int_mv *this_mv, int_mv *this_second_mv,
1602 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], 1422 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1603 int_mv seg_mvs[MAX_REF_FRAMES], 1423 int_mv seg_mvs[MAX_REF_FRAMES],
1604 int_mv *best_ref_mv, 1424 int_mv *best_ref_mv,
1605 int_mv *second_best_ref_mv, 1425 int_mv *second_best_ref_mv,
1606 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { 1426 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1607 MACROBLOCKD *const xd = &x->e_mbd; 1427 MACROBLOCKD *const xd = &x->e_mbd;
1608 MODE_INFO *const mic = xd->mode_info_context; 1428 MODE_INFO *const mic = xd->mode_info_context;
1609 MB_MODE_INFO * mbmi = &mic->mbmi; 1429 MB_MODE_INFO *mbmi = &mic->mbmi;
1610 int cost = 0, thismvcost = 0; 1430 int cost = 0, thismvcost = 0;
1611 int idx, idy; 1431 int idx, idy;
1612 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; 1432 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1613 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; 1433 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1614 1434
1615 /* We have to be careful retrieving previously-encoded motion vectors. 1435 /* We have to be careful retrieving previously-encoded motion vectors.
1616 Ones from this macroblock have to be pulled from the BLOCKD array 1436 Ones from this macroblock have to be pulled from the BLOCKD array
1617 as they have not yet made it to the bmi array in our MB_MODE_INFO. */ 1437 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1618 MB_PREDICTION_MODE m; 1438 MB_PREDICTION_MODE m;
1619 1439
1620 // the only time we should do costing for new motion vector or mode 1440 // the only time we should do costing for new motion vector or mode
1621 // is when we are on a new label (jbb May 08, 2007) 1441 // is when we are on a new label (jbb May 08, 2007)
1622 switch (m = this_mode) { 1442 switch (m = this_mode) {
1623 case NEWMV: 1443 case NEWMV:
1624 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; 1444 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1625 thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, 1445 thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
1626 102, xd->allow_high_precision_mv); 1446 102);
1627 if (mbmi->ref_frame[1] > 0) { 1447 if (mbmi->ref_frame[1] > 0) {
1628 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; 1448 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1629 thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, 1449 thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
1630 mvjcost, mvcost, 102, 1450 mvjcost, mvcost, 102);
1631 xd->allow_high_precision_mv);
1632 } 1451 }
1633 break; 1452 break;
1634 case NEARESTMV: 1453 case NEARESTMV:
1635 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; 1454 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1636 if (mbmi->ref_frame[1] > 0) 1455 if (mbmi->ref_frame[1] > 0)
1637 this_second_mv->as_int = 1456 this_second_mv->as_int =
1638 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; 1457 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
1639 break; 1458 break;
1640 case NEARMV: 1459 case NEARMV:
1641 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; 1460 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1642 if (mbmi->ref_frame[1] > 0) 1461 if (mbmi->ref_frame[1] > 0)
1643 this_second_mv->as_int = 1462 this_second_mv->as_int =
1644 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; 1463 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
1645 break; 1464 break;
1646 case ZEROMV: 1465 case ZEROMV:
1647 this_mv->as_int = 0; 1466 this_mv->as_int = 0;
1648 if (mbmi->ref_frame[1] > 0) 1467 if (mbmi->ref_frame[1] > 0)
1649 this_second_mv->as_int = 0; 1468 this_second_mv->as_int = 0;
1650 break; 1469 break;
1651 default: 1470 default:
1652 break; 1471 break;
1653 } 1472 }
1654 1473
1655 cost = cost_mv_ref(cpi, this_mode, 1474 cost = cost_mv_ref(cpi, this_mode,
1656 mbmi->mb_mode_context[mbmi->ref_frame[0]]); 1475 mbmi->mode_context[mbmi->ref_frame[0]]);
1657 1476
1658 mic->bmi[i].as_mv[0].as_int = this_mv->as_int; 1477 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1659 if (mbmi->ref_frame[1] > 0) 1478 if (mbmi->ref_frame[1] > 0)
1660 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; 1479 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1661 1480
1662 x->partition_info->bmi[i].mode = m; 1481 x->partition_info->bmi[i].mode = m;
1663 for (idy = 0; idy < num_4x4_blocks_high; ++idy) 1482 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1664 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) 1483 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1665 vpx_memcpy(&mic->bmi[i + idy * 2 + idx], 1484 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1666 &mic->bmi[i], sizeof(mic->bmi[i])); 1485 &mic->bmi[i], sizeof(mic->bmi[i]));
1667 1486
1668 cost += thismvcost; 1487 cost += thismvcost;
1669 return cost; 1488 return cost;
1670 } 1489 }
1671 1490
1672 static int64_t encode_inter_mb_segment(VP9_COMP *cpi, 1491 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1673 MACROBLOCK *x, 1492 MACROBLOCK *x,
1674 int64_t best_yrd, 1493 int64_t best_yrd,
1675 int i, 1494 int i,
1676 int *labelyrate, 1495 int *labelyrate,
1677 int64_t *distortion, int64_t *sse, 1496 int64_t *distortion, int64_t *sse,
1678 ENTROPY_CONTEXT *ta, 1497 ENTROPY_CONTEXT *ta,
1679 ENTROPY_CONTEXT *tl) { 1498 ENTROPY_CONTEXT *tl) {
1680 int k; 1499 int k;
1681 VP9_COMMON *const cm = &cpi->common;
1682 MACROBLOCKD *xd = &x->e_mbd; 1500 MACROBLOCKD *xd = &x->e_mbd;
1683 BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; 1501 struct macroblockd_plane *const pd = &xd->plane[0];
1684 const int width = plane_block_width(bsize, &xd->plane[0]); 1502 MODE_INFO *const mi = xd->mode_info_context;
1685 const int height = plane_block_height(bsize, &xd->plane[0]); 1503 const BLOCK_SIZE bsize = mi->mbmi.sb_type;
1504 const int width = plane_block_width(bsize, pd);
1505 const int height = plane_block_height(bsize, pd);
1686 int idx, idy; 1506 int idx, idy;
1687 const int src_stride = x->plane[0].src.stride; 1507 const int src_stride = x->plane[0].src.stride;
1688 uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1508 uint8_t* const src = raster_block_offset_uint8(BLOCK_8X8, i,
1689 x->plane[0].src.buf, 1509 x->plane[0].src.buf,
1690 src_stride); 1510 src_stride);
1691 int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, 1511 int16_t* src_diff = raster_block_offset_int16(BLOCK_8X8, i,
1692 x->plane[0].src_diff); 1512 x->plane[0].src_diff);
1693 int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); 1513 int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, i);
1694 uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1514 uint8_t* const pre = raster_block_offset_uint8(BLOCK_8X8, i,
1695 xd->plane[0].pre[0].buf, 1515 pd->pre[0].buf,
1696 xd->plane[0].pre[0].stride); 1516 pd->pre[0].stride);
1697 uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1517 uint8_t* const dst = raster_block_offset_uint8(BLOCK_8X8, i,
1698 xd->plane[0].dst.buf, 1518 pd->dst.buf, pd->dst.stride);
1699 xd->plane[0].dst.stride);
1700 int64_t thisdistortion = 0, thissse = 0; 1519 int64_t thisdistortion = 0, thissse = 0;
1701 int thisrate = 0; 1520 int thisrate = 0;
1702 1521
1703 vp9_build_inter_predictor(pre, 1522 vp9_build_inter_predictor(pre, pd->pre[0].stride,
1704 xd->plane[0].pre[0].stride, 1523 dst, pd->dst.stride,
1705 dst, 1524 &mi->bmi[i].as_mv[0].as_mv,
1706 xd->plane[0].dst.stride,
1707 &xd->mode_info_context->bmi[i].as_mv[0],
1708 &xd->scale_factor[0], 1525 &xd->scale_factor[0],
1709 width, height, 0, &xd->subpix, 1526 width, height, 0, &xd->subpix, MV_PRECISION_Q3);
1710 MV_PRECISION_Q3);
1711 1527
1712 if (xd->mode_info_context->mbmi.ref_frame[1] > 0) { 1528 if (mi->mbmi.ref_frame[1] > 0) {
1713 uint8_t* const second_pre = 1529 uint8_t* const second_pre =
1714 raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, 1530 raster_block_offset_uint8(BLOCK_8X8, 0, pd->pre[1].buf, pd->pre[1].stride);
1715 xd->plane[0].pre[1].buf, 1531 vp9_build_inter_predictor(second_pre, pd->pre[1].stride,
1716 xd->plane[0].pre[1].stride); 1532 dst, pd->dst.stride,
1717 vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, 1533 &mi->bmi[i].as_mv[1].as_mv,
1718 dst, xd->plane[0].dst.stride,
1719 &xd->mode_info_context->bmi[i].as_mv[1],
1720 &xd->scale_factor[1], 1534 &xd->scale_factor[1],
1721 width, height, 1, 1535 width, height, 1, &xd->subpix, MV_PRECISION_Q3);
1722 &xd->subpix, MV_PRECISION_Q3);
1723 } 1536 }
1724 1537
1725 vp9_subtract_block(height, width, src_diff, 8, 1538 vp9_subtract_block(height, width, src_diff, 8, src, src_stride,
1726 src, src_stride, 1539 dst, pd->dst.stride);
1727 dst, xd->plane[0].dst.stride);
1728 1540
1729 k = i; 1541 k = i;
1730 for (idy = 0; idy < height / 4; ++idy) { 1542 for (idy = 0; idy < height / 4; ++idy) {
1731 for (idx = 0; idx < width / 4; ++idx) { 1543 for (idx = 0; idx < width / 4; ++idx) {
1732 int64_t ssz, rd, rd1, rd2; 1544 int64_t ssz, rd, rd1, rd2;
1733 1545
1734 k += (idy * 2 + idx); 1546 k += (idy * 2 + idx);
1735 src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, 1547 src_diff = raster_block_offset_int16(BLOCK_8X8, k,
1736 x->plane[0].src_diff); 1548 x->plane[0].src_diff);
1737 coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); 1549 coeff = BLOCK_OFFSET(x->plane[0].coeff, k);
1738 x->fwd_txm4x4(src_diff, coeff, 16); 1550 x->fwd_txm4x4(src_diff, coeff, 16);
1739 x->quantize_b_4x4(x, k, DCT_DCT, 16); 1551 x->quantize_b_4x4(x, k, DCT_DCT, 16);
1740 thisdistortion += vp9_block_error(coeff, 1552 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1741 BLOCK_OFFSET(xd->plane[0].dqcoeff, 1553 16, &ssz);
1742 k, 16), 16, &ssz);
1743 thissse += ssz; 1554 thissse += ssz;
1744 thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, 1555 thisrate += cost_coeffs(x, 0, k,
1745 ta + (k & 1), 1556 ta + (k & 1),
1746 tl + (k >> 1), TX_4X4, 1557 tl + (k >> 1), TX_4X4,
1747 vp9_default_scan_4x4, 1558 vp9_default_scan_4x4,
1748 vp9_default_scan_4x4_neighbors); 1559 vp9_default_scan_4x4_neighbors);
1749 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); 1560 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1750 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); 1561 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1751 rd = MIN(rd1, rd2); 1562 rd = MIN(rd1, rd2);
1752 if (rd >= best_yrd) 1563 if (rd >= best_yrd)
1753 return INT64_MAX; 1564 return INT64_MAX;
1754 } 1565 }
(...skipping 20 matching lines...) Expand all
1775 typedef struct { 1586 typedef struct {
1776 int_mv *ref_mv, *second_ref_mv; 1587 int_mv *ref_mv, *second_ref_mv;
1777 int_mv mvp; 1588 int_mv mvp;
1778 1589
1779 int64_t segment_rd; 1590 int64_t segment_rd;
1780 int r; 1591 int r;
1781 int64_t d; 1592 int64_t d;
1782 int64_t sse; 1593 int64_t sse;
1783 int segment_yrate; 1594 int segment_yrate;
1784 MB_PREDICTION_MODE modes[4]; 1595 MB_PREDICTION_MODE modes[4];
1785 SEG_RDSTAT rdstat[4][VP9_INTER_MODES]; 1596 SEG_RDSTAT rdstat[4][INTER_MODES];
1786 int mvthresh; 1597 int mvthresh;
1787 } BEST_SEG_INFO; 1598 } BEST_SEG_INFO;
1788 1599
1789 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { 1600 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1790 int r = 0; 1601 int r = 0;
1791 r |= (mv->as_mv.row >> 3) < x->mv_row_min; 1602 r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1792 r |= (mv->as_mv.row >> 3) > x->mv_row_max; 1603 r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1793 r |= (mv->as_mv.col >> 3) < x->mv_col_min; 1604 r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1794 r |= (mv->as_mv.col >> 3) > x->mv_col_max; 1605 r |= (mv->as_mv.col >> 3) > x->mv_col_max;
1795 return r; 1606 return r;
1796 } 1607 }
1797 1608
1798 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { 1609 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1799 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; 1610 MB_MODE_INFO *const mbmi = &x->e_mbd.mode_info_context->mbmi;
1800 x->plane[0].src.buf = 1611 struct macroblock_plane *const p = &x->plane[0];
1801 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, 1612 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1802 x->plane[0].src.buf, 1613
1803 x->plane[0].src.stride); 1614 p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
1804 assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); 1615 p->src.stride);
1805 x->e_mbd.plane[0].pre[0].buf = 1616 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1806 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, 1617 pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
1807 x->e_mbd.plane[0].pre[0].buf, 1618 pd->pre[0].stride);
1808 x->e_mbd.plane[0].pre[0].stride);
1809 if (mbmi->ref_frame[1]) 1619 if (mbmi->ref_frame[1])
1810 x->e_mbd.plane[0].pre[1].buf = 1620 pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
1811 raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, 1621 pd->pre[1].stride);
1812 x->e_mbd.plane[0].pre[1].buf,
1813 x->e_mbd.plane[0].pre[1].stride);
1814 } 1622 }
1815 1623
1816 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, 1624 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1817 struct buf_2d orig_pre[2]) { 1625 struct buf_2d orig_pre[2]) {
1818 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; 1626 MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
1819 x->plane[0].src = orig_src; 1627 x->plane[0].src = orig_src;
1820 x->e_mbd.plane[0].pre[0] = orig_pre[0]; 1628 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1821 if (mbmi->ref_frame[1]) 1629 if (mbmi->ref_frame[1])
1822 x->e_mbd.plane[0].pre[1] = orig_pre[1]; 1630 x->e_mbd.plane[0].pre[1] = orig_pre[1];
1823 } 1631 }
1824 1632
1825 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, 1633 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1826 BEST_SEG_INFO *bsi_buf, int filter_idx, 1634 BEST_SEG_INFO *bsi_buf, int filter_idx,
1827 int_mv seg_mvs[4][MAX_REF_FRAMES], 1635 int_mv seg_mvs[4][MAX_REF_FRAMES],
1828 int mi_row, int mi_col) { 1636 int mi_row, int mi_col) {
1829 int i, j, br = 0, idx, idy; 1637 int i, j, br = 0, idx, idy;
1830 int64_t bd = 0, block_sse = 0; 1638 int64_t bd = 0, block_sse = 0;
1831 MB_PREDICTION_MODE this_mode; 1639 MB_PREDICTION_MODE this_mode;
1832 MODE_INFO *mi = x->e_mbd.mode_info_context; 1640 MODE_INFO *mi = x->e_mbd.mode_info_context;
1833 MB_MODE_INFO *const mbmi = &mi->mbmi; 1641 MB_MODE_INFO *const mbmi = &mi->mbmi;
1834 const int label_count = 4; 1642 const int label_count = 4;
1835 int64_t this_segment_rd = 0; 1643 int64_t this_segment_rd = 0;
1836 int label_mv_thresh; 1644 int label_mv_thresh;
1837 int segmentyrate = 0; 1645 int segmentyrate = 0;
1838 BLOCK_SIZE_TYPE bsize = mbmi->sb_type; 1646 const BLOCK_SIZE bsize = mbmi->sb_type;
1839 int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 1647 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1840 int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 1648 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1841 vp9_variance_fn_ptr_t *v_fn_ptr; 1649 vp9_variance_fn_ptr_t *v_fn_ptr;
1842 ENTROPY_CONTEXT t_above[2], t_left[2]; 1650 ENTROPY_CONTEXT t_above[2], t_left[2];
1843 BEST_SEG_INFO *bsi = bsi_buf + filter_idx; 1651 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1844 int mode_idx; 1652 int mode_idx;
1845 int subpelmv = 1, have_ref = 0; 1653 int subpelmv = 1, have_ref = 0;
1846 1654
1847 vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); 1655 vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
1848 vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); 1656 vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
1849 1657
1850 v_fn_ptr = &cpi->fn_ptr[bsize]; 1658 v_fn_ptr = &cpi->fn_ptr[bsize];
(...skipping 13 matching lines...) Expand all
1864 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 1672 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1865 MB_PREDICTION_MODE mode_selected = ZEROMV; 1673 MB_PREDICTION_MODE mode_selected = ZEROMV;
1866 int64_t best_rd = INT64_MAX; 1674 int64_t best_rd = INT64_MAX;
1867 i = idy * 2 + idx; 1675 i = idy * 2 + idx;
1868 1676
1869 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; 1677 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1870 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; 1678 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1871 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, 1679 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1872 &frame_mv[NEARESTMV][mbmi->ref_frame[0]], 1680 &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1873 &frame_mv[NEARMV][mbmi->ref_frame[0]], 1681 &frame_mv[NEARMV][mbmi->ref_frame[0]],
1874 i, 0); 1682 i, 0, mi_row, mi_col);
1875 if (mbmi->ref_frame[1] > 0) 1683 if (mbmi->ref_frame[1] > 0)
1876 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, 1684 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
1877 &frame_mv[NEARESTMV][mbmi->ref_frame[1]], 1685 &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1878 &frame_mv[NEARMV][mbmi->ref_frame[1]], 1686 &frame_mv[NEARMV][mbmi->ref_frame[1]],
1879 i, 1); 1687 i, 1, mi_row, mi_col);
1880 1688
1881 // search for the best motion vector on this segment 1689 // search for the best motion vector on this segment
1882 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { 1690 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1883 const struct buf_2d orig_src = x->plane[0].src; 1691 const struct buf_2d orig_src = x->plane[0].src;
1884 struct buf_2d orig_pre[2]; 1692 struct buf_2d orig_pre[2];
1885 1693
1886 mode_idx = inter_mode_offset(this_mode); 1694 mode_idx = inter_mode_offset(this_mode);
1887 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; 1695 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1888 1696
1889 // if we're near/nearest and mv == 0,0, compare to zeromv 1697 // if we're near/nearest and mv == 0,0, compare to zeromv
1890 if ((this_mode == NEARMV || this_mode == NEARESTMV || 1698 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1891 this_mode == ZEROMV) && 1699 this_mode == ZEROMV) &&
1892 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && 1700 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1893 (mbmi->ref_frame[1] <= 0 || 1701 (mbmi->ref_frame[1] <= 0 ||
1894 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { 1702 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
1895 int rfc = mbmi->mb_mode_context[mbmi->ref_frame[0]]; 1703 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
1896 int c1 = cost_mv_ref(cpi, NEARMV, rfc); 1704 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1897 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); 1705 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1898 int c3 = cost_mv_ref(cpi, ZEROMV, rfc); 1706 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1899 1707
1900 if (this_mode == NEARMV) { 1708 if (this_mode == NEARMV) {
1901 if (c1 > c3) 1709 if (c1 > c3)
1902 continue; 1710 continue;
1903 } else if (this_mode == NEARESTMV) { 1711 } else if (this_mode == NEARESTMV) {
1904 if (c2 > c3) 1712 if (c2 > c3)
1905 continue; 1713 continue;
(...skipping 24 matching lines...) Expand all
1930 sizeof(bsi->rdstat[i][mode_idx].tl)); 1738 sizeof(bsi->rdstat[i][mode_idx].tl));
1931 1739
1932 // motion search for newmv (single predictor case only) 1740 // motion search for newmv (single predictor case only)
1933 if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV && 1741 if (mbmi->ref_frame[1] <= 0 && this_mode == NEWMV &&
1934 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { 1742 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1935 int step_param = 0; 1743 int step_param = 0;
1936 int further_steps; 1744 int further_steps;
1937 int thissme, bestsme = INT_MAX; 1745 int thissme, bestsme = INT_MAX;
1938 int sadpb = x->sadperbit4; 1746 int sadpb = x->sadperbit4;
1939 int_mv mvp_full; 1747 int_mv mvp_full;
1748 int max_mv;
1940 1749
1941 /* Is the best so far sufficiently good that we cant justify doing 1750 /* Is the best so far sufficiently good that we cant justify doing
1942 * and new motion search. */ 1751 * and new motion search. */
1943 if (best_rd < label_mv_thresh) 1752 if (best_rd < label_mv_thresh)
1944 break; 1753 break;
1945 1754
1946 if (cpi->compressor_speed) { 1755 if (cpi->compressor_speed) {
1947 // use previous block's result as next block's MV predictor. 1756 // use previous block's result as next block's MV predictor.
1948 if (i > 0) { 1757 if (i > 0) {
1949 bsi->mvp.as_int = 1758 bsi->mvp.as_int =
1950 x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; 1759 x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
1951 if (i == 2) 1760 if (i == 2)
1952 bsi->mvp.as_int = 1761 bsi->mvp.as_int =
1953 x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; 1762 x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
1954 } 1763 }
1955 } 1764 }
1765 if (i == 0)
1766 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1767 else
1768 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1956 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { 1769 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
1957 // Take wtd average of the step_params based on the last frame's 1770 // Take wtd average of the step_params based on the last frame's
1958 // max mv magnitude and the best ref mvs of the current block for 1771 // max mv magnitude and the best ref mvs of the current block for
1959 // the given reference. 1772 // the given reference.
1960 if (i == 0) 1773 step_param = (vp9_init_search_range(cpi, max_mv) +
1961 step_param = (vp9_init_search_range( 1774 cpi->mv_step_param) >> 1;
1962 cpi, x->max_mv_context[mbmi->ref_frame[0]]) +
1963 cpi->mv_step_param) >> 1;
1964 else
1965 step_param = (vp9_init_search_range(
1966 cpi, MAX(abs(bsi->mvp.as_mv.row),
1967 abs(bsi->mvp.as_mv.col)) >> 3) +
1968 cpi->mv_step_param) >> 1;
1969 } else { 1775 } else {
1970 step_param = cpi->mv_step_param; 1776 step_param = cpi->mv_step_param;
1971 } 1777 }
1972 1778
1973 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; 1779 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1974 1780
1975 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; 1781 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1976 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; 1782 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1977 1783
1978 // adjust src pointer for this block 1784 // adjust src pointer for this block
1979 mi_buf_shift(x, i); 1785 mi_buf_shift(x, i);
1980 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 1786 if (cpi->sf.search_method == HEX) {
1981 sadpb, further_steps, 0, v_fn_ptr, 1787 bestsme = vp9_hex_search(x, &mvp_full,
1982 bsi->ref_mv, &mode_mv[NEWMV]); 1788 step_param,
1789 sadpb, 1, v_fn_ptr, 1,
1790 bsi->ref_mv, &mode_mv[NEWMV]);
1791 } else if (cpi->sf.search_method == SQUARE) {
1792 bestsme = vp9_square_search(x, &mvp_full,
1793 step_param,
1794 sadpb, 1, v_fn_ptr, 1,
1795 bsi->ref_mv, &mode_mv[NEWMV]);
1796 } else if (cpi->sf.search_method == BIGDIA) {
1797 bestsme = vp9_bigdia_search(x, &mvp_full,
1798 step_param,
1799 sadpb, 1, v_fn_ptr, 1,
1800 bsi->ref_mv, &mode_mv[NEWMV]);
1801 } else {
1802 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1803 sadpb, further_steps, 0, v_fn_ptr,
1804 bsi->ref_mv, &mode_mv[NEWMV]);
1805 }
1983 1806
1984 // Should we do a full search (best quality only) 1807 // Should we do a full search (best quality only)
1985 if (cpi->compressor_speed == 0) { 1808 if (cpi->compressor_speed == 0) {
1986 /* Check if mvp_full is within the range. */ 1809 /* Check if mvp_full is within the range. */
1987 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, 1810 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
1988 x->mv_row_min, x->mv_row_max); 1811 x->mv_row_min, x->mv_row_max);
1989 1812
1990 thissme = cpi->full_search_sad(x, &mvp_full, 1813 thissme = cpi->full_search_sad(x, &mvp_full,
1991 sadpb, 16, v_fn_ptr, 1814 sadpb, 16, v_fn_ptr,
1992 x->nmvjointcost, x->mvcost, 1815 x->nmvjointcost, x->mvcost,
1993 bsi->ref_mv, i); 1816 bsi->ref_mv, i);
1994 1817
1995 if (thissme < bestsme) { 1818 if (thissme < bestsme) {
1996 bestsme = thissme; 1819 bestsme = thissme;
1997 mode_mv[NEWMV].as_int = 1820 mode_mv[NEWMV].as_int =
1998 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int; 1821 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
1999 } else { 1822 } else {
2000 /* The full search result is actually worse so re-instate the 1823 /* The full search result is actually worse so re-instate the
2001 * previous best vector */ 1824 * previous best vector */
2002 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = 1825 x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
2003 mode_mv[NEWMV].as_int; 1826 mode_mv[NEWMV].as_int;
2004 } 1827 }
2005 } 1828 }
2006 1829
2007 if (bestsme < INT_MAX) { 1830 if (bestsme < INT_MAX) {
2008 int distortion; 1831 int distortion;
2009 unsigned int sse; 1832 unsigned int sse;
2010 cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], 1833 cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
2011 bsi->ref_mv, x->errorperbit, v_fn_ptr, 1834 bsi->ref_mv, x->errorperbit, v_fn_ptr,
1835 0, cpi->sf.subpel_iters_per_step,
2012 x->nmvjointcost, x->mvcost, 1836 x->nmvjointcost, x->mvcost,
2013 &distortion, &sse); 1837 &distortion, &sse);
2014 1838
2015 // safe motion search result for use in compound prediction 1839 // safe motion search result for use in compound prediction
2016 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int; 1840 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
2017 } 1841 }
2018 1842
2019 // restore src pointers 1843 // restore src pointers
2020 mi_buf_restore(x, orig_src, orig_pre); 1844 mi_buf_restore(x, orig_src, orig_pre);
2021 } 1845 }
2022 1846
2023 if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV && 1847 if (mbmi->ref_frame[1] > 0 && this_mode == NEWMV &&
2024 mbmi->interp_filter == vp9_switchable_interp[0]) { 1848 mbmi->interp_filter == EIGHTTAP) {
2025 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV || 1849 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
2026 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) 1850 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
2027 continue; 1851 continue;
2028 1852
2029 // adjust src pointers 1853 // adjust src pointers
2030 mi_buf_shift(x, i); 1854 mi_buf_shift(x, i);
2031 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { 1855 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2032 int rate_mv; 1856 int rate_mv;
2033 joint_motion_search(cpi, x, bsize, frame_mv[this_mode], 1857 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
2034 mi_row, mi_col, seg_mvs[i], 1858 mi_row, mi_col, seg_mvs[i],
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
2125 1949
2126 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { 1950 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2127 mode_selected = this_mode; 1951 mode_selected = this_mode;
2128 best_rd = bsi->rdstat[i][mode_idx].brdcost; 1952 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2129 } 1953 }
2130 } /*for each 4x4 mode*/ 1954 } /*for each 4x4 mode*/
2131 1955
2132 if (best_rd == INT64_MAX) { 1956 if (best_rd == INT64_MAX) {
2133 int iy, midx; 1957 int iy, midx;
2134 for (iy = i + 1; iy < 4; ++iy) 1958 for (iy = i + 1; iy < 4; ++iy)
2135 for (midx = 0; midx < VP9_INTER_MODES; ++midx) 1959 for (midx = 0; midx < INTER_MODES; ++midx)
2136 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 1960 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2137 bsi->segment_rd = INT64_MAX; 1961 bsi->segment_rd = INT64_MAX;
2138 return; 1962 return;
2139 } 1963 }
2140 1964
2141 mode_idx = inter_mode_offset(mode_selected); 1965 mode_idx = inter_mode_offset(mode_selected);
2142 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); 1966 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2143 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); 1967 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2144 1968
2145 labels2mode(x, i, mode_selected, &mode_mv[mode_selected], 1969 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
2146 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], 1970 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
2147 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, 1971 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
2148 x->mvcost, cpi); 1972 x->mvcost, cpi);
2149 1973
2150 br += bsi->rdstat[i][mode_idx].brate; 1974 br += bsi->rdstat[i][mode_idx].brate;
2151 bd += bsi->rdstat[i][mode_idx].bdist; 1975 bd += bsi->rdstat[i][mode_idx].bdist;
2152 block_sse += bsi->rdstat[i][mode_idx].bsse; 1976 block_sse += bsi->rdstat[i][mode_idx].bsse;
2153 segmentyrate += bsi->rdstat[i][mode_idx].byrate; 1977 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2154 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost; 1978 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2155 1979
2156 if (this_segment_rd > bsi->segment_rd) { 1980 if (this_segment_rd > bsi->segment_rd) {
2157 int iy, midx; 1981 int iy, midx;
2158 for (iy = i + 1; iy < 4; ++iy) 1982 for (iy = i + 1; iy < 4; ++iy)
2159 for (midx = 0; midx < VP9_INTER_MODES; ++midx) 1983 for (midx = 0; midx < INTER_MODES; ++midx)
2160 bsi->rdstat[iy][midx].brdcost = INT64_MAX; 1984 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2161 bsi->segment_rd = INT64_MAX; 1985 bsi->segment_rd = INT64_MAX;
2162 return; 1986 return;
2163 } 1987 }
2164 1988
2165 for (j = 1; j < num_4x4_blocks_high; ++j) 1989 for (j = 1; j < num_4x4_blocks_high; ++j)
2166 vpx_memcpy(&x->partition_info->bmi[i + j * 2], 1990 vpx_memcpy(&x->partition_info->bmi[i + j * 2],
2167 &x->partition_info->bmi[i], 1991 &x->partition_info->bmi[i],
2168 sizeof(x->partition_info->bmi[i])); 1992 sizeof(x->partition_info->bmi[i]));
2169 for (j = 1; j < num_4x4_blocks_wide; ++j) 1993 for (j = 1; j < num_4x4_blocks_wide; ++j)
(...skipping 27 matching lines...) Expand all
2197 BEST_SEG_INFO *bsi_buf, 2021 BEST_SEG_INFO *bsi_buf,
2198 int filter_idx, 2022 int filter_idx,
2199 int mi_row, int mi_col) { 2023 int mi_row, int mi_col) {
2200 int i; 2024 int i;
2201 BEST_SEG_INFO *bsi = bsi_buf + filter_idx; 2025 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2202 MACROBLOCKD *xd = &x->e_mbd; 2026 MACROBLOCKD *xd = &x->e_mbd;
2203 MODE_INFO *mi = xd->mode_info_context; 2027 MODE_INFO *mi = xd->mode_info_context;
2204 MB_MODE_INFO *mbmi = &mi->mbmi; 2028 MB_MODE_INFO *mbmi = &mi->mbmi;
2205 int mode_idx; 2029 int mode_idx;
2206 2030
2207 vpx_memset(bsi, 0, sizeof(*bsi)); 2031 vp9_zero(*bsi);
2208 2032
2209 bsi->segment_rd = best_rd; 2033 bsi->segment_rd = best_rd;
2210 bsi->ref_mv = best_ref_mv; 2034 bsi->ref_mv = best_ref_mv;
2211 bsi->second_ref_mv = second_best_ref_mv; 2035 bsi->second_ref_mv = second_best_ref_mv;
2212 bsi->mvp.as_int = best_ref_mv->as_int; 2036 bsi->mvp.as_int = best_ref_mv->as_int;
2213 bsi->mvthresh = mvthresh; 2037 bsi->mvthresh = mvthresh;
2214 2038
2215 for (i = 0; i < 4; i++) 2039 for (i = 0; i < 4; i++)
2216 bsi->modes[i] = ZEROMV; 2040 bsi->modes[i] = ZEROMV;
2217 2041
(...skipping 10 matching lines...) Expand all
2228 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; 2052 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2229 x->partition_info->bmi[i].mode = bsi->modes[i]; 2053 x->partition_info->bmi[i].mode = bsi->modes[i];
2230 } 2054 }
2231 2055
2232 /* 2056 /*
2233 * used to set mbmi->mv.as_int 2057 * used to set mbmi->mv.as_int
2234 */ 2058 */
2235 *returntotrate = bsi->r; 2059 *returntotrate = bsi->r;
2236 *returndistortion = bsi->d; 2060 *returndistortion = bsi->d;
2237 *returnyrate = bsi->segment_yrate; 2061 *returnyrate = bsi->segment_yrate;
2238 *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8); 2062 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
2239 *psse = bsi->sse; 2063 *psse = bsi->sse;
2240 mbmi->mode = bsi->modes[3]; 2064 mbmi->mode = bsi->modes[3];
2241 2065
2242 return bsi->segment_rd; 2066 return bsi->segment_rd;
2243 } 2067 }
2244 2068
2245 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, 2069 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2246 uint8_t *ref_y_buffer, int ref_y_stride, 2070 uint8_t *ref_y_buffer, int ref_y_stride,
2247 int ref_frame, BLOCK_SIZE_TYPE block_size ) { 2071 int ref_frame, BLOCK_SIZE block_size ) {
2248 MACROBLOCKD *xd = &x->e_mbd; 2072 MACROBLOCKD *xd = &x->e_mbd;
2249 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 2073 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2250 int_mv this_mv; 2074 int_mv this_mv;
2251 int i; 2075 int i;
2252 int zero_seen = 0; 2076 int zero_seen = 0;
2253 int best_index = 0; 2077 int best_index = 0;
2254 int best_sad = INT_MAX; 2078 int best_sad = INT_MAX;
2255 int this_sad = INT_MAX; 2079 int this_sad = INT_MAX;
2256 unsigned int max_mv = 0; 2080 unsigned int max_mv = 0;
2257 2081
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
2290 x->mv_best_ref_index[ref_frame] = best_index; 2114 x->mv_best_ref_index[ref_frame] = best_index;
2291 x->max_mv_context[ref_frame] = max_mv; 2115 x->max_mv_context[ref_frame] = max_mv;
2292 } 2116 }
2293 2117
2294 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, 2118 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2295 unsigned int *ref_costs_single, 2119 unsigned int *ref_costs_single,
2296 unsigned int *ref_costs_comp, 2120 unsigned int *ref_costs_comp,
2297 vp9_prob *comp_mode_p) { 2121 vp9_prob *comp_mode_p) {
2298 VP9_COMMON *const cm = &cpi->common; 2122 VP9_COMMON *const cm = &cpi->common;
2299 MACROBLOCKD *const xd = &cpi->mb.e_mbd; 2123 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2300 int seg_ref_active = vp9_segfeature_active(&xd->seg, segment_id, 2124 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2301 SEG_LVL_REF_FRAME); 2125 SEG_LVL_REF_FRAME);
2302 if (seg_ref_active) { 2126 if (seg_ref_active) {
2303 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); 2127 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2304 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); 2128 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2305 *comp_mode_p = 128; 2129 *comp_mode_p = 128;
2306 } else { 2130 } else {
2307 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd); 2131 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
2308 vp9_prob comp_inter_p = 128; 2132 vp9_prob comp_inter_p = 128;
2309 2133
2310 if (cm->comp_pred_mode == HYBRID_PREDICTION) { 2134 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
2351 } 2175 }
2352 } 2176 }
2353 } 2177 }
2354 2178
2355 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, 2179 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2356 int mode_index, 2180 int mode_index,
2357 PARTITION_INFO *partition, 2181 PARTITION_INFO *partition,
2358 int_mv *ref_mv, 2182 int_mv *ref_mv,
2359 int_mv *second_ref_mv, 2183 int_mv *second_ref_mv,
2360 int64_t comp_pred_diff[NB_PREDICTION_TYPES], 2184 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
2361 int64_t txfm_size_diff[NB_TXFM_MODES], 2185 int64_t tx_size_diff[TX_MODES],
2362 int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) { 2186 int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) {
2363 MACROBLOCKD *const xd = &x->e_mbd; 2187 MACROBLOCKD *const xd = &x->e_mbd;
2364 2188
2365 // Take a snapshot of the coding context so it can be 2189 // Take a snapshot of the coding context so it can be
2366 // restored if we decide to encode this way 2190 // restored if we decide to encode this way
2367 ctx->skip = x->skip; 2191 ctx->skip = x->skip;
2368 ctx->best_mode_index = mode_index; 2192 ctx->best_mode_index = mode_index;
2369 ctx->mic = *xd->mode_info_context; 2193 ctx->mic = *xd->mode_info_context;
2370 2194
2371 if (partition) 2195 if (partition)
2372 ctx->partition_info = *partition; 2196 ctx->partition_info = *partition;
2373 2197
2374 ctx->best_ref_mv.as_int = ref_mv->as_int; 2198 ctx->best_ref_mv.as_int = ref_mv->as_int;
2375 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int; 2199 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
2376 2200
2377 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY]; 2201 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
2378 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; 2202 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
2379 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; 2203 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
2380 2204
2381 // FIXME(rbultje) does this memcpy the whole array? I believe sizeof() 2205 // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
2382 // doesn't actually work this way 2206 // doesn't actually work this way
2383 memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff)); 2207 memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2384 memcpy(ctx->best_filter_diff, best_filter_diff, 2208 memcpy(ctx->best_filter_diff, best_filter_diff,
2385 sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1)); 2209 sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
2386 } 2210 }
2387 2211
2388 static void setup_pred_block(const MACROBLOCKD *xd, 2212 static void setup_pred_block(const MACROBLOCKD *xd,
2389 struct buf_2d dst[MAX_MB_PLANE], 2213 struct buf_2d dst[MAX_MB_PLANE],
2390 const YV12_BUFFER_CONFIG *src, 2214 const YV12_BUFFER_CONFIG *src,
2391 int mi_row, int mi_col, 2215 int mi_row, int mi_col,
2392 const struct scale_factors *scale, 2216 const struct scale_factors *scale,
2393 const struct scale_factors *scale_uv) { 2217 const struct scale_factors *scale_uv) {
2394 int i; 2218 int i;
2395 2219
(...skipping 10 matching lines...) Expand all
2406 // TODO(jkoleszar): Make scale factors per-plane data 2230 // TODO(jkoleszar): Make scale factors per-plane data
2407 for (i = 0; i < MAX_MB_PLANE; i++) { 2231 for (i = 0; i < MAX_MB_PLANE; i++) {
2408 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, 2232 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2409 i ? scale_uv : scale, 2233 i ? scale_uv : scale,
2410 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); 2234 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2411 } 2235 }
2412 } 2236 }
2413 2237
2414 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, 2238 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2415 int idx, MV_REFERENCE_FRAME frame_type, 2239 int idx, MV_REFERENCE_FRAME frame_type,
2416 BLOCK_SIZE_TYPE block_size, 2240 BLOCK_SIZE block_size,
2417 int mi_row, int mi_col, 2241 int mi_row, int mi_col,
2418 int_mv frame_nearest_mv[MAX_REF_FRAMES], 2242 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2419 int_mv frame_near_mv[MAX_REF_FRAMES], 2243 int_mv frame_near_mv[MAX_REF_FRAMES],
2420 struct buf_2d yv12_mb[4][MAX_MB_PLANE], 2244 struct buf_2d yv12_mb[4][MAX_MB_PLANE],
2421 struct scale_factors scale[MAX_REF_FRAMES]) { 2245 struct scale_factors scale[MAX_REF_FRAMES]) {
2422 VP9_COMMON *cm = &cpi->common; 2246 VP9_COMMON *cm = &cpi->common;
2423 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; 2247 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2424 MACROBLOCKD *const xd = &x->e_mbd; 2248 MACROBLOCKD *const xd = &x->e_mbd;
2425 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 2249 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2426 2250
2427 // set up scaling factors 2251 // set up scaling factors
2428 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1]; 2252 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
2429 2253
2430 scale[frame_type].x_offset_q4 = 2254 scale[frame_type].x_offset_q4 =
2431 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp, 2255 ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].x_scale_fp,
2432 VP9_REF_SCALE_SHIFT) & 0xf; 2256 REF_SCALE_SHIFT) & 0xf;
2433 scale[frame_type].y_offset_q4 = 2257 scale[frame_type].y_offset_q4 =
2434 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp, 2258 ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].y_scale_fp,
2435 VP9_REF_SCALE_SHIFT) & 0xf; 2259 REF_SCALE_SHIFT) & 0xf;
2436 2260
2437 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this 2261 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2438 // use the UV scaling factors. 2262 // use the UV scaling factors.
2439 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, 2263 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
2440 &scale[frame_type], &scale[frame_type]); 2264 &scale[frame_type], &scale[frame_type]);
2441 2265
2442 // Gets an initial list of candidate vectors from neighbours and orders them 2266 // Gets an initial list of candidate vectors from neighbours and orders them
2443 vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context, 2267 vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
2444 xd->prev_mode_info_context, 2268 xd->prev_mode_info_context,
2445 frame_type, 2269 frame_type,
2446 mbmi->ref_mvs[frame_type], 2270 mbmi->ref_mvs[frame_type], mi_row, mi_col);
2447 cpi->common.ref_frame_sign_bias);
2448 2271
2449 // Candidate refinement carried out at encoder and decoder 2272 // Candidate refinement carried out at encoder and decoder
2450 vp9_find_best_ref_mvs(xd, 2273 vp9_find_best_ref_mvs(xd,
2451 mbmi->ref_mvs[frame_type], 2274 mbmi->ref_mvs[frame_type],
2452 &frame_nearest_mv[frame_type], 2275 &frame_nearest_mv[frame_type],
2453 &frame_near_mv[frame_type]); 2276 &frame_near_mv[frame_type]);
2454 2277
2455 // Further refinement that is encode side only to test the top few candidates 2278 // Further refinement that is encode side only to test the top few candidates
2456 // in full and choose the best as the centre point for subsequent searches. 2279 // in full and choose the best as the centre point for subsequent searches.
2457 // The current implementation doesn't support scaling. 2280 // The current implementation doesn't support scaling.
2458 if (scale[frame_type].x_scale_fp == VP9_REF_NO_SCALE && 2281 if (!vp9_is_scaled(&scale[frame_type]))
2459 scale[frame_type].y_scale_fp == VP9_REF_NO_SCALE)
2460 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride, 2282 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2461 frame_type, block_size); 2283 frame_type, block_size);
2462 } 2284 }
2463 2285
2464 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { 2286 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2465 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; 2287 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2466 int fb = get_ref_frame_idx(cpi, ref_frame); 2288 int fb = get_ref_frame_idx(cpi, ref_frame);
2467 if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb]) 2289 if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb])
2468 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]]; 2290 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]];
2469 return scaled_ref_frame; 2291 return scaled_ref_frame;
2470 } 2292 }
2471 2293
2472 static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { 2294 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2473 MACROBLOCKD *xd = &x->e_mbd; 2295 const MACROBLOCKD *const xd = &x->e_mbd;
2474 MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; 2296 const MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
2475 2297 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2476 const int c = vp9_get_pred_context_switchable_interp(xd); 2298 return SWITCHABLE_INTERP_RATE_FACTOR *
2477 const int m = vp9_switchable_interp_map[mbmi->interp_filter]; 2299 x->switchable_interp_costs[ctx][mbmi->interp_filter];
2478 return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
2479 } 2300 }
2480 2301
2481 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 2302 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2482 BLOCK_SIZE_TYPE bsize, 2303 BLOCK_SIZE bsize,
2483 int mi_row, int mi_col, 2304 int mi_row, int mi_col,
2484 int_mv *tmp_mv, int *rate_mv) { 2305 int_mv *tmp_mv, int *rate_mv) {
2485 MACROBLOCKD *xd = &x->e_mbd; 2306 MACROBLOCKD *xd = &x->e_mbd;
2486 VP9_COMMON *cm = &cpi->common; 2307 VP9_COMMON *cm = &cpi->common;
2487 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 2308 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2488 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 2309 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2489 int bestsme = INT_MAX; 2310 int bestsme = INT_MAX;
2490 int further_steps, step_param; 2311 int further_steps, step_param;
2491 int sadpb = x->sadperbit16; 2312 int sadpb = x->sadperbit16;
2492 int_mv mvp_full; 2313 int_mv mvp_full;
2493 int ref = mbmi->ref_frame[0]; 2314 int ref = mbmi->ref_frame[0];
2494 int_mv ref_mv = mbmi->ref_mvs[ref][0]; 2315 int_mv ref_mv = mbmi->ref_mvs[ref][0];
2495 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); 2316 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2496 2317
2497 int tmp_col_min = x->mv_col_min; 2318 int tmp_col_min = x->mv_col_min;
2498 int tmp_col_max = x->mv_col_max; 2319 int tmp_col_max = x->mv_col_max;
2499 int tmp_row_min = x->mv_row_min; 2320 int tmp_row_min = x->mv_row_min;
2500 int tmp_row_max = x->mv_row_max; 2321 int tmp_row_max = x->mv_row_max;
2501 2322
2502 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref); 2323 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2503 2324
2504 if (scaled_ref_frame) { 2325 if (scaled_ref_frame) {
2505 int i; 2326 int i;
2506 // Swap out the reference frame for a version that's been scaled to 2327 // Swap out the reference frame for a version that's been scaled to
2507 // match the resolution of the current frame, allowing the existing 2328 // match the resolution of the current frame, allowing the existing
2508 // motion search code to be used without additional modifications. 2329 // motion search code to be used without additional modifications.
2509 for (i = 0; i < MAX_MB_PLANE; i++) 2330 for (i = 0; i < MAX_MB_PLANE; i++)
2510 backup_yv12[i] = xd->plane[i].pre[0]; 2331 backup_yv12[i] = xd->plane[i].pre[0];
2511 2332
2512 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 2333 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2513 } 2334 }
2514 2335
2515 vp9_clamp_mv_min_max(x, &ref_mv); 2336 vp9_clamp_mv_min_max(x, &ref_mv.as_mv);
2516 2337
2517 // Adjust search parameters based on small partitions' result. 2338 // Adjust search parameters based on small partitions' result.
2518 if (x->fast_ms) { 2339 if (x->fast_ms) {
2519 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 && 2340 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2520 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) { 2341 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2521 // adjust search range 2342 // adjust search range
2522 step_param = 6; 2343 step_param = 6;
2523 if (x->fast_ms > 1) 2344 if (x->fast_ms > 1)
2524 step_param = 8; 2345 step_param = 8;
2525 2346
(...skipping 21 matching lines...) Expand all
2547 mvp_full.as_int = 2368 mvp_full.as_int =
2548 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int; 2369 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
2549 } 2370 }
2550 2371
2551 mvp_full.as_mv.col >>= 3; 2372 mvp_full.as_mv.col >>= 3;
2552 mvp_full.as_mv.row >>= 3; 2373 mvp_full.as_mv.row >>= 3;
2553 2374
2554 // Further step/diamond searches as necessary 2375 // Further step/diamond searches as necessary
2555 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; 2376 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2556 2377
2557 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, 2378 if (cpi->sf.search_method == HEX) {
2558 sadpb, further_steps, 1, 2379 bestsme = vp9_hex_search(x, &mvp_full,
2559 &cpi->fn_ptr[block_size], 2380 step_param,
2560 &ref_mv, tmp_mv); 2381 sadpb, 1,
2382 &cpi->fn_ptr[block_size], 1,
2383 &ref_mv, tmp_mv);
2384 } else if (cpi->sf.search_method == SQUARE) {
2385 bestsme = vp9_square_search(x, &mvp_full,
2386 step_param,
2387 sadpb, 1,
2388 &cpi->fn_ptr[block_size], 1,
2389 &ref_mv, tmp_mv);
2390 } else if (cpi->sf.search_method == BIGDIA) {
2391 bestsme = vp9_bigdia_search(x, &mvp_full,
2392 step_param,
2393 sadpb, 1,
2394 &cpi->fn_ptr[block_size], 1,
2395 &ref_mv, tmp_mv);
2396 } else {
2397 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2398 sadpb, further_steps, 1,
2399 &cpi->fn_ptr[block_size],
2400 &ref_mv, tmp_mv);
2401 }
2561 2402
2562 x->mv_col_min = tmp_col_min; 2403 x->mv_col_min = tmp_col_min;
2563 x->mv_col_max = tmp_col_max; 2404 x->mv_col_max = tmp_col_max;
2564 x->mv_row_min = tmp_row_min; 2405 x->mv_row_min = tmp_row_min;
2565 x->mv_row_max = tmp_row_max; 2406 x->mv_row_max = tmp_row_max;
2566 2407
2567 if (bestsme < INT_MAX) { 2408 if (bestsme < INT_MAX) {
2568 int dis; /* TODO: use dis in distortion calculation later. */ 2409 int dis; /* TODO: use dis in distortion calculation later. */
2569 unsigned int sse; 2410 unsigned int sse;
2570 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, 2411 cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
2571 x->errorperbit, 2412 x->errorperbit,
2572 &cpi->fn_ptr[block_size], 2413 &cpi->fn_ptr[block_size],
2414 0, cpi->sf.subpel_iters_per_step,
2573 x->nmvjointcost, x->mvcost, 2415 x->nmvjointcost, x->mvcost,
2574 &dis, &sse); 2416 &dis, &sse);
2575 } 2417 }
2576 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, 2418 *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
2577 x->nmvjointcost, x->mvcost, 2419 x->nmvjointcost, x->mvcost,
2578 96, xd->allow_high_precision_mv); 2420 96);
2579 if (scaled_ref_frame) { 2421 if (scaled_ref_frame) {
2580 int i; 2422 int i;
2581 for (i = 0; i < MAX_MB_PLANE; i++) 2423 for (i = 0; i < MAX_MB_PLANE; i++)
2582 xd->plane[i].pre[0] = backup_yv12[i]; 2424 xd->plane[i].pre[0] = backup_yv12[i];
2583 } 2425 }
2584 } 2426 }
2585 2427
2586 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, 2428 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2587 BLOCK_SIZE_TYPE bsize, 2429 BLOCK_SIZE bsize,
2588 int_mv *frame_mv, 2430 int_mv *frame_mv,
2589 int mi_row, int mi_col, 2431 int mi_row, int mi_col,
2590 int_mv single_newmv[MAX_REF_FRAMES], 2432 int_mv single_newmv[MAX_REF_FRAMES],
2591 int *rate_mv) { 2433 int *rate_mv) {
2592 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); 2434 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
2593 MACROBLOCKD *xd = &x->e_mbd; 2435 MACROBLOCKD *xd = &x->e_mbd;
2594 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 2436 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2595 int refs[2] = { mbmi->ref_frame[0], 2437 int refs[2] = { mbmi->ref_frame[0],
2596 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2438 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2597 int_mv ref_mv[2]; 2439 int_mv ref_mv[2];
2598 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); 2440 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2599 int ite; 2441 int ite;
2600 // Prediction buffer from second frame. 2442 // Prediction buffer from second frame.
2601 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); 2443 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2602 2444
2603 // Do joint motion search in compound mode to get more accurate mv. 2445 // Do joint motion search in compound mode to get more accurate mv.
2604 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; 2446 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2605 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; 2447 struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
2606 struct buf_2d scaled_first_yv12; 2448 struct buf_2d scaled_first_yv12;
2607 int last_besterr[2] = {INT_MAX, INT_MAX}; 2449 int last_besterr[2] = {INT_MAX, INT_MAX};
2608 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; 2450 YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
2656 int id = ite % 2; 2498 int id = ite % 2;
2657 2499
2658 // Initialized here because of compiler problem in Visual Studio. 2500 // Initialized here because of compiler problem in Visual Studio.
2659 ref_yv12[0] = xd->plane[0].pre[0]; 2501 ref_yv12[0] = xd->plane[0].pre[0];
2660 ref_yv12[1] = xd->plane[0].pre[1]; 2502 ref_yv12[1] = xd->plane[0].pre[1];
2661 2503
2662 // Get pred block from second frame. 2504 // Get pred block from second frame.
2663 vp9_build_inter_predictor(ref_yv12[!id].buf, 2505 vp9_build_inter_predictor(ref_yv12[!id].buf,
2664 ref_yv12[!id].stride, 2506 ref_yv12[!id].stride,
2665 second_pred, pw, 2507 second_pred, pw,
2666 &frame_mv[refs[!id]], 2508 &frame_mv[refs[!id]].as_mv,
2667 &xd->scale_factor[!id], 2509 &xd->scale_factor[!id],
2668 pw, ph, 0, 2510 pw, ph, 0,
2669 &xd->subpix, MV_PRECISION_Q3); 2511 &xd->subpix, MV_PRECISION_Q3);
2670 2512
2671 // Compound motion search on first ref frame. 2513 // Compound motion search on first ref frame.
2672 if (id) 2514 if (id)
2673 xd->plane[0].pre[0] = ref_yv12[id]; 2515 xd->plane[0].pre[0] = ref_yv12[id];
2674 vp9_clamp_mv_min_max(x, &ref_mv[id]); 2516 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv);
2675 2517
2676 // Use mv result from single mode as mvp. 2518 // Use mv result from single mode as mvp.
2677 tmp_mv.as_int = frame_mv[refs[id]].as_int; 2519 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2678 2520
2679 tmp_mv.as_mv.col >>= 3; 2521 tmp_mv.as_mv.col >>= 3;
2680 tmp_mv.as_mv.row >>= 3; 2522 tmp_mv.as_mv.row >>= 3;
2681 2523
2682 // Small-range full-pixel motion search 2524 // Small-range full-pixel motion search
2683 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, 2525 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2684 search_range, 2526 search_range,
2685 &cpi->fn_ptr[block_size], 2527 &cpi->fn_ptr[block_size],
2686 x->nmvjointcost, x->mvcost, 2528 x->nmvjointcost, x->mvcost,
2687 &ref_mv[id], second_pred, 2529 &ref_mv[id], second_pred,
2688 pw, ph); 2530 pw, ph);
2689 2531
2690 x->mv_col_min = tmp_col_min; 2532 x->mv_col_min = tmp_col_min;
2691 x->mv_col_max = tmp_col_max; 2533 x->mv_col_max = tmp_col_max;
2692 x->mv_row_min = tmp_row_min; 2534 x->mv_row_min = tmp_row_min;
2693 x->mv_row_max = tmp_row_max; 2535 x->mv_row_max = tmp_row_max;
2694 2536
2695 if (bestsme < INT_MAX) { 2537 if (bestsme < INT_MAX) {
2696 int dis; /* TODO: use dis in distortion calculation later. */ 2538 int dis; /* TODO: use dis in distortion calculation later. */
2697 unsigned int sse; 2539 unsigned int sse;
2698 2540
2699 bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, 2541 bestsme = cpi->find_fractional_mv_step_comp(
2700 &ref_mv[id], 2542 x, &tmp_mv,
2701 x->errorperbit, 2543 &ref_mv[id],
2702 &cpi->fn_ptr[block_size], 2544 x->errorperbit,
2703 x->nmvjointcost, x->mvcost, 2545 &cpi->fn_ptr[block_size],
2704 &dis, &sse, second_pred, 2546 0, cpi->sf.subpel_iters_per_step,
2705 pw, ph); 2547 x->nmvjointcost, x->mvcost,
2548 &dis, &sse, second_pred,
2549 pw, ph);
2706 } 2550 }
2707 2551
2708 if (id) 2552 if (id)
2709 xd->plane[0].pre[0] = scaled_first_yv12; 2553 xd->plane[0].pre[0] = scaled_first_yv12;
2710 2554
2711 if (bestsme < last_besterr[id]) { 2555 if (bestsme < last_besterr[id]) {
2712 frame_mv[refs[id]].as_int = tmp_mv.as_int; 2556 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2713 last_besterr[id] = bestsme; 2557 last_besterr[id] = bestsme;
2714 } else { 2558 } else {
2715 break; 2559 break;
2716 } 2560 }
2717 } 2561 }
2718 2562
2719 // restore the predictor 2563 // restore the predictor
2720 if (scaled_ref_frame[0]) { 2564 if (scaled_ref_frame[0]) {
2721 int i; 2565 int i;
2722 for (i = 0; i < MAX_MB_PLANE; i++) 2566 for (i = 0; i < MAX_MB_PLANE; i++)
2723 xd->plane[i].pre[0] = backup_yv12[i]; 2567 xd->plane[i].pre[0] = backup_yv12[i];
2724 } 2568 }
2725 2569
2726 if (scaled_ref_frame[1]) { 2570 if (scaled_ref_frame[1]) {
2727 int i; 2571 int i;
2728 for (i = 0; i < MAX_MB_PLANE; i++) 2572 for (i = 0; i < MAX_MB_PLANE; i++)
2729 xd->plane[i].pre[1] = backup_second_yv12[i]; 2573 xd->plane[i].pre[1] = backup_second_yv12[i];
2730 } 2574 }
2731 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], 2575 *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2732 &mbmi->ref_mvs[refs[0]][0], 2576 &mbmi->ref_mvs[refs[0]][0],
2733 x->nmvjointcost, x->mvcost, 96, 2577 x->nmvjointcost, x->mvcost, 96);
2734 x->e_mbd.allow_high_precision_mv);
2735 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], 2578 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2736 &mbmi->ref_mvs[refs[1]][0], 2579 &mbmi->ref_mvs[refs[1]][0],
2737 x->nmvjointcost, x->mvcost, 96, 2580 x->nmvjointcost, x->mvcost, 96);
2738 x->e_mbd.allow_high_precision_mv);
2739 2581
2740 vpx_free(second_pred); 2582 vpx_free(second_pred);
2741 } 2583 }
2742 2584
2743 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, 2585 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2744 BLOCK_SIZE_TYPE bsize, 2586 BLOCK_SIZE bsize,
2745 int64_t txfm_cache[], 2587 int64_t txfm_cache[],
2746 int *rate2, int64_t *distortion, 2588 int *rate2, int64_t *distortion,
2747 int *skippable, 2589 int *skippable,
2748 int *rate_y, int64_t *distortion_y, 2590 int *rate_y, int64_t *distortion_y,
2749 int *rate_uv, int64_t *distortion_uv, 2591 int *rate_uv, int64_t *distortion_uv,
2750 int *mode_excluded, int *disable_skip, 2592 int *mode_excluded, int *disable_skip,
2751 INTERPOLATIONFILTERTYPE *best_filter, 2593 INTERPOLATIONFILTERTYPE *best_filter,
2752 int_mv (*mode_mv)[MAX_REF_FRAMES], 2594 int_mv (*mode_mv)[MAX_REF_FRAMES],
2753 int mi_row, int mi_col, 2595 int mi_row, int mi_col,
2754 int_mv single_newmv[MAX_REF_FRAMES], 2596 int_mv single_newmv[MAX_REF_FRAMES],
2755 int64_t *psse, int64_t ref_best_rd) { 2597 int64_t *psse,
2598 const int64_t ref_best_rd) {
2756 VP9_COMMON *cm = &cpi->common; 2599 VP9_COMMON *cm = &cpi->common;
2757 MACROBLOCKD *xd = &x->e_mbd; 2600 MACROBLOCKD *xd = &x->e_mbd;
2758 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 2601 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
2759 const int is_comp_pred = (mbmi->ref_frame[1] > 0); 2602 const int is_comp_pred = (mbmi->ref_frame[1] > 0);
2760 const int num_refs = is_comp_pred ? 2 : 1; 2603 const int num_refs = is_comp_pred ? 2 : 1;
2761 const int this_mode = mbmi->mode; 2604 const int this_mode = mbmi->mode;
2762 int_mv *frame_mv = mode_mv[this_mode]; 2605 int_mv *frame_mv = mode_mv[this_mode];
2763 int i; 2606 int i;
2764 int refs[2] = { mbmi->ref_frame[0], 2607 int refs[2] = { mbmi->ref_frame[0],
2765 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2608 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2766 int_mv cur_mv[2]; 2609 int_mv cur_mv[2];
2767 int64_t this_rd = 0; 2610 int64_t this_rd = 0;
2768 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); 2611 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2769 int pred_exists = 0; 2612 int pred_exists = 0;
2770 int interpolating_intpel_seen = 0;
2771 int intpel_mv; 2613 int intpel_mv;
2772 int64_t rd, best_rd = INT64_MAX; 2614 int64_t rd, best_rd = INT64_MAX;
2773 int best_needs_copy = 0; 2615 int best_needs_copy = 0;
2774 uint8_t *orig_dst[MAX_MB_PLANE]; 2616 uint8_t *orig_dst[MAX_MB_PLANE];
2775 int orig_dst_stride[MAX_MB_PLANE]; 2617 int orig_dst_stride[MAX_MB_PLANE];
2776 int rs = 0; 2618 int rs = 0;
2777 2619
2778 switch (this_mode) { 2620 if (this_mode == NEWMV) {
2779 int rate_mv; 2621 int rate_mv;
2780 case NEWMV: 2622 if (is_comp_pred) {
2781 if (is_comp_pred) { 2623 // Initialize mv using single prediction mode result.
2782 // Initialize mv using single prediction mode result. 2624 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2783 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; 2625 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2784 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2785 2626
2786 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { 2627 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2787 joint_motion_search(cpi, x, bsize, frame_mv, 2628 joint_motion_search(cpi, x, bsize, frame_mv,
2788 mi_row, mi_col, single_newmv, &rate_mv); 2629 mi_row, mi_col, single_newmv, &rate_mv);
2789 } else {
2790 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2791 &mbmi->ref_mvs[refs[0]][0],
2792 x->nmvjointcost, x->mvcost, 96,
2793 x->e_mbd.allow_high_precision_mv);
2794 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2795 &mbmi->ref_mvs[refs[1]][0],
2796 x->nmvjointcost, x->mvcost, 96,
2797 x->e_mbd.allow_high_precision_mv);
2798 }
2799 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2800 frame_mv[refs[1]].as_int == INVALID_MV)
2801 return INT64_MAX;
2802 *rate2 += rate_mv;
2803 } else { 2630 } else {
2804 int_mv tmp_mv; 2631 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
2805 single_motion_search(cpi, x, bsize, mi_row, mi_col, 2632 &mbmi->ref_mvs[refs[0]][0],
2806 &tmp_mv, &rate_mv); 2633 x->nmvjointcost, x->mvcost, 96);
2807 *rate2 += rate_mv; 2634 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
2808 frame_mv[refs[0]].as_int = 2635 &mbmi->ref_mvs[refs[1]][0],
2809 xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int; 2636 x->nmvjointcost, x->mvcost, 96);
2810 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2811 } 2637 }
2812 break; 2638 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2813 case NEARMV: 2639 frame_mv[refs[1]].as_int == INVALID_MV)
2814 case NEARESTMV: 2640 return INT64_MAX;
2815 case ZEROMV: 2641 *rate2 += rate_mv;
2816 default: 2642 } else {
2817 break; 2643 int_mv tmp_mv;
2644 single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
2645 *rate2 += rate_mv;
2646 frame_mv[refs[0]].as_int =
2647 xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2648 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2649 }
2818 } 2650 }
2819 2651
2820 // if we're near/nearest and mv == 0,0, compare to zeromv 2652 // if we're near/nearest and mv == 0,0, compare to zeromv
2821 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && 2653 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2822 frame_mv[refs[0]].as_int == 0 && 2654 frame_mv[refs[0]].as_int == 0 &&
2823 !vp9_segfeature_active(&xd->seg, mbmi->segment_id, SEG_LVL_SKIP) && 2655 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2824 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) { 2656 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2825 int rfc = mbmi->mb_mode_context[mbmi->ref_frame[0]]; 2657 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
2826 int c1 = cost_mv_ref(cpi, NEARMV, rfc); 2658 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2827 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); 2659 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2828 int c3 = cost_mv_ref(cpi, ZEROMV, rfc); 2660 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2829 2661
2830 if (this_mode == NEARMV) { 2662 if (this_mode == NEARMV) {
2831 if (c1 > c3) 2663 if (c1 > c3)
2832 return INT64_MAX; 2664 return INT64_MAX;
2833 } else if (this_mode == NEARESTMV) { 2665 } else if (this_mode == NEARESTMV) {
2834 if (c2 > c3) 2666 if (c2 > c3)
2835 return INT64_MAX; 2667 return INT64_MAX;
(...skipping 13 matching lines...) Expand all
2849 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && 2681 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2850 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) 2682 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2851 return INT64_MAX; 2683 return INT64_MAX;
2852 } 2684 }
2853 } 2685 }
2854 } 2686 }
2855 2687
2856 for (i = 0; i < num_refs; ++i) { 2688 for (i = 0; i < num_refs; ++i) {
2857 cur_mv[i] = frame_mv[refs[i]]; 2689 cur_mv[i] = frame_mv[refs[i]];
2858 // Clip "next_nearest" so that it does not extend to far out of image 2690 // Clip "next_nearest" so that it does not extend to far out of image
2859 if (this_mode == NEWMV) 2691 if (this_mode != NEWMV)
2860 assert(!clamp_mv2(&cur_mv[i], xd)); 2692 clamp_mv2(&cur_mv[i].as_mv, xd);
2861 else
2862 clamp_mv2(&cur_mv[i], xd);
2863 2693
2864 if (mv_check_bounds(x, &cur_mv[i])) 2694 if (mv_check_bounds(x, &cur_mv[i]))
2865 return INT64_MAX; 2695 return INT64_MAX;
2866 mbmi->mv[i].as_int = cur_mv[i].as_int; 2696 mbmi->mv[i].as_int = cur_mv[i].as_int;
2867 } 2697 }
2868 2698
2869 // do first prediction into the destination buffer. Do the next 2699 // do first prediction into the destination buffer. Do the next
2870 // prediction into a temporary buffer. Then keep track of which one 2700 // prediction into a temporary buffer. Then keep track of which one
2871 // of these currently holds the best predictor, and use the other 2701 // of these currently holds the best predictor, and use the other
2872 // one for future predictions. In the end, copy from tmp_buf to 2702 // one for future predictions. In the end, copy from tmp_buf to
2873 // dst if necessary. 2703 // dst if necessary.
2874 for (i = 0; i < MAX_MB_PLANE; i++) { 2704 for (i = 0; i < MAX_MB_PLANE; i++) {
2875 orig_dst[i] = xd->plane[i].dst.buf; 2705 orig_dst[i] = xd->plane[i].dst.buf;
2876 orig_dst_stride[i] = xd->plane[i].dst.stride; 2706 orig_dst_stride[i] = xd->plane[i].dst.stride;
2877 } 2707 }
2878 2708
2879 /* We don't include the cost of the second reference here, because there 2709 /* We don't include the cost of the second reference here, because there
2880 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other 2710 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2881 * words if you present them in that order, the second one is always known 2711 * words if you present them in that order, the second one is always known
2882 * if the first is known */ 2712 * if the first is known */
2883 *rate2 += cost_mv_ref(cpi, this_mode, 2713 *rate2 += cost_mv_ref(cpi, this_mode,
2884 mbmi->mb_mode_context[mbmi->ref_frame[0]]); 2714 mbmi->mode_context[mbmi->ref_frame[0]]);
2885 2715
2886 if (!(*mode_excluded)) { 2716 if (!(*mode_excluded)) {
2887 if (is_comp_pred) { 2717 if (is_comp_pred) {
2888 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY); 2718 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2889 } else { 2719 } else {
2890 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY); 2720 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2891 } 2721 }
2892 } 2722 }
2893 2723
2894 pred_exists = 0; 2724 pred_exists = 0;
2895 interpolating_intpel_seen = 0;
2896 // Are all MVs integer pel for Y and UV 2725 // Are all MVs integer pel for Y and UV
2897 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && 2726 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2898 (mbmi->mv[0].as_mv.col & 15) == 0; 2727 (mbmi->mv[0].as_mv.col & 15) == 0;
2899 if (is_comp_pred) 2728 if (is_comp_pred)
2900 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && 2729 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2901 (mbmi->mv[1].as_mv.col & 15) == 0; 2730 (mbmi->mv[1].as_mv.col & 15) == 0;
2902 // Search for best switchable filter by checking the variance of 2731 // Search for best switchable filter by checking the variance of
2903 // pred error irrespective of whether the filter will be used 2732 // pred error irrespective of whether the filter will be used
2904 *best_filter = EIGHTTAP; 2733 if (cm->mcomp_filter_type != BILINEAR) {
2905 if (cpi->sf.use_8tap_always) {
2906 *best_filter = EIGHTTAP; 2734 *best_filter = EIGHTTAP;
2907 vp9_zero(cpi->rd_filter_cache); 2735 if (x->source_variance <
2908 } else { 2736 cpi->sf.disable_filter_search_var_thresh) {
2909 int i, newbest; 2737 *best_filter = EIGHTTAP;
2910 int tmp_rate_sum = 0; 2738 vp9_zero(cpi->rd_filter_cache);
2911 int64_t tmp_dist_sum = 0; 2739 } else {
2740 int i, newbest;
2741 int tmp_rate_sum = 0;
2742 int64_t tmp_dist_sum = 0;
2912 2743
2913 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; 2744 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
2914 for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { 2745 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2915 int j; 2746 int j;
2916 int64_t rs_rd; 2747 int64_t rs_rd;
2917 const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i]; 2748 mbmi->interp_filter = i;
2918 const int is_intpel_interp = intpel_mv; 2749 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2919 mbmi->interp_filter = filter; 2750 rs = get_switchable_rate(x);
2920 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2751 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2921 rs = get_switchable_rate(cm, x);
2922 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2923 2752
2924 if (interpolating_intpel_seen && is_intpel_interp) { 2753 if (i > 0 && intpel_mv) {
2925 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, 2754 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2926 tmp_rate_sum, tmp_dist_sum); 2755 tmp_rate_sum, tmp_dist_sum);
2927 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = 2756 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2928 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], 2757 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2929 cpi->rd_filter_cache[i] + rs_rd); 2758 cpi->rd_filter_cache[i] + rs_rd);
2930 rd = cpi->rd_filter_cache[i]; 2759 rd = cpi->rd_filter_cache[i];
2931 if (cm->mcomp_filter_type == SWITCHABLE) 2760 if (cm->mcomp_filter_type == SWITCHABLE)
2932 rd += rs_rd; 2761 rd += rs_rd;
2933 } else { 2762 } else {
2934 int rate_sum = 0; 2763 int rate_sum = 0;
2935 int64_t dist_sum = 0; 2764 int64_t dist_sum = 0;
2936 if ((cm->mcomp_filter_type == SWITCHABLE && 2765 if ((cm->mcomp_filter_type == SWITCHABLE &&
2937 (!i || best_needs_copy)) || 2766 (!i || best_needs_copy)) ||
2938 (cm->mcomp_filter_type != SWITCHABLE && 2767 (cm->mcomp_filter_type != SWITCHABLE &&
2939 (cm->mcomp_filter_type == mbmi->interp_filter || 2768 (cm->mcomp_filter_type == mbmi->interp_filter ||
2940 (!interpolating_intpel_seen && is_intpel_interp)))) { 2769 (i == 0 && intpel_mv)))) {
2941 for (j = 0; j < MAX_MB_PLANE; j++) { 2770 for (j = 0; j < MAX_MB_PLANE; j++) {
2942 xd->plane[j].dst.buf = orig_dst[j]; 2771 xd->plane[j].dst.buf = orig_dst[j];
2943 xd->plane[j].dst.stride = orig_dst_stride[j]; 2772 xd->plane[j].dst.stride = orig_dst_stride[j];
2773 }
2774 } else {
2775 for (j = 0; j < MAX_MB_PLANE; j++) {
2776 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2777 xd->plane[j].dst.stride = 64;
2778 }
2944 } 2779 }
2945 } else { 2780 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2946 for (j = 0; j < MAX_MB_PLANE; j++) { 2781 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2947 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; 2782 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2948 xd->plane[j].dst.stride = 64; 2783 rate_sum, dist_sum);
2784 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2785 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2786 cpi->rd_filter_cache[i] + rs_rd);
2787 rd = cpi->rd_filter_cache[i];
2788 if (cm->mcomp_filter_type == SWITCHABLE)
2789 rd += rs_rd;
2790 if (i == 0 && intpel_mv) {
2791 tmp_rate_sum = rate_sum;
2792 tmp_dist_sum = dist_sum;
2949 } 2793 }
2950 } 2794 }
2951 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2795 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2952 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); 2796 if (rd / 2 > ref_best_rd) {
2953 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, 2797 for (i = 0; i < MAX_MB_PLANE; i++) {
2954 rate_sum, dist_sum); 2798 xd->plane[i].dst.buf = orig_dst[i];
2955 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = 2799 xd->plane[i].dst.stride = orig_dst_stride[i];
2956 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], 2800 }
2957 cpi->rd_filter_cache[i] + rs_rd); 2801 return INT64_MAX;
2958 rd = cpi->rd_filter_cache[i]; 2802 }
2959 if (cm->mcomp_filter_type == SWITCHABLE) 2803 }
2960 rd += rs_rd; 2804 newbest = i == 0 || rd < best_rd;
2961 if (!interpolating_intpel_seen && is_intpel_interp) { 2805
2962 tmp_rate_sum = rate_sum; 2806 if (newbest) {
2963 tmp_dist_sum = dist_sum; 2807 best_rd = rd;
2808 *best_filter = mbmi->interp_filter;
2809 if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
2810 best_needs_copy = !best_needs_copy;
2811 }
2812
2813 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2814 (cm->mcomp_filter_type != SWITCHABLE &&
2815 cm->mcomp_filter_type == mbmi->interp_filter)) {
2816 pred_exists = 1;
2964 } 2817 }
2965 } 2818 }
2966 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2819
2967 if (rd / 2 > ref_best_rd) { 2820 for (i = 0; i < MAX_MB_PLANE; i++) {
2968 for (i = 0; i < MAX_MB_PLANE; i++) { 2821 xd->plane[i].dst.buf = orig_dst[i];
2969 xd->plane[i].dst.buf = orig_dst[i]; 2822 xd->plane[i].dst.stride = orig_dst_stride[i];
2970 xd->plane[i].dst.stride = orig_dst_stride[i];
2971 }
2972 return INT64_MAX;
2973 }
2974 } 2823 }
2975 newbest = i == 0 || rd < best_rd;
2976
2977 if (newbest) {
2978 best_rd = rd;
2979 *best_filter = mbmi->interp_filter;
2980 if (cm->mcomp_filter_type == SWITCHABLE && i &&
2981 !(interpolating_intpel_seen && is_intpel_interp))
2982 best_needs_copy = !best_needs_copy;
2983 }
2984
2985 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2986 (cm->mcomp_filter_type != SWITCHABLE &&
2987 cm->mcomp_filter_type == mbmi->interp_filter)) {
2988 pred_exists = 1;
2989 }
2990 interpolating_intpel_seen |= is_intpel_interp;
2991 }
2992
2993 for (i = 0; i < MAX_MB_PLANE; i++) {
2994 xd->plane[i].dst.buf = orig_dst[i];
2995 xd->plane[i].dst.stride = orig_dst_stride[i];
2996 } 2824 }
2997 } 2825 }
2998 // Set the appripriate filter 2826 // Set the appropriate filter
2999 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? 2827 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
3000 cm->mcomp_filter_type : *best_filter; 2828 cm->mcomp_filter_type : *best_filter;
3001 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); 2829 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
3002 rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0); 2830 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
3003 2831
3004 if (pred_exists) { 2832 if (pred_exists) {
3005 if (best_needs_copy) { 2833 if (best_needs_copy) {
3006 // again temporarily set the buffers to local memory to prevent a memcpy 2834 // again temporarily set the buffers to local memory to prevent a memcpy
3007 for (i = 0; i < MAX_MB_PLANE; i++) { 2835 for (i = 0; i < MAX_MB_PLANE; i++) {
3008 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; 2836 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
3009 xd->plane[i].dst.stride = 64; 2837 xd->plane[i].dst.stride = 64;
3010 } 2838 }
3011 } 2839 }
3012 } else { 2840 } else {
(...skipping 13 matching lines...) Expand all
3026 if (rd / 2 > ref_best_rd) { 2854 if (rd / 2 > ref_best_rd) {
3027 for (i = 0; i < MAX_MB_PLANE; i++) { 2855 for (i = 0; i < MAX_MB_PLANE; i++) {
3028 xd->plane[i].dst.buf = orig_dst[i]; 2856 xd->plane[i].dst.buf = orig_dst[i];
3029 xd->plane[i].dst.stride = orig_dst_stride[i]; 2857 xd->plane[i].dst.stride = orig_dst_stride[i];
3030 } 2858 }
3031 return INT64_MAX; 2859 return INT64_MAX;
3032 } 2860 }
3033 } 2861 }
3034 2862
3035 if (cpi->common.mcomp_filter_type == SWITCHABLE) 2863 if (cpi->common.mcomp_filter_type == SWITCHABLE)
3036 *rate2 += get_switchable_rate(cm, x); 2864 *rate2 += get_switchable_rate(x);
3037 2865
3038 if (cpi->active_map_enabled && x->active_ptr[0] == 0) 2866 if (!is_comp_pred) {
3039 x->skip = 1; 2867 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
3040 else if (x->encode_breakout) { 2868 x->skip = 1;
3041 const BLOCK_SIZE_TYPE y_size = get_plane_block_size(bsize, &xd->plane[0]); 2869 else if (x->encode_breakout) {
3042 const BLOCK_SIZE_TYPE uv_size = get_plane_block_size(bsize, &xd->plane[1]); 2870 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2871 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2872 unsigned int var, sse;
2873 // Skipping threshold for ac.
2874 unsigned int thresh_ac;
2875 // The encode_breakout input
2876 unsigned int encode_breakout = x->encode_breakout << 4;
3043 2877
3044 unsigned int var, sse; 2878 // Calculate threshold according to dequant value.
3045 int threshold = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1] >> 4); 2879 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
3046 2880
2881 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2882 if (thresh_ac > 36000)
2883 thresh_ac = 36000;
3047 2884
3048 if (threshold < x->encode_breakout) 2885 // Use encode_breakout input if it is bigger than internal threshold.
3049 threshold = x->encode_breakout; 2886 if (thresh_ac < encode_breakout)
2887 thresh_ac = encode_breakout;
3050 2888
3051 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, 2889 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
3052 xd->plane[0].dst.buf, xd->plane[0].dst.stride, 2890 xd->plane[0].dst.buf,
3053 &sse); 2891 xd->plane[0].dst.stride, &sse);
3054 2892
3055 if ((int)sse < threshold) { 2893 // Adjust threshold according to partition size.
3056 unsigned int q2dc = xd->plane[0].dequant[0]; 2894 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
3057 // If there is no codeable 2nd order dc 2895 b_height_log2_lookup[bsize]);
3058 // or a very small uniform pixel change change
3059 if ((sse - var < q2dc * q2dc >> 4) ||
3060 (sse / 2 > var && sse - var < 64)) {
3061 // Check u and v to make sure skip is ok
3062 int sse2;
3063 unsigned int sse2u, sse2v;
3064 var = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
3065 x->plane[1].src.stride,
3066 xd->plane[1].dst.buf,
3067 xd->plane[1].dst.stride, &sse2u);
3068 var = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
3069 x->plane[2].src.stride,
3070 xd->plane[2].dst.buf,
3071 xd->plane[2].dst.stride, &sse2v);
3072 sse2 = sse2u + sse2v;
3073 2896
3074 if (sse2 * 2 < threshold) { 2897 // Y skipping condition checking
3075 x->skip = 1; 2898 if (sse < thresh_ac || sse == 0) {
3076 *distortion = sse + sse2; 2899 // Skipping threshold for dc
3077 *rate2 = 500; 2900 unsigned int thresh_dc;
3078 2901
3079 // for best yrd calculation 2902 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
3080 *rate_uv = 0;
3081 *distortion_uv = sse2;
3082 2903
3083 *disable_skip = 1; 2904 // dc skipping checking
3084 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 2905 if ((sse - var) < thresh_dc || sse == var) {
2906 unsigned int sse_u, sse_v;
2907 unsigned int var_u, var_v;
2908
2909 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2910 x->plane[1].src.stride,
2911 xd->plane[1].dst.buf,
2912 xd->plane[1].dst.stride, &sse_u);
2913
2914 // U skipping condition checking
2915 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2916 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2917 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2918 x->plane[2].src.stride,
2919 xd->plane[2].dst.buf,
2920 xd->plane[2].dst.stride, &sse_v);
2921
2922 // V skipping condition checking
2923 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2924 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2925 x->skip = 1;
2926
2927 *rate2 = 500;
2928 *rate_uv = 0;
2929
2930 // Scaling factor for SSE from spatial domain to frequency domain
2931 // is 16. Adjust distortion accordingly.
2932 *distortion_uv = (sse_u + sse_v) << 4;
2933 *distortion = (sse << 4) + *distortion_uv;
2934
2935 *disable_skip = 1;
2936 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2937 }
2938 }
3085 } 2939 }
3086 } 2940 }
3087 } 2941 }
3088 } 2942 }
3089 2943
3090 if (!x->skip) { 2944 if (!x->skip) {
3091 int skippable_y, skippable_uv; 2945 int skippable_y, skippable_uv;
3092 int64_t sseuv = INT_MAX; 2946 int64_t sseuv = INT64_MAX;
2947 int64_t rdcosty = INT64_MAX;
3093 2948
3094 // Y cost and distortion 2949 // Y cost and distortion
3095 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, 2950 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
3096 bsize, txfm_cache, ref_best_rd); 2951 bsize, txfm_cache, ref_best_rd);
3097 2952
3098 if (*rate_y == INT_MAX) { 2953 if (*rate_y == INT_MAX) {
3099 *rate2 = INT_MAX; 2954 *rate2 = INT_MAX;
3100 *distortion = INT64_MAX; 2955 *distortion = INT64_MAX;
3101 for (i = 0; i < MAX_MB_PLANE; i++) { 2956 for (i = 0; i < MAX_MB_PLANE; i++) {
3102 xd->plane[i].dst.buf = orig_dst[i]; 2957 xd->plane[i].dst.buf = orig_dst[i];
3103 xd->plane[i].dst.stride = orig_dst_stride[i]; 2958 xd->plane[i].dst.stride = orig_dst_stride[i];
3104 } 2959 }
3105 return INT64_MAX; 2960 return INT64_MAX;
3106 } 2961 }
3107 2962
3108 *rate2 += *rate_y; 2963 *rate2 += *rate_y;
3109 *distortion += *distortion_y; 2964 *distortion += *distortion_y;
3110 2965
3111 super_block_uvrd(cm, x, rate_uv, distortion_uv, 2966 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3112 &skippable_uv, &sseuv, bsize); 2967 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2968
2969 super_block_uvrd(cm, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2970 bsize, ref_best_rd - rdcosty);
2971 if (*rate_uv == INT_MAX) {
2972 *rate2 = INT_MAX;
2973 *distortion = INT64_MAX;
2974 for (i = 0; i < MAX_MB_PLANE; i++) {
2975 xd->plane[i].dst.buf = orig_dst[i];
2976 xd->plane[i].dst.stride = orig_dst_stride[i];
2977 }
2978 return INT64_MAX;
2979 }
3113 2980
3114 *psse += sseuv; 2981 *psse += sseuv;
3115 *rate2 += *rate_uv; 2982 *rate2 += *rate_uv;
3116 *distortion += *distortion_uv; 2983 *distortion += *distortion_uv;
3117 *skippable = skippable_y && skippable_uv; 2984 *skippable = skippable_y && skippable_uv;
3118 } 2985 }
3119 2986
3120 for (i = 0; i < MAX_MB_PLANE; i++) { 2987 for (i = 0; i < MAX_MB_PLANE; i++) {
3121 xd->plane[i].dst.buf = orig_dst[i]; 2988 xd->plane[i].dst.buf = orig_dst[i];
3122 xd->plane[i].dst.stride = orig_dst_stride[i]; 2989 xd->plane[i].dst.stride = orig_dst_stride[i];
3123 } 2990 }
3124 2991
3125 return this_rd; // if 0, this will be re-calculated by caller 2992 return this_rd; // if 0, this will be re-calculated by caller
3126 } 2993 }
3127 2994
3128 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 2995 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3129 int *returnrate, int64_t *returndist, 2996 int *returnrate, int64_t *returndist,
3130 BLOCK_SIZE_TYPE bsize, 2997 BLOCK_SIZE bsize,
3131 PICK_MODE_CONTEXT *ctx, int64_t best_rd) { 2998 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3132 VP9_COMMON *const cm = &cpi->common; 2999 VP9_COMMON *const cm = &cpi->common;
3133 MACROBLOCKD *const xd = &x->e_mbd; 3000 MACROBLOCKD *const xd = &x->e_mbd;
3134 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; 3001 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3135 int y_skip = 0, uv_skip; 3002 int y_skip = 0, uv_skip;
3136 int64_t dist_y = 0, dist_uv = 0, txfm_cache[NB_TXFM_MODES]; 3003 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3137
3138 x->skip_encode = 0; 3004 x->skip_encode = 0;
3139 vpx_memset(&txfm_cache, 0, sizeof(txfm_cache));
3140 ctx->skip = 0; 3005 ctx->skip = 0;
3141 xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME; 3006 xd->mode_info_context->mbmi.ref_frame[0] = INTRA_FRAME;
3142 if (bsize >= BLOCK_SIZE_SB8X8) { 3007 if (bsize >= BLOCK_8X8) {
3143 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, 3008 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3144 &dist_y, &y_skip, bsize, txfm_cache, 3009 &dist_y, &y_skip, bsize, tx_cache,
3145 best_rd) >= best_rd) { 3010 best_rd) >= best_rd) {
3146 *returnrate = INT_MAX; 3011 *returnrate = INT_MAX;
3147 return; 3012 return;
3148 } 3013 }
3149 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, 3014 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3150 &dist_uv, &uv_skip, bsize); 3015 &dist_uv, &uv_skip, bsize);
3151 } else { 3016 } else {
3152 y_skip = 0; 3017 y_skip = 0;
3153 if (rd_pick_intra4x4mby_modes(cpi, x, &rate_y, &rate_y_tokenonly, 3018 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3154 &dist_y, best_rd) >= best_rd) { 3019 &dist_y, best_rd) >= best_rd) {
3155 *returnrate = INT_MAX; 3020 *returnrate = INT_MAX;
3156 return; 3021 return;
3157 } 3022 }
3158 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, 3023 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3159 &dist_uv, &uv_skip, BLOCK_SIZE_SB8X8); 3024 &dist_uv, &uv_skip, BLOCK_8X8);
3160 } 3025 }
3161 3026
3162 if (y_skip && uv_skip) { 3027 if (y_skip && uv_skip) {
3163 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + 3028 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3164 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1); 3029 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
3165 *returndist = dist_y + (dist_uv >> 2); 3030 *returndist = dist_y + dist_uv;
3166 memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); 3031 vp9_zero(ctx->tx_rd_diff);
3167 } else { 3032 } else {
3168 int i; 3033 int i;
3169 *returnrate = rate_y + rate_uv + 3034 *returnrate = rate_y + rate_uv +
3170 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); 3035 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
3171 *returndist = dist_y + (dist_uv >> 2); 3036 *returndist = dist_y + dist_uv;
3172 if (cpi->sf.tx_size_search_method == USE_FULL_RD) { 3037 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3173 for (i = 0; i < NB_TXFM_MODES; i++) { 3038 for (i = 0; i < TX_MODES; i++)
3174 ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->tx_mode]; 3039 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3175 }
3176 }
3177 } 3040 }
3178 3041
3179 ctx->mic = *xd->mode_info_context; 3042 ctx->mic = *xd->mode_info_context;
3180 } 3043 }
3181 3044
3182 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 3045 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3183 int mi_row, int mi_col, 3046 int mi_row, int mi_col,
3184 int *returnrate, 3047 int *returnrate,
3185 int64_t *returndistortion, 3048 int64_t *returndistortion,
3186 BLOCK_SIZE_TYPE bsize, 3049 BLOCK_SIZE bsize,
3187 PICK_MODE_CONTEXT *ctx, 3050 PICK_MODE_CONTEXT *ctx,
3188 int64_t best_rd_so_far) { 3051 int64_t best_rd_so_far) {
3189 VP9_COMMON *cm = &cpi->common; 3052 VP9_COMMON *cm = &cpi->common;
3190 MACROBLOCKD *xd = &x->e_mbd; 3053 MACROBLOCKD *xd = &x->e_mbd;
3191 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; 3054 MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
3192 const BLOCK_SIZE_TYPE block_size = get_plane_block_size(bsize, &xd->plane[0]); 3055 const struct segmentation *seg = &cm->seg;
3056 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3193 MB_PREDICTION_MODE this_mode; 3057 MB_PREDICTION_MODE this_mode;
3194 MV_REFERENCE_FRAME ref_frame; 3058 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3195 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id; 3059 unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
3196 int comp_pred, i; 3060 int comp_pred, i;
3197 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3061 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3198 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3062 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3199 int_mv single_newmv[MAX_REF_FRAMES]; 3063 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3200 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3064 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3201 VP9_ALT_FLAG }; 3065 VP9_ALT_FLAG };
3202 int idx_list[4] = {0, 3066 int idx_list[4] = {0,
3203 cpi->lst_fb_idx, 3067 cpi->lst_fb_idx,
3204 cpi->gld_fb_idx, 3068 cpi->gld_fb_idx,
3205 cpi->alt_fb_idx}; 3069 cpi->alt_fb_idx};
3206 int64_t best_rd = best_rd_so_far; 3070 int64_t best_rd = best_rd_so_far;
3207 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise 3071 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3208 int64_t best_txfm_rd[NB_TXFM_MODES]; 3072 int64_t best_tx_rd[TX_MODES];
3209 int64_t best_txfm_diff[NB_TXFM_MODES]; 3073 int64_t best_tx_diff[TX_MODES];
3210 int64_t best_pred_diff[NB_PREDICTION_TYPES]; 3074 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3211 int64_t best_pred_rd[NB_PREDICTION_TYPES]; 3075 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3212 int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1]; 3076 int64_t best_filter_rd[SWITCHABLE_FILTERS + 1];
3213 int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]; 3077 int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
3214 MB_MODE_INFO best_mbmode; 3078 MB_MODE_INFO best_mbmode = { 0 };
3215 int j; 3079 int j;
3216 int mode_index, best_mode_index = 0; 3080 int mode_index, best_mode_index = 0;
3217 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3081 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3218 vp9_prob comp_mode_p; 3082 vp9_prob comp_mode_p;
3219 int64_t best_intra_rd = INT64_MAX; 3083 int64_t best_intra_rd = INT64_MAX;
3220 int64_t best_inter_rd = INT64_MAX; 3084 int64_t best_inter_rd = INT64_MAX;
3221 MB_PREDICTION_MODE best_intra_mode = DC_PRED; 3085 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3222 // MB_PREDICTION_MODE best_inter_mode = ZEROMV; 3086 // MB_PREDICTION_MODE best_inter_mode = ZEROMV;
3223 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; 3087 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3224 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE; 3088 INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
3225 int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB]; 3089 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3226 int64_t dist_uv[TX_SIZE_MAX_SB]; 3090 int64_t dist_uv[TX_SIZES];
3227 int skip_uv[TX_SIZE_MAX_SB]; 3091 int skip_uv[TX_SIZES];
3228 MB_PREDICTION_MODE mode_uv[TX_SIZE_MAX_SB]; 3092 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3229 struct scale_factors scale_factor[4]; 3093 struct scale_factors scale_factor[4];
3230 unsigned int ref_frame_mask = 0; 3094 unsigned int ref_frame_mask = 0;
3231 unsigned int mode_mask = 0; 3095 unsigned int mode_mask = 0;
3232 int64_t mode_distortions[MB_MODE_COUNT] = {-1}; 3096 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3233 int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; 3097 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
3234 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, 3098 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3235 cpi->common.y_dc_delta_q); 3099 cpi->common.y_dc_delta_q);
3236 int_mv seg_mvs[4][MAX_REF_FRAMES]; 3100 int_mv seg_mvs[4][MAX_REF_FRAMES];
3237 union b_mode_info best_bmodes[4]; 3101 union b_mode_info best_bmodes[4];
3238 PARTITION_INFO best_partition; 3102 PARTITION_INFO best_partition;
3239 int bwsl = b_width_log2(bsize); 3103 int bwsl = b_width_log2(bsize);
3240 int bws = (1 << bwsl) / 4; // mode_info step for subsize 3104 int bws = (1 << bwsl) / 4; // mode_info step for subsize
3241 int bhsl = b_height_log2(bsize); 3105 int bhsl = b_height_log2(bsize);
3242 int bhs = (1 << bhsl) / 4; // mode_info step for subsize 3106 int bhs = (1 << bhsl) / 4; // mode_info step for subsize
3243 int best_skip2 = 0; 3107 int best_skip2 = 0;
3244 3108
3245 x->skip_encode = (cpi->sf.skip_encode_frame && 3109 x->skip_encode = (cpi->sf.skip_encode_frame &&
3246 xd->q_index < QIDX_SKIP_THRESH); 3110 xd->q_index < QIDX_SKIP_THRESH);
3247 3111
3248 for (i = 0; i < 4; i++) { 3112 for (i = 0; i < 4; i++) {
3249 int j; 3113 int j;
3250 for (j = 0; j < MAX_REF_FRAMES; j++) 3114 for (j = 0; j < MAX_REF_FRAMES; j++)
3251 seg_mvs[i][j].as_int = INVALID_MV; 3115 seg_mvs[i][j].as_int = INVALID_MV;
3252 } 3116 }
3253 // Everywhere the flag is set the error is much higher than its neighbors. 3117 // Everywhere the flag is set the error is much higher than its neighbors.
3254 ctx->frames_with_high_error = 0; 3118 ctx->frames_with_high_error = 0;
3255 ctx->modes_with_high_error = 0; 3119 ctx->modes_with_high_error = 0;
3256 3120
3257 xd->mode_info_context->mbmi.segment_id = segment_id;
3258 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, 3121 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3259 &comp_mode_p); 3122 &comp_mode_p);
3260 vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
3261 vpx_memset(&single_newmv, 0, sizeof(single_newmv));
3262 3123
3263 for (i = 0; i < NB_PREDICTION_TYPES; ++i) 3124 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3264 best_pred_rd[i] = INT64_MAX; 3125 best_pred_rd[i] = INT64_MAX;
3265 for (i = 0; i < NB_TXFM_MODES; i++) 3126 for (i = 0; i < TX_MODES; i++)
3266 best_txfm_rd[i] = INT64_MAX; 3127 best_tx_rd[i] = INT64_MAX;
3267 for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) 3128 for (i = 0; i <= SWITCHABLE_FILTERS; i++)
3268 best_filter_rd[i] = INT64_MAX; 3129 best_filter_rd[i] = INT64_MAX;
3269 for (i = 0; i < TX_SIZE_MAX_SB; i++) 3130 for (i = 0; i < TX_SIZES; i++)
3270 rate_uv_intra[i] = INT_MAX; 3131 rate_uv_intra[i] = INT_MAX;
3271 3132
3272 *returnrate = INT_MAX; 3133 *returnrate = INT_MAX;
3273 3134
3274 // Create a mask set to 1 for each frame used by a smaller resolution. 3135 // Create a mask set to 1 for each reference frame used by a smaller
3136 // resolution.
3275 if (cpi->sf.use_avoid_tested_higherror) { 3137 if (cpi->sf.use_avoid_tested_higherror) {
3276 switch (block_size) { 3138 switch (block_size) {
3277 case BLOCK_64X64: 3139 case BLOCK_64X64:
3278 for (i = 0; i < 4; i++) { 3140 for (i = 0; i < 4; i++) {
3279 for (j = 0; j < 4; j++) { 3141 for (j = 0; j < 4; j++) {
3280 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error; 3142 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3281 mode_mask |= x->mb_context[i][j].modes_with_high_error; 3143 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3282 } 3144 }
3283 } 3145 }
3284 for (i = 0; i < 4; i++) { 3146 for (i = 0; i < 4; i++) {
(...skipping 28 matching lines...) Expand all
3313 frame_mv[ZEROMV][ref_frame].as_int = 0; 3175 frame_mv[ZEROMV][ref_frame].as_int = 0;
3314 } 3176 }
3315 3177
3316 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { 3178 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3317 int mode_excluded = 0; 3179 int mode_excluded = 0;
3318 int64_t this_rd = INT64_MAX; 3180 int64_t this_rd = INT64_MAX;
3319 int disable_skip = 0; 3181 int disable_skip = 0;
3320 int compmode_cost = 0; 3182 int compmode_cost = 0;
3321 int rate2 = 0, rate_y = 0, rate_uv = 0; 3183 int rate2 = 0, rate_y = 0, rate_uv = 0;
3322 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; 3184 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3323 int skippable; 3185 int skippable = 0;
3324 int64_t txfm_cache[NB_TXFM_MODES]; 3186 int64_t tx_cache[TX_MODES];
3325 int i; 3187 int i;
3326 int this_skip2 = 0; 3188 int this_skip2 = 0;
3327 int64_t total_sse = INT_MAX; 3189 int64_t total_sse = INT_MAX;
3328 int early_term = 0; 3190 int early_term = 0;
3329 3191
3330 for (i = 0; i < NB_TXFM_MODES; ++i) 3192 for (i = 0; i < TX_MODES; ++i)
3331 txfm_cache[i] = INT64_MAX; 3193 tx_cache[i] = INT64_MAX;
3332 3194
3195 x->skip = 0;
3333 this_mode = vp9_mode_order[mode_index].mode; 3196 this_mode = vp9_mode_order[mode_index].mode;
3334 ref_frame = vp9_mode_order[mode_index].ref_frame; 3197 ref_frame = vp9_mode_order[mode_index].ref_frame;
3198 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3335 3199
3336 // Slip modes that have been masked off but always consider first mode. 3200 // Skip modes that have been masked off but always consider first mode.
3337 if ( mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) && 3201 if (mode_index && (bsize > cpi->sf.unused_mode_skip_lvl) &&
3338 (cpi->unused_mode_skip_mask & (1 << mode_index)) ) 3202 (cpi->unused_mode_skip_mask & (1 << mode_index)) )
3339 continue; 3203 continue;
3340 3204
3341 // Skip if the current refernce frame has been masked off 3205 // Skip if the current reference frame has been masked off
3342 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && 3206 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3343 (cpi->ref_frame_mask & (1 << ref_frame))) 3207 (cpi->ref_frame_mask & (1 << ref_frame)))
3344 continue; 3208 continue;
3345 3209
3346 // Test best rd so far against threshold for trying this mode. 3210 // Test best rd so far against threshold for trying this mode.
3347 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] * 3211 if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
3348 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 4)) || 3212 cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) ||
3349 cpi->rd_threshes[bsize][mode_index] == INT_MAX) 3213 cpi->rd_threshes[bsize][mode_index] == INT_MAX)
3350 continue; 3214 continue;
3351 3215
3352 // Do not allow compound prediction if the segment level reference 3216 // Do not allow compound prediction if the segment level reference
3353 // frame feature is in use as in this case there can only be one reference. 3217 // frame feature is in use as in this case there can only be one reference.
3354 if ((vp9_mode_order[mode_index].second_ref_frame > INTRA_FRAME) && 3218 if ((second_ref_frame > INTRA_FRAME) &&
3355 vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME)) 3219 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3356 continue; 3220 continue;
3357 3221
3358 x->skip = 0;
3359
3360 // Skip some checking based on small partitions' result. 3222 // Skip some checking based on small partitions' result.
3361 if (x->fast_ms > 1 && !ref_frame) 3223 if (x->fast_ms > 1 && !ref_frame)
3362 continue; 3224 continue;
3363 if (x->fast_ms > 2 && ref_frame != x->subblock_ref) 3225 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3364 continue; 3226 continue;
3365 3227
3366 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) { 3228 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
3367 if (!(ref_frame_mask & (1 << ref_frame))) { 3229 if (!(ref_frame_mask & (1 << ref_frame))) {
3368 continue; 3230 continue;
3369 } 3231 }
3370 if (!(mode_mask & (1 << this_mode))) { 3232 if (!(mode_mask & (1 << this_mode))) {
3371 continue; 3233 continue;
3372 } 3234 }
3373 if (vp9_mode_order[mode_index].second_ref_frame != NONE 3235 if (second_ref_frame != NONE
3374 && !(ref_frame_mask 3236 && !(ref_frame_mask & (1 << second_ref_frame))) {
3375 & (1 << vp9_mode_order[mode_index].second_ref_frame))) {
3376 continue; 3237 continue;
3377 } 3238 }
3378 } 3239 }
3379 3240
3380 mbmi->ref_frame[0] = ref_frame; 3241 mbmi->ref_frame[0] = ref_frame;
3381 mbmi->ref_frame[1] = vp9_mode_order[mode_index].second_ref_frame; 3242 mbmi->ref_frame[1] = second_ref_frame;
3382 3243
3383 if (!(ref_frame == INTRA_FRAME 3244 if (!(ref_frame == INTRA_FRAME
3384 || (cpi->ref_frame_flags & flag_list[ref_frame]))) { 3245 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3385 continue; 3246 continue;
3386 } 3247 }
3387 if (!(mbmi->ref_frame[1] == NONE 3248 if (!(second_ref_frame == NONE
3388 || (cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]]))) { 3249 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3389 continue; 3250 continue;
3390 } 3251 }
3391 3252
3392 comp_pred = mbmi->ref_frame[1] > INTRA_FRAME; 3253 comp_pred = second_ref_frame > INTRA_FRAME;
3393 if (comp_pred) { 3254 if (comp_pred) {
3394 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) 3255 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3395 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) 3256 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3396 continue; 3257 continue;
3397 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) 3258 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3398 if (vp9_mode_order[mode_index].ref_frame != best_inter_ref_frame && 3259 if (ref_frame != best_inter_ref_frame &&
3399 vp9_mode_order[mode_index].second_ref_frame != best_inter_ref_frame) 3260 second_ref_frame != best_inter_ref_frame)
3400 continue; 3261 continue;
3401 } 3262 }
3402 // TODO(jingning, jkoleszar): scaling reference frame not supported for 3263 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3403 // SPLITMV. 3264 // SPLITMV.
3404 if (mbmi->ref_frame[0] > 0 && 3265 if (ref_frame > 0 &&
3405 (scale_factor[mbmi->ref_frame[0]].x_scale_fp != VP9_REF_NO_SCALE || 3266 vp9_is_scaled(&scale_factor[ref_frame]) &&
3406 scale_factor[mbmi->ref_frame[0]].y_scale_fp != VP9_REF_NO_SCALE) &&
3407 this_mode == SPLITMV) 3267 this_mode == SPLITMV)
3408 continue; 3268 continue;
3409 3269
3410 if (mbmi->ref_frame[1] > 0 && 3270 if (second_ref_frame > 0 &&
3411 (scale_factor[mbmi->ref_frame[1]].x_scale_fp != VP9_REF_NO_SCALE || 3271 vp9_is_scaled(&scale_factor[second_ref_frame]) &&
3412 scale_factor[mbmi->ref_frame[1]].y_scale_fp != VP9_REF_NO_SCALE) &&
3413 this_mode == SPLITMV) 3272 this_mode == SPLITMV)
3414 continue; 3273 continue;
3415 3274
3416 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], 3275 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3417 scale_factor);
3418 mbmi->mode = this_mode; 3276 mbmi->mode = this_mode;
3419 mbmi->uv_mode = DC_PRED; 3277 mbmi->uv_mode = DC_PRED;
3420 3278
3421 // Evaluate all sub-pel filters irrespective of whether we can use 3279 // Evaluate all sub-pel filters irrespective of whether we can use
3422 // them for this frame. 3280 // them for this frame.
3423 mbmi->interp_filter = cm->mcomp_filter_type; 3281 mbmi->interp_filter = cm->mcomp_filter_type;
3424 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); 3282 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3425 3283
3426 if (bsize >= BLOCK_SIZE_SB8X8 && 3284 if (bsize >= BLOCK_8X8 &&
3427 (this_mode == I4X4_PRED || this_mode == SPLITMV)) 3285 (this_mode == I4X4_PRED || this_mode == SPLITMV))
3428 continue; 3286 continue;
3429 if (bsize < BLOCK_SIZE_SB8X8 && 3287 if (bsize < BLOCK_8X8 &&
3430 !(this_mode == I4X4_PRED || this_mode == SPLITMV)) 3288 !(this_mode == I4X4_PRED || this_mode == SPLITMV))
3431 continue; 3289 continue;
3432 3290
3433 if (comp_pred) { 3291 if (comp_pred) {
3434 if (!(cpi->ref_frame_flags & flag_list[mbmi->ref_frame[1]])) 3292 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3435 continue; 3293 continue;
3436 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], 3294 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3437 scale_factor);
3438 3295
3439 mode_excluded = mode_excluded 3296 mode_excluded = mode_excluded
3440 ? mode_excluded 3297 ? mode_excluded
3441 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY; 3298 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
3442 } else { 3299 } else {
3443 // mbmi->ref_frame[1] = vp9_mode_order[mode_index].ref_frame[1]; 3300 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3444 if (ref_frame != INTRA_FRAME) { 3301 mode_excluded =
3445 if (mbmi->ref_frame[1] != INTRA_FRAME) 3302 mode_excluded ?
3446 mode_excluded = 3303 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3447 mode_excluded ?
3448 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3449 } 3304 }
3450 } 3305 }
3451 3306
3452 // Select predictors 3307 // Select prediction reference frames.
3453 for (i = 0; i < MAX_MB_PLANE; i++) { 3308 for (i = 0; i < MAX_MB_PLANE; i++) {
3454 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 3309 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3455 if (comp_pred) 3310 if (comp_pred)
3456 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; 3311 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3457 } 3312 }
3458 3313
3459 // If the segment reference frame feature is enabled.... 3314 // If the segment reference frame feature is enabled....
3460 // then do nothing if the current ref frame is not allowed.. 3315 // then do nothing if the current ref frame is not allowed..
3461 if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_REF_FRAME) && 3316 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3462 vp9_get_segdata(&xd->seg, segment_id, SEG_LVL_REF_FRAME) != 3317 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3463 (int)ref_frame) { 3318 (int)ref_frame) {
3464 continue; 3319 continue;
3465 // If the segment skip feature is enabled.... 3320 // If the segment skip feature is enabled....
3466 // then do nothing if the current mode is not allowed.. 3321 // then do nothing if the current mode is not allowed..
3467 } else if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP) && 3322 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3468 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) { 3323 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3469 continue; 3324 continue;
3470 // Disable this drop out case if the ref frame 3325 // Disable this drop out case if the ref frame
3471 // segment level feature is enabled for this segment. This is to 3326 // segment level feature is enabled for this segment. This is to
3472 // prevent the possibility that we end up unable to pick any mode. 3327 // prevent the possibility that we end up unable to pick any mode.
3473 } else if (!vp9_segfeature_active(&xd->seg, segment_id, 3328 } else if (!vp9_segfeature_active(seg, segment_id,
3474 SEG_LVL_REF_FRAME)) { 3329 SEG_LVL_REF_FRAME)) {
3475 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, 3330 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3476 // unless ARNR filtering is enabled in which case we want 3331 // unless ARNR filtering is enabled in which case we want
3477 // an unfiltered alternative. We allow near/nearest as well 3332 // an unfiltered alternative. We allow near/nearest as well
3478 // because they may result in zero-zero MVs but be cheaper. 3333 // because they may result in zero-zero MVs but be cheaper.
3479 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { 3334 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3480 if ((this_mode != ZEROMV && 3335 if ((this_mode != ZEROMV &&
3481 !(this_mode == NEARMV && 3336 !(this_mode == NEARMV &&
3482 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) && 3337 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3483 !(this_mode == NEARESTMV && 3338 !(this_mode == NEARESTMV &&
(...skipping 15 matching lines...) Expand all
3499 3354
3500 if (this_mode == I4X4_PRED) { 3355 if (this_mode == I4X4_PRED) {
3501 int rate; 3356 int rate;
3502 3357
3503 /* 3358 /*
3504 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && 3359 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3505 (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)) 3360 (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME))
3506 continue; 3361 continue;
3507 */ 3362 */
3508 3363
3364 // I4X4_PRED is only considered for block sizes less than 8x8.
3509 mbmi->txfm_size = TX_4X4; 3365 mbmi->txfm_size = TX_4X4;
3510 rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, 3366 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3511 &distortion_y, INT64_MAX); 3367 &distortion_y, best_rd) >= best_rd)
3368 continue;
3512 rate2 += rate; 3369 rate2 += rate;
3513 rate2 += intra_cost_penalty; 3370 rate2 += intra_cost_penalty;
3514 distortion2 += distortion_y; 3371 distortion2 += distortion_y;
3515 3372
3516 if (rate_uv_intra[TX_4X4] == INT_MAX) { 3373 if (rate_uv_intra[TX_4X4] == INT_MAX) {
3517 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4], 3374 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[TX_4X4],
3518 &rate_uv_tokenonly[TX_4X4], 3375 &rate_uv_tokenonly[TX_4X4],
3519 &dist_uv[TX_4X4], &skip_uv[TX_4X4], 3376 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
3520 &mode_uv[TX_4X4]); 3377 &mode_uv[TX_4X4]);
3521 } 3378 }
3522 rate2 += rate_uv_intra[TX_4X4]; 3379 rate2 += rate_uv_intra[TX_4X4];
3523 rate_uv = rate_uv_tokenonly[TX_4X4]; 3380 rate_uv = rate_uv_tokenonly[TX_4X4];
3524 distortion2 += dist_uv[TX_4X4]; 3381 distortion2 += dist_uv[TX_4X4];
3525 distortion_uv = dist_uv[TX_4X4]; 3382 distortion_uv = dist_uv[TX_4X4];
3526 mbmi->uv_mode = mode_uv[TX_4X4]; 3383 mbmi->uv_mode = mode_uv[TX_4X4];
3527 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3384 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3528 for (i = 0; i < NB_TXFM_MODES; ++i) 3385 for (i = 0; i < TX_MODES; ++i)
3529 txfm_cache[i] = txfm_cache[ONLY_4X4]; 3386 tx_cache[i] = tx_cache[ONLY_4X4];
3530 } else if (ref_frame == INTRA_FRAME) { 3387 } else if (ref_frame == INTRA_FRAME) {
3531 TX_SIZE uv_tx; 3388 TX_SIZE uv_tx;
3389 // Disable intra modes other than DC_PRED for blocks with low variance
3390 // Threshold for intra skipping based on source variance
3391 // TODO(debargha): Specialize the threshold for super block sizes
3392 static const int skip_intra_var_thresh[BLOCK_SIZES] = {
3393 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3394 };
3395 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3396 this_mode != DC_PRED &&
3397 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3398 continue;
3532 // Only search the oblique modes if the best so far is 3399 // Only search the oblique modes if the best so far is
3533 // one of the neighboring directional modes 3400 // one of the neighboring directional modes
3534 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && 3401 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3535 (this_mode >= D45_PRED && this_mode <= TM_PRED)) { 3402 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3536 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME) 3403 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3537 continue; 3404 continue;
3538 } 3405 }
3539 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { 3406 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3540 if (conditional_skipintra(mbmi->mode, best_intra_mode)) 3407 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3541 continue; 3408 continue;
3542 } 3409 }
3543 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, 3410 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3544 bsize, txfm_cache, best_rd); 3411 bsize, tx_cache, best_rd);
3545 3412
3546 if (rate_y == INT_MAX) 3413 if (rate_y == INT_MAX)
3547 continue; 3414 continue;
3548 3415
3549 uv_tx = MIN(mbmi->txfm_size, max_uv_txsize_lookup[bsize]); 3416 uv_tx = MIN(mbmi->txfm_size, max_uv_txsize_lookup[bsize]);
3550 if (rate_uv_intra[uv_tx] == INT_MAX) { 3417 if (rate_uv_intra[uv_tx] == INT_MAX) {
3551 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx], 3418 choose_intra_uv_mode(cpi, bsize, &rate_uv_intra[uv_tx],
3552 &rate_uv_tokenonly[uv_tx], 3419 &rate_uv_tokenonly[uv_tx],
3553 &dist_uv[uv_tx], &skip_uv[uv_tx], 3420 &dist_uv[uv_tx], &skip_uv[uv_tx],
3554 &mode_uv[uv_tx]); 3421 &mode_uv[uv_tx]);
3555 } 3422 }
3556 3423
3557 rate_uv = rate_uv_tokenonly[uv_tx]; 3424 rate_uv = rate_uv_tokenonly[uv_tx];
3558 distortion_uv = dist_uv[uv_tx]; 3425 distortion_uv = dist_uv[uv_tx];
3559 skippable = skippable && skip_uv[uv_tx]; 3426 skippable = skippable && skip_uv[uv_tx];
3560 mbmi->uv_mode = mode_uv[uv_tx]; 3427 mbmi->uv_mode = mode_uv[uv_tx];
3561 3428
3562 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; 3429 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3563 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) 3430 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
3564 rate2 += intra_cost_penalty; 3431 rate2 += intra_cost_penalty;
3565 distortion2 = distortion_y + distortion_uv; 3432 distortion2 = distortion_y + distortion_uv;
3566 } else if (this_mode == SPLITMV) { 3433 } else if (this_mode == SPLITMV) {
3567 const int is_comp_pred = mbmi->ref_frame[1] > 0; 3434 const int is_comp_pred = second_ref_frame > 0;
3568 int rate; 3435 int rate;
3569 int64_t distortion; 3436 int64_t distortion;
3570 int64_t this_rd_thresh; 3437 int64_t this_rd_thresh;
3571 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; 3438 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3572 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; 3439 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3573 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; 3440 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3574 int tmp_best_skippable = 0; 3441 int tmp_best_skippable = 0;
3575 int switchable_filter_index; 3442 int switchable_filter_index;
3576 int_mv *second_ref = is_comp_pred ? 3443 int_mv *second_ref = is_comp_pred ?
3577 &mbmi->ref_mvs[mbmi->ref_frame[1]][0] : NULL; 3444 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3578 union b_mode_info tmp_best_bmodes[16]; 3445 union b_mode_info tmp_best_bmodes[16];
3579 MB_MODE_INFO tmp_best_mbmode; 3446 MB_MODE_INFO tmp_best_mbmode;
3580 PARTITION_INFO tmp_best_partition; 3447 PARTITION_INFO tmp_best_partition;
3581 BEST_SEG_INFO bsi[VP9_SWITCHABLE_FILTERS]; 3448 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3582 int pred_exists = 0; 3449 int pred_exists = 0;
3583 int uv_skippable; 3450 int uv_skippable;
3584 if (is_comp_pred) { 3451 if (is_comp_pred) {
3585 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) 3452 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3586 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) 3453 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3587 continue; 3454 continue;
3588 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) 3455 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3589 if (vp9_mode_order[mode_index].ref_frame != best_inter_ref_frame && 3456 if (ref_frame != best_inter_ref_frame &&
3590 vp9_mode_order[mode_index].second_ref_frame != 3457 second_ref_frame != best_inter_ref_frame)
3591 best_inter_ref_frame)
3592 continue; 3458 continue;
3593 } 3459 }
3594 3460
3595 this_rd_thresh = (mbmi->ref_frame[0] == LAST_FRAME) ? 3461 this_rd_thresh = (ref_frame == LAST_FRAME) ?
3596 cpi->rd_threshes[bsize][THR_NEWMV] : 3462 cpi->rd_threshes[bsize][THR_NEWMV] :
3597 cpi->rd_threshes[bsize][THR_NEWA]; 3463 cpi->rd_threshes[bsize][THR_NEWA];
3598 this_rd_thresh = (mbmi->ref_frame[0] == GOLDEN_FRAME) ? 3464 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3599 cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh; 3465 cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
3600 xd->mode_info_context->mbmi.txfm_size = TX_4X4; 3466 xd->mode_info_context->mbmi.txfm_size = TX_4X4;
3601 3467
3602 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; 3468 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
3603 for (switchable_filter_index = 0; 3469 if (cm->mcomp_filter_type != BILINEAR) {
3604 switchable_filter_index < VP9_SWITCHABLE_FILTERS; 3470 tmp_best_filter = EIGHTTAP;
3605 ++switchable_filter_index) { 3471 if (x->source_variance <
3606 int newbest, rs; 3472 cpi->sf.disable_filter_search_var_thresh) {
3607 int64_t rs_rd; 3473 tmp_best_filter = EIGHTTAP;
3608 mbmi->interp_filter = 3474 vp9_zero(cpi->rd_filter_cache);
3609 vp9_switchable_interp[switchable_filter_index]; 3475 } else {
3610 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); 3476 for (switchable_filter_index = 0;
3477 switchable_filter_index < SWITCHABLE_FILTERS;
3478 ++switchable_filter_index) {
3479 int newbest, rs;
3480 int64_t rs_rd;
3481 mbmi->interp_filter = switchable_filter_index;
3482 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3611 3483
3612 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, 3484 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3613 &mbmi->ref_mvs[mbmi->ref_frame[0]][0], 3485 &mbmi->ref_mvs[ref_frame][0],
3614 second_ref, 3486 second_ref,
3615 best_yrd, 3487 best_yrd,
3616 &rate, &rate_y, &distortion, 3488 &rate, &rate_y, &distortion,
3617 &skippable, &total_sse, 3489 &skippable, &total_sse,
3618 (int)this_rd_thresh, seg_mvs, 3490 (int)this_rd_thresh, seg_mvs,
3619 bsi, switchable_filter_index, 3491 bsi, switchable_filter_index,
3620 mi_row, mi_col); 3492 mi_row, mi_col);
3621 3493
3622 if (tmp_rd == INT64_MAX) 3494 if (tmp_rd == INT64_MAX)
3623 continue; 3495 continue;
3624 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; 3496 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
3625 rs = get_switchable_rate(cm, x); 3497 rs = get_switchable_rate(x);
3626 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 3498 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3627 cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = 3499 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
3628 MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd); 3500 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
3629 if (cm->mcomp_filter_type == SWITCHABLE) 3501 tmp_rd + rs_rd);
3630 tmp_rd += rs_rd; 3502 if (cm->mcomp_filter_type == SWITCHABLE)
3503 tmp_rd += rs_rd;
3631 3504
3632 newbest = (tmp_rd < tmp_best_rd); 3505 newbest = (tmp_rd < tmp_best_rd);
3633 if (newbest) { 3506 if (newbest) {
3634 tmp_best_filter = mbmi->interp_filter; 3507 tmp_best_filter = mbmi->interp_filter;
3635 tmp_best_rd = tmp_rd; 3508 tmp_best_rd = tmp_rd;
3509 }
3510 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
3511 (mbmi->interp_filter == cm->mcomp_filter_type &&
3512 cm->mcomp_filter_type != SWITCHABLE)) {
3513 tmp_best_rdu = tmp_rd;
3514 tmp_best_rate = rate;
3515 tmp_best_ratey = rate_y;
3516 tmp_best_distortion = distortion;
3517 tmp_best_sse = total_sse;
3518 tmp_best_skippable = skippable;
3519 tmp_best_mbmode = *mbmi;
3520 tmp_best_partition = *x->partition_info;
3521 for (i = 0; i < 4; i++)
3522 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
3523 pred_exists = 1;
3524 if (switchable_filter_index == 0 &&
3525 cpi->sf.use_rd_breakout &&
3526 best_rd < INT64_MAX) {
3527 if (tmp_best_rdu / 2 > best_rd) {
3528 // skip searching the other filters if the first is
3529 // already substantially larger than the best so far
3530 tmp_best_filter = mbmi->interp_filter;
3531 tmp_best_rdu = INT64_MAX;
3532 break;
3533 }
3534 }
3535 }
3536 } // switchable_filter_index loop
3636 } 3537 }
3637 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || 3538 }
3638 (mbmi->interp_filter == cm->mcomp_filter_type &&
3639 cm->mcomp_filter_type != SWITCHABLE)) {
3640 tmp_best_rdu = tmp_rd;
3641 tmp_best_rate = rate;
3642 tmp_best_ratey = rate_y;
3643 tmp_best_distortion = distortion;
3644 tmp_best_sse = total_sse;
3645 tmp_best_skippable = skippable;
3646 tmp_best_mbmode = *mbmi;
3647 tmp_best_partition = *x->partition_info;
3648 for (i = 0; i < 4; i++)
3649 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
3650 pred_exists = 1;
3651 if (switchable_filter_index == 0 &&
3652 cpi->sf.use_rd_breakout &&
3653 best_rd < INT64_MAX) {
3654 if (tmp_best_rdu / 2 > best_rd) {
3655 // skip searching the other filters if the first is
3656 // already substantially larger than the best so far
3657 tmp_best_filter = mbmi->interp_filter;
3658 tmp_best_rdu = INT64_MAX;
3659 break;
3660 }
3661 }
3662 }
3663 } // switchable_filter_index loop
3664 3539
3665 if (tmp_best_rdu == INT64_MAX) 3540 if (tmp_best_rdu == INT64_MAX)
3666 continue; 3541 continue;
3667 3542
3668 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? 3543 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
3669 tmp_best_filter : cm->mcomp_filter_type); 3544 tmp_best_filter : cm->mcomp_filter_type);
3670 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); 3545 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3671 if (!pred_exists) { 3546 if (!pred_exists) {
3672 // Handles the special case when a filter that is not in the 3547 // Handles the special case when a filter that is not in the
3673 // switchable list (bilinear, 6-tap) is indicated at the frame level 3548 // switchable list (bilinear, 6-tap) is indicated at the frame level
3674 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, 3549 tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
3675 &mbmi->ref_mvs[mbmi->ref_frame[0]][0], 3550 &mbmi->ref_mvs[ref_frame][0],
3676 second_ref, 3551 second_ref,
3677 best_yrd, 3552 best_yrd,
3678 &rate, &rate_y, &distortion, 3553 &rate, &rate_y, &distortion,
3679 &skippable, &total_sse, 3554 &skippable, &total_sse,
3680 (int)this_rd_thresh, seg_mvs, 3555 (int)this_rd_thresh, seg_mvs,
3681 bsi, 0, 3556 bsi, 0,
3682 mi_row, mi_col); 3557 mi_row, mi_col);
3683 if (tmp_rd == INT64_MAX) 3558 if (tmp_rd == INT64_MAX)
3684 continue; 3559 continue;
3685 } else { 3560 } else {
3686 if (cpi->common.mcomp_filter_type == SWITCHABLE) { 3561 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
3687 int rs = get_switchable_rate(cm, x); 3562 int rs = get_switchable_rate(x);
3688 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); 3563 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
3689 } 3564 }
3690 tmp_rd = tmp_best_rdu; 3565 tmp_rd = tmp_best_rdu;
3691 total_sse = tmp_best_sse; 3566 total_sse = tmp_best_sse;
3692 rate = tmp_best_rate; 3567 rate = tmp_best_rate;
3693 rate_y = tmp_best_ratey; 3568 rate_y = tmp_best_ratey;
3694 distortion = tmp_best_distortion; 3569 distortion = tmp_best_distortion;
3695 skippable = tmp_best_skippable; 3570 skippable = tmp_best_skippable;
3696 *mbmi = tmp_best_mbmode; 3571 *mbmi = tmp_best_mbmode;
3697 *x->partition_info = tmp_best_partition; 3572 *x->partition_info = tmp_best_partition;
3698 for (i = 0; i < 4; i++) 3573 for (i = 0; i < 4; i++)
3699 xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; 3574 xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
3700 } 3575 }
3701 3576
3702 rate2 += rate; 3577 rate2 += rate;
3703 distortion2 += distortion; 3578 distortion2 += distortion;
3704 3579
3705 if (cpi->common.mcomp_filter_type == SWITCHABLE) 3580 if (cpi->common.mcomp_filter_type == SWITCHABLE)
3706 rate2 += get_switchable_rate(cm, x); 3581 rate2 += get_switchable_rate(x);
3707 3582
3708 if (!mode_excluded) { 3583 if (!mode_excluded) {
3709 if (is_comp_pred) 3584 if (is_comp_pred)
3710 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; 3585 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
3711 else 3586 else
3712 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; 3587 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
3713 } 3588 }
3714 compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred); 3589 compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
3715 3590
3716 if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) < 3591 tmp_best_rdu = best_rd -
3717 best_rd) { 3592 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
3593 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
3594
3595 if (tmp_best_rdu > 0) {
3718 // If even the 'Y' rd value of split is higher than best so far 3596 // If even the 'Y' rd value of split is higher than best so far
3719 // then don't bother looking at UV 3597 // then don't bother looking at UV
3720 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, 3598 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3721 BLOCK_SIZE_SB8X8); 3599 BLOCK_8X8);
3722 vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8); 3600 super_block_uvrd(cm, x, &rate_uv, &distortion_uv, &uv_skippable,
3723 super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv, 3601 &uv_sse, BLOCK_8X8, tmp_best_rdu);
3724 &uv_skippable, &uv_sse, 3602 if (rate_uv == INT_MAX)
3725 BLOCK_SIZE_SB8X8, TX_4X4); 3603 continue;
3726 rate2 += rate_uv; 3604 rate2 += rate_uv;
3727 distortion2 += distortion_uv; 3605 distortion2 += distortion_uv;
3728 skippable = skippable && uv_skippable; 3606 skippable = skippable && uv_skippable;
3729 total_sse += uv_sse; 3607 total_sse += uv_sse;
3730 3608
3731 txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3609 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3732 for (i = 0; i < NB_TXFM_MODES; ++i) 3610 for (i = 0; i < TX_MODES; ++i)
3733 txfm_cache[i] = txfm_cache[ONLY_4X4]; 3611 tx_cache[i] = tx_cache[ONLY_4X4];
3734 } 3612 }
3735 } else { 3613 } else {
3736 compmode_cost = vp9_cost_bit(comp_mode_p, 3614 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3737 mbmi->ref_frame[1] > INTRA_FRAME);
3738 this_rd = handle_inter_mode(cpi, x, bsize, 3615 this_rd = handle_inter_mode(cpi, x, bsize,
3739 txfm_cache, 3616 tx_cache,
3740 &rate2, &distortion2, &skippable, 3617 &rate2, &distortion2, &skippable,
3741 &rate_y, &distortion_y, 3618 &rate_y, &distortion_y,
3742 &rate_uv, &distortion_uv, 3619 &rate_uv, &distortion_uv,
3743 &mode_excluded, &disable_skip, 3620 &mode_excluded, &disable_skip,
3744 &tmp_best_filter, frame_mv, 3621 &tmp_best_filter, frame_mv,
3745 mi_row, mi_col, 3622 mi_row, mi_col,
3746 single_newmv, &total_sse, best_rd); 3623 single_newmv, &total_sse, best_rd);
3747 if (this_rd == INT64_MAX) 3624 if (this_rd == INT64_MAX)
3748 continue; 3625 continue;
3749 } 3626 }
3750 3627
3751 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { 3628 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3752 rate2 += compmode_cost; 3629 rate2 += compmode_cost;
3753 } 3630 }
3754 3631
3755 // Estimate the reference frame signaling cost and add it 3632 // Estimate the reference frame signaling cost and add it
3756 // to the rolling cost variable. 3633 // to the rolling cost variable.
3757 if (mbmi->ref_frame[1] > INTRA_FRAME) { 3634 if (second_ref_frame > INTRA_FRAME) {
3758 rate2 += ref_costs_comp[mbmi->ref_frame[0]]; 3635 rate2 += ref_costs_comp[ref_frame];
3759 } else { 3636 } else {
3760 rate2 += ref_costs_single[mbmi->ref_frame[0]]; 3637 rate2 += ref_costs_single[ref_frame];
3761 } 3638 }
3762 3639
3763 if (!disable_skip) { 3640 if (!disable_skip) {
3764 // Test for the condition where skip block will be activated 3641 // Test for the condition where skip block will be activated
3765 // because there are no non zero coefficients and make any 3642 // because there are no non zero coefficients and make any
3766 // necessary adjustment for rate. Ignore if skip is coded at 3643 // necessary adjustment for rate. Ignore if skip is coded at
3767 // segment level as the cost won't have been added in. 3644 // segment level as the cost won't have been added in.
3768 // Is Mb level skip allowed (i.e. not coded at segment level). 3645 // Is Mb level skip allowed (i.e. not coded at segment level).
3769 const int mb_skip_allowed = !vp9_segfeature_active(&xd->seg, segment_id, 3646 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3770 SEG_LVL_SKIP); 3647 SEG_LVL_SKIP);
3771 3648
3772 if (skippable && bsize >= BLOCK_SIZE_SB8X8) { 3649 if (skippable && bsize >= BLOCK_8X8) {
3773 // Back out the coefficient coding costs 3650 // Back out the coefficient coding costs
3774 rate2 -= (rate_y + rate_uv); 3651 rate2 -= (rate_y + rate_uv);
3775 // for best yrd calculation 3652 // for best yrd calculation
3776 rate_uv = 0; 3653 rate_uv = 0;
3777 3654
3778 if (mb_skip_allowed) { 3655 if (mb_skip_allowed) {
3779 int prob_skip_cost; 3656 int prob_skip_cost;
3780 3657
3781 // Cost the skip mb case 3658 // Cost the skip mb case
3782 vp9_prob skip_prob = 3659 vp9_prob skip_prob =
3783 vp9_get_pred_prob_mbskip(cm, xd); 3660 vp9_get_pred_prob_mbskip(cm, xd);
3784 3661
3785 if (skip_prob) { 3662 if (skip_prob) {
3786 prob_skip_cost = vp9_cost_bit(skip_prob, 1); 3663 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3787 rate2 += prob_skip_cost; 3664 rate2 += prob_skip_cost;
3788 } 3665 }
3789 } 3666 }
3790 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && 3667 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3791 !xd->lossless) {
3792 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < 3668 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3793 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { 3669 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3794 // Add in the cost of the no skip flag. 3670 // Add in the cost of the no skip flag.
3795 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 3671 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3796 0); 3672 0);
3797 rate2 += prob_skip_cost; 3673 rate2 += prob_skip_cost;
3798 } else { 3674 } else {
3799 // FIXME(rbultje) make this work for splitmv also 3675 // FIXME(rbultje) make this work for splitmv also
3800 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 3676 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3801 1); 3677 1);
(...skipping 26 matching lines...) Expand all
3828 // Keep record of best inter rd with single reference 3704 // Keep record of best inter rd with single reference
3829 if (xd->mode_info_context->mbmi.ref_frame[0] > INTRA_FRAME && 3705 if (xd->mode_info_context->mbmi.ref_frame[0] > INTRA_FRAME &&
3830 xd->mode_info_context->mbmi.ref_frame[1] == NONE && 3706 xd->mode_info_context->mbmi.ref_frame[1] == NONE &&
3831 !mode_excluded && 3707 !mode_excluded &&
3832 this_rd < best_inter_rd) { 3708 this_rd < best_inter_rd) {
3833 best_inter_rd = this_rd; 3709 best_inter_rd = this_rd;
3834 best_inter_ref_frame = ref_frame; 3710 best_inter_ref_frame = ref_frame;
3835 // best_inter_mode = xd->mode_info_context->mbmi.mode; 3711 // best_inter_mode = xd->mode_info_context->mbmi.mode;
3836 } 3712 }
3837 3713
3838 if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) { 3714 if (!disable_skip && ref_frame == INTRA_FRAME) {
3839 for (i = 0; i < NB_PREDICTION_TYPES; ++i) 3715 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3840 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); 3716 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3841 for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) 3717 for (i = 0; i <= SWITCHABLE_FILTERS; i++)
3842 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd); 3718 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3843 } 3719 }
3844 3720
3845 if (this_mode != I4X4_PRED && this_mode != SPLITMV) { 3721 if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
3846 // Store the respective mode distortions for later use. 3722 // Store the respective mode distortions for later use.
3847 if (mode_distortions[this_mode] == -1 3723 if (mode_distortions[this_mode] == -1
3848 || distortion2 < mode_distortions[this_mode]) { 3724 || distortion2 < mode_distortions[this_mode]) {
3849 mode_distortions[this_mode] = distortion2; 3725 mode_distortions[this_mode] = distortion2;
3850 } 3726 }
3851 if (frame_distortions[mbmi->ref_frame[0]] == -1 3727 if (frame_distortions[ref_frame] == -1
3852 || distortion2 < frame_distortions[mbmi->ref_frame[0]]) { 3728 || distortion2 < frame_distortions[ref_frame]) {
3853 frame_distortions[mbmi->ref_frame[0]] = distortion2; 3729 frame_distortions[ref_frame] = distortion2;
3854 } 3730 }
3855 } 3731 }
3856 3732
3857 // Did this mode help.. i.e. is it the new best mode 3733 // Did this mode help.. i.e. is it the new best mode
3858 if (this_rd < best_rd || x->skip) { 3734 if (this_rd < best_rd || x->skip) {
3859 if (!mode_excluded) { 3735 if (!mode_excluded) {
3860 // Note index of best mode so far 3736 // Note index of best mode so far
3861 const int qstep = xd->plane[0].dequant[1];
3862
3863 best_mode_index = mode_index; 3737 best_mode_index = mode_index;
3864 3738
3865 if (ref_frame == INTRA_FRAME) { 3739 if (ref_frame == INTRA_FRAME) {
3866 /* required for left and above block mv */ 3740 /* required for left and above block mv */
3867 mbmi->mv[0].as_int = 0; 3741 mbmi->mv[0].as_int = 0;
3868 } 3742 }
3869 3743
3870 *returnrate = rate2; 3744 *returnrate = rate2;
3871 *returndistortion = distortion2; 3745 *returndistortion = distortion2;
3872 best_rd = this_rd; 3746 best_rd = this_rd;
3873 best_yrd = best_rd - 3747 best_yrd = best_rd -
3874 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); 3748 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
3875 best_mbmode = *mbmi; 3749 best_mbmode = *mbmi;
3876 best_skip2 = this_skip2; 3750 best_skip2 = this_skip2;
3877 best_partition = *x->partition_info; 3751 best_partition = *x->partition_info;
3878 3752
3879 if (this_mode == I4X4_PRED || this_mode == SPLITMV) 3753 if (this_mode == I4X4_PRED || this_mode == SPLITMV)
3880 for (i = 0; i < 4; i++) 3754 for (i = 0; i < 4; i++)
3881 best_bmodes[i] = xd->mode_info_context->bmi[i]; 3755 best_bmodes[i] = xd->mode_info_context->bmi[i];
3882 3756
3883 // TODO(debargha): enhance this test with a better distortion prediction 3757 // TODO(debargha): enhance this test with a better distortion prediction
3884 // based on qp, activity mask and history 3758 // based on qp, activity mask and history
3885 if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) 3759 if (cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) {
3886 if (ref_frame > INTRA_FRAME && distortion2 * 4 < qstep * qstep) 3760 const int qstep = xd->plane[0].dequant[1];
3761 // TODO(debargha): Enhance this by specializing for each mode_index
3762 int scale = 4;
3763 if (x->source_variance < UINT_MAX) {
3764 const int var_adjust = (x->source_variance < 16);
3765 scale -= var_adjust;
3766 }
3767 if (ref_frame > INTRA_FRAME &&
3768 distortion2 * scale < qstep * qstep) {
3887 early_term = 1; 3769 early_term = 1;
3770 }
3771 }
3888 } 3772 }
3889 #if 0
3890 // Testing this mode gave rise to an improvement in best error score.
3891 // Lower threshold a bit for next time
3892 cpi->rd_thresh_mult[mode_index] =
3893 (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
3894 cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
3895 cpi->rd_threshes[mode_index] =
3896 (cpi->rd_baseline_thresh[mode_index] >> 7)
3897 * cpi->rd_thresh_mult[mode_index];
3898 #endif
3899 } else {
3900 // If the mode did not help improve the best error case then
3901 // raise the threshold for testing that mode next time around.
3902 #if 0
3903 cpi->rd_thresh_mult[mode_index] += 4;
3904
3905 if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
3906 cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
3907
3908 cpi->rd_threshes[mode_index] =
3909 (cpi->rd_baseline_thresh[mode_index] >> 7)
3910 * cpi->rd_thresh_mult[mode_index];
3911 #endif
3912 } 3773 }
3913 3774
3914 /* keep record of best compound/single-only prediction */ 3775 /* keep record of best compound/single-only prediction */
3915 if (!disable_skip && mbmi->ref_frame[0] != INTRA_FRAME) { 3776 if (!disable_skip && ref_frame != INTRA_FRAME) {
3916 int single_rd, hybrid_rd, single_rate, hybrid_rate; 3777 int single_rd, hybrid_rd, single_rate, hybrid_rate;
3917 3778
3918 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { 3779 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
3919 single_rate = rate2 - compmode_cost; 3780 single_rate = rate2 - compmode_cost;
3920 hybrid_rate = rate2; 3781 hybrid_rate = rate2;
3921 } else { 3782 } else {
3922 single_rate = rate2; 3783 single_rate = rate2;
3923 hybrid_rate = rate2 + compmode_cost; 3784 hybrid_rate = rate2 + compmode_cost;
3924 } 3785 }
3925 3786
3926 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); 3787 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3927 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); 3788 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3928 3789
3929 if (mbmi->ref_frame[1] <= INTRA_FRAME && 3790 if (second_ref_frame <= INTRA_FRAME &&
3930 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { 3791 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3931 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; 3792 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3932 } else if (mbmi->ref_frame[1] > INTRA_FRAME && 3793 } else if (second_ref_frame > INTRA_FRAME &&
3933 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { 3794 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3934 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; 3795 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3935 } 3796 }
3936 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) 3797 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3937 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; 3798 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3938 } 3799 }
3939 3800
3940 /* keep record of best filter type */ 3801 /* keep record of best filter type */
3941 if (!mode_excluded && !disable_skip && mbmi->ref_frame[0] != INTRA_FRAME && 3802 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3942 cm->mcomp_filter_type != BILINEAR) { 3803 cm->mcomp_filter_type != BILINEAR) {
3943 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? 3804 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
3944 VP9_SWITCHABLE_FILTERS : 3805 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
3945 vp9_switchable_interp_map[cm->mcomp_filter_type]]; 3806 for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
3946 for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
3947 int64_t adj_rd; 3807 int64_t adj_rd;
3948 // In cases of poor prediction, filter_cache[] can contain really big 3808 // In cases of poor prediction, filter_cache[] can contain really big
3949 // values, which actually are bigger than this_rd itself. This can 3809 // values, which actually are bigger than this_rd itself. This can
3950 // cause negative best_filter_rd[] values, which is obviously silly. 3810 // cause negative best_filter_rd[] values, which is obviously silly.
3951 // Therefore, if filter_cache < ref, we do an adjusted calculation. 3811 // Therefore, if filter_cache < ref, we do an adjusted calculation.
3952 if (cpi->rd_filter_cache[i] >= ref) 3812 if (cpi->rd_filter_cache[i] >= ref)
3953 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref; 3813 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
3954 else // FIXME(rbultje) do this for comppred also 3814 else // FIXME(rbultje) do this for comppred also
3955 adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref; 3815 adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
3956 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); 3816 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3957 } 3817 }
3958 } 3818 }
3959 3819
3960 /* keep record of best txfm size */ 3820 /* keep record of best txfm size */
3961 if (bsize < BLOCK_SIZE_SB32X32) { 3821 if (bsize < BLOCK_32X32) {
3962 if (bsize < BLOCK_SIZE_MB16X16) { 3822 if (bsize < BLOCK_16X16) {
3963 if (this_mode == SPLITMV || this_mode == I4X4_PRED) 3823 if (this_mode == SPLITMV || this_mode == I4X4_PRED)
3964 txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4]; 3824 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
3965 txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8]; 3825 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3966 } 3826 }
3967 txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16]; 3827 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3968 } 3828 }
3969 if (!mode_excluded && this_rd != INT64_MAX) { 3829 if (!mode_excluded && this_rd != INT64_MAX) {
3970 for (i = 0; i < NB_TXFM_MODES; i++) { 3830 for (i = 0; i < TX_MODES; i++) {
3971 int64_t adj_rd = INT64_MAX; 3831 int64_t adj_rd = INT64_MAX;
3972 if (this_mode != I4X4_PRED) { 3832 if (this_mode != I4X4_PRED) {
3973 adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->tx_mode]; 3833 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3974 } else { 3834 } else {
3975 adj_rd = this_rd; 3835 adj_rd = this_rd;
3976 } 3836 }
3977 3837
3978 if (adj_rd < best_txfm_rd[i]) 3838 if (adj_rd < best_tx_rd[i])
3979 best_txfm_rd[i] = adj_rd; 3839 best_tx_rd[i] = adj_rd;
3980 } 3840 }
3981 } 3841 }
3982 3842
3983 if (early_term) 3843 if (early_term)
3984 break; 3844 break;
3985 3845
3986 if (x->skip && !mode_excluded) 3846 if (x->skip && !comp_pred)
3987 break; 3847 break;
3988 } 3848 }
3849
3989 if (best_rd >= best_rd_so_far) 3850 if (best_rd >= best_rd_so_far)
3990 return INT64_MAX; 3851 return INT64_MAX;
3991 3852
3992 // If we used an estimate for the uv intra rd in the loop above... 3853 // If we used an estimate for the uv intra rd in the loop above...
3993 if (cpi->sf.use_uv_intra_rd_estimate) { 3854 if (cpi->sf.use_uv_intra_rd_estimate) {
3994 // Do Intra UV best rd mode selection if best mode choice above was intra. 3855 // Do Intra UV best rd mode selection if best mode choice above was intra.
3995 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) { 3856 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
3996 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi); 3857 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
3997 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size], 3858 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
3998 &rate_uv_tokenonly[uv_tx_size], 3859 &rate_uv_tokenonly[uv_tx_size],
3999 &dist_uv[uv_tx_size], 3860 &dist_uv[uv_tx_size],
4000 &skip_uv[uv_tx_size], 3861 &skip_uv[uv_tx_size],
4001 (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 3862 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
4002 : bsize);
4003 } 3863 }
4004 } 3864 }
4005 3865
4006 // If indicated then mark the index of the chosen mode to be inspected at 3866 // If indicated then mark the index of the chosen mode to be inspected at
4007 // other block sizes. 3867 // other block sizes.
4008 if (bsize <= cpi->sf.unused_mode_skip_lvl) { 3868 if (bsize <= cpi->sf.unused_mode_skip_lvl) {
4009 cpi->unused_mode_skip_mask = cpi->unused_mode_skip_mask & 3869 cpi->unused_mode_skip_mask = cpi->unused_mode_skip_mask &
4010 (~((int64_t)1 << best_mode_index)); 3870 (~((int64_t)1 << best_mode_index));
4011 } 3871 }
4012 3872
(...skipping 14 matching lines...) Expand all
4027 } 3887 }
4028 3888
4029 // Flag all ref frames that have a distortion thats > 2x the best we found at 3889 // Flag all ref frames that have a distortion thats > 2x the best we found at
4030 // this level. 3890 // this level.
4031 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3891 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
4032 if (frame_distortions[ref_frame] > 2 * *returndistortion) { 3892 if (frame_distortions[ref_frame] > 2 * *returndistortion) {
4033 ctx->frames_with_high_error |= (1 << ref_frame); 3893 ctx->frames_with_high_error |= (1 << ref_frame);
4034 } 3894 }
4035 } 3895 }
4036 3896
4037 if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) { 3897 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4038 *returnrate = INT_MAX; 3898 *returnrate = INT_MAX;
4039 *returndistortion = INT_MAX; 3899 *returndistortion = INT_MAX;
4040 return best_rd; 3900 return best_rd;
4041 } 3901 }
4042 3902
4043 assert((cm->mcomp_filter_type == SWITCHABLE) || 3903 assert((cm->mcomp_filter_type == SWITCHABLE) ||
4044 (cm->mcomp_filter_type == best_mbmode.interp_filter) || 3904 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
4045 (best_mbmode.ref_frame[0] == INTRA_FRAME)); 3905 (best_mbmode.ref_frame[0] == INTRA_FRAME));
4046 3906
4047 // Updating rd_thresh_freq_fact[] here means that the differnt 3907 // Updating rd_thresh_freq_fact[] here means that the different
4048 // partition/block sizes are handled independently based on the best 3908 // partition/block sizes are handled independently based on the best
4049 // choice for the current partition. It may well be better to keep a scaled 3909 // choice for the current partition. It may well be better to keep a scaled
4050 // best rd so far value and update rd_thresh_freq_fact based on the mode/size 3910 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4051 // combination that wins out. 3911 // combination that wins out.
4052 if (cpi->sf.adaptive_rd_thresh) { 3912 if (cpi->sf.adaptive_rd_thresh) {
4053 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { 3913 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
4054 if (mode_index == best_mode_index) { 3914 if (mode_index == best_mode_index) {
4055 cpi->rd_thresh_freq_fact[bsize][mode_index] = BASE_RD_THRESH_FREQ_FACT; 3915 cpi->rd_thresh_freq_fact[bsize][mode_index] -=
3916 (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
4056 } else { 3917 } else {
4057 cpi->rd_thresh_freq_fact[bsize][mode_index] += MAX_RD_THRESH_FREQ_INC; 3918 cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
4058 if (cpi->rd_thresh_freq_fact[bsize][mode_index] > 3919 if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
4059 (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT)) { 3920 (cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT)) {
4060 cpi->rd_thresh_freq_fact[bsize][mode_index] = 3921 cpi->rd_thresh_freq_fact[bsize][mode_index] =
4061 cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FREQ_FACT; 3922 cpi->sf.adaptive_rd_thresh * MAX_RD_THRESH_FACT;
4062 } 3923 }
4063 } 3924 }
4064 } 3925 }
4065 } 3926 }
4066 3927
4067 // TODO(rbultje) integrate with RD trd_thresh_freq_facthresholding
4068 #if 0
4069 // Reduce the activation RD thresholds for the best choice mode
4070 if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
4071 (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
4072 int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
4073
4074 cpi->rd_thresh_mult[best_mode_index] =
4075 (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment )) ?
4076 cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
4077 cpi->rd_threshes[best_mode_index] =
4078 (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best _mode_index];
4079 }
4080 #endif
4081
4082 // macroblock modes 3928 // macroblock modes
4083 *mbmi = best_mbmode; 3929 *mbmi = best_mbmode;
4084 x->skip |= best_skip2; 3930 x->skip |= best_skip2;
4085 if (best_mbmode.ref_frame[0] == INTRA_FRAME && 3931 if (best_mbmode.ref_frame[0] == INTRA_FRAME &&
4086 best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { 3932 best_mbmode.sb_type < BLOCK_8X8) {
4087 for (i = 0; i < 4; i++) 3933 for (i = 0; i < 4; i++)
4088 xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; 3934 xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
4089 } 3935 }
4090 3936
4091 if (best_mbmode.ref_frame[0] != INTRA_FRAME && 3937 if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
4092 best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { 3938 best_mbmode.sb_type < BLOCK_8X8) {
4093 for (i = 0; i < 4; i++) 3939 for (i = 0; i < 4; i++)
4094 xd->mode_info_context->bmi[i].as_mv[0].as_int = 3940 xd->mode_info_context->bmi[i].as_mv[0].as_int =
4095 best_bmodes[i].as_mv[0].as_int; 3941 best_bmodes[i].as_mv[0].as_int;
4096 3942
4097 if (mbmi->ref_frame[1] > 0) 3943 if (mbmi->ref_frame[1] > 0)
4098 for (i = 0; i < 4; i++) 3944 for (i = 0; i < 4; i++)
4099 xd->mode_info_context->bmi[i].as_mv[1].as_int = 3945 xd->mode_info_context->bmi[i].as_mv[1].as_int =
4100 best_bmodes[i].as_mv[1].as_int; 3946 best_bmodes[i].as_mv[1].as_int;
4101 3947
4102 *x->partition_info = best_partition; 3948 *x->partition_info = best_partition;
4103 3949
4104 mbmi->mv[0].as_int = xd->mode_info_context->bmi[3].as_mv[0].as_int; 3950 mbmi->mv[0].as_int = xd->mode_info_context->bmi[3].as_mv[0].as_int;
4105 mbmi->mv[1].as_int = xd->mode_info_context->bmi[3].as_mv[1].as_int; 3951 mbmi->mv[1].as_int = xd->mode_info_context->bmi[3].as_mv[1].as_int;
4106 } 3952 }
4107 3953
4108 for (i = 0; i < NB_PREDICTION_TYPES; ++i) { 3954 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
4109 if (best_pred_rd[i] == INT64_MAX) 3955 if (best_pred_rd[i] == INT64_MAX)
4110 best_pred_diff[i] = INT_MIN; 3956 best_pred_diff[i] = INT_MIN;
4111 else 3957 else
4112 best_pred_diff[i] = best_rd - best_pred_rd[i]; 3958 best_pred_diff[i] = best_rd - best_pred_rd[i];
4113 } 3959 }
4114 3960
4115 if (!x->skip) { 3961 if (!x->skip) {
4116 for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) { 3962 for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
4117 if (best_filter_rd[i] == INT64_MAX) 3963 if (best_filter_rd[i] == INT64_MAX)
4118 best_filter_diff[i] = 0; 3964 best_filter_diff[i] = 0;
4119 else 3965 else
4120 best_filter_diff[i] = best_rd - best_filter_rd[i]; 3966 best_filter_diff[i] = best_rd - best_filter_rd[i];
4121 } 3967 }
4122 if (cm->mcomp_filter_type == SWITCHABLE) 3968 if (cm->mcomp_filter_type == SWITCHABLE)
4123 assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0); 3969 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4124 } else { 3970 } else {
4125 vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff)); 3971 vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
4126 } 3972 }
4127 3973
4128 if (!x->skip) { 3974 if (!x->skip) {
4129 for (i = 0; i < NB_TXFM_MODES; i++) { 3975 for (i = 0; i < TX_MODES; i++) {
4130 if (best_txfm_rd[i] == INT64_MAX) 3976 if (best_tx_rd[i] == INT64_MAX)
4131 best_txfm_diff[i] = 0; 3977 best_tx_diff[i] = 0;
4132 else 3978 else
4133 best_txfm_diff[i] = best_rd - best_txfm_rd[i]; 3979 best_tx_diff[i] = best_rd - best_tx_rd[i];
4134 } 3980 }
4135 } else { 3981 } else {
4136 vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); 3982 vpx_memset(best_tx_diff, 0, sizeof(best_tx_diff));
4137 } 3983 }
4138 3984
4139 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1], 3985 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
4140 scale_factor); 3986 scale_factor);
4141 store_coding_context(x, ctx, best_mode_index, 3987 store_coding_context(x, ctx, best_mode_index,
4142 &best_partition, 3988 &best_partition,
4143 &mbmi->ref_mvs[mbmi->ref_frame[0]][0], 3989 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4144 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : 3990 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4145 mbmi->ref_frame[1]][0], 3991 mbmi->ref_frame[1]][0],
4146 best_pred_diff, best_txfm_diff, best_filter_diff); 3992 best_pred_diff, best_tx_diff, best_filter_diff);
4147 3993
4148 return best_rd; 3994 return best_rd;
4149 } 3995 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_segmentation.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698