Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: source/libvpx/vp9/encoder/vp9_rdopt.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_resize.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
124 {{LAST_FRAME, NONE}}, 124 {{LAST_FRAME, NONE}},
125 {{GOLDEN_FRAME, NONE}}, 125 {{GOLDEN_FRAME, NONE}},
126 {{ALTREF_FRAME, NONE}}, 126 {{ALTREF_FRAME, NONE}},
127 {{LAST_FRAME, ALTREF_FRAME}}, 127 {{LAST_FRAME, ALTREF_FRAME}},
128 {{GOLDEN_FRAME, ALTREF_FRAME}}, 128 {{GOLDEN_FRAME, ALTREF_FRAME}},
129 {{INTRA_FRAME, NONE}}, 129 {{INTRA_FRAME, NONE}},
130 }; 130 };
131 131
132 static int raster_block_offset(BLOCK_SIZE plane_bsize, 132 static int raster_block_offset(BLOCK_SIZE plane_bsize,
133 int raster_block, int stride) { 133 int raster_block, int stride) {
134 const int bw = b_width_log2(plane_bsize); 134 const int bw = b_width_log2_lookup[plane_bsize];
135 const int y = 4 * (raster_block >> bw); 135 const int y = 4 * (raster_block >> bw);
136 const int x = 4 * (raster_block & ((1 << bw) - 1)); 136 const int x = 4 * (raster_block & ((1 << bw) - 1));
137 return y * stride + x; 137 return y * stride + x;
138 } 138 }
139 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize, 139 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
140 int raster_block, int16_t *base) { 140 int raster_block, int16_t *base) {
141 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 141 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
142 return base + raster_block_offset(plane_bsize, raster_block, stride); 142 return base + raster_block_offset(plane_bsize, raster_block, stride);
143 } 143 }
144 144
(...skipping 17 matching lines...) Expand all
162 162
163 ctx->coeff_pbuf[i][n] = p->coeff; 163 ctx->coeff_pbuf[i][n] = p->coeff;
164 ctx->qcoeff_pbuf[i][n] = p->qcoeff; 164 ctx->qcoeff_pbuf[i][n] = p->qcoeff;
165 ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff; 165 ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
166 ctx->eobs_pbuf[i][n] = p->eobs; 166 ctx->eobs_pbuf[i][n] = p->eobs;
167 } 167 }
168 } 168 }
169 169
170 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, 170 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
171 MACROBLOCK *x, MACROBLOCKD *xd, 171 MACROBLOCK *x, MACROBLOCKD *xd,
172 int *out_rate_sum, int64_t *out_dist_sum) { 172 int *out_rate_sum, int64_t *out_dist_sum,
173 int *skip_txfm_sb, int64_t *skip_sse_sb) {
173 // Note our transform coeffs are 8 times an orthogonal transform. 174 // Note our transform coeffs are 8 times an orthogonal transform.
174 // Hence quantizer step is also 8 times. To get effective quantizer 175 // Hence quantizer step is also 8 times. To get effective quantizer
175 // we need to divide by 8 before sending to modeling function. 176 // we need to divide by 8 before sending to modeling function.
176 int i; 177 int i;
177 int64_t rate_sum = 0; 178 int64_t rate_sum = 0;
178 int64_t dist_sum = 0; 179 int64_t dist_sum = 0;
179 const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0]; 180 const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
180 unsigned int sse; 181 unsigned int sse;
181 unsigned int var = 0; 182 unsigned int var = 0;
182 unsigned int sum_sse = 0; 183 unsigned int sum_sse = 0;
183 const int shift = 8; 184 int64_t total_sse = 0;
185 int skip_flag = 1;
186 const int shift = 6;
184 int rate; 187 int rate;
185 int64_t dist; 188 int64_t dist;
186 189
187 x->pred_sse[ref] = 0; 190 x->pred_sse[ref] = 0;
188 191
189 for (i = 0; i < MAX_MB_PLANE; ++i) { 192 for (i = 0; i < MAX_MB_PLANE; ++i) {
190 struct macroblock_plane *const p = &x->plane[i]; 193 struct macroblock_plane *const p = &x->plane[i];
191 struct macroblockd_plane *const pd = &xd->plane[i]; 194 struct macroblockd_plane *const pd = &xd->plane[i];
192 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); 195 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193 const TX_SIZE max_tx_size = max_txsize_lookup[bs]; 196 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
194 const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size]; 197 const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
198 const int64_t dc_thr = p->quant_thred[0] >> shift;
199 const int64_t ac_thr = p->quant_thred[1] >> shift;
200 // The low thresholds are used to measure if the prediction errors are
201 // low enough so that we can skip the mode search.
202 const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
203 const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
195 int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); 204 int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
196 int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); 205 int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
197 int idx, idy; 206 int idx, idy;
198 int lw = b_width_log2_lookup[unit_size] + 2; 207 int lw = b_width_log2_lookup[unit_size] + 2;
199 int lh = b_height_log2_lookup[unit_size] + 2; 208 int lh = b_height_log2_lookup[unit_size] + 2;
200 209
201 sum_sse = 0; 210 sum_sse = 0;
202 211
203 for (idy = 0; idy < bh; ++idy) { 212 for (idy = 0; idy < bh; ++idy) {
204 for (idx = 0; idx < bw; ++idx) { 213 for (idx = 0; idx < bw; ++idx) {
205 uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); 214 uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
206 uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); 215 uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
207 int block_idx = (idy << 1) + idx; 216 int block_idx = (idy << 1) + idx;
217 int low_err_skip = 0;
208 218
209 var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, 219 var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
210 dst, pd->dst.stride, &sse); 220 dst, pd->dst.stride, &sse);
211 x->bsse[(i << 2) + block_idx] = sse; 221 x->bsse[(i << 2) + block_idx] = sse;
212 sum_sse += sse; 222 sum_sse += sse;
213 223
224 x->skip_txfm[(i << 2) + block_idx] = 0;
214 if (!x->select_tx_size) { 225 if (!x->select_tx_size) {
215 if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift) 226 // Check if all ac coefficients can be quantized to zero.
216 x->skip_txfm[(i << 2) + block_idx] = 1; 227 if (var < ac_thr || var == 0) {
217 else if (var < p->quant_thred[1] >> shift)
218 x->skip_txfm[(i << 2) + block_idx] = 2; 228 x->skip_txfm[(i << 2) + block_idx] = 2;
219 else 229
220 x->skip_txfm[(i << 2) + block_idx] = 0; 230 // Check if dc coefficient can be quantized to zero.
231 if (sse - var < dc_thr || sse == var) {
232 x->skip_txfm[(i << 2) + block_idx] = 1;
233
234 if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
235 low_err_skip = 1;
236 }
237 }
221 } 238 }
222 239
240 if (skip_flag && !low_err_skip)
241 skip_flag = 0;
242
223 if (i == 0) 243 if (i == 0)
224 x->pred_sse[ref] += sse; 244 x->pred_sse[ref] += sse;
225 } 245 }
226 } 246 }
227 247
248 total_sse += sum_sse;
249
228 // Fast approximate the modelling function. 250 // Fast approximate the modelling function.
229 if (cpi->oxcf.speed > 4) { 251 if (cpi->oxcf.speed > 4) {
230 int64_t rate; 252 int64_t rate;
231 int64_t dist; 253 const int64_t square_error = sum_sse;
232 int64_t square_error = sse;
233 int quantizer = (pd->dequant[1] >> 3); 254 int quantizer = (pd->dequant[1] >> 3);
255 #if CONFIG_VP9_HIGHBITDEPTH
256 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
257 quantizer >>= (xd->bd - 8);
258 }
259 #endif // CONFIG_VP9_HIGHBITDEPTH
234 260
235 if (quantizer < 120) 261 if (quantizer < 120)
236 rate = (square_error * (280 - quantizer)) >> 8; 262 rate = (square_error * (280 - quantizer)) >> 8;
237 else 263 else
238 rate = 0; 264 rate = 0;
239 dist = (square_error * quantizer) >> 8; 265 dist = (square_error * quantizer) >> 8;
240 rate_sum += rate; 266 rate_sum += rate;
241 dist_sum += dist; 267 dist_sum += dist;
242 } else { 268 } else {
269 #if CONFIG_VP9_HIGHBITDEPTH
270 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
271 vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
272 pd->dequant[1] >> (xd->bd - 5),
273 &rate, &dist);
274 } else {
275 vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
276 pd->dequant[1] >> 3, &rate, &dist);
277 }
278 #else
243 vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs], 279 vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
244 pd->dequant[1] >> 3, &rate, &dist); 280 pd->dequant[1] >> 3, &rate, &dist);
281 #endif // CONFIG_VP9_HIGHBITDEPTH
245 rate_sum += rate; 282 rate_sum += rate;
246 dist_sum += dist; 283 dist_sum += dist;
247 } 284 }
248 } 285 }
249 286
287 *skip_txfm_sb = skip_flag;
288 *skip_sse_sb = total_sse << 4;
250 *out_rate_sum = (int)rate_sum; 289 *out_rate_sum = (int)rate_sum;
251 *out_dist_sum = dist_sum << 4; 290 *out_dist_sum = dist_sum << 4;
252 } 291 }
253 292
254 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, 293 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
255 intptr_t block_size, int64_t *ssz) { 294 intptr_t block_size, int64_t *ssz) {
256 int i; 295 int i;
257 int64_t error = 0, sqcoeff = 0; 296 int64_t error = 0, sqcoeff = 0;
258 297
259 for (i = 0; i < block_size; i++) { 298 for (i = 0; i < block_size; i++) {
260 const int diff = coeff[i] - dqcoeff[i]; 299 const int diff = coeff[i] - dqcoeff[i];
261 error += diff * diff; 300 error += diff * diff;
262 sqcoeff += coeff[i] * coeff[i]; 301 sqcoeff += coeff[i] * coeff[i];
263 } 302 }
264 303
265 *ssz = sqcoeff; 304 *ssz = sqcoeff;
266 return error; 305 return error;
267 } 306 }
268 307
308
309 #if CONFIG_VP9_HIGHBITDEPTH
310 int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
311 const tran_low_t *dqcoeff,
312 intptr_t block_size,
313 int64_t *ssz, int bd) {
314 int i;
315 int64_t error = 0, sqcoeff = 0;
316 int shift = 2 * (bd - 8);
317 int rounding = shift > 0 ? 1 << (shift - 1) : 0;
318
319 for (i = 0; i < block_size; i++) {
320 const int64_t diff = coeff[i] - dqcoeff[i];
321 error += diff * diff;
322 sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
323 }
324 assert(error >= 0 && sqcoeff >= 0);
325 error = (error + rounding) >> shift;
326 sqcoeff = (sqcoeff + rounding) >> shift;
327
328 *ssz = sqcoeff;
329 return error;
330 }
331 #endif // CONFIG_VP9_HIGHBITDEPTH
332
269 /* The trailing '0' is a terminator which is used inside cost_coeffs() to 333 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
270 * decide whether to include cost of a trailing EOB node or not (i.e. we 334 * decide whether to include cost of a trailing EOB node or not (i.e. we
271 * can skip this if the last coefficient in this transform block, e.g. the 335 * can skip this if the last coefficient in this transform block, e.g. the
272 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block, 336 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
273 * were non-zero). */ 337 * were non-zero). */
274 static const int16_t band_counts[TX_SIZES][8] = { 338 static const int16_t band_counts[TX_SIZES][8] = {
275 { 1, 2, 3, 4, 3, 16 - 13, 0 }, 339 { 1, 2, 3, 4, 3, 16 - 13, 0 },
276 { 1, 2, 3, 4, 11, 64 - 21, 0 }, 340 { 1, 2, 3, 4, 11, 64 - 21, 0 },
277 { 1, 2, 3, 4, 11, 256 - 21, 0 }, 341 { 1, 2, 3, 4, 11, 256 - 21, 0 },
278 { 1, 2, 3, 4, 11, 1024 - 21, 0 }, 342 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
344 cost += (*token_costs)[0][pt][EOB_TOKEN]; 408 cost += (*token_costs)[0][pt][EOB_TOKEN];
345 } 409 }
346 } 410 }
347 } 411 }
348 412
349 // is eob first coefficient; 413 // is eob first coefficient;
350 *A = *L = (c > 0); 414 *A = *L = (c > 0);
351 415
352 return cost; 416 return cost;
353 } 417 }
418
419 #if CONFIG_VP9_HIGHBITDEPTH
420 static void dist_block(int plane, int block, TX_SIZE tx_size,
421 struct rdcost_block_args* args, int bd) {
422 #else
354 static void dist_block(int plane, int block, TX_SIZE tx_size, 423 static void dist_block(int plane, int block, TX_SIZE tx_size,
355 struct rdcost_block_args* args) { 424 struct rdcost_block_args* args) {
425 #endif // CONFIG_VP9_HIGHBITDEPTH
356 const int ss_txfrm_size = tx_size << 1; 426 const int ss_txfrm_size = tx_size << 1;
357 MACROBLOCK* const x = args->x; 427 MACROBLOCK* const x = args->x;
358 MACROBLOCKD* const xd = &x->e_mbd; 428 MACROBLOCKD* const xd = &x->e_mbd;
359 const struct macroblock_plane *const p = &x->plane[plane]; 429 const struct macroblock_plane *const p = &x->plane[plane];
360 const struct macroblockd_plane *const pd = &xd->plane[plane]; 430 const struct macroblockd_plane *const pd = &xd->plane[plane];
361 int64_t this_sse; 431 int64_t this_sse;
362 int shift = tx_size == TX_32X32 ? 0 : 2; 432 int shift = tx_size == TX_32X32 ? 0 : 2;
363 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 433 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
364 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 434 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
435 #if CONFIG_VP9_HIGHBITDEPTH
436 args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
437 &this_sse, bd) >> shift;
438 #else
365 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, 439 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
366 &this_sse) >> shift; 440 &this_sse) >> shift;
441 #endif // CONFIG_VP9_HIGHBITDEPTH
367 args->sse = this_sse >> shift; 442 args->sse = this_sse >> shift;
368 443
369 if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) { 444 if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
370 // TODO(jingning): tune the model to better capture the distortion. 445 // TODO(jingning): tune the model to better capture the distortion.
371 int64_t p = (pd->dequant[1] * pd->dequant[1] * 446 int64_t p = (pd->dequant[1] * pd->dequant[1] *
372 (1 << ss_txfrm_size)) >> (shift + 2); 447 (1 << ss_txfrm_size)) >> (shift + 2);
448 #if CONFIG_VP9_HIGHBITDEPTH
449 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
450 p >>= ((xd->bd - 8) * 2);
451 }
452 #endif // CONFIG_VP9_HIGHBITDEPTH
373 args->dist += (p >> 4); 453 args->dist += (p >> 4);
374 args->sse += p; 454 args->sse += p;
375 } 455 }
376 } 456 }
377 457
378 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize, 458 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
379 TX_SIZE tx_size, struct rdcost_block_args* args) { 459 TX_SIZE tx_size, struct rdcost_block_args* args) {
380 int x_idx, y_idx; 460 int x_idx, y_idx;
381 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx); 461 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
382 462
383 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx, 463 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
384 args->t_left + y_idx, tx_size, 464 args->t_left + y_idx, tx_size,
385 args->so->scan, args->so->neighbors, 465 args->so->scan, args->so->neighbors,
386 args->use_fast_coef_costing); 466 args->use_fast_coef_costing);
387 } 467 }
388 468
389 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, 469 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
390 TX_SIZE tx_size, void *arg) { 470 TX_SIZE tx_size, void *arg) {
391 struct rdcost_block_args *args = arg; 471 struct rdcost_block_args *args = arg;
392 MACROBLOCK *const x = args->x; 472 MACROBLOCK *const x = args->x;
393 MACROBLOCKD *const xd = &x->e_mbd; 473 MACROBLOCKD *const xd = &x->e_mbd;
394 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; 474 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
395 int64_t rd1, rd2, rd; 475 int64_t rd1, rd2, rd;
396 476
397 if (args->skip) 477 if (args->skip)
398 return; 478 return;
399 479
400 if (!is_inter_block(mbmi)) { 480 if (!is_inter_block(mbmi)) {
401 vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip); 481 vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
482 #if CONFIG_VP9_HIGHBITDEPTH
483 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
484 dist_block(plane, block, tx_size, args, xd->bd);
485 } else {
486 dist_block(plane, block, tx_size, args, 8);
487 }
488 #else
402 dist_block(plane, block, tx_size, args); 489 dist_block(plane, block, tx_size, args);
490 #endif // CONFIG_VP9_HIGHBITDEPTH
403 } else if (max_txsize_lookup[plane_bsize] == tx_size) { 491 } else if (max_txsize_lookup[plane_bsize] == tx_size) {
404 if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) { 492 if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
405 // full forward transform and quantization 493 // full forward transform and quantization
406 vp9_xform_quant(x, plane, block, plane_bsize, tx_size); 494 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
495 #if CONFIG_VP9_HIGHBITDEPTH
496 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
497 dist_block(plane, block, tx_size, args, xd->bd);
498 } else {
499 dist_block(plane, block, tx_size, args, 8);
500 }
501 #else
407 dist_block(plane, block, tx_size, args); 502 dist_block(plane, block, tx_size, args);
503 #endif // CONFIG_VP9_HIGHBITDEPTH
408 } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) { 504 } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
409 // compute DC coefficient 505 // compute DC coefficient
410 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); 506 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
411 tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); 507 tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
412 vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); 508 vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
413 args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; 509 args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
414 args->dist = args->sse; 510 args->dist = args->sse;
415 if (!x->plane[plane].eobs[block]) 511 if (x->plane[plane].eobs[block]) {
416 args->dist = args->sse - ((coeff[0] * coeff[0] - 512 int64_t dc_correct = coeff[0] * coeff[0] -
417 (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2); 513 (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0]);
514 #if CONFIG_VP9_HIGHBITDEPTH
515 dc_correct >>= ((xd->bd - 8) * 2);
516 #endif
517 if (tx_size != TX_32X32)
518 dc_correct >>= 2;
519
520 args->dist = MAX(0, args->sse - dc_correct);
521 }
418 } else { 522 } else {
419 // skip forward transform 523 // skip forward transform
420 x->plane[plane].eobs[block] = 0; 524 x->plane[plane].eobs[block] = 0;
421 args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; 525 args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
422 args->dist = args->sse; 526 args->dist = args->sse;
423 } 527 }
424 } else { 528 } else {
425 // full forward transform and quantization 529 // full forward transform and quantization
426 vp9_xform_quant(x, plane, block, plane_bsize, tx_size); 530 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
531 #if CONFIG_VP9_HIGHBITDEPTH
532 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
533 dist_block(plane, block, tx_size, args, xd->bd);
534 } else {
535 dist_block(plane, block, tx_size, args, 8);
536 }
537 #else
427 dist_block(plane, block, tx_size, args); 538 dist_block(plane, block, tx_size, args);
539 #endif // CONFIG_VP9_HIGHBITDEPTH
428 } 540 }
429 541
430 rate_block(plane, block, plane_bsize, tx_size, args); 542 rate_block(plane, block, plane_bsize, tx_size, args);
431 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); 543 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
432 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); 544 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
433 545
434 // TODO(jingning): temporarily enabled only for luma component 546 // TODO(jingning): temporarily enabled only for luma component
435 rd = MIN(rd1, rd2); 547 rd = MIN(rd1, rd2);
436 if (plane == 0) 548 if (plane == 0)
437 x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] || 549 x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
(...skipping 214 matching lines...) Expand 10 before | Expand all | Expand 10 after
652 src_stride)]; 764 src_stride)];
653 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib, 765 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
654 dst_stride)]; 766 dst_stride)];
655 ENTROPY_CONTEXT ta[2], tempa[2]; 767 ENTROPY_CONTEXT ta[2], tempa[2];
656 ENTROPY_CONTEXT tl[2], templ[2]; 768 ENTROPY_CONTEXT tl[2], templ[2];
657 769
658 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; 770 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
659 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; 771 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
660 int idx, idy; 772 int idx, idy;
661 uint8_t best_dst[8 * 8]; 773 uint8_t best_dst[8 * 8];
774 #if CONFIG_VP9_HIGHBITDEPTH
775 uint16_t best_dst16[8 * 8];
776 #endif
662 777
663 assert(ib < 4); 778 assert(ib < 4);
664 779
665 vpx_memcpy(ta, a, sizeof(ta)); 780 vpx_memcpy(ta, a, sizeof(ta));
666 vpx_memcpy(tl, l, sizeof(tl)); 781 vpx_memcpy(tl, l, sizeof(tl));
667 xd->mi[0].src_mi->mbmi.tx_size = TX_4X4; 782 xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
668 783
784 #if CONFIG_VP9_HIGHBITDEPTH
785 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
786 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
787 int64_t this_rd;
788 int ratey = 0;
789 int64_t distortion = 0;
790 int rate = bmode_costs[mode];
791
792 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
793 continue;
794
795 // Only do the oblique modes if the best so far is
796 // one of the neighboring directional modes
797 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
798 if (conditional_skipintra(mode, *best_mode))
799 continue;
800 }
801
802 vpx_memcpy(tempa, ta, sizeof(ta));
803 vpx_memcpy(templ, tl, sizeof(tl));
804
805 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
806 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
807 const int block = ib + idy * 2 + idx;
808 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
809 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
810 int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
811 p->src_diff);
812 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
813 xd->mi[0].src_mi->bmi[block].as_mode = mode;
814 vp9_predict_intra_block(xd, block, 1,
815 TX_4X4, mode,
816 x->skip_encode ? src : dst,
817 x->skip_encode ? src_stride : dst_stride,
818 dst, dst_stride, idx, idy, 0);
819 vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
820 dst, dst_stride, xd->bd);
821 if (xd->lossless) {
822 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
823 vp9_highbd_fwht4x4(src_diff, coeff, 8);
824 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
825 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
826 so->scan, so->neighbors,
827 cpi->sf.use_fast_coef_costing);
828 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
829 goto next_highbd;
830 vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
831 dst, dst_stride,
832 p->eobs[block], xd->bd);
833 } else {
834 int64_t unused;
835 const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
836 const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
837 vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
838 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
839 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
840 so->scan, so->neighbors,
841 cpi->sf.use_fast_coef_costing);
842 distortion += vp9_highbd_block_error(
843 coeff, BLOCK_OFFSET(pd->dqcoeff, block),
844 16, &unused, xd->bd) >> 2;
845 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
846 goto next_highbd;
847 vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
848 dst, dst_stride, p->eobs[block], xd->bd);
849 }
850 }
851 }
852
853 rate += ratey;
854 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
855
856 if (this_rd < best_rd) {
857 *bestrate = rate;
858 *bestratey = ratey;
859 *bestdistortion = distortion;
860 best_rd = this_rd;
861 *best_mode = mode;
862 vpx_memcpy(a, tempa, sizeof(tempa));
863 vpx_memcpy(l, templ, sizeof(templ));
864 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
865 vpx_memcpy(best_dst16 + idy * 8,
866 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
867 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
868 }
869 }
870 next_highbd:
871 {}
872 }
873 if (best_rd >= rd_thresh || x->skip_encode)
874 return best_rd;
875
876 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
877 vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
878 best_dst16 + idy * 8,
879 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
880 }
881
882 return best_rd;
883 }
884 #endif // CONFIG_VP9_HIGHBITDEPTH
885
669 for (mode = DC_PRED; mode <= TM_PRED; ++mode) { 886 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
670 int64_t this_rd; 887 int64_t this_rd;
671 int ratey = 0; 888 int ratey = 0;
672 int64_t distortion = 0; 889 int64_t distortion = 0;
673 int rate = bmode_costs[mode]; 890 int rate = bmode_costs[mode];
674 891
675 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) 892 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
676 continue; 893 continue;
677 894
678 // Only do the oblique modes if the best so far is 895 // Only do the oblique modes if the best so far is
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
820 } 1037 }
821 1038
822 *rate = cost; 1039 *rate = cost;
823 *rate_y = tot_rate_y; 1040 *rate_y = tot_rate_y;
824 *distortion = total_distortion; 1041 *distortion = total_distortion;
825 mic->mbmi.mode = mic->bmi[3].as_mode; 1042 mic->mbmi.mode = mic->bmi[3].as_mode;
826 1043
827 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); 1044 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
828 } 1045 }
829 1046
1047 // This function is used only for intra_only frames
830 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, 1048 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
831 int *rate, int *rate_tokenonly, 1049 int *rate, int *rate_tokenonly,
832 int64_t *distortion, int *skippable, 1050 int64_t *distortion, int *skippable,
833 BLOCK_SIZE bsize, 1051 BLOCK_SIZE bsize,
834 int64_t tx_cache[TX_MODES], 1052 int64_t tx_cache[TX_MODES],
835 int64_t best_rd) { 1053 int64_t best_rd) {
836 PREDICTION_MODE mode; 1054 PREDICTION_MODE mode;
837 PREDICTION_MODE mode_selected = DC_PRED; 1055 PREDICTION_MODE mode_selected = DC_PRED;
838 MACROBLOCKD *const xd = &x->e_mbd; 1056 MACROBLOCKD *const xd = &x->e_mbd;
839 MODE_INFO *const mic = xd->mi[0].src_mi; 1057 MODE_INFO *const mic = xd->mi[0].src_mi;
840 int this_rate, this_rate_tokenonly, s; 1058 int this_rate, this_rate_tokenonly, s;
841 int64_t this_distortion, this_rd; 1059 int64_t this_distortion, this_rd;
842 TX_SIZE best_tx = TX_4X4; 1060 TX_SIZE best_tx = TX_4X4;
843 int i; 1061 int i;
844 int *bmode_costs = cpi->mbmode_cost; 1062 int *bmode_costs;
1063 const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
1064 const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
1065 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1066 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1067 bmode_costs = cpi->y_mode_costs[A][L];
845 1068
846 if (cpi->sf.tx_size_search_method == USE_FULL_RD) 1069 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
847 for (i = 0; i < TX_MODES; i++) 1070 for (i = 0; i < TX_MODES; i++)
848 tx_cache[i] = INT64_MAX; 1071 tx_cache[i] = INT64_MAX;
849 1072
1073 vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
850 /* Y Search for intra prediction mode */ 1074 /* Y Search for intra prediction mode */
851 for (mode = DC_PRED; mode <= TM_PRED; mode++) { 1075 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
852 int64_t local_tx_cache[TX_MODES]; 1076 int64_t local_tx_cache[TX_MODES];
853 MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
854 MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
855
856 if (cpi->common.frame_type == KEY_FRAME) {
857 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
858 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
859
860 bmode_costs = cpi->y_mode_costs[A][L];
861 }
862 mic->mbmi.mode = mode; 1077 mic->mbmi.mode = mode;
863 1078
864 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, 1079 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
865 &s, NULL, bsize, local_tx_cache, best_rd); 1080 &s, NULL, bsize, local_tx_cache, best_rd);
866 1081
867 if (this_rate_tokenonly == INT_MAX) 1082 if (this_rate_tokenonly == INT_MAX)
868 continue; 1083 continue;
869 1084
870 this_rate = this_rate_tokenonly + bmode_costs[mode]; 1085 this_rate = this_rate_tokenonly + bmode_costs[mode];
871 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); 1086 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
(...skipping 18 matching lines...) Expand all
890 } 1105 }
891 } 1106 }
892 } 1107 }
893 1108
894 mic->mbmi.mode = mode_selected; 1109 mic->mbmi.mode = mode_selected;
895 mic->mbmi.tx_size = best_tx; 1110 mic->mbmi.tx_size = best_tx;
896 1111
897 return best_rd; 1112 return best_rd;
898 } 1113 }
899 1114
900 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, 1115 // Return value 0: early termination triggered, no valid rd cost available;
901 int *rate, int64_t *distortion, int *skippable, 1116 // 1: rd cost values are valid.
902 int64_t *sse, BLOCK_SIZE bsize, 1117 static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
903 int64_t ref_best_rd) { 1118 int *rate, int64_t *distortion, int *skippable,
1119 int64_t *sse, BLOCK_SIZE bsize,
1120 int64_t ref_best_rd) {
904 MACROBLOCKD *const xd = &x->e_mbd; 1121 MACROBLOCKD *const xd = &x->e_mbd;
905 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; 1122 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
906 const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); 1123 const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
907 int plane; 1124 int plane;
908 int pnrate = 0, pnskip = 1; 1125 int pnrate = 0, pnskip = 1;
909 int64_t pndist = 0, pnsse = 0; 1126 int64_t pndist = 0, pnsse = 0;
1127 int is_cost_valid = 1;
910 1128
911 if (ref_best_rd < 0) 1129 if (ref_best_rd < 0)
912 goto term; 1130 is_cost_valid = 0;
913 1131
914 if (is_inter_block(mbmi)) { 1132 if (is_inter_block(mbmi) && is_cost_valid) {
915 int plane; 1133 int plane;
916 for (plane = 1; plane < MAX_MB_PLANE; ++plane) 1134 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
917 vp9_subtract_plane(x, bsize, plane); 1135 vp9_subtract_plane(x, bsize, plane);
918 } 1136 }
919 1137
920 *rate = 0; 1138 *rate = 0;
921 *distortion = 0; 1139 *distortion = 0;
922 *sse = 0; 1140 *sse = 0;
923 *skippable = 1; 1141 *skippable = 1;
924 1142
925 for (plane = 1; plane < MAX_MB_PLANE; ++plane) { 1143 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
926 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, 1144 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
927 ref_best_rd, plane, bsize, uv_tx_size, 1145 ref_best_rd, plane, bsize, uv_tx_size,
928 cpi->sf.use_fast_coef_costing); 1146 cpi->sf.use_fast_coef_costing);
929 if (pnrate == INT_MAX) 1147 if (pnrate == INT_MAX) {
930 goto term; 1148 is_cost_valid = 0;
1149 break;
1150 }
931 *rate += pnrate; 1151 *rate += pnrate;
932 *distortion += pndist; 1152 *distortion += pndist;
933 *sse += pnsse; 1153 *sse += pnsse;
934 *skippable &= pnskip; 1154 *skippable &= pnskip;
935 } 1155 }
936 return;
937 1156
938 term: 1157 if (!is_cost_valid) {
939 *rate = INT_MAX; 1158 // reset cost value
940 *distortion = INT64_MAX; 1159 *rate = INT_MAX;
941 *sse = INT64_MAX; 1160 *distortion = INT64_MAX;
942 *skippable = 0; 1161 *sse = INT64_MAX;
943 return; 1162 *skippable = 0;
1163 }
1164
1165 return is_cost_valid;
944 } 1166 }
945 1167
946 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, 1168 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
947 PICK_MODE_CONTEXT *ctx, 1169 PICK_MODE_CONTEXT *ctx,
948 int *rate, int *rate_tokenonly, 1170 int *rate, int *rate_tokenonly,
949 int64_t *distortion, int *skippable, 1171 int64_t *distortion, int *skippable,
950 BLOCK_SIZE bsize, TX_SIZE max_tx_size) { 1172 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
951 MACROBLOCKD *xd = &x->e_mbd; 1173 MACROBLOCKD *xd = &x->e_mbd;
952 PREDICTION_MODE mode; 1174 PREDICTION_MODE mode;
953 PREDICTION_MODE mode_selected = DC_PRED; 1175 PREDICTION_MODE mode_selected = DC_PRED;
954 int64_t best_rd = INT64_MAX, this_rd; 1176 int64_t best_rd = INT64_MAX, this_rd;
955 int this_rate_tokenonly, this_rate, s; 1177 int this_rate_tokenonly, this_rate, s;
956 int64_t this_distortion, this_sse; 1178 int64_t this_distortion, this_sse;
957 1179
1180 vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
958 for (mode = DC_PRED; mode <= TM_PRED; ++mode) { 1181 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
959 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) 1182 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
960 continue; 1183 continue;
961 1184
962 xd->mi[0].src_mi->mbmi.uv_mode = mode; 1185 xd->mi[0].src_mi->mbmi.uv_mode = mode;
963 1186
964 super_block_uvrd(cpi, x, &this_rate_tokenonly, 1187 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
965 &this_distortion, &s, &this_sse, bsize, best_rd); 1188 &this_distortion, &s, &this_sse, bsize, best_rd))
966 if (this_rate_tokenonly == INT_MAX)
967 continue; 1189 continue;
968 this_rate = this_rate_tokenonly + 1190 this_rate = this_rate_tokenonly +
969 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode]; 1191 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
970 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); 1192 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
971 1193
972 if (this_rd < best_rd) { 1194 if (this_rd < best_rd) {
973 mode_selected = mode; 1195 mode_selected = mode;
974 best_rd = this_rd; 1196 best_rd = this_rd;
975 *rate = this_rate; 1197 *rate = this_rate;
976 *rate_tokenonly = this_rate_tokenonly; 1198 *rate_tokenonly = this_rate_tokenonly;
977 *distortion = this_distortion; 1199 *distortion = this_distortion;
978 *skippable = s; 1200 *skippable = s;
979 if (!x->select_tx_size) 1201 if (!x->select_tx_size)
980 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE); 1202 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
981 } 1203 }
982 } 1204 }
983 1205
984 xd->mi[0].src_mi->mbmi.uv_mode = mode_selected; 1206 xd->mi[0].src_mi->mbmi.uv_mode = mode_selected;
985 return best_rd; 1207 return best_rd;
986 } 1208 }
987 1209
988 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, 1210 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
989 int *rate, int *rate_tokenonly, 1211 int *rate, int *rate_tokenonly,
990 int64_t *distortion, int *skippable, 1212 int64_t *distortion, int *skippable,
991 BLOCK_SIZE bsize) { 1213 BLOCK_SIZE bsize) {
992 const VP9_COMMON *cm = &cpi->common; 1214 const VP9_COMMON *cm = &cpi->common;
993 int64_t unused; 1215 int64_t unused;
994 1216
995 x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED; 1217 x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;
1218 vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
996 super_block_uvrd(cpi, x, rate_tokenonly, distortion, 1219 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
997 skippable, &unused, bsize, INT64_MAX); 1220 skippable, &unused, bsize, INT64_MAX);
998 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED]; 1221 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
999 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); 1222 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1000 } 1223 }
1001 1224
1002 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, 1225 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1003 BLOCK_SIZE bsize, TX_SIZE max_tx_size, 1226 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1004 int *rate_uv, int *rate_uv_tokenonly, 1227 int *rate_uv, int *rate_uv_tokenonly,
1005 int64_t *dist_uv, int *skip_uv, 1228 int64_t *dist_uv, int *skip_uv,
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
1114 pd->dst.stride)]; 1337 pd->dst.stride)];
1115 int64_t thisdistortion = 0, thissse = 0; 1338 int64_t thisdistortion = 0, thissse = 0;
1116 int thisrate = 0, ref; 1339 int thisrate = 0, ref;
1117 const scan_order *so = &vp9_default_scan_orders[TX_4X4]; 1340 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1118 const int is_compound = has_second_ref(&mi->mbmi); 1341 const int is_compound = has_second_ref(&mi->mbmi);
1119 const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); 1342 const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1120 1343
1121 for (ref = 0; ref < 1 + is_compound; ++ref) { 1344 for (ref = 0; ref < 1 + is_compound; ++ref) {
1122 const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i, 1345 const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1123 pd->pre[ref].stride)]; 1346 pd->pre[ref].stride)];
1347 #if CONFIG_VP9_HIGHBITDEPTH
1348 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1349 vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
1350 dst, pd->dst.stride,
1351 &mi->bmi[i].as_mv[ref].as_mv,
1352 &xd->block_refs[ref]->sf, width, height,
1353 ref, kernel, MV_PRECISION_Q3,
1354 mi_col * MI_SIZE + 4 * (i % 2),
1355 mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
1356 } else {
1124 vp9_build_inter_predictor(pre, pd->pre[ref].stride, 1357 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1125 dst, pd->dst.stride, 1358 dst, pd->dst.stride,
1126 &mi->bmi[i].as_mv[ref].as_mv, 1359 &mi->bmi[i].as_mv[ref].as_mv,
1127 &xd->block_refs[ref]->sf, width, height, ref, 1360 &xd->block_refs[ref]->sf, width, height, ref,
1128 kernel, MV_PRECISION_Q3, 1361 kernel, MV_PRECISION_Q3,
1129 mi_col * MI_SIZE + 4 * (i % 2), 1362 mi_col * MI_SIZE + 4 * (i % 2),
1130 mi_row * MI_SIZE + 4 * (i / 2)); 1363 mi_row * MI_SIZE + 4 * (i / 2));
1131 } 1364 }
1365 #else
1366 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1367 dst, pd->dst.stride,
1368 &mi->bmi[i].as_mv[ref].as_mv,
1369 &xd->block_refs[ref]->sf, width, height, ref,
1370 kernel, MV_PRECISION_Q3,
1371 mi_col * MI_SIZE + 4 * (i % 2),
1372 mi_row * MI_SIZE + 4 * (i / 2));
1373 #endif // CONFIG_VP9_HIGHBITDEPTH
1374 }
1132 1375
1376 #if CONFIG_VP9_HIGHBITDEPTH
1377 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1378 vp9_highbd_subtract_block(
1379 height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1380 src, p->src.stride, dst, pd->dst.stride, xd->bd);
1381 } else {
1382 vp9_subtract_block(
1383 height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1384 src, p->src.stride, dst, pd->dst.stride);
1385 }
1386 #else
1133 vp9_subtract_block(height, width, 1387 vp9_subtract_block(height, width,
1134 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, 1388 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1135 src, p->src.stride, 1389 src, p->src.stride, dst, pd->dst.stride);
1136 dst, pd->dst.stride); 1390 #endif // CONFIG_VP9_HIGHBITDEPTH
1137 1391
1138 k = i; 1392 k = i;
1139 for (idy = 0; idy < height / 4; ++idy) { 1393 for (idy = 0; idy < height / 4; ++idy) {
1140 for (idx = 0; idx < width / 4; ++idx) { 1394 for (idx = 0; idx < width / 4; ++idx) {
1141 int64_t ssz, rd, rd1, rd2; 1395 int64_t ssz, rd, rd1, rd2;
1142 tran_low_t* coeff; 1396 tran_low_t* coeff;
1143 1397
1144 k += (idy * 2 + idx); 1398 k += (idy * 2 + idx);
1145 coeff = BLOCK_OFFSET(p->coeff, k); 1399 coeff = BLOCK_OFFSET(p->coeff, k);
1146 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), 1400 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1147 coeff, 8); 1401 coeff, 8);
1148 vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan); 1402 vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1403 #if CONFIG_VP9_HIGHBITDEPTH
1404 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1405 thisdistortion += vp9_highbd_block_error(coeff,
1406 BLOCK_OFFSET(pd->dqcoeff, k),
1407 16, &ssz, xd->bd);
1408 } else {
1409 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1410 16, &ssz);
1411 }
1412 #else
1149 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 1413 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1150 16, &ssz); 1414 16, &ssz);
1415 #endif // CONFIG_VP9_HIGHBITDEPTH
1151 thissse += ssz; 1416 thissse += ssz;
1152 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, 1417 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1153 so->scan, so->neighbors, 1418 so->scan, so->neighbors,
1154 cpi->sf.use_fast_coef_costing); 1419 cpi->sf.use_fast_coef_costing);
1155 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); 1420 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1156 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); 1421 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1157 rd = MIN(rd1, rd2); 1422 rd = MIN(rd1, rd2);
1158 if (rd >= best_yrd) 1423 if (rd >= best_yrd)
1159 return INT64_MAX; 1424 return INT64_MAX;
1160 } 1425 }
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after
1362 1627
1363 // motion search for newmv (single predictor case only) 1628 // motion search for newmv (single predictor case only)
1364 if (!has_second_rf && this_mode == NEWMV && 1629 if (!has_second_rf && this_mode == NEWMV &&
1365 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { 1630 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1366 MV *const new_mv = &mode_mv[NEWMV][0].as_mv; 1631 MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1367 int step_param = 0; 1632 int step_param = 0;
1368 int thissme, bestsme = INT_MAX; 1633 int thissme, bestsme = INT_MAX;
1369 int sadpb = x->sadperbit4; 1634 int sadpb = x->sadperbit4;
1370 MV mvp_full; 1635 MV mvp_full;
1371 int max_mv; 1636 int max_mv;
1372 int sad_list[5]; 1637 int cost_list[5];
1373 1638
1374 /* Is the best so far sufficiently good that we cant justify doing 1639 /* Is the best so far sufficiently good that we cant justify doing
1375 * and new motion search. */ 1640 * and new motion search. */
1376 if (best_rd < label_mv_thresh) 1641 if (best_rd < label_mv_thresh)
1377 break; 1642 break;
1378 1643
1379 if (cpi->oxcf.mode != BEST) { 1644 if (cpi->oxcf.mode != BEST) {
1380 // use previous block's result as next block's MV predictor. 1645 // use previous block's result as next block's MV predictor.
1381 if (i > 0) { 1646 if (i > 0) {
1382 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; 1647 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
(...skipping 25 matching lines...) Expand all
1408 step_param = MAX(step_param, 8); 1673 step_param = MAX(step_param, 8);
1409 } 1674 }
1410 1675
1411 // adjust src pointer for this block 1676 // adjust src pointer for this block
1412 mi_buf_shift(x, i); 1677 mi_buf_shift(x, i);
1413 1678
1414 vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv); 1679 vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1415 1680
1416 bestsme = vp9_full_pixel_search( 1681 bestsme = vp9_full_pixel_search(
1417 cpi, x, bsize, &mvp_full, step_param, sadpb, 1682 cpi, x, bsize, &mvp_full, step_param, sadpb,
1418 cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL, 1683 cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
1419 &bsi->ref_mv[0]->as_mv, new_mv, 1684 &bsi->ref_mv[0]->as_mv, new_mv,
1420 INT_MAX, 1); 1685 INT_MAX, 1);
1421 1686
1422 // Should we do a full search (best quality only) 1687 // Should we do a full search (best quality only)
1423 if (cpi->oxcf.mode == BEST) { 1688 if (cpi->oxcf.mode == BEST) {
1424 int_mv *const best_mv = &mi->bmi[i].as_mv[0]; 1689 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1425 /* Check if mvp_full is within the range. */ 1690 /* Check if mvp_full is within the range. */
1426 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, 1691 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1427 x->mv_row_min, x->mv_row_max); 1692 x->mv_row_min, x->mv_row_max);
1428 thissme = cpi->full_search_sad(x, &mvp_full, 1693 thissme = cpi->full_search_sad(x, &mvp_full,
1429 sadpb, 16, &cpi->fn_ptr[bsize], 1694 sadpb, 16, &cpi->fn_ptr[bsize],
1430 &bsi->ref_mv[0]->as_mv, 1695 &bsi->ref_mv[0]->as_mv,
1431 &best_mv->as_mv); 1696 &best_mv->as_mv);
1432 sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX; 1697 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
1433 if (thissme < bestsme) { 1698 if (thissme < bestsme) {
1434 bestsme = thissme; 1699 bestsme = thissme;
1435 *new_mv = best_mv->as_mv; 1700 *new_mv = best_mv->as_mv;
1436 } else { 1701 } else {
1437 // The full search result is actually worse so re-instate the 1702 // The full search result is actually worse so re-instate the
1438 // previous best vector 1703 // previous best vector
1439 best_mv->as_mv = *new_mv; 1704 best_mv->as_mv = *new_mv;
1440 } 1705 }
1441 } 1706 }
1442 1707
1443 if (bestsme < INT_MAX) { 1708 if (bestsme < INT_MAX) {
1444 int distortion; 1709 int distortion;
1445 cpi->find_fractional_mv_step( 1710 cpi->find_fractional_mv_step(
1446 x, 1711 x,
1447 new_mv, 1712 new_mv,
1448 &bsi->ref_mv[0]->as_mv, 1713 &bsi->ref_mv[0]->as_mv,
1449 cm->allow_high_precision_mv, 1714 cm->allow_high_precision_mv,
1450 x->errorperbit, &cpi->fn_ptr[bsize], 1715 x->errorperbit, &cpi->fn_ptr[bsize],
1451 cpi->sf.mv.subpel_force_stop, 1716 cpi->sf.mv.subpel_force_stop,
1452 cpi->sf.mv.subpel_iters_per_step, 1717 cpi->sf.mv.subpel_iters_per_step,
1453 cond_sad_list(cpi, sad_list), 1718 cond_cost_list(cpi, cost_list),
1454 x->nmvjointcost, x->mvcost, 1719 x->nmvjointcost, x->mvcost,
1455 &distortion, 1720 &distortion,
1456 &x->pred_sse[mbmi->ref_frame[0]], 1721 &x->pred_sse[mbmi->ref_frame[0]],
1457 NULL, 0, 0); 1722 NULL, 0, 0);
1458 1723
1459 // save motion search result for use in compound prediction 1724 // save motion search result for use in compound prediction
1460 seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv; 1725 seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1461 } 1726 }
1462 1727
1463 if (cpi->sf.adaptive_motion_search) 1728 if (cpi->sf.adaptive_motion_search)
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after
1777 int step_param; 2042 int step_param;
1778 int sadpb = x->sadperbit16; 2043 int sadpb = x->sadperbit16;
1779 MV mvp_full; 2044 MV mvp_full;
1780 int ref = mbmi->ref_frame[0]; 2045 int ref = mbmi->ref_frame[0];
1781 MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; 2046 MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
1782 2047
1783 int tmp_col_min = x->mv_col_min; 2048 int tmp_col_min = x->mv_col_min;
1784 int tmp_col_max = x->mv_col_max; 2049 int tmp_col_max = x->mv_col_max;
1785 int tmp_row_min = x->mv_row_min; 2050 int tmp_row_min = x->mv_row_min;
1786 int tmp_row_max = x->mv_row_max; 2051 int tmp_row_max = x->mv_row_max;
1787 int sad_list[5]; 2052 int cost_list[5];
1788 2053
1789 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, 2054 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
1790 ref); 2055 ref);
1791 2056
1792 MV pred_mv[3]; 2057 MV pred_mv[3];
1793 pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv; 2058 pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
1794 pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv; 2059 pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
1795 pred_mv[2] = x->pred_mv[ref]; 2060 pred_mv[2] = x->pred_mv[ref];
1796 2061
1797 if (scaled_ref_frame) { 2062 if (scaled_ref_frame) {
(...skipping 15 matching lines...) Expand all
1813 // Take wtd average of the step_params based on the last frame's 2078 // Take wtd average of the step_params based on the last frame's
1814 // max mv magnitude and that based on the best ref mvs of the current 2079 // max mv magnitude and that based on the best ref mvs of the current
1815 // block for the given reference. 2080 // block for the given reference.
1816 step_param = (vp9_init_search_range(x->max_mv_context[ref]) + 2081 step_param = (vp9_init_search_range(x->max_mv_context[ref]) +
1817 cpi->mv_step_param) / 2; 2082 cpi->mv_step_param) / 2;
1818 } else { 2083 } else {
1819 step_param = cpi->mv_step_param; 2084 step_param = cpi->mv_step_param;
1820 } 2085 }
1821 2086
1822 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { 2087 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
1823 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize), 2088 int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
1824 b_width_log2(bsize))); 2089 MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
1825 step_param = MAX(step_param, boffset); 2090 step_param = MAX(step_param, boffset);
1826 } 2091 }
1827 2092
1828 if (cpi->sf.adaptive_motion_search) { 2093 if (cpi->sf.adaptive_motion_search) {
1829 int bwl = b_width_log2(bsize); 2094 int bwl = b_width_log2_lookup[bsize];
1830 int bhl = b_height_log2(bsize); 2095 int bhl = b_height_log2_lookup[bsize];
1831 int i; 2096 int i;
1832 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); 2097 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
1833 2098
1834 if (tlevel < 5) 2099 if (tlevel < 5)
1835 step_param += 2; 2100 step_param += 2;
1836 2101
1837 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { 2102 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
1838 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { 2103 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
1839 x->pred_mv[ref].row = 0; 2104 x->pred_mv[ref].row = 0;
1840 x->pred_mv[ref].col = 0; 2105 x->pred_mv[ref].col = 0;
1841 tmp_mv->as_int = INVALID_MV; 2106 tmp_mv->as_int = INVALID_MV;
1842 2107
1843 if (scaled_ref_frame) { 2108 if (scaled_ref_frame) {
1844 int i; 2109 int i;
1845 for (i = 0; i < MAX_MB_PLANE; i++) 2110 for (i = 0; i < MAX_MB_PLANE; i++)
1846 xd->plane[i].pre[0] = backup_yv12[i]; 2111 xd->plane[i].pre[0] = backup_yv12[i];
1847 } 2112 }
1848 return; 2113 return;
1849 } 2114 }
1850 } 2115 }
1851 } 2116 }
1852 2117
1853 mvp_full = pred_mv[x->mv_best_ref_index[ref]]; 2118 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
1854 2119
1855 mvp_full.col >>= 3; 2120 mvp_full.col >>= 3;
1856 mvp_full.row >>= 3; 2121 mvp_full.row >>= 3;
1857 2122
1858 bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, 2123 bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
1859 cond_sad_list(cpi, sad_list), 2124 cond_cost_list(cpi, cost_list),
1860 &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); 2125 &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
1861 2126
1862 x->mv_col_min = tmp_col_min; 2127 x->mv_col_min = tmp_col_min;
1863 x->mv_col_max = tmp_col_max; 2128 x->mv_col_max = tmp_col_max;
1864 x->mv_row_min = tmp_row_min; 2129 x->mv_row_min = tmp_row_min;
1865 x->mv_row_max = tmp_row_max; 2130 x->mv_row_max = tmp_row_max;
1866 2131
1867 if (bestsme < INT_MAX) { 2132 if (bestsme < INT_MAX) {
1868 int dis; /* TODO: use dis in distortion calculation later. */ 2133 int dis; /* TODO: use dis in distortion calculation later. */
1869 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, 2134 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
1870 cm->allow_high_precision_mv, 2135 cm->allow_high_precision_mv,
1871 x->errorperbit, 2136 x->errorperbit,
1872 &cpi->fn_ptr[bsize], 2137 &cpi->fn_ptr[bsize],
1873 cpi->sf.mv.subpel_force_stop, 2138 cpi->sf.mv.subpel_force_stop,
1874 cpi->sf.mv.subpel_iters_per_step, 2139 cpi->sf.mv.subpel_iters_per_step,
1875 cond_sad_list(cpi, sad_list), 2140 cond_cost_list(cpi, cost_list),
1876 x->nmvjointcost, x->mvcost, 2141 x->nmvjointcost, x->mvcost,
1877 &dis, &x->pred_sse[ref], NULL, 0, 0); 2142 &dis, &x->pred_sse[ref], NULL, 0, 0);
1878 } 2143 }
1879 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, 2144 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
1880 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 2145 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
1881 2146
1882 if (cpi->sf.adaptive_motion_search) 2147 if (cpi->sf.adaptive_motion_search)
1883 x->pred_mv[ref] = tmp_mv->as_mv; 2148 x->pred_mv[ref] = tmp_mv->as_mv;
1884 2149
1885 if (scaled_ref_frame) { 2150 if (scaled_ref_frame) {
(...skipping 11 matching lines...) Expand all
1897 int *rate_mv) { 2162 int *rate_mv) {
1898 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; 2163 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
1899 const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; 2164 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
1900 MACROBLOCKD *xd = &x->e_mbd; 2165 MACROBLOCKD *xd = &x->e_mbd;
1901 MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; 2166 MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
1902 const int refs[2] = { mbmi->ref_frame[0], 2167 const int refs[2] = { mbmi->ref_frame[0],
1903 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; 2168 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1904 int_mv ref_mv[2]; 2169 int_mv ref_mv[2];
1905 int ite, ref; 2170 int ite, ref;
1906 // Prediction buffer from second frame. 2171 // Prediction buffer from second frame.
2172 #if CONFIG_VP9_HIGHBITDEPTH
2173 uint8_t *second_pred;
2174 uint8_t *second_pred_alloc;
2175 #else
1907 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); 2176 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2177 #endif // CONFIG_VP9_HIGHBITDEPTH
1908 const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter); 2178 const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
1909 2179
1910 // Do joint motion search in compound mode to get more accurate mv. 2180 // Do joint motion search in compound mode to get more accurate mv.
1911 struct buf_2d backup_yv12[2][MAX_MB_PLANE]; 2181 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
1912 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0]; 2182 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
1913 int last_besterr[2] = {INT_MAX, INT_MAX}; 2183 int last_besterr[2] = {INT_MAX, INT_MAX};
1914 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { 2184 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
1915 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), 2185 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
1916 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) 2186 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
1917 }; 2187 };
2188 #if CONFIG_VP9_HIGHBITDEPTH
2189 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2190 second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint16_t));
2191 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc);
2192 } else {
2193 second_pred_alloc = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2194 second_pred = second_pred_alloc;
2195 }
2196 #endif // CONFIG_VP9_HIGHBITDEPTH
1918 2197
1919 for (ref = 0; ref < 2; ++ref) { 2198 for (ref = 0; ref < 2; ++ref) {
1920 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0]; 2199 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
1921 2200
1922 if (scaled_ref_frame[ref]) { 2201 if (scaled_ref_frame[ref]) {
1923 int i; 2202 int i;
1924 // Swap out the reference frame for a version that's been scaled to 2203 // Swap out the reference frame for a version that's been scaled to
1925 // match the resolution of the current frame, allowing the existing 2204 // match the resolution of the current frame, allowing the existing
1926 // motion search code to be used without additional modifications. 2205 // motion search code to be used without additional modifications.
1927 for (i = 0; i < MAX_MB_PLANE; i++) 2206 for (i = 0; i < MAX_MB_PLANE; i++)
(...skipping 18 matching lines...) Expand all
1946 int tmp_col_max = x->mv_col_max; 2225 int tmp_col_max = x->mv_col_max;
1947 int tmp_row_min = x->mv_row_min; 2226 int tmp_row_min = x->mv_row_min;
1948 int tmp_row_max = x->mv_row_max; 2227 int tmp_row_max = x->mv_row_max;
1949 int id = ite % 2; 2228 int id = ite % 2;
1950 2229
1951 // Initialized here because of compiler problem in Visual Studio. 2230 // Initialized here because of compiler problem in Visual Studio.
1952 ref_yv12[0] = xd->plane[0].pre[0]; 2231 ref_yv12[0] = xd->plane[0].pre[0];
1953 ref_yv12[1] = xd->plane[0].pre[1]; 2232 ref_yv12[1] = xd->plane[0].pre[1];
1954 2233
1955 // Get pred block from second frame. 2234 // Get pred block from second frame.
2235 #if CONFIG_VP9_HIGHBITDEPTH
2236 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2237 vp9_highbd_build_inter_predictor(ref_yv12[!id].buf,
2238 ref_yv12[!id].stride,
2239 second_pred, pw,
2240 &frame_mv[refs[!id]].as_mv,
2241 &xd->block_refs[!id]->sf,
2242 pw, ph, 0,
2243 kernel, MV_PRECISION_Q3,
2244 mi_col * MI_SIZE, mi_row * MI_SIZE,
2245 xd->bd);
2246 } else {
2247 vp9_build_inter_predictor(ref_yv12[!id].buf,
2248 ref_yv12[!id].stride,
2249 second_pred, pw,
2250 &frame_mv[refs[!id]].as_mv,
2251 &xd->block_refs[!id]->sf,
2252 pw, ph, 0,
2253 kernel, MV_PRECISION_Q3,
2254 mi_col * MI_SIZE, mi_row * MI_SIZE);
2255 }
2256 #else
1956 vp9_build_inter_predictor(ref_yv12[!id].buf, 2257 vp9_build_inter_predictor(ref_yv12[!id].buf,
1957 ref_yv12[!id].stride, 2258 ref_yv12[!id].stride,
1958 second_pred, pw, 2259 second_pred, pw,
1959 &frame_mv[refs[!id]].as_mv, 2260 &frame_mv[refs[!id]].as_mv,
1960 &xd->block_refs[!id]->sf, 2261 &xd->block_refs[!id]->sf,
1961 pw, ph, 0, 2262 pw, ph, 0,
1962 kernel, MV_PRECISION_Q3, 2263 kernel, MV_PRECISION_Q3,
1963 mi_col * MI_SIZE, mi_row * MI_SIZE); 2264 mi_col * MI_SIZE, mi_row * MI_SIZE);
2265 #endif // CONFIG_VP9_HIGHBITDEPTH
1964 2266
1965 // Compound motion search on first ref frame. 2267 // Compound motion search on first ref frame.
1966 if (id) 2268 if (id)
1967 xd->plane[0].pre[0] = ref_yv12[id]; 2269 xd->plane[0].pre[0] = ref_yv12[id];
1968 vp9_set_mv_search_range(x, &ref_mv[id].as_mv); 2270 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
1969 2271
1970 // Use mv result from single mode as mvp. 2272 // Use mv result from single mode as mvp.
1971 tmp_mv = frame_mv[refs[id]].as_mv; 2273 tmp_mv = frame_mv[refs[id]].as_mv;
1972 2274
1973 tmp_mv.col >>= 3; 2275 tmp_mv.col >>= 3;
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
2022 int i; 2324 int i;
2023 for (i = 0; i < MAX_MB_PLANE; i++) 2325 for (i = 0; i < MAX_MB_PLANE; i++)
2024 xd->plane[i].pre[ref] = backup_yv12[ref][i]; 2326 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2025 } 2327 }
2026 2328
2027 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv, 2329 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2028 &mbmi->ref_mvs[refs[ref]][0].as_mv, 2330 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2029 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); 2331 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2030 } 2332 }
2031 2333
2334 #if CONFIG_VP9_HIGHBITDEPTH
2335 vpx_free(second_pred_alloc);
2336 #else
2032 vpx_free(second_pred); 2337 vpx_free(second_pred);
2338 #endif // CONFIG_VP9_HIGHBITDEPTH
2033 } 2339 }
2034 2340
2035 static INLINE void restore_dst_buf(MACROBLOCKD *xd, 2341 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2036 uint8_t *orig_dst[MAX_MB_PLANE], 2342 uint8_t *orig_dst[MAX_MB_PLANE],
2037 int orig_dst_stride[MAX_MB_PLANE]) { 2343 int orig_dst_stride[MAX_MB_PLANE]) {
2038 int i; 2344 int i;
2039 for (i = 0; i < MAX_MB_PLANE; i++) { 2345 for (i = 0; i < MAX_MB_PLANE; i++) {
2040 xd->plane[i].dst.buf = orig_dst[i]; 2346 xd->plane[i].dst.buf = orig_dst[i];
2041 xd->plane[i].dst.stride = orig_dst_stride[i]; 2347 xd->plane[i].dst.stride = orig_dst_stride[i];
2042 } 2348 }
2043 } 2349 }
2044 2350
2045 static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
2046 BLOCK_SIZE bsize, int *rate2,
2047 int64_t *distortion, int64_t *distortion_uv,
2048 int *disable_skip) {
2049 VP9_COMMON *cm = &cpi->common;
2050 MACROBLOCKD *xd = &x->e_mbd;
2051 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2052 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2053 unsigned int var, sse;
2054 // Skipping threshold for ac.
2055 unsigned int thresh_ac;
2056 // Skipping threshold for dc
2057 unsigned int thresh_dc;
2058
2059 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2060 xd->plane[0].dst.buf,
2061 xd->plane[0].dst.stride, &sse);
2062
2063 if (x->encode_breakout > 0) {
2064 // Set a maximum for threshold to avoid big PSNR loss in low bitrate
2065 // case. Use extreme low threshold for static frames to limit skipping.
2066 const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2067 ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2068 // The encode_breakout input
2069 const unsigned int min_thresh =
2070 MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2071
2072 // Calculate threshold according to dequant value.
2073 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2074 thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2075
2076 // Adjust threshold according to partition size.
2077 thresh_ac >>= 8 - (b_width_log2(bsize) +
2078 b_height_log2(bsize));
2079 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2080 } else {
2081 thresh_ac = 0;
2082 thresh_dc = 0;
2083 }
2084
2085 // Y skipping condition checking
2086 if (sse < thresh_ac || sse == 0) {
2087 // dc skipping checking
2088 if ((sse - var) < thresh_dc || sse == var) {
2089 unsigned int sse_u, sse_v;
2090 unsigned int var_u, var_v;
2091
2092 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2093 x->plane[1].src.stride,
2094 xd->plane[1].dst.buf,
2095 xd->plane[1].dst.stride, &sse_u);
2096
2097 // U skipping condition checking
2098 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2099 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2100 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2101 x->plane[2].src.stride,
2102 xd->plane[2].dst.buf,
2103 xd->plane[2].dst.stride, &sse_v);
2104
2105 // V skipping condition checking
2106 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2107 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2108 x->skip = 1;
2109
2110 // The cost of skip bit needs to be added.
2111 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2112
2113 // Scaling factor for SSE from spatial domain to frequency domain
2114 // is 16. Adjust distortion accordingly.
2115 *distortion_uv = (sse_u + sse_v) << 4;
2116 *distortion = (sse << 4) + *distortion_uv;
2117
2118 *disable_skip = 1;
2119 }
2120 }
2121 }
2122 }
2123 }
2124
2125 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, 2351 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2126 BLOCK_SIZE bsize, 2352 BLOCK_SIZE bsize,
2127 int64_t txfm_cache[], 2353 int64_t txfm_cache[],
2128 int *rate2, int64_t *distortion, 2354 int *rate2, int64_t *distortion,
2129 int *skippable, 2355 int *skippable,
2130 int *rate_y, int64_t *distortion_y, 2356 int *rate_y, int *rate_uv,
2131 int *rate_uv, int64_t *distortion_uv,
2132 int *disable_skip, 2357 int *disable_skip,
2133 int_mv (*mode_mv)[MAX_REF_FRAMES], 2358 int_mv (*mode_mv)[MAX_REF_FRAMES],
2134 int mi_row, int mi_col, 2359 int mi_row, int mi_col,
2135 int_mv single_newmv[MAX_REF_FRAMES], 2360 int_mv single_newmv[MAX_REF_FRAMES],
2136 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES], 2361 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2137 int (*single_skippable)[MAX_REF_FRAMES], 2362 int (*single_skippable)[MAX_REF_FRAMES],
2138 int64_t *psse, 2363 int64_t *psse,
2139 const int64_t ref_best_rd) { 2364 const int64_t ref_best_rd) {
2140 VP9_COMMON *cm = &cpi->common; 2365 VP9_COMMON *cm = &cpi->common;
2141 RD_OPT *rd_opt = &cpi->rd; 2366 RD_OPT *rd_opt = &cpi->rd;
2142 MACROBLOCKD *xd = &x->e_mbd; 2367 MACROBLOCKD *xd = &x->e_mbd;
2143 MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; 2368 MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
2144 const int is_comp_pred = has_second_ref(mbmi); 2369 const int is_comp_pred = has_second_ref(mbmi);
2145 const int this_mode = mbmi->mode; 2370 const int this_mode = mbmi->mode;
2146 int_mv *frame_mv = mode_mv[this_mode]; 2371 int_mv *frame_mv = mode_mv[this_mode];
2147 int i; 2372 int i;
2148 int refs[2] = { mbmi->ref_frame[0], 2373 int refs[2] = { mbmi->ref_frame[0],
2149 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 2374 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2150 int_mv cur_mv[2]; 2375 int_mv cur_mv[2];
2151 int64_t this_rd = 0; 2376 #if CONFIG_VP9_HIGHBITDEPTH
2377 DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);
2378 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);
2379 uint8_t *tmp_buf;
2380 #else
2152 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); 2381 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2382 #endif // CONFIG_VP9_HIGHBITDEPTH
2153 int pred_exists = 0; 2383 int pred_exists = 0;
2154 int intpel_mv; 2384 int intpel_mv;
2155 int64_t rd, tmp_rd, best_rd = INT64_MAX; 2385 int64_t rd, tmp_rd, best_rd = INT64_MAX;
2156 int best_needs_copy = 0; 2386 int best_needs_copy = 0;
2157 uint8_t *orig_dst[MAX_MB_PLANE]; 2387 uint8_t *orig_dst[MAX_MB_PLANE];
2158 int orig_dst_stride[MAX_MB_PLANE]; 2388 int orig_dst_stride[MAX_MB_PLANE];
2159 int rs = 0; 2389 int rs = 0;
2160 INTERP_FILTER best_filter = SWITCHABLE; 2390 INTERP_FILTER best_filter = SWITCHABLE;
2161 uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0}; 2391 uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
2162 int64_t bsse[MAX_MB_PLANE << 2] = {0}; 2392 int64_t bsse[MAX_MB_PLANE << 2] = {0};
2163 2393
2164 int bsl = mi_width_log2_lookup[bsize]; 2394 int bsl = mi_width_log2_lookup[bsize];
2165 int pred_filter_search = cpi->sf.cb_pred_filter_search ? 2395 int pred_filter_search = cpi->sf.cb_pred_filter_search ?
2166 (((mi_row + mi_col) >> bsl) + 2396 (((mi_row + mi_col) >> bsl) +
2167 get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; 2397 get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
2168 2398
2399 int skip_txfm_sb = 0;
2400 int64_t skip_sse_sb = INT64_MAX;
2401 int64_t distortion_y = 0, distortion_uv = 0;
2402
2403 #if CONFIG_VP9_HIGHBITDEPTH
2404 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2405 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
2406 } else {
2407 tmp_buf = tmp_buf8;
2408 }
2409 #endif // CONFIG_VP9_HIGHBITDEPTH
2410
2169 if (pred_filter_search) { 2411 if (pred_filter_search) {
2170 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; 2412 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2171 if (xd->up_available) 2413 if (xd->up_available)
2172 af = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter; 2414 af = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
2173 if (xd->left_available) 2415 if (xd->left_available)
2174 lf = xd->mi[-1].src_mi->mbmi.interp_filter; 2416 lf = xd->mi[-1].src_mi->mbmi.interp_filter;
2175 2417
2176 if ((this_mode != NEWMV) || (af == lf)) 2418 if ((this_mode != NEWMV) || (af == lf))
2177 best_filter = af; 2419 best_filter = af;
2178 } 2420 }
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
2268 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { 2510 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2269 best_filter = EIGHTTAP; 2511 best_filter = EIGHTTAP;
2270 } else if (best_filter == SWITCHABLE) { 2512 } else if (best_filter == SWITCHABLE) {
2271 int newbest; 2513 int newbest;
2272 int tmp_rate_sum = 0; 2514 int tmp_rate_sum = 0;
2273 int64_t tmp_dist_sum = 0; 2515 int64_t tmp_dist_sum = 0;
2274 2516
2275 for (i = 0; i < SWITCHABLE_FILTERS; ++i) { 2517 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2276 int j; 2518 int j;
2277 int64_t rs_rd; 2519 int64_t rs_rd;
2520 int tmp_skip_sb = 0;
2521 int64_t tmp_skip_sse = INT64_MAX;
2522
2278 mbmi->interp_filter = i; 2523 mbmi->interp_filter = i;
2279 rs = vp9_get_switchable_rate(cpi); 2524 rs = vp9_get_switchable_rate(cpi);
2280 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); 2525 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2281 2526
2282 if (i > 0 && intpel_mv) { 2527 if (i > 0 && intpel_mv) {
2283 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); 2528 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2284 rd_opt->filter_cache[i] = rd; 2529 rd_opt->filter_cache[i] = rd;
2285 rd_opt->filter_cache[SWITCHABLE_FILTERS] = 2530 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2286 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); 2531 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2287 if (cm->interp_filter == SWITCHABLE) 2532 if (cm->interp_filter == SWITCHABLE)
(...skipping 15 matching lines...) Expand all
2303 (cm->interp_filter == mbmi->interp_filter || 2548 (cm->interp_filter == mbmi->interp_filter ||
2304 (i == 0 && intpel_mv)))) { 2549 (i == 0 && intpel_mv)))) {
2305 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2550 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2306 } else { 2551 } else {
2307 for (j = 0; j < MAX_MB_PLANE; j++) { 2552 for (j = 0; j < MAX_MB_PLANE; j++) {
2308 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; 2553 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2309 xd->plane[j].dst.stride = 64; 2554 xd->plane[j].dst.stride = 64;
2310 } 2555 }
2311 } 2556 }
2312 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2557 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2313 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); 2558 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
2559 &tmp_skip_sb, &tmp_skip_sse);
2314 2560
2315 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); 2561 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2316 rd_opt->filter_cache[i] = rd; 2562 rd_opt->filter_cache[i] = rd;
2317 rd_opt->filter_cache[SWITCHABLE_FILTERS] = 2563 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2318 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); 2564 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2319 if (cm->interp_filter == SWITCHABLE) 2565 if (cm->interp_filter == SWITCHABLE)
2320 rd += rs_rd; 2566 rd += rs_rd;
2321 rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd); 2567 rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2322 2568
2323 if (i == 0 && intpel_mv) { 2569 if (i == 0 && intpel_mv) {
2324 tmp_rate_sum = rate_sum; 2570 tmp_rate_sum = rate_sum;
2325 tmp_dist_sum = dist_sum; 2571 tmp_dist_sum = dist_sum;
2326 } 2572 }
2327 } 2573 }
2328 2574
2329 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2575 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2330 if (rd / 2 > ref_best_rd) { 2576 if (rd / 2 > ref_best_rd) {
2331 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2577 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2332 return INT64_MAX; 2578 return INT64_MAX;
2333 } 2579 }
2334 } 2580 }
2335 newbest = i == 0 || rd < best_rd; 2581 newbest = i == 0 || rd < best_rd;
2336 2582
2337 if (newbest) { 2583 if (newbest) {
2338 best_rd = rd; 2584 best_rd = rd;
2339 best_filter = mbmi->interp_filter; 2585 best_filter = mbmi->interp_filter;
2340 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) 2586 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2341 best_needs_copy = !best_needs_copy; 2587 best_needs_copy = !best_needs_copy;
2342 vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2343 vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2344 } 2588 }
2345 2589
2346 if ((cm->interp_filter == SWITCHABLE && newbest) || 2590 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2347 (cm->interp_filter != SWITCHABLE && 2591 (cm->interp_filter != SWITCHABLE &&
2348 cm->interp_filter == mbmi->interp_filter)) { 2592 cm->interp_filter == mbmi->interp_filter)) {
2349 pred_exists = 1; 2593 pred_exists = 1;
2350 tmp_rd = best_rd; 2594 tmp_rd = best_rd;
2595
2596 skip_txfm_sb = tmp_skip_sb;
2597 skip_sse_sb = tmp_skip_sse;
2598 vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2599 vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2351 } 2600 }
2352 } 2601 }
2353 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2602 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2354 } 2603 }
2355 } 2604 }
2356 // Set the appropriate filter 2605 // Set the appropriate filter
2357 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? 2606 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2358 cm->interp_filter : best_filter; 2607 cm->interp_filter : best_filter;
2359 rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0; 2608 rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2360 2609
2361 if (pred_exists) { 2610 if (pred_exists) {
2362 if (best_needs_copy) { 2611 if (best_needs_copy) {
2363 // again temporarily set the buffers to local memory to prevent a memcpy 2612 // again temporarily set the buffers to local memory to prevent a memcpy
2364 for (i = 0; i < MAX_MB_PLANE; i++) { 2613 for (i = 0; i < MAX_MB_PLANE; i++) {
2365 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; 2614 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2366 xd->plane[i].dst.stride = 64; 2615 xd->plane[i].dst.stride = 64;
2367 } 2616 }
2368 } 2617 }
2369 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); 2618 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
2370 } else { 2619 } else {
2371 int tmp_rate; 2620 int tmp_rate;
2372 int64_t tmp_dist; 2621 int64_t tmp_dist;
2373 // Handles the special case when a filter that is not in the 2622 // Handles the special case when a filter that is not in the
2374 // switchable list (ex. bilinear) is indicated at the frame level, or 2623 // switchable list (ex. bilinear) is indicated at the frame level, or
2375 // skip condition holds. 2624 // skip condition holds.
2376 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); 2625 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2377 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist); 2626 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
2627 &skip_txfm_sb, &skip_sse_sb);
2378 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); 2628 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2379 vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); 2629 vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2380 vpx_memcpy(bsse, x->bsse, sizeof(bsse)); 2630 vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2381 } 2631 }
2382 2632
2383 if (!is_comp_pred) 2633 if (!is_comp_pred)
2384 single_filter[this_mode][refs[0]] = mbmi->interp_filter; 2634 single_filter[this_mode][refs[0]] = mbmi->interp_filter;
2385 2635
2386 if (cpi->sf.adaptive_mode_search) 2636 if (cpi->sf.adaptive_mode_search)
2387 if (is_comp_pred) 2637 if (is_comp_pred)
2388 if (single_skippable[this_mode][refs[0]] && 2638 if (single_skippable[this_mode][refs[0]] &&
2389 single_skippable[this_mode][refs[1]]) 2639 single_skippable[this_mode][refs[1]])
2390 vpx_memset(skip_txfm, 1, sizeof(skip_txfm)); 2640 vpx_memset(skip_txfm, 1, sizeof(skip_txfm));
2391 2641
2392 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { 2642 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2393 // if current pred_error modeled rd is substantially more than the best 2643 // if current pred_error modeled rd is substantially more than the best
2394 // so far, do not bother doing full rd 2644 // so far, do not bother doing full rd
2395 if (rd / 2 > ref_best_rd) { 2645 if (rd / 2 > ref_best_rd) {
2396 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2646 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2397 return INT64_MAX; 2647 return INT64_MAX;
2398 } 2648 }
2399 } 2649 }
2400 2650
2401 if (cm->interp_filter == SWITCHABLE) 2651 if (cm->interp_filter == SWITCHABLE)
2402 *rate2 += rs; 2652 *rate2 += rs;
2403 2653
2404 if (!is_comp_pred) {
2405 if (cpi->allow_encode_breakout)
2406 rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,
2407 disable_skip);
2408 }
2409
2410 vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); 2654 vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
2411 vpx_memcpy(x->bsse, bsse, sizeof(bsse)); 2655 vpx_memcpy(x->bsse, bsse, sizeof(bsse));
2412 2656
2413 if (!x->skip) { 2657 if (!skip_txfm_sb) {
2414 int skippable_y, skippable_uv; 2658 int skippable_y, skippable_uv;
2415 int64_t sseuv = INT64_MAX; 2659 int64_t sseuv = INT64_MAX;
2416 int64_t rdcosty = INT64_MAX; 2660 int64_t rdcosty = INT64_MAX;
2417 2661
2418 // Y cost and distortion 2662 // Y cost and distortion
2419 vp9_subtract_plane(x, bsize, 0); 2663 vp9_subtract_plane(x, bsize, 0);
2420 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse, 2664 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
2421 bsize, txfm_cache, ref_best_rd); 2665 bsize, txfm_cache, ref_best_rd);
2422 2666
2423 if (*rate_y == INT_MAX) { 2667 if (*rate_y == INT_MAX) {
2424 *rate2 = INT_MAX; 2668 *rate2 = INT_MAX;
2425 *distortion = INT64_MAX; 2669 *distortion = INT64_MAX;
2426 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2670 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2427 return INT64_MAX; 2671 return INT64_MAX;
2428 } 2672 }
2429 2673
2430 *rate2 += *rate_y; 2674 *rate2 += *rate_y;
2431 *distortion += *distortion_y; 2675 *distortion += distortion_y;
2432 2676
2433 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); 2677 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2434 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); 2678 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2435 2679
2436 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv, 2680 if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
2437 bsize, ref_best_rd - rdcosty); 2681 &sseuv, bsize, ref_best_rd - rdcosty)) {
2438 if (*rate_uv == INT_MAX) {
2439 *rate2 = INT_MAX; 2682 *rate2 = INT_MAX;
2440 *distortion = INT64_MAX; 2683 *distortion = INT64_MAX;
2441 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2684 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2442 return INT64_MAX; 2685 return INT64_MAX;
2443 } 2686 }
2444 2687
2445 *psse += sseuv; 2688 *psse += sseuv;
2446 *rate2 += *rate_uv; 2689 *rate2 += *rate_uv;
2447 *distortion += *distortion_uv; 2690 *distortion += distortion_uv;
2448 *skippable = skippable_y && skippable_uv; 2691 *skippable = skippable_y && skippable_uv;
2692 } else {
2693 x->skip = 1;
2694 *disable_skip = 1;
2695
2696 // The cost of skip bit needs to be added.
2697 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2698
2699 *distortion = skip_sse_sb;
2449 } 2700 }
2450 2701
2451 if (!is_comp_pred) 2702 if (!is_comp_pred)
2452 single_skippable[this_mode][refs[0]] = *skippable; 2703 single_skippable[this_mode][refs[0]] = *skippable;
2453 2704
2454 restore_dst_buf(xd, orig_dst, orig_dst_stride); 2705 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2455 return this_rd; // if 0, this will be re-calculated by caller 2706 return 0; // The rate-distortion cost will be re-calculated by caller.
2456 } 2707 }
2457 2708
2458 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 2709 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2459 int *returnrate, int64_t *returndist, 2710 RD_COST *rd_cost, BLOCK_SIZE bsize,
2460 BLOCK_SIZE bsize,
2461 PICK_MODE_CONTEXT *ctx, int64_t best_rd) { 2711 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2462 VP9_COMMON *const cm = &cpi->common; 2712 VP9_COMMON *const cm = &cpi->common;
2463 MACROBLOCKD *const xd = &x->e_mbd; 2713 MACROBLOCKD *const xd = &x->e_mbd;
2464 struct macroblockd_plane *const pd = xd->plane; 2714 struct macroblockd_plane *const pd = xd->plane;
2465 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; 2715 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2466 int y_skip = 0, uv_skip = 0; 2716 int y_skip = 0, uv_skip = 0;
2467 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; 2717 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2468 TX_SIZE max_uv_tx_size; 2718 TX_SIZE max_uv_tx_size;
2469 x->skip_encode = 0; 2719 x->skip_encode = 0;
2470 ctx->skip = 0; 2720 ctx->skip = 0;
2471 xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME; 2721 xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME;
2472 2722
2473 if (bsize >= BLOCK_8X8) { 2723 if (bsize >= BLOCK_8X8) {
2474 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, 2724 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2475 &dist_y, &y_skip, bsize, tx_cache, 2725 &dist_y, &y_skip, bsize, tx_cache,
2476 best_rd) >= best_rd) { 2726 best_rd) >= best_rd) {
2477 *returnrate = INT_MAX; 2727 rd_cost->rate = INT_MAX;
2478 return; 2728 return;
2479 } 2729 }
2480 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
2481 pd[1].subsampling_x,
2482 pd[1].subsampling_y);
2483 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2484 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
2485 } else { 2730 } else {
2486 y_skip = 0; 2731 y_skip = 0;
2487 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, 2732 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2488 &dist_y, best_rd) >= best_rd) { 2733 &dist_y, best_rd) >= best_rd) {
2489 *returnrate = INT_MAX; 2734 rd_cost->rate = INT_MAX;
2490 return; 2735 return;
2491 } 2736 }
2492 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
2493 pd[1].subsampling_x,
2494 pd[1].subsampling_y);
2495 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2496 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
2497 } 2737 }
2738 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
2739 pd[1].subsampling_x,
2740 pd[1].subsampling_y);
2741 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2742 &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
2743 max_uv_tx_size);
2498 2744
2499 if (y_skip && uv_skip) { 2745 if (y_skip && uv_skip) {
2500 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + 2746 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2501 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); 2747 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2502 *returndist = dist_y + dist_uv; 2748 rd_cost->dist = dist_y + dist_uv;
2503 vp9_zero(ctx->tx_rd_diff); 2749 vp9_zero(ctx->tx_rd_diff);
2504 } else { 2750 } else {
2505 int i; 2751 int i;
2506 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); 2752 rd_cost->rate = rate_y + rate_uv +
2507 *returndist = dist_y + dist_uv; 2753 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2754 rd_cost->dist = dist_y + dist_uv;
2508 if (cpi->sf.tx_size_search_method == USE_FULL_RD) 2755 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
2509 for (i = 0; i < TX_MODES; i++) { 2756 for (i = 0; i < TX_MODES; i++) {
2510 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX) 2757 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
2511 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; 2758 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
2512 else 2759 else
2513 ctx->tx_rd_diff[i] = 0; 2760 ctx->tx_rd_diff[i] = 0;
2514 } 2761 }
2515 } 2762 }
2516 2763
2517 ctx->mic = *xd->mi[0].src_mi; 2764 ctx->mic = *xd->mi[0].src_mi;
2765 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
2518 } 2766 }
2519 2767
2520 // Updating rd_thresh_freq_fact[] here means that the different
2521 // partition/block sizes are handled independently based on the best
2522 // choice for the current partition. It may well be better to keep a scaled
2523 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
2524 // combination that wins out.
2525 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize, 2768 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
2526 int best_mode_index) { 2769 int best_mode_index) {
2527 if (cpi->sf.adaptive_rd_thresh > 0) { 2770 if (cpi->sf.adaptive_rd_thresh > 0) {
2528 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; 2771 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
2529 int mode; 2772 int mode;
2530 for (mode = 0; mode < top_mode; ++mode) { 2773 for (mode = 0; mode < top_mode; ++mode) {
2531 int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode]; 2774 const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
2532 2775 const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
2533 if (mode == best_mode_index) { 2776 BLOCK_SIZE bs;
2534 *fact -= (*fact >> 3); 2777 for (bs = min_size; bs <= max_size; ++bs) {
2535 } else { 2778 int *const fact = &cpi->rd.thresh_freq_fact[bs][mode];
2536 *fact = MIN(*fact + RD_THRESH_INC, 2779 if (mode == best_mode_index) {
2537 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); 2780 *fact -= (*fact >> 4);
2781 } else {
2782 *fact = MIN(*fact + RD_THRESH_INC,
2783 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
2784 }
2538 } 2785 }
2539 } 2786 }
2540 } 2787 }
2541 } 2788 }
2542 2789
2543 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, 2790 void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2544 const TileInfo *const tile, 2791 const TileInfo *const tile,
2545 int mi_row, int mi_col, 2792 int mi_row, int mi_col,
2546 int *returnrate, 2793 RD_COST *rd_cost, BLOCK_SIZE bsize,
2547 int64_t *returndistortion, 2794 PICK_MODE_CONTEXT *ctx,
2548 BLOCK_SIZE bsize, 2795 int64_t best_rd_so_far) {
2549 PICK_MODE_CONTEXT *ctx,
2550 int64_t best_rd_so_far) {
2551 VP9_COMMON *const cm = &cpi->common; 2796 VP9_COMMON *const cm = &cpi->common;
2552 RD_OPT *const rd_opt = &cpi->rd; 2797 RD_OPT *const rd_opt = &cpi->rd;
2798 SPEED_FEATURES *const sf = &cpi->sf;
2553 MACROBLOCKD *const xd = &x->e_mbd; 2799 MACROBLOCKD *const xd = &x->e_mbd;
2554 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; 2800 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
2555 const struct segmentation *const seg = &cm->seg; 2801 const struct segmentation *const seg = &cm->seg;
2556 struct macroblockd_plane *const pd = xd->plane;
2557 PREDICTION_MODE this_mode; 2802 PREDICTION_MODE this_mode;
2558 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 2803 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
2559 unsigned char segment_id = mbmi->segment_id; 2804 unsigned char segment_id = mbmi->segment_id;
2560 int comp_pred, i, k; 2805 int comp_pred, i, k;
2561 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 2806 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2562 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 2807 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2563 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; 2808 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
2564 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES]; 2809 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
2565 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES]; 2810 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
2566 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 2811 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
(...skipping 10 matching lines...) Expand all
2577 int midx, best_mode_index = -1; 2822 int midx, best_mode_index = -1;
2578 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 2823 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
2579 vp9_prob comp_mode_p; 2824 vp9_prob comp_mode_p;
2580 int64_t best_intra_rd = INT64_MAX; 2825 int64_t best_intra_rd = INT64_MAX;
2581 unsigned int best_pred_sse = UINT_MAX; 2826 unsigned int best_pred_sse = UINT_MAX;
2582 PREDICTION_MODE best_intra_mode = DC_PRED; 2827 PREDICTION_MODE best_intra_mode = DC_PRED;
2583 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; 2828 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
2584 int64_t dist_uv[TX_SIZES]; 2829 int64_t dist_uv[TX_SIZES];
2585 int skip_uv[TX_SIZES]; 2830 int skip_uv[TX_SIZES];
2586 PREDICTION_MODE mode_uv[TX_SIZES]; 2831 PREDICTION_MODE mode_uv[TX_SIZES];
2587 const int intra_cost_penalty = 2832 const int intra_cost_penalty = vp9_get_intra_cost_penalty(
2588 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); 2833 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
2589 int best_skip2 = 0; 2834 int best_skip2 = 0;
2590 uint8_t ref_frame_skip_mask[2] = { 0 }; 2835 uint8_t ref_frame_skip_mask[2] = { 0 };
2591 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; 2836 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
2592 int mode_skip_start = cpi->sf.mode_skip_start + 1; 2837 int mode_skip_start = sf->mode_skip_start + 1;
2593 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; 2838 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
2594 const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize]; 2839 const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
2595 int mode_threshold[MAX_MODES]; 2840 int64_t mode_threshold[MAX_MODES];
2596 int *mode_map = rd_opt->mode_map[bsize]; 2841 int *mode_map = rd_opt->mode_map[bsize];
2597 const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; 2842 const int mode_search_skip_flags = sf->mode_search_skip_flags;
2598 vp9_zero(best_mbmode); 2843 vp9_zero(best_mbmode);
2599 2844
2600 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 2845 x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
2601 2846
2602 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 2847 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
2603 &comp_mode_p); 2848 &comp_mode_p);
2604 2849
2605 for (i = 0; i < REFERENCE_MODES; ++i) 2850 for (i = 0; i < REFERENCE_MODES; ++i)
2606 best_pred_rd[i] = INT64_MAX; 2851 best_pred_rd[i] = INT64_MAX;
2607 for (i = 0; i < TX_MODES; i++) 2852 for (i = 0; i < TX_MODES; i++)
2608 best_tx_rd[i] = INT64_MAX; 2853 best_tx_rd[i] = INT64_MAX;
2609 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 2854 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2610 best_filter_rd[i] = INT64_MAX; 2855 best_filter_rd[i] = INT64_MAX;
2611 for (i = 0; i < TX_SIZES; i++) 2856 for (i = 0; i < TX_SIZES; i++)
2612 rate_uv_intra[i] = INT_MAX; 2857 rate_uv_intra[i] = INT_MAX;
2613 for (i = 0; i < MAX_REF_FRAMES; ++i) 2858 for (i = 0; i < MAX_REF_FRAMES; ++i)
2614 x->pred_sse[i] = INT_MAX; 2859 x->pred_sse[i] = INT_MAX;
2615 for (i = 0; i < MB_MODE_COUNT; ++i) { 2860 for (i = 0; i < MB_MODE_COUNT; ++i) {
2616 for (k = 0; k < MAX_REF_FRAMES; ++k) { 2861 for (k = 0; k < MAX_REF_FRAMES; ++k) {
2617 single_inter_filter[i][k] = SWITCHABLE; 2862 single_inter_filter[i][k] = SWITCHABLE;
2618 single_skippable[i][k] = 0; 2863 single_skippable[i][k] = 0;
2619 } 2864 }
2620 } 2865 }
2621 2866
2622 *returnrate = INT_MAX; 2867 rd_cost->rate = INT_MAX;
2623 2868
2624 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 2869 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2625 x->pred_mv_sad[ref_frame] = INT_MAX; 2870 x->pred_mv_sad[ref_frame] = INT_MAX;
2626 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 2871 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2627 setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col, 2872 setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col,
2628 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); 2873 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
2629 } 2874 }
2630 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 2875 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2631 frame_mv[ZEROMV][ref_frame].as_int = 0; 2876 frame_mv[ZEROMV][ref_frame].as_int = 0;
2632 } 2877 }
2633 2878
2634 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 2879 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2635 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { 2880 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
2636 // Skip checking missing references in both single and compound reference 2881 // Skip checking missing references in both single and compound reference
2637 // modes. Note that a mode will be skipped iff both reference frames 2882 // modes. Note that a mode will be skipped iff both reference frames
2638 // are masked out. 2883 // are masked out.
2639 ref_frame_skip_mask[0] |= (1 << ref_frame); 2884 ref_frame_skip_mask[0] |= (1 << ref_frame);
2640 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 2885 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2641 } else if (cpi->sf.reference_masking) { 2886 } else if (sf->reference_masking) {
2642 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { 2887 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
2643 // Skip fixed mv modes for poor references 2888 // Skip fixed mv modes for poor references
2644 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { 2889 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
2645 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO; 2890 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
2646 break; 2891 break;
2647 } 2892 }
2648 } 2893 }
2649 } 2894 }
2650 // If the segment reference frame feature is enabled.... 2895 // If the segment reference frame feature is enabled....
2651 // then do nothing if the current ref frame is not allowed.. 2896 // then do nothing if the current ref frame is not allowed..
(...skipping 17 matching lines...) Expand all
2669 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; 2914 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2670 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; 2915 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
2671 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) 2916 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
2672 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); 2917 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
2673 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) 2918 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
2674 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); 2919 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
2675 } 2920 }
2676 } 2921 }
2677 2922
2678 if (cpi->rc.is_src_frame_alt_ref) { 2923 if (cpi->rc.is_src_frame_alt_ref) {
2679 if (cpi->sf.alt_ref_search_fp) { 2924 if (sf->alt_ref_search_fp) {
2680 mode_skip_mask[ALTREF_FRAME] = 0; 2925 mode_skip_mask[ALTREF_FRAME] = 0;
2681 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME); 2926 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
2682 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; 2927 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2683 } 2928 }
2684 } 2929 }
2685 2930
2686 if (bsize > cpi->sf.max_intra_bsize) { 2931 if (sf->alt_ref_search_fp)
2932 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
2933 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
2934 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
2935
2936 if (sf->adaptive_mode_search) {
2937 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
2938 cpi->rc.frames_since_golden >= 3)
2939 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
2940 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
2941 }
2942
2943 if (bsize > sf->max_intra_bsize) {
2687 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME); 2944 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
2688 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME); 2945 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
2689 } 2946 }
2690 2947
2691 mode_skip_mask[INTRA_FRAME] |= 2948 mode_skip_mask[INTRA_FRAME] |=
2692 ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]); 2949 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
2693 2950
2694 for (i = 0; i < MAX_MODES; ++i) 2951 for (i = 0; i < MAX_MODES; ++i)
2695 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; 2952 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
2696 2953
2697 midx = cpi->sf.schedule_mode_search ? mode_skip_start : 0; 2954 midx = sf->schedule_mode_search ? mode_skip_start : 0;
2698 while (midx > 4) { 2955 while (midx > 4) {
2699 uint8_t end_pos = 0; 2956 uint8_t end_pos = 0;
2700 for (i = 5; i < midx; ++i) { 2957 for (i = 5; i < midx; ++i) {
2701 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) { 2958 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
2702 uint8_t tmp = mode_map[i]; 2959 uint8_t tmp = mode_map[i];
2703 mode_map[i] = mode_map[i - 1]; 2960 mode_map[i] = mode_map[i - 1];
2704 mode_map[i - 1] = tmp; 2961 mode_map[i - 1] = tmp;
2705 end_pos = i; 2962 end_pos = i;
2706 } 2963 }
2707 } 2964 }
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
2751 } 3008 }
2752 3009
2753 if (ref_frame_skip_mask[0] & (1 << ref_frame) && 3010 if (ref_frame_skip_mask[0] & (1 << ref_frame) &&
2754 ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame))) 3011 ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))
2755 continue; 3012 continue;
2756 3013
2757 if (mode_skip_mask[ref_frame] & (1 << this_mode)) 3014 if (mode_skip_mask[ref_frame] & (1 << this_mode))
2758 continue; 3015 continue;
2759 3016
2760 // Test best rd so far against threshold for trying this mode. 3017 // Test best rd so far against threshold for trying this mode.
2761 if (best_mode_skippable && cpi->sf.schedule_mode_search) 3018 if (best_mode_skippable && sf->schedule_mode_search)
2762 mode_threshold[mode_index] <<= 1; 3019 mode_threshold[mode_index] <<= 1;
2763 3020
2764 if (best_rd < mode_threshold[mode_index]) 3021 if (best_rd < mode_threshold[mode_index])
2765 continue; 3022 continue;
2766 3023
2767 if (cpi->sf.motion_field_mode_search) { 3024 if (sf->motion_field_mode_search) {
2768 const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize], 3025 const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],
2769 tile->mi_col_end - mi_col); 3026 tile->mi_col_end - mi_col);
2770 const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize], 3027 const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
2771 tile->mi_row_end - mi_row); 3028 tile->mi_row_end - mi_row);
2772 const int bsl = mi_width_log2(bsize); 3029 const int bsl = mi_width_log2_lookup[bsize];
2773 int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl) 3030 int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
2774 + get_chessboard_index(cm->current_video_frame)) & 0x1; 3031 + get_chessboard_index(cm->current_video_frame)) & 0x1;
2775 MB_MODE_INFO *ref_mbmi; 3032 MB_MODE_INFO *ref_mbmi;
2776 int const_motion = 1; 3033 int const_motion = 1;
2777 int skip_ref_frame = !cb_partition_search_ctrl; 3034 int skip_ref_frame = !cb_partition_search_ctrl;
2778 MV_REFERENCE_FRAME rf = NONE; 3035 MV_REFERENCE_FRAME rf = NONE;
2779 int_mv ref_mv; 3036 int_mv ref_mv;
2780 ref_mv.as_int = INVALID_MV; 3037 ref_mv.as_int = INVALID_MV;
2781 3038
2782 if ((mi_row - 1) >= tile->mi_row_start) { 3039 if ((mi_row - 1) >= tile->mi_row_start) {
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
2831 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME) 3088 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
2832 continue; 3089 continue;
2833 3090
2834 mode_excluded = cm->reference_mode == SINGLE_REFERENCE; 3091 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
2835 } else { 3092 } else {
2836 if (ref_frame != INTRA_FRAME) 3093 if (ref_frame != INTRA_FRAME)
2837 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; 3094 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
2838 } 3095 }
2839 3096
2840 if (ref_frame == INTRA_FRAME) { 3097 if (ref_frame == INTRA_FRAME) {
2841 if (cpi->sf.adaptive_mode_search) 3098 if (sf->adaptive_mode_search)
2842 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse) 3099 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
2843 continue; 3100 continue;
2844 3101
2845 if (this_mode != DC_PRED) { 3102 if (this_mode != DC_PRED) {
2846 // Disable intra modes other than DC_PRED for blocks with low variance 3103 // Disable intra modes other than DC_PRED for blocks with low variance
2847 // Threshold for intra skipping based on source variance 3104 // Threshold for intra skipping based on source variance
2848 // TODO(debargha): Specialize the threshold for super block sizes 3105 // TODO(debargha): Specialize the threshold for super block sizes
2849 const unsigned int skip_intra_var_thresh = 64; 3106 const unsigned int skip_intra_var_thresh = 64;
2850 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && 3107 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
2851 x->source_variance < skip_intra_var_thresh) 3108 x->source_variance < skip_intra_var_thresh)
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
2888 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 3145 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2889 if (comp_pred) 3146 if (comp_pred)
2890 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 3147 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
2891 } 3148 }
2892 3149
2893 for (i = 0; i < TX_MODES; ++i) 3150 for (i = 0; i < TX_MODES; ++i)
2894 tx_cache[i] = INT64_MAX; 3151 tx_cache[i] = INT64_MAX;
2895 3152
2896 if (ref_frame == INTRA_FRAME) { 3153 if (ref_frame == INTRA_FRAME) {
2897 TX_SIZE uv_tx; 3154 TX_SIZE uv_tx;
3155 struct macroblockd_plane *const pd = &xd->plane[1];
3156 vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
2898 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, 3157 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2899 NULL, bsize, tx_cache, best_rd); 3158 NULL, bsize, tx_cache, best_rd);
2900
2901 if (rate_y == INT_MAX) 3159 if (rate_y == INT_MAX)
2902 continue; 3160 continue;
2903 3161
2904 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x, 3162 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
2905 pd[1].subsampling_y); 3163 pd->subsampling_y);
2906 if (rate_uv_intra[uv_tx] == INT_MAX) { 3164 if (rate_uv_intra[uv_tx] == INT_MAX) {
2907 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx, 3165 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
2908 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], 3166 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
2909 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); 3167 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
2910 } 3168 }
2911 3169
2912 rate_uv = rate_uv_tokenonly[uv_tx]; 3170 rate_uv = rate_uv_tokenonly[uv_tx];
2913 distortion_uv = dist_uv[uv_tx]; 3171 distortion_uv = dist_uv[uv_tx];
2914 skippable = skippable && skip_uv[uv_tx]; 3172 skippable = skippable && skip_uv[uv_tx];
2915 mbmi->uv_mode = mode_uv[uv_tx]; 3173 mbmi->uv_mode = mode_uv[uv_tx];
2916 3174
2917 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; 3175 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
2918 if (this_mode != DC_PRED && this_mode != TM_PRED) 3176 if (this_mode != DC_PRED && this_mode != TM_PRED)
2919 rate2 += intra_cost_penalty; 3177 rate2 += intra_cost_penalty;
2920 distortion2 = distortion_y + distortion_uv; 3178 distortion2 = distortion_y + distortion_uv;
2921 } else { 3179 } else {
2922 this_rd = handle_inter_mode(cpi, x, bsize, 3180 this_rd = handle_inter_mode(cpi, x, bsize,
2923 tx_cache, 3181 tx_cache,
2924 &rate2, &distortion2, &skippable, 3182 &rate2, &distortion2, &skippable,
2925 &rate_y, &distortion_y, 3183 &rate_y, &rate_uv,
2926 &rate_uv, &distortion_uv,
2927 &disable_skip, frame_mv, 3184 &disable_skip, frame_mv,
2928 mi_row, mi_col, 3185 mi_row, mi_col,
2929 single_newmv, single_inter_filter, 3186 single_newmv, single_inter_filter,
2930 single_skippable, &total_sse, best_rd); 3187 single_skippable, &total_sse, best_rd);
2931 if (this_rd == INT64_MAX) 3188 if (this_rd == INT64_MAX)
2932 continue; 3189 continue;
2933 3190
2934 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); 3191 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
2935 3192
2936 if (cm->reference_mode == REFERENCE_MODE_SELECT) 3193 if (cm->reference_mode == REFERENCE_MODE_SELECT)
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
2997 best_mode_index = mode_index; 3254 best_mode_index = mode_index;
2998 3255
2999 if (ref_frame == INTRA_FRAME) { 3256 if (ref_frame == INTRA_FRAME) {
3000 /* required for left and above block mv */ 3257 /* required for left and above block mv */
3001 mbmi->mv[0].as_int = 0; 3258 mbmi->mv[0].as_int = 0;
3002 max_plane = 1; 3259 max_plane = 1;
3003 } else { 3260 } else {
3004 best_pred_sse = x->pred_sse[ref_frame]; 3261 best_pred_sse = x->pred_sse[ref_frame];
3005 } 3262 }
3006 3263
3007 *returnrate = rate2; 3264 rd_cost->rate = rate2;
3008 *returndistortion = distortion2; 3265 rd_cost->dist = distortion2;
3266 rd_cost->rdcost = this_rd;
3009 best_rd = this_rd; 3267 best_rd = this_rd;
3010 best_mbmode = *mbmi; 3268 best_mbmode = *mbmi;
3011 best_skip2 = this_skip2; 3269 best_skip2 = this_skip2;
3012 best_mode_skippable = skippable; 3270 best_mode_skippable = skippable;
3013 3271
3014 if (!x->select_tx_size) 3272 if (!x->select_tx_size)
3015 swap_block_ptr(x, ctx, 1, 0, 0, max_plane); 3273 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3016 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], 3274 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3017 sizeof(uint8_t) * ctx->num_4x4_blk); 3275 sizeof(uint8_t) * ctx->num_4x4_blk);
3018 3276
3019 // TODO(debargha): enhance this test with a better distortion prediction 3277 // TODO(debargha): enhance this test with a better distortion prediction
3020 // based on qp, activity mask and history 3278 // based on qp, activity mask and history
3021 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && 3279 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3022 (mode_index > MIN_EARLY_TERM_INDEX)) { 3280 (mode_index > MIN_EARLY_TERM_INDEX)) {
3023 const int qstep = xd->plane[0].dequant[1]; 3281 int qstep = xd->plane[0].dequant[1];
3024 // TODO(debargha): Enhance this by specializing for each mode_index 3282 // TODO(debargha): Enhance this by specializing for each mode_index
3025 int scale = 4; 3283 int scale = 4;
3284 #if CONFIG_VP9_HIGHBITDEPTH
3285 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3286 qstep >>= (xd->bd - 8);
3287 }
3288 #endif // CONFIG_VP9_HIGHBITDEPTH
3026 if (x->source_variance < UINT_MAX) { 3289 if (x->source_variance < UINT_MAX) {
3027 const int var_adjust = (x->source_variance < 16); 3290 const int var_adjust = (x->source_variance < 16);
3028 scale -= var_adjust; 3291 scale -= var_adjust;
3029 } 3292 }
3030 if (ref_frame > INTRA_FRAME && 3293 if (ref_frame > INTRA_FRAME &&
3031 distortion2 * scale < qstep * qstep) { 3294 distortion2 * scale < qstep * qstep) {
3032 early_term = 1; 3295 early_term = 1;
3033 } 3296 }
3034 } 3297 }
3035 } 3298 }
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
3123 best_mbmode.mode = NEARESTMV; 3386 best_mbmode.mode = NEARESTMV;
3124 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && 3387 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3125 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int == 3388 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
3126 best_mbmode.mv[1].as_int) || !comp_pred_mode)) 3389 best_mbmode.mv[1].as_int) || !comp_pred_mode))
3127 best_mbmode.mode = NEARMV; 3390 best_mbmode.mode = NEARMV;
3128 else if (best_mbmode.mv[0].as_int == 0 && 3391 else if (best_mbmode.mv[0].as_int == 0 &&
3129 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode)) 3392 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
3130 best_mbmode.mode = ZEROMV; 3393 best_mbmode.mode = ZEROMV;
3131 } 3394 }
3132 3395
3133 if (best_mode_index < 0 || best_rd >= best_rd_so_far) 3396 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
3134 return INT64_MAX; 3397 rd_cost->rate = INT_MAX;
3398 rd_cost->rdcost = INT64_MAX;
3399 return;
3400 }
3135 3401
3136 // If we used an estimate for the uv intra rd in the loop above... 3402 // If we used an estimate for the uv intra rd in the loop above...
3137 if (cpi->sf.use_uv_intra_rd_estimate) { 3403 if (sf->use_uv_intra_rd_estimate) {
3138 // Do Intra UV best rd mode selection if best mode choice above was intra. 3404 // Do Intra UV best rd mode selection if best mode choice above was intra.
3139 if (best_mbmode.ref_frame[0] == INTRA_FRAME) { 3405 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3140 TX_SIZE uv_tx_size; 3406 TX_SIZE uv_tx_size;
3141 *mbmi = best_mbmode; 3407 *mbmi = best_mbmode;
3142 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); 3408 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3143 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], 3409 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3144 &rate_uv_tokenonly[uv_tx_size], 3410 &rate_uv_tokenonly[uv_tx_size],
3145 &dist_uv[uv_tx_size], 3411 &dist_uv[uv_tx_size],
3146 &skip_uv[uv_tx_size], 3412 &skip_uv[uv_tx_size],
3147 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, 3413 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
3184 } 3450 }
3185 } else { 3451 } else {
3186 vp9_zero(best_filter_diff); 3452 vp9_zero(best_filter_diff);
3187 vp9_zero(best_tx_diff); 3453 vp9_zero(best_tx_diff);
3188 } 3454 }
3189 3455
3190 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff 3456 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
3191 // updating code causes PSNR loss. Need to figure out the confliction. 3457 // updating code causes PSNR loss. Need to figure out the confliction.
3192 x->skip |= best_mode_skippable; 3458 x->skip |= best_mode_skippable;
3193 3459
3460 if (!x->skip && !x->select_tx_size) {
3461 int has_high_freq_coeff = 0;
3462 int plane;
3463 int max_plane = is_inter_block(&xd->mi[0].src_mi->mbmi)
3464 ? MAX_MB_PLANE : 1;
3465 for (plane = 0; plane < max_plane; ++plane) {
3466 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
3467 has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
3468 }
3469
3470 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
3471 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
3472 has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
3473 }
3474
3475 best_mode_skippable |= !has_high_freq_coeff;
3476 }
3477
3194 store_coding_context(x, ctx, best_mode_index, best_pred_diff, 3478 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3195 best_tx_diff, best_filter_diff, best_mode_skippable); 3479 best_tx_diff, best_filter_diff, best_mode_skippable);
3196
3197 return best_rd;
3198 } 3480 }
3199 3481
3200 int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, 3482 void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
3201 int *returnrate, 3483 RD_COST *rd_cost,
3202 int64_t *returndistortion, 3484 BLOCK_SIZE bsize,
3203 BLOCK_SIZE bsize, 3485 PICK_MODE_CONTEXT *ctx,
3204 PICK_MODE_CONTEXT *ctx, 3486 int64_t best_rd_so_far) {
3205 int64_t best_rd_so_far) {
3206 VP9_COMMON *const cm = &cpi->common; 3487 VP9_COMMON *const cm = &cpi->common;
3207 RD_OPT *const rd_opt = &cpi->rd; 3488 RD_OPT *const rd_opt = &cpi->rd;
3208 MACROBLOCKD *const xd = &x->e_mbd; 3489 MACROBLOCKD *const xd = &x->e_mbd;
3209 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; 3490 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
3210 unsigned char segment_id = mbmi->segment_id; 3491 unsigned char segment_id = mbmi->segment_id;
3211 const int comp_pred = 0; 3492 const int comp_pred = 0;
3212 int i; 3493 int i;
3213 int64_t best_tx_diff[TX_MODES]; 3494 int64_t best_tx_diff[TX_MODES];
3214 int64_t best_pred_diff[REFERENCE_MODES]; 3495 int64_t best_pred_diff[REFERENCE_MODES];
3215 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 3496 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3216 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3497 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3217 vp9_prob comp_mode_p; 3498 vp9_prob comp_mode_p;
3218 INTERP_FILTER best_filter = SWITCHABLE; 3499 INTERP_FILTER best_filter = SWITCHABLE;
3219 int64_t this_rd = INT64_MAX; 3500 int64_t this_rd = INT64_MAX;
3220 int rate2 = 0; 3501 int rate2 = 0;
3221 const int64_t distortion2 = 0; 3502 const int64_t distortion2 = 0;
3222 3503
3223 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3504 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3224 3505
3225 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 3506 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3226 &comp_mode_p); 3507 &comp_mode_p);
3227 3508
3228 for (i = 0; i < MAX_REF_FRAMES; ++i) 3509 for (i = 0; i < MAX_REF_FRAMES; ++i)
3229 x->pred_sse[i] = INT_MAX; 3510 x->pred_sse[i] = INT_MAX;
3230 for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) 3511 for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
3231 x->pred_mv_sad[i] = INT_MAX; 3512 x->pred_mv_sad[i] = INT_MAX;
3232 3513
3233 *returnrate = INT_MAX; 3514 rd_cost->rate = INT_MAX;
3234 3515
3235 assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); 3516 assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
3236 3517
3237 mbmi->mode = ZEROMV; 3518 mbmi->mode = ZEROMV;
3238 mbmi->uv_mode = DC_PRED; 3519 mbmi->uv_mode = DC_PRED;
3239 mbmi->ref_frame[0] = LAST_FRAME; 3520 mbmi->ref_frame[0] = LAST_FRAME;
3240 mbmi->ref_frame[1] = NONE; 3521 mbmi->ref_frame[1] = NONE;
3241 mbmi->mv[0].as_int = 0; 3522 mbmi->mv[0].as_int = 0;
3242 x->skip = 1; 3523 x->skip = 1;
3243 3524
(...skipping 28 matching lines...) Expand all
3272 } 3553 }
3273 3554
3274 if (cm->reference_mode == REFERENCE_MODE_SELECT) 3555 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3275 rate2 += vp9_cost_bit(comp_mode_p, comp_pred); 3556 rate2 += vp9_cost_bit(comp_mode_p, comp_pred);
3276 3557
3277 // Estimate the reference frame signaling cost and add it 3558 // Estimate the reference frame signaling cost and add it
3278 // to the rolling cost variable. 3559 // to the rolling cost variable.
3279 rate2 += ref_costs_single[LAST_FRAME]; 3560 rate2 += ref_costs_single[LAST_FRAME];
3280 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); 3561 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3281 3562
3282 *returnrate = rate2; 3563 rd_cost->rate = rate2;
3283 *returndistortion = distortion2; 3564 rd_cost->dist = distortion2;
3565 rd_cost->rdcost = this_rd;
3284 3566
3285 if (this_rd >= best_rd_so_far) 3567 if (this_rd >= best_rd_so_far) {
3286 return INT64_MAX; 3568 rd_cost->rate = INT_MAX;
3569 rd_cost->rdcost = INT64_MAX;
3570 return;
3571 }
3287 3572
3288 assert((cm->interp_filter == SWITCHABLE) || 3573 assert((cm->interp_filter == SWITCHABLE) ||
3289 (cm->interp_filter == mbmi->interp_filter)); 3574 (cm->interp_filter == mbmi->interp_filter));
3290 3575
3291 update_rd_thresh_fact(cpi, bsize, THR_ZEROMV); 3576 update_rd_thresh_fact(cpi, bsize, THR_ZEROMV);
3292 3577
3293 vp9_zero(best_pred_diff); 3578 vp9_zero(best_pred_diff);
3294 vp9_zero(best_filter_diff); 3579 vp9_zero(best_filter_diff);
3295 vp9_zero(best_tx_diff); 3580 vp9_zero(best_tx_diff);
3296 3581
3297 if (!x->select_tx_size) 3582 if (!x->select_tx_size)
3298 swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); 3583 swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
3299 store_coding_context(x, ctx, THR_ZEROMV, 3584 store_coding_context(x, ctx, THR_ZEROMV,
3300 best_pred_diff, best_tx_diff, best_filter_diff, 0); 3585 best_pred_diff, best_tx_diff, best_filter_diff, 0);
3301
3302 return this_rd;
3303 } 3586 }
3304 3587
3305 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, 3588 void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3306 const TileInfo *const tile, 3589 const TileInfo *const tile,
3307 int mi_row, int mi_col, 3590 int mi_row, int mi_col,
3308 int *returnrate, 3591 RD_COST *rd_cost,
3309 int64_t *returndistortion, 3592 BLOCK_SIZE bsize,
3310 BLOCK_SIZE bsize, 3593 PICK_MODE_CONTEXT *ctx,
3311 PICK_MODE_CONTEXT *ctx, 3594 int64_t best_rd_so_far) {
3312 int64_t best_rd_so_far) {
3313 VP9_COMMON *const cm = &cpi->common; 3595 VP9_COMMON *const cm = &cpi->common;
3314 RD_OPT *const rd_opt = &cpi->rd; 3596 RD_OPT *const rd_opt = &cpi->rd;
3597 SPEED_FEATURES *const sf = &cpi->sf;
3315 MACROBLOCKD *const xd = &x->e_mbd; 3598 MACROBLOCKD *const xd = &x->e_mbd;
3316 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; 3599 MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
3317 const struct segmentation *const seg = &cm->seg; 3600 const struct segmentation *const seg = &cm->seg;
3318 MV_REFERENCE_FRAME ref_frame, second_ref_frame; 3601 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3319 unsigned char segment_id = mbmi->segment_id; 3602 unsigned char segment_id = mbmi->segment_id;
3320 int comp_pred, i; 3603 int comp_pred, i;
3321 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; 3604 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3322 struct buf_2d yv12_mb[4][MAX_MB_PLANE]; 3605 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3323 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, 3606 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3324 VP9_ALT_FLAG }; 3607 VP9_ALT_FLAG };
3325 int64_t best_rd = best_rd_so_far; 3608 int64_t best_rd = best_rd_so_far;
3326 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise 3609 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3327 static const int64_t best_tx_diff[TX_MODES] = { 0 }; 3610 static const int64_t best_tx_diff[TX_MODES] = { 0 };
3328 int64_t best_pred_diff[REFERENCE_MODES]; 3611 int64_t best_pred_diff[REFERENCE_MODES];
3329 int64_t best_pred_rd[REFERENCE_MODES]; 3612 int64_t best_pred_rd[REFERENCE_MODES];
3330 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; 3613 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3331 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; 3614 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3332 MB_MODE_INFO best_mbmode; 3615 MB_MODE_INFO best_mbmode;
3333 int ref_index, best_ref_index = 0; 3616 int ref_index, best_ref_index = 0;
3334 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; 3617 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3335 vp9_prob comp_mode_p; 3618 vp9_prob comp_mode_p;
3336 INTERP_FILTER tmp_best_filter = SWITCHABLE; 3619 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3337 int rate_uv_intra, rate_uv_tokenonly; 3620 int rate_uv_intra, rate_uv_tokenonly;
3338 int64_t dist_uv; 3621 int64_t dist_uv;
3339 int skip_uv; 3622 int skip_uv;
3340 PREDICTION_MODE mode_uv = DC_PRED; 3623 PREDICTION_MODE mode_uv = DC_PRED;
3341 const int intra_cost_penalty = 3624 const int intra_cost_penalty = vp9_get_intra_cost_penalty(
3342 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); 3625 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3343 int_mv seg_mvs[4][MAX_REF_FRAMES]; 3626 int_mv seg_mvs[4][MAX_REF_FRAMES];
3344 b_mode_info best_bmodes[4]; 3627 b_mode_info best_bmodes[4];
3345 int best_skip2 = 0; 3628 int best_skip2 = 0;
3346 int ref_frame_skip_mask[2] = { 0 }; 3629 int ref_frame_skip_mask[2] = { 0 };
3347 3630
3348 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; 3631 x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3349 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4); 3632 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3350 vp9_zero(best_mbmode); 3633 vp9_zero(best_mbmode);
3351 3634
3352 for (i = 0; i < 4; i++) { 3635 for (i = 0; i < 4; i++) {
3353 int j; 3636 int j;
3354 for (j = 0; j < MAX_REF_FRAMES; j++) 3637 for (j = 0; j < MAX_REF_FRAMES; j++)
3355 seg_mvs[i][j].as_int = INVALID_MV; 3638 seg_mvs[i][j].as_int = INVALID_MV;
3356 } 3639 }
3357 3640
3358 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, 3641 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3359 &comp_mode_p); 3642 &comp_mode_p);
3360 3643
3361 for (i = 0; i < REFERENCE_MODES; ++i) 3644 for (i = 0; i < REFERENCE_MODES; ++i)
3362 best_pred_rd[i] = INT64_MAX; 3645 best_pred_rd[i] = INT64_MAX;
3363 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 3646 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3364 best_filter_rd[i] = INT64_MAX; 3647 best_filter_rd[i] = INT64_MAX;
3365 rate_uv_intra = INT_MAX; 3648 rate_uv_intra = INT_MAX;
3366 3649
3367 *returnrate = INT_MAX; 3650 rd_cost->rate = INT_MAX;
3368 3651
3369 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { 3652 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3370 if (cpi->ref_frame_flags & flag_list[ref_frame]) { 3653 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3371 setup_buffer_inter(cpi, x, tile, 3654 setup_buffer_inter(cpi, x, tile,
3372 ref_frame, bsize, mi_row, mi_col, 3655 ref_frame, bsize, mi_row, mi_col,
3373 frame_mv[NEARESTMV], frame_mv[NEARMV], 3656 frame_mv[NEARESTMV], frame_mv[NEARMV],
3374 yv12_mb); 3657 yv12_mb);
3375 } else { 3658 } else {
3376 ref_frame_skip_mask[0] |= (1 << ref_frame); 3659 ref_frame_skip_mask[0] |= (1 << ref_frame);
3377 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3660 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
(...skipping 13 matching lines...) Expand all
3391 int i; 3674 int i;
3392 int this_skip2 = 0; 3675 int this_skip2 = 0;
3393 int64_t total_sse = INT_MAX; 3676 int64_t total_sse = INT_MAX;
3394 int early_term = 0; 3677 int early_term = 0;
3395 3678
3396 ref_frame = vp9_ref_order[ref_index].ref_frame[0]; 3679 ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3397 second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; 3680 second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3398 3681
3399 // Look at the reference frame of the best mode so far and set the 3682 // Look at the reference frame of the best mode so far and set the
3400 // skip mask to look at a subset of the remaining modes. 3683 // skip mask to look at a subset of the remaining modes.
3401 if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) { 3684 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
3402 if (ref_index == 3) { 3685 if (ref_index == 3) {
3403 switch (best_mbmode.ref_frame[0]) { 3686 switch (best_mbmode.ref_frame[0]) {
3404 case INTRA_FRAME: 3687 case INTRA_FRAME:
3405 break; 3688 break;
3406 case LAST_FRAME: 3689 case LAST_FRAME:
3407 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME); 3690 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
3408 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; 3691 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3409 break; 3692 break;
3410 case GOLDEN_FRAME: 3693 case GOLDEN_FRAME:
3411 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME); 3694 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
(...skipping 24 matching lines...) Expand all
3436 if (comp_pred) { 3719 if (comp_pred) {
3437 if (!cm->allow_comp_inter_inter) 3720 if (!cm->allow_comp_inter_inter)
3438 continue; 3721 continue;
3439 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) 3722 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3440 continue; 3723 continue;
3441 // Do not allow compound prediction if the segment level reference frame 3724 // Do not allow compound prediction if the segment level reference frame
3442 // feature is in use as in this case there can only be one reference. 3725 // feature is in use as in this case there can only be one reference.
3443 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) 3726 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3444 continue; 3727 continue;
3445 3728
3446 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && 3729 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3447 best_mbmode.ref_frame[0] == INTRA_FRAME) 3730 best_mbmode.ref_frame[0] == INTRA_FRAME)
3448 continue; 3731 continue;
3449 } 3732 }
3450 3733
3451 // TODO(jingning, jkoleszar): scaling reference frame not supported for 3734 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3452 // sub8x8 blocks. 3735 // sub8x8 blocks.
3453 if (ref_frame > INTRA_FRAME && 3736 if (ref_frame > INTRA_FRAME &&
3454 vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) 3737 vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3455 continue; 3738 continue;
3456 3739
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
3540 rd_opt->threshes[segment_id][bsize][THR_LAST] : 3823 rd_opt->threshes[segment_id][bsize][THR_LAST] :
3541 rd_opt->threshes[segment_id][bsize][THR_ALTR]; 3824 rd_opt->threshes[segment_id][bsize][THR_ALTR];
3542 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 3825 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3543 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; 3826 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
3544 rd_opt->mask_filter = 0; 3827 rd_opt->mask_filter = 0;
3545 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 3828 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3546 rd_opt->filter_cache[i] = INT64_MAX; 3829 rd_opt->filter_cache[i] = INT64_MAX;
3547 3830
3548 if (cm->interp_filter != BILINEAR) { 3831 if (cm->interp_filter != BILINEAR) {
3549 tmp_best_filter = EIGHTTAP; 3832 tmp_best_filter = EIGHTTAP;
3550 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { 3833 if (x->source_variance < sf->disable_filter_search_var_thresh) {
3551 tmp_best_filter = EIGHTTAP; 3834 tmp_best_filter = EIGHTTAP;
3552 } else if (cpi->sf.adaptive_pred_interp_filter == 1 && 3835 } else if (sf->adaptive_pred_interp_filter == 1 &&
3553 ctx->pred_interp_filter < SWITCHABLE) { 3836 ctx->pred_interp_filter < SWITCHABLE) {
3554 tmp_best_filter = ctx->pred_interp_filter; 3837 tmp_best_filter = ctx->pred_interp_filter;
3555 } else if (cpi->sf.adaptive_pred_interp_filter == 2) { 3838 } else if (sf->adaptive_pred_interp_filter == 2) {
3556 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ? 3839 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
3557 ctx->pred_interp_filter : 0; 3840 ctx->pred_interp_filter : 0;
3558 } else { 3841 } else {
3559 for (switchable_filter_index = 0; 3842 for (switchable_filter_index = 0;
3560 switchable_filter_index < SWITCHABLE_FILTERS; 3843 switchable_filter_index < SWITCHABLE_FILTERS;
3561 ++switchable_filter_index) { 3844 ++switchable_filter_index) {
3562 int newbest, rs; 3845 int newbest, rs;
3563 int64_t rs_rd; 3846 int64_t rs_rd;
3564 mbmi->interp_filter = switchable_filter_index; 3847 mbmi->interp_filter = switchable_filter_index;
3565 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile, 3848 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
3598 tmp_best_distortion = distortion; 3881 tmp_best_distortion = distortion;
3599 tmp_best_sse = total_sse; 3882 tmp_best_sse = total_sse;
3600 tmp_best_skippable = skippable; 3883 tmp_best_skippable = skippable;
3601 tmp_best_mbmode = *mbmi; 3884 tmp_best_mbmode = *mbmi;
3602 for (i = 0; i < 4; i++) { 3885 for (i = 0; i < 4; i++) {
3603 tmp_best_bmodes[i] = xd->mi[0].src_mi->bmi[i]; 3886 tmp_best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
3604 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; 3887 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
3605 } 3888 }
3606 pred_exists = 1; 3889 pred_exists = 1;
3607 if (switchable_filter_index == 0 && 3890 if (switchable_filter_index == 0 &&
3608 cpi->sf.use_rd_breakout && 3891 sf->use_rd_breakout &&
3609 best_rd < INT64_MAX) { 3892 best_rd < INT64_MAX) {
3610 if (tmp_best_rdu / 2 > best_rd) { 3893 if (tmp_best_rdu / 2 > best_rd) {
3611 // skip searching the other filters if the first is 3894 // skip searching the other filters if the first is
3612 // already substantially larger than the best so far 3895 // already substantially larger than the best so far
3613 tmp_best_filter = mbmi->interp_filter; 3896 tmp_best_filter = mbmi->interp_filter;
3614 tmp_best_rdu = INT64_MAX; 3897 tmp_best_rdu = INT64_MAX;
3615 break; 3898 break;
3616 } 3899 }
3617 } 3900 }
3618 } 3901 }
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
3661 3944
3662 tmp_best_rdu = best_rd - 3945 tmp_best_rdu = best_rd -
3663 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), 3946 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
3664 RDCOST(x->rdmult, x->rddiv, 0, total_sse)); 3947 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
3665 3948
3666 if (tmp_best_rdu > 0) { 3949 if (tmp_best_rdu > 0) {
3667 // If even the 'Y' rd value of split is higher than best so far 3950 // If even the 'Y' rd value of split is higher than best so far
3668 // then dont bother looking at UV 3951 // then dont bother looking at UV
3669 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, 3952 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3670 BLOCK_8X8); 3953 BLOCK_8X8);
3671 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, 3954 vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
3672 &uv_sse, BLOCK_8X8, tmp_best_rdu); 3955 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
3673 if (rate_uv == INT_MAX) 3956 &uv_sse, BLOCK_8X8, tmp_best_rdu))
3674 continue; 3957 continue;
3958
3675 rate2 += rate_uv; 3959 rate2 += rate_uv;
3676 distortion2 += distortion_uv; 3960 distortion2 += distortion_uv;
3677 skippable = skippable && uv_skippable; 3961 skippable = skippable && uv_skippable;
3678 total_sse += uv_sse; 3962 total_sse += uv_sse;
3679 } 3963 }
3680 } 3964 }
3681 3965
3682 if (cm->reference_mode == REFERENCE_MODE_SELECT) 3966 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3683 rate2 += compmode_cost; 3967 rate2 += compmode_cost;
3684 3968
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
3731 int max_plane = MAX_MB_PLANE; 4015 int max_plane = MAX_MB_PLANE;
3732 // Note index of best mode so far 4016 // Note index of best mode so far
3733 best_ref_index = ref_index; 4017 best_ref_index = ref_index;
3734 4018
3735 if (ref_frame == INTRA_FRAME) { 4019 if (ref_frame == INTRA_FRAME) {
3736 /* required for left and above block mv */ 4020 /* required for left and above block mv */
3737 mbmi->mv[0].as_int = 0; 4021 mbmi->mv[0].as_int = 0;
3738 max_plane = 1; 4022 max_plane = 1;
3739 } 4023 }
3740 4024
3741 *returnrate = rate2; 4025 rd_cost->rate = rate2;
3742 *returndistortion = distortion2; 4026 rd_cost->dist = distortion2;
4027 rd_cost->rdcost = this_rd;
3743 best_rd = this_rd; 4028 best_rd = this_rd;
3744 best_yrd = best_rd - 4029 best_yrd = best_rd -
3745 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); 4030 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
3746 best_mbmode = *mbmi; 4031 best_mbmode = *mbmi;
3747 best_skip2 = this_skip2; 4032 best_skip2 = this_skip2;
3748 if (!x->select_tx_size) 4033 if (!x->select_tx_size)
3749 swap_block_ptr(x, ctx, 1, 0, 0, max_plane); 4034 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3750 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], 4035 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
3751 sizeof(uint8_t) * ctx->num_4x4_blk); 4036 sizeof(uint8_t) * ctx->num_4x4_blk);
3752 4037
3753 for (i = 0; i < 4; i++) 4038 for (i = 0; i < 4; i++)
3754 best_bmodes[i] = xd->mi[0].src_mi->bmi[i]; 4039 best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
3755 4040
3756 // TODO(debargha): enhance this test with a better distortion prediction 4041 // TODO(debargha): enhance this test with a better distortion prediction
3757 // based on qp, activity mask and history 4042 // based on qp, activity mask and history
3758 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) && 4043 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3759 (ref_index > MIN_EARLY_TERM_INDEX)) { 4044 (ref_index > MIN_EARLY_TERM_INDEX)) {
3760 const int qstep = xd->plane[0].dequant[1]; 4045 int qstep = xd->plane[0].dequant[1];
3761 // TODO(debargha): Enhance this by specializing for each mode_index 4046 // TODO(debargha): Enhance this by specializing for each mode_index
3762 int scale = 4; 4047 int scale = 4;
4048 #if CONFIG_VP9_HIGHBITDEPTH
4049 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4050 qstep >>= (xd->bd - 8);
4051 }
4052 #endif // CONFIG_VP9_HIGHBITDEPTH
3763 if (x->source_variance < UINT_MAX) { 4053 if (x->source_variance < UINT_MAX) {
3764 const int var_adjust = (x->source_variance < 16); 4054 const int var_adjust = (x->source_variance < 16);
3765 scale -= var_adjust; 4055 scale -= var_adjust;
3766 } 4056 }
3767 if (ref_frame > INTRA_FRAME && 4057 if (ref_frame > INTRA_FRAME &&
3768 distortion2 * scale < qstep * qstep) { 4058 distortion2 * scale < qstep * qstep) {
3769 early_term = 1; 4059 early_term = 1;
3770 } 4060 }
3771 } 4061 }
3772 } 4062 }
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
3819 } 4109 }
3820 } 4110 }
3821 4111
3822 if (early_term) 4112 if (early_term)
3823 break; 4113 break;
3824 4114
3825 if (x->skip && !comp_pred) 4115 if (x->skip && !comp_pred)
3826 break; 4116 break;
3827 } 4117 }
3828 4118
3829 if (best_rd >= best_rd_so_far) 4119 if (best_rd >= best_rd_so_far) {
3830 return INT64_MAX; 4120 rd_cost->rate = INT_MAX;
4121 rd_cost->rdcost = INT64_MAX;
4122 return;
4123 }
3831 4124
3832 // If we used an estimate for the uv intra rd in the loop above... 4125 // If we used an estimate for the uv intra rd in the loop above...
3833 if (cpi->sf.use_uv_intra_rd_estimate) { 4126 if (sf->use_uv_intra_rd_estimate) {
3834 // Do Intra UV best rd mode selection if best mode choice above was intra. 4127 // Do Intra UV best rd mode selection if best mode choice above was intra.
3835 if (best_mbmode.ref_frame[0] == INTRA_FRAME) { 4128 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3836 *mbmi = best_mbmode; 4129 *mbmi = best_mbmode;
3837 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, 4130 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
3838 &rate_uv_tokenonly, 4131 &rate_uv_tokenonly,
3839 &dist_uv, 4132 &dist_uv,
3840 &skip_uv, 4133 &skip_uv,
3841 BLOCK_8X8, TX_4X4); 4134 BLOCK_8X8, TX_4X4);
3842 } 4135 }
3843 } 4136 }
3844 4137
3845 if (best_rd == INT64_MAX) { 4138 if (best_rd == INT64_MAX) {
3846 *returnrate = INT_MAX; 4139 rd_cost->rate = INT_MAX;
3847 *returndistortion = INT64_MAX; 4140 rd_cost->dist = INT64_MAX;
3848 return best_rd; 4141 rd_cost->rdcost = INT64_MAX;
4142 return;
3849 } 4143 }
3850 4144
3851 assert((cm->interp_filter == SWITCHABLE) || 4145 assert((cm->interp_filter == SWITCHABLE) ||
3852 (cm->interp_filter == best_mbmode.interp_filter) || 4146 (cm->interp_filter == best_mbmode.interp_filter) ||
3853 !is_inter_block(&best_mbmode)); 4147 !is_inter_block(&best_mbmode));
3854 4148
3855 update_rd_thresh_fact(cpi, bsize, best_ref_index); 4149 update_rd_thresh_fact(cpi, bsize, best_ref_index);
3856 4150
3857 // macroblock modes 4151 // macroblock modes
3858 *mbmi = best_mbmode; 4152 *mbmi = best_mbmode;
(...skipping 25 matching lines...) Expand all
3884 best_filter_diff[i] = best_rd - best_filter_rd[i]; 4178 best_filter_diff[i] = best_rd - best_filter_rd[i];
3885 } 4179 }
3886 if (cm->interp_filter == SWITCHABLE) 4180 if (cm->interp_filter == SWITCHABLE)
3887 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); 4181 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3888 } else { 4182 } else {
3889 vp9_zero(best_filter_diff); 4183 vp9_zero(best_filter_diff);
3890 } 4184 }
3891 4185
3892 store_coding_context(x, ctx, best_ref_index, 4186 store_coding_context(x, ctx, best_ref_index,
3893 best_pred_diff, best_tx_diff, best_filter_diff, 0); 4187 best_pred_diff, best_tx_diff, best_filter_diff, 0);
3894
3895 return best_rd;
3896 } 4188 }
3897 4189
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_rdopt.h ('k') | source/libvpx/vp9/encoder/vp9_resize.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698