Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(697)

Side by Side Diff: source/libvpx/vp9/encoder/vp9_encodemb.c

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_ports/config.h"
12 #include "vp9/encoder/vp9_encodemb.h"
13 #include "vp9/common/vp9_reconinter.h"
14 #include "vp9/encoder/vp9_quantize.h"
15 #include "vp9/encoder/vp9_tokenize.h"
16 #include "vp9/common/vp9_invtrans.h"
17 #include "vp9/common/vp9_reconintra.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vp9/encoder/vp9_rdopt.h"
20 #include "vp9/common/vp9_systemdependent.h"
21 #include "vp9_rtcd.h"
22
23 void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
24 unsigned char *src_ptr = (*(be->base_src) + be->src);
25 short *diff_ptr = be->src_diff;
26 unsigned char *pred_ptr = bd->predictor;
27 int src_stride = be->src_stride;
28
29 int r, c;
30
31 for (r = 0; r < 4; r++) {
32 for (c = 0; c < 4; c++) {
33 diff_ptr[c] = src_ptr[c] - pred_ptr[c];
34 }
35
36 diff_ptr += pitch;
37 pred_ptr += pitch;
38 src_ptr += src_stride;
39 }
40 }
41
42 void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
43 unsigned char *src_ptr = (*(be->base_src) + be->src);
44 short *diff_ptr = be->src_diff;
45 unsigned char *pred_ptr = bd->predictor;
46 int src_stride = be->src_stride;
47 int r, c;
48
49 for (r = 0; r < 8; r++) {
50 for (c = 0; c < 8; c++) {
51 diff_ptr[c] = src_ptr[c] - pred_ptr[c];
52 }
53 diff_ptr += pitch;
54 pred_ptr += pitch;
55 src_ptr += src_stride;
56 }
57 }
58
/* Writes one 8x8 residual plane (src - pred) into a packed 8-wide buffer. */
static void subtract_plane_8x8(short *diff,
                               const unsigned char *src, int src_stride,
                               const unsigned char *pred, int pred_stride) {
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += 8;
    pred += pred_stride;
    src += src_stride;
  }
}

/*
 * Computes the chroma residual of a macroblock from strided source and
 * prediction planes.
 *
 * diff       - macroblock residual buffer. The U residual is written as 64
 *              packed values at diff + 256 and the V residual as 64 packed
 *              values at diff + 320; diff[0..255] is not touched.
 * usrc, vsrc - top-left source pixels of the 8x8 U and V blocks.
 * src_stride - row stride shared by usrc and vsrc.
 * upred,
 * vpred      - top-left prediction pixels of the 8x8 U and V blocks.
 * dst_stride - row stride shared by upred and vpred.
 */
void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc,
                           const unsigned char *vsrc, int src_stride,
                           const unsigned char *upred,
                           const unsigned char *vpred, int dst_stride) {
  subtract_plane_8x8(diff + 256, usrc, src_stride, upred, dst_stride);
  subtract_plane_8x8(diff + 320, vsrc, src_stride, vpred, dst_stride);
}
87
/*
 * Computes the chroma residual of a macroblock when the prediction lives in
 * a packed macroblock predictor buffer.
 *
 * diff       - macroblock residual buffer (see vp9_subtract_mbuv_s_c).
 * usrc, vsrc - top-left source pixels of the 8x8 U and V blocks.
 * pred       - predictor buffer; U prediction is read from pred + 256 and
 *              V prediction from pred + 320, both with a row stride of 8.
 * stride     - row stride shared by usrc and vsrc.
 */
void vp9_subtract_mbuv_c(short *diff, unsigned char *usrc,
                         unsigned char *vsrc, unsigned char *pred, int stride) {
  const int pred_stride = 8;

  vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride,
                        pred + 256, pred + 320, pred_stride);
}
95
/*
 * Computes the 16x16 luma residual (src - pred) of a macroblock.
 *
 * diff       - receives 256 residual values, packed 16 per row.
 * src        - top-left source pixel; rows are src_stride apart.
 * pred       - top-left prediction pixel; rows are dst_stride apart.
 */
void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride,
                          const unsigned char *pred, int dst_stride) {
  int row;

  for (row = 0; row < 16; ++row) {
    const unsigned char *const src_row = src + row * src_stride;
    const unsigned char *const pred_row = pred + row * dst_stride;
    short *const diff_row = diff + row * 16;
    int col;

    for (col = 0; col < 16; ++col)
      diff_row[col] = src_row[col] - pred_row[col];
  }
}
110
/*
 * Computes the 16x16 luma residual when the prediction is stored packed,
 * 16 bytes per row.
 *
 * diff   - receives 256 residual values.
 * src    - top-left source pixel; rows are `stride` apart.
 * pred   - packed 16x16 prediction.
 */
void vp9_subtract_mby_c(short *diff, unsigned char *src,
                        unsigned char *pred, int stride) {
  const int pred_stride = 16;

  vp9_subtract_mby_s_c(diff, src, stride, pred, pred_stride);
}
115
116 static void subtract_mb(MACROBLOCK *x) {
117 BLOCK *b = &x->block[0];
118
119 vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor,
120 b->src_stride);
121 vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
122 x->e_mbd.predictor, x->src.uv_stride);
123 }
124
125 static void build_dcblock_4x4(MACROBLOCK *x) {
126 short *src_diff_ptr = &x->src_diff[384];
127 int i;
128
129 for (i = 0; i < 16; i++) {
130 src_diff_ptr[i] = x->coeff[i * 16];
131 x->coeff[i * 16] = 0;
132 }
133 }
134
135 void vp9_transform_mby_4x4(MACROBLOCK *x) {
136 int i;
137 MACROBLOCKD *xd = &x->e_mbd;
138 int has_2nd_order = get_2nd_order_usage(xd);
139
140 for (i = 0; i < 16; i++) {
141 BLOCK *b = &x->block[i];
142 TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
143 if (tx_type != DCT_DCT) {
144 assert(has_2nd_order == 0);
145 vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
146 } else {
147 x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
148 &x->block[i].coeff[0], 32);
149 }
150 }
151
152 if (has_2nd_order) {
153 // build dc block from 16 y dc values
154 build_dcblock_4x4(x);
155
156 // do 2nd order transform on the dc block
157 x->short_walsh4x4(&x->block[24].src_diff[0],
158 &x->block[24].coeff[0], 8);
159 } else {
160 vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
161 }
162 }
163
164 void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
165 int i;
166
167 for (i = 16; i < 24; i += 2) {
168 x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
169 &x->block[i].coeff[0], 16);
170 }
171 }
172
173 static void transform_mb_4x4(MACROBLOCK *x) {
174 vp9_transform_mby_4x4(x);
175 vp9_transform_mbuv_4x4(x);
176 }
177
178 static void build_dcblock_8x8(MACROBLOCK *x) {
179 int16_t *src_diff_ptr = x->block[24].src_diff;
180 int i;
181
182 for (i = 0; i < 16; i++) {
183 src_diff_ptr[i] = 0;
184 }
185 src_diff_ptr[0] = x->coeff[0 * 16];
186 src_diff_ptr[1] = x->coeff[4 * 16];
187 src_diff_ptr[4] = x->coeff[8 * 16];
188 src_diff_ptr[8] = x->coeff[12 * 16];
189 x->coeff[0 * 16] = 0;
190 x->coeff[4 * 16] = 0;
191 x->coeff[8 * 16] = 0;
192 x->coeff[12 * 16] = 0;
193 }
194
195 void vp9_transform_mby_8x8(MACROBLOCK *x) {
196 int i;
197 MACROBLOCKD *xd = &x->e_mbd;
198 TX_TYPE tx_type;
199 int has_2nd_order = get_2nd_order_usage(xd);
200
201 for (i = 0; i < 9; i += 8) {
202 BLOCK *b = &x->block[i];
203 tx_type = get_tx_type_8x8(xd, &xd->block[i]);
204 if (tx_type != DCT_DCT) {
205 assert(has_2nd_order == 0);
206 vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
207 } else {
208 x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
209 &x->block[i].coeff[0], 32);
210 }
211 }
212 for (i = 2; i < 11; i += 8) {
213 BLOCK *b = &x->block[i];
214 tx_type = get_tx_type_8x8(xd, &xd->block[i]);
215 if (tx_type != DCT_DCT) {
216 assert(has_2nd_order == 0);
217 vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
218 } else {
219 x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
220 &x->block[i + 2].coeff[0], 32);
221 }
222 }
223
224 if (has_2nd_order) {
225 // build dc block from 2x2 y dc values
226 build_dcblock_8x8(x);
227
228 // do 2nd order transform on the dc block
229 x->short_fhaar2x2(&x->block[24].src_diff[0],
230 &x->block[24].coeff[0], 8);
231 } else {
232 vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
233 }
234 }
235
236 void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
237 int i;
238
239 for (i = 16; i < 24; i += 4) {
240 x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
241 &x->block[i].coeff[0], 16);
242 }
243 }
244
245 void vp9_transform_mb_8x8(MACROBLOCK *x) {
246 vp9_transform_mby_8x8(x);
247 vp9_transform_mbuv_8x8(x);
248 }
249
250 void vp9_transform_mby_16x16(MACROBLOCK *x) {
251 MACROBLOCKD *xd = &x->e_mbd;
252 BLOCK *b = &x->block[0];
253 TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
254 vp9_clear_system_state();
255 if (tx_type != DCT_DCT) {
256 vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
257 } else {
258 x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
259 &x->block[0].coeff[0], 32);
260 }
261 }
262
263 void vp9_transform_mb_16x16(MACROBLOCK *x) {
264 vp9_transform_mby_16x16(x);
265 vp9_transform_mbuv_8x8(x);
266 }
267
/*
 * Tie-breaker used when two RDCOST values compare equal: the low 8 bits of
 * the rounded rate term (128 + R * RM). DM and D are accepted so that the
 * argument list mirrors RDCOST, but they are not used.
 */
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
#define RDTRUNC_8x8(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)

typedef struct vp9_token_state vp9_token_state;

/* One node of the trellis built by the coefficient optimizers below. */
struct vp9_token_state {
  int rate;           /* accumulated rate from this node onwards */
  int error;          /* accumulated squared error from this node onwards */
  int next;           /* scan position of the next node on the path */
  signed char token;  /* token recorded for this node */
  short qc;           /* quantized coefficient value chosen at this node */
};

// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 4

/* Per-plane multiplier applied to rdmult; indexed by a PLANE_TYPE value. */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  Y2_RD_MULT,
  UV_RD_MULT,
  Y1_RD_MULT
};

/*
 * Recomputes rd_cost0 / rd_cost1 from the caller's rdmult, rddiv, rate0/1
 * and error0/1 locals, falling back to RDTRUNC when the two costs tie.
 * Wrapped in do { } while (0) so that "UPDATE_RD_COST();" is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define UPDATE_RD_COST()                                  \
  do {                                                    \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);      \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);      \
    if (rd_cost0 == rd_cost1) {                           \
      rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);   \
      rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);   \
    }                                                     \
  } while (0)
301
302 static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
303 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
304 int tx_size) {
305 BLOCK *b;
306 BLOCKD *d;
307 vp9_token_state tokens[65][2];
308 uint64_t best_mask[2];
309 const short *dequant_ptr;
310 const short *coeff_ptr;
311 short *qcoeff_ptr;
312 short *dqcoeff_ptr;
313 int eob;
314 int i0;
315 int rc;
316 int x;
317 int sz = 0;
318 int next;
319 int rdmult;
320 int rddiv;
321 int final_eob;
322 int64_t rd_cost0, rd_cost1;
323 int rate0, rate1;
324 int error0, error1;
325 int t0, t1;
326 int best;
327 int band;
328 int pt;
329 int err_mult = plane_rd_mult[type];
330 int default_eob;
331 int const *scan, *bands;
332
333 b = &mb->block[i];
334 d = &mb->e_mbd.block[i];
335 switch (tx_size) {
336 default:
337 case TX_4X4:
338 scan = vp9_default_zig_zag1d;
339 bands = vp9_coef_bands;
340 default_eob = 16;
341 // TODO: this isn't called (for intra4x4 modes), but will be left in
342 // since it could be used later
343 {
344 TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d);
345 if (tx_type != DCT_DCT) {
346 switch (tx_type) {
347 case ADST_DCT:
348 scan = vp9_row_scan;
349 break;
350
351 case DCT_ADST:
352 scan = vp9_col_scan;
353 break;
354
355 default:
356 scan = vp9_default_zig_zag1d;
357 break;
358 }
359 } else {
360 scan = vp9_default_zig_zag1d;
361 }
362 }
363 break;
364 case TX_8X8:
365 scan = vp9_default_zig_zag1d_8x8;
366 bands = vp9_coef_bands_8x8;
367 default_eob = 64;
368 break;
369 }
370
371 dequant_ptr = d->dequant;
372 coeff_ptr = b->coeff;
373 qcoeff_ptr = d->qcoeff;
374 dqcoeff_ptr = d->dqcoeff;
375 i0 = (type == PLANE_TYPE_Y_NO_DC);
376 eob = d->eob;
377
378 /* Now set up a Viterbi trellis to evaluate alternative roundings. */
379 rdmult = mb->rdmult * err_mult;
380 if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
381 rdmult = (rdmult * 9) >> 4;
382 rddiv = mb->rddiv;
383 best_mask[0] = best_mask[1] = 0;
384 /* Initialize the sentinel node of the trellis. */
385 tokens[eob][0].rate = 0;
386 tokens[eob][0].error = 0;
387 tokens[eob][0].next = default_eob;
388 tokens[eob][0].token = DCT_EOB_TOKEN;
389 tokens[eob][0].qc = 0;
390 *(tokens[eob] + 1) = *(tokens[eob] + 0);
391 next = eob;
392 for (i = eob; i-- > i0;) {
393 int base_bits;
394 int d2;
395 int dx;
396
397 rc = scan[i];
398 x = qcoeff_ptr[rc];
399 /* Only add a trellis state for non-zero coefficients. */
400 if (x) {
401 int shortcut = 0;
402 error0 = tokens[next][0].error;
403 error1 = tokens[next][1].error;
404 /* Evaluate the first possibility for this state. */
405 rate0 = tokens[next][0].rate;
406 rate1 = tokens[next][1].rate;
407 t0 = (vp9_dct_value_tokens_ptr + x)->Token;
408 /* Consider both possible successor states. */
409 if (next < default_eob) {
410 band = bands[i + 1];
411 pt = vp9_prev_token_class[t0];
412 rate0 +=
413 mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
414 rate1 +=
415 mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
416 }
417 UPDATE_RD_COST();
418 /* And pick the best. */
419 best = rd_cost1 < rd_cost0;
420 base_bits = *(vp9_dct_value_cost_ptr + x);
421 dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
422 d2 = dx * dx;
423 tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
424 tokens[i][0].error = d2 + (best ? error1 : error0);
425 tokens[i][0].next = next;
426 tokens[i][0].token = t0;
427 tokens[i][0].qc = x;
428 best_mask[0] |= best << i;
429 /* Evaluate the second possibility for this state. */
430 rate0 = tokens[next][0].rate;
431 rate1 = tokens[next][1].rate;
432
433 if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
434 (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
435 shortcut = 1;
436 else
437 shortcut = 0;
438
439 if (shortcut) {
440 sz = -(x < 0);
441 x -= 2 * sz + 1;
442 }
443
444 /* Consider both possible successor states. */
445 if (!x) {
446 /* If we reduced this coefficient to zero, check to see if
447 * we need to move the EOB back here.
448 */
449 t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
450 DCT_EOB_TOKEN : ZERO_TOKEN;
451 t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
452 DCT_EOB_TOKEN : ZERO_TOKEN;
453 } else {
454 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
455 }
456 if (next < default_eob) {
457 band = bands[i + 1];
458 if (t0 != DCT_EOB_TOKEN) {
459 pt = vp9_prev_token_class[t0];
460 rate0 += mb->token_costs[tx_size][type][band][pt][
461 tokens[next][0].token];
462 }
463 if (t1 != DCT_EOB_TOKEN) {
464 pt = vp9_prev_token_class[t1];
465 rate1 += mb->token_costs[tx_size][type][band][pt][
466 tokens[next][1].token];
467 }
468 }
469
470 UPDATE_RD_COST();
471 /* And pick the best. */
472 best = rd_cost1 < rd_cost0;
473 base_bits = *(vp9_dct_value_cost_ptr + x);
474
475 if (shortcut) {
476 dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
477 d2 = dx * dx;
478 }
479 tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
480 tokens[i][1].error = d2 + (best ? error1 : error0);
481 tokens[i][1].next = next;
482 tokens[i][1].token = best ? t1 : t0;
483 tokens[i][1].qc = x;
484 best_mask[1] |= best << i;
485 /* Finally, make this the new head of the trellis. */
486 next = i;
487 }
488 /* There's no choice to make for a zero coefficient, so we don't
489 * add a new trellis node, but we do need to update the costs.
490 */
491 else {
492 band = bands[i + 1];
493 t0 = tokens[next][0].token;
494 t1 = tokens[next][1].token;
495 /* Update the cost of each path if we're past the EOB token. */
496 if (t0 != DCT_EOB_TOKEN) {
497 tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
498 tokens[next][0].token = ZERO_TOKEN;
499 }
500 if (t1 != DCT_EOB_TOKEN) {
501 tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
502 tokens[next][1].token = ZERO_TOKEN;
503 }
504 /* Don't update next, because we didn't add a new node. */
505 }
506 }
507
508 /* Now pick the best path through the whole trellis. */
509 band = bands[i + 1];
510 VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
511 rate0 = tokens[next][0].rate;
512 rate1 = tokens[next][1].rate;
513 error0 = tokens[next][0].error;
514 error1 = tokens[next][1].error;
515 t0 = tokens[next][0].token;
516 t1 = tokens[next][1].token;
517 rate0 += mb->token_costs[tx_size][type][band][pt][t0];
518 rate1 += mb->token_costs[tx_size][type][band][pt][t1];
519 UPDATE_RD_COST();
520 best = rd_cost1 < rd_cost0;
521 final_eob = i0 - 1;
522 for (i = next; i < eob; i = next) {
523 x = tokens[i][best].qc;
524 if (x)
525 final_eob = i;
526 rc = scan[i];
527 qcoeff_ptr[rc] = x;
528 dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);
529
530 next = tokens[i][best].next;
531 best = (best_mask[best] >> i) & 1;
532 }
533 final_eob++;
534
535 d->eob = final_eob;
536 *a = *l = (d->eob > !type);
537 }
538
539 /**************************************************************************
540 our inverse hadamard transform effectively is weighted sum of all 16 inputs
541 with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And
542 dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the
543 output after inverse wht and idct will be all zero. A sum of absolute value
544 smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht
545 fall between -65 and +65.
546 **************************************************************************/
547 #define SUM_2ND_COEFF_THRESH 65
548
549 static void check_reset_2nd_coeffs(MACROBLOCKD *xd,
550 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
551 int sum = 0;
552 int i;
553 BLOCKD *bd = &xd->block[24];
554 if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH
555 && bd->dequant[1] >= SUM_2ND_COEFF_THRESH)
556 return;
557
558 for (i = 0; i < bd->eob; i++) {
559 int coef = bd->dqcoeff[vp9_default_zig_zag1d[i]];
560 sum += (coef >= 0) ? coef : -coef;
561 if (sum >= SUM_2ND_COEFF_THRESH)
562 return;
563 }
564
565 if (sum < SUM_2ND_COEFF_THRESH) {
566 for (i = 0; i < bd->eob; i++) {
567 int rc = vp9_default_zig_zag1d[i];
568 bd->qcoeff[rc] = 0;
569 bd->dqcoeff[rc] = 0;
570 }
571 bd->eob = 0;
572 *a = *l = (bd->eob != 0);
573 }
574 }
575
576 #define SUM_2ND_COEFF_THRESH_8X8 32
577 static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd,
578 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
579 int sum = 0;
580 BLOCKD *bd = &xd->block[24];
581 int coef;
582
583 coef = bd->dqcoeff[0];
584 sum += (coef >= 0) ? coef : -coef;
585 coef = bd->dqcoeff[1];
586 sum += (coef >= 0) ? coef : -coef;
587 coef = bd->dqcoeff[4];
588 sum += (coef >= 0) ? coef : -coef;
589 coef = bd->dqcoeff[8];
590 sum += (coef >= 0) ? coef : -coef;
591
592 if (sum < SUM_2ND_COEFF_THRESH_8X8) {
593 bd->qcoeff[0] = 0;
594 bd->dqcoeff[0] = 0;
595 bd->qcoeff[1] = 0;
596 bd->dqcoeff[1] = 0;
597 bd->qcoeff[4] = 0;
598 bd->dqcoeff[4] = 0;
599 bd->qcoeff[8] = 0;
600 bd->dqcoeff[8] = 0;
601 bd->eob = 0;
602 *a = *l = (bd->eob != 0);
603 }
604 }
605
606 void vp9_optimize_mby_4x4(MACROBLOCK *x) {
607 int b;
608 PLANE_TYPE type;
609 int has_2nd_order;
610 ENTROPY_CONTEXT_PLANES t_above, t_left;
611 ENTROPY_CONTEXT *ta;
612 ENTROPY_CONTEXT *tl;
613
614 if (!x->e_mbd.above_context || !x->e_mbd.left_context)
615 return;
616
617 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
618 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
619
620 ta = (ENTROPY_CONTEXT *)&t_above;
621 tl = (ENTROPY_CONTEXT *)&t_left;
622
623 has_2nd_order = get_2nd_order_usage(&x->e_mbd);
624
625 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
626
627 for (b = 0; b < 16; b++) {
628 optimize_b(x, b, type,
629 ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
630 }
631
632 if (has_2nd_order) {
633 b = 24;
634 optimize_b(x, b, PLANE_TYPE_Y2,
635 ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
636 check_reset_2nd_coeffs(&x->e_mbd,
637 ta + vp9_block2above[b], tl + vp9_block2left[b]);
638 }
639 }
640
641 void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
642 int b;
643 ENTROPY_CONTEXT_PLANES t_above, t_left;
644 ENTROPY_CONTEXT *ta;
645 ENTROPY_CONTEXT *tl;
646
647 if (!x->e_mbd.above_context || !x->e_mbd.left_context)
648 return;
649
650 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
651 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
652
653 ta = (ENTROPY_CONTEXT *)&t_above;
654 tl = (ENTROPY_CONTEXT *)&t_left;
655
656 for (b = 16; b < 24; b++) {
657 optimize_b(x, b, PLANE_TYPE_UV,
658 ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
659 }
660 }
661
662 static void optimize_mb_4x4(MACROBLOCK *x) {
663 vp9_optimize_mby_4x4(x);
664 vp9_optimize_mbuv_4x4(x);
665 }
666
667 void vp9_optimize_mby_8x8(MACROBLOCK *x) {
668 int b;
669 PLANE_TYPE type;
670 ENTROPY_CONTEXT_PLANES t_above, t_left;
671 ENTROPY_CONTEXT *ta;
672 ENTROPY_CONTEXT *tl;
673 int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
674
675 if (!x->e_mbd.above_context || !x->e_mbd.left_context)
676 return;
677
678 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
679 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
680
681 ta = (ENTROPY_CONTEXT *)&t_above;
682 tl = (ENTROPY_CONTEXT *)&t_left;
683 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
684 for (b = 0; b < 16; b += 4) {
685 optimize_b(x, b, type,
686 ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b],
687 TX_8X8);
688 ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]];
689 tl[vp9_block2left_8x8[b] + 1] = tl[vp9_block2left_8x8[b]];
690 }
691
692 // 8x8 always have 2nd roder haar block
693 if (has_2nd_order) {
694 check_reset_8x8_2nd_coeffs(&x->e_mbd,
695 ta + vp9_block2above_8x8[24],
696 tl + vp9_block2left_8x8[24]);
697 }
698 }
699
700 void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
701 int b;
702 ENTROPY_CONTEXT_PLANES t_above, t_left;
703 ENTROPY_CONTEXT *ta;
704 ENTROPY_CONTEXT *tl;
705
706 if (!x->e_mbd.above_context || !x->e_mbd.left_context)
707 return;
708
709 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
710 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
711
712 ta = (ENTROPY_CONTEXT *)&t_above;
713 tl = (ENTROPY_CONTEXT *)&t_left;
714
715 for (b = 16; b < 24; b += 4) {
716 optimize_b(x, b, PLANE_TYPE_UV,
717 ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b],
718 TX_8X8);
719 ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]];
720 tl[vp9_block2left_8x8[b] + 1] = tl[vp9_block2left_8x8[b]];
721 }
722 }
723
724 static void optimize_mb_8x8(MACROBLOCK *x) {
725 vp9_optimize_mby_8x8(x);
726 vp9_optimize_mbuv_8x8(x);
727 }
728
729 static void optimize_b_16x16(MACROBLOCK *mb, int i, PLANE_TYPE type,
730 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
731 BLOCK *b = &mb->block[i];
732 BLOCKD *d = &mb->e_mbd.block[i];
733 vp9_token_state tokens[257][2];
734 unsigned best_index[257][2];
735 const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
736 short *qcoeff_ptr = qcoeff_ptr = d->qcoeff;
737 short *dqcoeff_ptr = dqcoeff_ptr = d->dqcoeff;
738 int eob = d->eob, final_eob, sz = 0;
739 int rc, x, next;
740 int64_t rdmult, rddiv, rd_cost0, rd_cost1;
741 int rate0, rate1, error0, error1, t0, t1;
742 int best, band, pt;
743 int err_mult = plane_rd_mult[type];
744
745 /* Now set up a Viterbi trellis to evaluate alternative roundings. */
746 rdmult = mb->rdmult * err_mult;
747 if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
748 rdmult = (rdmult * 9)>>4;
749 rddiv = mb->rddiv;
750 memset(best_index, 0, sizeof(best_index));
751 /* Initialize the sentinel node of the trellis. */
752 tokens[eob][0].rate = 0;
753 tokens[eob][0].error = 0;
754 tokens[eob][0].next = 256;
755 tokens[eob][0].token = DCT_EOB_TOKEN;
756 tokens[eob][0].qc = 0;
757 *(tokens[eob] + 1) = *(tokens[eob] + 0);
758 next = eob;
759 for (i = eob; i-- > 0;) {
760 int base_bits, d2, dx;
761
762 rc = vp9_default_zig_zag1d_16x16[i];
763 x = qcoeff_ptr[rc];
764 /* Only add a trellis state for non-zero coefficients. */
765 if (x) {
766 int shortcut = 0;
767 error0 = tokens[next][0].error;
768 error1 = tokens[next][1].error;
769 /* Evaluate the first possibility for this state. */
770 rate0 = tokens[next][0].rate;
771 rate1 = tokens[next][1].rate;
772 t0 = (vp9_dct_value_tokens_ptr + x)->Token;
773 /* Consider both possible successor states. */
774 if (next < 256) {
775 band = vp9_coef_bands_16x16[i + 1];
776 pt = vp9_prev_token_class[t0];
777 rate0 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][0].token ];
778 rate1 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][1].token ];
779 }
780 UPDATE_RD_COST();
781 /* And pick the best. */
782 best = rd_cost1 < rd_cost0;
783 base_bits = *(vp9_dct_value_cost_ptr + x);
784 dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
785 d2 = dx*dx;
786 tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
787 tokens[i][0].error = d2 + (best ? error1 : error0);
788 tokens[i][0].next = next;
789 tokens[i][0].token = t0;
790 tokens[i][0].qc = x;
791 best_index[i][0] = best;
792 /* Evaluate the second possibility for this state. */
793 rate0 = tokens[next][0].rate;
794 rate1 = tokens[next][1].rate;
795
796 if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) &&
797 (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0]))
798 shortcut = 1;
799 else
800 shortcut = 0;
801
802 if (shortcut) {
803 sz = -(x < 0);
804 x -= 2*sz + 1;
805 }
806
807 /* Consider both possible successor states. */
808 if (!x) {
809 /* If we reduced this coefficient to zero, check to see if
810 * we need to move the EOB back here.
811 */
812 t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
813 DCT_EOB_TOKEN : ZERO_TOKEN;
814 t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
815 DCT_EOB_TOKEN : ZERO_TOKEN;
816 }
817 else
818 t0=t1 = (vp9_dct_value_tokens_ptr + x)->Token;
819 if (next < 256) {
820 band = vp9_coef_bands_16x16[i + 1];
821 if (t0 != DCT_EOB_TOKEN) {
822 pt = vp9_prev_token_class[t0];
823 rate0 += mb->token_costs[TX_16X16][type][band][pt]
824 [tokens[next][0].token];
825 }
826 if (t1!=DCT_EOB_TOKEN) {
827 pt = vp9_prev_token_class[t1];
828 rate1 += mb->token_costs[TX_16X16][type][band][pt]
829 [tokens[next][1].token];
830 }
831 }
832 UPDATE_RD_COST();
833 /* And pick the best. */
834 best = rd_cost1 < rd_cost0;
835 base_bits = *(vp9_dct_value_cost_ptr + x);
836
837 if(shortcut) {
838 dx -= (dequant_ptr[rc!=0] + sz) ^ sz;
839 d2 = dx*dx;
840 }
841 tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
842 tokens[i][1].error = d2 + (best ? error1 : error0);
843 tokens[i][1].next = next;
844 tokens[i][1].token = best ? t1 : t0;
845 tokens[i][1].qc = x;
846 best_index[i][1] = best;
847 /* Finally, make this the new head of the trellis. */
848 next = i;
849 }
850 /* There's no choice to make for a zero coefficient, so we don't
851 * add a new trellis node, but we do need to update the costs.
852 */
853 else {
854 band = vp9_coef_bands_16x16[i + 1];
855 t0 = tokens[next][0].token;
856 t1 = tokens[next][1].token;
857 /* Update the cost of each path if we're past the EOB token. */
858 if (t0 != DCT_EOB_TOKEN) {
859 tokens[next][0].rate += mb->token_costs[TX_16X16][type][band][0][t0];
860 tokens[next][0].token = ZERO_TOKEN;
861 }
862 if (t1 != DCT_EOB_TOKEN) {
863 tokens[next][1].rate += mb->token_costs[TX_16X16][type][band][0][t1];
864 tokens[next][1].token = ZERO_TOKEN;
865 }
866 /* Don't update next, because we didn't add a new node. */
867 }
868 }
869
870 /* Now pick the best path through the whole trellis. */
871 band = vp9_coef_bands_16x16[i + 1];
872 VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
873 rate0 = tokens[next][0].rate;
874 rate1 = tokens[next][1].rate;
875 error0 = tokens[next][0].error;
876 error1 = tokens[next][1].error;
877 t0 = tokens[next][0].token;
878 t1 = tokens[next][1].token;
879 rate0 += mb->token_costs[TX_16X16][type][band][pt][t0];
880 rate1 += mb->token_costs[TX_16X16][type][band][pt][t1];
881 UPDATE_RD_COST();
882 best = rd_cost1 < rd_cost0;
883 final_eob = -1;
884
885 for (i = next; i < eob; i = next) {
886 x = tokens[i][best].qc;
887 if (x)
888 final_eob = i;
889 rc = vp9_default_zig_zag1d_16x16[i];
890 qcoeff_ptr[rc] = x;
891 dqcoeff_ptr[rc] = (x * dequant_ptr[rc!=0]);
892
893 next = tokens[i][best].next;
894 best = best_index[i][best];
895 }
896 final_eob++;
897
898 d->eob = final_eob;
899 *a = *l = (d->eob > !type);
900 }
901
902 void vp9_optimize_mby_16x16(MACROBLOCK *x) {
903 ENTROPY_CONTEXT_PLANES t_above, t_left;
904 ENTROPY_CONTEXT *ta, *tl;
905
906 if (!x->e_mbd.above_context || !x->e_mbd.left_context)
907 return;
908
909 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
910 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
911
912 ta = (ENTROPY_CONTEXT *)&t_above;
913 tl = (ENTROPY_CONTEXT *)&t_left;
914 optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl);
915 }
916
917 static void optimize_mb_16x16(MACROBLOCK *x) {
918 vp9_optimize_mby_16x16(x);
919 vp9_optimize_mbuv_8x8(x);
920 }
921
922 void vp9_fidct_mb(MACROBLOCK *x) {
923 MACROBLOCKD *const xd = &x->e_mbd;
924 TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
925
926 if (tx_size == TX_16X16) {
927 vp9_transform_mb_16x16(x);
928 vp9_quantize_mb_16x16(x);
929 if (x->optimize)
930 optimize_mb_16x16(x);
931 vp9_inverse_transform_mb_16x16(xd);
932 } else if (tx_size == TX_8X8) {
933 if (xd->mode_info_context->mbmi.mode == SPLITMV) {
934 assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
935 vp9_transform_mby_8x8(x);
936 vp9_transform_mbuv_4x4(x);
937 vp9_quantize_mby_8x8(x);
938 vp9_quantize_mbuv_4x4(x);
939 if (x->optimize) {
940 vp9_optimize_mby_8x8(x);
941 vp9_optimize_mbuv_4x4(x);
942 }
943 vp9_inverse_transform_mby_8x8(xd);
944 vp9_inverse_transform_mbuv_4x4(xd);
945 } else {
946 vp9_transform_mb_8x8(x);
947 vp9_quantize_mb_8x8(x);
948 if (x->optimize)
949 optimize_mb_8x8(x);
950 vp9_inverse_transform_mb_8x8(xd);
951 }
952 } else {
953 transform_mb_4x4(x);
954 vp9_quantize_mb_4x4(x);
955 if (x->optimize)
956 optimize_mb_4x4(x);
957 vp9_inverse_transform_mb_4x4(xd);
958 }
959 }
960
961 void vp9_encode_inter16x16(MACROBLOCK *x) {
962 MACROBLOCKD *const xd = &x->e_mbd;
963
964 vp9_build_inter_predictors_mb(xd);
965 subtract_mb(x);
966 vp9_fidct_mb(x);
967 vp9_recon_mb(xd);
968 }
969
970 /* this function is used by first pass only */
971 void vp9_encode_inter16x16y(MACROBLOCK *x) {
972 MACROBLOCKD *xd = &x->e_mbd;
973 BLOCK *b = &x->block[0];
974
975 #if CONFIG_PRED_FILTER
976 // Disable the prediction filter for firstpass
977 xd->mode_info_context->mbmi.pred_filter_enabled = 0;
978 #endif
979
980 vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
981
982 vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
983
984 vp9_transform_mby_4x4(x);
985 vp9_quantize_mby_4x4(x);
986 vp9_inverse_transform_mby_4x4(xd);
987
988 vp9_recon_mby(xd);
989 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698