OLD | NEW |
1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Quantization | 10 // Quantization |
(...skipping 12 matching lines...) |
23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky; not worth it. | 23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky; not worth it. |
24 #define USE_TDISTO 1 | 24 #define USE_TDISTO 1 |
25 | 25 |
26 #define MID_ALPHA 64 // neutral value for susceptibility | 26 #define MID_ALPHA 64 // neutral value for susceptibility |
27 #define MIN_ALPHA 30 // lowest usable value for susceptibility | 27 #define MIN_ALPHA 30 // lowest usable value for susceptibility |
28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility | 28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility |
29 | 29 |
30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP | 30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP |
31 // power-law modulation. Must be strictly less than 1. | 31 // power-law modulation. Must be strictly less than 1. |
32 | 32 |
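Note: the "strictly less than 1" requirement bounds the power-law exponent used when a segment's susceptibility (alpha) modulates the base quantizer. A hedged sketch of that modulation's shape (it lives in the segment-setup code, not in this file; variable names are illustrative):

    /* amp <= SNS_TO_DQ/128 < 1/128, so for alpha in [-127, 127]
     * the exponent below stays strictly positive: */
    const double amp = SNS_TO_DQ * sns_strength / 100. / 128.;
    const double expn = 1. - amp * alpha;  /* power-law exponent */
    const double c = pow(c_base, expn);    /* modulated compression factor */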
33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision | 33 #define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision |
34 | 34 |
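I4_PENALTY acts as a constant rate proxy in the quick i4/i16 decision: RefineUsingDistortion() below compares raw SSE scores, and i4 mode typically costs more header bits than i16, so the i4 side starts with a handicap. The patch raises it from 4000 to 14000, biasing that quick decision further toward i16. Schematically (illustrative names, not the patch's code):

    score_t score_i4 = (score_t)I4_PENALTY;
    for (n = 0; n < 16; ++n) score_i4 += best_i4_sse[n];
    const int use_i4 = (score_i4 < best_i16_sse);  /* else keep i16 */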
35 // number of non-zero coeffs below which we consider the block very flat | 35 // number of non-zero coeffs below which we consider the block very flat |
36 // (and apply a penalty to complex predictions) | 36 // (and apply a penalty to complex predictions) |
37 #define FLATNESS_LIMIT_I16 10 // I16 mode | 37 #define FLATNESS_LIMIT_I16 10 // I16 mode |
38 #define FLATNESS_LIMIT_I4 3 // I4 mode | 38 #define FLATNESS_LIMIT_I4 3 // I4 mode |
39 #define FLATNESS_LIMIT_UV 2 // UV mode | 39 #define FLATNESS_LIMIT_UV 2 // UV mode |
40 #define FLATNESS_PENALTY 140 // roughly 1 bit per block | 40 #define FLATNESS_PENALTY 140 // roughly 1 bit per block |
41 | 41 |
42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) | 42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) |
43 | 43 |
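MULT_8B() is an 8-bit fixed-point multiply with rounding, i.e. the nearest integer to a*b/256; it is used below to scale the texture distortion (VP8TDisto*) by tlambda. A quick self-check (illustrative only):

    #include <assert.h>
    #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
    int main(void) {
      assert(MULT_8B(200, 90) == 70);    /* 200*90/256 = 70.3 -> 70 */
      assert(MULT_8B(256, 256) == 256);  /* b == 256 acts as identity */
      return 0;
    }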
| 44 #define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) |
| 45 |
44 // #define DEBUG_BLOCK | 46 // #define DEBUG_BLOCK |
45 | 47 |
46 //------------------------------------------------------------------------------ | 48 //------------------------------------------------------------------------------ |
47 | 49 |
48 #if defined(DEBUG_BLOCK) | 50 #if defined(DEBUG_BLOCK) |
49 | 51 |
50 #include <stdio.h> | 52 #include <stdio.h> |
51 #include <stdlib.h> | 53 #include <stdlib.h> |
52 | 54 |
53 static void PrintBlockInfo(const VP8EncIterator* const it, | 55 static void PrintBlockInfo(const VP8EncIterator* const it, |
54 const VP8ModeScore* const rd) { | 56 const VP8ModeScore* const rd) { |
55 int i, j; | 57 int i, j; |
56 const int is_i16 = (it->mb_->type_ == 1); | 58 const int is_i16 = (it->mb_->type_ == 1); |
| 59 const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC; |
| 60 const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC; |
| 61 const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC; |
| 62 const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC; |
57 printf("SOURCE / OUTPUT / ABS DELTA\n"); | 63 printf("SOURCE / OUTPUT / ABS DELTA\n"); |
58 for (j = 0; j < 24; ++j) { | 64 for (j = 0; j < 16; ++j) { |
59 if (j == 16) printf("\n"); // newline before the U/V block | 65 for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]); |
60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]); | |
61 printf(" "); | 66 printf(" "); |
62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); | 67 for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]); |
63 printf(" "); | 68 printf(" "); |
64 for (i = 0; i < 16; ++i) { | 69 for (i = 0; i < 16; ++i) { |
65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); | 70 printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS])); |
66 } | 71 } |
67 printf("\n"); | 72 printf("\n"); |
68 } | 73 } |
| 74 printf("\n"); // newline before the U/V block |
| 75 for (j = 0; j < 8; ++j) { |
| 76 for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]); |
| 77 printf(" "); |
| 78 for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]); |
| 79 printf(" "); |
| 80 for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]); |
| 81 printf(" "); |
| 82 for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]); |
| 83 printf(" "); |
| 84 for (i = 0; i < 8; ++i) { |
| 85 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); |
| 86 } |
| 87 printf(" "); |
| 88 for (i = 8; i < 16; ++i) { |
| 89 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); |
| 90 } |
| 91 printf("\n"); |
| 92 } |
69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", | 93 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", |
70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, | 94 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, |
71 (int)rd->score); | 95 (int)rd->score); |
72 if (is_i16) { | 96 if (is_i16) { |
73 printf("Mode: %d\n", rd->mode_i16); | 97 printf("Mode: %d\n", rd->mode_i16); |
74 printf("y_dc_levels:"); | 98 printf("y_dc_levels:"); |
75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); | 99 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); |
76 printf("\n"); | 100 printf("\n"); |
77 } else { | 101 } else { |
78 printf("Modes[16]: "); | 102 printf("Modes[16]: "); |
(...skipping 358 matching lines...) |
437 } | 461 } |
438 | 462 |
439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) { | 463 void VP8MakeIntra4Preds(const VP8EncIterator* const it) { |
440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_); | 464 VP8EncPredLuma4(it->yuv_p_, it->i4_top_); |
441 } | 465 } |
442 | 466 |
443 //------------------------------------------------------------------------------ | 467 //------------------------------------------------------------------------------ |
444 // Quantize | 468 // Quantize |
445 | 469 |
446 // Layout: | 470 // Layout: |
447 // +----+ | 471 // +----+----+ |
448 // |YYYY| 0 | 472 // |YYYY|UUVV| 0 |
449 // |YYYY| 4 | 473 // |YYYY|UUVV| 4 |
450 // |YYYY| 8 | 474 // |YYYY|....| 8 |
451 // |YYYY| 12 | 475 // |YYYY|....| 12 |
452 // +----+ | 476 // +----+----+ |
453 // |UUVV| 16 | |
454 // |UUVV| 20 | |
455 // +----+ | |
456 | 477 |
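The repacked layout keeps the whole macroblock (16x16 luma, with the two 8x8 chroma planes side by side) in the top 16 rows of the scratch buffer; Y_OFF_ENC and U_OFF_ENC are the matching base offsets used throughout this patch. VP8Scan below lists the 16 luma 4x4 sub-blocks in raster order, and the table is equivalent to this closed form (a sketch, assuming the same BPS stride):

    /* offset of luma 4x4 sub-block n (0..15), raster order: */
    static int ScanOffset(int n) {
      return (n & 3) * 4 + (n >> 2) * 4 * BPS;  /* x*4 + y*4*BPS */
    }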
457 const int VP8Scan[16] = { // Luma | 478 const int VP8Scan[16] = { // Luma |
458 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, | 479 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
459 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, | 480 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
460 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, | 481 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
461 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, | 482 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, |
462 }; | 483 }; |
463 | 484 |
464 static const int VP8ScanUV[4 + 4] = { | 485 static const int VP8ScanUV[4 + 4] = { |
465 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U | 486 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U |
(...skipping 65 matching lines...) |
531 // If a coefficient was quantized to a value Q (using a neutral bias), | 552 // If a coefficient was quantized to a value Q (using a neutral bias), |
532 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] | 553 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] |
533 // We don't test negative values though. | 554 // We don't test negative values though. |
534 #define MIN_DELTA 0 // how much lower level to try | 555 #define MIN_DELTA 0 // how much lower level to try |
535 #define MAX_DELTA 1 // how much higher | 556 #define MAX_DELTA 1 // how much higher |
536 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) | 557 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) |
537 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) | 558 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) |
538 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) | 559 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) |
539 | 560 |
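With MIN_DELTA == 0 and MAX_DELTA == 1, each coefficient position carries NUM_NODES == 2 candidate levels: the neutrally-biased level0 and level0 + 1. The NODE()/SCORE_STATE() macros merely shift the [-MIN_DELTA, MAX_DELTA] range to 0-based array indices. Illustrative enumeration (hypothetical helper, not part of the patch):

    static void ListCandidates(int level0, int out[NUM_NODES]) {
      int m;
      for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
        out[m + MIN_DELTA] = level0 + m;  /* here: {level0, level0 + 1} */
      }
    }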
540 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { | 561 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { |
541 // TODO: incorporate the "* 256" in the tables? | 562 rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD); |
542 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD); | |
543 } | 563 } |
544 | 564 |
545 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, | 565 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, |
546 score_t distortion) { | 566 score_t distortion) { |
547 return rate * lambda + 256 * distortion; | 567 return rate * lambda + RD_DISTO_MULT * distortion; |
548 } | 568 } |
549 | 569 |
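Both helpers now share RD_DISTO_MULT, so rate (in the encoder's fixed-point bit units) and distortion (SSE) land on comparable scales. A worked instance with illustrative numbers:

    /* lambda = 100, rate R+H = 50, distortion D+SD = 30: */
    const score_t score = 50 * 100 + RD_DISTO_MULT * 30;  /* 5000 + 7680 = 12680 */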
550 static int TrellisQuantizeBlock(const VP8Encoder* const enc, | 570 static int TrellisQuantizeBlock(const VP8Encoder* const enc, |
551 int16_t in[16], int16_t out[16], | 571 int16_t in[16], int16_t out[16], |
552 int ctx0, int coeff_type, | 572 int ctx0, int coeff_type, |
553 const VP8Matrix* const mtx, | 573 const VP8Matrix* const mtx, |
554 int lambda) { | 574 int lambda) { |
555 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; | 575 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; |
556 const CostArray* const costs = enc->proba_.level_cost_[coeff_type]; | 576 CostArrayPtr const costs = |
| 577 (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; |
557 const int first = (coeff_type == 0) ? 1 : 0; | 578 const int first = (coeff_type == 0) ? 1 : 0; |
558 Node nodes[16][NUM_NODES]; | 579 Node nodes[16][NUM_NODES]; |
559 ScoreState score_states[2][NUM_NODES]; | 580 ScoreState score_states[2][NUM_NODES]; |
560 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); | 581 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); |
561 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); | 582 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); |
562 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous | 583 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous |
563 score_t best_score; | 584 score_t best_score; |
564 int n, m, p, last; | 585 int n, m, p, last; |
565 | 586 |
566 { | 587 { |
(...skipping 16 matching lines...) |
583 if (last < 15) ++last; | 604 if (last < 15) ++last; |
584 | 605 |
585 // compute 'skip' score. This is the max score one can do. | 606 // compute 'skip' score. This is the max score one can do. |
586 cost = VP8BitCost(0, last_proba); | 607 cost = VP8BitCost(0, last_proba); |
587 best_score = RDScoreTrellis(lambda, cost, 0); | 608 best_score = RDScoreTrellis(lambda, cost, 0); |
588 | 609 |
589 // initialize source node. | 610 // initialize source node. |
590 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { | 611 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { |
591 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; | 612 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; |
592 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); | 613 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); |
593 ss_cur[m].costs = costs[VP8EncBands[first]][ctx0]; | 614 ss_cur[m].costs = costs[first][ctx0]; |
594 } | 615 } |
595 } | 616 } |
596 | 617 |
597 // traverse trellis. | 618 // traverse trellis. |
598 for (n = first; n <= last; ++n) { | 619 for (n = first; n <= last; ++n) { |
599 const int j = kZigzag[n]; | 620 const int j = kZigzag[n]; |
600 const uint32_t Q = mtx->q_[j]; | 621 const uint32_t Q = mtx->q_[j]; |
601 const uint32_t iQ = mtx->iq_[j]; | 622 const uint32_t iQ = mtx->iq_[j]; |
602 const uint32_t B = BIAS(0x00); // neutral bias | 623 const uint32_t B = BIAS(0x00); // neutral bias |
603 // note: it's important to take sign of the _original_ coeff, | 624 // note: it's important to take sign of the _original_ coeff, |
(...skipping 13 matching lines...) |
617 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { | 638 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { |
618 Node* const cur = &NODE(n, m); | 639 Node* const cur = &NODE(n, m); |
619 int level = level0 + m; | 640 int level = level0 + m; |
620 const int ctx = (level > 2) ? 2 : level; | 641 const int ctx = (level > 2) ? 2 : level; |
621 const int band = VP8EncBands[n + 1]; | 642 const int band = VP8EncBands[n + 1]; |
622 score_t base_score, last_pos_score; | 643 score_t base_score, last_pos_score; |
623 score_t best_cur_score = MAX_COST; | 644 score_t best_cur_score = MAX_COST; |
624 int best_prev = 0; // default, in case | 645 int best_prev = 0; // default, in case |
625 | 646 |
626 ss_cur[m].score = MAX_COST; | 647 ss_cur[m].score = MAX_COST; |
627 ss_cur[m].costs = costs[band][ctx]; | 648 ss_cur[m].costs = costs[n + 1][ctx]; |
628 if (level > MAX_LEVEL || level < 0) { // node is dead? | 649 if (level > MAX_LEVEL || level < 0) { // node is dead? |
629 continue; | 650 continue; |
630 } | 651 } |
631 | 652 |
632 // Compute extra rate cost if last coeff's position is < 15 | 653 // Compute extra rate cost if last coeff's position is < 15 |
633 { | 654 { |
634 const score_t last_pos_cost = | 655 const score_t last_pos_cost = |
635 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; | 656 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; |
636 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); | 657 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); |
637 } | 658 } |
(...skipping 74 matching lines...) |
712 // Performs: difference, transform, quantize, back-transform, add | 733 // Performs: difference, transform, quantize, back-transform, add |
713 // all at once. Output is the reconstructed block in *yuv_out, and the | 734 // all at once. Output is the reconstructed block in *yuv_out, and the |
714 // quantized levels in *levels. | 735 // quantized levels in *levels. |
715 | 736 |
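A minimal sketch of the quantize/reconstruct round-trip described above (assumed helper, simplified: the real code batches two blocks per VP8FTransform2()/VP8EncQuantize2Blocks() call, applies per-coefficient bias, and for i16 routes the DC terms through the Walsh-Hadamard transform):

    #include <stdlib.h>
    /* the encoder codes 'level'; the reconstruction in *yuv_out is then
     * rebuilt from level * q, so the residual error is coeff - level*q: */
    static int QuantizeCoeff(int coeff, int q) {
      const int sign = (coeff < 0);
      const int level = (abs(coeff) + q / 2) / q;
      return sign ? -level : level;
    }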
716 static int ReconstructIntra16(VP8EncIterator* const it, | 737 static int ReconstructIntra16(VP8EncIterator* const it, |
717 VP8ModeScore* const rd, | 738 VP8ModeScore* const rd, |
718 uint8_t* const yuv_out, | 739 uint8_t* const yuv_out, |
719 int mode) { | 740 int mode) { |
720 const VP8Encoder* const enc = it->enc_; | 741 const VP8Encoder* const enc = it->enc_; |
721 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; | 742 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
722 const uint8_t* const src = it->yuv_in_ + Y_OFF; | 743 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
723 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; | 744 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
724 int nz = 0; | 745 int nz = 0; |
725 int n; | 746 int n; |
726 int16_t tmp[16][16], dc_tmp[16]; | 747 int16_t tmp[16][16], dc_tmp[16]; |
727 | 748 |
728 for (n = 0; n < 16; ++n) { | 749 for (n = 0; n < 16; n += 2) { |
729 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); | 750 VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); |
730 } | 751 } |
731 VP8FTransformWHT(tmp[0], dc_tmp); | 752 VP8FTransformWHT(tmp[0], dc_tmp); |
732 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; | 753 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; |
733 | 754 |
734 if (DO_TRELLIS_I16 && it->do_trellis_) { | 755 if (DO_TRELLIS_I16 && it->do_trellis_) { |
735 int x, y; | 756 int x, y; |
736 VP8IteratorNzToBytes(it); | 757 VP8IteratorNzToBytes(it); |
737 for (y = 0, n = 0; y < 4; ++y) { | 758 for (y = 0, n = 0; y < 4; ++y) { |
738 for (x = 0; x < 4; ++x, ++n) { | 759 for (x = 0; x < 4; ++x, ++n) { |
739 const int ctx = it->top_nz_[x] + it->left_nz_[y]; | 760 const int ctx = it->top_nz_[x] + it->left_nz_[y]; |
740 const int non_zero = | 761 const int non_zero = |
741 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, | 762 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, |
742 &dqm->y1_, dqm->lambda_trellis_i16_); | 763 &dqm->y1_, dqm->lambda_trellis_i16_); |
743 it->top_nz_[x] = it->left_nz_[y] = non_zero; | 764 it->top_nz_[x] = it->left_nz_[y] = non_zero; |
744 rd->y_ac_levels[n][0] = 0; | 765 rd->y_ac_levels[n][0] = 0; |
745 nz |= non_zero << n; | 766 nz |= non_zero << n; |
746 } | 767 } |
747 } | 768 } |
748 } else { | 769 } else { |
749 for (n = 0; n < 16; ++n) { | 770 for (n = 0; n < 16; n += 2) { |
750 // Zero-out the first coeff, so that: a) nz is correct below, and | 771 // Zero-out the first coeff, so that: a) nz is correct below, and |
751 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. | 772 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. |
752 tmp[n][0] = 0; | 773 tmp[n][0] = tmp[n + 1][0] = 0; |
753 nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; | 774 nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; |
754 assert(rd->y_ac_levels[n][0] == 0); | 775 assert(rd->y_ac_levels[n + 0][0] == 0); |
| 776 assert(rd->y_ac_levels[n + 1][0] == 0); |
755 } | 777 } |
756 } | 778 } |
757 | 779 |
758 // Transform back | 780 // Transform back |
759 VP8TransformWHT(dc_tmp, tmp[0]); | 781 VP8TransformWHT(dc_tmp, tmp[0]); |
760 for (n = 0; n < 16; n += 2) { | 782 for (n = 0; n < 16; n += 2) { |
761 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); | 783 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); |
762 } | 784 } |
763 | 785 |
764 return nz; | 786 return nz; |
(...skipping 20 matching lines...) |
785 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); | 807 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); |
786 } | 808 } |
787 VP8ITransform(ref, tmp, yuv_out, 0); | 809 VP8ITransform(ref, tmp, yuv_out, 0); |
788 return nz; | 810 return nz; |
789 } | 811 } |
790 | 812 |
791 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, | 813 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, |
792 uint8_t* const yuv_out, int mode) { | 814 uint8_t* const yuv_out, int mode) { |
793 const VP8Encoder* const enc = it->enc_; | 815 const VP8Encoder* const enc = it->enc_; |
794 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; | 816 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
795 const uint8_t* const src = it->yuv_in_ + U_OFF; | 817 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
796 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; | 818 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
797 int nz = 0; | 819 int nz = 0; |
798 int n; | 820 int n; |
799 int16_t tmp[8][16]; | 821 int16_t tmp[8][16]; |
800 | 822 |
801 for (n = 0; n < 8; ++n) { | 823 for (n = 0; n < 8; n += 2) { |
802 VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); | 824 VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); |
803 } | 825 } |
804 if (DO_TRELLIS_UV && it->do_trellis_) { | 826 if (DO_TRELLIS_UV && it->do_trellis_) { |
805 int ch, x, y; | 827 int ch, x, y; |
806 for (ch = 0, n = 0; ch <= 2; ch += 2) { | 828 for (ch = 0, n = 0; ch <= 2; ch += 2) { |
807 for (y = 0; y < 2; ++y) { | 829 for (y = 0; y < 2; ++y) { |
808 for (x = 0; x < 2; ++x, ++n) { | 830 for (x = 0; x < 2; ++x, ++n) { |
809 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; | 831 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; |
810 const int non_zero = | 832 const int non_zero = |
811 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, | 833 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, |
812 &dqm->uv_, dqm->lambda_trellis_uv_); | 834 &dqm->uv_, dqm->lambda_trellis_uv_); |
813 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; | 835 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; |
814 nz |= non_zero << n; | 836 nz |= non_zero << n; |
815 } | 837 } |
816 } | 838 } |
817 } | 839 } |
818 } else { | 840 } else { |
819 for (n = 0; n < 8; ++n) { | 841 for (n = 0; n < 8; n += 2) { |
820 nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; | 842 nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; |
821 } | 843 } |
822 } | 844 } |
823 | 845 |
824 for (n = 0; n < 8; n += 2) { | 846 for (n = 0; n < 8; n += 2) { |
825 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); | 847 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); |
826 } | 848 } |
827 return (nz << 16); | 849 return (nz << 16); |
828 } | 850 } |
829 | 851 |
830 //------------------------------------------------------------------------------ | 852 //------------------------------------------------------------------------------ |
831 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost. | 853 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost. |
832 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion. | 854 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion. |
833 | 855 |
834 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { | 856 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { |
835 // We look at the first three AC coefficients to determine the average | 857 // We look at the first three AC coefficients to determine the average |
836 // delta between each sub-4x4 block. | 858 // delta between each sub-4x4 block. |
837 const int v0 = abs(DCs[1]); | 859 const int v0 = abs(DCs[1]); |
838 const int v1 = abs(DCs[4]); | 860 const int v1 = abs(DCs[4]); |
839 const int v2 = abs(DCs[5]); | 861 const int v2 = abs(DCs[5]); |
840 int max_v = (v0 > v1) ? v1 : v0; | 862 int max_v = (v0 > v1) ? v1 : v0; |
841 max_v = (v2 > max_v) ? v2 : max_v; | 863 max_v = (v2 > max_v) ? v2 : max_v; |
842 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; | 864 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; |
843 } | 865 } |
844 | 866 |
| 867 static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { |
| 868 VP8ModeScore* const tmp = *a; |
| 869 *a = *b; |
| 870 *b = tmp; |
| 871 } |
| 872 |
845 static void SwapPtr(uint8_t** a, uint8_t** b) { | 873 static void SwapPtr(uint8_t** a, uint8_t** b) { |
846 uint8_t* const tmp = *a; | 874 uint8_t* const tmp = *a; |
847 *a = *b; | 875 *a = *b; |
848 *b = tmp; | 876 *b = tmp; |
849 } | 877 } |
850 | 878 |
851 static void SwapOut(VP8EncIterator* const it) { | 879 static void SwapOut(VP8EncIterator* const it) { |
852 SwapPtr(&it->yuv_out_, &it->yuv_out2_); | 880 SwapPtr(&it->yuv_out_, &it->yuv_out2_); |
853 } | 881 } |
854 | 882 |
855 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { | 883 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { |
856 score_t score = 0; | 884 score_t score = 0; |
857 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? | 885 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? |
858 int i; | 886 int i; |
859 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC | 887 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC |
860 score += (levels[i] != 0); | 888 score += (levels[i] != 0); |
861 if (score > thresh) return 0; | 889 if (score > thresh) return 0; |
862 } | 890 } |
863 levels += 16; | 891 levels += 16; |
864 } | 892 } |
865 return 1; | 893 return 1; |
866 } | 894 } |
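IsFlat() is used as a boolean despite the score_t return type: it answers "do these blocks have at most 'thresh' non-zero AC levels in total?". Typical call site, as in PickBestIntra16() below:

    /* penalize complex modes on flat macroblocks (~1 bit per block): */
    if (mode > 0 && IsFlat(rd_cur->y_ac_levels[0], 16, FLATNESS_LIMIT_I16)) {
      rd_cur->R += FLATNESS_PENALTY * 16;  /* kNumBlocks == 16 for i16 */
    }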
867 | 895 |
868 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { | 896 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { |
869 const int kNumBlocks = 16; | 897 const int kNumBlocks = 16; |
870 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; | 898 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
871 const int lambda = dqm->lambda_i16_; | 899 const int lambda = dqm->lambda_i16_; |
872 const int tlambda = dqm->tlambda_; | 900 const int tlambda = dqm->tlambda_; |
873 const uint8_t* const src = it->yuv_in_ + Y_OFF; | 901 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
874 VP8ModeScore rd16; | 902 VP8ModeScore rd_tmp; |
| 903 VP8ModeScore* rd_cur = &rd_tmp; |
| 904 VP8ModeScore* rd_best = rd; |
875 int mode; | 905 int mode; |
876 | 906 |
877 rd->mode_i16 = -1; | 907 rd->mode_i16 = -1; |
878 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 908 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
879 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer | 909 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer |
880 int nz; | 910 rd_cur->mode_i16 = mode; |
881 | 911 |
882 // Reconstruct | 912 // Reconstruct |
883 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); | 913 rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode); |
884 | 914 |
885 // Measure RD-score | 915 // Measure RD-score |
886 rd16.D = VP8SSE16x16(src, tmp_dst); | 916 rd_cur->D = VP8SSE16x16(src, tmp_dst); |
887 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) | 917 rd_cur->SD = |
888 : 0; | 918 tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; |
889 rd16.H = VP8FixedCostsI16[mode]; | 919 rd_cur->H = VP8FixedCostsI16[mode]; |
890 rd16.R = VP8GetCostLuma16(it, &rd16); | 920 rd_cur->R = VP8GetCostLuma16(it, rd_cur); |
891 if (mode > 0 && | 921 if (mode > 0 && |
892 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { | 922 IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { |
893 // penalty to avoid flat area to be mispredicted by complex mode | 923 // penalty to avoid flat area to be mispredicted by complex mode |
894 rd16.R += FLATNESS_PENALTY * kNumBlocks; | 924 rd_cur->R += FLATNESS_PENALTY * kNumBlocks; |
895 } | 925 } |
896 | 926 |
897 // Since we always examine Intra16 first, we can overwrite *rd directly. | 927 // Since we always examine Intra16 first, we can overwrite *rd directly. |
898 SetRDScore(lambda, &rd16); | 928 SetRDScore(lambda, rd_cur); |
899 if (mode == 0 || rd16.score < rd->score) { | 929 if (mode == 0 || rd_cur->score < rd_best->score) { |
900 CopyScore(rd, &rd16); | 930 SwapModeScore(&rd_cur, &rd_best); |
901 rd->mode_i16 = mode; | |
902 rd->nz = nz; | |
903 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels)); | |
904 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels)); | |
905 SwapOut(it); | 931 SwapOut(it); |
906 } | 932 } |
907 } | 933 } |
| 934 if (rd_best != rd) { |
| 935 memcpy(rd, rd_best, sizeof(*rd)); |
| 936 } |
908 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. | 937 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. |
909 VP8SetIntra16Mode(it, rd->mode_i16); | 938 VP8SetIntra16Mode(it, rd->mode_i16); |
910 | 939 |
911 // we have a blocky macroblock (only DCs are non-zero) with fairly high | 940 // we have a blocky macroblock (only DCs are non-zero) with fairly high |
912 // distortion, record max delta so we can later adjust the minimal filtering | 941 // distortion, record max delta so we can later adjust the minimal filtering |
913 // strength needed to smooth these blocks out. | 942 // strength needed to smooth these blocks out. |
914 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { | 943 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { |
915 StoreMaxDelta(dqm, rd->y_dc_levels); | 944 StoreMaxDelta(dqm, rd->y_dc_levels); |
916 } | 945 } |
917 } | 946 } |
918 | 947 |
919 //------------------------------------------------------------------------------ | 948 //------------------------------------------------------------------------------ |
920 | 949 |
921 // return the cost array corresponding to the surrounding prediction modes. | 950 // return the cost array corresponding to the surrounding prediction modes. |
922 static const uint16_t* GetCostModeI4(VP8EncIterator* const it, | 951 static const uint16_t* GetCostModeI4(VP8EncIterator* const it, |
923 const uint8_t modes[16]) { | 952 const uint8_t modes[16]) { |
924 const int preds_w = it->enc_->preds_w_; | 953 const int preds_w = it->enc_->preds_w_; |
925 const int x = (it->i4_ & 3), y = it->i4_ >> 2; | 954 const int x = (it->i4_ & 3), y = it->i4_ >> 2; |
926 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; | 955 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; |
927 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; | 956 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; |
928 return VP8FixedCostsI4[top][left]; | 957 return VP8FixedCostsI4[top][left]; |
929 } | 958 } |
930 | 959 |
931 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { | 960 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { |
932 const VP8Encoder* const enc = it->enc_; | 961 const VP8Encoder* const enc = it->enc_; |
933 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; | 962 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
934 const int lambda = dqm->lambda_i4_; | 963 const int lambda = dqm->lambda_i4_; |
935 const int tlambda = dqm->tlambda_; | 964 const int tlambda = dqm->tlambda_; |
936 const uint8_t* const src0 = it->yuv_in_ + Y_OFF; | 965 const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC; |
937 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; | 966 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC; |
938 int total_header_bits = 0; | 967 int total_header_bits = 0; |
939 VP8ModeScore rd_best; | 968 VP8ModeScore rd_best; |
940 | 969 |
941 if (enc->max_i4_header_bits_ == 0) { | 970 if (enc->max_i4_header_bits_ == 0) { |
942 return 0; | 971 return 0; |
943 } | 972 } |
944 | 973 |
945 InitScore(&rd_best); | 974 InitScore(&rd_best); |
946 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) | 975 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) |
947 SetRDScore(dqm->lambda_mode_, &rd_best); | 976 SetRDScore(dqm->lambda_mode_, &rd_best); |
(...skipping 17 matching lines...) |
965 // Reconstruct | 994 // Reconstruct |
966 rd_tmp.nz = | 995 rd_tmp.nz = |
967 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; | 996 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; |
968 | 997 |
969 // Compute RD-score | 998 // Compute RD-score |
970 rd_tmp.D = VP8SSE4x4(src, tmp_dst); | 999 rd_tmp.D = VP8SSE4x4(src, tmp_dst); |
971 rd_tmp.SD = | 1000 rd_tmp.SD = |
972 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) | 1001 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) |
973 : 0; | 1002 : 0; |
974 rd_tmp.H = mode_costs[mode]; | 1003 rd_tmp.H = mode_costs[mode]; |
975 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); | 1004 |
| 1005 // Add flatness penalty |
976 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { | 1006 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { |
977 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks; | 1007 rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; |
| 1008 } else { |
| 1009 rd_tmp.R = 0; |
978 } | 1010 } |
979 | 1011 |
| 1012 // early-out check |
980 SetRDScore(lambda, &rd_tmp); | 1013 SetRDScore(lambda, &rd_tmp); |
| 1014 if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue; |
| 1015 |
| 1016 // finish computing score |
| 1017 rd_tmp.R += VP8GetCostLuma4(it, tmp_levels); |
| 1018 SetRDScore(lambda, &rd_tmp); |
| 1019 |
981 if (best_mode < 0 || rd_tmp.score < rd_i4.score) { | 1020 if (best_mode < 0 || rd_tmp.score < rd_i4.score) { |
982 CopyScore(&rd_i4, &rd_tmp); | 1021 CopyScore(&rd_i4, &rd_tmp); |
983 best_mode = mode; | 1022 best_mode = mode; |
984 SwapPtr(&tmp_dst, &best_block); | 1023 SwapPtr(&tmp_dst, &best_block); |
985 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); | 1024 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, |
| 1025 sizeof(rd_best.y_ac_levels[it->i4_])); |
986 } | 1026 } |
987 } | 1027 } |
988 SetRDScore(dqm->lambda_mode_, &rd_i4); | 1028 SetRDScore(dqm->lambda_mode_, &rd_i4); |
989 AddScore(&rd_best, &rd_i4); | 1029 AddScore(&rd_best, &rd_i4); |
990 if (rd_best.score >= rd->score) { | 1030 if (rd_best.score >= rd->score) { |
991 return 0; | 1031 return 0; |
992 } | 1032 } |
993 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; | 1033 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; |
994 if (total_header_bits > enc->max_i4_header_bits_) { | 1034 if (total_header_bits > enc->max_i4_header_bits_) { |
995 return 0; | 1035 return 0; |
(...skipping 13 matching lines...) |
1009 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); | 1049 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); |
1010 return 1; // select intra4x4 over intra16x16 | 1050 return 1; // select intra4x4 over intra16x16 |
1011 } | 1051 } |
1012 | 1052 |
1013 //------------------------------------------------------------------------------ | 1053 //------------------------------------------------------------------------------ |
1014 | 1054 |
1015 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { | 1055 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
1016 const int kNumBlocks = 8; | 1056 const int kNumBlocks = 8; |
1017 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; | 1057 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
1018 const int lambda = dqm->lambda_uv_; | 1058 const int lambda = dqm->lambda_uv_; |
1019 const uint8_t* const src = it->yuv_in_ + U_OFF; | 1059 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
1020 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer | 1060 uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer |
1021 uint8_t* const dst0 = it->yuv_out_ + U_OFF; | 1061 uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC; |
| 1062 uint8_t* dst = dst0; |
1022 VP8ModeScore rd_best; | 1063 VP8ModeScore rd_best; |
1023 int mode; | 1064 int mode; |
1024 | 1065 |
1025 rd->mode_uv = -1; | 1066 rd->mode_uv = -1; |
1026 InitScore(&rd_best); | 1067 InitScore(&rd_best); |
1027 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 1068 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
1028 VP8ModeScore rd_uv; | 1069 VP8ModeScore rd_uv; |
1029 | 1070 |
1030 // Reconstruct | 1071 // Reconstruct |
1031 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); | 1072 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); |
1032 | 1073 |
1033 // Compute RD-score | 1074 // Compute RD-score |
1034 rd_uv.D = VP8SSE16x8(src, tmp_dst); | 1075 rd_uv.D = VP8SSE16x8(src, tmp_dst); |
1035 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. | 1076 rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas. |
1036 rd_uv.H = VP8FixedCostsUV[mode]; | 1077 rd_uv.H = VP8FixedCostsUV[mode]; |
1037 rd_uv.R = VP8GetCostUV(it, &rd_uv); | 1078 rd_uv.R = VP8GetCostUV(it, &rd_uv); |
1038 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { | 1079 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { |
1039 rd_uv.R += FLATNESS_PENALTY * kNumBlocks; | 1080 rd_uv.R += FLATNESS_PENALTY * kNumBlocks; |
1040 } | 1081 } |
1041 | 1082 |
1042 SetRDScore(lambda, &rd_uv); | 1083 SetRDScore(lambda, &rd_uv); |
1043 if (mode == 0 || rd_uv.score < rd_best.score) { | 1084 if (mode == 0 || rd_uv.score < rd_best.score) { |
1044 CopyScore(&rd_best, &rd_uv); | 1085 CopyScore(&rd_best, &rd_uv); |
1045 rd->mode_uv = mode; | 1086 rd->mode_uv = mode; |
1046 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); | 1087 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); |
1047 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? | 1088 SwapPtr(&dst, &tmp_dst); |
1048 } | 1089 } |
1049 } | 1090 } |
1050 VP8SetIntraUVMode(it, rd->mode_uv); | 1091 VP8SetIntraUVMode(it, rd->mode_uv); |
1051 AddScore(rd, &rd_best); | 1092 AddScore(rd, &rd_best); |
| 1093 if (dst != dst0) { // copy 16x8 block if needed |
| 1094 VP8Copy16x8(dst, dst0); |
| 1095 } |
1052 } | 1096 } |
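The chroma loop above now uses the pointer-swap trick instead of copying UV_SIZE bytes after every winning mode: dst and tmp_dst are exchanged whenever a mode wins, so the best reconstruction can end up in either buffer. An odd number of swaps leaves it in the scratch buffer, which is exactly the case the final VP8Copy16x8(dst, dst0) handles; at most one 16x8 copy now happens per macroblock.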
1053 | 1097 |
1054 //------------------------------------------------------------------------------ | 1098 //------------------------------------------------------------------------------ |
1055 // Final reconstruction and quantization. | 1099 // Final reconstruction and quantization. |
1056 | 1100 |
1057 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { | 1101 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { |
1058 const VP8Encoder* const enc = it->enc_; | 1102 const VP8Encoder* const enc = it->enc_; |
1059 const int is_i16 = (it->mb_->type_ == 1); | 1103 const int is_i16 = (it->mb_->type_ == 1); |
1060 int nz = 0; | 1104 int nz = 0; |
1061 | 1105 |
1062 if (is_i16) { | 1106 if (is_i16) { |
1063 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); | 1107 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
1064 } else { | 1108 } else { |
1065 VP8IteratorStartI4(it); | 1109 VP8IteratorStartI4(it); |
1066 do { | 1110 do { |
1067 const int mode = | 1111 const int mode = |
1068 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; | 1112 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; |
1069 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; | 1113 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1070 uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_]; | 1114 uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1071 VP8MakeIntra4Preds(it); | 1115 VP8MakeIntra4Preds(it); |
1072 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], | 1116 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], |
1073 src, dst, mode) << it->i4_; | 1117 src, dst, mode) << it->i4_; |
1074 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF)); | 1118 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC)); |
1075 } | 1119 } |
1076 | 1120 |
1077 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_); | 1121 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
1078 rd->nz = nz; | 1122 rd->nz = nz; |
1079 } | 1123 } |
1080 | 1124 |
1081 // Refine intra16/intra4 sub-modes based on distortion only (not rate). | 1125 // Refine intra16/intra4 sub-modes based on distortion only (not rate). |
1082 static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { | 1126 static void RefineUsingDistortion(VP8EncIterator* const it, |
1083 const int is_i16 = (it->mb_->type_ == 1); | 1127 int try_both_modes, int refine_uv_mode, |
| 1128 VP8ModeScore* const rd) { |
1084 score_t best_score = MAX_COST; | 1129 score_t best_score = MAX_COST; |
| 1130 score_t score_i4 = (score_t)I4_PENALTY; |
| 1131 int16_t tmp_levels[16][16]; |
| 1132 uint8_t modes_i4[16]; |
| 1133 int nz = 0; |
| 1134 int mode; |
| 1135 int is_i16 = try_both_modes || (it->mb_->type_ == 1); |
1085 | 1136 |
1086 if (try_both_i4_i16 || is_i16) { | 1137 if (is_i16) { // First, evaluate Intra16 distortion |
1087 int mode; | |
1088 int best_mode = -1; | 1138 int best_mode = -1; |
| 1139 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
1089 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 1140 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
1090 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; | 1141 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
1091 const uint8_t* const src = it->yuv_in_ + Y_OFF; | |
1092 const score_t score = VP8SSE16x16(src, ref); | 1142 const score_t score = VP8SSE16x16(src, ref); |
1093 if (score < best_score) { | 1143 if (score < best_score) { |
1094 best_mode = mode; | 1144 best_mode = mode; |
1095 best_score = score; | 1145 best_score = score; |
1096 } | 1146 } |
1097 } | 1147 } |
1098 VP8SetIntra16Mode(it, best_mode); | 1148 VP8SetIntra16Mode(it, best_mode); |
| 1149 // we'll reconstruct later, if i16 mode actually gets selected |
1099 } | 1150 } |
1100 if (try_both_i4_i16 || !is_i16) { | 1151 |
1101 uint8_t modes_i4[16]; | 1152 // Next, evaluate Intra4 |
| 1153 if (try_both_modes || !is_i16) { |
1102 // We don't evaluate the rate here, but just account for it through a | 1154 // We don't evaluate the rate here, but just account for it through a |
1103 // constant penalty (i4 mode usually needs more bits compared to i16). | 1155 // constant penalty (i4 mode usually needs more bits compared to i16). |
1104 score_t score_i4 = (score_t)I4_PENALTY; | 1156 is_i16 = 0; |
1105 | |
1106 VP8IteratorStartI4(it); | 1157 VP8IteratorStartI4(it); |
1107 do { | 1158 do { |
1108 int mode; | 1159 int best_i4_mode = -1; |
1109 int best_sub_mode = -1; | 1160 score_t best_i4_score = MAX_COST; |
1110 score_t best_sub_score = MAX_COST; | 1161 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1111 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; | |
1112 | 1162 |
1113 // TODO(skal): we don't really need the prediction pixels here, | |
1114 // but just the distortion against 'src'. | |
1115 VP8MakeIntra4Preds(it); | 1163 VP8MakeIntra4Preds(it); |
1116 for (mode = 0; mode < NUM_BMODES; ++mode) { | 1164 for (mode = 0; mode < NUM_BMODES; ++mode) { |
1117 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; | 1165 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; |
1118 const score_t score = VP8SSE4x4(src, ref); | 1166 const score_t score = VP8SSE4x4(src, ref); |
1119 if (score < best_sub_score) { | 1167 if (score < best_i4_score) { |
1120 best_sub_mode = mode; | 1168 best_i4_mode = mode; |
1121 best_sub_score = score; | 1169 best_i4_score = score; |
1122 } | 1170 } |
1123 } | 1171 } |
1124 modes_i4[it->i4_] = best_sub_mode; | 1172 modes_i4[it->i4_] = best_i4_mode; |
1125 score_i4 += best_sub_score; | 1173 score_i4 += best_i4_score; |
1126 if (score_i4 >= best_score) break; | 1174 if (score_i4 >= best_score) { |
1127 } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); | 1175 // Intra4 won't be better than Intra16. Bail out and pick Intra16. |
1128 if (score_i4 < best_score) { | 1176 is_i16 = 1; |
1129 VP8SetIntra4Mode(it, modes_i4); | 1177 break; |
| 1178 } else { // reconstruct partial block inside yuv_out2_ buffer |
| 1179 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
| 1180 nz |= ReconstructIntra4(it, tmp_levels[it->i4_], |
| 1181 src, tmp_dst, best_i4_mode) << it->i4_; |
| 1182 } |
| 1183 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); |
| 1184 } |
| 1185 |
| 1186 // Final reconstruction, depending on which mode is selected. |
| 1187 if (!is_i16) { |
| 1188 VP8SetIntra4Mode(it, modes_i4); |
| 1189 memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); |
| 1190 SwapOut(it); |
| 1191 best_score = score_i4; |
| 1192 } else { |
| 1193 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
| 1194 } |
| 1195 |
| 1196 // ... and UV! |
| 1197 if (refine_uv_mode) { |
| 1198 int best_mode = -1; |
| 1199 score_t best_uv_score = MAX_COST; |
| 1200 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
| 1201 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
| 1202 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
| 1203 const score_t score = VP8SSE16x8(src, ref); |
| 1204 if (score < best_uv_score) { |
| 1205 best_mode = mode; |
| 1206 best_uv_score = score; |
| 1207 } |
1130 } | 1208 } |
| 1209 VP8SetIntraUVMode(it, best_mode); |
1131 } | 1210 } |
| 1211 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
| 1212 |
| 1213 rd->nz = nz; |
| 1214 rd->score = best_score; |
1132 } | 1215 } |
1133 | 1216 |
1134 //------------------------------------------------------------------------------ | 1217 //------------------------------------------------------------------------------ |
1135 // Entry point | 1218 // Entry point |
1136 | 1219 |
1137 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, | 1220 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, |
1138 VP8RDLevel rd_opt) { | 1221 VP8RDLevel rd_opt) { |
1139 int is_skipped; | 1222 int is_skipped; |
1140 const int method = it->enc_->method_; | 1223 const int method = it->enc_->method_; |
1141 | 1224 |
1142 InitScore(rd); | 1225 InitScore(rd); |
1143 | 1226 |
1144 // We can perform predictions for Luma16x16 and Chroma8x8 already. | 1227 // We can perform predictions for Luma16x16 and Chroma8x8 already. |
1145 // Luma4x4 predictions need to be done as we go. | 1228 // Luma4x4 predictions need to be done as we go. |
1146 VP8MakeLuma16Preds(it); | 1229 VP8MakeLuma16Preds(it); |
1147 VP8MakeChroma8Preds(it); | 1230 VP8MakeChroma8Preds(it); |
1148 | 1231 |
1149 if (rd_opt > RD_OPT_NONE) { | 1232 if (rd_opt > RD_OPT_NONE) { |
1150 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); | 1233 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); |
1151 PickBestIntra16(it, rd); | 1234 PickBestIntra16(it, rd); |
1152 if (method >= 2) { | 1235 if (method >= 2) { |
1153 PickBestIntra4(it, rd); | 1236 PickBestIntra4(it, rd); |
1154 } | 1237 } |
1155 PickBestUV(it, rd); | 1238 PickBestUV(it, rd); |
1156 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now | 1239 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now |
1157 it->do_trellis_ = 1; | 1240 it->do_trellis_ = 1; |
1158 SimpleQuantize(it, rd); | 1241 SimpleQuantize(it, rd); |
1159 } | 1242 } |
1160 } else { | 1243 } else { |
1161 // For method == 2, pick the best intra4/intra16 based on SSE (a tad slower). | 1244 // At this point we have heuristically decided intra16 / intra4. |
1162 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). | 1245 // For method >= 2, pick the best intra4/intra16 based on SSE (a tad slower). |
1163 DistoRefine(it, (method >= 2)); | 1246 // For method <= 1, we don't re-examine the decision but just go ahead with |
1164 SimpleQuantize(it, rd); | 1247 // quantization/reconstruction. |
| 1248 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); |
1165 } | 1249 } |
1166 is_skipped = (rd->nz == 0); | 1250 is_skipped = (rd->nz == 0); |
1167 VP8SetSkip(it, is_skipped); | 1251 VP8SetSkip(it, is_skipped); |
1168 return is_skipped; | 1252 return is_skipped; |
1169 } | 1253 } |
1170 | |
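Caller-side usage, schematically (hedged sketch; the macroblock loop lives elsewhere in the encoder, and rd_opt_level_ is assumed to come from the encoder state):

    VP8ModeScore info;
    const int skip = VP8Decimate(it, &info, enc->rd_opt_level_);
    /* skip == 1 iff no coefficient survived quantization (info.nz == 0) */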