Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(804)

Side by Side Diff: third_party/libwebp/enc/quant.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase around clang-cl fix Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2011 Google Inc. All Rights Reserved. 1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Quantization 10 // Quantization
(...skipping 12 matching lines...) Expand all
23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth. 23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.
24 #define USE_TDISTO 1 24 #define USE_TDISTO 1
25 25
26 #define MID_ALPHA 64 // neutral value for susceptibility 26 #define MID_ALPHA 64 // neutral value for susceptibility
27 #define MIN_ALPHA 30 // lowest usable value for susceptibility 27 #define MIN_ALPHA 30 // lowest usable value for susceptibility
28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility 28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility
29 29
30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP 30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
31 // power-law modulation. Must be strictly less than 1. 31 // power-law modulation. Must be strictly less than 1.
32 32
33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision 33 #define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision
34 34
35 // number of non-zero coeffs below which we consider the block very flat 35 // number of non-zero coeffs below which we consider the block very flat
36 // (and apply a penalty to complex predictions) 36 // (and apply a penalty to complex predictions)
37 #define FLATNESS_LIMIT_I16 10 // I16 mode 37 #define FLATNESS_LIMIT_I16 10 // I16 mode
38 #define FLATNESS_LIMIT_I4 3 // I4 mode 38 #define FLATNESS_LIMIT_I4 3 // I4 mode
39 #define FLATNESS_LIMIT_UV 2 // UV mode 39 #define FLATNESS_LIMIT_UV 2 // UV mode
40 #define FLATNESS_PENALTY 140 // roughly ~1bit per block 40 #define FLATNESS_PENALTY 140 // roughly ~1bit per block
41 41
42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) 42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
43 43
44 #define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda)
45
44 // #define DEBUG_BLOCK 46 // #define DEBUG_BLOCK
45 47
46 //------------------------------------------------------------------------------ 48 //------------------------------------------------------------------------------
47 49
48 #if defined(DEBUG_BLOCK) 50 #if defined(DEBUG_BLOCK)
49 51
50 #include <stdio.h> 52 #include <stdio.h>
51 #include <stdlib.h> 53 #include <stdlib.h>
52 54
53 static void PrintBlockInfo(const VP8EncIterator* const it, 55 static void PrintBlockInfo(const VP8EncIterator* const it,
54 const VP8ModeScore* const rd) { 56 const VP8ModeScore* const rd) {
55 int i, j; 57 int i, j;
56 const int is_i16 = (it->mb_->type_ == 1); 58 const int is_i16 = (it->mb_->type_ == 1);
59 const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
60 const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
61 const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
62 const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
57 printf("SOURCE / OUTPUT / ABS DELTA\n"); 63 printf("SOURCE / OUTPUT / ABS DELTA\n");
58 for (j = 0; j < 24; ++j) { 64 for (j = 0; j < 16; ++j) {
59 if (j == 16) printf("\n"); // newline before the U/V block 65 for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
61 printf(" "); 66 printf(" ");
62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); 67 for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
63 printf(" "); 68 printf(" ");
64 for (i = 0; i < 16; ++i) { 69 for (i = 0; i < 16; ++i) {
65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); 70 printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
66 } 71 }
67 printf("\n"); 72 printf("\n");
68 } 73 }
74 printf("\n"); // newline before the U/V block
75 for (j = 0; j < 8; ++j) {
76 for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
77 printf(" ");
78 for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
79 printf(" ");
80 for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
81 printf(" ");
82 for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
83 printf(" ");
84 for (i = 0; i < 8; ++i) {
85 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
86 }
87 printf(" ");
88 for (i = 8; i < 16; ++i) {
89 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
90 }
91 printf("\n");
92 }
69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", 93 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, 94 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
71 (int)rd->score); 95 (int)rd->score);
72 if (is_i16) { 96 if (is_i16) {
73 printf("Mode: %d\n", rd->mode_i16); 97 printf("Mode: %d\n", rd->mode_i16);
74 printf("y_dc_levels:"); 98 printf("y_dc_levels:");
75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); 99 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
76 printf("\n"); 100 printf("\n");
77 } else { 101 } else {
78 printf("Modes[16]: "); 102 printf("Modes[16]: ");
(...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after
437 } 461 }
438 462
439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) { 463 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_); 464 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
441 } 465 }
442 466
443 //------------------------------------------------------------------------------ 467 //------------------------------------------------------------------------------
444 // Quantize 468 // Quantize
445 469
446 // Layout: 470 // Layout:
447 // +----+ 471 // +----+----+
448 // |YYYY| 0 472 // |YYYY|UUVV| 0
449 // |YYYY| 4 473 // |YYYY|UUVV| 4
450 // |YYYY| 8 474 // |YYYY|....| 8
451 // |YYYY| 12 475 // |YYYY|....| 12
452 // +----+ 476 // +----+----+
453 // |UUVV| 16
454 // |UUVV| 20
455 // +----+
456 477
457 const int VP8Scan[16] = { // Luma 478 const int VP8Scan[16] = { // Luma
458 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 479 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
459 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 480 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
460 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 481 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
461 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 482 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
462 }; 483 };
463 484
464 static const int VP8ScanUV[4 + 4] = { 485 static const int VP8ScanUV[4 + 4] = {
465 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 486 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
531 // If a coefficient was quantized to a value Q (using a neutral bias), 552 // If a coefficient was quantized to a value Q (using a neutral bias),
532 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] 553 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
533 // We don't test negative values though. 554 // We don't test negative values though.
534 #define MIN_DELTA 0 // how much lower level to try 555 #define MIN_DELTA 0 // how much lower level to try
535 #define MAX_DELTA 1 // how much higher 556 #define MAX_DELTA 1 // how much higher
536 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) 557 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
537 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) 558 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
538 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) 559 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
539 560
540 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { 561 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
541 // TODO: incorporate the "* 256" in the tables? 562 rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
542 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
543 } 563 }
544 564
545 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, 565 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
546 score_t distortion) { 566 score_t distortion) {
547 return rate * lambda + 256 * distortion; 567 return rate * lambda + RD_DISTO_MULT * distortion;
548 } 568 }
549 569
550 static int TrellisQuantizeBlock(const VP8Encoder* const enc, 570 static int TrellisQuantizeBlock(const VP8Encoder* const enc,
551 int16_t in[16], int16_t out[16], 571 int16_t in[16], int16_t out[16],
552 int ctx0, int coeff_type, 572 int ctx0, int coeff_type,
553 const VP8Matrix* const mtx, 573 const VP8Matrix* const mtx,
554 int lambda) { 574 int lambda) {
555 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; 575 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
556 const CostArray* const costs = enc->proba_.level_cost_[coeff_type]; 576 CostArrayPtr const costs =
577 (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
557 const int first = (coeff_type == 0) ? 1 : 0; 578 const int first = (coeff_type == 0) ? 1 : 0;
558 Node nodes[16][NUM_NODES]; 579 Node nodes[16][NUM_NODES];
559 ScoreState score_states[2][NUM_NODES]; 580 ScoreState score_states[2][NUM_NODES];
560 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); 581 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
561 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); 582 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
562 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous 583 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
563 score_t best_score; 584 score_t best_score;
564 int n, m, p, last; 585 int n, m, p, last;
565 586
566 { 587 {
(...skipping 16 matching lines...) Expand all
583 if (last < 15) ++last; 604 if (last < 15) ++last;
584 605
585 // compute 'skip' score. This is the max score one can do. 606 // compute 'skip' score. This is the max score one can do.
586 cost = VP8BitCost(0, last_proba); 607 cost = VP8BitCost(0, last_proba);
587 best_score = RDScoreTrellis(lambda, cost, 0); 608 best_score = RDScoreTrellis(lambda, cost, 0);
588 609
589 // initialize source node. 610 // initialize source node.
590 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { 611 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
591 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; 612 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
592 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); 613 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
593 ss_cur[m].costs = costs[VP8EncBands[first]][ctx0]; 614 ss_cur[m].costs = costs[first][ctx0];
594 } 615 }
595 } 616 }
596 617
597 // traverse trellis. 618 // traverse trellis.
598 for (n = first; n <= last; ++n) { 619 for (n = first; n <= last; ++n) {
599 const int j = kZigzag[n]; 620 const int j = kZigzag[n];
600 const uint32_t Q = mtx->q_[j]; 621 const uint32_t Q = mtx->q_[j];
601 const uint32_t iQ = mtx->iq_[j]; 622 const uint32_t iQ = mtx->iq_[j];
602 const uint32_t B = BIAS(0x00); // neutral bias 623 const uint32_t B = BIAS(0x00); // neutral bias
603 // note: it's important to take sign of the _original_ coeff, 624 // note: it's important to take sign of the _original_ coeff,
(...skipping 13 matching lines...) Expand all
617 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { 638 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
618 Node* const cur = &NODE(n, m); 639 Node* const cur = &NODE(n, m);
619 int level = level0 + m; 640 int level = level0 + m;
620 const int ctx = (level > 2) ? 2 : level; 641 const int ctx = (level > 2) ? 2 : level;
621 const int band = VP8EncBands[n + 1]; 642 const int band = VP8EncBands[n + 1];
622 score_t base_score, last_pos_score; 643 score_t base_score, last_pos_score;
623 score_t best_cur_score = MAX_COST; 644 score_t best_cur_score = MAX_COST;
624 int best_prev = 0; // default, in case 645 int best_prev = 0; // default, in case
625 646
626 ss_cur[m].score = MAX_COST; 647 ss_cur[m].score = MAX_COST;
627 ss_cur[m].costs = costs[band][ctx]; 648 ss_cur[m].costs = costs[n + 1][ctx];
628 if (level > MAX_LEVEL || level < 0) { // node is dead? 649 if (level > MAX_LEVEL || level < 0) { // node is dead?
629 continue; 650 continue;
630 } 651 }
631 652
632 // Compute extra rate cost if last coeff's position is < 15 653 // Compute extra rate cost if last coeff's position is < 15
633 { 654 {
634 const score_t last_pos_cost = 655 const score_t last_pos_cost =
635 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; 656 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
636 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); 657 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
637 } 658 }
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
712 // Performs: difference, transform, quantize, back-transform, add 733 // Performs: difference, transform, quantize, back-transform, add
713 // all at once. Output is the reconstructed block in *yuv_out, and the 734 // all at once. Output is the reconstructed block in *yuv_out, and the
714 // quantized levels in *levels. 735 // quantized levels in *levels.
715 736
716 static int ReconstructIntra16(VP8EncIterator* const it, 737 static int ReconstructIntra16(VP8EncIterator* const it,
717 VP8ModeScore* const rd, 738 VP8ModeScore* const rd,
718 uint8_t* const yuv_out, 739 uint8_t* const yuv_out,
719 int mode) { 740 int mode) {
720 const VP8Encoder* const enc = it->enc_; 741 const VP8Encoder* const enc = it->enc_;
721 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; 742 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
722 const uint8_t* const src = it->yuv_in_ + Y_OFF; 743 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
723 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 744 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
724 int nz = 0; 745 int nz = 0;
725 int n; 746 int n;
726 int16_t tmp[16][16], dc_tmp[16]; 747 int16_t tmp[16][16], dc_tmp[16];
727 748
728 for (n = 0; n < 16; ++n) { 749 for (n = 0; n < 16; n += 2) {
729 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); 750 VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
730 } 751 }
731 VP8FTransformWHT(tmp[0], dc_tmp); 752 VP8FTransformWHT(tmp[0], dc_tmp);
732 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; 753 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
733 754
734 if (DO_TRELLIS_I16 && it->do_trellis_) { 755 if (DO_TRELLIS_I16 && it->do_trellis_) {
735 int x, y; 756 int x, y;
736 VP8IteratorNzToBytes(it); 757 VP8IteratorNzToBytes(it);
737 for (y = 0, n = 0; y < 4; ++y) { 758 for (y = 0, n = 0; y < 4; ++y) {
738 for (x = 0; x < 4; ++x, ++n) { 759 for (x = 0; x < 4; ++x, ++n) {
739 const int ctx = it->top_nz_[x] + it->left_nz_[y]; 760 const int ctx = it->top_nz_[x] + it->left_nz_[y];
740 const int non_zero = 761 const int non_zero =
741 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, 762 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
742 &dqm->y1_, dqm->lambda_trellis_i16_); 763 &dqm->y1_, dqm->lambda_trellis_i16_);
743 it->top_nz_[x] = it->left_nz_[y] = non_zero; 764 it->top_nz_[x] = it->left_nz_[y] = non_zero;
744 rd->y_ac_levels[n][0] = 0; 765 rd->y_ac_levels[n][0] = 0;
745 nz |= non_zero << n; 766 nz |= non_zero << n;
746 } 767 }
747 } 768 }
748 } else { 769 } else {
749 for (n = 0; n < 16; ++n) { 770 for (n = 0; n < 16; n += 2) {
750 // Zero-out the first coeff, so that: a) nz is correct below, and 771 // Zero-out the first coeff, so that: a) nz is correct below, and
751 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. 772 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
752 tmp[n][0] = 0; 773 tmp[n][0] = tmp[n + 1][0] = 0;
753 nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; 774 nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
754 assert(rd->y_ac_levels[n][0] == 0); 775 assert(rd->y_ac_levels[n + 0][0] == 0);
776 assert(rd->y_ac_levels[n + 1][0] == 0);
755 } 777 }
756 } 778 }
757 779
758 // Transform back 780 // Transform back
759 VP8TransformWHT(dc_tmp, tmp[0]); 781 VP8TransformWHT(dc_tmp, tmp[0]);
760 for (n = 0; n < 16; n += 2) { 782 for (n = 0; n < 16; n += 2) {
761 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); 783 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
762 } 784 }
763 785
764 return nz; 786 return nz;
(...skipping 20 matching lines...) Expand all
785 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); 807 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
786 } 808 }
787 VP8ITransform(ref, tmp, yuv_out, 0); 809 VP8ITransform(ref, tmp, yuv_out, 0);
788 return nz; 810 return nz;
789 } 811 }
790 812
791 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, 813 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
792 uint8_t* const yuv_out, int mode) { 814 uint8_t* const yuv_out, int mode) {
793 const VP8Encoder* const enc = it->enc_; 815 const VP8Encoder* const enc = it->enc_;
794 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; 816 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
795 const uint8_t* const src = it->yuv_in_ + U_OFF; 817 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
796 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 818 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
797 int nz = 0; 819 int nz = 0;
798 int n; 820 int n;
799 int16_t tmp[8][16]; 821 int16_t tmp[8][16];
800 822
801 for (n = 0; n < 8; ++n) { 823 for (n = 0; n < 8; n += 2) {
802 VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); 824 VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
803 } 825 }
804 if (DO_TRELLIS_UV && it->do_trellis_) { 826 if (DO_TRELLIS_UV && it->do_trellis_) {
805 int ch, x, y; 827 int ch, x, y;
806 for (ch = 0, n = 0; ch <= 2; ch += 2) { 828 for (ch = 0, n = 0; ch <= 2; ch += 2) {
807 for (y = 0; y < 2; ++y) { 829 for (y = 0; y < 2; ++y) {
808 for (x = 0; x < 2; ++x, ++n) { 830 for (x = 0; x < 2; ++x, ++n) {
809 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; 831 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
810 const int non_zero = 832 const int non_zero =
811 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, 833 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
812 &dqm->uv_, dqm->lambda_trellis_uv_); 834 &dqm->uv_, dqm->lambda_trellis_uv_);
813 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; 835 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
814 nz |= non_zero << n; 836 nz |= non_zero << n;
815 } 837 }
816 } 838 }
817 } 839 }
818 } else { 840 } else {
819 for (n = 0; n < 8; ++n) { 841 for (n = 0; n < 8; n += 2) {
820 nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; 842 nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
821 } 843 }
822 } 844 }
823 845
824 for (n = 0; n < 8; n += 2) { 846 for (n = 0; n < 8; n += 2) {
825 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); 847 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
826 } 848 }
827 return (nz << 16); 849 return (nz << 16);
828 } 850 }
829 851
830 //------------------------------------------------------------------------------ 852 //------------------------------------------------------------------------------
831 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost. 853 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
832 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion. 854 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.
833 855
834 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { 856 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
835 // We look at the first three AC coefficients to determine what is the average 857 // We look at the first three AC coefficients to determine what is the average
836 // delta between each sub-4x4 block. 858 // delta between each sub-4x4 block.
837 const int v0 = abs(DCs[1]); 859 const int v0 = abs(DCs[1]);
838 const int v1 = abs(DCs[4]); 860 const int v1 = abs(DCs[4]);
839 const int v2 = abs(DCs[5]); 861 const int v2 = abs(DCs[5]);
840 int max_v = (v0 > v1) ? v1 : v0; 862 int max_v = (v0 > v1) ? v1 : v0;
841 max_v = (v2 > max_v) ? v2 : max_v; 863 max_v = (v2 > max_v) ? v2 : max_v;
842 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; 864 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
843 } 865 }
844 866
867 static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
868 VP8ModeScore* const tmp = *a;
869 *a = *b;
870 *b = tmp;
871 }
872
845 static void SwapPtr(uint8_t** a, uint8_t** b) { 873 static void SwapPtr(uint8_t** a, uint8_t** b) {
846 uint8_t* const tmp = *a; 874 uint8_t* const tmp = *a;
847 *a = *b; 875 *a = *b;
848 *b = tmp; 876 *b = tmp;
849 } 877 }
850 878
851 static void SwapOut(VP8EncIterator* const it) { 879 static void SwapOut(VP8EncIterator* const it) {
852 SwapPtr(&it->yuv_out_, &it->yuv_out2_); 880 SwapPtr(&it->yuv_out_, &it->yuv_out2_);
853 } 881 }
854 882
855 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { 883 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
856 score_t score = 0; 884 score_t score = 0;
857 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? 885 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
858 int i; 886 int i;
859 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC 887 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
860 score += (levels[i] != 0); 888 score += (levels[i] != 0);
861 if (score > thresh) return 0; 889 if (score > thresh) return 0;
862 } 890 }
863 levels += 16; 891 levels += 16;
864 } 892 }
865 return 1; 893 return 1;
866 } 894 }
867 895
868 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { 896 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
869 const int kNumBlocks = 16; 897 const int kNumBlocks = 16;
870 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; 898 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
871 const int lambda = dqm->lambda_i16_; 899 const int lambda = dqm->lambda_i16_;
872 const int tlambda = dqm->tlambda_; 900 const int tlambda = dqm->tlambda_;
873 const uint8_t* const src = it->yuv_in_ + Y_OFF; 901 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
874 VP8ModeScore rd16; 902 VP8ModeScore rd_tmp;
903 VP8ModeScore* rd_cur = &rd_tmp;
904 VP8ModeScore* rd_best = rd;
875 int mode; 905 int mode;
876 906
877 rd->mode_i16 = -1; 907 rd->mode_i16 = -1;
878 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { 908 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
879 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer 909 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer
880 int nz; 910 rd_cur->mode_i16 = mode;
881 911
882 // Reconstruct 912 // Reconstruct
883 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); 913 rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
884 914
885 // Measure RD-score 915 // Measure RD-score
886 rd16.D = VP8SSE16x16(src, tmp_dst); 916 rd_cur->D = VP8SSE16x16(src, tmp_dst);
887 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) 917 rd_cur->SD =
888 : 0; 918 tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
889 rd16.H = VP8FixedCostsI16[mode]; 919 rd_cur->H = VP8FixedCostsI16[mode];
890 rd16.R = VP8GetCostLuma16(it, &rd16); 920 rd_cur->R = VP8GetCostLuma16(it, rd_cur);
891 if (mode > 0 && 921 if (mode > 0 &&
892 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { 922 IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
893 // penalty to avoid flat area to be mispredicted by complex mode 923 // penalty to avoid flat area to be mispredicted by complex mode
894 rd16.R += FLATNESS_PENALTY * kNumBlocks; 924 rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
895 } 925 }
896 926
897 // Since we always examine Intra16 first, we can overwrite *rd directly. 927 // Since we always examine Intra16 first, we can overwrite *rd directly.
898 SetRDScore(lambda, &rd16); 928 SetRDScore(lambda, rd_cur);
899 if (mode == 0 || rd16.score < rd->score) { 929 if (mode == 0 || rd_cur->score < rd_best->score) {
900 CopyScore(rd, &rd16); 930 SwapModeScore(&rd_cur, &rd_best);
901 rd->mode_i16 = mode;
902 rd->nz = nz;
903 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
904 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
905 SwapOut(it); 931 SwapOut(it);
906 } 932 }
907 } 933 }
934 if (rd_best != rd) {
935 memcpy(rd, rd_best, sizeof(*rd));
936 }
908 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. 937 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
909 VP8SetIntra16Mode(it, rd->mode_i16); 938 VP8SetIntra16Mode(it, rd->mode_i16);
910 939
911 // we have a blocky macroblock (only DCs are non-zero) with fairly high 940 // we have a blocky macroblock (only DCs are non-zero) with fairly high
912 // distortion, record max delta so we can later adjust the minimal filtering 941 // distortion, record max delta so we can later adjust the minimal filtering
913 // strength needed to smooth these blocks out. 942 // strength needed to smooth these blocks out.
914 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { 943 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
915 StoreMaxDelta(dqm, rd->y_dc_levels); 944 StoreMaxDelta(dqm, rd->y_dc_levels);
916 } 945 }
917 } 946 }
918 947
919 //------------------------------------------------------------------------------ 948 //------------------------------------------------------------------------------
920 949
921 // return the cost array corresponding to the surrounding prediction modes. 950 // return the cost array corresponding to the surrounding prediction modes.
922 static const uint16_t* GetCostModeI4(VP8EncIterator* const it, 951 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
923 const uint8_t modes[16]) { 952 const uint8_t modes[16]) {
924 const int preds_w = it->enc_->preds_w_; 953 const int preds_w = it->enc_->preds_w_;
925 const int x = (it->i4_ & 3), y = it->i4_ >> 2; 954 const int x = (it->i4_ & 3), y = it->i4_ >> 2;
926 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; 955 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
927 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; 956 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
928 return VP8FixedCostsI4[top][left]; 957 return VP8FixedCostsI4[top][left];
929 } 958 }
930 959
931 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { 960 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
932 const VP8Encoder* const enc = it->enc_; 961 const VP8Encoder* const enc = it->enc_;
933 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 962 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
934 const int lambda = dqm->lambda_i4_; 963 const int lambda = dqm->lambda_i4_;
935 const int tlambda = dqm->tlambda_; 964 const int tlambda = dqm->tlambda_;
936 const uint8_t* const src0 = it->yuv_in_ + Y_OFF; 965 const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
937 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; 966 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
938 int total_header_bits = 0; 967 int total_header_bits = 0;
939 VP8ModeScore rd_best; 968 VP8ModeScore rd_best;
940 969
941 if (enc->max_i4_header_bits_ == 0) { 970 if (enc->max_i4_header_bits_ == 0) {
942 return 0; 971 return 0;
943 } 972 }
944 973
945 InitScore(&rd_best); 974 InitScore(&rd_best);
946 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) 975 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
947 SetRDScore(dqm->lambda_mode_, &rd_best); 976 SetRDScore(dqm->lambda_mode_, &rd_best);
(...skipping 17 matching lines...) Expand all
965 // Reconstruct 994 // Reconstruct
966 rd_tmp.nz = 995 rd_tmp.nz =
967 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; 996 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
968 997
969 // Compute RD-score 998 // Compute RD-score
970 rd_tmp.D = VP8SSE4x4(src, tmp_dst); 999 rd_tmp.D = VP8SSE4x4(src, tmp_dst);
971 rd_tmp.SD = 1000 rd_tmp.SD =
972 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) 1001 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
973 : 0; 1002 : 0;
974 rd_tmp.H = mode_costs[mode]; 1003 rd_tmp.H = mode_costs[mode];
975 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); 1004
1005 // Add flatness penalty
976 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { 1006 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
977 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks; 1007 rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
1008 } else {
1009 rd_tmp.R = 0;
978 } 1010 }
979 1011
1012 // early-out check
980 SetRDScore(lambda, &rd_tmp); 1013 SetRDScore(lambda, &rd_tmp);
1014 if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
1015
1016 // finish computing score
1017 rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
1018 SetRDScore(lambda, &rd_tmp);
1019
981 if (best_mode < 0 || rd_tmp.score < rd_i4.score) { 1020 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
982 CopyScore(&rd_i4, &rd_tmp); 1021 CopyScore(&rd_i4, &rd_tmp);
983 best_mode = mode; 1022 best_mode = mode;
984 SwapPtr(&tmp_dst, &best_block); 1023 SwapPtr(&tmp_dst, &best_block);
985 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); 1024 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
1025 sizeof(rd_best.y_ac_levels[it->i4_]));
986 } 1026 }
987 } 1027 }
988 SetRDScore(dqm->lambda_mode_, &rd_i4); 1028 SetRDScore(dqm->lambda_mode_, &rd_i4);
989 AddScore(&rd_best, &rd_i4); 1029 AddScore(&rd_best, &rd_i4);
990 if (rd_best.score >= rd->score) { 1030 if (rd_best.score >= rd->score) {
991 return 0; 1031 return 0;
992 } 1032 }
993 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; 1033 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
994 if (total_header_bits > enc->max_i4_header_bits_) { 1034 if (total_header_bits > enc->max_i4_header_bits_) {
995 return 0; 1035 return 0;
(...skipping 13 matching lines...) Expand all
1009 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); 1049 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
1010 return 1; // select intra4x4 over intra16x16 1050 return 1; // select intra4x4 over intra16x16
1011 } 1051 }
1012 1052
1013 //------------------------------------------------------------------------------ 1053 //------------------------------------------------------------------------------
1014 1054
1015 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { 1055 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
1016 const int kNumBlocks = 8; 1056 const int kNumBlocks = 8;
1017 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; 1057 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
1018 const int lambda = dqm->lambda_uv_; 1058 const int lambda = dqm->lambda_uv_;
1019 const uint8_t* const src = it->yuv_in_ + U_OFF; 1059 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
1020 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer 1060 uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer
1021 uint8_t* const dst0 = it->yuv_out_ + U_OFF; 1061 uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
1062 uint8_t* dst = dst0;
1022 VP8ModeScore rd_best; 1063 VP8ModeScore rd_best;
1023 int mode; 1064 int mode;
1024 1065
1025 rd->mode_uv = -1; 1066 rd->mode_uv = -1;
1026 InitScore(&rd_best); 1067 InitScore(&rd_best);
1027 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { 1068 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1028 VP8ModeScore rd_uv; 1069 VP8ModeScore rd_uv;
1029 1070
1030 // Reconstruct 1071 // Reconstruct
1031 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); 1072 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
1032 1073
1033 // Compute RD-score 1074 // Compute RD-score
1034 rd_uv.D = VP8SSE16x8(src, tmp_dst); 1075 rd_uv.D = VP8SSE16x8(src, tmp_dst);
1035 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. 1076 rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas.
1036 rd_uv.H = VP8FixedCostsUV[mode]; 1077 rd_uv.H = VP8FixedCostsUV[mode];
1037 rd_uv.R = VP8GetCostUV(it, &rd_uv); 1078 rd_uv.R = VP8GetCostUV(it, &rd_uv);
1038 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { 1079 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
1039 rd_uv.R += FLATNESS_PENALTY * kNumBlocks; 1080 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
1040 } 1081 }
1041 1082
1042 SetRDScore(lambda, &rd_uv); 1083 SetRDScore(lambda, &rd_uv);
1043 if (mode == 0 || rd_uv.score < rd_best.score) { 1084 if (mode == 0 || rd_uv.score < rd_best.score) {
1044 CopyScore(&rd_best, &rd_uv); 1085 CopyScore(&rd_best, &rd_uv);
1045 rd->mode_uv = mode; 1086 rd->mode_uv = mode;
1046 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); 1087 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
1047 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? 1088 SwapPtr(&dst, &tmp_dst);
1048 } 1089 }
1049 } 1090 }
1050 VP8SetIntraUVMode(it, rd->mode_uv); 1091 VP8SetIntraUVMode(it, rd->mode_uv);
1051 AddScore(rd, &rd_best); 1092 AddScore(rd, &rd_best);
1093 if (dst != dst0) { // copy 16x8 block if needed
1094 VP8Copy16x8(dst, dst0);
1095 }
1052 } 1096 }
1053 1097
1054 //------------------------------------------------------------------------------ 1098 //------------------------------------------------------------------------------
1055 // Final reconstruction and quantization. 1099 // Final reconstruction and quantization.
1056 1100
1057 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { 1101 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
1058 const VP8Encoder* const enc = it->enc_; 1102 const VP8Encoder* const enc = it->enc_;
1059 const int is_i16 = (it->mb_->type_ == 1); 1103 const int is_i16 = (it->mb_->type_ == 1);
1060 int nz = 0; 1104 int nz = 0;
1061 1105
1062 if (is_i16) { 1106 if (is_i16) {
1063 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); 1107 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
1064 } else { 1108 } else {
1065 VP8IteratorStartI4(it); 1109 VP8IteratorStartI4(it);
1066 do { 1110 do {
1067 const int mode = 1111 const int mode =
1068 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; 1112 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
1069 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; 1113 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
1070 uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_]; 1114 uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
1071 VP8MakeIntra4Preds(it); 1115 VP8MakeIntra4Preds(it);
1072 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], 1116 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
1073 src, dst, mode) << it->i4_; 1117 src, dst, mode) << it->i4_;
1074 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF)); 1118 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
1075 } 1119 }
1076 1120
1077 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_); 1121 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
1078 rd->nz = nz; 1122 rd->nz = nz;
1079 } 1123 }
1080 1124
1081 // Refine intra16/intra4 sub-modes based on distortion only (not rate). 1125 // Refine intra16/intra4 sub-modes based on distortion only (not rate).
1082 static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { 1126 static void RefineUsingDistortion(VP8EncIterator* const it,
1083 const int is_i16 = (it->mb_->type_ == 1); 1127 int try_both_modes, int refine_uv_mode,
1128 VP8ModeScore* const rd) {
1084 score_t best_score = MAX_COST; 1129 score_t best_score = MAX_COST;
1130 score_t score_i4 = (score_t)I4_PENALTY;
1131 int16_t tmp_levels[16][16];
1132 uint8_t modes_i4[16];
1133 int nz = 0;
1134 int mode;
1135 int is_i16 = try_both_modes || (it->mb_->type_ == 1);
1085 1136
1086 if (try_both_i4_i16 || is_i16) { 1137 if (is_i16) { // First, evaluate Intra16 distortion
1087 int mode;
1088 int best_mode = -1; 1138 int best_mode = -1;
1139 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
1089 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { 1140 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1090 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; 1141 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
1091 const uint8_t* const src = it->yuv_in_ + Y_OFF;
1092 const score_t score = VP8SSE16x16(src, ref); 1142 const score_t score = VP8SSE16x16(src, ref);
1093 if (score < best_score) { 1143 if (score < best_score) {
1094 best_mode = mode; 1144 best_mode = mode;
1095 best_score = score; 1145 best_score = score;
1096 } 1146 }
1097 } 1147 }
1098 VP8SetIntra16Mode(it, best_mode); 1148 VP8SetIntra16Mode(it, best_mode);
1149 // we'll reconstruct later, if i16 mode actually gets selected
1099 } 1150 }
1100 if (try_both_i4_i16 || !is_i16) { 1151
1101 uint8_t modes_i4[16]; 1152 // Next, evaluate Intra4
1153 if (try_both_modes || !is_i16) {
1102 // We don't evaluate the rate here, but just account for it through a 1154 // We don't evaluate the rate here, but just account for it through a
1103 // constant penalty (i4 mode usually needs more bits compared to i16). 1155 // constant penalty (i4 mode usually needs more bits compared to i16).
1104 score_t score_i4 = (score_t)I4_PENALTY; 1156 is_i16 = 0;
1105
1106 VP8IteratorStartI4(it); 1157 VP8IteratorStartI4(it);
1107 do { 1158 do {
1108 int mode; 1159 int best_i4_mode = -1;
1109 int best_sub_mode = -1; 1160 score_t best_i4_score = MAX_COST;
1110 score_t best_sub_score = MAX_COST; 1161 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
1111 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
1112 1162
1113 // TODO(skal): we don't really need the prediction pixels here,
1114 // but just the distortion against 'src'.
1115 VP8MakeIntra4Preds(it); 1163 VP8MakeIntra4Preds(it);
1116 for (mode = 0; mode < NUM_BMODES; ++mode) { 1164 for (mode = 0; mode < NUM_BMODES; ++mode) {
1117 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; 1165 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
1118 const score_t score = VP8SSE4x4(src, ref); 1166 const score_t score = VP8SSE4x4(src, ref);
1119 if (score < best_sub_score) { 1167 if (score < best_i4_score) {
1120 best_sub_mode = mode; 1168 best_i4_mode = mode;
1121 best_sub_score = score; 1169 best_i4_score = score;
1122 } 1170 }
1123 } 1171 }
1124 modes_i4[it->i4_] = best_sub_mode; 1172 modes_i4[it->i4_] = best_i4_mode;
1125 score_i4 += best_sub_score; 1173 score_i4 += best_i4_score;
1126 if (score_i4 >= best_score) break; 1174 if (score_i4 >= best_score) {
1127 } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); 1175 // Intra4 won't be better than Intra16. Bail out and pick Intra16.
1128 if (score_i4 < best_score) { 1176 is_i16 = 1;
1129 VP8SetIntra4Mode(it, modes_i4); 1177 break;
1178 } else { // reconstruct partial block inside yuv_out2_ buffer
1179 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
1180 nz |= ReconstructIntra4(it, tmp_levels[it->i4_],
1181 src, tmp_dst, best_i4_mode) << it->i4_;
1182 }
1183 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
1184 }
1185
1186 // Final reconstruction, depending on which mode is selected.
1187 if (!is_i16) {
1188 VP8SetIntra4Mode(it, modes_i4);
1189 memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels));
1190 SwapOut(it);
1191 best_score = score_i4;
1192 } else {
1193 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
1194 }
1195
1196 // ... and UV!
1197 if (refine_uv_mode) {
1198 int best_mode = -1;
1199 score_t best_uv_score = MAX_COST;
1200 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
1201 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1202 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
1203 const score_t score = VP8SSE16x8(src, ref);
1204 if (score < best_uv_score) {
1205 best_mode = mode;
1206 best_uv_score = score;
1207 }
1130 } 1208 }
1209 VP8SetIntraUVMode(it, best_mode);
1131 } 1210 }
1211 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
1212
1213 rd->nz = nz;
1214 rd->score = best_score;
1132 } 1215 }
1133 1216
1134 //------------------------------------------------------------------------------ 1217 //------------------------------------------------------------------------------
1135 // Entry point 1218 // Entry point
1136 1219
1137 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, 1220 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
1138 VP8RDLevel rd_opt) { 1221 VP8RDLevel rd_opt) {
1139 int is_skipped; 1222 int is_skipped;
1140 const int method = it->enc_->method_; 1223 const int method = it->enc_->method_;
1141 1224
1142 InitScore(rd); 1225 InitScore(rd);
1143 1226
1144 // We can perform predictions for Luma16x16 and Chroma8x8 already. 1227 // We can perform predictions for Luma16x16 and Chroma8x8 already.
1145 // Luma4x4 predictions needs to be done as-we-go. 1228 // Luma4x4 predictions needs to be done as-we-go.
1146 VP8MakeLuma16Preds(it); 1229 VP8MakeLuma16Preds(it);
1147 VP8MakeChroma8Preds(it); 1230 VP8MakeChroma8Preds(it);
1148 1231
1149 if (rd_opt > RD_OPT_NONE) { 1232 if (rd_opt > RD_OPT_NONE) {
1150 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); 1233 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
1151 PickBestIntra16(it, rd); 1234 PickBestIntra16(it, rd);
1152 if (method >= 2) { 1235 if (method >= 2) {
1153 PickBestIntra4(it, rd); 1236 PickBestIntra4(it, rd);
1154 } 1237 }
1155 PickBestUV(it, rd); 1238 PickBestUV(it, rd);
1156 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now 1239 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
1157 it->do_trellis_ = 1; 1240 it->do_trellis_ = 1;
1158 SimpleQuantize(it, rd); 1241 SimpleQuantize(it, rd);
1159 } 1242 }
1160 } else { 1243 } else {
1161 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). 1244 // At this point we have heuristically decided intra16 / intra4.
1162 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). 1245 // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
1163 DistoRefine(it, (method >= 2)); 1246 // For method <= 1, we don't re-examine the decision but just go ahead with
1164 SimpleQuantize(it, rd); 1247 // quantization/reconstruction.
1248 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
1165 } 1249 }
1166 is_skipped = (rd->nz == 0); 1250 is_skipped = (rd->nz == 0);
1167 VP8SetSkip(it, is_skipped); 1251 VP8SetSkip(it, is_skipped);
1168 return is_skipped; 1252 return is_skipped;
1169 } 1253 }
1170
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698