OLD | NEW |
1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Quantization | 10 // Quantization |
(...skipping 12 matching lines...) |
23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky; not worth it. | 23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky; not worth it. |
24 #define USE_TDISTO 1 | 24 #define USE_TDISTO 1 |
25 | 25 |
26 #define MID_ALPHA 64 // neutral value for susceptibility | 26 #define MID_ALPHA 64 // neutral value for susceptibility |
27 #define MIN_ALPHA 30 // lowest usable value for susceptibility | 27 #define MIN_ALPHA 30 // lowest usable value for susceptibility |
28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility | 28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility |
29 | 29 |
30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP | 30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP |
31 // power-law modulation. Must be strictly less than 1. | 31 // power-law modulation. Must be strictly less than 1. |
32 | 32 |
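Note: the "strictly less than 1" requirement bounds the power-law exponent used when a segment's susceptibility (alpha) modulates the base quantizer. A hedged sketch of that modulation's shape (it lives in the segment-setup code, not in this file; variable names are illustrative):

    /* amp <= SNS_TO_DQ/128 < 1/128, so for alpha in [-127, 127]
     * the exponent below stays strictly positive: */
    const double amp = SNS_TO_DQ * sns_strength / 100. / 128.;
    const double expn = 1. - amp * alpha;  /* power-law exponent */
    const double c = pow(c_base, expn);    /* modulated compression factor */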
33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision | 33 #define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision |
34 | 34 |
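I4_PENALTY acts as a constant rate proxy in the quick i4/i16 decision: RefineUsingDistortion() below compares raw SSE scores, and i4 mode typically costs more header bits than i16, so the i4 side starts with a handicap. The patch raises it from 4000 to 14000, biasing that quick decision further toward i16. Schematically (illustrative names, not the patch's code):

    score_t score_i4 = (score_t)I4_PENALTY;
    for (n = 0; n < 16; ++n) score_i4 += best_i4_sse[n];
    const int use_i4 = (score_i4 < best_i16_sse);  /* else keep i16 */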
35 // number of non-zero coeffs below which we consider the block very flat | 35 // number of non-zero coeffs below which we consider the block very flat |
36 // (and apply a penalty to complex predictions) | 36 // (and apply a penalty to complex predictions) |
37 #define FLATNESS_LIMIT_I16 10 // I16 mode | 37 #define FLATNESS_LIMIT_I16 10 // I16 mode |
38 #define FLATNESS_LIMIT_I4 3 // I4 mode | 38 #define FLATNESS_LIMIT_I4 3 // I4 mode |
39 #define FLATNESS_LIMIT_UV 2 // UV mode | 39 #define FLATNESS_LIMIT_UV 2 // UV mode |
40 #define FLATNESS_PENALTY 140 // roughly 1 bit per block | 40 #define FLATNESS_PENALTY 140 // roughly 1 bit per block |
41 | 41 |
42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) | 42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) |
43 | 43 |
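MULT_8B() is an 8-bit fixed-point multiply with rounding, i.e. the nearest integer to a*b/256; it is used below to scale the texture distortion (VP8TDisto*) by tlambda. A quick self-check (illustrative only):

    #include <assert.h>
    #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
    int main(void) {
      assert(MULT_8B(200, 90) == 70);    /* 200*90/256 = 70.3 -> 70 */
      assert(MULT_8B(256, 256) == 256);  /* b == 256 acts as identity */
      return 0;
    }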
| 44 #define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) |
| 45 |
44 // #define DEBUG_BLOCK | 46 // #define DEBUG_BLOCK |
45 | 47 |
46 //------------------------------------------------------------------------------ | 48 //------------------------------------------------------------------------------ |
47 | 49 |
48 #if defined(DEBUG_BLOCK) | 50 #if defined(DEBUG_BLOCK) |
49 | 51 |
50 #include <stdio.h> | 52 #include <stdio.h> |
51 #include <stdlib.h> | 53 #include <stdlib.h> |
52 | 54 |
53 static void PrintBlockInfo(const VP8EncIterator* const it, | 55 static void PrintBlockInfo(const VP8EncIterator* const it, |
54 const VP8ModeScore* const rd) { | 56 const VP8ModeScore* const rd) { |
55 int i, j; | 57 int i, j; |
56 const int is_i16 = (it->mb_->type_ == 1); | 58 const int is_i16 = (it->mb_->type_ == 1); |
| 59 const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC; |
| 60 const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC; |
| 61 const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC; |
| 62 const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC; |
57 printf("SOURCE / OUTPUT / ABS DELTA\n"); | 63 printf("SOURCE / OUTPUT / ABS DELTA\n"); |
58 for (j = 0; j < 24; ++j) { | 64 for (j = 0; j < 16; ++j) { |
59 if (j == 16) printf("\n"); // newline before the U/V block | 65 for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]); |
60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]); | |
61 printf(" "); | 66 printf(" "); |
62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]); | 67 for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]); |
63 printf(" "); | 68 printf(" "); |
64 for (i = 0; i < 16; ++i) { | 69 for (i = 0; i < 16; ++i) { |
65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS])); | 70 printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS])); |
66 } | 71 } |
67 printf("\n"); | 72 printf("\n"); |
68 } | 73 } |
| 74 printf("\n"); // newline before the U/V block |
| 75 for (j = 0; j < 8; ++j) { |
| 76 for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]); |
| 77 printf(" "); |
| 78 for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]); |
| 79 printf(" "); |
| 80 for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]); |
| 81 printf(" "); |
| 82 for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]); |
| 83 printf(" "); |
| 84 for (i = 0; i < 8; ++i) { |
| 85 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); |
| 86 } |
| 87 printf(" "); |
| 88 for (i = 8; i < 16; ++i) { |
| 89 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS])); |
| 90 } |
| 91 printf("\n"); |
| 92 } |
69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", | 93 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n", |
70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, | 94 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz, |
71 (int)rd->score); | 95 (int)rd->score); |
72 if (is_i16) { | 96 if (is_i16) { |
73 printf("Mode: %d\n", rd->mode_i16); | 97 printf("Mode: %d\n", rd->mode_i16); |
74 printf("y_dc_levels:"); | 98 printf("y_dc_levels:"); |
75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); | 99 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]); |
76 printf("\n"); | 100 printf("\n"); |
77 } else { | 101 } else { |
78 printf("Modes[16]: "); | 102 printf("Modes[16]: "); |
(...skipping 358 matching lines...) |
437 } | 461 } |
438 | 462 |
439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) { | 463 void VP8MakeIntra4Preds(const VP8EncIterator* const it) { |
440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_); | 464 VP8EncPredLuma4(it->yuv_p_, it->i4_top_); |
441 } | 465 } |
442 | 466 |
443 //------------------------------------------------------------------------------ | 467 //------------------------------------------------------------------------------ |
444 // Quantize | 468 // Quantize |
445 | 469 |
446 // Layout: | 470 // Layout: |
447 // +----+ | 471 // +----+----+ |
448 // |YYYY| 0 | 472 // |YYYY|UUVV| 0 |
449 // |YYYY| 4 | 473 // |YYYY|UUVV| 4 |
450 // |YYYY| 8 | 474 // |YYYY|....| 8 |
451 // |YYYY| 12 | 475 // |YYYY|....| 12 |
452 // +----+ | 476 // +----+----+ |
453 // |UUVV| 16 | |
454 // |UUVV| 20 | |
455 // +----+ | |
456 | 477 |
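The repacked layout keeps the whole macroblock (16x16 luma, with the two 8x8 chroma planes side by side) in the top 16 rows of the scratch buffer; Y_OFF_ENC and U_OFF_ENC are the matching base offsets used throughout this patch. VP8Scan below lists the 16 luma 4x4 sub-blocks in raster order, and the table is equivalent to this closed form (a sketch, assuming the same BPS stride):

    /* offset of luma 4x4 sub-block n (0..15), raster order: */
    static int ScanOffset(int n) {
      return (n & 3) * 4 + (n >> 2) * 4 * BPS;  /* x*4 + y*4*BPS */
    }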
457 const int VP8Scan[16] = { // Luma | 478 const int VP8Scan[16] = { // Luma |
458 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, | 479 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
459 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, | 480 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
460 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, | 481 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
461 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, | 482 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, |
462 }; | 483 }; |
463 | 484 |
464 static const int VP8ScanUV[4 + 4] = { | 485 static const int VP8ScanUV[4 + 4] = { |
465 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U | 486 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U |
(...skipping 65 matching lines...) |
531 // If a coefficient was quantized to a value Q (using a neutral bias), | 552 // If a coefficient was quantized to a value Q (using a neutral bias), |
532 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] | 553 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] |
533 // We don't test negative values though. | 554 // We don't test negative values though. |
534 #define MIN_DELTA 0 // how much lower level to try | 555 #define MIN_DELTA 0 // how much lower level to try |
535 #define MAX_DELTA 1 // how much higher | 556 #define MAX_DELTA 1 // how much higher |
536 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) | 557 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) |
537 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) | 558 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA]) |
538 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) | 559 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA]) |
539 | 560 |
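With MIN_DELTA == 0 and MAX_DELTA == 1, each coefficient position carries NUM_NODES == 2 candidate levels: the neutrally-biased level0 and level0 + 1. The NODE()/SCORE_STATE() macros merely shift the [-MIN_DELTA, MAX_DELTA] range to 0-based array indices. Illustrative enumeration (hypothetical helper, not part of the patch):

    static void ListCandidates(int level0, int out[NUM_NODES]) {
      int m;
      for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
        out[m + MIN_DELTA] = level0 + m;  /* here: {level0, level0 + 1} */
      }
    }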
540 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { | 561 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { |
541 // TODO: incorporate the "* 256" in the tables? | 562 rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD); |
542 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD); | |
543 } | 563 } |
544 | 564 |
545 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, | 565 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, |
546 score_t distortion) { | 566 score_t distortion) { |
547 return rate * lambda + 256 * distortion; | 567 return rate * lambda + RD_DISTO_MULT * distortion; |
548 } | 568 } |
549 | 569 |
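Both helpers now share RD_DISTO_MULT, so rate (in the encoder's fixed-point bit units) and distortion (SSE) land on comparable scales. A worked instance with illustrative numbers:

    /* lambda = 100, rate R+H = 50, distortion D+SD = 30: */
    const score_t score = 50 * 100 + RD_DISTO_MULT * 30;  /* 5000 + 7680 = 12680 */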
550 static int TrellisQuantizeBlock(const VP8Encoder* const enc, | 570 static int TrellisQuantizeBlock(const VP8Encoder* const enc, |
551 int16_t in[16], int16_t out[16], | 571 int16_t in[16], int16_t out[16], |
552 int ctx0, int coeff_type, | 572 int ctx0, int coeff_type, |
553 const VP8Matrix* const mtx, | 573 const VP8Matrix* const mtx, |
554 int lambda) { | 574 int lambda) { |
555 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; | 575 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; |
556 const CostArray* const costs = enc->proba_.level_cost_[coeff_type]; | 576 CostArrayPtr const costs = |
| 577 (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; |
557 const int first = (coeff_type == 0) ? 1 : 0; | 578 const int first = (coeff_type == 0) ? 1 : 0; |
558 Node nodes[16][NUM_NODES]; | 579 Node nodes[16][NUM_NODES]; |
559 ScoreState score_states[2][NUM_NODES]; | 580 ScoreState score_states[2][NUM_NODES]; |
560 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); | 581 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); |
561 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); | 582 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA); |
562 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous | 583 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous |
563 score_t best_score; | 584 score_t best_score; |
564 int n, m, p, last; | 585 int n, m, p, last; |
565 | 586 |
566 { | 587 { |
(...skipping 16 matching lines...) |
583 if (last < 15) ++last; | 604 if (last < 15) ++last; |
584 | 605 |
585 // compute 'skip' score. This is the max score one can do. | 606 // compute 'skip' score. This is the max score one can do. |
586 cost = VP8BitCost(0, last_proba); | 607 cost = VP8BitCost(0, last_proba); |
587 best_score = RDScoreTrellis(lambda, cost, 0); | 608 best_score = RDScoreTrellis(lambda, cost, 0); |
588 | 609 |
589 // initialize source node. | 610 // initialize source node. |
590 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { | 611 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { |
591 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; | 612 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; |
592 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); | 613 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0); |
593 ss_cur[m].costs = costs[VP8EncBands[first]][ctx0]; | 614 ss_cur[m].costs = costs[first][ctx0]; |
594 } | 615 } |
595 } | 616 } |
596 | 617 |
597 // traverse trellis. | 618 // traverse trellis. |
598 for (n = first; n <= last; ++n) { | 619 for (n = first; n <= last; ++n) { |
599 const int j = kZigzag[n]; | 620 const int j = kZigzag[n]; |
600 const uint32_t Q = mtx->q_[j]; | 621 const uint32_t Q = mtx->q_[j]; |
601 const uint32_t iQ = mtx->iq_[j]; | 622 const uint32_t iQ = mtx->iq_[j]; |
602 const uint32_t B = BIAS(0x00); // neutral bias | 623 const uint32_t B = BIAS(0x00); // neutral bias |
603 // note: it's important to take sign of the _original_ coeff, | 624 // note: it's important to take sign of the _original_ coeff, |
(...skipping 13 matching lines...) |
617 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { | 638 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { |
618 Node* const cur = &NODE(n, m); | 639 Node* const cur = &NODE(n, m); |
619 int level = level0 + m; | 640 int level = level0 + m; |
620 const int ctx = (level > 2) ? 2 : level; | 641 const int ctx = (level > 2) ? 2 : level; |
621 const int band = VP8EncBands[n + 1]; | 642 const int band = VP8EncBands[n + 1]; |
622 score_t base_score, last_pos_score; | 643 score_t base_score, last_pos_score; |
623 score_t best_cur_score = MAX_COST; | 644 score_t best_cur_score = MAX_COST; |
624 int best_prev = 0; // default, in case | 645 int best_prev = 0; // default, in case |
625 | 646 |
626 ss_cur[m].score = MAX_COST; | 647 ss_cur[m].score = MAX_COST; |
627 ss_cur[m].costs = costs[band][ctx]; | 648 ss_cur[m].costs = costs[n + 1][ctx]; |
628 if (level > MAX_LEVEL || level < 0) { // node is dead? | 649 if (level > MAX_LEVEL || level < 0) { // node is dead? |
629 continue; | 650 continue; |
630 } | 651 } |
631 | 652 |
632 // Compute extra rate cost if last coeff's position is < 15 | 653 // Compute extra rate cost if last coeff's position is < 15 |
633 { | 654 { |
634 const score_t last_pos_cost = | 655 const score_t last_pos_cost = |
635 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; | 656 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; |
636 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); | 657 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); |
637 } | 658 } |
(...skipping 74 matching lines...) |
712 // Performs: difference, transform, quantize, back-transform, add | 733 // Performs: difference, transform, quantize, back-transform, add |
713 // all at once. Output is the reconstructed block in *yuv_out, and the | 734 // all at once. Output is the reconstructed block in *yuv_out, and the |
714 // quantized levels in *levels. | 735 // quantized levels in *levels. |
715 | 736 |
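A minimal sketch of the quantize/reconstruct round-trip described above (assumed helper, simplified: the real code batches two blocks per VP8FTransform2()/VP8EncQuantize2Blocks() call, applies per-coefficient bias, and for i16 routes the DC terms through the Walsh-Hadamard transform):

    #include <stdlib.h>
    /* the encoder codes 'level'; the reconstruction in *yuv_out is then
     * rebuilt from level * q, so the residual error is coeff - level*q: */
    static int QuantizeCoeff(int coeff, int q) {
      const int sign = (coeff < 0);
      const int level = (abs(coeff) + q / 2) / q;
      return sign ? -level : level;
    }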
716 static int ReconstructIntra16(VP8EncIterator* const it, | 737 static int ReconstructIntra16(VP8EncIterator* const it, |
717 VP8ModeScore* const rd, | 738 VP8ModeScore* const rd, |
718 uint8_t* const yuv_out, | 739 uint8_t* const yuv_out, |
719 int mode) { | 740 int mode) { |
720 const VP8Encoder* const enc = it->enc_; | 741 const VP8Encoder* const enc = it->enc_; |
721 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; | 742 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
722 const uint8_t* const src = it->yuv_in_ + Y_OFF; | 743 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
723 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; | 744 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
724 int nz = 0; | 745 int nz = 0; |
725 int n; | 746 int n; |
726 int16_t tmp[16][16], dc_tmp[16]; | 747 int16_t tmp[16][16], dc_tmp[16]; |
727 | 748 |
728 for (n = 0; n < 16; ++n) { | 749 for (n = 0; n < 16; n += 2) { |
729 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); | 750 VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); |
730 } | 751 } |
731 VP8FTransformWHT(tmp[0], dc_tmp); | 752 VP8FTransformWHT(tmp[0], dc_tmp); |
732 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; | 753 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24; |
733 | 754 |
734 if (DO_TRELLIS_I16 && it->do_trellis_) { | 755 if (DO_TRELLIS_I16 && it->do_trellis_) { |
735 int x, y; | 756 int x, y; |
736 VP8IteratorNzToBytes(it); | 757 VP8IteratorNzToBytes(it); |
737 for (y = 0, n = 0; y < 4; ++y) { | 758 for (y = 0, n = 0; y < 4; ++y) { |
738 for (x = 0; x < 4; ++x, ++n) { | 759 for (x = 0; x < 4; ++x, ++n) { |
739 const int ctx = it->top_nz_[x] + it->left_nz_[y]; | 760 const int ctx = it->top_nz_[x] + it->left_nz_[y]; |
740 const int non_zero = | 761 const int non_zero = |
741 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, | 762 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, |
742 &dqm->y1_, dqm->lambda_trellis_i16_); | 763 &dqm->y1_, dqm->lambda_trellis_i16_); |
743 it->top_nz_[x] = it->left_nz_[y] = non_zero; | 764 it->top_nz_[x] = it->left_nz_[y] = non_zero; |
744 rd->y_ac_levels[n][0] = 0; | 765 rd->y_ac_levels[n][0] = 0; |
745 nz |= non_zero << n; | 766 nz |= non_zero << n; |
746 } | 767 } |
747 } | 768 } |
748 } else { | 769 } else { |
749 for (n = 0; n < 16; ++n) { | 770 for (n = 0; n < 16; n += 2) { |
750 // Zero-out the first coeff, so that: a) nz is correct below, and | 771 // Zero-out the first coeff, so that: a) nz is correct below, and |
751 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. | 772 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. |
752 tmp[n][0] = 0; | 773 tmp[n][0] = tmp[n + 1][0] = 0; |
753 nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; | 774 nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; |
754 assert(rd->y_ac_levels[n][0] == 0); | 775 assert(rd->y_ac_levels[n + 0][0] == 0); |
| 776 assert(rd->y_ac_levels[n + 1][0] == 0); |
755 } | 777 } |
756 } | 778 } |
757 | 779 |
758 // Transform back | 780 // Transform back |
759 VP8TransformWHT(dc_tmp, tmp[0]); | 781 VP8TransformWHT(dc_tmp, tmp[0]); |
760 for (n = 0; n < 16; n += 2) { | 782 for (n = 0; n < 16; n += 2) { |
761 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); | 783 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1); |
762 } | 784 } |
763 | 785 |
764 return nz; | 786 return nz; |
(...skipping 20 matching lines...) |
785 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); | 807 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); |
786 } | 808 } |
787 VP8ITransform(ref, tmp, yuv_out, 0); | 809 VP8ITransform(ref, tmp, yuv_out, 0); |
788 return nz; | 810 return nz; |
789 } | 811 } |
790 | 812 |
791 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, | 813 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, |
792 uint8_t* const yuv_out, int mode) { | 814 uint8_t* const yuv_out, int mode) { |
793 const VP8Encoder* const enc = it->enc_; | 815 const VP8Encoder* const enc = it->enc_; |
794 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; | 816 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
795 const uint8_t* const src = it->yuv_in_ + U_OFF; | 817 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
796 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; | 818 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
797 int nz = 0; | 819 int nz = 0; |
798 int n; | 820 int n; |
799 int16_t tmp[8][16]; | 821 int16_t tmp[8][16]; |
800 | 822 |
801 for (n = 0; n < 8; ++n) { | 823 for (n = 0; n < 8; n += 2) { |
802 VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); | 824 VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); |
803 } | 825 } |
804 if (DO_TRELLIS_UV && it->do_trellis_) { | 826 if (DO_TRELLIS_UV && it->do_trellis_) { |
805 int ch, x, y; | 827 int ch, x, y; |
806 for (ch = 0, n = 0; ch <= 2; ch += 2) { | 828 for (ch = 0, n = 0; ch <= 2; ch += 2) { |
807 for (y = 0; y < 2; ++y) { | 829 for (y = 0; y < 2; ++y) { |
808 for (x = 0; x < 2; ++x, ++n) { | 830 for (x = 0; x < 2; ++x, ++n) { |
809 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; | 831 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; |
810 const int non_zero = | 832 const int non_zero = |
811 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, | 833 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, |
812 &dqm->uv_, dqm->lambda_trellis_uv_); | 834 &dqm->uv_, dqm->lambda_trellis_uv_); |
813 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; | 835 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; |
814 nz |= non_zero << n; | 836 nz |= non_zero << n; |
815 } | 837 } |
816 } | 838 } |
817 } | 839 } |
818 } else { | 840 } else { |
819 for (n = 0; n < 8; ++n) { | 841 for (n = 0; n < 8; n += 2) { |
820 nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; | 842 nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; |
821 } | 843 } |
822 } | 844 } |
823 | 845 |
824 for (n = 0; n < 8; n += 2) { | 846 for (n = 0; n < 8; n += 2) { |
825 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); | 847 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1); |
826 } | 848 } |
827 return (nz << 16); | 849 return (nz << 16); |
828 } | 850 } |
829 | 851 |
830 //------------------------------------------------------------------------------ | 852 //------------------------------------------------------------------------------ |
831 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost. | 853 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost. |
832 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion. | 854 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion. |
833 | 855 |
834 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { | 856 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) { |
835 // We look at the first three AC coefficients to determine the average | 857 // We look at the first three AC coefficients to determine the average |
836 // delta between each sub-4x4 block. | 858 // delta between each sub-4x4 block. |
837 const int v0 = abs(DCs[1]); | 859 const int v0 = abs(DCs[1]); |
838 const int v1 = abs(DCs[4]); | 860 const int v1 = abs(DCs[4]); |
839 const int v2 = abs(DCs[5]); | 861 const int v2 = abs(DCs[5]); |
840 int max_v = (v0 > v1) ? v1 : v0; | 862 int max_v = (v0 > v1) ? v1 : v0; |
841 max_v = (v2 > max_v) ? v2 : max_v; | 863 max_v = (v2 > max_v) ? v2 : max_v; |
842 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; | 864 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v; |
843 } | 865 } |
844 | 866 |
| 867 static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) { |
| 868 VP8ModeScore* const tmp = *a; |
| 869 *a = *b; |
| 870 *b = tmp; |
| 871 } |
| 872 |
845 static void SwapPtr(uint8_t** a, uint8_t** b) { | 873 static void SwapPtr(uint8_t** a, uint8_t** b) { |
846 uint8_t* const tmp = *a; | 874 uint8_t* const tmp = *a; |
847 *a = *b; | 875 *a = *b; |
848 *b = tmp; | 876 *b = tmp; |
849 } | 877 } |
850 | 878 |
851 static void SwapOut(VP8EncIterator* const it) { | 879 static void SwapOut(VP8EncIterator* const it) { |
852 SwapPtr(&it->yuv_out_, &it->yuv_out2_); | 880 SwapPtr(&it->yuv_out_, &it->yuv_out2_); |
853 } | 881 } |
854 | 882 |
855 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { | 883 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) { |
856 score_t score = 0; | 884 score_t score = 0; |
857 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? | 885 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring? |
858 int i; | 886 int i; |
859 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC | 887 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC |
860 score += (levels[i] != 0); | 888 score += (levels[i] != 0); |
861 if (score > thresh) return 0; | 889 if (score > thresh) return 0; |
862 } | 890 } |
863 levels += 16; | 891 levels += 16; |
864 } | 892 } |
865 return 1; | 893 return 1; |
866 } | 894 } |
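IsFlat() is used as a boolean despite the score_t return type: it answers "do these blocks have at most 'thresh' non-zero AC levels in total?". Typical call site, as in PickBestIntra16() below:

    /* penalize complex modes on flat macroblocks (~1 bit per block): */
    if (mode > 0 && IsFlat(rd_cur->y_ac_levels[0], 16, FLATNESS_LIMIT_I16)) {
      rd_cur->R += FLATNESS_PENALTY * 16;  /* kNumBlocks == 16 for i16 */
    }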
867 | 895 |
868 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { | 896 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { |
869 const int kNumBlocks = 16; | 897 const int kNumBlocks = 16; |
870 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; | 898 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
871 const int lambda = dqm->lambda_i16_; | 899 const int lambda = dqm->lambda_i16_; |
872 const int tlambda = dqm->tlambda_; | 900 const int tlambda = dqm->tlambda_; |
873 const uint8_t* const src = it->yuv_in_ + Y_OFF; | 901 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
874 VP8ModeScore rd16; | 902 VP8ModeScore rd_tmp; |
| 903 VP8ModeScore* rd_cur = &rd_tmp; |
| 904 VP8ModeScore* rd_best = rd; |
875 int mode; | 905 int mode; |
876 | 906 |
877 rd->mode_i16 = -1; | 907 rd->mode_i16 = -1; |
878 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 908 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
879 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer | 909 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer |
880 int nz; | 910 rd_cur->mode_i16 = mode; |
881 | 911 |
882 // Reconstruct | 912 // Reconstruct |
883 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); | 913 rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode); |
884 | 914 |
885 // Measure RD-score | 915 // Measure RD-score |
886 rd16.D = VP8SSE16x16(src, tmp_dst); | 916 rd_cur->D = VP8SSE16x16(src, tmp_dst); |
887 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) | 917 rd_cur->SD = |
888 : 0; | 918 tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; |
889 rd16.H = VP8FixedCostsI16[mode]; | 919 rd_cur->H = VP8FixedCostsI16[mode]; |
890 rd16.R = VP8GetCostLuma16(it, &rd16); | 920 rd_cur->R = VP8GetCostLuma16(it, rd_cur); |
891 if (mode > 0 && | 921 if (mode > 0 && |
892 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { | 922 IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { |
893 // penalty to avoid flat area to be mispredicted by complex mode | 923 // penalty to avoid flat area to be mispredicted by complex mode |
894 rd16.R += FLATNESS_PENALTY * kNumBlocks; | 924 rd_cur->R += FLATNESS_PENALTY * kNumBlocks; |
895 } | 925 } |
896 | 926 |
897 // Since we always examine Intra16 first, we can overwrite *rd directly. | 927 // Since we always examine Intra16 first, we can overwrite *rd directly. |
898 SetRDScore(lambda, &rd16); | 928 SetRDScore(lambda, rd_cur); |
899 if (mode == 0 || rd16.score < rd->score) { | 929 if (mode == 0 || rd_cur->score < rd_best->score) { |
900 CopyScore(rd, &rd16); | 930 SwapModeScore(&rd_cur, &rd_best); |
901 rd->mode_i16 = mode; | |
902 rd->nz = nz; | |
903 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels)); | |
904 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels)); | |
905 SwapOut(it); | 931 SwapOut(it); |
906 } | 932 } |
907 } | 933 } |
| 934 if (rd_best != rd) { |
| 935 memcpy(rd, rd_best, sizeof(*rd)); |
| 936 } |
908 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. | 937 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. |
909 VP8SetIntra16Mode(it, rd->mode_i16); | 938 VP8SetIntra16Mode(it, rd->mode_i16); |
910 | 939 |
911 // we have a blocky macroblock (only DCs are non-zero) with fairly high | 940 // we have a blocky macroblock (only DCs are non-zero) with fairly high |
912 // distortion, record max delta so we can later adjust the minimal filtering | 941 // distortion, record max delta so we can later adjust the minimal filtering |
913 // strength needed to smooth these blocks out. | 942 // strength needed to smooth these blocks out. |
914 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { | 943 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) { |
915 StoreMaxDelta(dqm, rd->y_dc_levels); | 944 StoreMaxDelta(dqm, rd->y_dc_levels); |
916 } | 945 } |
917 } | 946 } |
918 | 947 |
919 //------------------------------------------------------------------------------ | 948 //------------------------------------------------------------------------------ |
920 | 949 |
921 // return the cost array corresponding to the surrounding prediction modes. | 950 // return the cost array corresponding to the surrounding prediction modes. |
922 static const uint16_t* GetCostModeI4(VP8EncIterator* const it, | 951 static const uint16_t* GetCostModeI4(VP8EncIterator* const it, |
923 const uint8_t modes[16]) { | 952 const uint8_t modes[16]) { |
924 const int preds_w = it->enc_->preds_w_; | 953 const int preds_w = it->enc_->preds_w_; |
925 const int x = (it->i4_ & 3), y = it->i4_ >> 2; | 954 const int x = (it->i4_ & 3), y = it->i4_ >> 2; |
926 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; | 955 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; |
927 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; | 956 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; |
928 return VP8FixedCostsI4[top][left]; | 957 return VP8FixedCostsI4[top][left]; |
929 } | 958 } |
930 | 959 |
931 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { | 960 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { |
932 const VP8Encoder* const enc = it->enc_; | 961 const VP8Encoder* const enc = it->enc_; |
933 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; | 962 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; |
934 const int lambda = dqm->lambda_i4_; | 963 const int lambda = dqm->lambda_i4_; |
935 const int tlambda = dqm->tlambda_; | 964 const int tlambda = dqm->tlambda_; |
936 const uint8_t* const src0 = it->yuv_in_ + Y_OFF; | 965 const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC; |
937 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; | 966 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC; |
938 int total_header_bits = 0; | 967 int total_header_bits = 0; |
939 VP8ModeScore rd_best; | 968 VP8ModeScore rd_best; |
940 | 969 |
941 if (enc->max_i4_header_bits_ == 0) { | 970 if (enc->max_i4_header_bits_ == 0) { |
942 return 0; | 971 return 0; |
943 } | 972 } |
944 | 973 |
945 InitScore(&rd_best); | 974 InitScore(&rd_best); |
946 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) | 975 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145) |
947 SetRDScore(dqm->lambda_mode_, &rd_best); | 976 SetRDScore(dqm->lambda_mode_, &rd_best); |
(...skipping 17 matching lines...) |
965 // Reconstruct | 994 // Reconstruct |
966 rd_tmp.nz = | 995 rd_tmp.nz = |
967 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; | 996 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; |
968 | 997 |
969 // Compute RD-score | 998 // Compute RD-score |
970 rd_tmp.D = VP8SSE4x4(src, tmp_dst); | 999 rd_tmp.D = VP8SSE4x4(src, tmp_dst); |
971 rd_tmp.SD = | 1000 rd_tmp.SD = |
972 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) | 1001 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) |
973 : 0; | 1002 : 0; |
974 rd_tmp.H = mode_costs[mode]; | 1003 rd_tmp.H = mode_costs[mode]; |
975 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); | 1004 |
| 1005 // Add flatness penalty |
976 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { | 1006 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { |
977 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks; | 1007 rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; |
| 1008 } else { |
| 1009 rd_tmp.R = 0; |
978 } | 1010 } |
979 | 1011 |
| 1012 // early-out check |
980 SetRDScore(lambda, &rd_tmp); | 1013 SetRDScore(lambda, &rd_tmp); |
| 1014 if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue; |
| 1015 |
| 1016 // finish computing score |
| 1017 rd_tmp.R += VP8GetCostLuma4(it, tmp_levels); |
| 1018 SetRDScore(lambda, &rd_tmp); |
| 1019 |
981 if (best_mode < 0 || rd_tmp.score < rd_i4.score) { | 1020 if (best_mode < 0 || rd_tmp.score < rd_i4.score) { |
982 CopyScore(&rd_i4, &rd_tmp); | 1021 CopyScore(&rd_i4, &rd_tmp); |
983 best_mode = mode; | 1022 best_mode = mode; |
984 SwapPtr(&tmp_dst, &best_block); | 1023 SwapPtr(&tmp_dst, &best_block); |
985 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); | 1024 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, |
| 1025 sizeof(rd_best.y_ac_levels[it->i4_])); |
986 } | 1026 } |
987 } | 1027 } |
988 SetRDScore(dqm->lambda_mode_, &rd_i4); | 1028 SetRDScore(dqm->lambda_mode_, &rd_i4); |
989 AddScore(&rd_best, &rd_i4); | 1029 AddScore(&rd_best, &rd_i4); |
990 if (rd_best.score >= rd->score) { | 1030 if (rd_best.score >= rd->score) { |
991 return 0; | 1031 return 0; |
992 } | 1032 } |
993 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; | 1033 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode]; |
994 if (total_header_bits > enc->max_i4_header_bits_) { | 1034 if (total_header_bits > enc->max_i4_header_bits_) { |
995 return 0; | 1035 return 0; |
(...skipping 13 matching lines...) |
1009 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); | 1049 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); |
1010 return 1; // select intra4x4 over intra16x16 | 1050 return 1; // select intra4x4 over intra16x16 |
1011 } | 1051 } |
1012 | 1052 |
1013 //------------------------------------------------------------------------------ | 1053 //------------------------------------------------------------------------------ |
1014 | 1054 |
1015 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { | 1055 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { |
1016 const int kNumBlocks = 8; | 1056 const int kNumBlocks = 8; |
1017 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; | 1057 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
1018 const int lambda = dqm->lambda_uv_; | 1058 const int lambda = dqm->lambda_uv_; |
1019 const uint8_t* const src = it->yuv_in_ + U_OFF; | 1059 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
1020 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer | 1060 uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer |
1021 uint8_t* const dst0 = it->yuv_out_ + U_OFF; | 1061 uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC; |
| 1062 uint8_t* dst = dst0; |
1022 VP8ModeScore rd_best; | 1063 VP8ModeScore rd_best; |
1023 int mode; | 1064 int mode; |
1024 | 1065 |
1025 rd->mode_uv = -1; | 1066 rd->mode_uv = -1; |
1026 InitScore(&rd_best); | 1067 InitScore(&rd_best); |
1027 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 1068 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
1028 VP8ModeScore rd_uv; | 1069 VP8ModeScore rd_uv; |
1029 | 1070 |
1030 // Reconstruct | 1071 // Reconstruct |
1031 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); | 1072 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); |
1032 | 1073 |
1033 // Compute RD-score | 1074 // Compute RD-score |
1034 rd_uv.D = VP8SSE16x8(src, tmp_dst); | 1075 rd_uv.D = VP8SSE16x8(src, tmp_dst); |
1035 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. | 1076 rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas. |
1036 rd_uv.H = VP8FixedCostsUV[mode]; | 1077 rd_uv.H = VP8FixedCostsUV[mode]; |
1037 rd_uv.R = VP8GetCostUV(it, &rd_uv); | 1078 rd_uv.R = VP8GetCostUV(it, &rd_uv); |
1038 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { | 1079 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) { |
1039 rd_uv.R += FLATNESS_PENALTY * kNumBlocks; | 1080 rd_uv.R += FLATNESS_PENALTY * kNumBlocks; |
1040 } | 1081 } |
1041 | 1082 |
1042 SetRDScore(lambda, &rd_uv); | 1083 SetRDScore(lambda, &rd_uv); |
1043 if (mode == 0 || rd_uv.score < rd_best.score) { | 1084 if (mode == 0 || rd_uv.score < rd_best.score) { |
1044 CopyScore(&rd_best, &rd_uv); | 1085 CopyScore(&rd_best, &rd_uv); |
1045 rd->mode_uv = mode; | 1086 rd->mode_uv = mode; |
1046 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); | 1087 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); |
1047 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? | 1088 SwapPtr(&dst, &tmp_dst); |
1048 } | 1089 } |
1049 } | 1090 } |
1050 VP8SetIntraUVMode(it, rd->mode_uv); | 1091 VP8SetIntraUVMode(it, rd->mode_uv); |
1051 AddScore(rd, &rd_best); | 1092 AddScore(rd, &rd_best); |
| 1093 if (dst != dst0) { // copy 16x8 block if needed |
| 1094 VP8Copy16x8(dst, dst0); |
| 1095 } |
1052 } | 1096 } |
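The chroma loop above now uses the pointer-swap trick instead of copying UV_SIZE bytes after every winning mode: dst and tmp_dst are exchanged whenever a mode wins, so the best reconstruction can end up in either buffer. An odd number of swaps leaves it in the scratch buffer, which is exactly the case the final VP8Copy16x8(dst, dst0) handles; at most one 16x8 copy now happens per macroblock.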
1053 | 1097 |
1054 //------------------------------------------------------------------------------ | 1098 //------------------------------------------------------------------------------ |
1055 // Final reconstruction and quantization. | 1099 // Final reconstruction and quantization. |
1056 | 1100 |
1057 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { | 1101 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { |
1058 const VP8Encoder* const enc = it->enc_; | 1102 const VP8Encoder* const enc = it->enc_; |
1059 const int is_i16 = (it->mb_->type_ == 1); | 1103 const int is_i16 = (it->mb_->type_ == 1); |
1060 int nz = 0; | 1104 int nz = 0; |
1061 | 1105 |
1062 if (is_i16) { | 1106 if (is_i16) { |
1063 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]); | 1107 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
1064 } else { | 1108 } else { |
1065 VP8IteratorStartI4(it); | 1109 VP8IteratorStartI4(it); |
1066 do { | 1110 do { |
1067 const int mode = | 1111 const int mode = |
1068 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; | 1112 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_]; |
1069 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; | 1113 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1070 uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_]; | 1114 uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1071 VP8MakeIntra4Preds(it); | 1115 VP8MakeIntra4Preds(it); |
1072 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], | 1116 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], |
1073 src, dst, mode) << it->i4_; | 1117 src, dst, mode) << it->i4_; |
1074 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF)); | 1118 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC)); |
1075 } | 1119 } |
1076 | 1120 |
1077 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_); | 1121 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
1078 rd->nz = nz; | 1122 rd->nz = nz; |
1079 } | 1123 } |
1080 | 1124 |
1081 // Refine intra16/intra4 sub-modes based on distortion only (not rate). | 1125 // Refine intra16/intra4 sub-modes based on distortion only (not rate). |
1082 static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { | 1126 static void RefineUsingDistortion(VP8EncIterator* const it, |
1083 const int is_i16 = (it->mb_->type_ == 1); | 1127 int try_both_modes, int refine_uv_mode, |
| 1128 VP8ModeScore* const rd) { |
1084 score_t best_score = MAX_COST; | 1129 score_t best_score = MAX_COST; |
| 1130 score_t score_i4 = (score_t)I4_PENALTY; |
| 1131 int16_t tmp_levels[16][16]; |
| 1132 uint8_t modes_i4[16]; |
| 1133 int nz = 0; |
| 1134 int mode; |
| 1135 int is_i16 = try_both_modes || (it->mb_->type_ == 1); |
1085 | 1136 |
1086 if (try_both_i4_i16 || is_i16) { | 1137 if (is_i16) { // First, evaluate Intra16 distortion |
1087 int mode; | |
1088 int best_mode = -1; | 1138 int best_mode = -1; |
| 1139 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
1089 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 1140 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
1090 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; | 1141 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
1091 const uint8_t* const src = it->yuv_in_ + Y_OFF; | |
1092 const score_t score = VP8SSE16x16(src, ref); | 1142 const score_t score = VP8SSE16x16(src, ref); |
1093 if (score < best_score) { | 1143 if (score < best_score) { |
1094 best_mode = mode; | 1144 best_mode = mode; |
1095 best_score = score; | 1145 best_score = score; |
1096 } | 1146 } |
1097 } | 1147 } |
1098 VP8SetIntra16Mode(it, best_mode); | 1148 VP8SetIntra16Mode(it, best_mode); |
| 1149 // we'll reconstruct later, if i16 mode actually gets selected |
1099 } | 1150 } |
1100 if (try_both_i4_i16 || !is_i16) { | 1151 |
1101 uint8_t modes_i4[16]; | 1152 // Next, evaluate Intra4 |
| 1153 if (try_both_modes || !is_i16) { |
1102 // We don't evaluate the rate here, but just account for it through a | 1154 // We don't evaluate the rate here, but just account for it through a |
1103 // constant penalty (i4 mode usually needs more bits compared to i16). | 1155 // constant penalty (i4 mode usually needs more bits compared to i16). |
1104 score_t score_i4 = (score_t)I4_PENALTY; | 1156 is_i16 = 0; |
1105 | |
1106 VP8IteratorStartI4(it); | 1157 VP8IteratorStartI4(it); |
1107 do { | 1158 do { |
1108 int mode; | 1159 int best_i4_mode = -1; |
1109 int best_sub_mode = -1; | 1160 score_t best_i4_score = MAX_COST; |
1110 score_t best_sub_score = MAX_COST; | 1161 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1111 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; | |
1112 | 1162 |
1113 // TODO(skal): we don't really need the prediction pixels here, | |
1114 // but just the distortion against 'src'. | |
1115 VP8MakeIntra4Preds(it); | 1163 VP8MakeIntra4Preds(it); |
1116 for (mode = 0; mode < NUM_BMODES; ++mode) { | 1164 for (mode = 0; mode < NUM_BMODES; ++mode) { |
1117 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; | 1165 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; |
1118 const score_t score = VP8SSE4x4(src, ref); | 1166 const score_t score = VP8SSE4x4(src, ref); |
1119 if (score < best_sub_score) { | 1167 if (score < best_i4_score) { |
1120 best_sub_mode = mode; | 1168 best_i4_mode = mode; |
1121 best_sub_score = score; | 1169 best_i4_score = score; |
1122 } | 1170 } |
1123 } | 1171 } |
1124 modes_i4[it->i4_] = best_sub_mode; | 1172 modes_i4[it->i4_] = best_i4_mode; |
1125 score_i4 += best_sub_score; | 1173 score_i4 += best_i4_score; |
1126 if (score_i4 >= best_score) break; | 1174 if (score_i4 >= best_score) { |
1127 } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); | 1175 // Intra4 won't be better than Intra16. Bail out and pick Intra16. |
1128 if (score_i4 < best_score) { | 1176 is_i16 = 1; |
1129 VP8SetIntra4Mode(it, modes_i4); | 1177 break; |
| 1178 } else { // reconstruct partial block inside yuv_out2_ buffer |
| 1179 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
| 1180 nz |= ReconstructIntra4(it, tmp_levels[it->i4_], |
| 1181 src, tmp_dst, best_i4_mode) << it->i4_; |
| 1182 } |
| 1183 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); |
| 1184 } |
| 1185 |
| 1186 // Final reconstruction, depending on which mode is selected. |
| 1187 if (!is_i16) { |
| 1188 VP8SetIntra4Mode(it, modes_i4); |
| 1189 memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); |
| 1190 SwapOut(it); |
| 1191 best_score = score_i4; |
| 1192 } else { |
| 1193 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
| 1194 } |
| 1195 |
| 1196 // ... and UV! |
| 1197 if (refine_uv_mode) { |
| 1198 int best_mode = -1; |
| 1199 score_t best_uv_score = MAX_COST; |
| 1200 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
| 1201 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
| 1202 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
| 1203 const score_t score = VP8SSE16x8(src, ref); |
| 1204 if (score < best_uv_score) { |
| 1205 best_mode = mode; |
| 1206 best_uv_score = score; |
| 1207 } |
1130 } | 1208 } |
| 1209 VP8SetIntraUVMode(it, best_mode); |
1131 } | 1210 } |
| 1211 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
| 1212 |
| 1213 rd->nz = nz; |
| 1214 rd->score = best_score; |
1132 } | 1215 } |
1133 | 1216 |
1134 //------------------------------------------------------------------------------ | 1217 //------------------------------------------------------------------------------ |
1135 // Entry point | 1218 // Entry point |
1136 | 1219 |
1137 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, | 1220 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, |
1138 VP8RDLevel rd_opt) { | 1221 VP8RDLevel rd_opt) { |
1139 int is_skipped; | 1222 int is_skipped; |
1140 const int method = it->enc_->method_; | 1223 const int method = it->enc_->method_; |
1141 | 1224 |
1142 InitScore(rd); | 1225 InitScore(rd); |
1143 | 1226 |
1144 // We can perform predictions for Luma16x16 and Chroma8x8 already. | 1227 // We can perform predictions for Luma16x16 and Chroma8x8 already. |
1145 // Luma4x4 predictions need to be done as we go. | 1228 // Luma4x4 predictions need to be done as we go. |
1146 VP8MakeLuma16Preds(it); | 1229 VP8MakeLuma16Preds(it); |
1147 VP8MakeChroma8Preds(it); | 1230 VP8MakeChroma8Preds(it); |
1148 | 1231 |
1149 if (rd_opt > RD_OPT_NONE) { | 1232 if (rd_opt > RD_OPT_NONE) { |
1150 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); | 1233 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL); |
1151 PickBestIntra16(it, rd); | 1234 PickBestIntra16(it, rd); |
1152 if (method >= 2) { | 1235 if (method >= 2) { |
1153 PickBestIntra4(it, rd); | 1236 PickBestIntra4(it, rd); |
1154 } | 1237 } |
1155 PickBestUV(it, rd); | 1238 PickBestUV(it, rd); |
1156 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now | 1239 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now |
1157 it->do_trellis_ = 1; | 1240 it->do_trellis_ = 1; |
1158 SimpleQuantize(it, rd); | 1241 SimpleQuantize(it, rd); |
1159 } | 1242 } |
1160 } else { | 1243 } else { |
1161 // For method == 2, pick the best intra4/intra16 based on SSE (a tad slower). | 1244 // At this point we have heuristically decided intra16 / intra4. |
1162 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). | 1245 // For method >= 2, pick the best intra4/intra16 based on SSE (a tad slower). |
1163 DistoRefine(it, (method >= 2)); | 1246 // For method <= 1, we don't re-examine the decision but just go ahead with |
1164 SimpleQuantize(it, rd); | 1247 // quantization/reconstruction. |
| 1248 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); |
1165 } | 1249 } |
1166 is_skipped = (rd->nz == 0); | 1250 is_skipped = (rd->nz == 0); |
1167 VP8SetSkip(it, is_skipped); | 1251 VP8SetSkip(it, is_skipped); |
1168 return is_skipped; | 1252 return is_skipped; |
1169 } | 1253 } |
1170 | |
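Caller-side usage, schematically (hedged sketch; the macroblock loop lives elsewhere in the encoder, and rd_opt_level_ is assumed to come from the encoder state):

    VP8ModeScore info;
    const int skip = VP8Decimate(it, &info, enc->rd_opt_level_);
    /* skip == 1 iff no coefficient survived quantization (info.nz == 0) */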