third_party/libwebp/enc/quant.c - Issue 1178013008: Use the upstream version of libwebp, v0.4.3.

Side by Side Diff: third_party/libwebp/enc/quant.c

Issue 1178013008: Use the upstream version of libwebp, v0.4.3. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Fixes for SkWebpImageDecoder and SkWebpCodec. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2011 Google Inc. All Rights Reserved.

	2 //

	3 // Use of this source code is governed by a BSD-style license

	4 // that can be found in the COPYING file in the root of the source

	5 // tree. An additional intellectual property rights grant can be found

	6 // in the file PATENTS. All contributing project authors may

	7 // be found in the AUTHORS file in the root of the source tree.

	8 // -----------------------------------------------------------------------------

	9 //

	10 // Quantization

	11 //

	12 // Author: Skal (pascal.massimino@gmail.com)

	13

	14 #include <assert.h>

	15 #include <math.h>

	16 #include <stdlib.h> // for abs()

	17

	18 #include "./vp8enci.h"

	19 #include "./cost.h"

	20

	21 #define DO_TRELLIS_I4 1

	22 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.

	23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.

	24 #define USE_TDISTO 1

	25

	26 #define MID_ALPHA 64 // neutral value for susceptibility

	27 #define MIN_ALPHA 30 // lowest usable value for susceptibility

	28 #define MAX_ALPHA 100 // higher meaningful value for susceptibility

	29

	30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP

	31 // power-law modulation. Must be strictly less than 1.

	32

	33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision

	34

	35 // number of non-zero coeffs below which we consider the block very flat

	36 // (and apply a penalty to complex predictions)

	37 #define FLATNESS_LIMIT_I16 10 // I16 mode

	38 #define FLATNESS_LIMIT_I4 3 // I4 mode

	39 #define FLATNESS_LIMIT_UV 2 // UV mode

	40 #define FLATNESS_PENALTY 140 // roughly ~1bit per block

	41

	42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

	43

	44 // #define DEBUG_BLOCK

	45

	46 //------------------------------------------------------------------------------

	47

	48 #if defined(DEBUG_BLOCK)

	49

	50 #include <stdio.h>

	51 #include <stdlib.h>

	52

	53 static void PrintBlockInfo(const VP8EncIterator* const it,

	54 const VP8ModeScore* const rd) {

	55 int i, j;

	56 const int is_i16 = (it->mb_->type_ == 1);

	57 printf("SOURCE / OUTPUT / ABS DELTA\n");

	58 for (j = 0; j < 24; ++j) {

	59 if (j == 16) printf("\n"); // newline before the U/V block

	60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);

	61 printf(" ");

	62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);

	63 printf(" ");

	64 for (i = 0; i < 16; ++i) {

	65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));

	66 }

	67 printf("\n");

	68 }

	69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",

	70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,

	71 (int)rd->score);

	72 if (is_i16) {

	73 printf("Mode: %d\n", rd->mode_i16);

	74 printf("y_dc_levels:");

	75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);

	76 printf("\n");

	77 } else {

	78 printf("Modes[16]: ");

	79 for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);

	80 printf("\n");

	81 }

	82 printf("y_ac_levels:\n");

	83 for (j = 0; j < 16; ++j) {

	84 for (i = is_i16 ? 1 : 0; i < 16; ++i) {

	85 printf("%4d ", rd->y_ac_levels[j][i]);

	86 }

	87 printf("\n");

	88 }

	89 printf("\n");

	90 printf("uv_levels (mode=%d):\n", rd->mode_uv);

	91 for (j = 0; j < 8; ++j) {

	92 for (i = 0; i < 16; ++i) {

	93 printf("%4d ", rd->uv_levels[j][i]);

	94 }

	95 printf("\n");

	96 }

	97 }

	98

	99 #endif // DEBUG_BLOCK

	100

	101 //------------------------------------------------------------------------------

	102

	103 static WEBP_INLINE int clip(int v, int m, int M) {

	104 return v < m ? m : v > M ? M : v;

	105 }

	106

	107 static const uint8_t kZigzag[16] = {

	108 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15

	109 };

	110

	111 static const uint8_t kDcTable[128] = {

	112 4, 5, 6, 7, 8, 9, 10, 10,

	113 11, 12, 13, 14, 15, 16, 17, 17,

	114 18, 19, 20, 20, 21, 21, 22, 22,

	115 23, 23, 24, 25, 25, 26, 27, 28,

	116 29, 30, 31, 32, 33, 34, 35, 36,

	117 37, 37, 38, 39, 40, 41, 42, 43,

	118 44, 45, 46, 46, 47, 48, 49, 50,

	119 51, 52, 53, 54, 55, 56, 57, 58,

	120 59, 60, 61, 62, 63, 64, 65, 66,

	121 67, 68, 69, 70, 71, 72, 73, 74,

	122 75, 76, 76, 77, 78, 79, 80, 81,

	123 82, 83, 84, 85, 86, 87, 88, 89,

	124 91, 93, 95, 96, 98, 100, 101, 102,

	125 104, 106, 108, 110, 112, 114, 116, 118,

	126 122, 124, 126, 128, 130, 132, 134, 136,

	127 138, 140, 143, 145, 148, 151, 154, 157

	128 };

	129

	130 static const uint16_t kAcTable[128] = {

	131 4, 5, 6, 7, 8, 9, 10, 11,

	132 12, 13, 14, 15, 16, 17, 18, 19,

	133 20, 21, 22, 23, 24, 25, 26, 27,

	134 28, 29, 30, 31, 32, 33, 34, 35,

	135 36, 37, 38, 39, 40, 41, 42, 43,

	136 44, 45, 46, 47, 48, 49, 50, 51,

	137 52, 53, 54, 55, 56, 57, 58, 60,

	138 62, 64, 66, 68, 70, 72, 74, 76,

	139 78, 80, 82, 84, 86, 88, 90, 92,

	140 94, 96, 98, 100, 102, 104, 106, 108,

	141 110, 112, 114, 116, 119, 122, 125, 128,

	142 131, 134, 137, 140, 143, 146, 149, 152,

	143 155, 158, 161, 164, 167, 170, 173, 177,

	144 181, 185, 189, 193, 197, 201, 205, 209,

	145 213, 217, 221, 225, 229, 234, 239, 245,

	146 249, 254, 259, 264, 269, 274, 279, 284

	147 };

	148

	149 static const uint16_t kAcTable2[128] = {

	150 8, 8, 9, 10, 12, 13, 15, 17,

	151 18, 20, 21, 23, 24, 26, 27, 29,

	152 31, 32, 34, 35, 37, 38, 40, 41,

	153 43, 44, 46, 48, 49, 51, 52, 54,

	154 55, 57, 58, 60, 62, 63, 65, 66,

	155 68, 69, 71, 72, 74, 75, 77, 79,

	156 80, 82, 83, 85, 86, 88, 89, 93,

	157 96, 99, 102, 105, 108, 111, 114, 117,

	158 120, 124, 127, 130, 133, 136, 139, 142,

	159 145, 148, 151, 155, 158, 161, 164, 167,

	160 170, 173, 176, 179, 184, 189, 193, 198,

	161 203, 207, 212, 217, 221, 226, 230, 235,

	162 240, 244, 249, 254, 258, 263, 268, 274,

	163 280, 286, 292, 299, 305, 311, 317, 323,

	164 330, 336, 342, 348, 354, 362, 370, 379,

	165 385, 393, 401, 409, 416, 424, 432, 440

	166 };

	167

	168 static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]

	169 { 96, 110 }, { 96, 108 }, { 110, 115 }

	170 };

	171

	172 // Sharpening by (slightly) raising the hi-frequency coeffs.

	173 // Hack-ish but helpful for mid-bitrate range. Use with care.

	174 #define SHARPEN_BITS 11 // number of descaling bits for sharpening bias

	175 static const uint8_t kFreqSharpening[16] = {

	176 0, 30, 60, 90,

	177 30, 60, 90, 90,

	178 60, 90, 90, 90,

	179 90, 90, 90, 90

	180 };

	181

	182 //------------------------------------------------------------------------------

	183 // Initialize quantization parameters in VP8Matrix

	184

	185 // Returns the average quantizer

	186 static int ExpandMatrix(VP8Matrix* const m, int type) {

	187 int i, sum;

	188 for (i = 0; i < 2; ++i) {

	189 const int is_ac_coeff = (i > 0);

	190 const int bias = kBiasMatrices[type][is_ac_coeff];

	191 m->iq_[i] = (1 << QFIX) / m->q_[i];

	192 m->bias_[i] = BIAS(bias);

	193 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:

	194 // * zero if coeff <= zthresh

	195 // * non-zero if coeff > zthresh

	196 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];

	197 }

	198 for (i = 2; i < 16; ++i) {

	199 m->q_[i] = m->q_[1];

	200 m->iq_[i] = m->iq_[1];

	201 m->bias_[i] = m->bias_[1];

	202 m->zthresh_[i] = m->zthresh_[1];

	203 }

	204 for (sum = 0, i = 0; i < 16; ++i) {

	205 if (type == 0) { // we only use sharpening for AC luma coeffs

	206 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;

	207 } else {

	208 m->sharpen_[i] = 0;

	209 }

	210 sum += m->q_[i];

	211 }

	212 return (sum + 8) >> 4;

	213 }

	214

	215 static void SetupMatrices(VP8Encoder* enc) {

	216 int i;

	217 const int tlambda_scale =

	218 (enc->method_ >= 4) ? enc->config_->sns_strength

	219 : 0;

	220 const int num_segments = enc->segment_hdr_.num_segments_;

	221 for (i = 0; i < num_segments; ++i) {

	222 VP8SegmentInfo* const m = &enc->dqm_[i];

	223 const int q = m->quant_;

	224 int q4, q16, quv;

	225 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];

	226 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];

	227

	228 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;

	229 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];

	230

	231 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];

	232 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];

	233

	234 q4 = ExpandMatrix(&m->y1_, 0);

	235 q16 = ExpandMatrix(&m->y2_, 1);

	236 quv = ExpandMatrix(&m->uv_, 2);

	237

	238 m->lambda_i4_ = (3 * q4 * q4) >> 7;

	239 m->lambda_i16_ = (3 * q16 * q16);

	240 m->lambda_uv_ = (3 * quv * quv) >> 6;

	241 m->lambda_mode_ = (1 * q4 * q4) >> 7;

	242 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;

	243 m->lambda_trellis_i16_ = (q16 * q16) >> 2;

	244 m->lambda_trellis_uv_ = (quv *quv) << 1;

	245 m->tlambda_ = (tlambda_scale * q4) >> 5;

	246

	247 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto

	248 m->max_edge_ = 0;

	249 }

	250 }

	251

	252 //------------------------------------------------------------------------------

	253 // Initialize filtering parameters

	254

	255 // Very small filter-strength values have close to no visual effect. So we can

	256 // save a little decoding-CPU by turning filtering off for these.

	257 #define FSTRENGTH_CUTOFF 2

	258

	259 static void SetupFilterStrength(VP8Encoder* const enc) {

	260 int i;

	261 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.

	262 const int level0 = 5 * enc->config_->filter_strength;

	263 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {

	264 VP8SegmentInfo* const m = &enc->dqm_[i];

	265 // We focus on the quantization of AC coeffs.

	266 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;

	267 const int base_strength =

	268 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);

	269 // Segments with lower complexity ('beta') will be less filtered.

	270 const int f = base_strength * level0 / (256 + m->beta_);

	271 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;

	272 }

	273 // We record the initial strength (mainly for the case of 1-segment only).

	274 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;

	275 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);

	276 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;

	277 }

	278

	279 //------------------------------------------------------------------------------

	280

	281 // Note: if you change the values below, remember that the max range

	282 // allowed by the syntax for DQ_UV is [-16,16].

	283 #define MAX_DQ_UV (6)

	284 #define MIN_DQ_UV (-4)

	285

	286 // We want to emulate jpeg-like behaviour where the expected "good" quality

	287 // is around q=75. Internally, our "good" middle is around c=50. So we

	288 // map accordingly using linear piece-wise function

	289 static double QualityToCompression(double c) {

	290 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;

	291 // The file size roughly scales as pow(quantizer, 3.). Actually, the

	292 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested

	293 // in the mid-quant range. So we scale the compressibility inversely to

	294 // this power-law: quant ~= compression ^ 1/3. This law holds well for

	295 // low quant. Finer modeling for high-quant would make use of kAcTable[]

	296 // more explicitly.

	297 const double v = pow(linear_c, 1 / 3.);

	298 return v;

	299 }

	300

	301 static double QualityToJPEGCompression(double c, double alpha) {

	302 // We map the complexity 'alpha' and quality setting 'c' to a compression

	303 // exponent empirically matched to the compression curve of libjpeg6b.

	304 // On average, the WebP output size will be roughly similar to that of a

	305 // JPEG file compressed with same quality factor.

	306 const double amin = 0.30;

	307 const double amax = 0.85;

	308 const double exp_min = 0.4;

	309 const double exp_max = 0.9;

	310 const double slope = (exp_min - exp_max) / (amax - amin);

	311 // Linearly interpolate 'expn' from exp_min to exp_max

	312 // in the [amin, amax] range.

	313 const double expn = (alpha > amax) ? exp_min

	314 : (alpha < amin) ? exp_max

	315 : exp_max + slope * (alpha - amin);

	316 const double v = pow(c, expn);

	317 return v;

	318 }

	319

	320 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,

	321 const VP8SegmentInfo* const S2) {

	322 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);

	323 }

	324

	325 static void SimplifySegments(VP8Encoder* const enc) {

	326 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };

	327 const int num_segments = enc->segment_hdr_.num_segments_;

	328 int num_final_segments = 1;

	329 int s1, s2;

	330 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments

	331 const VP8SegmentInfo* const S1 = &enc->dqm_[s1];

	332 int found = 0;

	333 // check if we already have similar segment

	334 for (s2 = 0; s2 < num_final_segments; ++s2) {

	335 const VP8SegmentInfo* const S2 = &enc->dqm_[s2];

	336 if (SegmentsAreEquivalent(S1, S2)) {

	337 found = 1;

	338 break;

	339 }

	340 }

	341 map[s1] = s2;

	342 if (!found) {

	343 if (num_final_segments != s1) {

	344 enc->dqm_[num_final_segments] = enc->dqm_[s1];

	345 }

	346 ++num_final_segments;

	347 }

	348 }

	349 if (num_final_segments < num_segments) { // Remap

	350 int i = enc->mb_w_ * enc->mb_h_;

	351 while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];

	352 enc->segment_hdr_.num_segments_ = num_final_segments;

	353 // Replicate the trailing segment infos (it's mostly cosmetics)

	354 for (i = num_final_segments; i < num_segments; ++i) {

	355 enc->dqm_[i] = enc->dqm_[num_final_segments - 1];

	356 }

	357 }

	358 }

	359

	360 void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {

	361 int i;

	362 int dq_uv_ac, dq_uv_dc;

	363 const int num_segments = enc->segment_hdr_.num_segments_;

	364 const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;

	365 const double Q = quality / 100.;

	366 const double c_base = enc->config_->emulate_jpeg_size ?

	367 QualityToJPEGCompression(Q, enc->alpha_ / 255.) :

	368 QualityToCompression(Q);

	369 for (i = 0; i < num_segments; ++i) {

	370 // We modulate the base coefficient to accommodate for the quantization

	371 // susceptibility and allow denser segments to be quantized more.

	372 const double expn = 1. - amp * enc->dqm_[i].alpha_;

	373 const double c = pow(c_base, expn);

	374 const int q = (int)(127. * (1. - c));

	375 assert(expn > 0.);

	376 enc->dqm_[i].quant_ = clip(q, 0, 127);

	377 }

	378

	379 // purely indicative in the bitstream (except for the 1-segment case)

	380 enc->base_quant_ = enc->dqm_[0].quant_;

	381

	382 // fill-in values for the unused segments (required by the syntax)

	383 for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {

	384 enc->dqm_[i].quant_ = enc->base_quant_;

	385 }

	386

	387 // uv_alpha_ is normally spread around ~60. The useful range is

	388 // typically ~30 (quite bad) to ~100 (ok to decimate UV more).

	389 // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.

	390 dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)

	391 / (MAX_ALPHA - MIN_ALPHA);

	392 // we rescale by the user-defined strength of adaptation

	393 dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;

	394 // and make it safe.

	395 dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);

	396 // We also boost the dc-uv-quant a little, based on sns-strength, since

	397 // U/V channels are quite more reactive to high quants (flat DC-blocks

	398 // tend to appear, and are unpleasant).

	399 dq_uv_dc = -4 * enc->config_->sns_strength / 100;

	400 dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed

	401

	402 enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum

	403 enc->dq_y2_dc_ = 0;

	404 enc->dq_y2_ac_ = 0;

	405 enc->dq_uv_dc_ = dq_uv_dc;

	406 enc->dq_uv_ac_ = dq_uv_ac;

	407

	408 SetupFilterStrength(enc); // initialize segments' filtering, eventually

	409

	410 if (num_segments > 1) SimplifySegments(enc);

	411

	412 SetupMatrices(enc); // finalize quantization matrices

	413 }

	414

	415 //------------------------------------------------------------------------------

	416 // Form the predictions in cache

	417

	418 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index

	419 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };

	420 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };

	421

	422 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index

	423 const int VP8I4ModeOffsets[NUM_BMODES] = {

	424 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4

	425 };

	426

	427 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {

	428 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;

	429 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;

	430 VP8EncPredLuma16(it->yuv_p_, left, top);

	431 }

	432

	433 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {

	434 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;

	435 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;

	436 VP8EncPredChroma8(it->yuv_p_, left, top);

	437 }

	438

	439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {

	440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);

	441 }

	442

	443 //------------------------------------------------------------------------------

	444 // Quantize

	445

	446 // Layout:

	447 // +----+

	448 // |YYYY| 0

	449 // |YYYY| 4

	450 // |YYYY| 8

	451 // |YYYY| 12

	452 // +----+

	453 // |UUVV| 16

	454 // |UUVV| 20

	455 // +----+

	456

	457 const int VP8Scan[16] = { // Luma

	458 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,

	459 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,

	460 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,

	461 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,

	462 };

	463

	464 static const int VP8ScanUV[4 + 4] = {

	465 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U

	466 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V

	467 };

	468

	469 //------------------------------------------------------------------------------

	470 // Distortion measurement

	471

	472 static const uint16_t kWeightY[16] = {

	473 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2

	474 };

	475

	476 static const uint16_t kWeightTrellis[16] = {

	477 #if USE_TDISTO == 0

	478 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16

	479 #else

	480 30, 27, 19, 11,

	481 27, 24, 17, 10,

	482 19, 17, 12, 8,

	483 11, 10, 8, 6

	484 #endif

	485 };

	486

	487 // Init/Copy the common fields in score.

	488 static void InitScore(VP8ModeScore* const rd) {

	489 rd->D = 0;

	490 rd->SD = 0;

	491 rd->R = 0;

	492 rd->H = 0;

	493 rd->nz = 0;

	494 rd->score = MAX_COST;

	495 }

	496

	497 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

	498 dst->D = src->D;

	499 dst->SD = src->SD;

	500 dst->R = src->R;

	501 dst->H = src->H;

	502 dst->nz = src->nz; // note that nz is not accumulated, but just copied.

	503 dst->score = src->score;

	504 }

	505

	506 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

	507 dst->D += src->D;

	508 dst->SD += src->SD;

	509 dst->R += src->R;

	510 dst->H += src->H;

	511 dst->nz \|= src->nz; // here, new nz bits are accumulated.

	512 dst->score += src->score;

	513 }

	514

	515 //------------------------------------------------------------------------------

	516 // Performs trellis-optimized quantization.

	517

	518 // Trellis node

	519 typedef struct {

	520 int8_t prev; // best previous node

	521 int8_t sign; // sign of coeff_i

	522 int16_t level; // level

	523 } Node;

	524

	525 // Score state

	526 typedef struct {

	527 score_t score; // partial RD score

	528 const uint16_t* costs; // shortcut to cost tables

	529 } ScoreState;

	530

	531 // If a coefficient was quantized to a value Q (using a neutral bias),

	532 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]

	533 // We don't test negative values though.

	534 #define MIN_DELTA 0 // how much lower level to try

	535 #define MAX_DELTA 1 // how much higher

	536 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)

	537 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])

	538 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])

	539

	540 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {

	541 // TODO: incorporate the "* 256" in the tables?

	542 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);

	543 }

	544

	545 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,

	546 score_t distortion) {

	547 return rate * lambda + 256 * distortion;

	548 }

	549

	550 static int TrellisQuantizeBlock(const VP8Encoder* const enc,

	551 int16_t in[16], int16_t out[16],

	552 int ctx0, int coeff_type,

	553 const VP8Matrix* const mtx,

	554 int lambda) {

	555 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];

	556 const CostArray* const costs = enc->proba_.level_cost_[coeff_type];

	557 const int first = (coeff_type == 0) ? 1 : 0;

	558 Node nodes[16][NUM_NODES];

	559 ScoreState score_states[2][NUM_NODES];

	560 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);

	561 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);

	562 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous

	563 score_t best_score;

	564 int n, m, p, last;

	565

	566 {

	567 score_t cost;

	568 const int thresh = mtx->q_[1] * mtx->q_[1] / 4;

	569 const int last_proba = probas[VP8EncBands[first]][ctx0][0];

	570

	571 // compute the position of the last interesting coefficient

	572 last = first - 1;

	573 for (n = 15; n >= first; --n) {

	574 const int j = kZigzag[n];

	575 const int err = in[j] * in[j];

	576 if (err > thresh) {

	577 last = n;

	578 break;

	579 }

	580 }

	581 // we don't need to go inspect up to n = 16 coeffs. We can just go up

	582 // to last + 1 (inclusive) without losing much.

	583 if (last < 15) ++last;

	584

	585 // compute 'skip' score. This is the max score one can do.

	586 cost = VP8BitCost(0, last_proba);

	587 best_score = RDScoreTrellis(lambda, cost, 0);

	588

	589 // initialize source node.

	590 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {

	591 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;

	592 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);

	593 ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];

	594 }

	595 }

	596

	597 // traverse trellis.

	598 for (n = first; n <= last; ++n) {

	599 const int j = kZigzag[n];

	600 const uint32_t Q = mtx->q_[j];

	601 const uint32_t iQ = mtx->iq_[j];

	602 const uint32_t B = BIAS(0x00); // neutral bias

	603 // note: it's important to take sign of the _original_ coeff,

	604 // so we don't have to consider level < 0 afterward.

	605 const int sign = (in[j] < 0);

	606 const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];

	607 int level0 = QUANTDIV(coeff0, iQ, B);

	608 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;

	609

	610 { // Swap current and previous score states

	611 ScoreState* const tmp = ss_cur;

	612 ss_cur = ss_prev;

	613 ss_prev = tmp;

	614 }

	615

	616 // test all alternate level values around level0.

	617 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {

	618 Node* const cur = &NODE(n, m);

	619 int level = level0 + m;

	620 const int ctx = (level > 2) ? 2 : level;

	621 const int band = VP8EncBands[n + 1];

	622 score_t base_score, last_pos_score;

	623 score_t best_cur_score = MAX_COST;

	624 int best_prev = 0; // default, in case

	625

	626 ss_cur[m].score = MAX_COST;

	627 ss_cur[m].costs = costs[band][ctx];

	628 if (level > MAX_LEVEL \|\| level < 0) { // node is dead?

	629 continue;

	630 }

	631

	632 // Compute extra rate cost if last coeff's position is < 15

	633 {

	634 const score_t last_pos_cost =

	635 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;

	636 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);

	637 }

	638

	639 {

	640 // Compute delta_error = how much coding this level will

	641 // subtract to max_error as distortion.

	642 // Here, distortion = sum of (\|coeff_i\| - level_i * Q_i)^2

	643 const int new_error = coeff0 - level * Q;

	644 const int delta_error =

	645 kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);

	646 base_score = RDScoreTrellis(lambda, 0, delta_error);

	647 }

	648

	649 // Inspect all possible non-dead predecessors. Retain only the best one.

	650 for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {

	651 // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically

	652 // eliminated since their score can't be better than the current best.

	653 const score_t cost = VP8LevelCost(ss_prev[p].costs, level);

	654 // Examine node assuming it's a non-terminal one.

	655 const score_t score =

	656 base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);

	657 if (score < best_cur_score) {

	658 best_cur_score = score;

	659 best_prev = p;

	660 }

	661 }

	662 // Store best finding in current node.

	663 cur->sign = sign;

	664 cur->level = level;

	665 cur->prev = best_prev;

	666 ss_cur[m].score = best_cur_score;

	667

	668 // Now, record best terminal node (and thus best entry in the graph).

	669 if (level != 0) {

	670 const score_t score = best_cur_score + last_pos_score;

	671 if (score < best_score) {

	672 best_score = score;

	673 best_path[0] = n; // best eob position

	674 best_path[1] = m; // best node index

	675 best_path[2] = best_prev; // best predecessor

	676 }

	677 }

	678 }

	679 }

	680

	681 // Fresh start

	682 memset(in + first, 0, (16 - first) * sizeof(*in));

	683 memset(out + first, 0, (16 - first) * sizeof(*out));

	684 if (best_path[0] == -1) {

	685 return 0; // skip!

	686 }

	687

	688 {

	689 // Unwind the best path.

	690 // Note: best-prev on terminal node is not necessarily equal to the

	691 // best_prev for non-terminal. So we patch best_path[2] in.

	692 int nz = 0;

	693 int best_node = best_path[1];

	694 n = best_path[0];

	695 NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal

	696

	697 for (; n >= first; --n) {

	698 const Node* const node = &NODE(n, best_node);

	699 const int j = kZigzag[n];

	700 out[n] = node->sign ? -node->level : node->level;

	701 nz \|= node->level;

	702 in[j] = out[n] * mtx->q_[j];

	703 best_node = node->prev;

	704 }

	705 return (nz != 0);

	706 }

	707 }

	708

	709 #undef NODE

	710

	711 //------------------------------------------------------------------------------

	712 // Performs: difference, transform, quantize, back-transform, add

	713 // all at once. Output is the reconstructed block in *yuv_out, and the

	714 // quantized levels in *levels.

	715

	716 static int ReconstructIntra16(VP8EncIterator* const it,

	717 VP8ModeScore* const rd,

	718 uint8_t* const yuv_out,

	719 int mode) {

	720 const VP8Encoder* const enc = it->enc_;

	721 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

	722 const uint8_t* const src = it->yuv_in_ + Y_OFF;

	723 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

	724 int nz = 0;

	725 int n;

	726 int16_t tmp[16][16], dc_tmp[16];

	727

	728 for (n = 0; n < 16; ++n) {

	729 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);

	730 }

	731 VP8FTransformWHT(tmp[0], dc_tmp);

	732 nz \|= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;

	733

	734 if (DO_TRELLIS_I16 && it->do_trellis_) {

	735 int x, y;

	736 VP8IteratorNzToBytes(it);

	737 for (y = 0, n = 0; y < 4; ++y) {

	738 for (x = 0; x < 4; ++x, ++n) {

	739 const int ctx = it->top_nz_[x] + it->left_nz_[y];

	740 const int non_zero =

	741 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,

	742 &dqm->y1_, dqm->lambda_trellis_i16_);

	743 it->top_nz_[x] = it->left_nz_[y] = non_zero;

	744 rd->y_ac_levels[n][0] = 0;

	745 nz \|= non_zero << n;

	746 }

	747 }

	748 } else {

	749 for (n = 0; n < 16; ++n) {

	750 // Zero-out the first coeff, so that: a) nz is correct below, and

	751 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.

	752 tmp[n][0] = 0;

	753 nz \|= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;

	754 assert(rd->y_ac_levels[n][0] == 0);

	755 }

	756 }

	757

	758 // Transform back

	759 VP8TransformWHT(dc_tmp, tmp[0]);

	760 for (n = 0; n < 16; n += 2) {

	761 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);

	762 }

	763

	764 return nz;

	765 }

	766

	767 static int ReconstructIntra4(VP8EncIterator* const it,

	768 int16_t levels[16],

	769 const uint8_t* const src,

	770 uint8_t* const yuv_out,

	771 int mode) {

	772 const VP8Encoder* const enc = it->enc_;

	773 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];

	774 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

	775 int nz = 0;

	776 int16_t tmp[16];

	777

	778 VP8FTransform(src, ref, tmp);

	779 if (DO_TRELLIS_I4 && it->do_trellis_) {

	780 const int x = it->i4_ & 3, y = it->i4_ >> 2;

	781 const int ctx = it->top_nz_[x] + it->left_nz_[y];

	782 nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,

	783 dqm->lambda_trellis_i4_);

	784 } else {

	785 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);

	786 }

	787 VP8ITransform(ref, tmp, yuv_out, 0);

	788 return nz;

	789 }

	790

	791 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,

	792 uint8_t* const yuv_out, int mode) {

	793 const VP8Encoder* const enc = it->enc_;

	794 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];

	795 const uint8_t* const src = it->yuv_in_ + U_OFF;

	796 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

	797 int nz = 0;

	798 int n;

	799 int16_t tmp[8][16];

	800

	801 for (n = 0; n < 8; ++n) {

	802 VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);

	803 }

	804 if (DO_TRELLIS_UV && it->do_trellis_) {

	805 int ch, x, y;

	806 for (ch = 0, n = 0; ch <= 2; ch += 2) {

	807 for (y = 0; y < 2; ++y) {

	808 for (x = 0; x < 2; ++x, ++n) {

	809 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];

	810 const int non_zero =

	811 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,

	812 &dqm->uv_, dqm->lambda_trellis_uv_);

	813 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;

	814 nz \|= non_zero << n;

	815 }

	816 }

	817 }

	818 } else {

	819 for (n = 0; n < 8; ++n) {

	820 nz \|= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;

	821 }

	822 }

	823

	824 for (n = 0; n < 8; n += 2) {

	825 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);

	826 }

	827 return (nz << 16);

	828 }

	829

	830 //------------------------------------------------------------------------------

	831 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.

	832 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.

	833

	834 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {

	835 // We look at the first three AC coefficients to determine what is the average

	836 // delta between each sub-4x4 block.

	837 const int v0 = abs(DCs[1]);

	838 const int v1 = abs(DCs[4]);

	839 const int v2 = abs(DCs[5]);

	840 int max_v = (v0 > v1) ? v1 : v0;

	841 max_v = (v2 > max_v) ? v2 : max_v;

	842 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;

	843 }

	844

	// Exchanges the two byte pointers designated by 'a' and 'b'.
	//   a, b - addresses of the pointers to swap; both must be non-NULL.
	static void SwapPtr(uint8_t** a, uint8_t** b) {
	  uint8_t* const tmp = *a;
	  *a = *b;
	  *b = tmp;
	}

	850

	851 static void SwapOut(VP8EncIterator* const it) {

	852 SwapPtr(&it->yuv_out_, &it->yuv_out2_);

	853 }

	854

	855 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {

	856 score_t score = 0;

	857 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?

	858 int i;

	859 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC

	860 score += (levels[i] != 0);

	861 if (score > thresh) return 0;

	862 }

	863 levels += 16;

	864 }

	865 return 1;

	866 }

	867

	868 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {

	869 const int kNumBlocks = 16;

	870 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];

	871 const int lambda = dqm->lambda_i16_;

	872 const int tlambda = dqm->tlambda_;

	873 const uint8_t* const src = it->yuv_in_ + Y_OFF;

	874 VP8ModeScore rd16;

	875 int mode;

	876

	877 rd->mode_i16 = -1;

	878 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

	879 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer

	880 int nz;

	881

	882 // Reconstruct

	883 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);

	884

	885 // Measure RD-score

	886 rd16.D = VP8SSE16x16(src, tmp_dst);

	887 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))

	888 : 0;

	889 rd16.H = VP8FixedCostsI16[mode];

	890 rd16.R = VP8GetCostLuma16(it, &rd16);

	891 if (mode > 0 &&

	892 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {

	893 // penalty to avoid flat area to be mispredicted by complex mode

	894 rd16.R += FLATNESS_PENALTY * kNumBlocks;

	895 }

	896

	897 // Since we always examine Intra16 first, we can overwrite *rd directly.

	898 SetRDScore(lambda, &rd16);

	899 if (mode == 0 \|\| rd16.score < rd->score) {

	900 CopyScore(rd, &rd16);

	901 rd->mode_i16 = mode;

	902 rd->nz = nz;

	903 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));

	904 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));

	905 SwapOut(it);

	906 }

	907 }

	908 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.

	909 VP8SetIntra16Mode(it, rd->mode_i16);

	910

	911 // we have a blocky macroblock (only DCs are non-zero) with fairly high

	912 // distortion, record max delta so we can later adjust the minimal filtering

	913 // strength needed to smooth these blocks out.

	914 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {

	915 StoreMaxDelta(dqm, rd->y_dc_levels);

	916 }

	917 }

	918

	919 //------------------------------------------------------------------------------

	920

	921 // return the cost array corresponding to the surrounding prediction modes.

	922 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,

	923 const uint8_t modes[16]) {

	924 const int preds_w = it->enc_->preds_w_;

	925 const int x = (it->i4_ & 3), y = it->i4_ >> 2;

	926 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];

	927 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];

	928 return VP8FixedCostsI4[top][left];

	929 }

	930

	931 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

	932 const VP8Encoder* const enc = it->enc_;

	933 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

	934 const int lambda = dqm->lambda_i4_;

	935 const int tlambda = dqm->tlambda_;

	936 const uint8_t* const src0 = it->yuv_in_ + Y_OFF;

	937 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;

	938 int total_header_bits = 0;

	939 VP8ModeScore rd_best;

	940

	941 if (enc->max_i4_header_bits_ == 0) {

	942 return 0;

	943 }

	944

	945 InitScore(&rd_best);

	946 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)

	947 SetRDScore(dqm->lambda_mode_, &rd_best);

	948 VP8IteratorStartI4(it);

	949 do {

	950 const int kNumBlocks = 1;

	951 VP8ModeScore rd_i4;

	952 int mode;

	953 int best_mode = -1;

	954 const uint8_t* const src = src0 + VP8Scan[it->i4_];

	955 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);

	956 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];

	957 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.

	958

	959 InitScore(&rd_i4);

	960 VP8MakeIntra4Preds(it);

	961 for (mode = 0; mode < NUM_BMODES; ++mode) {

	962 VP8ModeScore rd_tmp;

	963 int16_t tmp_levels[16];

	964

	965 // Reconstruct

	966 rd_tmp.nz =

	967 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;

	968

	969 // Compute RD-score

	970 rd_tmp.D = VP8SSE4x4(src, tmp_dst);

	971 rd_tmp.SD =

	972 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))

	973 : 0;

	974 rd_tmp.H = mode_costs[mode];

	975 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);

	976 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {

	977 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;

	978 }

	979

	980 SetRDScore(lambda, &rd_tmp);

	981 if (best_mode < 0 \|\| rd_tmp.score < rd_i4.score) {

	982 CopyScore(&rd_i4, &rd_tmp);

	983 best_mode = mode;

	984 SwapPtr(&tmp_dst, &best_block);

	985 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));

	986 }

	987 }

	988 SetRDScore(dqm->lambda_mode_, &rd_i4);

	989 AddScore(&rd_best, &rd_i4);

	990 if (rd_best.score >= rd->score) {

	991 return 0;

	992 }

	993 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];

	994 if (total_header_bits > enc->max_i4_header_bits_) {

	995 return 0;

	996 }

	997 // Copy selected samples if not in the right place already.

	998 if (best_block != best_blocks + VP8Scan[it->i4_]) {

	999 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);

	1000 }

	1001 rd->modes_i4[it->i4_] = best_mode;

	1002 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);

	1003 } while (VP8IteratorRotateI4(it, best_blocks));

	1004

	1005 // finalize state

	1006 CopyScore(rd, &rd_best);

	1007 VP8SetIntra4Mode(it, rd->modes_i4);

	1008 SwapOut(it);

	1009 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));

	1010 return 1; // select intra4x4 over intra16x16

	1011 }

	1012

	1013 //------------------------------------------------------------------------------

	1014

	1015 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {

	1016 const int kNumBlocks = 8;

	1017 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];

	1018 const int lambda = dqm->lambda_uv_;

	1019 const uint8_t* const src = it->yuv_in_ + U_OFF;

	1020 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer

	1021 uint8_t* const dst0 = it->yuv_out_ + U_OFF;

	1022 VP8ModeScore rd_best;

	1023 int mode;

	1024

	1025 rd->mode_uv = -1;

	1026 InitScore(&rd_best);

	1027 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

	1028 VP8ModeScore rd_uv;

	1029

	1030 // Reconstruct

	1031 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);

	1032

	1033 // Compute RD-score

	1034 rd_uv.D = VP8SSE16x8(src, tmp_dst);

	1035 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.

	1036 rd_uv.H = VP8FixedCostsUV[mode];

	1037 rd_uv.R = VP8GetCostUV(it, &rd_uv);

	1038 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {

	1039 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;

	1040 }

	1041

	1042 SetRDScore(lambda, &rd_uv);

	1043 if (mode == 0 \|\| rd_uv.score < rd_best.score) {

	1044 CopyScore(&rd_best, &rd_uv);

	1045 rd->mode_uv = mode;

	1046 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));

	1047 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?

	1048 }

	1049 }

	1050 VP8SetIntraUVMode(it, rd->mode_uv);

	1051 AddScore(rd, &rd_best);

	1052 }

	1053

	1054 //------------------------------------------------------------------------------

	1055 // Final reconstruction and quantization.

	1056

	1057 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {

	1058 const VP8Encoder* const enc = it->enc_;

	1059 const int is_i16 = (it->mb_->type_ == 1);

	1060 int nz = 0;

	1061

	1062 if (is_i16) {

	1063 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);

	1064 } else {

	1065 VP8IteratorStartI4(it);

	1066 do {

	1067 const int mode =

	1068 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];

	1069 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];

	1070 uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];

	1071 VP8MakeIntra4Preds(it);

	1072 nz \|= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],

	1073 src, dst, mode) << it->i4_;

	1074 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));

	1075 }

	1076

	1077 nz \|= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);

	1078 rd->nz = nz;

	1079 }

	1080

	1081 // Refine intra16/intra4 sub-modes based on distortion only (not rate).

	1082 static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {

	1083 const int is_i16 = (it->mb_->type_ == 1);

	1084 score_t best_score = MAX_COST;

	1085

	1086 if (try_both_i4_i16 \|\| is_i16) {

	1087 int mode;

	1088 int best_mode = -1;

	1089 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

	1090 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

	1091 const uint8_t* const src = it->yuv_in_ + Y_OFF;

	1092 const score_t score = VP8SSE16x16(src, ref);

	1093 if (score < best_score) {

	1094 best_mode = mode;

	1095 best_score = score;

	1096 }

	1097 }

	1098 VP8SetIntra16Mode(it, best_mode);

	1099 }

	1100 if (try_both_i4_i16 \|\| !is_i16) {

	1101 uint8_t modes_i4[16];

	1102 // We don't evaluate the rate here, but just account for it through a

	1103 // constant penalty (i4 mode usually needs more bits compared to i16).

	1104 score_t score_i4 = (score_t)I4_PENALTY;

	1105

	1106 VP8IteratorStartI4(it);

	1107 do {

	1108 int mode;

	1109 int best_sub_mode = -1;

	1110 score_t best_sub_score = MAX_COST;

	1111 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];

	1112

	1113 // TODO(skal): we don't really need the prediction pixels here,

	1114 // but just the distortion against 'src'.

	1115 VP8MakeIntra4Preds(it);

	1116 for (mode = 0; mode < NUM_BMODES; ++mode) {

	1117 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];

	1118 const score_t score = VP8SSE4x4(src, ref);

	1119 if (score < best_sub_score) {

	1120 best_sub_mode = mode;

	1121 best_sub_score = score;

	1122 }

	1123 }

	1124 modes_i4[it->i4_] = best_sub_mode;

	1125 score_i4 += best_sub_score;

	1126 if (score_i4 >= best_score) break;

	1127 } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));

	1128 if (score_i4 < best_score) {

	1129 VP8SetIntra4Mode(it, modes_i4);

	1130 }

	1131 }

	1132 }

	1133

	1134 //------------------------------------------------------------------------------

	1135 // Entry point

	1136

	1137 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,

	1138 VP8RDLevel rd_opt) {

	1139 int is_skipped;

	1140 const int method = it->enc_->method_;

	1141

	1142 InitScore(rd);

	1143

	1144 // We can perform predictions for Luma16x16 and Chroma8x8 already.

	1145 // Luma4x4 predictions needs to be done as-we-go.

	1146 VP8MakeLuma16Preds(it);

	1147 VP8MakeChroma8Preds(it);

	1148

	1149 if (rd_opt > RD_OPT_NONE) {

	1150 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);

	1151 PickBestIntra16(it, rd);

	1152 if (method >= 2) {

	1153 PickBestIntra4(it, rd);

	1154 }

	1155 PickBestUV(it, rd);

	1156 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now

	1157 it->do_trellis_ = 1;

	1158 SimpleQuantize(it, rd);

	1159 }

	1160 } else {

	1161 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).

	1162 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).

	1163 DistoRefine(it, (method >= 2));

	1164 SimpleQuantize(it, rd);

	1165 }

	1166 is_skipped = (rd->nz == 0);

	1167 VP8SetSkip(it, is_skipped);

	1168 return is_skipped;

	1169 }

	1170

OLD	NEW

« src/codec/SkWebpCodec.cpp ('K') | « third_party/libwebp/enc/picture_tools.c ('k') | third_party/libwebp/enc/syntax.c » ('j') | no next file with comments »