Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1262)

Side by Side Diff: third_party/libwebp/enc/quant.c

Issue 2651883004: libwebp-0.6.0-rc1 (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libwebp/enc/predictor_enc.c ('k') | third_party/libwebp/enc/quant_enc.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Quantization
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13
14 #include <assert.h>
15 #include <math.h>
16 #include <stdlib.h> // for abs()
17
18 #include "./vp8enci.h"
19 #include "./cost.h"
20
// Compile-time switches for trellis quantization, one per coefficient plane.
#define DO_TRELLIS_I4  1
#define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
#define DO_TRELLIS_UV  0   // disable trellis for UV. Risky. Not worth.
// When set to 0, kWeightTrellis[] falls back to flat (all-16) weights.
#define USE_TDISTO 1

// Landmarks for the susceptibility value ('alpha').
#define MID_ALPHA 64      // neutral value for susceptibility
#define MIN_ALPHA 30      // lowest usable value for susceptibility
#define MAX_ALPHA 100     // highest meaningful value for susceptibility

// Scaling constant between the sns value and the QP power-law modulation.
// Must be strictly less than 1.
#define SNS_TO_DQ 0.9

// Number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions).
#define FLATNESS_LIMIT_I16 10      // I16 mode
#define FLATNESS_LIMIT_I4  3       // I4 mode
#define FLATNESS_LIMIT_UV  2       // UV mode
#define FLATNESS_PENALTY   140     // roughly ~1bit per block

// Product of 'a' and 'b' scaled down by 256, with a +128 rounding term.
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

// Distortion multiplier (equivalent of lambda).
#define RD_DISTO_MULT 256
43
44 // #define DEBUG_BLOCK
45
46 //------------------------------------------------------------------------------
47
48 #if defined(DEBUG_BLOCK)
49
50 #include <stdio.h>
51 #include <stdlib.h>
52
53 static void PrintBlockInfo(const VP8EncIterator* const it,
54 const VP8ModeScore* const rd) {
55 int i, j;
56 const int is_i16 = (it->mb_->type_ == 1);
57 const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;
58 const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;
59 const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;
60 const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;
61 printf("SOURCE / OUTPUT / ABS DELTA\n");
62 for (j = 0; j < 16; ++j) {
63 for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);
64 printf(" ");
65 for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);
66 printf(" ");
67 for (i = 0; i < 16; ++i) {
68 printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));
69 }
70 printf("\n");
71 }
72 printf("\n"); // newline before the U/V block
73 for (j = 0; j < 8; ++j) {
74 for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);
75 printf(" ");
76 for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);
77 printf(" ");
78 for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);
79 printf(" ");
80 for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);
81 printf(" ");
82 for (i = 0; i < 8; ++i) {
83 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
84 }
85 printf(" ");
86 for (i = 8; i < 16; ++i) {
87 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));
88 }
89 printf("\n");
90 }
91 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
92 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
93 (int)rd->score);
94 if (is_i16) {
95 printf("Mode: %d\n", rd->mode_i16);
96 printf("y_dc_levels:");
97 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
98 printf("\n");
99 } else {
100 printf("Modes[16]: ");
101 for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
102 printf("\n");
103 }
104 printf("y_ac_levels:\n");
105 for (j = 0; j < 16; ++j) {
106 for (i = is_i16 ? 1 : 0; i < 16; ++i) {
107 printf("%4d ", rd->y_ac_levels[j][i]);
108 }
109 printf("\n");
110 }
111 printf("\n");
112 printf("uv_levels (mode=%d):\n", rd->mode_uv);
113 for (j = 0; j < 8; ++j) {
114 for (i = 0; i < 16; ++i) {
115 printf("%4d ", rd->uv_levels[j][i]);
116 }
117 printf("\n");
118 }
119 }
120
121 #endif // DEBUG_BLOCK
122
123 //------------------------------------------------------------------------------
124
125 static WEBP_INLINE int clip(int v, int m, int M) {
126 return v < m ? m : v > M ? M : v;
127 }
128
// Maps a scan position n to the index kZigzag[n] of the coefficient within
// the 16-entry block.
static const uint8_t kZigzag[16] = {
  0,  1,  4,  8,
  5,  2,  3,  6,
  9,  12, 13, 10,
  7,  11, 14, 15
};
132
// DC quantizer steps, indexed by a quantizer index clipped to [0..127].
static const uint8_t kDcTable[128] = {
  4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
  18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
  29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
  44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
  59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
  75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
  91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
  122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157
};
151
// AC quantizer steps, indexed by a quantizer index clipped to [0..127].
static const uint16_t kAcTable[128] = {
  4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
  20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
  36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
  52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
  78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
  110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
  155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
  213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284
};
170
// Second AC quantizer table, indexed by a quantizer index clipped to
// [0..127]. SetupMatrices() uses it for the AC step of the 'y2_' matrix.
static const uint16_t kAcTable2[128] = {
  8, 8, 9, 10, 12, 13, 15, 17, 18, 20, 21, 23, 24, 26, 27, 29,
  31, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46, 48, 49, 51, 52, 54,
  55, 57, 58, 60, 62, 63, 65, 66, 68, 69, 71, 72, 74, 75, 77, 79,
  80, 82, 83, 85, 86, 88, 89, 93, 96, 99, 102, 105, 108, 111, 114, 117,
  120, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 155, 158, 161, 164, 167,
  170, 173, 176, 179, 184, 189, 193, 198, 203, 207, 212, 217, 221, 226, 230, 235,
  240, 244, 249, 254, 258, 263, 268, 274, 280, 286, 292, 299, 305, 311, 317, 323,
  330, 336, 342, 348, 354, 362, 370, 379, 385, 393, 401, 409, 416, 424, 432, 440
};
189
// Quantization biases. The first index is the matrix type passed to
// ExpandMatrix(), the second index is 0 for the DC and 1 for the AC coeffs.
// [luma-ac,luma-dc,chroma][dc,ac]
static const uint8_t kBiasMatrices[3][2] = {
  { 96, 110 },
  { 96, 108 },
  { 110, 115 }
};
193
// Sharpening by (slightly) raising the hi-frequency coeffs.
// Hack-ish but helpful for mid-bitrate range. Use with care.

// Number of descaling bits for the sharpening bias.
#define SHARPEN_BITS 11

// Per-coefficient sharpening weights; the first entry gets none.
static const uint8_t kFreqSharpening[16] = {
  0, 30, 60, 90, 30, 60, 90, 90, 60, 90, 90, 90, 90, 90, 90, 90
};
203
204 //------------------------------------------------------------------------------
205 // Initialize quantization parameters in VP8Matrix
206
207 // Returns the average quantizer
208 static int ExpandMatrix(VP8Matrix* const m, int type) {
209 int i, sum;
210 for (i = 0; i < 2; ++i) {
211 const int is_ac_coeff = (i > 0);
212 const int bias = kBiasMatrices[type][is_ac_coeff];
213 m->iq_[i] = (1 << QFIX) / m->q_[i];
214 m->bias_[i] = BIAS(bias);
215 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
216 // * zero if coeff <= zthresh
217 // * non-zero if coeff > zthresh
218 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
219 }
220 for (i = 2; i < 16; ++i) {
221 m->q_[i] = m->q_[1];
222 m->iq_[i] = m->iq_[1];
223 m->bias_[i] = m->bias_[1];
224 m->zthresh_[i] = m->zthresh_[1];
225 }
226 for (sum = 0, i = 0; i < 16; ++i) {
227 if (type == 0) { // we only use sharpening for AC luma coeffs
228 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
229 } else {
230 m->sharpen_[i] = 0;
231 }
232 sum += m->q_[i];
233 }
234 return (sum + 8) >> 4;
235 }
236
// Raises '*v' to 1 when it is lower than that; larger values are untouched.
static void CheckLambdaValue(int* const v) {
  if (*v >= 1) return;
  *v = 1;
}
238
239 static void SetupMatrices(VP8Encoder* enc) {
240 int i;
241 const int tlambda_scale =
242 (enc->method_ >= 4) ? enc->config_->sns_strength
243 : 0;
244 const int num_segments = enc->segment_hdr_.num_segments_;
245 for (i = 0; i < num_segments; ++i) {
246 VP8SegmentInfo* const m = &enc->dqm_[i];
247 const int q = m->quant_;
248 int q_i4, q_i16, q_uv;
249 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
250 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
251
252 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
253 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
254
255 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
256 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
257
258 q_i4 = ExpandMatrix(&m->y1_, 0);
259 q_i16 = ExpandMatrix(&m->y2_, 1);
260 q_uv = ExpandMatrix(&m->uv_, 2);
261
262 m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7;
263 m->lambda_i16_ = (3 * q_i16 * q_i16);
264 m->lambda_uv_ = (3 * q_uv * q_uv) >> 6;
265 m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7;
266 m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3;
267 m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;
268 m->lambda_trellis_uv_ = (q_uv * q_uv) << 1;
269 m->tlambda_ = (tlambda_scale * q_i4) >> 5;
270
271 // none of these constants should be < 1
272 CheckLambdaValue(&m->lambda_i4_);
273 CheckLambdaValue(&m->lambda_i16_);
274 CheckLambdaValue(&m->lambda_uv_);
275 CheckLambdaValue(&m->lambda_mode_);
276 CheckLambdaValue(&m->lambda_trellis_i4_);
277 CheckLambdaValue(&m->lambda_trellis_i16_);
278 CheckLambdaValue(&m->lambda_trellis_uv_);
279 CheckLambdaValue(&m->tlambda_);
280
281 m->min_disto_ = 20 * m->y1_.q_[0]; // quantization-aware min disto
282 m->max_edge_ = 0;
283
284 m->i4_penalty_ = 1000 * q_i4 * q_i4;
285 }
286 }
287
288 //------------------------------------------------------------------------------
289 // Initialize filtering parameters
290
291 // Very small filter-strength values have close to no visual effect. So we can
292 // save a little decoding-CPU by turning filtering off for these.
293 #define FSTRENGTH_CUTOFF 2
294
295 static void SetupFilterStrength(VP8Encoder* const enc) {
296 int i;
297 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
298 const int level0 = 5 * enc->config_->filter_strength;
299 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
300 VP8SegmentInfo* const m = &enc->dqm_[i];
301 // We focus on the quantization of AC coeffs.
302 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
303 const int base_strength =
304 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
305 // Segments with lower complexity ('beta') will be less filtered.
306 const int f = base_strength * level0 / (256 + m->beta_);
307 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
308 }
309 // We record the initial strength (mainly for the case of 1-segment only).
310 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
311 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
312 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
313 }
314
315 //------------------------------------------------------------------------------
316
// Bounds applied to the U/V AC quantizer offset.
// Note: if you change the values below, remember that the max range
// allowed by the syntax for DQ_UV is [-16,16].
#define MAX_DQ_UV   (6)
#define MIN_DQ_UV   (-4)
321
322 // We want to emulate jpeg-like behaviour where the expected "good" quality
323 // is around q=75. Internally, our "good" middle is around c=50. So we
324 // map accordingly using linear piece-wise function
325 static double QualityToCompression(double c) {
326 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
327 // The file size roughly scales as pow(quantizer, 3.). Actually, the
328 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
329 // in the mid-quant range. So we scale the compressibility inversely to
330 // this power-law: quant ~= compression ^ 1/3. This law holds well for
331 // low quant. Finer modeling for high-quant would make use of kAcTable[]
332 // more explicitly.
333 const double v = pow(linear_c, 1 / 3.);
334 return v;
335 }
336
337 static double QualityToJPEGCompression(double c, double alpha) {
338 // We map the complexity 'alpha' and quality setting 'c' to a compression
339 // exponent empirically matched to the compression curve of libjpeg6b.
340 // On average, the WebP output size will be roughly similar to that of a
341 // JPEG file compressed with same quality factor.
342 const double amin = 0.30;
343 const double amax = 0.85;
344 const double exp_min = 0.4;
345 const double exp_max = 0.9;
346 const double slope = (exp_min - exp_max) / (amax - amin);
347 // Linearly interpolate 'expn' from exp_min to exp_max
348 // in the [amin, amax] range.
349 const double expn = (alpha > amax) ? exp_min
350 : (alpha < amin) ? exp_max
351 : exp_max + slope * (alpha - amin);
352 const double v = pow(c, expn);
353 return v;
354 }
355
356 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
357 const VP8SegmentInfo* const S2) {
358 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
359 }
360
361 static void SimplifySegments(VP8Encoder* const enc) {
362 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
363 // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
364 // explicit check is needed to avoid a spurious warning about 'i' exceeding
365 // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).
366 const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)
367 ? enc->segment_hdr_.num_segments_
368 : NUM_MB_SEGMENTS;
369 int num_final_segments = 1;
370 int s1, s2;
371 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments
372 const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
373 int found = 0;
374 // check if we already have similar segment
375 for (s2 = 0; s2 < num_final_segments; ++s2) {
376 const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
377 if (SegmentsAreEquivalent(S1, S2)) {
378 found = 1;
379 break;
380 }
381 }
382 map[s1] = s2;
383 if (!found) {
384 if (num_final_segments != s1) {
385 enc->dqm_[num_final_segments] = enc->dqm_[s1];
386 }
387 ++num_final_segments;
388 }
389 }
390 if (num_final_segments < num_segments) { // Remap
391 int i = enc->mb_w_ * enc->mb_h_;
392 while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
393 enc->segment_hdr_.num_segments_ = num_final_segments;
394 // Replicate the trailing segment infos (it's mostly cosmetics)
395 for (i = num_final_segments; i < num_segments; ++i) {
396 enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
397 }
398 }
399 }
400
401 void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
402 int i;
403 int dq_uv_ac, dq_uv_dc;
404 const int num_segments = enc->segment_hdr_.num_segments_;
405 const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
406 const double Q = quality / 100.;
407 const double c_base = enc->config_->emulate_jpeg_size ?
408 QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
409 QualityToCompression(Q);
410 for (i = 0; i < num_segments; ++i) {
411 // We modulate the base coefficient to accommodate for the quantization
412 // susceptibility and allow denser segments to be quantized more.
413 const double expn = 1. - amp * enc->dqm_[i].alpha_;
414 const double c = pow(c_base, expn);
415 const int q = (int)(127. * (1. - c));
416 assert(expn > 0.);
417 enc->dqm_[i].quant_ = clip(q, 0, 127);
418 }
419
420 // purely indicative in the bitstream (except for the 1-segment case)
421 enc->base_quant_ = enc->dqm_[0].quant_;
422
423 // fill-in values for the unused segments (required by the syntax)
424 for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
425 enc->dqm_[i].quant_ = enc->base_quant_;
426 }
427
428 // uv_alpha_ is normally spread around ~60. The useful range is
429 // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
430 // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
431 dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
432 / (MAX_ALPHA - MIN_ALPHA);
433 // we rescale by the user-defined strength of adaptation
434 dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
435 // and make it safe.
436 dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
437 // We also boost the dc-uv-quant a little, based on sns-strength, since
438 // U/V channels are quite more reactive to high quants (flat DC-blocks
439 // tend to appear, and are unpleasant).
440 dq_uv_dc = -4 * enc->config_->sns_strength / 100;
441 dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
442
443 enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum
444 enc->dq_y2_dc_ = 0;
445 enc->dq_y2_ac_ = 0;
446 enc->dq_uv_dc_ = dq_uv_dc;
447 enc->dq_uv_ac_ = dq_uv_ac;
448
449 SetupFilterStrength(enc); // initialize segments' filtering, eventually
450
451 if (num_segments > 1) SimplifySegments(enc);
452
453 SetupMatrices(enc); // finalize quantization matrices
454 }
455
456 //------------------------------------------------------------------------------
457 // Form the predictions in cache
458
459 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
460 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
461 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
462
463 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
464 const int VP8I4ModeOffsets[NUM_BMODES] = {
465 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
466 };
467
468 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
469 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
470 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
471 VP8EncPredLuma16(it->yuv_p_, left, top);
472 }
473
474 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
475 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
476 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
477 VP8EncPredChroma8(it->yuv_p_, left, top);
478 }
479
480 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
481 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
482 }
483
484 //------------------------------------------------------------------------------
485 // Quantize
486
487 // Layout:
488 // +----+----+
489 // |YYYY|UUVV| 0
490 // |YYYY|UUVV| 4
491 // |YYYY|....| 8
492 // |YYYY|....| 12
493 // +----+----+
494
495 const int VP8Scan[16] = { // Luma
496 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
497 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
498 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
499 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
500 };
501
502 static const int VP8ScanUV[4 + 4] = {
503 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
504 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
505 };
506
507 //------------------------------------------------------------------------------
508 // Distortion measurement
509
// Per-coefficient weights passed to VP8TDisto16x16() for luma.
static const uint16_t kWeightY[16] = {
  38, 32, 20, 9,
  32, 28, 17, 7,
  20, 17, 10, 4,
  9,  7,  4,  2
};
513
// Per-coefficient distortion weights used by the trellis quantizer.
// They are flat (all 16) when USE_TDISTO is 0.
static const uint16_t kWeightTrellis[16] = {
#if USE_TDISTO == 0
  16, 16, 16, 16,
  16, 16, 16, 16,
  16, 16, 16, 16,
  16, 16, 16, 16
#else
  30, 27, 19, 11, 27, 24, 17, 10, 19, 17, 12, 8, 11, 10, 8, 6
#endif
};
524
525 // Init/Copy the common fields in score.
526 static void InitScore(VP8ModeScore* const rd) {
527 rd->D = 0;
528 rd->SD = 0;
529 rd->R = 0;
530 rd->H = 0;
531 rd->nz = 0;
532 rd->score = MAX_COST;
533 }
534
535 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
536 dst->D = src->D;
537 dst->SD = src->SD;
538 dst->R = src->R;
539 dst->H = src->H;
540 dst->nz = src->nz; // note that nz is not accumulated, but just copied.
541 dst->score = src->score;
542 }
543
544 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
545 dst->D += src->D;
546 dst->SD += src->SD;
547 dst->R += src->R;
548 dst->H += src->H;
549 dst->nz |= src->nz; // here, new nz bits are accumulated.
550 dst->score += src->score;
551 }
552
553 //------------------------------------------------------------------------------
554 // Performs trellis-optimized quantization.
555
// One node of the trellis: a candidate level for a given coefficient.
typedef struct {
  int8_t prev;     // index of the best predecessor node
  int8_t sign;     // non-zero if the original coefficient is negative
  int16_t level;   // quantized level (magnitude)
} Node;
562
563 // Score state
564 typedef struct {
565 score_t score; // partial RD score
566 const uint16_t* costs; // shortcut to cost tables
567 } ScoreState;
568
// If a coefficient was quantized to a value Q (using a neutral bias),
// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
// We don't test negative values though.
#define MIN_DELTA 0   // how much lower level to try
#define MAX_DELTA 1   // how much higher
// Number of candidate levels examined per coefficient.
#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
// Accessors for the local arrays 'nodes' and 'score_states', where 'l' is a
// level delta in [-MIN_DELTA, MAX_DELTA].
#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
577
578 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
579 rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);
580 }
581
582 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
583 score_t distortion) {
584 return rate * lambda + RD_DISTO_MULT * distortion;
585 }
586
587 static int TrellisQuantizeBlock(const VP8Encoder* const enc,
588 int16_t in[16], int16_t out[16],
589 int ctx0, int coeff_type,
590 const VP8Matrix* const mtx,
591 int lambda) {
592 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
593 CostArrayPtr const costs =
594 (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
595 const int first = (coeff_type == 0) ? 1 : 0;
596 Node nodes[16][NUM_NODES];
597 ScoreState score_states[2][NUM_NODES];
598 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
599 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
600 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
601 score_t best_score;
602 int n, m, p, last;
603
604 {
605 score_t cost;
606 const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
607 const int last_proba = probas[VP8EncBands[first]][ctx0][0];
608
609 // compute the position of the last interesting coefficient
610 last = first - 1;
611 for (n = 15; n >= first; --n) {
612 const int j = kZigzag[n];
613 const int err = in[j] * in[j];
614 if (err > thresh) {
615 last = n;
616 break;
617 }
618 }
619 // we don't need to go inspect up to n = 16 coeffs. We can just go up
620 // to last + 1 (inclusive) without losing much.
621 if (last < 15) ++last;
622
623 // compute 'skip' score. This is the max score one can do.
624 cost = VP8BitCost(0, last_proba);
625 best_score = RDScoreTrellis(lambda, cost, 0);
626
627 // initialize source node.
628 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
629 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
630 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
631 ss_cur[m].costs = costs[first][ctx0];
632 }
633 }
634
635 // traverse trellis.
636 for (n = first; n <= last; ++n) {
637 const int j = kZigzag[n];
638 const uint32_t Q = mtx->q_[j];
639 const uint32_t iQ = mtx->iq_[j];
640 const uint32_t B = BIAS(0x00); // neutral bias
641 // note: it's important to take sign of the _original_ coeff,
642 // so we don't have to consider level < 0 afterward.
643 const int sign = (in[j] < 0);
644 const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
645 int level0 = QUANTDIV(coeff0, iQ, B);
646 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
647
648 { // Swap current and previous score states
649 ScoreState* const tmp = ss_cur;
650 ss_cur = ss_prev;
651 ss_prev = tmp;
652 }
653
654 // test all alternate level values around level0.
655 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
656 Node* const cur = &NODE(n, m);
657 int level = level0 + m;
658 const int ctx = (level > 2) ? 2 : level;
659 const int band = VP8EncBands[n + 1];
660 score_t base_score, last_pos_score;
661 score_t best_cur_score = MAX_COST;
662 int best_prev = 0; // default, in case
663
664 ss_cur[m].score = MAX_COST;
665 ss_cur[m].costs = costs[n + 1][ctx];
666 if (level > MAX_LEVEL || level < 0) { // node is dead?
667 continue;
668 }
669
670 // Compute extra rate cost if last coeff's position is < 15
671 {
672 const score_t last_pos_cost =
673 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
674 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
675 }
676
677 {
678 // Compute delta_error = how much coding this level will
679 // subtract to max_error as distortion.
680 // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
681 const int new_error = coeff0 - level * Q;
682 const int delta_error =
683 kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
684 base_score = RDScoreTrellis(lambda, 0, delta_error);
685 }
686
687 // Inspect all possible non-dead predecessors. Retain only the best one.
688 for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
689 // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
690 // eliminated since their score can't be better than the current best.
691 const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
692 // Examine node assuming it's a non-terminal one.
693 const score_t score =
694 base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
695 if (score < best_cur_score) {
696 best_cur_score = score;
697 best_prev = p;
698 }
699 }
700 // Store best finding in current node.
701 cur->sign = sign;
702 cur->level = level;
703 cur->prev = best_prev;
704 ss_cur[m].score = best_cur_score;
705
706 // Now, record best terminal node (and thus best entry in the graph).
707 if (level != 0) {
708 const score_t score = best_cur_score + last_pos_score;
709 if (score < best_score) {
710 best_score = score;
711 best_path[0] = n; // best eob position
712 best_path[1] = m; // best node index
713 best_path[2] = best_prev; // best predecessor
714 }
715 }
716 }
717 }
718
719 // Fresh start
720 memset(in + first, 0, (16 - first) * sizeof(*in));
721 memset(out + first, 0, (16 - first) * sizeof(*out));
722 if (best_path[0] == -1) {
723 return 0; // skip!
724 }
725
726 {
727 // Unwind the best path.
728 // Note: best-prev on terminal node is not necessarily equal to the
729 // best_prev for non-terminal. So we patch best_path[2] in.
730 int nz = 0;
731 int best_node = best_path[1];
732 n = best_path[0];
733 NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal
734
735 for (; n >= first; --n) {
736 const Node* const node = &NODE(n, best_node);
737 const int j = kZigzag[n];
738 out[n] = node->sign ? -node->level : node->level;
739 nz |= node->level;
740 in[j] = out[n] * mtx->q_[j];
741 best_node = node->prev;
742 }
743 return (nz != 0);
744 }
745 }
746
747 #undef NODE
748
749 //------------------------------------------------------------------------------
750 // Performs: difference, transform, quantize, back-transform, add
751 // all at once. Output is the reconstructed block in *yuv_out, and the
752 // quantized levels in *levels.
753
754 static int ReconstructIntra16(VP8EncIterator* const it,
755 VP8ModeScore* const rd,
756 uint8_t* const yuv_out,
757 int mode) {
758 const VP8Encoder* const enc = it->enc_;
759 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
760 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
761 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
762 int nz = 0;
763 int n;
764 int16_t tmp[16][16], dc_tmp[16];
765
766 for (n = 0; n < 16; n += 2) {
767 VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
768 }
769 VP8FTransformWHT(tmp[0], dc_tmp);
770 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
771
772 if (DO_TRELLIS_I16 && it->do_trellis_) {
773 int x, y;
774 VP8IteratorNzToBytes(it);
775 for (y = 0, n = 0; y < 4; ++y) {
776 for (x = 0; x < 4; ++x, ++n) {
777 const int ctx = it->top_nz_[x] + it->left_nz_[y];
778 const int non_zero =
779 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
780 &dqm->y1_, dqm->lambda_trellis_i16_);
781 it->top_nz_[x] = it->left_nz_[y] = non_zero;
782 rd->y_ac_levels[n][0] = 0;
783 nz |= non_zero << n;
784 }
785 }
786 } else {
787 for (n = 0; n < 16; n += 2) {
788 // Zero-out the first coeff, so that: a) nz is correct below, and
789 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
790 tmp[n][0] = tmp[n + 1][0] = 0;
791 nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
792 assert(rd->y_ac_levels[n + 0][0] == 0);
793 assert(rd->y_ac_levels[n + 1][0] == 0);
794 }
795 }
796
797 // Transform back
798 VP8TransformWHT(dc_tmp, tmp[0]);
799 for (n = 0; n < 16; n += 2) {
800 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
801 }
802
803 return nz;
804 }
805
806 static int ReconstructIntra4(VP8EncIterator* const it,
807 int16_t levels[16],
808 const uint8_t* const src,
809 uint8_t* const yuv_out,
810 int mode) {
811 const VP8Encoder* const enc = it->enc_;
812 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
813 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
814 int nz = 0;
815 int16_t tmp[16];
816
817 VP8FTransform(src, ref, tmp);
818 if (DO_TRELLIS_I4 && it->do_trellis_) {
819 const int x = it->i4_ & 3, y = it->i4_ >> 2;
820 const int ctx = it->top_nz_[x] + it->left_nz_[y];
821 nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
822 dqm->lambda_trellis_i4_);
823 } else {
824 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
825 }
826 VP8ITransform(ref, tmp, yuv_out, 0);
827 return nz;
828 }
829
830 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
831 uint8_t* const yuv_out, int mode) {
832 const VP8Encoder* const enc = it->enc_;
833 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
834 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
835 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
836 int nz = 0;
837 int n;
838 int16_t tmp[8][16];
839
840 for (n = 0; n < 8; n += 2) {
841 VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
842 }
843 if (DO_TRELLIS_UV && it->do_trellis_) {
844 int ch, x, y;
845 for (ch = 0, n = 0; ch <= 2; ch += 2) {
846 for (y = 0; y < 2; ++y) {
847 for (x = 0; x < 2; ++x, ++n) {
848 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
849 const int non_zero =
850 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
851 &dqm->uv_, dqm->lambda_trellis_uv_);
852 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
853 nz |= non_zero << n;
854 }
855 }
856 }
857 } else {
858 for (n = 0; n < 8; n += 2) {
859 nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
860 }
861 }
862
863 for (n = 0; n < 8; n += 2) {
864 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
865 }
866 return (nz << 16);
867 }
868
869 //------------------------------------------------------------------------------
870 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
871 // Pick the mode with the lower RD-cost = Rate + lambda * Distortion.
872
873 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
874 // We look at the first three AC coefficients to determine what is the average
875 // delta between each sub-4x4 block.
876 const int v0 = abs(DCs[1]);
877 const int v1 = abs(DCs[2]);
878 const int v2 = abs(DCs[4]);
879 int max_v = (v1 > v0) ? v1 : v0;
880 max_v = (v2 > max_v) ? v2 : max_v;
881 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
882 }
883
884 static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
885 VP8ModeScore* const tmp = *a;
886 *a = *b;
887 *b = tmp;
888 }
889
// Exchanges the two byte pointers stored at 'a' and 'b'.
// Only the pointers are swapped; the buffers they refer to are not touched.
static void SwapPtr(uint8_t** a, uint8_t** b) {
  uint8_t* const first = *a;
  uint8_t* const second = *b;
  *a = second;
  *b = first;
}
895
896 static void SwapOut(VP8EncIterator* const it) {
897 SwapPtr(&it->yuv_out_, &it->yuv_out2_);
898 }
899
900 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
901 score_t score = 0;
902 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
903 int i;
904 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
905 score += (levels[i] != 0);
906 if (score > thresh) return 0;
907 }
908 levels += 16;
909 }
910 return 1;
911 }
912
913 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
914 const int kNumBlocks = 16;
915 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
916 const int lambda = dqm->lambda_i16_;
917 const int tlambda = dqm->tlambda_;
918 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
919 VP8ModeScore rd_tmp;
920 VP8ModeScore* rd_cur = &rd_tmp;
921 VP8ModeScore* rd_best = rd;
922 int mode;
923
924 rd->mode_i16 = -1;
925 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
926 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer
927 rd_cur->mode_i16 = mode;
928
929 // Reconstruct
930 rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
931
932 // Measure RD-score
933 rd_cur->D = VP8SSE16x16(src, tmp_dst);
934 rd_cur->SD =
935 tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
936 rd_cur->H = VP8FixedCostsI16[mode];
937 rd_cur->R = VP8GetCostLuma16(it, rd_cur);
938 if (mode > 0 &&
939 IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
940 // penalty to avoid flat area to be mispredicted by complex mode
941 rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
942 }
943
944 // Since we always examine Intra16 first, we can overwrite *rd directly.
945 SetRDScore(lambda, rd_cur);
946 if (mode == 0 || rd_cur->score < rd_best->score) {
947 SwapModeScore(&rd_cur, &rd_best);
948 SwapOut(it);
949 }
950 }
951 if (rd_best != rd) {
952 memcpy(rd, rd_best, sizeof(*rd));
953 }
954 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
955 VP8SetIntra16Mode(it, rd->mode_i16);
956
957 // we have a blocky macroblock (only DCs are non-zero) with fairly high
958 // distortion, record max delta so we can later adjust the minimal filtering
959 // strength needed to smooth these blocks out.
960 if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto_) {
961 StoreMaxDelta(dqm, rd->y_dc_levels);
962 }
963 }
964
965 //------------------------------------------------------------------------------
966
967 // return the cost array corresponding to the surrounding prediction modes.
968 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
969 const uint8_t modes[16]) {
970 const int preds_w = it->enc_->preds_w_;
971 const int x = (it->i4_ & 3), y = it->i4_ >> 2;
972 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
973 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
974 return VP8FixedCostsI4[top][left];
975 }
976
977 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
978 const VP8Encoder* const enc = it->enc_;
979 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
980 const int lambda = dqm->lambda_i4_;
981 const int tlambda = dqm->tlambda_;
982 const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
983 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
984 int total_header_bits = 0;
985 VP8ModeScore rd_best;
986
987 if (enc->max_i4_header_bits_ == 0) {
988 return 0;
989 }
990
991 InitScore(&rd_best);
992 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
993 SetRDScore(dqm->lambda_mode_, &rd_best);
994 VP8IteratorStartI4(it);
995 do {
996 const int kNumBlocks = 1;
997 VP8ModeScore rd_i4;
998 int mode;
999 int best_mode = -1;
1000 const uint8_t* const src = src0 + VP8Scan[it->i4_];
1001 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
1002 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
1003 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
1004
1005 InitScore(&rd_i4);
1006 VP8MakeIntra4Preds(it);
1007 for (mode = 0; mode < NUM_BMODES; ++mode) {
1008 VP8ModeScore rd_tmp;
1009 int16_t tmp_levels[16];
1010
1011 // Reconstruct
1012 rd_tmp.nz =
1013 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
1014
1015 // Compute RD-score
1016 rd_tmp.D = VP8SSE4x4(src, tmp_dst);
1017 rd_tmp.SD =
1018 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
1019 : 0;
1020 rd_tmp.H = mode_costs[mode];
1021
1022 // Add flatness penalty
1023 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
1024 rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
1025 } else {
1026 rd_tmp.R = 0;
1027 }
1028
1029 // early-out check
1030 SetRDScore(lambda, &rd_tmp);
1031 if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
1032
1033 // finish computing score
1034 rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
1035 SetRDScore(lambda, &rd_tmp);
1036
1037 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
1038 CopyScore(&rd_i4, &rd_tmp);
1039 best_mode = mode;
1040 SwapPtr(&tmp_dst, &best_block);
1041 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
1042 sizeof(rd_best.y_ac_levels[it->i4_]));
1043 }
1044 }
1045 SetRDScore(dqm->lambda_mode_, &rd_i4);
1046 AddScore(&rd_best, &rd_i4);
1047 if (rd_best.score >= rd->score) {
1048 return 0;
1049 }
1050 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
1051 if (total_header_bits > enc->max_i4_header_bits_) {
1052 return 0;
1053 }
1054 // Copy selected samples if not in the right place already.
1055 if (best_block != best_blocks + VP8Scan[it->i4_]) {
1056 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
1057 }
1058 rd->modes_i4[it->i4_] = best_mode;
1059 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
1060 } while (VP8IteratorRotateI4(it, best_blocks));
1061
1062 // finalize state
1063 CopyScore(rd, &rd_best);
1064 VP8SetIntra4Mode(it, rd->modes_i4);
1065 SwapOut(it);
1066 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
1067 return 1; // select intra4x4 over intra16x16
1068 }
1069
1070 //------------------------------------------------------------------------------
1071
1072 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
1073 const int kNumBlocks = 8;
1074 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
1075 const int lambda = dqm->lambda_uv_;
1076 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
1077 uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer
1078 uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
1079 uint8_t* dst = dst0;
1080 VP8ModeScore rd_best;
1081 int mode;
1082
1083 rd->mode_uv = -1;
1084 InitScore(&rd_best);
1085 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1086 VP8ModeScore rd_uv;
1087
1088 // Reconstruct
1089 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
1090
1091 // Compute RD-score
1092 rd_uv.D = VP8SSE16x8(src, tmp_dst);
1093 rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas.
1094 rd_uv.H = VP8FixedCostsUV[mode];
1095 rd_uv.R = VP8GetCostUV(it, &rd_uv);
1096 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
1097 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
1098 }
1099
1100 SetRDScore(lambda, &rd_uv);
1101 if (mode == 0 || rd_uv.score < rd_best.score) {
1102 CopyScore(&rd_best, &rd_uv);
1103 rd->mode_uv = mode;
1104 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
1105 SwapPtr(&dst, &tmp_dst);
1106 }
1107 }
1108 VP8SetIntraUVMode(it, rd->mode_uv);
1109 AddScore(rd, &rd_best);
1110 if (dst != dst0) { // copy 16x8 block if needed
1111 VP8Copy16x8(dst, dst0);
1112 }
1113 }
1114
1115 //------------------------------------------------------------------------------
1116 // Final reconstruction and quantization.
1117
1118 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
1119 const VP8Encoder* const enc = it->enc_;
1120 const int is_i16 = (it->mb_->type_ == 1);
1121 int nz = 0;
1122
1123 if (is_i16) {
1124 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
1125 } else {
1126 VP8IteratorStartI4(it);
1127 do {
1128 const int mode =
1129 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
1130 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
1131 uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
1132 VP8MakeIntra4Preds(it);
1133 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
1134 src, dst, mode) << it->i4_;
1135 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
1136 }
1137
1138 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
1139 rd->nz = nz;
1140 }
1141
1142 // Refine intra16/intra4 sub-modes based on distortion only (not rate).
1143 static void RefineUsingDistortion(VP8EncIterator* const it,
1144 int try_both_modes, int refine_uv_mode,
1145 VP8ModeScore* const rd) {
1146 score_t best_score = MAX_COST;
1147 int nz = 0;
1148 int mode;
1149 int is_i16 = try_both_modes || (it->mb_->type_ == 1);
1150
1151 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
1152 // Some empiric constants, of approximate order of magnitude.
1153 const int lambda_d_i16 = 106;
1154 const int lambda_d_i4 = 11;
1155 const int lambda_d_uv = 120;
1156 score_t score_i4 = dqm->i4_penalty_;
1157 score_t i4_bit_sum = 0;
1158 const score_t bit_limit = try_both_modes ? it->enc_->mb_header_limit_
1159 : MAX_COST; // no early-out allowed
1160
1161 if (is_i16) { // First, evaluate Intra16 distortion
1162 int best_mode = -1;
1163 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
1164 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1165 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
1166 const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT
1167 + VP8FixedCostsI16[mode] * lambda_d_i16;
1168 if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
1169 continue;
1170 }
1171 if (score < best_score) {
1172 best_mode = mode;
1173 best_score = score;
1174 }
1175 }
1176 VP8SetIntra16Mode(it, best_mode);
1177 // we'll reconstruct later, if i16 mode actually gets selected
1178 }
1179
1180 // Next, evaluate Intra4
1181 if (try_both_modes || !is_i16) {
1182 // We don't evaluate the rate here, but just account for it through a
1183 // constant penalty (i4 mode usually needs more bits compared to i16).
1184 is_i16 = 0;
1185 VP8IteratorStartI4(it);
1186 do {
1187 int best_i4_mode = -1;
1188 score_t best_i4_score = MAX_COST;
1189 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
1190 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
1191
1192 VP8MakeIntra4Preds(it);
1193 for (mode = 0; mode < NUM_BMODES; ++mode) {
1194 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
1195 const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT
1196 + mode_costs[mode] * lambda_d_i4;
1197 if (score < best_i4_score) {
1198 best_i4_mode = mode;
1199 best_i4_score = score;
1200 }
1201 }
1202 i4_bit_sum += mode_costs[best_i4_mode];
1203 rd->modes_i4[it->i4_] = best_i4_mode;
1204 score_i4 += best_i4_score;
1205 if (score_i4 >= best_score || i4_bit_sum > bit_limit) {
1206 // Intra4 won't be better than Intra16. Bail out and pick Intra16.
1207 is_i16 = 1;
1208 break;
1209 } else { // reconstruct partial block inside yuv_out2_ buffer
1210 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];
1211 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
1212 src, tmp_dst, best_i4_mode) << it->i4_;
1213 }
1214 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));
1215 }
1216
1217 // Final reconstruction, depending on which mode is selected.
1218 if (!is_i16) {
1219 VP8SetIntra4Mode(it, rd->modes_i4);
1220 SwapOut(it);
1221 best_score = score_i4;
1222 } else {
1223 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
1224 }
1225
1226 // ... and UV!
1227 if (refine_uv_mode) {
1228 int best_mode = -1;
1229 score_t best_uv_score = MAX_COST;
1230 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
1231 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1232 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
1233 const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT
1234 + VP8FixedCostsUV[mode] * lambda_d_uv;
1235 if (score < best_uv_score) {
1236 best_mode = mode;
1237 best_uv_score = score;
1238 }
1239 }
1240 VP8SetIntraUVMode(it, best_mode);
1241 }
1242 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
1243
1244 rd->nz = nz;
1245 rd->score = best_score;
1246 }
1247
1248 //------------------------------------------------------------------------------
1249 // Entry point
1250
1251 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
1252 VP8RDLevel rd_opt) {
1253 int is_skipped;
1254 const int method = it->enc_->method_;
1255
1256 InitScore(rd);
1257
1258 // We can perform predictions for Luma16x16 and Chroma8x8 already.
1259 // Luma4x4 predictions needs to be done as-we-go.
1260 VP8MakeLuma16Preds(it);
1261 VP8MakeChroma8Preds(it);
1262
1263 if (rd_opt > RD_OPT_NONE) {
1264 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
1265 PickBestIntra16(it, rd);
1266 if (method >= 2) {
1267 PickBestIntra4(it, rd);
1268 }
1269 PickBestUV(it, rd);
1270 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
1271 it->do_trellis_ = 1;
1272 SimpleQuantize(it, rd);
1273 }
1274 } else {
1275 // At this point we have heuristically decided intra16 / intra4.
1276 // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
1277 // For method <= 1, we don't re-examine the decision but just go ahead with
1278 // quantization/reconstruction.
1279 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);
1280 }
1281 is_skipped = (rd->nz == 0);
1282 VP8SetSkip(it, is_skipped);
1283 return is_skipped;
1284 }
OLDNEW
« no previous file with comments | « third_party/libwebp/enc/predictor_enc.c ('k') | third_party/libwebp/enc/quant_enc.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698