Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Side by Side Diff: third_party/libwebp/enc/quant.c

Issue 116213006: Update libwebp to 0.4.0 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: After Blink Roll Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/libwebp/enc/picture.c ('k') | third_party/libwebp/enc/syntax.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 Google Inc. All Rights Reserved. 1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Quantization 10 // Quantization
11 // 11 //
12 // Author: Skal (pascal.massimino@gmail.com) 12 // Author: Skal (pascal.massimino@gmail.com)
13 13
14 #include <assert.h> 14 #include <assert.h>
15 #include <math.h> 15 #include <math.h>
16 #include <stdlib.h> // for abs()
16 17
17 #include "./vp8enci.h" 18 #include "./vp8enci.h"
18 #include "./cost.h" 19 #include "./cost.h"
19 20
20 #define DO_TRELLIS_I4 1 21 #define DO_TRELLIS_I4 1
21 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate. 22 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
22 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth. 23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.
23 #define USE_TDISTO 1 24 #define USE_TDISTO 1
24 25
25 #define MID_ALPHA 64 // neutral value for susceptibility 26 #define MID_ALPHA 64 // neutral value for susceptibility
26 #define MIN_ALPHA 30 // lowest usable value for susceptibility 27 #define MIN_ALPHA 30 // lowest usable value for susceptibility
27 #define MAX_ALPHA 100 // higher meaninful value for susceptibility 28 #define MAX_ALPHA 100 // higher meaningful value for susceptibility
28 29
29 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP 30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
30 // power-law modulation. Must be strictly less than 1. 31 // power-law modulation. Must be strictly less than 1.
31 32
32 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision 33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
33 34
35 // number of non-zero coeffs below which we consider the block very flat
36 // (and apply a penalty to complex predictions)
37 #define FLATNESS_LIMIT_I16 10 // I16 mode
38 #define FLATNESS_LIMIT_I4 3 // I4 mode
39 #define FLATNESS_LIMIT_UV 2 // UV mode
40 #define FLATNESS_PENALTY 140 // roughly ~1bit per block
41
34 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) 42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
35 43
36 #if defined(__cplusplus) || defined(c_plusplus) 44 // #define DEBUG_BLOCK
37 extern "C" {
38 #endif
39 45
40 //------------------------------------------------------------------------------ 46 //------------------------------------------------------------------------------
41 47
48 #if defined(DEBUG_BLOCK)
49
50 #include <stdio.h>
51 #include <stdlib.h>
52
53 static void PrintBlockInfo(const VP8EncIterator* const it,
54 const VP8ModeScore* const rd) {
55 int i, j;
56 const int is_i16 = (it->mb_->type_ == 1);
57 printf("SOURCE / OUTPUT / ABS DELTA\n");
58 for (j = 0; j < 24; ++j) {
59 if (j == 16) printf("\n"); // newline before the U/V block
60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
61 printf(" ");
62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
63 printf(" ");
64 for (i = 0; i < 16; ++i) {
65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
66 }
67 printf("\n");
68 }
69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
71 (int)rd->score);
72 if (is_i16) {
73 printf("Mode: %d\n", rd->mode_i16);
74 printf("y_dc_levels:");
75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
76 printf("\n");
77 } else {
78 printf("Modes[16]: ");
79 for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
80 printf("\n");
81 }
82 printf("y_ac_levels:\n");
83 for (j = 0; j < 16; ++j) {
84 for (i = is_i16 ? 1 : 0; i < 16; ++i) {
85 printf("%4d ", rd->y_ac_levels[j][i]);
86 }
87 printf("\n");
88 }
89 printf("\n");
90 printf("uv_levels (mode=%d):\n", rd->mode_uv);
91 for (j = 0; j < 8; ++j) {
92 for (i = 0; i < 16; ++i) {
93 printf("%4d ", rd->uv_levels[j][i]);
94 }
95 printf("\n");
96 }
97 }
98
99 #endif // DEBUG_BLOCK
100
101 //------------------------------------------------------------------------------
102
42 static WEBP_INLINE int clip(int v, int m, int M) { 103 static WEBP_INLINE int clip(int v, int m, int M) {
43 return v < m ? m : v > M ? M : v; 104 return v < m ? m : v > M ? M : v;
44 } 105 }
45 106
46 static const uint8_t kZigzag[16] = { 107 static const uint8_t kZigzag[16] = {
47 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 108 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
48 }; 109 };
49 110
50 static const uint8_t kDcTable[128] = { 111 static const uint8_t kDcTable[128] = {
51 4, 5, 6, 7, 8, 9, 10, 10, 112 4, 5, 6, 7, 8, 9, 10, 10,
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
97 120, 124, 127, 130, 133, 136, 139, 142, 158 120, 124, 127, 130, 133, 136, 139, 142,
98 145, 148, 151, 155, 158, 161, 164, 167, 159 145, 148, 151, 155, 158, 161, 164, 167,
99 170, 173, 176, 179, 184, 189, 193, 198, 160 170, 173, 176, 179, 184, 189, 193, 198,
100 203, 207, 212, 217, 221, 226, 230, 235, 161 203, 207, 212, 217, 221, 226, 230, 235,
101 240, 244, 249, 254, 258, 263, 268, 274, 162 240, 244, 249, 254, 258, 263, 268, 274,
102 280, 286, 292, 299, 305, 311, 317, 323, 163 280, 286, 292, 299, 305, 311, 317, 323,
103 330, 336, 342, 348, 354, 362, 370, 379, 164 330, 336, 342, 348, 354, 362, 370, 379,
104 385, 393, 401, 409, 416, 424, 432, 440 165 385, 393, 401, 409, 416, 424, 432, 440
105 }; 166 };
106 167
107 static const uint16_t kCoeffThresh[16] = { 168 static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]
108 0, 10, 20, 30, 169 { 96, 110 }, { 96, 108 }, { 110, 115 }
109 10, 20, 30, 30,
110 20, 30, 30, 30,
111 30, 30, 30, 30
112 }; 170 };
113 171
114 // TODO(skal): tune more. Coeff thresholding? 172 // Sharpening by (slightly) raising the hi-frequency coeffs.
115 static const uint8_t kBiasMatrices[3][16] = { // [3] = [luma-ac,luma-dc,chroma]
116 { 96, 96, 96, 96,
117 96, 96, 96, 96,
118 96, 96, 96, 96,
119 96, 96, 96, 96 },
120 { 96, 96, 96, 96,
121 96, 96, 96, 96,
122 96, 96, 96, 96,
123 96, 96, 96, 96 },
124 { 96, 96, 96, 96,
125 96, 96, 96, 96,
126 96, 96, 96, 96,
127 96, 96, 96, 96 }
128 };
129
130 // Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).
131 // Hack-ish but helpful for mid-bitrate range. Use with care. 173 // Hack-ish but helpful for mid-bitrate range. Use with care.
174 #define SHARPEN_BITS 11 // number of descaling bits for sharpening bias
132 static const uint8_t kFreqSharpening[16] = { 175 static const uint8_t kFreqSharpening[16] = {
133 0, 30, 60, 90, 176 0, 30, 60, 90,
134 30, 60, 90, 90, 177 30, 60, 90, 90,
135 60, 90, 90, 90, 178 60, 90, 90, 90,
136 90, 90, 90, 90 179 90, 90, 90, 90
137 }; 180 };
138 181
139 //------------------------------------------------------------------------------ 182 //------------------------------------------------------------------------------
140 // Initialize quantization parameters in VP8Matrix 183 // Initialize quantization parameters in VP8Matrix
141 184
142 // Returns the average quantizer 185 // Returns the average quantizer
143 static int ExpandMatrix(VP8Matrix* const m, int type) { 186 static int ExpandMatrix(VP8Matrix* const m, int type) {
144 int i; 187 int i, sum;
145 int sum = 0; 188 for (i = 0; i < 2; ++i) {
189 const int is_ac_coeff = (i > 0);
190 const int bias = kBiasMatrices[type][is_ac_coeff];
191 m->iq_[i] = (1 << QFIX) / m->q_[i];
192 m->bias_[i] = BIAS(bias);
193 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
194 // * zero if coeff <= zthresh
195 // * non-zero if coeff > zthresh
196 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
197 }
146 for (i = 2; i < 16; ++i) { 198 for (i = 2; i < 16; ++i) {
147 m->q_[i] = m->q_[1]; 199 m->q_[i] = m->q_[1];
200 m->iq_[i] = m->iq_[1];
201 m->bias_[i] = m->bias_[1];
202 m->zthresh_[i] = m->zthresh_[1];
148 } 203 }
149 for (i = 0; i < 16; ++i) { 204 for (sum = 0, i = 0; i < 16; ++i) {
150 const int j = kZigzag[i]; 205 if (type == 0) { // we only use sharpening for AC luma coeffs
151 const int bias = kBiasMatrices[type][j]; 206 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
152 m->iq_[j] = (1 << QFIX) / m->q_[j]; 207 } else {
153 m->bias_[j] = BIAS(bias); 208 m->sharpen_[i] = 0;
154 // TODO(skal): tune kCoeffThresh[] 209 }
155 m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8; 210 sum += m->q_[i];
156 m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;
157 sum += m->q_[j];
158 } 211 }
159 return (sum + 8) >> 4; 212 return (sum + 8) >> 4;
160 } 213 }
161 214
162 static void SetupMatrices(VP8Encoder* enc) { 215 static void SetupMatrices(VP8Encoder* enc) {
163 int i; 216 int i;
164 const int tlambda_scale = 217 const int tlambda_scale =
165 (enc->method_ >= 4) ? enc->config_->sns_strength 218 (enc->method_ >= 4) ? enc->config_->sns_strength
166 : 0; 219 : 0;
167 const int num_segments = enc->segment_hdr_.num_segments_; 220 const int num_segments = enc->segment_hdr_.num_segments_;
168 for (i = 0; i < num_segments; ++i) { 221 for (i = 0; i < num_segments; ++i) {
169 VP8SegmentInfo* const m = &enc->dqm_[i]; 222 VP8SegmentInfo* const m = &enc->dqm_[i];
170 const int q = m->quant_; 223 const int q = m->quant_;
171 int q4, q16, quv; 224 int q4, q16, quv;
172 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)]; 225 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
173 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)]; 226 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
174 227
175 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2; 228 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
176 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)]; 229 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
177 230
178 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)]; 231 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
179 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)]; 232 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
180 233
181 q4 = ExpandMatrix(&m->y1_, 0); 234 q4 = ExpandMatrix(&m->y1_, 0);
182 q16 = ExpandMatrix(&m->y2_, 1); 235 q16 = ExpandMatrix(&m->y2_, 1);
183 quv = ExpandMatrix(&m->uv_, 2); 236 quv = ExpandMatrix(&m->uv_, 2);
184 237
185 // TODO: Switch to kLambda*[] tables? 238 m->lambda_i4_ = (3 * q4 * q4) >> 7;
186 { 239 m->lambda_i16_ = (3 * q16 * q16);
187 m->lambda_i4_ = (3 * q4 * q4) >> 7; 240 m->lambda_uv_ = (3 * quv * quv) >> 6;
188 m->lambda_i16_ = (3 * q16 * q16); 241 m->lambda_mode_ = (1 * q4 * q4) >> 7;
189 m->lambda_uv_ = (3 * quv * quv) >> 6; 242 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
190 m->lambda_mode_ = (1 * q4 * q4) >> 7; 243 m->lambda_trellis_i16_ = (q16 * q16) >> 2;
191 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3; 244 m->lambda_trellis_uv_ = (quv *quv) << 1;
192 m->lambda_trellis_i16_ = (q16 * q16) >> 2; 245 m->tlambda_ = (tlambda_scale * q4) >> 5;
193 m->lambda_trellis_uv_ = (quv *quv) << 1; 246
194 m->tlambda_ = (tlambda_scale * q4) >> 5; 247 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto
195 } 248 m->max_edge_ = 0;
196 } 249 }
197 } 250 }
198 251
199 //------------------------------------------------------------------------------ 252 //------------------------------------------------------------------------------
200 // Initialize filtering parameters 253 // Initialize filtering parameters
201 254
202 // Very small filter-strength values have close to no visual effect. So we can 255 // Very small filter-strength values have close to no visual effect. So we can
203 // save a little decoding-CPU by turning filtering off for these. 256 // save a little decoding-CPU by turning filtering off for these.
204 #define FSTRENGTH_CUTOFF 3 257 #define FSTRENGTH_CUTOFF 2
205 258
206 static void SetupFilterStrength(VP8Encoder* const enc) { 259 static void SetupFilterStrength(VP8Encoder* const enc) {
207 int i; 260 int i;
208 const int level0 = enc->config_->filter_strength; 261 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
262 const int level0 = 5 * enc->config_->filter_strength;
209 for (i = 0; i < NUM_MB_SEGMENTS; ++i) { 263 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
210 // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS) 264 VP8SegmentInfo* const m = &enc->dqm_[i];
211 const int level = level0 * 256 * enc->dqm_[i].quant_ / 128; 265 // We focus on the quantization of AC coeffs.
212 const int f = level / (256 + enc->dqm_[i].beta_); 266 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
213 enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f; 267 const int base_strength =
268 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
269 // Segments with lower complexity ('beta') will be less filtered.
270 const int f = base_strength * level0 / (256 + m->beta_);
271 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
214 } 272 }
215 // We record the initial strength (mainly for the case of 1-segment only). 273 // We record the initial strength (mainly for the case of 1-segment only).
216 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_; 274 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
217 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0); 275 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
218 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness; 276 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
219 } 277 }
220 278
221 //------------------------------------------------------------------------------ 279 //------------------------------------------------------------------------------
222 280
223 // Note: if you change the values below, remember that the max range 281 // Note: if you change the values below, remember that the max range
224 // allowed by the syntax for DQ_UV is [-16,16]. 282 // allowed by the syntax for DQ_UV is [-16,16].
225 #define MAX_DQ_UV (6) 283 #define MAX_DQ_UV (6)
226 #define MIN_DQ_UV (-4) 284 #define MIN_DQ_UV (-4)
227 285
228 // We want to emulate jpeg-like behaviour where the expected "good" quality 286 // We want to emulate jpeg-like behaviour where the expected "good" quality
229 // is around q=75. Internally, our "good" middle is around c=50. So we 287 // is around q=75. Internally, our "good" middle is around c=50. So we
230 // map accordingly using linear piece-wise function 288 // map accordingly using linear piece-wise function
231 static double QualityToCompression(double c) { 289 static double QualityToCompression(double c) {
232 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.; 290 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
233 // The file size roughly scales as pow(quantizer, 3.). Actually, the 291 // The file size roughly scales as pow(quantizer, 3.). Actually, the
234 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested 292 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
235 // in the mid-quant range. So we scale the compressibility inversely to 293 // in the mid-quant range. So we scale the compressibility inversely to
236 // this power-law: quant ~= compression ^ 1/3. This law holds well for 294 // this power-law: quant ~= compression ^ 1/3. This law holds well for
237 // low quant. Finer modelling for high-quant would make use of kAcTable[] 295 // low quant. Finer modeling for high-quant would make use of kAcTable[]
238 // more explicitly. 296 // more explicitly.
239 const double v = pow(linear_c, 1 / 3.); 297 const double v = pow(linear_c, 1 / 3.);
240 return v; 298 return v;
241 } 299 }
242 300
243 static double QualityToJPEGCompression(double c, double alpha) { 301 static double QualityToJPEGCompression(double c, double alpha) {
244 // We map the complexity 'alpha' and quality setting 'c' to a compression 302 // We map the complexity 'alpha' and quality setting 'c' to a compression
245 // exponent empirically matched to the compression curve of libjpeg6b. 303 // exponent empirically matched to the compression curve of libjpeg6b.
246 // On average, the WebP output size will be roughly similar to that of a 304 // On average, the WebP output size will be roughly similar to that of a
247 // JPEG file compressed with same quality factor. 305 // JPEG file compressed with same quality factor.
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
360 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index 418 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
361 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 }; 419 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
362 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 }; 420 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
363 421
364 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index 422 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
365 const int VP8I4ModeOffsets[NUM_BMODES] = { 423 const int VP8I4ModeOffsets[NUM_BMODES] = {
366 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4 424 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
367 }; 425 };
368 426
369 void VP8MakeLuma16Preds(const VP8EncIterator* const it) { 427 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
370 const VP8Encoder* const enc = it->enc_; 428 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
371 const uint8_t* const left = it->x_ ? enc->y_left_ : NULL; 429 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
372 const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
373 VP8EncPredLuma16(it->yuv_p_, left, top); 430 VP8EncPredLuma16(it->yuv_p_, left, top);
374 } 431 }
375 432
376 void VP8MakeChroma8Preds(const VP8EncIterator* const it) { 433 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
377 const VP8Encoder* const enc = it->enc_; 434 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
378 const uint8_t* const left = it->x_ ? enc->u_left_ : NULL; 435 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
379 const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
380 VP8EncPredChroma8(it->yuv_p_, left, top); 436 VP8EncPredChroma8(it->yuv_p_, left, top);
381 } 437 }
382 438
383 void VP8MakeIntra4Preds(const VP8EncIterator* const it) { 439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
384 VP8EncPredLuma4(it->yuv_p_, it->i4_top_); 440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
385 } 441 }
386 442
387 //------------------------------------------------------------------------------ 443 //------------------------------------------------------------------------------
388 // Quantize 444 // Quantize
389 445
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
425 19, 17, 12, 8, 481 19, 17, 12, 8,
426 11, 10, 8, 6 482 11, 10, 8, 6
427 #endif 483 #endif
428 }; 484 };
429 485
430 // Init/Copy the common fields in score. 486 // Init/Copy the common fields in score.
431 static void InitScore(VP8ModeScore* const rd) { 487 static void InitScore(VP8ModeScore* const rd) {
432 rd->D = 0; 488 rd->D = 0;
433 rd->SD = 0; 489 rd->SD = 0;
434 rd->R = 0; 490 rd->R = 0;
491 rd->H = 0;
435 rd->nz = 0; 492 rd->nz = 0;
436 rd->score = MAX_COST; 493 rd->score = MAX_COST;
437 } 494 }
438 495
439 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { 496 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
440 dst->D = src->D; 497 dst->D = src->D;
441 dst->SD = src->SD; 498 dst->SD = src->SD;
442 dst->R = src->R; 499 dst->R = src->R;
500 dst->H = src->H;
443 dst->nz = src->nz; // note that nz is not accumulated, but just copied. 501 dst->nz = src->nz; // note that nz is not accumulated, but just copied.
444 dst->score = src->score; 502 dst->score = src->score;
445 } 503 }
446 504
447 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { 505 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
448 dst->D += src->D; 506 dst->D += src->D;
449 dst->SD += src->SD; 507 dst->SD += src->SD;
450 dst->R += src->R; 508 dst->R += src->R;
509 dst->H += src->H;
451 dst->nz |= src->nz; // here, new nz bits are accumulated. 510 dst->nz |= src->nz; // here, new nz bits are accumulated.
452 dst->score += src->score; 511 dst->score += src->score;
453 } 512 }
454 513
455 //------------------------------------------------------------------------------ 514 //------------------------------------------------------------------------------
456 // Performs trellis-optimized quantization. 515 // Performs trellis-optimized quantization.
457 516
458 // Trellis 517 // Trellis
459 518
460 typedef struct { 519 typedef struct {
461 int prev; // best previous 520 int prev; // best previous
462 int level; // level 521 int level; // level
463 int sign; // sign of coeff_i 522 int sign; // sign of coeff_i
464 score_t cost; // bit cost 523 score_t cost; // bit cost
465 score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2 524 score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2
466 int ctx; // context (only depends on 'level'. Could be spared.) 525 int ctx; // context (only depends on 'level'. Could be spared.)
467 } Node; 526 } Node;
468 527
469 // If a coefficient was quantized to a value Q (using a neutral bias), 528 // If a coefficient was quantized to a value Q (using a neutral bias),
470 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA] 529 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
471 // We don't test negative values though. 530 // We don't test negative values though.
472 #define MIN_DELTA 0 // how much lower level to try 531 #define MIN_DELTA 0 // how much lower level to try
473 #define MAX_DELTA 1 // how much higher 532 #define MAX_DELTA 1 // how much higher
474 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA) 533 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
475 #define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA]) 534 #define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])
476 535
477 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) { 536 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
478 // TODO: incorporate the "* 256" in the tables? 537 // TODO: incorporate the "* 256" in the tables?
479 rd->score = rd->R * lambda + 256 * (rd->D + rd->SD); 538 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
480 } 539 }
481 540
482 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, 541 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
483 score_t distortion) { 542 score_t distortion) {
484 return rate * lambda + 256 * distortion; 543 return rate * lambda + 256 * distortion;
485 } 544 }
486 545
487 static int TrellisQuantizeBlock(const VP8EncIterator* const it, 546 static int TrellisQuantizeBlock(const VP8EncIterator* const it,
488 int16_t in[16], int16_t out[16], 547 int16_t in[16], int16_t out[16],
489 int ctx0, int coeff_type, 548 int ctx0, int coeff_type,
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
532 591
533 // traverse trellis. 592 // traverse trellis.
534 for (n = first; n <= last; ++n) { 593 for (n = first; n <= last; ++n) {
535 const int j = kZigzag[n]; 594 const int j = kZigzag[n];
536 const int Q = mtx->q_[j]; 595 const int Q = mtx->q_[j];
537 const int iQ = mtx->iq_[j]; 596 const int iQ = mtx->iq_[j];
538 const int B = BIAS(0x00); // neutral bias 597 const int B = BIAS(0x00); // neutral bias
539 // note: it's important to take sign of the _original_ coeff, 598 // note: it's important to take sign of the _original_ coeff,
540 // so we don't have to consider level < 0 afterward. 599 // so we don't have to consider level < 0 afterward.
541 const int sign = (in[j] < 0); 600 const int sign = (in[j] < 0);
542 int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; 601 const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
543 int level0; 602 int level0 = QUANTDIV(coeff0, iQ, B);
544 if (coeff0 > 2047) coeff0 = 2047; 603 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
545 604
546 level0 = QUANTDIV(coeff0, iQ, B);
547 // test all alternate level values around level0. 605 // test all alternate level values around level0.
548 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { 606 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
549 Node* const cur = &NODE(n, m); 607 Node* const cur = &NODE(n, m);
550 int delta_error, new_error; 608 int delta_error, new_error;
551 score_t cur_score = MAX_COST; 609 score_t cur_score = MAX_COST;
552 int level = level0 + m; 610 int level = level0 + m;
553 int last_proba; 611 int last_proba;
554 612
555 cur->sign = sign; 613 cur->sign = sign;
556 cur->level = level; 614 cur->level = level;
557 cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2; 615 cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
558 if (level >= 2048 || level < 0) { // node is dead? 616 if (level > MAX_LEVEL || level < 0) { // node is dead?
559 cur->cost = MAX_COST; 617 cur->cost = MAX_COST;
560 continue; 618 continue;
561 } 619 }
562 last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0]; 620 last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];
563 621
564 // Compute delta_error = how much coding this level will 622 // Compute delta_error = how much coding this level will
565 // subtract as distortion to max_error 623 // subtract as distortion to max_error
566 new_error = coeff0 - level * Q; 624 new_error = coeff0 - level * Q;
567 delta_error = 625 delta_error =
568 kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error); 626 kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
641 699
642 //------------------------------------------------------------------------------ 700 //------------------------------------------------------------------------------
643 // Performs: difference, transform, quantize, back-transform, add 701 // Performs: difference, transform, quantize, back-transform, add
644 // all at once. Output is the reconstructed block in *yuv_out, and the 702 // all at once. Output is the reconstructed block in *yuv_out, and the
645 // quantized levels in *levels. 703 // quantized levels in *levels.
646 704
647 static int ReconstructIntra16(VP8EncIterator* const it, 705 static int ReconstructIntra16(VP8EncIterator* const it,
648 VP8ModeScore* const rd, 706 VP8ModeScore* const rd,
649 uint8_t* const yuv_out, 707 uint8_t* const yuv_out,
650 int mode) { 708 int mode) {
651 const VP8Encoder* const enc = it->enc_; 709 VP8Encoder* const enc = it->enc_;
652 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; 710 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
653 const uint8_t* const src = it->yuv_in_ + Y_OFF; 711 const uint8_t* const src = it->yuv_in_ + Y_OFF;
654 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 712 VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
655 int nz = 0; 713 int nz = 0;
656 int n; 714 int n;
657 int16_t tmp[16][16], dc_tmp[16]; 715 int16_t tmp[16][16], dc_tmp[16];
658 716
659 for (n = 0; n < 16; ++n) { 717 for (n = 0; n < 16; ++n) {
660 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]); 718 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
661 } 719 }
662 VP8FTransformWHT(tmp[0], dc_tmp); 720 VP8FTransformWHT(tmp[0], dc_tmp);
663 nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24; 721 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
664 722
665 if (DO_TRELLIS_I16 && it->do_trellis_) { 723 if (DO_TRELLIS_I16 && it->do_trellis_) {
666 int x, y; 724 int x, y;
667 VP8IteratorNzToBytes(it); 725 VP8IteratorNzToBytes(it);
668 for (y = 0, n = 0; y < 4; ++y) { 726 for (y = 0, n = 0; y < 4; ++y) {
669 for (x = 0; x < 4; ++x, ++n) { 727 for (x = 0; x < 4; ++x, ++n) {
670 const int ctx = it->top_nz_[x] + it->left_nz_[y]; 728 const int ctx = it->top_nz_[x] + it->left_nz_[y];
671 const int non_zero = 729 const int non_zero =
672 TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0, 730 TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,
673 &dqm->y1_, dqm->lambda_trellis_i16_); 731 &dqm->y1_, dqm->lambda_trellis_i16_);
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
748 } 806 }
749 807
750 for (n = 0; n < 8; n += 2) { 808 for (n = 0; n < 8; n += 2) {
751 VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1); 809 VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
752 } 810 }
753 return (nz << 16); 811 return (nz << 16);
754 } 812 }
755 813
756 //------------------------------------------------------------------------------ 814 //------------------------------------------------------------------------------
757 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost. 815 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
758 // Pick the mode with the lower RD-cost = Rate + lamba * Distortion. 816 // Pick the mode with the lower RD-cost = Rate + lambda * Distortion.
817
818 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
819 // We look at the first three AC coefficients to determine what is the average
820 // delta between each sub-4x4 block.
821 const int v0 = abs(DCs[1]);
822 const int v1 = abs(DCs[4]);
823 const int v2 = abs(DCs[5]);
824 int max_v = (v0 > v1) ? v1 : v0;
825 max_v = (v2 > max_v) ? v2 : max_v;
826 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
827 }
759 828
760 static void SwapPtr(uint8_t** a, uint8_t** b) { 829 static void SwapPtr(uint8_t** a, uint8_t** b) {
761 uint8_t* const tmp = *a; 830 uint8_t* const tmp = *a;
762 *a = *b; 831 *a = *b;
763 *b = tmp; 832 *b = tmp;
764 } 833 }
765 834
766 static void SwapOut(VP8EncIterator* const it) { 835 static void SwapOut(VP8EncIterator* const it) {
767 SwapPtr(&it->yuv_out_, &it->yuv_out2_); 836 SwapPtr(&it->yuv_out_, &it->yuv_out2_);
768 } 837 }
769 838
839 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
840 score_t score = 0;
841 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
842 int i;
843 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
844 score += (levels[i] != 0);
845 if (score > thresh) return 0;
846 }
847 levels += 16;
848 }
849 return 1;
850 }
851
770 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) { 852 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
771 const VP8Encoder* const enc = it->enc_; 853 const int kNumBlocks = 16;
772 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 854 VP8Encoder* const enc = it->enc_;
855 VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
773 const int lambda = dqm->lambda_i16_; 856 const int lambda = dqm->lambda_i16_;
774 const int tlambda = dqm->tlambda_; 857 const int tlambda = dqm->tlambda_;
775 const uint8_t* const src = it->yuv_in_ + Y_OFF; 858 const uint8_t* const src = it->yuv_in_ + Y_OFF;
776 VP8ModeScore rd16; 859 VP8ModeScore rd16;
777 int mode; 860 int mode;
778 861
779 rd->mode_i16 = -1; 862 rd->mode_i16 = -1;
780 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { 863 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
781 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer 864 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
782 int nz; 865 int nz;
783 866
784 // Reconstruct 867 // Reconstruct
785 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode); 868 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
786 869
787 // Measure RD-score 870 // Measure RD-score
788 rd16.D = VP8SSE16x16(src, tmp_dst); 871 rd16.D = VP8SSE16x16(src, tmp_dst);
789 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) 872 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
790 : 0; 873 : 0;
874 rd16.H = VP8FixedCostsI16[mode];
791 rd16.R = VP8GetCostLuma16(it, &rd16); 875 rd16.R = VP8GetCostLuma16(it, &rd16);
792 rd16.R += VP8FixedCostsI16[mode]; 876 if (mode > 0 &&
877 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
878 // penalty to avoid flat area to be mispredicted by complex mode
879 rd16.R += FLATNESS_PENALTY * kNumBlocks;
880 }
793 881
794 // Since we always examine Intra16 first, we can overwrite *rd directly. 882 // Since we always examine Intra16 first, we can overwrite *rd directly.
795 SetRDScore(lambda, &rd16); 883 SetRDScore(lambda, &rd16);
796 if (mode == 0 || rd16.score < rd->score) { 884 if (mode == 0 || rd16.score < rd->score) {
797 CopyScore(rd, &rd16); 885 CopyScore(rd, &rd16);
798 rd->mode_i16 = mode; 886 rd->mode_i16 = mode;
799 rd->nz = nz; 887 rd->nz = nz;
800 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels)); 888 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
801 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels)); 889 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
802 SwapOut(it); 890 SwapOut(it);
803 } 891 }
804 } 892 }
805 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision. 893 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
806 VP8SetIntra16Mode(it, rd->mode_i16); 894 VP8SetIntra16Mode(it, rd->mode_i16);
895
896 // we have a blocky macroblock (only DCs are non-zero) with fairly high
897 // distortion, record max delta so we can later adjust the minimal filtering
898 // strength needed to smooth these blocks out.
899 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
900 StoreMaxDelta(dqm, rd->y_dc_levels);
901 }
807 } 902 }
808 903
809 //------------------------------------------------------------------------------ 904 //------------------------------------------------------------------------------
810 905
811 // return the cost array corresponding to the surrounding prediction modes. 906 // return the cost array corresponding to the surrounding prediction modes.
812 static const uint16_t* GetCostModeI4(VP8EncIterator* const it, 907 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
813 const uint8_t modes[16]) { 908 const uint8_t modes[16]) {
814 const int preds_w = it->enc_->preds_w_; 909 const int preds_w = it->enc_->preds_w_;
815 const int x = (it->i4_ & 3), y = it->i4_ >> 2; 910 const int x = (it->i4_ & 3), y = it->i4_ >> 2;
816 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1]; 911 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
817 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4]; 912 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
818 return VP8FixedCostsI4[top][left]; 913 return VP8FixedCostsI4[top][left];
819 } 914 }
820 915
821 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { 916 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
822 const VP8Encoder* const enc = it->enc_; 917 const VP8Encoder* const enc = it->enc_;
823 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 918 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
824 const int lambda = dqm->lambda_i4_; 919 const int lambda = dqm->lambda_i4_;
825 const int tlambda = dqm->tlambda_; 920 const int tlambda = dqm->tlambda_;
826 const uint8_t* const src0 = it->yuv_in_ + Y_OFF; 921 const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
827 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF; 922 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
828 int total_header_bits = 0; 923 int total_header_bits = 0;
829 VP8ModeScore rd_best; 924 VP8ModeScore rd_best;
830 925
831 if (enc->max_i4_header_bits_ == 0) { 926 if (enc->max_i4_header_bits_ == 0) {
832 return 0; 927 return 0;
833 } 928 }
834 929
835 InitScore(&rd_best); 930 InitScore(&rd_best);
836 rd_best.score = 211; // '211' is the value of VP8BitCost(0, 145) 931 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
932 SetRDScore(dqm->lambda_mode_, &rd_best);
837 VP8IteratorStartI4(it); 933 VP8IteratorStartI4(it);
838 do { 934 do {
935 const int kNumBlocks = 1;
839 VP8ModeScore rd_i4; 936 VP8ModeScore rd_i4;
840 int mode; 937 int mode;
841 int best_mode = -1; 938 int best_mode = -1;
842 const uint8_t* const src = src0 + VP8Scan[it->i4_]; 939 const uint8_t* const src = src0 + VP8Scan[it->i4_];
843 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4); 940 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
844 uint8_t* best_block = best_blocks + VP8Scan[it->i4_]; 941 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
845 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer. 942 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
846 943
847 InitScore(&rd_i4); 944 InitScore(&rd_i4);
848 VP8MakeIntra4Preds(it); 945 VP8MakeIntra4Preds(it);
849 for (mode = 0; mode < NUM_BMODES; ++mode) { 946 for (mode = 0; mode < NUM_BMODES; ++mode) {
850 VP8ModeScore rd_tmp; 947 VP8ModeScore rd_tmp;
851 int16_t tmp_levels[16]; 948 int16_t tmp_levels[16];
852 949
853 // Reconstruct 950 // Reconstruct
854 rd_tmp.nz = 951 rd_tmp.nz =
855 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_; 952 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
856 953
857 // Compute RD-score 954 // Compute RD-score
858 rd_tmp.D = VP8SSE4x4(src, tmp_dst); 955 rd_tmp.D = VP8SSE4x4(src, tmp_dst);
859 rd_tmp.SD = 956 rd_tmp.SD =
860 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY)) 957 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
861 : 0; 958 : 0;
959 rd_tmp.H = mode_costs[mode];
862 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels); 960 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
863 rd_tmp.R += mode_costs[mode]; 961 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
962 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
963 }
864 964
865 SetRDScore(lambda, &rd_tmp); 965 SetRDScore(lambda, &rd_tmp);
866 if (best_mode < 0 || rd_tmp.score < rd_i4.score) { 966 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
867 CopyScore(&rd_i4, &rd_tmp); 967 CopyScore(&rd_i4, &rd_tmp);
868 best_mode = mode; 968 best_mode = mode;
869 SwapPtr(&tmp_dst, &best_block); 969 SwapPtr(&tmp_dst, &best_block);
870 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels)); 970 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
871 } 971 }
872 } 972 }
873 SetRDScore(dqm->lambda_mode_, &rd_i4); 973 SetRDScore(dqm->lambda_mode_, &rd_i4);
874 AddScore(&rd_best, &rd_i4); 974 AddScore(&rd_best, &rd_i4);
875 total_header_bits += mode_costs[best_mode]; 975 if (rd_best.score >= rd->score) {
876 if (rd_best.score >= rd->score || 976 return 0;
877 total_header_bits > enc->max_i4_header_bits_) { 977 }
978 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
979 if (total_header_bits > enc->max_i4_header_bits_) {
878 return 0; 980 return 0;
879 } 981 }
880 // Copy selected samples if not in the right place already. 982 // Copy selected samples if not in the right place already.
881 if (best_block != best_blocks + VP8Scan[it->i4_]) 983 if (best_block != best_blocks + VP8Scan[it->i4_]) {
882 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]); 984 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
985 }
883 rd->modes_i4[it->i4_] = best_mode; 986 rd->modes_i4[it->i4_] = best_mode;
884 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0); 987 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
885 } while (VP8IteratorRotateI4(it, best_blocks)); 988 } while (VP8IteratorRotateI4(it, best_blocks));
886 989
887 // finalize state 990 // finalize state
888 CopyScore(rd, &rd_best); 991 CopyScore(rd, &rd_best);
889 VP8SetIntra4Mode(it, rd->modes_i4); 992 VP8SetIntra4Mode(it, rd->modes_i4);
890 SwapOut(it); 993 SwapOut(it);
891 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels)); 994 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
892 return 1; // select intra4x4 over intra16x16 995 return 1; // select intra4x4 over intra16x16
893 } 996 }
894 997
895 //------------------------------------------------------------------------------ 998 //------------------------------------------------------------------------------
896 999
897 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { 1000 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
1001 const int kNumBlocks = 8;
898 const VP8Encoder* const enc = it->enc_; 1002 const VP8Encoder* const enc = it->enc_;
899 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; 1003 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
900 const int lambda = dqm->lambda_uv_; 1004 const int lambda = dqm->lambda_uv_;
901 const uint8_t* const src = it->yuv_in_ + U_OFF; 1005 const uint8_t* const src = it->yuv_in_ + U_OFF;
902 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer 1006 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer
903 uint8_t* const dst0 = it->yuv_out_ + U_OFF; 1007 uint8_t* const dst0 = it->yuv_out_ + U_OFF;
904 VP8ModeScore rd_best; 1008 VP8ModeScore rd_best;
905 int mode; 1009 int mode;
906 1010
907 rd->mode_uv = -1; 1011 rd->mode_uv = -1;
908 InitScore(&rd_best); 1012 InitScore(&rd_best);
909 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { 1013 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
910 VP8ModeScore rd_uv; 1014 VP8ModeScore rd_uv;
911 1015
912 // Reconstruct 1016 // Reconstruct
913 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode); 1017 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
914 1018
915 // Compute RD-score 1019 // Compute RD-score
916 rd_uv.D = VP8SSE16x8(src, tmp_dst); 1020 rd_uv.D = VP8SSE16x8(src, tmp_dst);
917 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas. 1021 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
1022 rd_uv.H = VP8FixedCostsUV[mode];
918 rd_uv.R = VP8GetCostUV(it, &rd_uv); 1023 rd_uv.R = VP8GetCostUV(it, &rd_uv);
919 rd_uv.R += VP8FixedCostsUV[mode]; 1024 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
1025 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
1026 }
920 1027
921 SetRDScore(lambda, &rd_uv); 1028 SetRDScore(lambda, &rd_uv);
922 if (mode == 0 || rd_uv.score < rd_best.score) { 1029 if (mode == 0 || rd_uv.score < rd_best.score) {
923 CopyScore(&rd_best, &rd_uv); 1030 CopyScore(&rd_best, &rd_uv);
924 rd->mode_uv = mode; 1031 rd->mode_uv = mode;
925 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); 1032 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
926 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ? 1033 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?
927 } 1034 }
928 } 1035 }
929 VP8SetIntraUVMode(it, rd->mode_uv); 1036 VP8SetIntraUVMode(it, rd->mode_uv);
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
1040 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). 1147 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
1041 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). 1148 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
1042 DistoRefine(it, (method >= 2)); 1149 DistoRefine(it, (method >= 2));
1043 SimpleQuantize(it, rd); 1150 SimpleQuantize(it, rd);
1044 } 1151 }
1045 is_skipped = (rd->nz == 0); 1152 is_skipped = (rd->nz == 0);
1046 VP8SetSkip(it, is_skipped); 1153 VP8SetSkip(it, is_skipped);
1047 return is_skipped; 1154 return is_skipped;
1048 } 1155 }
1049 1156
1050 #if defined(__cplusplus) || defined(c_plusplus)
1051 } // extern "C"
1052 #endif
OLDNEW
« no previous file with comments | « third_party/libwebp/enc/picture.c ('k') | third_party/libwebp/enc/syntax.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698