third_party/libwebp/enc/quant.c - Issue 116213006: Update libwebp to 0.4.0

Side by Side Diff: third_party/libwebp/enc/quant.c

Issue 116213006: Update libwebp to 0.4.0 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: After Blink Roll Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 Google Inc. All Rights Reserved.	1 // Copyright 2011 Google Inc. All Rights Reserved.

2 //	2 //

3 // Use of this source code is governed by a BSD-style license	3 // Use of this source code is governed by a BSD-style license

4 // that can be found in the COPYING file in the root of the source	4 // that can be found in the COPYING file in the root of the source

5 // tree. An additional intellectual property rights grant can be found	5 // tree. An additional intellectual property rights grant can be found

6 // in the file PATENTS. All contributing project authors may	6 // in the file PATENTS. All contributing project authors may

7 // be found in the AUTHORS file in the root of the source tree.	7 // be found in the AUTHORS file in the root of the source tree.

8 // -----------------------------------------------------------------------------	8 // -----------------------------------------------------------------------------

9 //	9 //

10 // Quantization	10 // Quantization

11 //	11 //

12 // Author: Skal (pascal.massimino@gmail.com)	12 // Author: Skal (pascal.massimino@gmail.com)

13	13

14 #include <assert.h>	14 #include <assert.h>

15 #include <math.h>	15 #include <math.h>

	16 #include <stdlib.h> // for abs()

16	17

17 #include "./vp8enci.h"	18 #include "./vp8enci.h"

18 #include "./cost.h"	19 #include "./cost.h"

19	20

20 #define DO_TRELLIS_I4 1	21 #define DO_TRELLIS_I4 1

21 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.	22 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.

22 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.	23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.

23 #define USE_TDISTO 1	24 #define USE_TDISTO 1

24	25

25 #define MID_ALPHA 64 // neutral value for susceptibility	26 #define MID_ALPHA 64 // neutral value for susceptibility

26 #define MIN_ALPHA 30 // lowest usable value for susceptibility	27 #define MIN_ALPHA 30 // lowest usable value for susceptibility

27 #define MAX_ALPHA 100 // higher meaninful value for susceptibility	28 #define MAX_ALPHA 100 // higher meaningful value for susceptibility

28	29

29 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP	30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP

30 // power-law modulation. Must be strictly less than 1.	31 // power-law modulation. Must be strictly less than 1.

31	32

32 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision	33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision

33	34

	35 // number of non-zero coeffs below which we consider the block very flat

	36 // (and apply a penalty to complex predictions)

	37 #define FLATNESS_LIMIT_I16 10 // I16 mode

	38 #define FLATNESS_LIMIT_I4 3 // I4 mode

	39 #define FLATNESS_LIMIT_UV 2 // UV mode

	40 #define FLATNESS_PENALTY 140 // roughly ~1bit per block

	41

34 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)	42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

35	43

36 #if defined(__cplusplus) || defined(c_plusplus)	44 // #define DEBUG_BLOCK

37 extern "C" {

38 #endif

39	45

40 //------------------------------------------------------------------------------	46 //------------------------------------------------------------------------------

41	47

	48 #if defined(DEBUG_BLOCK)

	49

	50 #include <stdio.h>

	51 #include <stdlib.h>

	52

	53 static void PrintBlockInfo(const VP8EncIterator* const it,

	54 const VP8ModeScore* const rd) {

	55 int i, j;

	56 const int is_i16 = (it->mb_->type_ == 1);

	57 printf("SOURCE / OUTPUT / ABS DELTA\n");

	58 for (j = 0; j < 24; ++j) {

	59 if (j == 16) printf("\n"); // newline before the U/V block

	60 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);

	61 printf(" ");

	62 for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);

	63 printf(" ");

	64 for (i = 0; i < 16; ++i) {

	65 printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));

	66 }

	67 printf("\n");

	68 }

	69 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",

	70 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,

	71 (int)rd->score);

	72 if (is_i16) {

	73 printf("Mode: %d\n", rd->mode_i16);

	74 printf("y_dc_levels:");

	75 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);

	76 printf("\n");

	77 } else {

	78 printf("Modes[16]: ");

	79 for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);

	80 printf("\n");

	81 }

	82 printf("y_ac_levels:\n");

	83 for (j = 0; j < 16; ++j) {

	84 for (i = is_i16 ? 1 : 0; i < 16; ++i) {

	85 printf("%4d ", rd->y_ac_levels[j][i]);

	86 }

	87 printf("\n");

	88 }

	89 printf("\n");

	90 printf("uv_levels (mode=%d):\n", rd->mode_uv);

	91 for (j = 0; j < 8; ++j) {

	92 for (i = 0; i < 16; ++i) {

	93 printf("%4d ", rd->uv_levels[j][i]);

	94 }

	95 printf("\n");

	96 }

	97 }

	98

	99 #endif // DEBUG_BLOCK

	100

	101 //------------------------------------------------------------------------------

	102

42 static WEBP_INLINE int clip(int v, int m, int M) {	103 static WEBP_INLINE int clip(int v, int m, int M) {

43 return v < m ? m : v > M ? M : v;	104 return v < m ? m : v > M ? M : v;

44 }	105 }

45	106

46 static const uint8_t kZigzag[16] = {	107 static const uint8_t kZigzag[16] = {

47 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15	108 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15

48 };	109 };

49	110

50 static const uint8_t kDcTable[128] = {	111 static const uint8_t kDcTable[128] = {

51 4, 5, 6, 7, 8, 9, 10, 10,	112 4, 5, 6, 7, 8, 9, 10, 10,

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
97 120, 124, 127, 130, 133, 136, 139, 142,	158 120, 124, 127, 130, 133, 136, 139, 142,

98 145, 148, 151, 155, 158, 161, 164, 167,	159 145, 148, 151, 155, 158, 161, 164, 167,

99 170, 173, 176, 179, 184, 189, 193, 198,	160 170, 173, 176, 179, 184, 189, 193, 198,

100 203, 207, 212, 217, 221, 226, 230, 235,	161 203, 207, 212, 217, 221, 226, 230, 235,

101 240, 244, 249, 254, 258, 263, 268, 274,	162 240, 244, 249, 254, 258, 263, 268, 274,

102 280, 286, 292, 299, 305, 311, 317, 323,	163 280, 286, 292, 299, 305, 311, 317, 323,

103 330, 336, 342, 348, 354, 362, 370, 379,	164 330, 336, 342, 348, 354, 362, 370, 379,

104 385, 393, 401, 409, 416, 424, 432, 440	165 385, 393, 401, 409, 416, 424, 432, 440

105 };	166 };

106	167

107 static const uint16_t kCoeffThresh[16] = {	168 static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]

108 0, 10, 20, 30,	169 { 96, 110 }, { 96, 108 }, { 110, 115 }

109 10, 20, 30, 30,

110 20, 30, 30, 30,

111 30, 30, 30, 30

112 };	170 };

113	171

114 // TODO(skal): tune more. Coeff thresholding?	172 // Sharpening by (slightly) raising the hi-frequency coeffs.

115 static const uint8_t kBiasMatrices[3][16] = { // [3] = [luma-ac,luma-dc,chroma]

116 { 96, 96, 96, 96,

117 96, 96, 96, 96,

118 96, 96, 96, 96,

119 96, 96, 96, 96 },

120 { 96, 96, 96, 96,

121 96, 96, 96, 96,

122 96, 96, 96, 96,

123 96, 96, 96, 96 },

124 { 96, 96, 96, 96,

125 96, 96, 96, 96,

126 96, 96, 96, 96,

127 96, 96, 96, 96 }

128 };

129

130 // Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).

131 // Hack-ish but helpful for mid-bitrate range. Use with care.	173 // Hack-ish but helpful for mid-bitrate range. Use with care.

	174 #define SHARPEN_BITS 11 // number of descaling bits for sharpening bias

132 static const uint8_t kFreqSharpening[16] = {	175 static const uint8_t kFreqSharpening[16] = {

133 0, 30, 60, 90,	176 0, 30, 60, 90,

134 30, 60, 90, 90,	177 30, 60, 90, 90,

135 60, 90, 90, 90,	178 60, 90, 90, 90,

136 90, 90, 90, 90	179 90, 90, 90, 90

137 };	180 };

138	181

139 //------------------------------------------------------------------------------	182 //------------------------------------------------------------------------------

140 // Initialize quantization parameters in VP8Matrix	183 // Initialize quantization parameters in VP8Matrix

141	184

142 // Returns the average quantizer	185 // Returns the average quantizer

143 static int ExpandMatrix(VP8Matrix* const m, int type) {	186 static int ExpandMatrix(VP8Matrix* const m, int type) {

144 int i;	187 int i, sum;

145 int sum = 0;	188 for (i = 0; i < 2; ++i) {

	189 const int is_ac_coeff = (i > 0);

	190 const int bias = kBiasMatrices[type][is_ac_coeff];

	191 m->iq_[i] = (1 << QFIX) / m->q_[i];

	192 m->bias_[i] = BIAS(bias);

	193 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:

	194 // * zero if coeff <= zthresh

	195 // * non-zero if coeff > zthresh

	196 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];

	197 }

146 for (i = 2; i < 16; ++i) {	198 for (i = 2; i < 16; ++i) {

147 m->q_[i] = m->q_[1];	199 m->q_[i] = m->q_[1];

	200 m->iq_[i] = m->iq_[1];

	201 m->bias_[i] = m->bias_[1];

	202 m->zthresh_[i] = m->zthresh_[1];

148 }	203 }

149 for (i = 0; i < 16; ++i) {	204 for (sum = 0, i = 0; i < 16; ++i) {

150 const int j = kZigzag[i];	205 if (type == 0) { // we only use sharpening for AC luma coeffs

151 const int bias = kBiasMatrices[type][j];	206 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;

152 m->iq_[j] = (1 << QFIX) / m->q_[j];	207 } else {

153 m->bias_[j] = BIAS(bias);	208 m->sharpen_[i] = 0;

154 // TODO(skal): tune kCoeffThresh[]	209 }

155 m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8;	210 sum += m->q_[i];

156 m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;

157 sum += m->q_[j];

158 }	211 }

159 return (sum + 8) >> 4;	212 return (sum + 8) >> 4;

160 }	213 }

161	214

162 static void SetupMatrices(VP8Encoder* enc) {	215 static void SetupMatrices(VP8Encoder* enc) {

163 int i;	216 int i;

164 const int tlambda_scale =	217 const int tlambda_scale =

165 (enc->method_ >= 4) ? enc->config_->sns_strength	218 (enc->method_ >= 4) ? enc->config_->sns_strength

166 : 0;	219 : 0;

167 const int num_segments = enc->segment_hdr_.num_segments_;	220 const int num_segments = enc->segment_hdr_.num_segments_;

168 for (i = 0; i < num_segments; ++i) {	221 for (i = 0; i < num_segments; ++i) {

169 VP8SegmentInfo* const m = &enc->dqm_[i];	222 VP8SegmentInfo* const m = &enc->dqm_[i];

170 const int q = m->quant_;	223 const int q = m->quant_;

171 int q4, q16, quv;	224 int q4, q16, quv;

172 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];	225 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];

173 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];	226 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];

174	227

175 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;	228 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;

176 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];	229 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];

177	230

178 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];	231 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];

179 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];	232 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];

180	233

181 q4 = ExpandMatrix(&m->y1_, 0);	234 q4 = ExpandMatrix(&m->y1_, 0);

182 q16 = ExpandMatrix(&m->y2_, 1);	235 q16 = ExpandMatrix(&m->y2_, 1);

183 quv = ExpandMatrix(&m->uv_, 2);	236 quv = ExpandMatrix(&m->uv_, 2);

184	237

185 // TODO: Switch to kLambda*[] tables?	238 m->lambda_i4_ = (3 * q4 * q4) >> 7;

186 {	239 m->lambda_i16_ = (3 * q16 * q16);

187 m->lambda_i4_ = (3 * q4 * q4) >> 7;	240 m->lambda_uv_ = (3 * quv * quv) >> 6;

188 m->lambda_i16_ = (3 * q16 * q16);	241 m->lambda_mode_ = (1 * q4 * q4) >> 7;

189 m->lambda_uv_ = (3 * quv * quv) >> 6;	242 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;

190 m->lambda_mode_ = (1 * q4 * q4) >> 7;	243 m->lambda_trellis_i16_ = (q16 * q16) >> 2;

191 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;	244 m->lambda_trellis_uv_ = (quv *quv) << 1;

192 m->lambda_trellis_i16_ = (q16 * q16) >> 2;	245 m->tlambda_ = (tlambda_scale * q4) >> 5;

193 m->lambda_trellis_uv_ = (quv *quv) << 1;	246

194 m->tlambda_ = (tlambda_scale * q4) >> 5;	247 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto

195 }	248 m->max_edge_ = 0;

196 }	249 }

197 }	250 }

198	251

199 //------------------------------------------------------------------------------	252 //------------------------------------------------------------------------------

200 // Initialize filtering parameters	253 // Initialize filtering parameters

201	254

202 // Very small filter-strength values have close to no visual effect. So we can	255 // Very small filter-strength values have close to no visual effect. So we can

203 // save a little decoding-CPU by turning filtering off for these.	256 // save a little decoding-CPU by turning filtering off for these.

204 #define FSTRENGTH_CUTOFF 3	257 #define FSTRENGTH_CUTOFF 2

205	258

206 static void SetupFilterStrength(VP8Encoder* const enc) {	259 static void SetupFilterStrength(VP8Encoder* const enc) {

207 int i;	260 int i;

208 const int level0 = enc->config_->filter_strength;	261 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.

	262 const int level0 = 5 * enc->config_->filter_strength;

209 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {	263 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {

210 // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)	264 VP8SegmentInfo* const m = &enc->dqm_[i];

211 const int level = level0 * 256 * enc->dqm_[i].quant_ / 128;	265 // We focus on the quantization of AC coeffs.

212 const int f = level / (256 + enc->dqm_[i].beta_);	266 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;

213 enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;	267 const int base_strength =

	268 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);

	269 // Segments with lower complexity ('beta') will be less filtered.

	270 const int f = base_strength * level0 / (256 + m->beta_);

	271 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;

214 }	272 }

215 // We record the initial strength (mainly for the case of 1-segment only).	273 // We record the initial strength (mainly for the case of 1-segment only).

216 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;	274 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;

217 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);	275 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);

218 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;	276 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;

219 }	277 }

220	278

221 //------------------------------------------------------------------------------	279 //------------------------------------------------------------------------------

222	280

223 // Note: if you change the values below, remember that the max range	281 // Note: if you change the values below, remember that the max range

224 // allowed by the syntax for DQ_UV is [-16,16].	282 // allowed by the syntax for DQ_UV is [-16,16].

225 #define MAX_DQ_UV (6)	283 #define MAX_DQ_UV (6)

226 #define MIN_DQ_UV (-4)	284 #define MIN_DQ_UV (-4)

227	285

228 // We want to emulate jpeg-like behaviour where the expected "good" quality	286 // We want to emulate jpeg-like behaviour where the expected "good" quality

229 // is around q=75. Internally, our "good" middle is around c=50. So we	287 // is around q=75. Internally, our "good" middle is around c=50. So we

230 // map accordingly using linear piece-wise function	288 // map accordingly using linear piece-wise function

231 static double QualityToCompression(double c) {	289 static double QualityToCompression(double c) {

232 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;	290 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;

233 // The file size roughly scales as pow(quantizer, 3.). Actually, the	291 // The file size roughly scales as pow(quantizer, 3.). Actually, the

234 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested	292 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested

235 // in the mid-quant range. So we scale the compressibility inversely to	293 // in the mid-quant range. So we scale the compressibility inversely to

236 // this power-law: quant ~= compression ^ 1/3. This law holds well for	294 // this power-law: quant ~= compression ^ 1/3. This law holds well for

237 // low quant. Finer modelling for high-quant would make use of kAcTable[]	295 // low quant. Finer modeling for high-quant would make use of kAcTable[]

238 // more explicitly.	296 // more explicitly.

239 const double v = pow(linear_c, 1 / 3.);	297 const double v = pow(linear_c, 1 / 3.);

240 return v;	298 return v;

241 }	299 }

242	300

243 static double QualityToJPEGCompression(double c, double alpha) {	301 static double QualityToJPEGCompression(double c, double alpha) {

244 // We map the complexity 'alpha' and quality setting 'c' to a compression	302 // We map the complexity 'alpha' and quality setting 'c' to a compression

245 // exponent empirically matched to the compression curve of libjpeg6b.	303 // exponent empirically matched to the compression curve of libjpeg6b.

246 // On average, the WebP output size will be roughly similar to that of a	304 // On average, the WebP output size will be roughly similar to that of a

247 // JPEG file compressed with same quality factor.	305 // JPEG file compressed with same quality factor.

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
360 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index	418 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index

361 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };	419 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };

362 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };	420 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };

363	421

364 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index	422 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index

365 const int VP8I4ModeOffsets[NUM_BMODES] = {	423 const int VP8I4ModeOffsets[NUM_BMODES] = {

366 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4	424 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4

367 };	425 };

368	426

369 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {	427 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {

370 const VP8Encoder* const enc = it->enc_;	428 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;

371 const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;	429 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;

372 const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;

373 VP8EncPredLuma16(it->yuv_p_, left, top);	430 VP8EncPredLuma16(it->yuv_p_, left, top);

374 }	431 }

375	432

376 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {	433 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {

377 const VP8Encoder* const enc = it->enc_;	434 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;

378 const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;	435 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;

379 const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;

380 VP8EncPredChroma8(it->yuv_p_, left, top);	436 VP8EncPredChroma8(it->yuv_p_, left, top);

381 }	437 }

382	438

383 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {	439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {

384 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);	440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);

385 }	441 }

386	442

387 //------------------------------------------------------------------------------	443 //------------------------------------------------------------------------------

388 // Quantize	444 // Quantize

389	445

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
425 19, 17, 12, 8,	481 19, 17, 12, 8,

426 11, 10, 8, 6	482 11, 10, 8, 6

427 #endif	483 #endif

428 };	484 };

429	485

430 // Init/Copy the common fields in score.	486 // Init/Copy the common fields in score.

431 static void InitScore(VP8ModeScore* const rd) {	487 static void InitScore(VP8ModeScore* const rd) {

432 rd->D = 0;	488 rd->D = 0;

433 rd->SD = 0;	489 rd->SD = 0;

434 rd->R = 0;	490 rd->R = 0;

	491 rd->H = 0;

435 rd->nz = 0;	492 rd->nz = 0;

436 rd->score = MAX_COST;	493 rd->score = MAX_COST;

437 }	494 }

438	495

439 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {	496 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

440 dst->D = src->D;	497 dst->D = src->D;

441 dst->SD = src->SD;	498 dst->SD = src->SD;

442 dst->R = src->R;	499 dst->R = src->R;

	500 dst->H = src->H;

443 dst->nz = src->nz; // note that nz is not accumulated, but just copied.	501 dst->nz = src->nz; // note that nz is not accumulated, but just copied.

444 dst->score = src->score;	502 dst->score = src->score;

445 }	503 }

446	504

447 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {	505 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

448 dst->D += src->D;	506 dst->D += src->D;

449 dst->SD += src->SD;	507 dst->SD += src->SD;

450 dst->R += src->R;	508 dst->R += src->R;

	509 dst->H += src->H;

451 dst->nz |= src->nz; // here, new nz bits are accumulated.	510 dst->nz |= src->nz; // here, new nz bits are accumulated.

452 dst->score += src->score;	511 dst->score += src->score;

453 }	512 }

454	513

455 //------------------------------------------------------------------------------	514 //------------------------------------------------------------------------------

456 // Performs trellis-optimized quantization.	515 // Performs trellis-optimized quantization.

457	516

458 // Trellis	517 // Trellis

459	518

460 typedef struct {	519 typedef struct {

461 int prev; // best previous	520 int prev; // best previous

462 int level; // level	521 int level; // level

463 int sign; // sign of coeff_i	522 int sign; // sign of coeff_i

464 score_t cost; // bit cost	523 score_t cost; // bit cost

465 score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2	524 score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2

466 int ctx; // context (only depends on 'level'. Could be spared.)	525 int ctx; // context (only depends on 'level'. Could be spared.)

467 } Node;	526 } Node;

468	527

469 // If a coefficient was quantized to a value Q (using a neutral bias),	528 // If a coefficient was quantized to a value Q (using a neutral bias),

470 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]	529 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]

471 // We don't test negative values though.	530 // We don't test negative values though.

472 #define MIN_DELTA 0 // how much lower level to try	531 #define MIN_DELTA 0 // how much lower level to try

473 #define MAX_DELTA 1 // how much higher	532 #define MAX_DELTA 1 // how much higher

474 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)	533 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)

475 #define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])	534 #define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])

476	535

477 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {	536 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {

478 // TODO: incorporate the "* 256" in the tables?	537 // TODO: incorporate the "* 256" in the tables?

479 rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);	538 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);

480 }	539 }

481	540

482 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,	541 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,

483 score_t distortion) {	542 score_t distortion) {

484 return rate * lambda + 256 * distortion;	543 return rate * lambda + 256 * distortion;

485 }	544 }

486	545

487 static int TrellisQuantizeBlock(const VP8EncIterator* const it,	546 static int TrellisQuantizeBlock(const VP8EncIterator* const it,

488 int16_t in[16], int16_t out[16],	547 int16_t in[16], int16_t out[16],

489 int ctx0, int coeff_type,	548 int ctx0, int coeff_type,

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
532	591

533 // traverse trellis.	592 // traverse trellis.

534 for (n = first; n <= last; ++n) {	593 for (n = first; n <= last; ++n) {

535 const int j = kZigzag[n];	594 const int j = kZigzag[n];

536 const int Q = mtx->q_[j];	595 const int Q = mtx->q_[j];

537 const int iQ = mtx->iq_[j];	596 const int iQ = mtx->iq_[j];

538 const int B = BIAS(0x00); // neutral bias	597 const int B = BIAS(0x00); // neutral bias

539 // note: it's important to take sign of the _original_ coeff,	598 // note: it's important to take sign of the _original_ coeff,

540 // so we don't have to consider level < 0 afterward.	599 // so we don't have to consider level < 0 afterward.

541 const int sign = (in[j] < 0);	600 const int sign = (in[j] < 0);

542 int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];	601 const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];

543 int level0;	602 int level0 = QUANTDIV(coeff0, iQ, B);

544 if (coeff0 > 2047) coeff0 = 2047;	603 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;

545	604

546 level0 = QUANTDIV(coeff0, iQ, B);

547 // test all alternate level values around level0.	605 // test all alternate level values around level0.

548 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {	606 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {

549 Node* const cur = &NODE(n, m);	607 Node* const cur = &NODE(n, m);

550 int delta_error, new_error;	608 int delta_error, new_error;

551 score_t cur_score = MAX_COST;	609 score_t cur_score = MAX_COST;

552 int level = level0 + m;	610 int level = level0 + m;

553 int last_proba;	611 int last_proba;

554	612

555 cur->sign = sign;	613 cur->sign = sign;

556 cur->level = level;	614 cur->level = level;

557 cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;	615 cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;

558 if (level >= 2048 || level < 0) { // node is dead?	616 if (level > MAX_LEVEL || level < 0) { // node is dead?

559 cur->cost = MAX_COST;	617 cur->cost = MAX_COST;

560 continue;	618 continue;

561 }	619 }

562 last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];	620 last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];

563	621

564 // Compute delta_error = how much coding this level will	622 // Compute delta_error = how much coding this level will

565 // subtract as distortion to max_error	623 // subtract as distortion to max_error

566 new_error = coeff0 - level * Q;	624 new_error = coeff0 - level * Q;

567 delta_error =	625 delta_error =

568 kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);	626 kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
641	699

642 //------------------------------------------------------------------------------	700 //------------------------------------------------------------------------------

643 // Performs: difference, transform, quantize, back-transform, add	701 // Performs: difference, transform, quantize, back-transform, add

644 // all at once. Output is the reconstructed block in *yuv_out, and the	702 // all at once. Output is the reconstructed block in *yuv_out, and the

645 // quantized levels in *levels.	703 // quantized levels in *levels.

646	704

647 static int ReconstructIntra16(VP8EncIterator* const it,	705 static int ReconstructIntra16(VP8EncIterator* const it,

648 VP8ModeScore* const rd,	706 VP8ModeScore* const rd,

649 uint8_t* const yuv_out,	707 uint8_t* const yuv_out,

650 int mode) {	708 int mode) {

651 const VP8Encoder* const enc = it->enc_;	709 VP8Encoder* const enc = it->enc_;

652 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];	710 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

653 const uint8_t* const src = it->yuv_in_ + Y_OFF;	711 const uint8_t* const src = it->yuv_in_ + Y_OFF;

654 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];	712 VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

655 int nz = 0;	713 int nz = 0;

656 int n;	714 int n;

657 int16_t tmp[16][16], dc_tmp[16];	715 int16_t tmp[16][16], dc_tmp[16];

658	716

659 for (n = 0; n < 16; ++n) {	717 for (n = 0; n < 16; ++n) {

660 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);	718 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);

661 }	719 }

662 VP8FTransformWHT(tmp[0], dc_tmp);	720 VP8FTransformWHT(tmp[0], dc_tmp);

663 nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;	721 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;

664	722

665 if (DO_TRELLIS_I16 && it->do_trellis_) {	723 if (DO_TRELLIS_I16 && it->do_trellis_) {

666 int x, y;	724 int x, y;

667 VP8IteratorNzToBytes(it);	725 VP8IteratorNzToBytes(it);

668 for (y = 0, n = 0; y < 4; ++y) {	726 for (y = 0, n = 0; y < 4; ++y) {

669 for (x = 0; x < 4; ++x, ++n) {	727 for (x = 0; x < 4; ++x, ++n) {

670 const int ctx = it->top_nz_[x] + it->left_nz_[y];	728 const int ctx = it->top_nz_[x] + it->left_nz_[y];

671 const int non_zero =	729 const int non_zero =

672 TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,	730 TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,

673 &dqm->y1_, dqm->lambda_trellis_i16_);	731 &dqm->y1_, dqm->lambda_trellis_i16_);

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
748 }	806 }

749	807

750 for (n = 0; n < 8; n += 2) {	808 for (n = 0; n < 8; n += 2) {

751 VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);	809 VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);

752 }	810 }

753 return (nz << 16);	811 return (nz << 16);

754 }	812 }

755	813

756 //------------------------------------------------------------------------------	814 //------------------------------------------------------------------------------

757 // RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.	815 // RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.

758 // Pick the mode is lower RD-cost = Rate + lamba * Distortion.	816 // Pick the mode is lower RD-cost = Rate + lambda * Distortion.

	817

	818 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {

	819 // We look at the first three AC coefficients to determine what is the average

	820 // delta between each sub-4x4 block.

	821 const int v0 = abs(DCs[1]);

	822 const int v1 = abs(DCs[4]);

	823 const int v2 = abs(DCs[5]);

	824 int max_v = (v0 > v1) ? v1 : v0;

	825 max_v = (v2 > max_v) ? v2 : max_v;

	826 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;

	827 }

759	828

760 static void SwapPtr(uint8_t** a, uint8_t** b) {	829 static void SwapPtr(uint8_t** a, uint8_t** b) {

761 uint8_t* const tmp = *a;	830 uint8_t* const tmp = *a;

762 *a = *b;	831 *a = *b;

763 *b = tmp;	832 *b = tmp;

764 }	833 }

765	834

766 static void SwapOut(VP8EncIterator* const it) {	835 static void SwapOut(VP8EncIterator* const it) {

767 SwapPtr(&it->yuv_out_, &it->yuv_out2_);	836 SwapPtr(&it->yuv_out_, &it->yuv_out2_);

768 }	837 }

769	838

	839 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {

	840 score_t score = 0;

	841 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?

	842 int i;

	843 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC

	844 score += (levels[i] != 0);

	845 if (score > thresh) return 0;

	846 }

	847 levels += 16;

	848 }

	849 return 1;

	850 }

	851

770 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {	852 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {

771 const VP8Encoder* const enc = it->enc_;	853 const int kNumBlocks = 16;

772 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];	854 VP8Encoder* const enc = it->enc_;

	855 VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

773 const int lambda = dqm->lambda_i16_;	856 const int lambda = dqm->lambda_i16_;

774 const int tlambda = dqm->tlambda_;	857 const int tlambda = dqm->tlambda_;

775 const uint8_t* const src = it->yuv_in_ + Y_OFF;	858 const uint8_t* const src = it->yuv_in_ + Y_OFF;

776 VP8ModeScore rd16;	859 VP8ModeScore rd16;

777 int mode;	860 int mode;

778	861

779 rd->mode_i16 = -1;	862 rd->mode_i16 = -1;

780 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {	863 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

781 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer	864 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer

782 int nz;	865 int nz;

783	866

784 // Reconstruct	867 // Reconstruct

785 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);	868 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);

786	869

787 // Measure RD-score	870 // Measure RD-score

788 rd16.D = VP8SSE16x16(src, tmp_dst);	871 rd16.D = VP8SSE16x16(src, tmp_dst);

789 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))	872 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))

790 : 0;	873 : 0;

	874 rd16.H = VP8FixedCostsI16[mode];

791 rd16.R = VP8GetCostLuma16(it, &rd16);	875 rd16.R = VP8GetCostLuma16(it, &rd16);

792 rd16.R += VP8FixedCostsI16[mode];	876 if (mode > 0 &&

	877 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {

	878 // penalty to avoid flat area to be mispredicted by complex mode

	879 rd16.R += FLATNESS_PENALTY * kNumBlocks;

	880 }

793	881

794 // Since we always examine Intra16 first, we can overwrite *rd directly.	882 // Since we always examine Intra16 first, we can overwrite *rd directly.

795 SetRDScore(lambda, &rd16);	883 SetRDScore(lambda, &rd16);

796 if (mode == 0 || rd16.score < rd->score) {	884 if (mode == 0 || rd16.score < rd->score) {

797 CopyScore(rd, &rd16);	885 CopyScore(rd, &rd16);

798 rd->mode_i16 = mode;	886 rd->mode_i16 = mode;

799 rd->nz = nz;	887 rd->nz = nz;

800 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));	888 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));

801 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));	889 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));

802 SwapOut(it);	890 SwapOut(it);

803 }	891 }

804 }	892 }

805 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.	893 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.

806 VP8SetIntra16Mode(it, rd->mode_i16);	894 VP8SetIntra16Mode(it, rd->mode_i16);

	895

	896 // we have a blocky macroblock (only DCs are non-zero) with fairly high

	897 // distortion, record max delta so we can later adjust the minimal filtering

	898 // strength needed to smooth these blocks out.

	899 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {

	900 StoreMaxDelta(dqm, rd->y_dc_levels);

	901 }

807 }	902 }

808	903

809 //------------------------------------------------------------------------------	904 //------------------------------------------------------------------------------

810	905

811 // return the cost array corresponding to the surrounding prediction modes.	906 // return the cost array corresponding to the surrounding prediction modes.

812 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,	907 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,

813 const uint8_t modes[16]) {	908 const uint8_t modes[16]) {

814 const int preds_w = it->enc_->preds_w_;	909 const int preds_w = it->enc_->preds_w_;

815 const int x = (it->i4_ & 3), y = it->i4_ >> 2;	910 const int x = (it->i4_ & 3), y = it->i4_ >> 2;

816 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];	911 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];

817 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];	912 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];

818 return VP8FixedCostsI4[top][left];	913 return VP8FixedCostsI4[top][left];

819 }	914 }

820	915

821 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {	916 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

822 const VP8Encoder* const enc = it->enc_;	917 const VP8Encoder* const enc = it->enc_;

823 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];	918 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

824 const int lambda = dqm->lambda_i4_;	919 const int lambda = dqm->lambda_i4_;

825 const int tlambda = dqm->tlambda_;	920 const int tlambda = dqm->tlambda_;

826 const uint8_t* const src0 = it->yuv_in_ + Y_OFF;	921 const uint8_t* const src0 = it->yuv_in_ + Y_OFF;

827 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;	922 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;

828 int total_header_bits = 0;	923 int total_header_bits = 0;

829 VP8ModeScore rd_best;	924 VP8ModeScore rd_best;

830	925

831 if (enc->max_i4_header_bits_ == 0) {	926 if (enc->max_i4_header_bits_ == 0) {

832 return 0;	927 return 0;

833 }	928 }

834	929

835 InitScore(&rd_best);	930 InitScore(&rd_best);

836 rd_best.score = 211; // '211' is the value of VP8BitCost(0, 145)	931 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)

	932 SetRDScore(dqm->lambda_mode_, &rd_best);

837 VP8IteratorStartI4(it);	933 VP8IteratorStartI4(it);

838 do {	934 do {

	935 const int kNumBlocks = 1;

839 VP8ModeScore rd_i4;	936 VP8ModeScore rd_i4;

840 int mode;	937 int mode;

841 int best_mode = -1;	938 int best_mode = -1;

842 const uint8_t* const src = src0 + VP8Scan[it->i4_];	939 const uint8_t* const src = src0 + VP8Scan[it->i4_];

843 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);	940 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);

844 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];	941 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];

845 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.	942 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.

846	943

847 InitScore(&rd_i4);	944 InitScore(&rd_i4);

848 VP8MakeIntra4Preds(it);	945 VP8MakeIntra4Preds(it);

849 for (mode = 0; mode < NUM_BMODES; ++mode) {	946 for (mode = 0; mode < NUM_BMODES; ++mode) {

850 VP8ModeScore rd_tmp;	947 VP8ModeScore rd_tmp;

851 int16_t tmp_levels[16];	948 int16_t tmp_levels[16];

852	949

853 // Reconstruct	950 // Reconstruct

854 rd_tmp.nz =	951 rd_tmp.nz =

855 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;	952 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;

856	953

857 // Compute RD-score	954 // Compute RD-score

858 rd_tmp.D = VP8SSE4x4(src, tmp_dst);	955 rd_tmp.D = VP8SSE4x4(src, tmp_dst);

859 rd_tmp.SD =	956 rd_tmp.SD =

860 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))	957 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))

861 : 0;	958 : 0;

	959 rd_tmp.H = mode_costs[mode];

862 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);	960 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);

863 rd_tmp.R += mode_costs[mode];	961 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {

	962 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;

	963 }

864	964

865 SetRDScore(lambda, &rd_tmp);	965 SetRDScore(lambda, &rd_tmp);

866 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {	966 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {

867 CopyScore(&rd_i4, &rd_tmp);	967 CopyScore(&rd_i4, &rd_tmp);

868 best_mode = mode;	968 best_mode = mode;

869 SwapPtr(&tmp_dst, &best_block);	969 SwapPtr(&tmp_dst, &best_block);

870 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));	970 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));

871 }	971 }

872 }	972 }

873 SetRDScore(dqm->lambda_mode_, &rd_i4);	973 SetRDScore(dqm->lambda_mode_, &rd_i4);

874 AddScore(&rd_best, &rd_i4);	974 AddScore(&rd_best, &rd_i4);

875 total_header_bits += mode_costs[best_mode];	975 if (rd_best.score >= rd->score) {

876 if (rd_best.score >= rd->score ||	976 return 0;

877 total_header_bits > enc->max_i4_header_bits_) {	977 }

	978 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];

	979 if (total_header_bits > enc->max_i4_header_bits_) {

878 return 0;	980 return 0;

879 }	981 }

880 // Copy selected samples if not in the right place already.	982 // Copy selected samples if not in the right place already.

881 if (best_block != best_blocks + VP8Scan[it->i4_])	983 if (best_block != best_blocks + VP8Scan[it->i4_]) {

882 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);	984 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);

	985 }

883 rd->modes_i4[it->i4_] = best_mode;	986 rd->modes_i4[it->i4_] = best_mode;

884 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);	987 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);

885 } while (VP8IteratorRotateI4(it, best_blocks));	988 } while (VP8IteratorRotateI4(it, best_blocks));

886	989

887 // finalize state	990 // finalize state

888 CopyScore(rd, &rd_best);	991 CopyScore(rd, &rd_best);

889 VP8SetIntra4Mode(it, rd->modes_i4);	992 VP8SetIntra4Mode(it, rd->modes_i4);

890 SwapOut(it);	993 SwapOut(it);

891 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));	994 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));

892 return 1; // select intra4x4 over intra16x16	995 return 1; // select intra4x4 over intra16x16

893 }	996 }

894	997

895 //------------------------------------------------------------------------------	998 //------------------------------------------------------------------------------

896	999

897 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {	1000 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {

	1001 const int kNumBlocks = 8;

898 const VP8Encoder* const enc = it->enc_;	1002 const VP8Encoder* const enc = it->enc_;

899 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];	1003 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

900 const int lambda = dqm->lambda_uv_;	1004 const int lambda = dqm->lambda_uv_;

901 const uint8_t* const src = it->yuv_in_ + U_OFF;	1005 const uint8_t* const src = it->yuv_in_ + U_OFF;

902 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer	1006 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer

903 uint8_t* const dst0 = it->yuv_out_ + U_OFF;	1007 uint8_t* const dst0 = it->yuv_out_ + U_OFF;

904 VP8ModeScore rd_best;	1008 VP8ModeScore rd_best;

905 int mode;	1009 int mode;

906	1010

907 rd->mode_uv = -1;	1011 rd->mode_uv = -1;

908 InitScore(&rd_best);	1012 InitScore(&rd_best);

909 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {	1013 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

910 VP8ModeScore rd_uv;	1014 VP8ModeScore rd_uv;

911	1015

912 // Reconstruct	1016 // Reconstruct

913 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);	1017 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);

914	1018

915 // Compute RD-score	1019 // Compute RD-score

916 rd_uv.D = VP8SSE16x8(src, tmp_dst);	1020 rd_uv.D = VP8SSE16x8(src, tmp_dst);

917 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.	1021 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.

	1022 rd_uv.H = VP8FixedCostsUV[mode];

918 rd_uv.R = VP8GetCostUV(it, &rd_uv);	1023 rd_uv.R = VP8GetCostUV(it, &rd_uv);

919 rd_uv.R += VP8FixedCostsUV[mode];	1024 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {

	1025 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;

	1026 }

920	1027

921 SetRDScore(lambda, &rd_uv);	1028 SetRDScore(lambda, &rd_uv);

922 if (mode == 0 || rd_uv.score < rd_best.score) {	1029 if (mode == 0 || rd_uv.score < rd_best.score) {

923 CopyScore(&rd_best, &rd_uv);	1030 CopyScore(&rd_best, &rd_uv);

924 rd->mode_uv = mode;	1031 rd->mode_uv = mode;

925 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));	1032 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));

926 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?	1033 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?

927 }	1034 }

928 }	1035 }

929 VP8SetIntraUVMode(it, rd->mode_uv);	1036 VP8SetIntraUVMode(it, rd->mode_uv);

(...skipping 110 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1040 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).	1147 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).

1041 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).	1148 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).

1042 DistoRefine(it, (method >= 2));	1149 DistoRefine(it, (method >= 2));

1043 SimpleQuantize(it, rd);	1150 SimpleQuantize(it, rd);

1044 }	1151 }

1045 is_skipped = (rd->nz == 0);	1152 is_skipped = (rd->nz == 0);

1046 VP8SetSkip(it, is_skipped);	1153 VP8SetSkip(it, is_skipped);

1047 return is_skipped;	1154 return is_skipped;

1048 }	1155 }

1049	1156

1050 #if defined(__cplusplus) || defined(c_plusplus)

1051 } // extern "C"

1052 #endif

OLD	NEW

« no previous file with comments | « third_party/libwebp/enc/picture.c ('k') | third_party/libwebp/enc/syntax.c » ('j') | no next file with comments »