third_party/libwebp/enc/quant.c - Issue 2651883004: libwebp-0.6.0-rc1

Side by Side Diff: third_party/libwebp/enc/quant.c

Issue 2651883004: libwebp-0.6.0-rc1 (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright 2011 Google Inc. All Rights Reserved.

2 //

3 // Use of this source code is governed by a BSD-style license

4 // that can be found in the COPYING file in the root of the source

5 // tree. An additional intellectual property rights grant can be found

6 // in the file PATENTS. All contributing project authors may

7 // be found in the AUTHORS file in the root of the source tree.

8 // -----------------------------------------------------------------------------

9 //

10 // Quantization

11 //

12 // Author: Skal (pascal.massimino@gmail.com)

13

14 #include <assert.h>

15 #include <math.h>

16 #include <stdlib.h> // for abs()

17

18 #include "./vp8enci.h"

19 #include "./cost.h"

20

21 #define DO_TRELLIS_I4 1

22 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.

23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.

24 #define USE_TDISTO 1

25

26 #define MID_ALPHA 64 // neutral value for susceptibility

27 #define MIN_ALPHA 30 // lowest usable value for susceptibility

28 #define MAX_ALPHA 100 // higher meaningful value for susceptibility

29

30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP

31 // power-law modulation. Must be strictly less than 1.

32

33 // number of non-zero coeffs below which we consider the block very flat

34 // (and apply a penalty to complex predictions)

35 #define FLATNESS_LIMIT_I16 10 // I16 mode

36 #define FLATNESS_LIMIT_I4 3 // I4 mode

37 #define FLATNESS_LIMIT_UV 2 // UV mode

38 #define FLATNESS_PENALTY 140 // roughly ~1bit per block

39

40 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)

41

42 #define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda)

43

44 // #define DEBUG_BLOCK

45

46 //------------------------------------------------------------------------------

47

48 #if defined(DEBUG_BLOCK)

49

50 #include <stdio.h>

51 #include <stdlib.h>

52

53 static void PrintBlockInfo(const VP8EncIterator* const it,

54 const VP8ModeScore* const rd) {

55 int i, j;

56 const int is_i16 = (it->mb_->type_ == 1);

57 const uint8_t* const y_in = it->yuv_in_ + Y_OFF_ENC;

58 const uint8_t* const y_out = it->yuv_out_ + Y_OFF_ENC;

59 const uint8_t* const uv_in = it->yuv_in_ + U_OFF_ENC;

60 const uint8_t* const uv_out = it->yuv_out_ + U_OFF_ENC;

61 printf("SOURCE / OUTPUT / ABS DELTA\n");

62 for (j = 0; j < 16; ++j) {

63 for (i = 0; i < 16; ++i) printf("%3d ", y_in[i + j * BPS]);

64 printf(" ");

65 for (i = 0; i < 16; ++i) printf("%3d ", y_out[i + j * BPS]);

66 printf(" ");

67 for (i = 0; i < 16; ++i) {

68 printf("%1d ", abs(y_in[i + j * BPS] - y_out[i + j * BPS]));

69 }

70 printf("\n");

71 }

72 printf("\n"); // newline before the U/V block

73 for (j = 0; j < 8; ++j) {

74 for (i = 0; i < 8; ++i) printf("%3d ", uv_in[i + j * BPS]);

75 printf(" ");

76 for (i = 8; i < 16; ++i) printf("%3d ", uv_in[i + j * BPS]);

77 printf(" ");

78 for (i = 0; i < 8; ++i) printf("%3d ", uv_out[i + j * BPS]);

79 printf(" ");

80 for (i = 8; i < 16; ++i) printf("%3d ", uv_out[i + j * BPS]);

81 printf(" ");

82 for (i = 0; i < 8; ++i) {

83 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));

84 }

85 printf(" ");

86 for (i = 8; i < 16; ++i) {

87 printf("%1d ", abs(uv_out[i + j * BPS] - uv_in[i + j * BPS]));

88 }

89 printf("\n");

90 }

91 printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",

92 (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,

93 (int)rd->score);

94 if (is_i16) {

95 printf("Mode: %d\n", rd->mode_i16);

96 printf("y_dc_levels:");

97 for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);

98 printf("\n");

99 } else {

100 printf("Modes[16]: ");

101 for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);

102 printf("\n");

103 }

104 printf("y_ac_levels:\n");

105 for (j = 0; j < 16; ++j) {

106 for (i = is_i16 ? 1 : 0; i < 16; ++i) {

107 printf("%4d ", rd->y_ac_levels[j][i]);

108 }

109 printf("\n");

110 }

111 printf("\n");

112 printf("uv_levels (mode=%d):\n", rd->mode_uv);

113 for (j = 0; j < 8; ++j) {

114 for (i = 0; i < 16; ++i) {

115 printf("%4d ", rd->uv_levels[j][i]);

116 }

117 printf("\n");

118 }

119 }

120

121 #endif // DEBUG_BLOCK

122

123 //------------------------------------------------------------------------------

124

// Clamps 'v' to the inclusive range [m, M]: returns 'm' when v < m, 'M' when
// v > M, and 'v' itself otherwise.
static WEBP_INLINE int clip(int v, int m, int M) {
  if (v < m) return m;
  if (v > M) return M;
  return v;
}

128

129 static const uint8_t kZigzag[16] = {

130 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15

131 };

132

133 static const uint8_t kDcTable[128] = {

134 4, 5, 6, 7, 8, 9, 10, 10,

135 11, 12, 13, 14, 15, 16, 17, 17,

136 18, 19, 20, 20, 21, 21, 22, 22,

137 23, 23, 24, 25, 25, 26, 27, 28,

138 29, 30, 31, 32, 33, 34, 35, 36,

139 37, 37, 38, 39, 40, 41, 42, 43,

140 44, 45, 46, 46, 47, 48, 49, 50,

141 51, 52, 53, 54, 55, 56, 57, 58,

142 59, 60, 61, 62, 63, 64, 65, 66,

143 67, 68, 69, 70, 71, 72, 73, 74,

144 75, 76, 76, 77, 78, 79, 80, 81,

145 82, 83, 84, 85, 86, 87, 88, 89,

146 91, 93, 95, 96, 98, 100, 101, 102,

147 104, 106, 108, 110, 112, 114, 116, 118,

148 122, 124, 126, 128, 130, 132, 134, 136,

149 138, 140, 143, 145, 148, 151, 154, 157

150 };

151

152 static const uint16_t kAcTable[128] = {

153 4, 5, 6, 7, 8, 9, 10, 11,

154 12, 13, 14, 15, 16, 17, 18, 19,

155 20, 21, 22, 23, 24, 25, 26, 27,

156 28, 29, 30, 31, 32, 33, 34, 35,

157 36, 37, 38, 39, 40, 41, 42, 43,

158 44, 45, 46, 47, 48, 49, 50, 51,

159 52, 53, 54, 55, 56, 57, 58, 60,

160 62, 64, 66, 68, 70, 72, 74, 76,

161 78, 80, 82, 84, 86, 88, 90, 92,

162 94, 96, 98, 100, 102, 104, 106, 108,

163 110, 112, 114, 116, 119, 122, 125, 128,

164 131, 134, 137, 140, 143, 146, 149, 152,

165 155, 158, 161, 164, 167, 170, 173, 177,

166 181, 185, 189, 193, 197, 201, 205, 209,

167 213, 217, 221, 225, 229, 234, 239, 245,

168 249, 254, 259, 264, 269, 274, 279, 284

169 };

170

171 static const uint16_t kAcTable2[128] = {

172 8, 8, 9, 10, 12, 13, 15, 17,

173 18, 20, 21, 23, 24, 26, 27, 29,

174 31, 32, 34, 35, 37, 38, 40, 41,

175 43, 44, 46, 48, 49, 51, 52, 54,

176 55, 57, 58, 60, 62, 63, 65, 66,

177 68, 69, 71, 72, 74, 75, 77, 79,

178 80, 82, 83, 85, 86, 88, 89, 93,

179 96, 99, 102, 105, 108, 111, 114, 117,

180 120, 124, 127, 130, 133, 136, 139, 142,

181 145, 148, 151, 155, 158, 161, 164, 167,

182 170, 173, 176, 179, 184, 189, 193, 198,

183 203, 207, 212, 217, 221, 226, 230, 235,

184 240, 244, 249, 254, 258, 263, 268, 274,

185 280, 286, 292, 299, 305, 311, 317, 323,

186 330, 336, 342, 348, 354, 362, 370, 379,

187 385, 393, 401, 409, 416, 424, 432, 440

188 };

189

190 static const uint8_t kBiasMatrices[3][2] = { // [luma-ac,luma-dc,chroma][dc,ac]

191 { 96, 110 }, { 96, 108 }, { 110, 115 }

192 };

193

194 // Sharpening by (slightly) raising the hi-frequency coeffs.

195 // Hack-ish but helpful for mid-bitrate range. Use with care.

196 #define SHARPEN_BITS 11 // number of descaling bits for sharpening bias

197 static const uint8_t kFreqSharpening[16] = {

198 0, 30, 60, 90,

199 30, 60, 90, 90,

200 60, 90, 90, 90,

201 90, 90, 90, 90

202 };

203

204 //------------------------------------------------------------------------------

205 // Initialize quantization parameters in VP8Matrix

206

207 // Returns the average quantizer

208 static int ExpandMatrix(VP8Matrix* const m, int type) {

209 int i, sum;

210 for (i = 0; i < 2; ++i) {

211 const int is_ac_coeff = (i > 0);

212 const int bias = kBiasMatrices[type][is_ac_coeff];

213 m->iq_[i] = (1 << QFIX) / m->q_[i];

214 m->bias_[i] = BIAS(bias);

215 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:

216 // * zero if coeff <= zthresh

217 // * non-zero if coeff > zthresh

218 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];

219 }

220 for (i = 2; i < 16; ++i) {

221 m->q_[i] = m->q_[1];

222 m->iq_[i] = m->iq_[1];

223 m->bias_[i] = m->bias_[1];

224 m->zthresh_[i] = m->zthresh_[1];

225 }

226 for (sum = 0, i = 0; i < 16; ++i) {

227 if (type == 0) { // we only use sharpening for AC luma coeffs

228 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;

229 } else {

230 m->sharpen_[i] = 0;

231 }

232 sum += m->q_[i];

233 }

234 return (sum + 8) >> 4;

235 }

236

// Forces the lambda stored at '*v' to be at least 1: values below 1 are
// replaced by 1, other values are left unchanged.
static void CheckLambdaValue(int* const v) {
  if (*v < 1) *v = 1;
}

238

239 static void SetupMatrices(VP8Encoder* enc) {

240 int i;

241 const int tlambda_scale =

242 (enc->method_ >= 4) ? enc->config_->sns_strength

243 : 0;

244 const int num_segments = enc->segment_hdr_.num_segments_;

245 for (i = 0; i < num_segments; ++i) {

246 VP8SegmentInfo* const m = &enc->dqm_[i];

247 const int q = m->quant_;

248 int q_i4, q_i16, q_uv;

249 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];

250 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];

251

252 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;

253 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];

254

255 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];

256 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];

257

258 q_i4 = ExpandMatrix(&m->y1_, 0);

259 q_i16 = ExpandMatrix(&m->y2_, 1);

260 q_uv = ExpandMatrix(&m->uv_, 2);

261

262 m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7;

263 m->lambda_i16_ = (3 * q_i16 * q_i16);

264 m->lambda_uv_ = (3 * q_uv * q_uv) >> 6;

265 m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7;

266 m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3;

267 m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2;

268 m->lambda_trellis_uv_ = (q_uv * q_uv) << 1;

269 m->tlambda_ = (tlambda_scale * q_i4) >> 5;

270

271 // none of these constants should be < 1

272 CheckLambdaValue(&m->lambda_i4_);

273 CheckLambdaValue(&m->lambda_i16_);

274 CheckLambdaValue(&m->lambda_uv_);

275 CheckLambdaValue(&m->lambda_mode_);

276 CheckLambdaValue(&m->lambda_trellis_i4_);

277 CheckLambdaValue(&m->lambda_trellis_i16_);

278 CheckLambdaValue(&m->lambda_trellis_uv_);

279 CheckLambdaValue(&m->tlambda_);

280

281 m->min_disto_ = 20 * m->y1_.q_[0]; // quantization-aware min disto

282 m->max_edge_ = 0;

283

284 m->i4_penalty_ = 1000 * q_i4 * q_i4;

285 }

286 }

287

288 //------------------------------------------------------------------------------

289 // Initialize filtering parameters

290

291 // Very small filter-strength values have close to no visual effect. So we can

292 // save a little decoding-CPU by turning filtering off for these.

293 #define FSTRENGTH_CUTOFF 2

294

295 static void SetupFilterStrength(VP8Encoder* const enc) {

296 int i;

297 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.

298 const int level0 = 5 * enc->config_->filter_strength;

299 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {

300 VP8SegmentInfo* const m = &enc->dqm_[i];

301 // We focus on the quantization of AC coeffs.

302 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;

303 const int base_strength =

304 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);

305 // Segments with lower complexity ('beta') will be less filtered.

306 const int f = base_strength * level0 / (256 + m->beta_);

307 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;

308 }

309 // We record the initial strength (mainly for the case of 1-segment only).

310 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;

311 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);

312 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;

313 }

314

315 //------------------------------------------------------------------------------

316

317 // Note: if you change the values below, remember that the max range

318 // allowed by the syntax for DQ_UV is [-16,16].

319 #define MAX_DQ_UV (6)

320 #define MIN_DQ_UV (-4)

321

322 // We want to emulate jpeg-like behaviour where the expected "good" quality

323 // is around q=75. Internally, our "good" middle is around c=50. So we

324 // map accordingly using linear piece-wise function

325 static double QualityToCompression(double c) {

326 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;

327 // The file size roughly scales as pow(quantizer, 3.). Actually, the

328 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested

329 // in the mid-quant range. So we scale the compressibility inversely to

330 // this power-law: quant ~= compression ^ 1/3. This law holds well for

331 // low quant. Finer modeling for high-quant would make use of kAcTable[]

332 // more explicitly.

333 const double v = pow(linear_c, 1 / 3.);

334 return v;

335 }

336

337 static double QualityToJPEGCompression(double c, double alpha) {

338 // We map the complexity 'alpha' and quality setting 'c' to a compression

339 // exponent empirically matched to the compression curve of libjpeg6b.

340 // On average, the WebP output size will be roughly similar to that of a

341 // JPEG file compressed with same quality factor.

342 const double amin = 0.30;

343 const double amax = 0.85;

344 const double exp_min = 0.4;

345 const double exp_max = 0.9;

346 const double slope = (exp_min - exp_max) / (amax - amin);

347 // Linearly interpolate 'expn' from exp_min to exp_max

348 // in the [amin, amax] range.

349 const double expn = (alpha > amax) ? exp_min

350 : (alpha < amin) ? exp_max

351 : exp_max + slope * (alpha - amin);

352 const double v = pow(c, expn);

353 return v;

354 }

355

356 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,

357 const VP8SegmentInfo* const S2) {

358 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);

359 }

360

361 static void SimplifySegments(VP8Encoder* const enc) {

362 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };

363 // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an

364 // explicit check is needed to avoid a spurious warning about 'i' exceeding

365 // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9).

366 const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS)

367 ? enc->segment_hdr_.num_segments_

368 : NUM_MB_SEGMENTS;

369 int num_final_segments = 1;

370 int s1, s2;

371 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments

372 const VP8SegmentInfo* const S1 = &enc->dqm_[s1];

373 int found = 0;

374 // check if we already have similar segment

375 for (s2 = 0; s2 < num_final_segments; ++s2) {

376 const VP8SegmentInfo* const S2 = &enc->dqm_[s2];

377 if (SegmentsAreEquivalent(S1, S2)) {

378 found = 1;

379 break;

380 }

381 }

382 map[s1] = s2;

383 if (!found) {

384 if (num_final_segments != s1) {

385 enc->dqm_[num_final_segments] = enc->dqm_[s1];

386 }

387 ++num_final_segments;

388 }

389 }

390 if (num_final_segments < num_segments) { // Remap

391 int i = enc->mb_w_ * enc->mb_h_;

392 while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];

393 enc->segment_hdr_.num_segments_ = num_final_segments;

394 // Replicate the trailing segment infos (it's mostly cosmetics)

395 for (i = num_final_segments; i < num_segments; ++i) {

396 enc->dqm_[i] = enc->dqm_[num_final_segments - 1];

397 }

398 }

399 }

400

401 void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {

402 int i;

403 int dq_uv_ac, dq_uv_dc;

404 const int num_segments = enc->segment_hdr_.num_segments_;

405 const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;

406 const double Q = quality / 100.;

407 const double c_base = enc->config_->emulate_jpeg_size ?

408 QualityToJPEGCompression(Q, enc->alpha_ / 255.) :

409 QualityToCompression(Q);

410 for (i = 0; i < num_segments; ++i) {

411 // We modulate the base coefficient to accommodate for the quantization

412 // susceptibility and allow denser segments to be quantized more.

413 const double expn = 1. - amp * enc->dqm_[i].alpha_;

414 const double c = pow(c_base, expn);

415 const int q = (int)(127. * (1. - c));

416 assert(expn > 0.);

417 enc->dqm_[i].quant_ = clip(q, 0, 127);

418 }

419

420 // purely indicative in the bitstream (except for the 1-segment case)

421 enc->base_quant_ = enc->dqm_[0].quant_;

422

423 // fill-in values for the unused segments (required by the syntax)

424 for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {

425 enc->dqm_[i].quant_ = enc->base_quant_;

426 }

427

428 // uv_alpha_ is normally spread around ~60. The useful range is

429 // typically ~30 (quite bad) to ~100 (ok to decimate UV more).

430 // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.

431 dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)

432 / (MAX_ALPHA - MIN_ALPHA);

433 // we rescale by the user-defined strength of adaptation

434 dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;

435 // and make it safe.

436 dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);

437 // We also boost the dc-uv-quant a little, based on sns-strength, since

438 // U/V channels are quite more reactive to high quants (flat DC-blocks

439 // tend to appear, and are unpleasant).

440 dq_uv_dc = -4 * enc->config_->sns_strength / 100;

441 dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed

442

443 enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum

444 enc->dq_y2_dc_ = 0;

445 enc->dq_y2_ac_ = 0;

446 enc->dq_uv_dc_ = dq_uv_dc;

447 enc->dq_uv_ac_ = dq_uv_ac;

448

449 SetupFilterStrength(enc); // initialize segments' filtering, eventually

450

451 if (num_segments > 1) SimplifySegments(enc);

452

453 SetupMatrices(enc); // finalize quantization matrices

454 }

455

456 //------------------------------------------------------------------------------

457 // Form the predictions in cache

458

459 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index

460 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };

461 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };

462

463 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index

464 const int VP8I4ModeOffsets[NUM_BMODES] = {

465 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4

466 };

467

468 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {

469 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;

470 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;

471 VP8EncPredLuma16(it->yuv_p_, left, top);

472 }

473

474 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {

475 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;

476 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;

477 VP8EncPredChroma8(it->yuv_p_, left, top);

478 }

479

480 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {

481 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);

482 }

483

484 //------------------------------------------------------------------------------

485 // Quantize

486

487 // Layout:

488 // +----+----+

489 // |YYYY|UUVV| 0

490 // |YYYY|UUVV| 4

491 // |YYYY|....| 8

492 // |YYYY|....| 12

493 // +----+----+

494

495 const int VP8Scan[16] = { // Luma

496 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,

497 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,

498 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,

499 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,

500 };

501

502 static const int VP8ScanUV[4 + 4] = {

503 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U

504 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V

505 };

506

507 //------------------------------------------------------------------------------

508 // Distortion measurement

509

510 static const uint16_t kWeightY[16] = {

511 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2

512 };

513

514 static const uint16_t kWeightTrellis[16] = {

515 #if USE_TDISTO == 0

516 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16

517 #else

518 30, 27, 19, 11,

519 27, 24, 17, 10,

520 19, 17, 12, 8,

521 11, 10, 8, 6

522 #endif

523 };

524

525 // Init/Copy the common fields in score.

526 static void InitScore(VP8ModeScore* const rd) {

527 rd->D = 0;

528 rd->SD = 0;

529 rd->R = 0;

530 rd->H = 0;

531 rd->nz = 0;

532 rd->score = MAX_COST;

533 }

534

535 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

536 dst->D = src->D;

537 dst->SD = src->SD;

538 dst->R = src->R;

539 dst->H = src->H;

540 dst->nz = src->nz; // note that nz is not accumulated, but just copied.

541 dst->score = src->score;

542 }

543

544 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {

545 dst->D += src->D;

546 dst->SD += src->SD;

547 dst->R += src->R;

548 dst->H += src->H;

549 dst->nz \|= src->nz; // here, new nz bits are accumulated.

550 dst->score += src->score;

551 }

552

553 //------------------------------------------------------------------------------

554 // Performs trellis-optimized quantization.

555

// One node of the quantization trellis: a candidate level for a coefficient,
// plus the link to the best node of the previous coefficient.
typedef struct {
  // best previous node
  int8_t prev;
  // sign of coeff_i
  int8_t sign;
  // level
  int16_t level;
} Node;

562

563 // Score state

564 typedef struct {

565 score_t score; // partial RD score

566 const uint16_t* costs; // shortcut to cost tables

567 } ScoreState;

568

569 // If a coefficient was quantized to a value Q (using a neutral bias),

570 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]

571 // We don't test negative values though.

572 #define MIN_DELTA 0 // how much lower level to try

573 #define MAX_DELTA 1 // how much higher

574 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)

575 #define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])

576 #define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])

577

578 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {

579 rd->score = (rd->R + rd->H) * lambda + RD_DISTO_MULT * (rd->D + rd->SD);

580 }

581

582 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,

583 score_t distortion) {

584 return rate * lambda + RD_DISTO_MULT * distortion;

585 }

586

587 static int TrellisQuantizeBlock(const VP8Encoder* const enc,

588 int16_t in[16], int16_t out[16],

589 int ctx0, int coeff_type,

590 const VP8Matrix* const mtx,

591 int lambda) {

592 const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];

593 CostArrayPtr const costs =

594 (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];

595 const int first = (coeff_type == 0) ? 1 : 0;

596 Node nodes[16][NUM_NODES];

597 ScoreState score_states[2][NUM_NODES];

598 ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);

599 ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);

600 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous

601 score_t best_score;

602 int n, m, p, last;

603

604 {

605 score_t cost;

606 const int thresh = mtx->q_[1] * mtx->q_[1] / 4;

607 const int last_proba = probas[VP8EncBands[first]][ctx0][0];

608

609 // compute the position of the last interesting coefficient

610 last = first - 1;

611 for (n = 15; n >= first; --n) {

612 const int j = kZigzag[n];

613 const int err = in[j] * in[j];

614 if (err > thresh) {

615 last = n;

616 break;

617 }

618 }

619 // we don't need to go inspect up to n = 16 coeffs. We can just go up

620 // to last + 1 (inclusive) without losing much.

621 if (last < 15) ++last;

622

623 // compute 'skip' score. This is the max score one can do.

624 cost = VP8BitCost(0, last_proba);

625 best_score = RDScoreTrellis(lambda, cost, 0);

626

627 // initialize source node.

628 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {

629 const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;

630 ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);

631 ss_cur[m].costs = costs[first][ctx0];

632 }

633 }

634

635 // traverse trellis.

636 for (n = first; n <= last; ++n) {

637 const int j = kZigzag[n];

638 const uint32_t Q = mtx->q_[j];

639 const uint32_t iQ = mtx->iq_[j];

640 const uint32_t B = BIAS(0x00); // neutral bias

641 // note: it's important to take sign of the _original_ coeff,

642 // so we don't have to consider level < 0 afterward.

643 const int sign = (in[j] < 0);

644 const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];

645 int level0 = QUANTDIV(coeff0, iQ, B);

646 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;

647

648 { // Swap current and previous score states

649 ScoreState* const tmp = ss_cur;

650 ss_cur = ss_prev;

651 ss_prev = tmp;

652 }

653

654 // test all alternate level values around level0.

655 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {

656 Node* const cur = &NODE(n, m);

657 int level = level0 + m;

658 const int ctx = (level > 2) ? 2 : level;

659 const int band = VP8EncBands[n + 1];

660 score_t base_score, last_pos_score;

661 score_t best_cur_score = MAX_COST;

662 int best_prev = 0; // default, in case

663

664 ss_cur[m].score = MAX_COST;

665 ss_cur[m].costs = costs[n + 1][ctx];

666 if (level > MAX_LEVEL \|\| level < 0) { // node is dead?

667 continue;

668 }

669

670 // Compute extra rate cost if last coeff's position is < 15

671 {

672 const score_t last_pos_cost =

673 (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;

674 last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);

675 }

676

677 {

678 // Compute delta_error = how much coding this level will

679 // subtract to max_error as distortion.

680 // Here, distortion = sum of (\|coeff_i\| - level_i * Q_i)^2

681 const int new_error = coeff0 - level * Q;

682 const int delta_error =

683 kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);

684 base_score = RDScoreTrellis(lambda, 0, delta_error);

685 }

686

687 // Inspect all possible non-dead predecessors. Retain only the best one.

688 for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {

689 // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically

690 // eliminated since their score can't be better than the current best.

691 const score_t cost = VP8LevelCost(ss_prev[p].costs, level);

692 // Examine node assuming it's a non-terminal one.

693 const score_t score =

694 base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);

695 if (score < best_cur_score) {

696 best_cur_score = score;

697 best_prev = p;

698 }

699 }

700 // Store best finding in current node.

701 cur->sign = sign;

702 cur->level = level;

703 cur->prev = best_prev;

704 ss_cur[m].score = best_cur_score;

705

706 // Now, record best terminal node (and thus best entry in the graph).

707 if (level != 0) {

708 const score_t score = best_cur_score + last_pos_score;

709 if (score < best_score) {

710 best_score = score;

711 best_path[0] = n; // best eob position

712 best_path[1] = m; // best node index

713 best_path[2] = best_prev; // best predecessor

714 }

715 }

716 }

717 }

718

719 // Fresh start

720 memset(in + first, 0, (16 - first) * sizeof(*in));

721 memset(out + first, 0, (16 - first) * sizeof(*out));

722 if (best_path[0] == -1) {

723 return 0; // skip!

724 }

725

726 {

727 // Unwind the best path.

728 // Note: best-prev on terminal node is not necessarily equal to the

729 // best_prev for non-terminal. So we patch best_path[2] in.

730 int nz = 0;

731 int best_node = best_path[1];

732 n = best_path[0];

733 NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal

734

735 for (; n >= first; --n) {

736 const Node* const node = &NODE(n, best_node);

737 const int j = kZigzag[n];

738 out[n] = node->sign ? -node->level : node->level;

739 nz \|= node->level;

740 in[j] = out[n] * mtx->q_[j];

741 best_node = node->prev;

742 }

743 return (nz != 0);

744 }

745 }

746

747 #undef NODE

748

749 //------------------------------------------------------------------------------

750 // Performs: difference, transform, quantize, back-transform, add

751 // all at once. Output is the reconstructed block in *yuv_out, and the

752 // quantized levels in *levels.

753

754 static int ReconstructIntra16(VP8EncIterator* const it,

755 VP8ModeScore* const rd,

756 uint8_t* const yuv_out,

757 int mode) {

758 const VP8Encoder* const enc = it->enc_;

759 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

760 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;

761 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

762 int nz = 0;

763 int n;

764 int16_t tmp[16][16], dc_tmp[16];

765

766 for (n = 0; n < 16; n += 2) {

767 VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);

768 }

769 VP8FTransformWHT(tmp[0], dc_tmp);

770 nz \|= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;

771

772 if (DO_TRELLIS_I16 && it->do_trellis_) {

773 int x, y;

774 VP8IteratorNzToBytes(it);

775 for (y = 0, n = 0; y < 4; ++y) {

776 for (x = 0; x < 4; ++x, ++n) {

777 const int ctx = it->top_nz_[x] + it->left_nz_[y];

778 const int non_zero =

779 TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,

780 &dqm->y1_, dqm->lambda_trellis_i16_);

781 it->top_nz_[x] = it->left_nz_[y] = non_zero;

782 rd->y_ac_levels[n][0] = 0;

783 nz \|= non_zero << n;

784 }

785 }

786 } else {

787 for (n = 0; n < 16; n += 2) {

788 // Zero-out the first coeff, so that: a) nz is correct below, and

789 // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.

790 tmp[n][0] = tmp[n + 1][0] = 0;

791 nz \|= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;

792 assert(rd->y_ac_levels[n + 0][0] == 0);

793 assert(rd->y_ac_levels[n + 1][0] == 0);

794 }

795 }

796

797 // Transform back

798 VP8TransformWHT(dc_tmp, tmp[0]);

799 for (n = 0; n < 16; n += 2) {

800 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);

801 }

802

803 return nz;

804 }

805

806 static int ReconstructIntra4(VP8EncIterator* const it,

807 int16_t levels[16],

808 const uint8_t* const src,

809 uint8_t* const yuv_out,

810 int mode) {

811 const VP8Encoder* const enc = it->enc_;

812 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];

813 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

814 int nz = 0;

815 int16_t tmp[16];

816

817 VP8FTransform(src, ref, tmp);

818 if (DO_TRELLIS_I4 && it->do_trellis_) {

819 const int x = it->i4_ & 3, y = it->i4_ >> 2;

820 const int ctx = it->top_nz_[x] + it->left_nz_[y];

821 nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,

822 dqm->lambda_trellis_i4_);

823 } else {

824 nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);

825 }

826 VP8ITransform(ref, tmp, yuv_out, 0);

827 return nz;

828 }

829

830 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,

831 uint8_t* const yuv_out, int mode) {

832 const VP8Encoder* const enc = it->enc_;

833 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];

834 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;

835 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

836 int nz = 0;

837 int n;

838 int16_t tmp[8][16];

839

840 for (n = 0; n < 8; n += 2) {

841 VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);

842 }

843 if (DO_TRELLIS_UV && it->do_trellis_) {

844 int ch, x, y;

845 for (ch = 0, n = 0; ch <= 2; ch += 2) {

846 for (y = 0; y < 2; ++y) {

847 for (x = 0; x < 2; ++x, ++n) {

848 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];

849 const int non_zero =

850 TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,

851 &dqm->uv_, dqm->lambda_trellis_uv_);

852 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;

853 nz \|= non_zero << n;

854 }

855 }

856 }

857 } else {

858 for (n = 0; n < 8; n += 2) {

859 nz \|= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;

860 }

861 }

862

863 for (n = 0; n < 8; n += 2) {

864 VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);

865 }

866 return (nz << 16);

867 }

868

869 //------------------------------------------------------------------------------

870 // RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.

871 // Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.

872

873 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {

874 // We look at the first three AC coefficients to determine what is the average

875 // delta between each sub-4x4 block.

876 const int v0 = abs(DCs[1]);

877 const int v1 = abs(DCs[2]);

878 const int v2 = abs(DCs[4]);

879 int max_v = (v1 > v0) ? v1 : v0;

880 max_v = (v2 > max_v) ? v2 : max_v;

881 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;

882 }

883

884 static void SwapModeScore(VP8ModeScore a, VP8ModeScore b) {

885 VP8ModeScore* const tmp = *a;

886 a = b;

887 *b = tmp;

888 }

889

// Exchanges the two byte pointers referenced by 'a' and 'b'.
// Only the pointers are swapped; the buffers they address are left untouched.
// Callers pass the addresses of their pointer variables, so the parameters
// must be pointers-to-pointers and the writes must go through '*a' and '*b'.
static void SwapPtr(uint8_t** a, uint8_t** b) {
  uint8_t* const tmp = *a;
  *a = *b;
  *b = tmp;
}

895

896 static void SwapOut(VP8EncIterator* const it) {

897 SwapPtr(&it->yuv_out_, &it->yuv_out2_);

898 }

899

900 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {

901 score_t score = 0;

902 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?

903 int i;

904 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC

905 score += (levels[i] != 0);

906 if (score > thresh) return 0;

907 }

908 levels += 16;

909 }

910 return 1;

911 }

912

913 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {

914 const int kNumBlocks = 16;

915 VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];

916 const int lambda = dqm->lambda_i16_;

917 const int tlambda = dqm->tlambda_;

918 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;

919 VP8ModeScore rd_tmp;

920 VP8ModeScore* rd_cur = &rd_tmp;

921 VP8ModeScore* rd_best = rd;

922 int mode;

923

924 rd->mode_i16 = -1;

925 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

926 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC; // scratch buffer

927 rd_cur->mode_i16 = mode;

928

929 // Reconstruct

930 rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);

931

932 // Measure RD-score

933 rd_cur->D = VP8SSE16x16(src, tmp_dst);

934 rd_cur->SD =

935 tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;

936 rd_cur->H = VP8FixedCostsI16[mode];

937 rd_cur->R = VP8GetCostLuma16(it, rd_cur);

938 if (mode > 0 &&

939 IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {

940 // penalty to avoid flat area to be mispredicted by complex mode

941 rd_cur->R += FLATNESS_PENALTY * kNumBlocks;

942 }

943

944 // Since we always examine Intra16 first, we can overwrite *rd directly.

945 SetRDScore(lambda, rd_cur);

946 if (mode == 0 \|\| rd_cur->score < rd_best->score) {

947 SwapModeScore(&rd_cur, &rd_best);

948 SwapOut(it);

949 }

950 }

951 if (rd_best != rd) {

952 memcpy(rd, rd_best, sizeof(*rd));

953 }

954 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.

955 VP8SetIntra16Mode(it, rd->mode_i16);

956

957 // we have a blocky macroblock (only DCs are non-zero) with fairly high

958 // distortion, record max delta so we can later adjust the minimal filtering

959 // strength needed to smooth these blocks out.

960 if ((rd->nz & 0x100ffff) == 0x1000000 && rd->D > dqm->min_disto_) {

961 StoreMaxDelta(dqm, rd->y_dc_levels);

962 }

963 }

964

965 //------------------------------------------------------------------------------

966

967 // return the cost array corresponding to the surrounding prediction modes.

968 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,

969 const uint8_t modes[16]) {

970 const int preds_w = it->enc_->preds_w_;

971 const int x = (it->i4_ & 3), y = it->i4_ >> 2;

972 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];

973 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];

974 return VP8FixedCostsI4[top][left];

975 }

976

977 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {

978 const VP8Encoder* const enc = it->enc_;

979 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];

980 const int lambda = dqm->lambda_i4_;

981 const int tlambda = dqm->tlambda_;

982 const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;

983 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;

984 int total_header_bits = 0;

985 VP8ModeScore rd_best;

986

987 if (enc->max_i4_header_bits_ == 0) {

988 return 0;

989 }

990

991 InitScore(&rd_best);

992 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)

993 SetRDScore(dqm->lambda_mode_, &rd_best);

994 VP8IteratorStartI4(it);

995 do {

996 const int kNumBlocks = 1;

997 VP8ModeScore rd_i4;

998 int mode;

999 int best_mode = -1;

1000 const uint8_t* const src = src0 + VP8Scan[it->i4_];

1001 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);

1002 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];

1003 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.

1004

1005 InitScore(&rd_i4);

1006 VP8MakeIntra4Preds(it);

1007 for (mode = 0; mode < NUM_BMODES; ++mode) {

1008 VP8ModeScore rd_tmp;

1009 int16_t tmp_levels[16];

1010

1011 // Reconstruct

1012 rd_tmp.nz =

1013 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;

1014

1015 // Compute RD-score

1016 rd_tmp.D = VP8SSE4x4(src, tmp_dst);

1017 rd_tmp.SD =

1018 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))

1019 : 0;

1020 rd_tmp.H = mode_costs[mode];

1021

1022 // Add flatness penalty

1023 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {

1024 rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;

1025 } else {

1026 rd_tmp.R = 0;

1027 }

1028

1029 // early-out check

1030 SetRDScore(lambda, &rd_tmp);

1031 if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;

1032

1033 // finish computing score

1034 rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);

1035 SetRDScore(lambda, &rd_tmp);

1036

1037 if (best_mode < 0 \|\| rd_tmp.score < rd_i4.score) {

1038 CopyScore(&rd_i4, &rd_tmp);

1039 best_mode = mode;

1040 SwapPtr(&tmp_dst, &best_block);

1041 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,

1042 sizeof(rd_best.y_ac_levels[it->i4_]));

1043 }

1044 }

1045 SetRDScore(dqm->lambda_mode_, &rd_i4);

1046 AddScore(&rd_best, &rd_i4);

1047 if (rd_best.score >= rd->score) {

1048 return 0;

1049 }

1050 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];

1051 if (total_header_bits > enc->max_i4_header_bits_) {

1052 return 0;

1053 }

1054 // Copy selected samples if not in the right place already.

1055 if (best_block != best_blocks + VP8Scan[it->i4_]) {

1056 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);

1057 }

1058 rd->modes_i4[it->i4_] = best_mode;

1059 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);

1060 } while (VP8IteratorRotateI4(it, best_blocks));

1061

1062 // finalize state

1063 CopyScore(rd, &rd_best);

1064 VP8SetIntra4Mode(it, rd->modes_i4);

1065 SwapOut(it);

1066 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));

1067 return 1; // select intra4x4 over intra16x16

1068 }

1069

1070 //------------------------------------------------------------------------------

1071

1072 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {

1073 const int kNumBlocks = 8;

1074 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];

1075 const int lambda = dqm->lambda_uv_;

1076 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;

1077 uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC; // scratch buffer

1078 uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;

1079 uint8_t* dst = dst0;

1080 VP8ModeScore rd_best;

1081 int mode;

1082

1083 rd->mode_uv = -1;

1084 InitScore(&rd_best);

1085 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

1086 VP8ModeScore rd_uv;

1087

1088 // Reconstruct

1089 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);

1090

1091 // Compute RD-score

1092 rd_uv.D = VP8SSE16x8(src, tmp_dst);

1093 rd_uv.SD = 0; // not calling TDisto here: it tends to flatten areas.

1094 rd_uv.H = VP8FixedCostsUV[mode];

1095 rd_uv.R = VP8GetCostUV(it, &rd_uv);

1096 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {

1097 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;

1098 }

1099

1100 SetRDScore(lambda, &rd_uv);

1101 if (mode == 0 \|\| rd_uv.score < rd_best.score) {

1102 CopyScore(&rd_best, &rd_uv);

1103 rd->mode_uv = mode;

1104 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));

1105 SwapPtr(&dst, &tmp_dst);

1106 }

1107 }

1108 VP8SetIntraUVMode(it, rd->mode_uv);

1109 AddScore(rd, &rd_best);

1110 if (dst != dst0) { // copy 16x8 block if needed

1111 VP8Copy16x8(dst, dst0);

1112 }

1113 }

1114

1115 //------------------------------------------------------------------------------

1116 // Final reconstruction and quantization.

1117

1118 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {

1119 const VP8Encoder* const enc = it->enc_;

1120 const int is_i16 = (it->mb_->type_ == 1);

1121 int nz = 0;

1122

1123 if (is_i16) {

1124 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);

1125 } else {

1126 VP8IteratorStartI4(it);

1127 do {

1128 const int mode =

1129 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];

1130 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];

1131 uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];

1132 VP8MakeIntra4Preds(it);

1133 nz \|= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],

1134 src, dst, mode) << it->i4_;

1135 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));

1136 }

1137

1138 nz \|= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);

1139 rd->nz = nz;

1140 }

1141

1142 // Refine intra16/intra4 sub-modes based on distortion only (not rate).

1143 static void RefineUsingDistortion(VP8EncIterator* const it,

1144 int try_both_modes, int refine_uv_mode,

1145 VP8ModeScore* const rd) {

1146 score_t best_score = MAX_COST;

1147 int nz = 0;

1148 int mode;

1149 int is_i16 = try_both_modes \|\| (it->mb_->type_ == 1);

1150

1151 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];

1152 // Some empiric constants, of approximate order of magnitude.

1153 const int lambda_d_i16 = 106;

1154 const int lambda_d_i4 = 11;

1155 const int lambda_d_uv = 120;

1156 score_t score_i4 = dqm->i4_penalty_;

1157 score_t i4_bit_sum = 0;

1158 const score_t bit_limit = try_both_modes ? it->enc_->mb_header_limit_

1159 : MAX_COST; // no early-out allowed

1160

1161 if (is_i16) { // First, evaluate Intra16 distortion

1162 int best_mode = -1;

1163 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;

1164 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

1165 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];

1166 const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT

1167 + VP8FixedCostsI16[mode] * lambda_d_i16;

1168 if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {

1169 continue;

1170 }

1171 if (score < best_score) {

1172 best_mode = mode;

1173 best_score = score;

1174 }

1175 }

1176 VP8SetIntra16Mode(it, best_mode);

1177 // we'll reconstruct later, if i16 mode actually gets selected

1178 }

1179

1180 // Next, evaluate Intra4

1181 if (try_both_modes \|\| !is_i16) {

1182 // We don't evaluate the rate here, but just account for it through a

1183 // constant penalty (i4 mode usually needs more bits compared to i16).

1184 is_i16 = 0;

1185 VP8IteratorStartI4(it);

1186 do {

1187 int best_i4_mode = -1;

1188 score_t best_i4_score = MAX_COST;

1189 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];

1190 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);

1191

1192 VP8MakeIntra4Preds(it);

1193 for (mode = 0; mode < NUM_BMODES; ++mode) {

1194 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];

1195 const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT

1196 + mode_costs[mode] * lambda_d_i4;

1197 if (score < best_i4_score) {

1198 best_i4_mode = mode;

1199 best_i4_score = score;

1200 }

1201 }

1202 i4_bit_sum += mode_costs[best_i4_mode];

1203 rd->modes_i4[it->i4_] = best_i4_mode;

1204 score_i4 += best_i4_score;

1205 if (score_i4 >= best_score \|\| i4_bit_sum > bit_limit) {

1206 // Intra4 won't be better than Intra16. Bail out and pick Intra16.

1207 is_i16 = 1;

1208 break;

1209 } else { // reconstruct partial block inside yuv_out2_ buffer

1210 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_];

1211 nz \|= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],

1212 src, tmp_dst, best_i4_mode) << it->i4_;

1213 }

1214 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC));

1215 }

1216

1217 // Final reconstruction, depending on which mode is selected.

1218 if (!is_i16) {

1219 VP8SetIntra4Mode(it, rd->modes_i4);

1220 SwapOut(it);

1221 best_score = score_i4;

1222 } else {

1223 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);

1224 }

1225

1226 // ... and UV!

1227 if (refine_uv_mode) {

1228 int best_mode = -1;

1229 score_t best_uv_score = MAX_COST;

1230 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;

1231 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {

1232 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];

1233 const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT

1234 + VP8FixedCostsUV[mode] * lambda_d_uv;

1235 if (score < best_uv_score) {

1236 best_mode = mode;

1237 best_uv_score = score;

1238 }

1239 }

1240 VP8SetIntraUVMode(it, best_mode);

1241 }

1242 nz \|= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);

1243

1244 rd->nz = nz;

1245 rd->score = best_score;

1246 }

1247

1248 //------------------------------------------------------------------------------

1249 // Entry point

1250

1251 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,

1252 VP8RDLevel rd_opt) {

1253 int is_skipped;

1254 const int method = it->enc_->method_;

1255

1256 InitScore(rd);

1257

1258 // We can perform predictions for Luma16x16 and Chroma8x8 already.

1259 // Luma4x4 predictions needs to be done as-we-go.

1260 VP8MakeLuma16Preds(it);

1261 VP8MakeChroma8Preds(it);

1262

1263 if (rd_opt > RD_OPT_NONE) {

1264 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);

1265 PickBestIntra16(it, rd);

1266 if (method >= 2) {

1267 PickBestIntra4(it, rd);

1268 }

1269 PickBestUV(it, rd);

1270 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now

1271 it->do_trellis_ = 1;

1272 SimpleQuantize(it, rd);

1273 }

1274 } else {

1275 // At this point we have heuristically decided intra16 / intra4.

1276 // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).

1277 // For method <= 1, we don't re-examine the decision but just go ahead with

1278 // quantization/reconstruction.

1279 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd);

1280 }

1281 is_skipped = (rd->nz == 0);

1282 VP8SetSkip(it, is_skipped);

1283 return is_skipped;

1284 }

OLD	NEW

« no previous file with comments | « third_party/libwebp/enc/predictor_enc.c ('k') | third_party/libwebp/enc/quant_enc.c » ('j') | no next file with comments »