OLD | NEW |
1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Quantization | 10 // Quantization |
(...skipping 12 matching lines...) |
23 #define DO_TRELLIS_UV 0 // disable trellis for UV: risky and not worth it. | 23 #define DO_TRELLIS_UV 0 // disable trellis for UV: risky and not worth it. |
24 #define USE_TDISTO 1 | 24 #define USE_TDISTO 1 |
25 | 25 |
26 #define MID_ALPHA 64 // neutral value for susceptibility | 26 #define MID_ALPHA 64 // neutral value for susceptibility |
27 #define MIN_ALPHA 30 // lowest usable value for susceptibility | 27 #define MIN_ALPHA 30 // lowest usable value for susceptibility |
28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility | 28 #define MAX_ALPHA 100 // highest meaningful value for susceptibility |
29 | 29 |
30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP | 30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP |
31 // power-law modulation. Must be strictly less than 1. | 31 // power-law modulation. Must be strictly less than 1. |
32 | 32 |
33 #define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision | |
34 | |
35 // number of non-zero coeffs below which we consider the block very flat | 33 // number of non-zero coeffs below which we consider the block very flat |
36 // (and apply a penalty to complex predictions) | 34 // (and apply a penalty to complex predictions) |
37 #define FLATNESS_LIMIT_I16 10 // I16 mode | 35 #define FLATNESS_LIMIT_I16 10 // I16 mode |
38 #define FLATNESS_LIMIT_I4 3 // I4 mode | 36 #define FLATNESS_LIMIT_I4 3 // I4 mode |
39 #define FLATNESS_LIMIT_UV 2 // UV mode | 37 #define FLATNESS_LIMIT_UV 2 // UV mode |
40 #define FLATNESS_PENALTY 140 // roughly 1 bit per block | 38 #define FLATNESS_PENALTY 140 // roughly 1 bit per block |
41 | 39 |
42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) | 40 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8) |
43 | 41 |
44 #define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) | 42 #define RD_DISTO_MULT 256 // distortion multiplier (equivalent of lambda) |
(...skipping 184 matching lines...) |
229 if (type == 0) { // we only use sharpening for AC luma coeffs | 227 if (type == 0) { // we only use sharpening for AC luma coeffs |
230 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS; | 228 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS; |
231 } else { | 229 } else { |
232 m->sharpen_[i] = 0; | 230 m->sharpen_[i] = 0; |
233 } | 231 } |
234 sum += m->q_[i]; | 232 sum += m->q_[i]; |
235 } | 233 } |
236 return (sum + 8) >> 4; | 234 return (sum + 8) >> 4; |
237 } | 235 } |
238 | 236 |
| 237 static void CheckLambdaValue(int* const v) { if (*v < 1) *v = 1; } |
| 238 |
239 static void SetupMatrices(VP8Encoder* enc) { | 239 static void SetupMatrices(VP8Encoder* enc) { |
240 int i; | 240 int i; |
241 const int tlambda_scale = | 241 const int tlambda_scale = |
242 (enc->method_ >= 4) ? enc->config_->sns_strength | 242 (enc->method_ >= 4) ? enc->config_->sns_strength |
243 : 0; | 243 : 0; |
244 const int num_segments = enc->segment_hdr_.num_segments_; | 244 const int num_segments = enc->segment_hdr_.num_segments_; |
245 for (i = 0; i < num_segments; ++i) { | 245 for (i = 0; i < num_segments; ++i) { |
246 VP8SegmentInfo* const m = &enc->dqm_[i]; | 246 VP8SegmentInfo* const m = &enc->dqm_[i]; |
247 const int q = m->quant_; | 247 const int q = m->quant_; |
248 int q4, q16, quv; | 248 int q_i4, q_i16, q_uv; |
249 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)]; | 249 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)]; |
250 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)]; | 250 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)]; |
251 | 251 |
252 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2; | 252 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2; |
253 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)]; | 253 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)]; |
254 | 254 |
255 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)]; | 255 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)]; |
256 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)]; | 256 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)]; |
257 | 257 |
258 q4 = ExpandMatrix(&m->y1_, 0); | 258 q_i4 = ExpandMatrix(&m->y1_, 0); |
259 q16 = ExpandMatrix(&m->y2_, 1); | 259 q_i16 = ExpandMatrix(&m->y2_, 1); |
260 quv = ExpandMatrix(&m->uv_, 2); | 260 q_uv = ExpandMatrix(&m->uv_, 2); |
261 | 261 |
262 m->lambda_i4_ = (3 * q4 * q4) >> 7; | 262 m->lambda_i4_ = (3 * q_i4 * q_i4) >> 7; |
263 m->lambda_i16_ = (3 * q16 * q16); | 263 m->lambda_i16_ = (3 * q_i16 * q_i16); |
264 m->lambda_uv_ = (3 * quv * quv) >> 6; | 264 m->lambda_uv_ = (3 * q_uv * q_uv) >> 6; |
265 m->lambda_mode_ = (1 * q4 * q4) >> 7; | 265 m->lambda_mode_ = (1 * q_i4 * q_i4) >> 7; |
266 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3; | 266 m->lambda_trellis_i4_ = (7 * q_i4 * q_i4) >> 3; |
267 m->lambda_trellis_i16_ = (q16 * q16) >> 2; | 267 m->lambda_trellis_i16_ = (q_i16 * q_i16) >> 2; |
268 m->lambda_trellis_uv_ = (quv *quv) << 1; | 268 m->lambda_trellis_uv_ = (q_uv * q_uv) << 1; |
269 m->tlambda_ = (tlambda_scale * q4) >> 5; | 269 m->tlambda_ = (tlambda_scale * q_i4) >> 5; |
| 270 |
| 271 // none of these constants should be < 1 |
| 272 CheckLambdaValue(&m->lambda_i4_); |
| 273 CheckLambdaValue(&m->lambda_i16_); |
| 274 CheckLambdaValue(&m->lambda_uv_); |
| 275 CheckLambdaValue(&m->lambda_mode_); |
| 276 CheckLambdaValue(&m->lambda_trellis_i4_); |
| 277 CheckLambdaValue(&m->lambda_trellis_i16_); |
| 278 CheckLambdaValue(&m->lambda_trellis_uv_); |
| 279 CheckLambdaValue(&m->tlambda_); |
270 | 280 |
271 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto | 281 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto |
272 m->max_edge_ = 0; | 282 m->max_edge_ = 0; |
| 283 |
| 284 m->i4_penalty_ = 1000 * q_i4 * q_i4; |
273 } | 285 } |
274 } | 286 } |
275 | 287 |
276 //------------------------------------------------------------------------------ | 288 //------------------------------------------------------------------------------ |
277 // Initialize filtering parameters | 289 // Initialize filtering parameters |
278 | 290 |
279 // Very small filter-strength values have close to no visual effect. So we can | 291 // Very small filter-strength values have close to no visual effect. So we can |
280 // save a little decoding-CPU by turning filtering off for these. | 292 // save a little decoding-CPU by turning filtering off for these. |
281 #define FSTRENGTH_CUTOFF 2 | 293 #define FSTRENGTH_CUTOFF 2 |
282 | 294 |
(...skipping 58 matching lines...) |
341 return v; | 353 return v; |
342 } | 354 } |
343 | 355 |
344 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, | 356 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1, |
345 const VP8SegmentInfo* const S2) { | 357 const VP8SegmentInfo* const S2) { |
346 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_); | 358 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_); |
347 } | 359 } |
348 | 360 |
349 static void SimplifySegments(VP8Encoder* const enc) { | 361 static void SimplifySegments(VP8Encoder* const enc) { |
350 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; | 362 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 }; |
351 const int num_segments = enc->segment_hdr_.num_segments_; | 363 // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an |
| 364 // explicit check is needed to avoid a spurious warning about 'i' exceeding |
| 365 // array bounds of 'dqm_' with some compilers (noticed with gcc-4.9). |
| 366 const int num_segments = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) |
| 367 ? enc->segment_hdr_.num_segments_ |
| 368 : NUM_MB_SEGMENTS; |
352 int num_final_segments = 1; | 369 int num_final_segments = 1; |
353 int s1, s2; | 370 int s1, s2; |
354 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments | 371 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments |
355 const VP8SegmentInfo* const S1 = &enc->dqm_[s1]; | 372 const VP8SegmentInfo* const S1 = &enc->dqm_[s1]; |
356 int found = 0; | 373 int found = 0; |
357 // check if we already have a similar segment | 374 // check if we already have a similar segment |
358 for (s2 = 0; s2 < num_final_segments; ++s2) { | 375 for (s2 = 0; s2 < num_final_segments; ++s2) { |
359 const VP8SegmentInfo* const S2 = &enc->dqm_[s2]; | 376 const VP8SegmentInfo* const S2 = &enc->dqm_[s2]; |
360 if (SegmentsAreEquivalent(S1, S2)) { | 377 if (SegmentsAreEquivalent(S1, S2)) { |
361 found = 1; | 378 found = 1; |
(...skipping 758 matching lines...) |
1120 | 1137 |
1121 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); | 1138 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
1122 rd->nz = nz; | 1139 rd->nz = nz; |
1123 } | 1140 } |
1124 | 1141 |
1125 // Refine intra16/intra4 sub-modes based on distortion only (not rate). | 1142 // Refine intra16/intra4 sub-modes based on distortion only (not rate). |
1126 static void RefineUsingDistortion(VP8EncIterator* const it, | 1143 static void RefineUsingDistortion(VP8EncIterator* const it, |
1127 int try_both_modes, int refine_uv_mode, | 1144 int try_both_modes, int refine_uv_mode, |
1128 VP8ModeScore* const rd) { | 1145 VP8ModeScore* const rd) { |
1129 score_t best_score = MAX_COST; | 1146 score_t best_score = MAX_COST; |
1130 score_t score_i4 = (score_t)I4_PENALTY; | |
1131 int16_t tmp_levels[16][16]; | |
1132 uint8_t modes_i4[16]; | |
1133 int nz = 0; | 1147 int nz = 0; |
1134 int mode; | 1148 int mode; |
1135 int is_i16 = try_both_modes || (it->mb_->type_ == 1); | 1149 int is_i16 = try_both_modes || (it->mb_->type_ == 1); |
1136 | 1150 |
| 1151 const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; |
| 1152 // Some empirical constants, of approximate order of magnitude. |
| 1153 const int lambda_d_i16 = 106; |
| 1154 const int lambda_d_i4 = 11; |
| 1155 const int lambda_d_uv = 120; |
| 1156 score_t score_i4 = dqm->i4_penalty_; |
| 1157 score_t i4_bit_sum = 0; |
| 1158 const score_t bit_limit = it->enc_->mb_header_limit_; |
| 1159 |
1137 if (is_i16) { // First, evaluate Intra16 distortion | 1160 if (is_i16) { // First, evaluate Intra16 distortion |
1138 int best_mode = -1; | 1161 int best_mode = -1; |
1139 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; | 1162 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC; |
1140 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 1163 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
1141 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; | 1164 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; |
1142 const score_t score = VP8SSE16x16(src, ref); | 1165 const score_t score = VP8SSE16x16(src, ref) * RD_DISTO_MULT |
| 1166 + VP8FixedCostsI16[mode] * lambda_d_i16; |
| 1167 if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { |
| 1168 continue; |
| 1169 } |
1143 if (score < best_score) { | 1170 if (score < best_score) { |
1144 best_mode = mode; | 1171 best_mode = mode; |
1145 best_score = score; | 1172 best_score = score; |
1146 } | 1173 } |
1147 } | 1174 } |
1148 VP8SetIntra16Mode(it, best_mode); | 1175 VP8SetIntra16Mode(it, best_mode); |
1149 // we'll reconstruct later, if i16 mode actually gets selected | 1176 // we'll reconstruct later, if i16 mode actually gets selected |
1150 } | 1177 } |
1151 | 1178 |
1152 // Next, evaluate Intra4 | 1179 // Next, evaluate Intra4 |
1153 if (try_both_modes || !is_i16) { | 1180 if (try_both_modes || !is_i16) { |
1154 // We don't evaluate the rate here, but just account for it through a | 1181 // We don't evaluate the rate here, but just account for it through a |
1155 // constant penalty (i4 mode usually needs more bits compared to i16). | 1182 // constant penalty (i4 mode usually needs more bits compared to i16). |
1156 is_i16 = 0; | 1183 is_i16 = 0; |
1157 VP8IteratorStartI4(it); | 1184 VP8IteratorStartI4(it); |
1158 do { | 1185 do { |
1159 int best_i4_mode = -1; | 1186 int best_i4_mode = -1; |
1160 score_t best_i4_score = MAX_COST; | 1187 score_t best_i4_score = MAX_COST; |
1161 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; | 1188 const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
| 1189 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4); |
1162 | 1190 |
1163 VP8MakeIntra4Preds(it); | 1191 VP8MakeIntra4Preds(it); |
1164 for (mode = 0; mode < NUM_BMODES; ++mode) { | 1192 for (mode = 0; mode < NUM_BMODES; ++mode) { |
1165 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; | 1193 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; |
1166 const score_t score = VP8SSE4x4(src, ref); | 1194 const score_t score = VP8SSE4x4(src, ref) * RD_DISTO_MULT |
| 1195 + mode_costs[mode] * lambda_d_i4; |
1167 if (score < best_i4_score) { | 1196 if (score < best_i4_score) { |
1168 best_i4_mode = mode; | 1197 best_i4_mode = mode; |
1169 best_i4_score = score; | 1198 best_i4_score = score; |
1170 } | 1199 } |
1171 } | 1200 } |
1172 modes_i4[it->i4_] = best_i4_mode; | 1201 i4_bit_sum += mode_costs[best_i4_mode]; |
| 1202 rd->modes_i4[it->i4_] = best_i4_mode; |
1173 score_i4 += best_i4_score; | 1203 score_i4 += best_i4_score; |
1174 if (score_i4 >= best_score) { | 1204 if (score_i4 >= best_score || i4_bit_sum > bit_limit) { |
1175 // Intra4 won't be better than Intra16. Bail out and pick Intra16. | 1205 // Intra4 won't be better than Intra16. Bail out and pick Intra16. |
1176 is_i16 = 1; | 1206 is_i16 = 1; |
1177 break; | 1207 break; |
1178 } else { // reconstruct partial block inside yuv_out2_ buffer | 1208 } else { // reconstruct partial block inside yuv_out2_ buffer |
1179 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; | 1209 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; |
1180 nz |= ReconstructIntra4(it, tmp_levels[it->i4_], | 1210 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_], |
1181 src, tmp_dst, best_i4_mode) << it->i4_; | 1211 src, tmp_dst, best_i4_mode) << it->i4_; |
1182 } | 1212 } |
1183 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); | 1213 } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); |
1184 } | 1214 } |
1185 | 1215 |
1186 // Final reconstruction, depending on which mode is selected. | 1216 // Final reconstruction, depending on which mode is selected. |
1187 if (!is_i16) { | 1217 if (!is_i16) { |
1188 VP8SetIntra4Mode(it, modes_i4); | 1218 VP8SetIntra4Mode(it, rd->modes_i4); |
1189 memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); | |
1190 SwapOut(it); | 1219 SwapOut(it); |
1191 best_score = score_i4; | 1220 best_score = score_i4; |
1192 } else { | 1221 } else { |
1193 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); | 1222 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); |
1194 } | 1223 } |
1195 | 1224 |
1196 // ... and UV! | 1225 // ... and UV! |
1197 if (refine_uv_mode) { | 1226 if (refine_uv_mode) { |
1198 int best_mode = -1; | 1227 int best_mode = -1; |
1199 score_t best_uv_score = MAX_COST; | 1228 score_t best_uv_score = MAX_COST; |
1200 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; | 1229 const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; |
1201 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { | 1230 for (mode = 0; mode < NUM_PRED_MODES; ++mode) { |
1202 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; | 1231 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; |
1203 const score_t score = VP8SSE16x8(src, ref); | 1232 const score_t score = VP8SSE16x8(src, ref) * RD_DISTO_MULT |
| 1233 + VP8FixedCostsUV[mode] * lambda_d_uv; |
1204 if (score < best_uv_score) { | 1234 if (score < best_uv_score) { |
1205 best_mode = mode; | 1235 best_mode = mode; |
1206 best_uv_score = score; | 1236 best_uv_score = score; |
1207 } | 1237 } |
1208 } | 1238 } |
1209 VP8SetIntraUVMode(it, best_mode); | 1239 VP8SetIntraUVMode(it, best_mode); |
1210 } | 1240 } |
1211 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); | 1241 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); |
1212 | 1242 |
1213 rd->nz = nz; | 1243 rd->nz = nz; |
(...skipping 30 matching lines...) |
1244 // At this point we have heuristically decided intra16 / intra4. | 1274 // At this point we have heuristically decided intra16 / intra4. |
1245 // For method >= 2, pick the best intra4/intra16 based on SSE (a tad slower). | 1275 // For method >= 2, pick the best intra4/intra16 based on SSE (a tad slower). |
1246 // For method <= 1, we don't re-examine the decision but just go ahead with | 1276 // For method <= 1, we don't re-examine the decision but just go ahead with |
1247 // quantization/reconstruction. | 1277 // quantization/reconstruction. |
1248 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); | 1278 RefineUsingDistortion(it, (method >= 2), (method >= 1), rd); |
1249 } | 1279 } |
1250 is_skipped = (rd->nz == 0); | 1280 is_skipped = (rd->nz == 0); |
1251 VP8SetSkip(it, is_skipped); | 1281 VP8SetSkip(it, is_skipped); |
1252 return is_skipped; | 1282 return is_skipped; |
1253 } | 1283 } |
OLD | NEW |