| OLD | NEW |
| 1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
| 9 // | 9 // |
| 10 // Speed-critical encoding functions. | 10 // Speed-critical encoding functions. |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 // Luma | 33 // Luma |
| 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, | 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
| 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, | 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
| 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, | 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
| 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, | 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, |
| 38 | 38 |
| 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U | 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U |
| 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V | 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V |
| 41 }; | 41 }; |
| 42 | 42 |
| 43 // general-purpose util function |
| 44 void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], |
| 45 VP8Histogram* const histo) { |
| 46 int max_value = 0, last_non_zero = 1; |
| 47 int k; |
| 48 for (k = 0; k <= MAX_COEFF_THRESH; ++k) { |
| 49 const int value = distribution[k]; |
| 50 if (value > 0) { |
| 51 if (value > max_value) max_value = value; |
| 52 last_non_zero = k; |
| 53 } |
| 54 } |
| 55 histo->max_value = max_value; |
| 56 histo->last_non_zero = last_non_zero; |
| 57 } |
| 58 |
| 43 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, | 59 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, |
| 44 int start_block, int end_block, | 60 int start_block, int end_block, |
| 45 VP8Histogram* const histo) { | 61 VP8Histogram* const histo) { |
| 46 int j; | 62 int j; |
| 63 int distribution[MAX_COEFF_THRESH + 1] = { 0 }; |
| 47 for (j = start_block; j < end_block; ++j) { | 64 for (j = start_block; j < end_block; ++j) { |
| 48 int k; | 65 int k; |
| 49 int16_t out[16]; | 66 int16_t out[16]; |
| 50 | 67 |
| 51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); | 68 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); |
| 52 | 69 |
| 53 // Convert coefficients to bin. | 70 // Convert coefficients to bin. |
| 54 for (k = 0; k < 16; ++k) { | 71 for (k = 0; k < 16; ++k) { |
| 55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? | 72 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? |
| 56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); | 73 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); |
| 57 histo->distribution[clipped_value]++; | 74 ++distribution[clipped_value]; |
| 58 } | 75 } |
| 59 } | 76 } |
| 77 VP8SetHistogramData(distribution, histo); |
| 60 } | 78 } |
| 61 | 79 |
| 62 //------------------------------------------------------------------------------ | 80 //------------------------------------------------------------------------------ |
| 63 // run-time tables (~4k) | 81 // run-time tables (~4k) |
| 64 | 82 |
| 65 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] | 83 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] |
| 66 | 84 |
| 67 // We declare this variable 'volatile' to prevent instruction reordering | 85 // We declare this variable 'volatile' to prevent instruction reordering |
| 68 // and make sure it's set to true _last_ (so as to be thread-safe) | 86 // and make sure it's set to true _last_ (so as to be thread-safe) |
| 69 static volatile int tables_ok = 0; | 87 static volatile int tables_ok = 0; |
| 70 | 88 |
| 71 static void InitTables(void) { | 89 static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) { |
| 72 if (!tables_ok) { | 90 if (!tables_ok) { |
| 73 int i; | 91 int i; |
| 74 for (i = -255; i <= 255 + 255; ++i) { | 92 for (i = -255; i <= 255 + 255; ++i) { |
| 75 clip1[255 + i] = clip_8b(i); | 93 clip1[255 + i] = clip_8b(i); |
| 76 } | 94 } |
| 77 tables_ok = 1; | 95 tables_ok = 1; |
| 78 } | 96 } |
| 79 } | 97 } |
| 80 | 98 |
| 81 | 99 |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); | 170 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); |
| 153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); | 171 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); |
| 154 const int a3 = (tmp[0 + i] - tmp[12 + i]); | 172 const int a3 = (tmp[0 + i] - tmp[12 + i]); |
| 155 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b | 173 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b |
| 156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); | 174 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); |
| 157 out[8 + i] = (a0 - a1 + 7) >> 4; | 175 out[8 + i] = (a0 - a1 + 7) >> 4; |
| 158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); | 176 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); |
| 159 } | 177 } |
| 160 } | 178 } |
| 161 | 179 |
// Applies VP8FTransform twice: first at src/ref writing out[0..15], then at
// src + 4 / ref + 4 writing out[16..31].
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  int blk;
  for (blk = 0; blk < 2; ++blk) {
    VP8FTransform(src + 4 * blk, ref + 4 * blk, out + 16 * blk);
  }
}
| 184 |
| 162 static void FTransformWHT(const int16_t* in, int16_t* out) { | 185 static void FTransformWHT(const int16_t* in, int16_t* out) { |
| 163 // input is 12b signed | 186 // input is 12b signed |
| 164 int32_t tmp[16]; | 187 int32_t tmp[16]; |
| 165 int i; | 188 int i; |
| 166 for (i = 0; i < 4; ++i, in += 64) { | 189 for (i = 0; i < 4; ++i, in += 64) { |
| 167 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b | 190 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b |
| 168 const int a1 = (in[1 * 16] + in[3 * 16]); | 191 const int a1 = (in[1 * 16] + in[3 * 16]); |
| 169 const int a2 = (in[1 * 16] - in[3 * 16]); | 192 const int a2 = (in[1 * 16] - in[3 * 16]); |
| 170 const int a3 = (in[0 * 16] - in[2 * 16]); | 193 const int a3 = (in[0 * 16] - in[2 * 16]); |
| 171 tmp[0 + i * 4] = a0 + a1; // 14b | 194 tmp[0 + i * 4] = a0 + a1; // 14b |
| (...skipping 16 matching lines...) Expand all Loading... |
| 188 out[12 + i] = b3 >> 1; | 211 out[12 + i] = b3 >> 1; |
| 189 } | 212 } |
| 190 } | 213 } |
| 191 | 214 |
| 192 #undef MUL | 215 #undef MUL |
| 193 #undef STORE | 216 #undef STORE |
| 194 | 217 |
| 195 //------------------------------------------------------------------------------ | 218 //------------------------------------------------------------------------------ |
| 196 // Intra predictions | 219 // Intra predictions |
| 197 | 220 |
| 198 #define DST(x, y) dst[(x) + (y) * BPS] | |
| 199 | |
| 200 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { | 221 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { |
| 201 int j; | 222 int j; |
| 202 for (j = 0; j < size; ++j) { | 223 for (j = 0; j < size; ++j) { |
| 203 memset(dst + j * BPS, value, size); | 224 memset(dst + j * BPS, value, size); |
| 204 } | 225 } |
| 205 } | 226 } |
| 206 | 227 |
| 207 static WEBP_INLINE void VerticalPred(uint8_t* dst, | 228 static WEBP_INLINE void VerticalPred(uint8_t* dst, |
| 208 const uint8_t* top, int size) { | 229 const uint8_t* top, int size) { |
| 209 int j; | 230 int j; |
| 210 if (top) { | 231 if (top != NULL) { |
| 211 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); | 232 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); |
| 212 } else { | 233 } else { |
| 213 Fill(dst, 127, size); | 234 Fill(dst, 127, size); |
| 214 } | 235 } |
| 215 } | 236 } |
| 216 | 237 |
| 217 static WEBP_INLINE void HorizontalPred(uint8_t* dst, | 238 static WEBP_INLINE void HorizontalPred(uint8_t* dst, |
| 218 const uint8_t* left, int size) { | 239 const uint8_t* left, int size) { |
| 219 if (left) { | 240 if (left != NULL) { |
| 220 int j; | 241 int j; |
| 221 for (j = 0; j < size; ++j) { | 242 for (j = 0; j < size; ++j) { |
| 222 memset(dst + j * BPS, left[j], size); | 243 memset(dst + j * BPS, left[j], size); |
| 223 } | 244 } |
| 224 } else { | 245 } else { |
| 225 Fill(dst, 129, size); | 246 Fill(dst, 129, size); |
| 226 } | 247 } |
| 227 } | 248 } |
| 228 | 249 |
| 229 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, | 250 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, |
| 230 const uint8_t* top, int size) { | 251 const uint8_t* top, int size) { |
| 231 int y; | 252 int y; |
| 232 if (left) { | 253 if (left != NULL) { |
| 233 if (top) { | 254 if (top != NULL) { |
| 234 const uint8_t* const clip = clip1 + 255 - left[-1]; | 255 const uint8_t* const clip = clip1 + 255 - left[-1]; |
| 235 for (y = 0; y < size; ++y) { | 256 for (y = 0; y < size; ++y) { |
| 236 const uint8_t* const clip_table = clip + left[y]; | 257 const uint8_t* const clip_table = clip + left[y]; |
| 237 int x; | 258 int x; |
| 238 for (x = 0; x < size; ++x) { | 259 for (x = 0; x < size; ++x) { |
| 239 dst[x] = clip_table[top[x]]; | 260 dst[x] = clip_table[top[x]]; |
| 240 } | 261 } |
| 241 dst += BPS; | 262 dst += BPS; |
| 242 } | 263 } |
| 243 } else { | 264 } else { |
| 244 HorizontalPred(dst, left, size); | 265 HorizontalPred(dst, left, size); |
| 245 } | 266 } |
| 246 } else { | 267 } else { |
| 247 // true motion without left samples (hence: with default 129 value) | 268 // true motion without left samples (hence: with default 129 value) |
| 248 // is equivalent to VE prediction where you just copy the top samples. | 269 // is equivalent to VE prediction where you just copy the top samples. |
| 249 // Note that if top samples are not available, the default value is | 270 // Note that if top samples are not available, the default value is |
| 250 // then 129, and not 127 as in the VerticalPred case. | 271 // then 129, and not 127 as in the VerticalPred case. |
| 251 if (top) { | 272 if (top != NULL) { |
| 252 VerticalPred(dst, top, size); | 273 VerticalPred(dst, top, size); |
| 253 } else { | 274 } else { |
| 254 Fill(dst, 129, size); | 275 Fill(dst, 129, size); |
| 255 } | 276 } |
| 256 } | 277 } |
| 257 } | 278 } |
| 258 | 279 |
| 259 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, | 280 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, |
| 260 const uint8_t* top, | 281 const uint8_t* top, |
| 261 int size, int round, int shift) { | 282 int size, int round, int shift) { |
| 262 int DC = 0; | 283 int DC = 0; |
| 263 int j; | 284 int j; |
| 264 if (top) { | 285 if (top != NULL) { |
| 265 for (j = 0; j < size; ++j) DC += top[j]; | 286 for (j = 0; j < size; ++j) DC += top[j]; |
| 266 if (left) { // top and left present | 287 if (left != NULL) { // top and left present |
| 267 for (j = 0; j < size; ++j) DC += left[j]; | 288 for (j = 0; j < size; ++j) DC += left[j]; |
| 268 } else { // top, but no left | 289 } else { // top, but no left |
| 269 DC += DC; | 290 DC += DC; |
| 270 } | 291 } |
| 271 DC = (DC + round) >> shift; | 292 DC = (DC + round) >> shift; |
| 272 } else if (left) { // left but no top | 293 } else if (left != NULL) { // left but no top |
| 273 for (j = 0; j < size; ++j) DC += left[j]; | 294 for (j = 0; j < size; ++j) DC += left[j]; |
| 274 DC += DC; | 295 DC += DC; |
| 275 DC = (DC + round) >> shift; | 296 DC = (DC + round) >> shift; |
| 276 } else { // no top, no left, nothing. | 297 } else { // no top, no left, nothing. |
| 277 DC = 0x80; | 298 DC = 0x80; |
| 278 } | 299 } |
| 279 Fill(dst, DC, size); | 300 Fill(dst, DC, size); |
| 280 } | 301 } |
| 281 | 302 |
| 282 //------------------------------------------------------------------------------ | 303 //------------------------------------------------------------------------------ |
| 283 // Chroma 8x8 prediction (paragraph 12.2) | 304 // Chroma 8x8 prediction (paragraph 12.2) |
| 284 | 305 |
| 285 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, | 306 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, |
| 286 const uint8_t* top) { | 307 const uint8_t* top) { |
| 287 // U block | 308 // U block |
| 288 DCMode(C8DC8 + dst, left, top, 8, 8, 4); | 309 DCMode(C8DC8 + dst, left, top, 8, 8, 4); |
| 289 VerticalPred(C8VE8 + dst, top, 8); | 310 VerticalPred(C8VE8 + dst, top, 8); |
| 290 HorizontalPred(C8HE8 + dst, left, 8); | 311 HorizontalPred(C8HE8 + dst, left, 8); |
| 291 TrueMotion(C8TM8 + dst, left, top, 8); | 312 TrueMotion(C8TM8 + dst, left, top, 8); |
| 292 // V block | 313 // V block |
| 293 dst += 8; | 314 dst += 8; |
| 294 if (top) top += 8; | 315 if (top != NULL) top += 8; |
| 295 if (left) left += 16; | 316 if (left != NULL) left += 16; |
| 296 DCMode(C8DC8 + dst, left, top, 8, 8, 4); | 317 DCMode(C8DC8 + dst, left, top, 8, 8, 4); |
| 297 VerticalPred(C8VE8 + dst, top, 8); | 318 VerticalPred(C8VE8 + dst, top, 8); |
| 298 HorizontalPred(C8HE8 + dst, left, 8); | 319 HorizontalPred(C8HE8 + dst, left, 8); |
| 299 TrueMotion(C8TM8 + dst, left, top, 8); | 320 TrueMotion(C8TM8 + dst, left, top, 8); |
| 300 } | 321 } |
| 301 | 322 |
| 302 //------------------------------------------------------------------------------ | 323 //------------------------------------------------------------------------------ |
| 303 // luma 16x16 prediction (paragraph 12.3) | 324 // luma 16x16 prediction (paragraph 12.3) |
| 304 | 325 |
| 305 static void Intra16Preds(uint8_t* dst, | 326 static void Intra16Preds(uint8_t* dst, |
| 306 const uint8_t* left, const uint8_t* top) { | 327 const uint8_t* left, const uint8_t* top) { |
| 307 DCMode(I16DC16 + dst, left, top, 16, 16, 5); | 328 DCMode(I16DC16 + dst, left, top, 16, 16, 5); |
| 308 VerticalPred(I16VE16 + dst, top, 16); | 329 VerticalPred(I16VE16 + dst, top, 16); |
| 309 HorizontalPred(I16HE16 + dst, left, 16); | 330 HorizontalPred(I16HE16 + dst, left, 16); |
| 310 TrueMotion(I16TM16 + dst, left, top, 16); | 331 TrueMotion(I16TM16 + dst, left, top, 16); |
| 311 } | 332 } |
| 312 | 333 |
| 313 //------------------------------------------------------------------------------ | 334 //------------------------------------------------------------------------------ |
| 314 // luma 4x4 prediction | 335 // luma 4x4 prediction |
| 315 | 336 |
// DST(x, y): pixel at column x, row y of 'dst' (rows are BPS bytes apart).
#define DST(x, y) dst[(y) * BPS + (x)]
// AVG3: (a + 2b + c) / 4, with +2 added before the shift for rounding.
#define AVG3(a, b, c) (((a) + (b) * 2 + (c) + 2) >> 2)
// AVG2: (a + b) / 2, with +1 added before the shift for rounding.
#define AVG2(a, b) ((1 + (a) + (b)) >> 1)
| 318 | 340 |
| 319 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical | 341 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical |
| 320 const uint8_t vals[4] = { | 342 const uint8_t vals[4] = { |
| 321 AVG3(top[-1], top[0], top[1]), | 343 AVG3(top[-1], top[0], top[1]), |
| 322 AVG3(top[ 0], top[1], top[2]), | 344 AVG3(top[ 0], top[1], top[2]), |
| 323 AVG3(top[ 1], top[2], top[3]), | 345 AVG3(top[ 1], top[2], top[3]), |
| 324 AVG3(top[ 2], top[3], top[4]) | 346 AVG3(top[ 2], top[3], top[4]) |
| 325 }; | 347 }; |
| 326 int i; | 348 int i; |
| 327 for (i = 0; i < 4; ++i) { | 349 for (i = 0; i < 4; ++i) { |
| 328 memcpy(dst + i * BPS, vals, 4); | 350 memcpy(dst + i * BPS, vals, 4); |
| 329 } | 351 } |
| 330 } | 352 } |
| 331 | 353 |
| 332 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal | 354 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal |
| 333 const int X = top[-1]; | 355 const int X = top[-1]; |
| 334 const int I = top[-2]; | 356 const int I = top[-2]; |
| 335 const int J = top[-3]; | 357 const int J = top[-3]; |
| 336 const int K = top[-4]; | 358 const int K = top[-4]; |
| 337 const int L = top[-5]; | 359 const int L = top[-5]; |
| 338 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); | 360 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J)); |
| 339 *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K); | 361 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K)); |
| 340 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); | 362 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L)); |
| 341 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); | 363 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L)); |
| 342 } | 364 } |
| 343 | 365 |
// Fills the 4x4 block at 'dst' with (4 + top[0..3] + top[-5..-2]) >> 3.
static void DC4(uint8_t* dst, const uint8_t* top) {
  uint32_t sum = 4;   // added before the >> 3 below
  int i;
  for (i = 0; i < 4; ++i) {
    sum += top[i];
    sum += top[i - 5];
  }
  Fill(dst, sum >> 3, 4);
}
| 350 | 372 |
| 351 static void RD4(uint8_t* dst, const uint8_t* top) { | 373 static void RD4(uint8_t* dst, const uint8_t* top) { |
| (...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 618 out[n] = level; | 640 out[n] = level; |
| 619 if (level) last = n; | 641 if (level) last = n; |
| 620 } else { | 642 } else { |
| 621 out[n] = 0; | 643 out[n] = 0; |
| 622 in[j] = 0; | 644 in[j] = 0; |
| 623 } | 645 } |
| 624 } | 646 } |
| 625 return (last >= 0); | 647 return (last >= 0); |
| 626 } | 648 } |
| 627 | 649 |
| 650 static int Quantize2Blocks(int16_t in[32], int16_t out[32], |
| 651 const VP8Matrix* const mtx) { |
| 652 int nz; |
| 653 nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; |
| 654 nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; |
| 655 return nz; |
| 656 } |
| 657 |
| 628 static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], | 658 static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], |
| 629 const VP8Matrix* const mtx) { | 659 const VP8Matrix* const mtx) { |
| 630 int n, last = -1; | 660 int n, last = -1; |
| 631 for (n = 0; n < 16; ++n) { | 661 for (n = 0; n < 16; ++n) { |
| 632 const int j = kZigzag[n]; | 662 const int j = kZigzag[n]; |
| 633 const int sign = (in[j] < 0); | 663 const int sign = (in[j] < 0); |
| 634 const uint32_t coeff = sign ? -in[j] : in[j]; | 664 const uint32_t coeff = sign ? -in[j] : in[j]; |
| 635 assert(mtx->sharpen_[j] == 0); | 665 assert(mtx->sharpen_[j] == 0); |
| 636 if (coeff > mtx->zthresh_[j]) { | 666 if (coeff > mtx->zthresh_[j]) { |
| 637 const uint32_t Q = mtx->q_[j]; | 667 const uint32_t Q = mtx->q_[j]; |
| 638 const uint32_t iQ = mtx->iq_[j]; | 668 const uint32_t iQ = mtx->iq_[j]; |
| 639 const uint32_t B = mtx->bias_[j]; | 669 const uint32_t B = mtx->bias_[j]; |
| 640 int level = QUANTDIV(coeff, iQ, B); | 670 int level = QUANTDIV(coeff, iQ, B); |
| 641 if (level > MAX_LEVEL) level = MAX_LEVEL; | 671 if (level > MAX_LEVEL) level = MAX_LEVEL; |
| 642 if (sign) level = -level; | 672 if (sign) level = -level; |
| 643 in[j] = level * Q; | 673 in[j] = level * Q; |
| 644 out[n] = level; | 674 out[n] = level; |
| 645 if (level) last = n; | 675 if (level) last = n; |
| 646 } else { | 676 } else { |
| 647 out[n] = 0; | 677 out[n] = 0; |
| 648 in[j] = 0; | 678 in[j] = 0; |
| 649 } | 679 } |
| 650 } | 680 } |
| 651 return (last >= 0); | 681 return (last >= 0); |
| 652 } | 682 } |
| 653 | 683 |
| 654 //------------------------------------------------------------------------------ | 684 //------------------------------------------------------------------------------ |
| 655 // Block copy | 685 // Block copy |
| 656 | 686 |
| 657 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { | 687 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) { |
| 658 int y; | 688 int y; |
| 659 for (y = 0; y < size; ++y) { | 689 for (y = 0; y < h; ++y) { |
| 660 memcpy(dst, src, size); | 690 memcpy(dst, src, w); |
| 661 src += BPS; | 691 src += BPS; |
| 662 dst += BPS; | 692 dst += BPS; |
| 663 } | 693 } |
| 664 } | 694 } |
| 665 | 695 |
| 666 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } | 696 static void Copy4x4(const uint8_t* src, uint8_t* dst) { |
| 697 Copy(src, dst, 4, 4); |
| 698 } |
| 699 |
| 700 static void Copy16x8(const uint8_t* src, uint8_t* dst) { |
| 701 Copy(src, dst, 16, 8); |
| 702 } |
| 667 | 703 |
| 668 //------------------------------------------------------------------------------ | 704 //------------------------------------------------------------------------------ |
| 669 // Initialization | 705 // Initialization |
| 670 | 706 |
| 671 // Speed-critical function pointers. We have to initialize them to the default | 707 // Speed-critical function pointers. We have to initialize them to the default |
| 672 // implementations within VP8EncDspInit(). | 708 // implementations within VP8EncDspInit(). |
| 673 VP8CHisto VP8CollectHistogram; | 709 VP8CHisto VP8CollectHistogram; |
| 674 VP8Idct VP8ITransform; | 710 VP8Idct VP8ITransform; |
| 675 VP8Fdct VP8FTransform; | 711 VP8Fdct VP8FTransform; |
| 712 VP8Fdct VP8FTransform2; |
| 676 VP8WHT VP8FTransformWHT; | 713 VP8WHT VP8FTransformWHT; |
| 677 VP8Intra4Preds VP8EncPredLuma4; | 714 VP8Intra4Preds VP8EncPredLuma4; |
| 678 VP8IntraPreds VP8EncPredLuma16; | 715 VP8IntraPreds VP8EncPredLuma16; |
| 679 VP8IntraPreds VP8EncPredChroma8; | 716 VP8IntraPreds VP8EncPredChroma8; |
| 680 VP8Metric VP8SSE16x16; | 717 VP8Metric VP8SSE16x16; |
| 681 VP8Metric VP8SSE8x8; | 718 VP8Metric VP8SSE8x8; |
| 682 VP8Metric VP8SSE16x8; | 719 VP8Metric VP8SSE16x8; |
| 683 VP8Metric VP8SSE4x4; | 720 VP8Metric VP8SSE4x4; |
| 684 VP8WMetric VP8TDisto4x4; | 721 VP8WMetric VP8TDisto4x4; |
| 685 VP8WMetric VP8TDisto16x16; | 722 VP8WMetric VP8TDisto16x16; |
| 686 VP8QuantizeBlock VP8EncQuantizeBlock; | 723 VP8QuantizeBlock VP8EncQuantizeBlock; |
| 724 VP8Quantize2Blocks VP8EncQuantize2Blocks; |
| 687 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; | 725 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; |
| 688 VP8BlockCopy VP8Copy4x4; | 726 VP8BlockCopy VP8Copy4x4; |
| 727 VP8BlockCopy VP8Copy16x8; |
| 689 | 728 |
| 690 extern void VP8EncDspInitSSE2(void); | 729 extern void VP8EncDspInitSSE2(void); |
| 730 extern void VP8EncDspInitSSE41(void); |
| 691 extern void VP8EncDspInitAVX2(void); | 731 extern void VP8EncDspInitAVX2(void); |
| 692 extern void VP8EncDspInitNEON(void); | 732 extern void VP8EncDspInitNEON(void); |
| 693 extern void VP8EncDspInitMIPS32(void); | 733 extern void VP8EncDspInitMIPS32(void); |
| 734 extern void VP8EncDspInitMIPSdspR2(void); |
| 694 | 735 |
| 695 static volatile VP8CPUInfo enc_last_cpuinfo_used = | 736 static volatile VP8CPUInfo enc_last_cpuinfo_used = |
| 696 (VP8CPUInfo)&enc_last_cpuinfo_used; | 737 (VP8CPUInfo)&enc_last_cpuinfo_used; |
| 697 | 738 |
| 698 void VP8EncDspInit(void) { | 739 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { |
| 699 if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; | 740 if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; |
| 700 | 741 |
| 701 VP8DspInit(); // common inverse transforms | 742 VP8DspInit(); // common inverse transforms |
| 702 InitTables(); | 743 InitTables(); |
| 703 | 744 |
| 704 // default C implementations | 745 // default C implementations |
| 705 VP8CollectHistogram = CollectHistogram; | 746 VP8CollectHistogram = CollectHistogram; |
| 706 VP8ITransform = ITransform; | 747 VP8ITransform = ITransform; |
| 707 VP8FTransform = FTransform; | 748 VP8FTransform = FTransform; |
| 749 VP8FTransform2 = FTransform2; |
| 708 VP8FTransformWHT = FTransformWHT; | 750 VP8FTransformWHT = FTransformWHT; |
| 709 VP8EncPredLuma4 = Intra4Preds; | 751 VP8EncPredLuma4 = Intra4Preds; |
| 710 VP8EncPredLuma16 = Intra16Preds; | 752 VP8EncPredLuma16 = Intra16Preds; |
| 711 VP8EncPredChroma8 = IntraChromaPreds; | 753 VP8EncPredChroma8 = IntraChromaPreds; |
| 712 VP8SSE16x16 = SSE16x16; | 754 VP8SSE16x16 = SSE16x16; |
| 713 VP8SSE8x8 = SSE8x8; | 755 VP8SSE8x8 = SSE8x8; |
| 714 VP8SSE16x8 = SSE16x8; | 756 VP8SSE16x8 = SSE16x8; |
| 715 VP8SSE4x4 = SSE4x4; | 757 VP8SSE4x4 = SSE4x4; |
| 716 VP8TDisto4x4 = Disto4x4; | 758 VP8TDisto4x4 = Disto4x4; |
| 717 VP8TDisto16x16 = Disto16x16; | 759 VP8TDisto16x16 = Disto16x16; |
| 718 VP8EncQuantizeBlock = QuantizeBlock; | 760 VP8EncQuantizeBlock = QuantizeBlock; |
| 761 VP8EncQuantize2Blocks = Quantize2Blocks; |
| 719 VP8EncQuantizeBlockWHT = QuantizeBlockWHT; | 762 VP8EncQuantizeBlockWHT = QuantizeBlockWHT; |
| 720 VP8Copy4x4 = Copy4x4; | 763 VP8Copy4x4 = Copy4x4; |
| 764 VP8Copy16x8 = Copy16x8; |
| 721 | 765 |
| 722 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 766 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
| 723 if (VP8GetCPUInfo != NULL) { | 767 if (VP8GetCPUInfo != NULL) { |
| 724 #if defined(WEBP_USE_SSE2) | 768 #if defined(WEBP_USE_SSE2) |
| 725 if (VP8GetCPUInfo(kSSE2)) { | 769 if (VP8GetCPUInfo(kSSE2)) { |
| 726 VP8EncDspInitSSE2(); | 770 VP8EncDspInitSSE2(); |
| 771 #if defined(WEBP_USE_SSE41) |
| 772 if (VP8GetCPUInfo(kSSE4_1)) { |
| 773 VP8EncDspInitSSE41(); |
| 774 } |
| 775 #endif |
| 727 } | 776 } |
| 728 #endif | 777 #endif |
| 729 #if defined(WEBP_USE_AVX2) | 778 #if defined(WEBP_USE_AVX2) |
| 730 if (VP8GetCPUInfo(kAVX2)) { | 779 if (VP8GetCPUInfo(kAVX2)) { |
| 731 VP8EncDspInitAVX2(); | 780 VP8EncDspInitAVX2(); |
| 732 } | 781 } |
| 733 #endif | 782 #endif |
| 734 #if defined(WEBP_USE_NEON) | 783 #if defined(WEBP_USE_NEON) |
| 735 if (VP8GetCPUInfo(kNEON)) { | 784 if (VP8GetCPUInfo(kNEON)) { |
| 736 VP8EncDspInitNEON(); | 785 VP8EncDspInitNEON(); |
| 737 } | 786 } |
| 738 #endif | 787 #endif |
| 739 #if defined(WEBP_USE_MIPS32) | 788 #if defined(WEBP_USE_MIPS32) |
| 740 if (VP8GetCPUInfo(kMIPS32)) { | 789 if (VP8GetCPUInfo(kMIPS32)) { |
| 741 VP8EncDspInitMIPS32(); | 790 VP8EncDspInitMIPS32(); |
| 742 } | 791 } |
| 743 #endif | 792 #endif |
| 793 #if defined(WEBP_USE_MIPS_DSP_R2) |
| 794 if (VP8GetCPUInfo(kMIPSdspR2)) { |
| 795 VP8EncDspInitMIPSdspR2(); |
| 796 } |
| 797 #endif |
| 744 } | 798 } |
| 745 enc_last_cpuinfo_used = VP8GetCPUInfo; | 799 enc_last_cpuinfo_used = VP8GetCPUInfo; |
| 746 } | 800 } |
| 747 | |
| OLD | NEW |