| OLD | NEW |
| 1 // Copyright 2011 Google Inc. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // This code is licensed under the same terms as WebM: | 3 // This code is licensed under the same terms as WebM: |
| 4 // Software License Agreement: http://www.webmproject.org/license/software/ | 4 // Software License Agreement: http://www.webmproject.org/license/software/ |
| 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ | 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ |
| 6 // ----------------------------------------------------------------------------- | 6 // ----------------------------------------------------------------------------- |
| 7 // | 7 // |
| 8 // Speed-critical encoding functions. | 8 // Speed-critical encoding functions. |
| 9 // | 9 // |
| 10 // Author: Skal (pascal.massimino@gmail.com) | 10 // Author: Skal (pascal.massimino@gmail.com) |
| 11 | 11 |
| 12 #include <stdlib.h> // for abs() |
| 13 #include "./dsp.h" |
| 12 #include "../enc/vp8enci.h" | 14 #include "../enc/vp8enci.h" |
| 13 | 15 |
| 14 #if defined(__cplusplus) || defined(c_plusplus) | 16 #if defined(__cplusplus) || defined(c_plusplus) |
| 15 extern "C" { | 17 extern "C" { |
| 16 #endif | 18 #endif |
| 17 | 19 |
| 18 //------------------------------------------------------------------------------ | 20 //------------------------------------------------------------------------------ |
| 19 // Compute susceptibility based on DCT-coeff histograms: | 21 // Compute susceptibility based on DCT-coeff histograms: |
| 20 // the higher, the "easier" the macroblock is to compress. | 22 // the higher, the "easier" the macroblock is to compress. |
| 21 | 23 |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 86 static void InitTables(void) { | 88 static void InitTables(void) { |
| 87 if (!tables_ok) { | 89 if (!tables_ok) { |
| 88 int i; | 90 int i; |
| 89 for (i = -255; i <= 255 + 255; ++i) { | 91 for (i = -255; i <= 255 + 255; ++i) { |
| 90 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; | 92 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; |
| 91 } | 93 } |
| 92 tables_ok = 1; | 94 tables_ok = 1; |
| 93 } | 95 } |
| 94 } | 96 } |
| 95 | 97 |
| 96 static inline uint8_t clip_8b(int v) { | 98 static WEBP_INLINE uint8_t clip_8b(int v) { |
| 97 return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255; | 99 return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255; |
| 98 } | 100 } |
| 99 | 101 |
| 100 //------------------------------------------------------------------------------ | 102 //------------------------------------------------------------------------------ |
| 101 // Transforms (Paragraph 14.4) | 103 // Transforms (Paragraph 14.4) |
| 102 | 104 |
| 103 #define STORE(x, y, v) \ | 105 #define STORE(x, y, v) \ |
| 104 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) | 106 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) |
| 105 | 107 |
| 106 static const int kC1 = 20091 + (1 << 16); | 108 static const int kC1 = 20091 + (1 << 16); |
| 107 static const int kC2 = 35468; | 109 static const int kC2 = 35468; |
| 108 #define MUL(a, b) (((a) * (b)) >> 16) | 110 #define MUL(a, b) (((a) * (b)) >> 16) |
| 109 | 111 |
| 110 static inline void ITransformOne(const uint8_t* ref, const int16_t* in, | 112 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, |
| 111 uint8_t* dst) { | 113 uint8_t* dst) { |
| 112 int C[4 * 4], *tmp; | 114 int C[4 * 4], *tmp; |
| 113 int i; | 115 int i; |
| 114 tmp = C; | 116 tmp = C; |
| 115 for (i = 0; i < 4; ++i) { // vertical pass | 117 for (i = 0; i < 4; ++i) { // vertical pass |
| 116 const int a = in[0] + in[8]; | 118 const int a = in[0] + in[8]; |
| 117 const int b = in[0] - in[8]; | 119 const int b = in[0] - in[8]; |
| 118 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); | 120 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); |
| 119 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); | 121 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); |
| 120 tmp[0] = a + d; | 122 tmp[0] = a + d; |
| 121 tmp[1] = b + c; | 123 tmp[1] = b + c; |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 234 } | 236 } |
| 235 | 237 |
| 236 #undef MUL | 238 #undef MUL |
| 237 #undef STORE | 239 #undef STORE |
| 238 | 240 |
| 239 //------------------------------------------------------------------------------ | 241 //------------------------------------------------------------------------------ |
| 240 // Intra predictions | 242 // Intra predictions |
| 241 | 243 |
| 242 #define DST(x, y) dst[(x) + (y) * BPS] | 244 #define DST(x, y) dst[(x) + (y) * BPS] |
| 243 | 245 |
| 244 static inline void Fill(uint8_t* dst, int value, int size) { | 246 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { |
| 245 int j; | 247 int j; |
| 246 for (j = 0; j < size; ++j) { | 248 for (j = 0; j < size; ++j) { |
| 247 memset(dst + j * BPS, value, size); | 249 memset(dst + j * BPS, value, size); |
| 248 } | 250 } |
| 249 } | 251 } |
| 250 | 252 |
| 251 static inline void VerticalPred(uint8_t* dst, const uint8_t* top, int size) { | 253 static WEBP_INLINE void VerticalPred(uint8_t* dst, |
| 254 const uint8_t* top, int size) { |
| 252 int j; | 255 int j; |
| 253 if (top) { | 256 if (top) { |
| 254 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); | 257 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); |
| 255 } else { | 258 } else { |
| 256 Fill(dst, 127, size); | 259 Fill(dst, 127, size); |
| 257 } | 260 } |
| 258 } | 261 } |
| 259 | 262 |
| 260 static inline void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) { | 263 static WEBP_INLINE void HorizontalPred(uint8_t* dst, |
| 264 const uint8_t* left, int size) { |
| 261 if (left) { | 265 if (left) { |
| 262 int j; | 266 int j; |
| 263 for (j = 0; j < size; ++j) { | 267 for (j = 0; j < size; ++j) { |
| 264 memset(dst + j * BPS, left[j], size); | 268 memset(dst + j * BPS, left[j], size); |
| 265 } | 269 } |
| 266 } else { | 270 } else { |
| 267 Fill(dst, 129, size); | 271 Fill(dst, 129, size); |
| 268 } | 272 } |
| 269 } | 273 } |
| 270 | 274 |
| 271 static inline void TrueMotion(uint8_t* dst, const uint8_t* left, | 275 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, |
| 272 const uint8_t* top, int size) { | 276 const uint8_t* top, int size) { |
| 273 int y; | 277 int y; |
| 274 if (left) { | 278 if (left) { |
| 275 if (top) { | 279 if (top) { |
| 276 const uint8_t* const clip = clip1 + 255 - left[-1]; | 280 const uint8_t* const clip = clip1 + 255 - left[-1]; |
| 277 for (y = 0; y < size; ++y) { | 281 for (y = 0; y < size; ++y) { |
| 278 const uint8_t* const clip_table = clip + left[y]; | 282 const uint8_t* const clip_table = clip + left[y]; |
| 279 int x; | 283 int x; |
| 280 for (x = 0; x < size; ++x) { | 284 for (x = 0; x < size; ++x) { |
| 281 dst[x] = clip_table[top[x]]; | 285 dst[x] = clip_table[top[x]]; |
| 282 } | 286 } |
| 283 dst += BPS; | 287 dst += BPS; |
| 284 } | 288 } |
| 285 } else { | 289 } else { |
| 286 HorizontalPred(dst, left, size); | 290 HorizontalPred(dst, left, size); |
| 287 } | 291 } |
| 288 } else { | 292 } else { |
| 289 // true motion without left samples (hence: with default 129 value) | 293 // true motion without left samples (hence: with default 129 value) |
| 290 // is equivalent to VE prediction where you just copy the top samples. | 294 // is equivalent to VE prediction where you just copy the top samples. |
| 291 // Note that if top samples are not available, the default value is | 295 // Note that if top samples are not available, the default value is |
| 292 // then 129, and not 127 as in the VerticalPred case. | 296 // then 129, and not 127 as in the VerticalPred case. |
| 293 if (top) { | 297 if (top) { |
| 294 VerticalPred(dst, top, size); | 298 VerticalPred(dst, top, size); |
| 295 } else { | 299 } else { |
| 296 Fill(dst, 129, size); | 300 Fill(dst, 129, size); |
| 297 } | 301 } |
| 298 } | 302 } |
| 299 } | 303 } |
| 300 | 304 |
| 301 static inline void DCMode(uint8_t* dst, const uint8_t* left, | 305 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, |
| 302 const uint8_t* top, | 306 const uint8_t* top, |
| 303 int size, int round, int shift) { | 307 int size, int round, int shift) { |
| 304 int DC = 0; | 308 int DC = 0; |
| 305 int j; | 309 int j; |
| 306 if (top) { | 310 if (top) { |
| 307 for (j = 0; j < size; ++j) DC += top[j]; | 311 for (j = 0; j < size; ++j) DC += top[j]; |
| 308 if (left) { // top and left present | 312 if (left) { // top and left present |
| 309 for (j = 0; j < size; ++j) DC += left[j]; | 313 for (j = 0; j < size; ++j) DC += left[j]; |
| 310 } else { // top, but no left | 314 } else { // top, but no left |
| 311 DC += DC; | 315 DC += DC; |
| 312 } | 316 } |
| 313 DC = (DC + round) >> shift; | 317 DC = (DC + round) >> shift; |
| (...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 536 VR4(I4VR4 + dst, top); | 540 VR4(I4VR4 + dst, top); |
| 537 LD4(I4LD4 + dst, top); | 541 LD4(I4LD4 + dst, top); |
| 538 VL4(I4VL4 + dst, top); | 542 VL4(I4VL4 + dst, top); |
| 539 HD4(I4HD4 + dst, top); | 543 HD4(I4HD4 + dst, top); |
| 540 HU4(I4HU4 + dst, top); | 544 HU4(I4HU4 + dst, top); |
| 541 } | 545 } |
| 542 | 546 |
| 543 //------------------------------------------------------------------------------ | 547 //------------------------------------------------------------------------------ |
| 544 // Metric | 548 // Metric |
| 545 | 549 |
| 546 static inline int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) { | 550 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, |
| 551 int w, int h) { |
| 547 int count = 0; | 552 int count = 0; |
| 548 int y, x; | 553 int y, x; |
| 549 for (y = 0; y < h; ++y) { | 554 for (y = 0; y < h; ++y) { |
| 550 for (x = 0; x < w; ++x) { | 555 for (x = 0; x < w; ++x) { |
| 551 const int diff = (int)a[x] - b[x]; | 556 const int diff = (int)a[x] - b[x]; |
| 552 count += diff * diff; | 557 count += diff * diff; |
| 553 } | 558 } |
| 554 a += BPS; | 559 a += BPS; |
| 555 b += BPS; | 560 b += BPS; |
| 556 } | 561 } |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 660 out[n] = 0; | 665 out[n] = 0; |
| 661 in[j] = 0; | 666 in[j] = 0; |
| 662 } | 667 } |
| 663 } | 668 } |
| 664 return (last >= 0); | 669 return (last >= 0); |
| 665 } | 670 } |
| 666 | 671 |
| 667 //------------------------------------------------------------------------------ | 672 //------------------------------------------------------------------------------ |
| 668 // Block copy | 673 // Block copy |
| 669 | 674 |
| 670 static inline void Copy(const uint8_t* src, uint8_t* dst, int size) { | 675 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { |
| 671 int y; | 676 int y; |
| 672 for (y = 0; y < size; ++y) { | 677 for (y = 0; y < size; ++y) { |
| 673 memcpy(dst, src, size); | 678 memcpy(dst, src, size); |
| 674 src += BPS; | 679 src += BPS; |
| 675 dst += BPS; | 680 dst += BPS; |
| 676 } | 681 } |
| 677 } | 682 } |
| 678 | 683 |
| 679 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } | 684 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } |
| 680 static void Copy8x8(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 8); } | |
| 681 static void Copy16x16(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 16); } | |
| 682 | 685 |
| 683 //------------------------------------------------------------------------------ | 686 //------------------------------------------------------------------------------ |
| 684 // Initialization | 687 // Initialization |
| 685 | 688 |
| 686 // Speed-critical function pointers. We have to initialize them to the default | 689 // Speed-critical function pointers. We have to initialize them to the default |
| 687 // implementations within VP8EncDspInit(). | 690 // implementations within VP8EncDspInit(). |
| 688 VP8CHisto VP8CollectHistogram; | 691 VP8CHisto VP8CollectHistogram; |
| 689 VP8Idct VP8ITransform; | 692 VP8Idct VP8ITransform; |
| 690 VP8Fdct VP8FTransform; | 693 VP8Fdct VP8FTransform; |
| 691 VP8WHT VP8ITransformWHT; | 694 VP8WHT VP8ITransformWHT; |
| 692 VP8WHT VP8FTransformWHT; | 695 VP8WHT VP8FTransformWHT; |
| 693 VP8Intra4Preds VP8EncPredLuma4; | 696 VP8Intra4Preds VP8EncPredLuma4; |
| 694 VP8IntraPreds VP8EncPredLuma16; | 697 VP8IntraPreds VP8EncPredLuma16; |
| 695 VP8IntraPreds VP8EncPredChroma8; | 698 VP8IntraPreds VP8EncPredChroma8; |
| 696 VP8Metric VP8SSE16x16; | 699 VP8Metric VP8SSE16x16; |
| 697 VP8Metric VP8SSE8x8; | 700 VP8Metric VP8SSE8x8; |
| 698 VP8Metric VP8SSE16x8; | 701 VP8Metric VP8SSE16x8; |
| 699 VP8Metric VP8SSE4x4; | 702 VP8Metric VP8SSE4x4; |
| 700 VP8WMetric VP8TDisto4x4; | 703 VP8WMetric VP8TDisto4x4; |
| 701 VP8WMetric VP8TDisto16x16; | 704 VP8WMetric VP8TDisto16x16; |
| 702 VP8QuantizeBlock VP8EncQuantizeBlock; | 705 VP8QuantizeBlock VP8EncQuantizeBlock; |
| 703 VP8BlockCopy VP8Copy4x4; | 706 VP8BlockCopy VP8Copy4x4; |
| 704 VP8BlockCopy VP8Copy8x8; | |
| 705 VP8BlockCopy VP8Copy16x16; | |
| 706 | 707 |
| 707 extern void VP8EncDspInitSSE2(void); | 708 extern void VP8EncDspInitSSE2(void); |
| 708 | 709 |
| 709 void VP8EncDspInit(void) { | 710 void VP8EncDspInit(void) { |
| 710 InitTables(); | 711 InitTables(); |
| 711 | 712 |
| 712 // default C implementations | 713 // default C implementations |
| 713 VP8CollectHistogram = CollectHistogram; | 714 VP8CollectHistogram = CollectHistogram; |
| 714 VP8ITransform = ITransform; | 715 VP8ITransform = ITransform; |
| 715 VP8FTransform = FTransform; | 716 VP8FTransform = FTransform; |
| 716 VP8ITransformWHT = ITransformWHT; | 717 VP8ITransformWHT = ITransformWHT; |
| 717 VP8FTransformWHT = FTransformWHT; | 718 VP8FTransformWHT = FTransformWHT; |
| 718 VP8EncPredLuma4 = Intra4Preds; | 719 VP8EncPredLuma4 = Intra4Preds; |
| 719 VP8EncPredLuma16 = Intra16Preds; | 720 VP8EncPredLuma16 = Intra16Preds; |
| 720 VP8EncPredChroma8 = IntraChromaPreds; | 721 VP8EncPredChroma8 = IntraChromaPreds; |
| 721 VP8SSE16x16 = SSE16x16; | 722 VP8SSE16x16 = SSE16x16; |
| 722 VP8SSE8x8 = SSE8x8; | 723 VP8SSE8x8 = SSE8x8; |
| 723 VP8SSE16x8 = SSE16x8; | 724 VP8SSE16x8 = SSE16x8; |
| 724 VP8SSE4x4 = SSE4x4; | 725 VP8SSE4x4 = SSE4x4; |
| 725 VP8TDisto4x4 = Disto4x4; | 726 VP8TDisto4x4 = Disto4x4; |
| 726 VP8TDisto16x16 = Disto16x16; | 727 VP8TDisto16x16 = Disto16x16; |
| 727 VP8EncQuantizeBlock = QuantizeBlock; | 728 VP8EncQuantizeBlock = QuantizeBlock; |
| 728 VP8Copy4x4 = Copy4x4; | 729 VP8Copy4x4 = Copy4x4; |
| 729 VP8Copy8x8 = Copy8x8; | |
| 730 VP8Copy16x16 = Copy16x16; | |
| 731 | 730 |
| 732 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 731 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
| 733 if (VP8GetCPUInfo) { | 732 if (VP8GetCPUInfo) { |
| 734 #if defined(__SSE2__) || defined(_MSC_VER) | 733 #if defined(WEBP_USE_SSE2) |
| 735 if (VP8GetCPUInfo(kSSE2)) { | 734 if (VP8GetCPUInfo(kSSE2)) { |
| 736 VP8EncDspInitSSE2(); | 735 VP8EncDspInitSSE2(); |
| 737 } | 736 } |
| 738 #endif | 737 #endif |
| 739 } | 738 } |
| 740 } | 739 } |
| 741 | 740 |
| 742 #if defined(__cplusplus) || defined(c_plusplus) | 741 #if defined(__cplusplus) || defined(c_plusplus) |
| 743 } // extern "C" | 742 } // extern "C" |
| 744 #endif | 743 #endif |
| OLD | NEW |