OLD | NEW |
1 // Copyright 2011 Google Inc. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // This code is licensed under the same terms as WebM: | 3 // This code is licensed under the same terms as WebM: |
4 // Software License Agreement: http://www.webmproject.org/license/software/ | 4 // Software License Agreement: http://www.webmproject.org/license/software/ |
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ | 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ |
6 // ----------------------------------------------------------------------------- | 6 // ----------------------------------------------------------------------------- |
7 // | 7 // |
8 // Speed-critical encoding functions. | 8 // Speed-critical encoding functions. |
9 // | 9 // |
10 // Author: Skal (pascal.massimino@gmail.com) | 10 // Author: Skal (pascal.massimino@gmail.com) |
11 | 11 |
| 12 #include <stdlib.h> // for abs() |
| 13 #include "./dsp.h" |
12 #include "../enc/vp8enci.h" | 14 #include "../enc/vp8enci.h" |
13 | 15 |
14 #if defined(__cplusplus) || defined(c_plusplus) | 16 #if defined(__cplusplus) || defined(c_plusplus) |
15 extern "C" { | 17 extern "C" { |
16 #endif | 18 #endif |
17 | 19 |
18 //------------------------------------------------------------------------------ | 20 //------------------------------------------------------------------------------ |
19 // Compute susceptibility based on DCT-coeff histograms: | 21 // Compute susceptibility based on DCT-coeff histograms: |
20 // the higher, the "easier" the macroblock is to compress. | 22 // the higher, the "easier" the macroblock is to compress. |
21 | 23 |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// InitTables (identical in OLD and NEW): one-time setup of the clip1[] lookup table.
// After it runs, clip1[255 + i] holds i clamped to [0, 255] for every i in [-255, 510],
// i.e. table indices 0..765 are written. tables_ok is set afterwards so later calls
// skip the fill.
// NOTE(review): clip1 and tables_ok are declared in the lines the diff viewer skipped;
// their exact types/sizes are not visible here -- confirm clip1 has >= 766 entries.
86 static void InitTables(void) { | 88 static void InitTables(void) { |
87 if (!tables_ok) { | 89 if (!tables_ok) { |
88 int i; | 90 int i; |
89 for (i = -255; i <= 255 + 255; ++i) { | 91 for (i = -255; i <= 255 + 255; ++i) { |
90 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; | 92 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; |
91 } | 93 } |
92 tables_ok = 1; | 94 tables_ok = 1; |
93 } | 95 } |
94 } | 96 } |
95 | 97 |
// clip_8b: returns v clamped to the 8-bit range [0, 255].
// The test !(v & ~0xff) is the fast path: no bit outside the low 8 is set, so v is
// returned unchanged; otherwise negative values map to 0 and the rest to 255.
// Diff: NEW replaces the `inline` keyword with the WEBP_INLINE macro.
// NOTE(review): WEBP_INLINE is presumably provided by the newly included "./dsp.h" -- confirm.
96 static inline uint8_t clip_8b(int v) { | 98 static WEBP_INLINE uint8_t clip_8b(int v) { |
97 return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255; | 99 return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255; |
98 } | 100 } |
99 | 101 |
100 //------------------------------------------------------------------------------ | 102 //------------------------------------------------------------------------------ |
101 // Transforms (Paragraph 14.4) | 103 // Transforms (Paragraph 14.4) |
102 | 104 |
// STORE(x, y, v): writes to dst at column x, row y (row stride BPS) the value
// clip_8b(ref[same position] + (v >> 3)). The macro expects variables named `dst`
// and `ref` to be in scope where it is expanded. Identical in OLD and NEW.
103 #define STORE(x, y, v) \ | 105 #define STORE(x, y, v) \ |
104 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) | 106 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) |
105 | 107 |
// Fixed-point multipliers used with MUL(), which computes (a * b) >> 16:
//   MUL(a, kC1) == a * (20091 + 65536) / 65536  (about a * 1.3066, rounded down by the shift)
//   MUL(a, kC2) == a * 35468 / 65536            (about a * 0.5412, rounded down by the shift)
106 static const int kC1 = 20091 + (1 << 16); | 108 static const int kC1 = 20091 + (1 << 16); |
107 static const int kC2 = 35468; | 109 static const int kC2 = 35468; |
108 #define MUL(a, b) (((a) * (b)) >> 16) | 110 #define MUL(a, b) (((a) * (b)) >> 16) |
109 | 111 |
110 static inline void ITransformOne(const uint8_t* ref, const int16_t* in, | 112 static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, |
111 uint8_t* dst) { | 113 uint8_t* dst) { |
112 int C[4 * 4], *tmp; | 114 int C[4 * 4], *tmp; |
113 int i; | 115 int i; |
114 tmp = C; | 116 tmp = C; |
115 for (i = 0; i < 4; ++i) { // vertical pass | 117 for (i = 0; i < 4; ++i) { // vertical pass |
116 const int a = in[0] + in[8]; | 118 const int a = in[0] + in[8]; |
117 const int b = in[0] - in[8]; | 119 const int b = in[0] - in[8]; |
118 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); | 120 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); |
119 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); | 121 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); |
120 tmp[0] = a + d; | 122 tmp[0] = a + d; |
121 tmp[1] = b + c; | 123 tmp[1] = b + c; |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
234 } | 236 } |
235 | 237 |
236 #undef MUL | 238 #undef MUL |
237 #undef STORE | 239 #undef STORE |
238 | 240 |
239 //------------------------------------------------------------------------------ | 241 //------------------------------------------------------------------------------ |
240 // Intra predictions | 242 // Intra predictions |
241 | 243 |
242 #define DST(x, y) dst[(x) + (y) * BPS] | 244 #define DST(x, y) dst[(x) + (y) * BPS] |
243 | 245 |
// Fill: sets a size x size square of bytes to `value`. Rows start BPS bytes apart,
// and only the first `size` bytes of each row are written.
// Diff: NEW replaces `inline` with WEBP_INLINE; the body is unchanged.
244 static inline void Fill(uint8_t* dst, int value, int size) { | 246 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { |
245 int j; | 247 int j; |
246 for (j = 0; j < size; ++j) { | 248 for (j = 0; j < size; ++j) { |
247 memset(dst + j * BPS, value, size); | 249 memset(dst + j * BPS, value, size); |
248 } | 250 } |
249 } | 251 } |
250 | 252 |
// VerticalPred: fills a size x size block (row stride BPS) by copying the `size` bytes
// at `top` into every row. When `top` is NULL the block is filled with the constant 127.
// Diff: NEW replaces `inline` with WEBP_INLINE and wraps the parameter list onto a
// second line; the body is unchanged.
251 static inline void VerticalPred(uint8_t* dst, const uint8_t* top, int size) { | 253 static WEBP_INLINE void VerticalPred(uint8_t* dst, |
| 254 const uint8_t* top, int size) { |
252 int j; | 255 int j; |
253 if (top) { | 256 if (top) { |
254 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); | 257 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); |
255 } else { | 258 } else { |
256 Fill(dst, 127, size); | 259 Fill(dst, 127, size); |
257 } | 260 } |
258 } | 261 } |
259 | 262 |
// HorizontalPred: fills a size x size block (row stride BPS) so that every byte of
// row j equals left[j]. When `left` is NULL the block is filled with the constant 129
// (note: 129 here versus 127 in VerticalPred).
// Diff: NEW replaces `inline` with WEBP_INLINE and wraps the parameter list onto a
// second line; the body is unchanged.
260 static inline void HorizontalPred(uint8_t* dst, const uint8_t* left, int size) { | 263 static WEBP_INLINE void HorizontalPred(uint8_t* dst, |
| 264 const uint8_t* left, int size) { |
261 if (left) { | 265 if (left) { |
262 int j; | 266 int j; |
263 for (j = 0; j < size; ++j) { | 267 for (j = 0; j < size; ++j) { |
264 memset(dst + j * BPS, left[j], size); | 268 memset(dst + j * BPS, left[j], size); |
265 } | 269 } |
266 } else { | 270 } else { |
267 Fill(dst, 129, size); | 271 Fill(dst, 129, size); |
268 } | 272 } |
269 } | 273 } |
270 | 274 |
// TrueMotion: fills a size x size block (row stride BPS) from the `left` and `top` samples.
//   left && top : dst(x, y) = clip1[255 + left[y] + top[x] - left[-1]], which with the table
//                 built by InitTables() is left[y] + top[x] - left[-1] clamped to [0, 255].
//   left only   : same as HorizontalPred(dst, left, size).
//   top only    : same as VerticalPred(dst, top, size).
//   neither     : block filled with 129.
// The first case reads left[-1], so `left` must point at least one byte into a readable
// buffer (presumably the top-left neighbour sample -- confirm against the caller).
// It also relies on InitTables() having been run so that clip1[] is populated.
// Diff: NEW replaces `inline` with WEBP_INLINE; the body is unchanged.
271 static inline void TrueMotion(uint8_t* dst, const uint8_t* left, | 275 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, |
272 const uint8_t* top, int size) { | 276 const uint8_t* top, int size) { |
273 int y; | 277 int y; |
274 if (left) { | 278 if (left) { |
275 if (top) { | 279 if (top) { |
// `clip` is the table pre-offset by (255 - left[-1]); it is the same for every row.
276 const uint8_t* const clip = clip1 + 255 - left[-1]; | 280 const uint8_t* const clip = clip1 + 255 - left[-1]; |
277 for (y = 0; y < size; ++y) { | 281 for (y = 0; y < size; ++y) { |
// Per-row offset by left[y]; indexing by top[x] then completes the sum and clamps it.
278 const uint8_t* const clip_table = clip + left[y]; | 282 const uint8_t* const clip_table = clip + left[y]; |
279 int x; | 283 int x; |
280 for (x = 0; x < size; ++x) { | 284 for (x = 0; x < size; ++x) { |
281 dst[x] = clip_table[top[x]]; | 285 dst[x] = clip_table[top[x]]; |
282 } | 286 } |
283 dst += BPS; | 287 dst += BPS; |
284 } | 288 } |
285 } else { | 289 } else { |
286 HorizontalPred(dst, left, size); | 290 HorizontalPred(dst, left, size); |
287 } | 291 } |
288 } else { | 292 } else { |
289 // true motion without left samples (hence: with default 129 value) | 293 // true motion without left samples (hence: with default 129 value) |
290 // is equivalent to VE prediction where you just copy the top samples. | 294 // is equivalent to VE prediction where you just copy the top samples. |
291 // Note that if top samples are not available, the default value is | 295 // Note that if top samples are not available, the default value is |
292 // then 129, and not 127 as in the VerticalPred case. | 296 // then 129, and not 127 as in the VerticalPred case. |
293 if (top) { | 297 if (top) { |
294 VerticalPred(dst, top, size); | 298 VerticalPred(dst, top, size); |
295 } else { | 299 } else { |
296 Fill(dst, 129, size); | 300 Fill(dst, 129, size); |
297 } | 301 } |
298 } | 302 } |
299 } | 303 } |
300 | 304 |
301 static inline void DCMode(uint8_t* dst, const uint8_t* left, | 305 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, |
302 const uint8_t* top, | 306 const uint8_t* top, |
303 int size, int round, int shift) { | 307 int size, int round, int shift) { |
304 int DC = 0; | 308 int DC = 0; |
305 int j; | 309 int j; |
306 if (top) { | 310 if (top) { |
307 for (j = 0; j < size; ++j) DC += top[j]; | 311 for (j = 0; j < size; ++j) DC += top[j]; |
308 if (left) { // top and left present | 312 if (left) { // top and left present |
309 for (j = 0; j < size; ++j) DC += left[j]; | 313 for (j = 0; j < size; ++j) DC += left[j]; |
310 } else { // top, but no left | 314 } else { // top, but no left |
311 DC += DC; | 315 DC += DC; |
312 } | 316 } |
313 DC = (DC + round) >> shift; | 317 DC = (DC + round) >> shift; |
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
536 VR4(I4VR4 + dst, top); | 540 VR4(I4VR4 + dst, top); |
537 LD4(I4LD4 + dst, top); | 541 LD4(I4LD4 + dst, top); |
538 VL4(I4VL4 + dst, top); | 542 VL4(I4VL4 + dst, top); |
539 HD4(I4HD4 + dst, top); | 543 HD4(I4HD4 + dst, top); |
540 HU4(I4HU4 + dst, top); | 544 HU4(I4HU4 + dst, top); |
541 } | 545 } |
542 | 546 |
543 //------------------------------------------------------------------------------ | 547 //------------------------------------------------------------------------------ |
544 // Metric | 548 // Metric |
545 | 549 |
546 static inline int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) { | 550 static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, |
| 551 int w, int h) { |
547 int count = 0; | 552 int count = 0; |
548 int y, x; | 553 int y, x; |
549 for (y = 0; y < h; ++y) { | 554 for (y = 0; y < h; ++y) { |
550 for (x = 0; x < w; ++x) { | 555 for (x = 0; x < w; ++x) { |
551 const int diff = (int)a[x] - b[x]; | 556 const int diff = (int)a[x] - b[x]; |
552 count += diff * diff; | 557 count += diff * diff; |
553 } | 558 } |
554 a += BPS; | 559 a += BPS; |
555 b += BPS; | 560 b += BPS; |
556 } | 561 } |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
660 out[n] = 0; | 665 out[n] = 0; |
661 in[j] = 0; | 666 in[j] = 0; |
662 } | 667 } |
663 } | 668 } |
664 return (last >= 0); | 669 return (last >= 0); |
665 } | 670 } |
666 | 671 |
667 //------------------------------------------------------------------------------ | 672 //------------------------------------------------------------------------------ |
668 // Block copy | 673 // Block copy |
669 | 674 |
// Copy: copies a size x size block of bytes from src to dst, one row at a time.
// Both buffers use the same row stride BPS. Each row is copied with memcpy, so the
// source and destination rows must not overlap.
// Diff: NEW replaces `inline` with WEBP_INLINE; the body is unchanged.
670 static inline void Copy(const uint8_t* src, uint8_t* dst, int size) { | 675 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { |
671 int y; | 676 int y; |
672 for (y = 0; y < size; ++y) { | 677 for (y = 0; y < size; ++y) { |
673 memcpy(dst, src, size); | 678 memcpy(dst, src, size); |
674 src += BPS; | 679 src += BPS; |
675 dst += BPS; | 680 dst += BPS; |
676 } | 681 } |
677 } | 682 } |
678 | 683 |
// Fixed-size wrappers around Copy(); each forwards src/dst with a constant block size.
// Diff: NEW keeps only Copy4x4. Copy8x8 and Copy16x16 exist in OLD only (their NEW
// column is empty), matching the removal of VP8Copy8x8 / VP8Copy16x16 further down.
679 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } | 684 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } |
680 static void Copy8x8(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 8); } | |
681 static void Copy16x16(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 16); } | |
682 | 685 |
683 //------------------------------------------------------------------------------ | 686 //------------------------------------------------------------------------------ |
684 // Initialization | 687 // Initialization |
685 | 688 |
686 // Speed-critical function pointers. We have to initialize them to the default | 689 // Speed-critical function pointers. We have to initialize them to the default |
687 // implementations within VP8EncDspInit(). | 690 // implementations within VP8EncDspInit(). |
// Definitions of the global function pointers. They have no initializer here, so they
// start out NULL and become usable only after VP8EncDspInit() has assigned them.
// NOTE(review): the pointer typedefs (VP8CHisto, VP8Idct, ...) are declared outside this
// view, presumably in "./dsp.h" or "../enc/vp8enci.h" -- confirm.
// Diff: NEW removes VP8Copy8x8 and VP8Copy16x16 (their NEW column is empty).
688 VP8CHisto VP8CollectHistogram; | 691 VP8CHisto VP8CollectHistogram; |
689 VP8Idct VP8ITransform; | 692 VP8Idct VP8ITransform; |
690 VP8Fdct VP8FTransform; | 693 VP8Fdct VP8FTransform; |
691 VP8WHT VP8ITransformWHT; | 694 VP8WHT VP8ITransformWHT; |
692 VP8WHT VP8FTransformWHT; | 695 VP8WHT VP8FTransformWHT; |
693 VP8Intra4Preds VP8EncPredLuma4; | 696 VP8Intra4Preds VP8EncPredLuma4; |
694 VP8IntraPreds VP8EncPredLuma16; | 697 VP8IntraPreds VP8EncPredLuma16; |
695 VP8IntraPreds VP8EncPredChroma8; | 698 VP8IntraPreds VP8EncPredChroma8; |
696 VP8Metric VP8SSE16x16; | 699 VP8Metric VP8SSE16x16; |
697 VP8Metric VP8SSE8x8; | 700 VP8Metric VP8SSE8x8; |
698 VP8Metric VP8SSE16x8; | 701 VP8Metric VP8SSE16x8; |
699 VP8Metric VP8SSE4x4; | 702 VP8Metric VP8SSE4x4; |
700 VP8WMetric VP8TDisto4x4; | 703 VP8WMetric VP8TDisto4x4; |
701 VP8WMetric VP8TDisto16x16; | 704 VP8WMetric VP8TDisto16x16; |
702 VP8QuantizeBlock VP8EncQuantizeBlock; | 705 VP8QuantizeBlock VP8EncQuantizeBlock; |
703 VP8BlockCopy VP8Copy4x4; | 706 VP8BlockCopy VP8Copy4x4; |
704 VP8BlockCopy VP8Copy8x8; | |
705 VP8BlockCopy VP8Copy16x16; | |
706 | 707 |
// VP8EncDspInitSSE2 is declared unconditionally here but only called inside the SSE2
// compile guard below; its definition is not part of this view.
707 extern void VP8EncDspInitSSE2(void); | 708 extern void VP8EncDspInitSSE2(void); |
708 | 709 |
// VP8EncDspInit: must run before any of the VP8* function pointers above is used.
//   1. InitTables() populates the clip1[] lookup table.
//   2. Every function pointer is set to its plain-C implementation from this file.
//   3. If the VP8GetCPUInfo pointer is non-NULL, the SSE2 guard is compiled in, and
//      VP8GetCPUInfo(kSSE2) returns non-zero, VP8EncDspInitSSE2() is called afterwards
//      (per the existing comment below, to overwrite some pointers with faster versions).
// Diff: NEW drops the VP8Copy8x8 / VP8Copy16x16 assignments and changes the compile
// guard from `defined(__SSE2__) || defined(_MSC_VER)` to `defined(WEBP_USE_SSE2)`.
// NOTE(review): WEBP_USE_SSE2 is presumably defined by "./dsp.h" -- confirm.
709 void VP8EncDspInit(void) { | 710 void VP8EncDspInit(void) { |
710 InitTables(); | 711 InitTables(); |
711 | 712 |
712 // default C implementations | 713 // default C implementations |
713 VP8CollectHistogram = CollectHistogram; | 714 VP8CollectHistogram = CollectHistogram; |
714 VP8ITransform = ITransform; | 715 VP8ITransform = ITransform; |
715 VP8FTransform = FTransform; | 716 VP8FTransform = FTransform; |
716 VP8ITransformWHT = ITransformWHT; | 717 VP8ITransformWHT = ITransformWHT; |
717 VP8FTransformWHT = FTransformWHT; | 718 VP8FTransformWHT = FTransformWHT; |
718 VP8EncPredLuma4 = Intra4Preds; | 719 VP8EncPredLuma4 = Intra4Preds; |
719 VP8EncPredLuma16 = Intra16Preds; | 720 VP8EncPredLuma16 = Intra16Preds; |
720 VP8EncPredChroma8 = IntraChromaPreds; | 721 VP8EncPredChroma8 = IntraChromaPreds; |
721 VP8SSE16x16 = SSE16x16; | 722 VP8SSE16x16 = SSE16x16; |
722 VP8SSE8x8 = SSE8x8; | 723 VP8SSE8x8 = SSE8x8; |
723 VP8SSE16x8 = SSE16x8; | 724 VP8SSE16x8 = SSE16x8; |
724 VP8SSE4x4 = SSE4x4; | 725 VP8SSE4x4 = SSE4x4; |
725 VP8TDisto4x4 = Disto4x4; | 726 VP8TDisto4x4 = Disto4x4; |
726 VP8TDisto16x16 = Disto16x16; | 727 VP8TDisto16x16 = Disto16x16; |
727 VP8EncQuantizeBlock = QuantizeBlock; | 728 VP8EncQuantizeBlock = QuantizeBlock; |
728 VP8Copy4x4 = Copy4x4; | 729 VP8Copy4x4 = Copy4x4; |
729 VP8Copy8x8 = Copy8x8; | |
730 VP8Copy16x16 = Copy16x16; | |
731 | 730 |
732 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 731 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
733 if (VP8GetCPUInfo) { | 732 if (VP8GetCPUInfo) { |
734 #if defined(__SSE2__) || defined(_MSC_VER) | 733 #if defined(WEBP_USE_SSE2) |
735 if (VP8GetCPUInfo(kSSE2)) { | 734 if (VP8GetCPUInfo(kSSE2)) { |
736 VP8EncDspInitSSE2(); | 735 VP8EncDspInitSSE2(); |
737 } | 736 } |
738 #endif | 737 #endif |
739 } | 738 } |
740 } | 739 } |
741 | 740 |
742 #if defined(__cplusplus) || defined(c_plusplus) | 741 #if defined(__cplusplus) || defined(c_plusplus) |
743 } // extern "C" | 742 } // extern "C" |
744 #endif | 743 #endif |
OLD | NEW |