| OLD | NEW |
| 1 // Copyright 2010 Google Inc. All Rights Reserved. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
| 4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
| 5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
| 6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
| 7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
| 8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
| 9 // | 9 // |
| 10 // Speed-critical decoding functions. | 10 // Speed-critical decoding functions. |
| 11 // | 11 // |
| 12 // Author: Skal (pascal.massimino@gmail.com) | 12 // Author: Skal (pascal.massimino@gmail.com) |
| 13 | 13 |
| 14 #include "./dsp.h" | 14 #include "./dsp.h" |
| 15 #include "../dec/vp8i.h" | 15 #include "../dec/vp8i.h" |
| 16 | 16 |
| 17 #if defined(__cplusplus) || defined(c_plusplus) | |
| 18 extern "C" { | |
| 19 #endif | |
| 20 | |
| 21 //------------------------------------------------------------------------------ | 17 //------------------------------------------------------------------------------ |
| 22 // run-time tables (~4k) | 18 // run-time tables (~4k) |
| 23 | 19 |
// Lookup tables filled in at run time. The bracketed ranges below are the
// signed input ranges each table is sized for; the index bias used to map a
// signed value onto the array is presumably the magnitude of the lower bound
// (TODO confirm against the table-initialization code).
static uint8_t abs0[2 * 255 + 1];     // abs(i) for i in [-255, 255]
static uint8_t abs1[2 * 255 + 1];     // abs(i) >> 1 for i in [-255, 255]
static int8_t sclip1[2 * 1020 + 1];   // clips [-1020, 1020] to [-128, 127]
static int8_t sclip2[2 * 112 + 1];    // clips [-112, 112] to [-16, 15]
static uint8_t clip1[255 + 510 + 1];  // clips [-255, 510] to [0, 255]
| 29 | 25 |
| 30 // We declare this variable 'volatile' to prevent instruction reordering | 26 // We declare this variable 'volatile' to prevent instruction reordering |
| (...skipping 23 matching lines...) Expand all Loading... |
| 54 static WEBP_INLINE uint8_t clip_8b(int v) { | 50 static WEBP_INLINE uint8_t clip_8b(int v) { |
| 55 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; | 51 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
| 56 } | 52 } |
| 57 | 53 |
| 58 //------------------------------------------------------------------------------ | 54 //------------------------------------------------------------------------------ |
| 59 // Transforms (Paragraph 14.4) | 55 // Transforms (Paragraph 14.4) |
| 60 | 56 |
// Adds the residual (v >> 3) to the pixel at column 'x', row 'y' of 'dst'
// (rows are BPS bytes apart) and saturates the result to [0, 255].
// Expects 'dst', BPS and clip_8b() to be in scope at the point of use.
// Every argument is parenthesized so that expression arguments (e.g. a row
// given as 'j + 1') index the intended pixel. Note that 'x' and 'y' are
// evaluated twice, so they must be free of side effects.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
| 63 | 59 |
// Writes one full 4-pixel row 'y' through STORE(). The four residuals are
// row_dc_ + d, row_dc_ + c, row_dc_ - c and row_dc_ - d, from left to right.
// 'dc' is evaluated once into a block-local temporary; 'd' and 'c' are each
// expanded twice, so they should be side-effect free.
#define STORE2(y, dc, d, c) do {    \
  const int row_dc_ = (dc);         \
  STORE(0, y, row_dc_ + (d));       \
  STORE(1, y, row_dc_ + (c));       \
  STORE(2, y, row_dc_ - (c));       \
  STORE(3, y, row_dc_ - (d));       \
} while (0)
| 67 |
// Transform multipliers in 16.16 fixed point (value / 65536):
//   kC1 = 85627 ~ 1.30656  (documented in RFC 6386 as sqrt(2) * cos(pi / 8))
//   kC2 = 35468 ~ 0.54120  (documented in RFC 6386 as sqrt(2) * sin(pi / 8))
static const int kC1 = (1 << 16) + 20091;
static const int kC2 = 35468;
// Fixed-point multiply: full-precision product, then drop the 16 fractional
// bits with a right shift.
#define MUL(a, b) (((a) * (b)) >> 16)
| 67 | 71 |
| 68 static void TransformOne(const int16_t* in, uint8_t* dst) { | 72 static void TransformOne(const int16_t* in, uint8_t* dst) { |
| 69 int C[4 * 4], *tmp; | 73 int C[4 * 4], *tmp; |
| 70 int i; | 74 int i; |
| 71 tmp = C; | 75 tmp = C; |
| 72 for (i = 0; i < 4; ++i) { // vertical pass | 76 for (i = 0; i < 4; ++i) { // vertical pass |
| 73 const int a = in[0] + in[8]; // [-4096, 4094] | 77 const int a = in[0] + in[8]; // [-4096, 4094] |
| (...skipping 22 matching lines...) Expand all Loading... |
| 96 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); | 100 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); |
| 97 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); | 101 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); |
| 98 STORE(0, 0, a + d); | 102 STORE(0, 0, a + d); |
| 99 STORE(1, 0, b + c); | 103 STORE(1, 0, b + c); |
| 100 STORE(2, 0, b - c); | 104 STORE(2, 0, b - c); |
| 101 STORE(3, 0, a - d); | 105 STORE(3, 0, a - d); |
| 102 tmp++; | 106 tmp++; |
| 103 dst += BPS; | 107 dst += BPS; |
| 104 } | 108 } |
| 105 } | 109 } |
| 110 |
| 111 // Simplified transform when only in[0], in[1] and in[4] are non-zero |
| 112 static void TransformAC3(const int16_t* in, uint8_t* dst) { |
| 113 const int a = in[0] + 4; |
| 114 const int c4 = MUL(in[4], kC2); |
| 115 const int d4 = MUL(in[4], kC1); |
| 116 const int c1 = MUL(in[1], kC2); |
| 117 const int d1 = MUL(in[1], kC1); |
| 118 STORE2(0, a + d4, d1, c1); |
| 119 STORE2(1, a + c4, d1, c1); |
| 120 STORE2(2, a - c4, d1, c1); |
| 121 STORE2(3, a - d4, d1, c1); |
| 122 } |
| 106 #undef MUL | 123 #undef MUL |
| 124 #undef STORE2 |
| 107 | 125 |
// Runs TransformOne() on the block at 'in'/'dst'. When 'do_two' is non-zero,
// also runs it on the next block: 16 coefficients further along 'in' and
// 4 bytes further along 'dst'.
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
  TransformOne(in, dst);
  if (!do_two) {
    return;
  }
  TransformOne(in + 16, dst + 4);
}
| 114 | 132 |
| 115 static void TransformUV(const int16_t* in, uint8_t* dst) { | 133 static void TransformUV(const int16_t* in, uint8_t* dst) { |
| 116 VP8Transform(in + 0 * 16, dst, 1); | 134 VP8Transform(in + 0 * 16, dst, 1); |
| (...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 672 | 690 |
// Runs FilterLoop24() once on each chroma plane, starting 4 bytes past the
// supplied 'u' and 'v' pointers (presumably the inner vertical edge of an
// 8-pixel-wide block -- confirm against the caller). The literal arguments
// 1 and 8 are passed through unchanged alongside 'stride'; their exact
// meaning is defined by FilterLoop24().
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8_t* const u_inner = u + 4;
  uint8_t* const v_inner = v + 4;
  FilterLoop24(u_inner, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v_inner, 1, stride, 8, thresh, ithresh, hev_thresh);
}
| 678 | 696 |
| 679 //------------------------------------------------------------------------------ | 697 //------------------------------------------------------------------------------ |
| 680 | 698 |
| 681 VP8DecIdct2 VP8Transform; | 699 VP8DecIdct2 VP8Transform; |
| 700 VP8DecIdct VP8TransformAC3; |
| 682 VP8DecIdct VP8TransformUV; | 701 VP8DecIdct VP8TransformUV; |
| 683 VP8DecIdct VP8TransformDC; | 702 VP8DecIdct VP8TransformDC; |
| 684 VP8DecIdct VP8TransformDCUV; | 703 VP8DecIdct VP8TransformDCUV; |
| 685 | 704 |
| 686 VP8LumaFilterFunc VP8VFilter16; | 705 VP8LumaFilterFunc VP8VFilter16; |
| 687 VP8LumaFilterFunc VP8HFilter16; | 706 VP8LumaFilterFunc VP8HFilter16; |
| 688 VP8ChromaFilterFunc VP8VFilter8; | 707 VP8ChromaFilterFunc VP8VFilter8; |
| 689 VP8ChromaFilterFunc VP8HFilter8; | 708 VP8ChromaFilterFunc VP8HFilter8; |
| 690 VP8LumaFilterFunc VP8VFilter16i; | 709 VP8LumaFilterFunc VP8VFilter16i; |
| 691 VP8LumaFilterFunc VP8HFilter16i; | 710 VP8LumaFilterFunc VP8HFilter16i; |
| 692 VP8ChromaFilterFunc VP8VFilter8i; | 711 VP8ChromaFilterFunc VP8VFilter8i; |
| 693 VP8ChromaFilterFunc VP8HFilter8i; | 712 VP8ChromaFilterFunc VP8HFilter8i; |
| 694 VP8SimpleFilterFunc VP8SimpleVFilter16; | 713 VP8SimpleFilterFunc VP8SimpleVFilter16; |
| 695 VP8SimpleFilterFunc VP8SimpleHFilter16; | 714 VP8SimpleFilterFunc VP8SimpleHFilter16; |
| 696 VP8SimpleFilterFunc VP8SimpleVFilter16i; | 715 VP8SimpleFilterFunc VP8SimpleVFilter16i; |
| 697 VP8SimpleFilterFunc VP8SimpleHFilter16i; | 716 VP8SimpleFilterFunc VP8SimpleHFilter16i; |
| 698 | 717 |
| 699 extern void VP8DspInitSSE2(void); | 718 extern void VP8DspInitSSE2(void); |
| 700 #if defined(WEBP_USE_NEON) | |
| 701 extern void VP8DspInitNEON(void); | 719 extern void VP8DspInitNEON(void); |
| 702 #endif | |
| 703 | 720 |
| 704 void VP8DspInit(void) { | 721 void VP8DspInit(void) { |
| 705 DspInitTables(); | 722 DspInitTables(); |
| 706 | 723 |
| 707 VP8Transform = TransformTwo; | 724 VP8Transform = TransformTwo; |
| 708 VP8TransformUV = TransformUV; | 725 VP8TransformUV = TransformUV; |
| 709 VP8TransformDC = TransformDC; | 726 VP8TransformDC = TransformDC; |
| 710 VP8TransformDCUV = TransformDCUV; | 727 VP8TransformDCUV = TransformDCUV; |
| 728 VP8TransformAC3 = TransformAC3; |
| 711 | 729 |
| 712 VP8VFilter16 = VFilter16; | 730 VP8VFilter16 = VFilter16; |
| 713 VP8HFilter16 = HFilter16; | 731 VP8HFilter16 = HFilter16; |
| 714 VP8VFilter8 = VFilter8; | 732 VP8VFilter8 = VFilter8; |
| 715 VP8HFilter8 = HFilter8; | 733 VP8HFilter8 = HFilter8; |
| 716 VP8VFilter16i = VFilter16i; | 734 VP8VFilter16i = VFilter16i; |
| 717 VP8HFilter16i = HFilter16i; | 735 VP8HFilter16i = HFilter16i; |
| 718 VP8VFilter8i = VFilter8i; | 736 VP8VFilter8i = VFilter8i; |
| 719 VP8HFilter8i = HFilter8i; | 737 VP8HFilter8i = HFilter8i; |
| 720 VP8SimpleVFilter16 = SimpleVFilter16; | 738 VP8SimpleVFilter16 = SimpleVFilter16; |
| 721 VP8SimpleHFilter16 = SimpleHFilter16; | 739 VP8SimpleHFilter16 = SimpleHFilter16; |
| 722 VP8SimpleVFilter16i = SimpleVFilter16i; | 740 VP8SimpleVFilter16i = SimpleVFilter16i; |
| 723 VP8SimpleHFilter16i = SimpleHFilter16i; | 741 VP8SimpleHFilter16i = SimpleHFilter16i; |
| 724 | 742 |
| 725 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 743 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
| 726 if (VP8GetCPUInfo) { | 744 if (VP8GetCPUInfo) { |
| 727 #if defined(WEBP_USE_SSE2) | 745 #if defined(WEBP_USE_SSE2) |
| 728 if (VP8GetCPUInfo(kSSE2)) { | 746 if (VP8GetCPUInfo(kSSE2)) { |
| 729 VP8DspInitSSE2(); | 747 VP8DspInitSSE2(); |
| 730 } | 748 } |
| 731 #elif defined(WEBP_USE_NEON) | 749 #elif defined(WEBP_USE_NEON) |
| 732 if (VP8GetCPUInfo(kNEON)) { | 750 if (VP8GetCPUInfo(kNEON)) { |
| 733 VP8DspInitNEON(); | 751 VP8DspInitNEON(); |
| 734 } | 752 } |
| 735 #endif | 753 #endif |
| 736 } | 754 } |
| 737 } | 755 } |
| 738 | 756 |
| 739 #if defined(__cplusplus) || defined(c_plusplus) | |
| 740 } // extern "C" | |
| 741 #endif | |
| OLD | NEW |