OLD | NEW |
1 // Copyright 2010 Google Inc. All Rights Reserved. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Speed-critical decoding functions. | 10 // Speed-critical decoding functions. |
11 // | 11 // |
12 // Author: Skal (pascal.massimino@gmail.com) | 12 // Author: Skal (pascal.massimino@gmail.com) |
13 | 13 |
14 #include "./dsp.h" | 14 #include "./dsp.h" |
15 #include "../dec/vp8i.h" | 15 #include "../dec/vp8i.h" |
16 | 16 |
17 #if defined(__cplusplus) || defined(c_plusplus) | |
18 extern "C" { | |
19 #endif | |
20 | |
21 //------------------------------------------------------------------------------ | 17 //------------------------------------------------------------------------------ |
22 // run-time tables (~4k) | 18 // run-time tables (~4k) |
23 | 19 |
24 static uint8_t abs0[255 + 255 + 1]; // abs(i) | 20 static uint8_t abs0[255 + 255 + 1]; // abs(i) |
25 static uint8_t abs1[255 + 255 + 1]; // abs(i)>>1 | 21 static uint8_t abs1[255 + 255 + 1]; // abs(i)>>1 |
26 static int8_t sclip1[1020 + 1020 + 1]; // clips [-1020, 1020] to [-128, 127] | 22 static int8_t sclip1[1020 + 1020 + 1]; // clips [-1020, 1020] to [-128, 127] |
27 static int8_t sclip2[112 + 112 + 1]; // clips [-112, 112] to [-16, 15] | 23 static int8_t sclip2[112 + 112 + 1]; // clips [-112, 112] to [-16, 15] |
28 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] | 24 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] |
29 | 25 |
30 // We declare this variable 'volatile' to prevent instruction reordering | 26 // We declare this variable 'volatile' to prevent instruction reordering |
(...skipping 23 matching lines...) Expand all Loading... |
// Clamps 'v' to the range [0, 255]. 'v' is returned unchanged when it has no
// bits set outside the low eight; otherwise the result is 0 for a negative
// 'v' and 255 for anything else.
54 static WEBP_INLINE uint8_t clip_8b(int v) { | 50 static WEBP_INLINE uint8_t clip_8b(int v) { |
55 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; | 51 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
56 } | 52 } |
57 | 53 |
58 //------------------------------------------------------------------------------ | 54 //------------------------------------------------------------------------------ |
59 // Transforms (Paragraph 14.4) | 55 // Transforms (Paragraph 14.4) |
60 | 56 |
// STORE(x, y, v): adds (v) >> 3 to the byte at dst[x + y * BPS] and writes the
// sum back after clamping it with clip_8b(). The macro relies on a variable
// named 'dst' being in scope where it is expanded.
// NOTE(review): 'x' and 'y' are not parenthesized in the expansion, so callers
// should only pass simple expressions (the visible callers pass literals or a
// plain identifier).
61 #define STORE(x, y, v) \ | 57 #define STORE(x, y, v) \ |
62 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) | 58 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) |
63 | 59 |
// STORE2(y, dc, d, c): updates the four pixels of row 'y' through STORE().
// Columns 0..3 receive DC + d, DC + c, DC - c and DC - d respectively, where
// DC is 'dc' evaluated once into a local constant. Wrapped in do { } while (0)
// so that it behaves as a single statement.
| 60 #define STORE2(y, dc, d, c) do { \ |
| 61 const int DC = (dc); \ |
| 62 STORE(0, y, DC + (d)); \ |
| 63 STORE(1, y, DC + (c)); \ |
| 64 STORE(2, y, DC - (c)); \ |
| 65 STORE(3, y, DC - (d)); \ |
| 66 } while (0) |
| 67 |
// Multiplier constants meant to be used through MUL(). Because MUL() shifts
// the product right by 16, kC1 acts as 85627 / 65536 (about 1.30656) and kC2
// as 35468 / 65536 (about 0.54120).
// NOTE(review): these are presumably sqrt(2) * cos(pi / 8) and
// sqrt(2) * sin(pi / 8) from the VP8 inverse DCT definition -- confirm against
// the specification.
64 static const int kC1 = 20091 + (1 << 16); | 68 static const int kC1 = 20091 + (1 << 16); |
65 static const int kC2 = 35468; | 69 static const int kC2 = 35468; |
// MUL(a, b): integer product of 'a' and 'b' shifted right by 16 bits.
66 #define MUL(a, b) (((a) * (b)) >> 16) | 70 #define MUL(a, b) (((a) * (b)) >> 16) |
67 | 71 |
68 static void TransformOne(const int16_t* in, uint8_t* dst) { | 72 static void TransformOne(const int16_t* in, uint8_t* dst) { |
69 int C[4 * 4], *tmp; | 73 int C[4 * 4], *tmp; |
70 int i; | 74 int i; |
71 tmp = C; | 75 tmp = C; |
72 for (i = 0; i < 4; ++i) { // vertical pass | 76 for (i = 0; i < 4; ++i) { // vertical pass |
73 const int a = in[0] + in[8]; // [-4096, 4094] | 77 const int a = in[0] + in[8]; // [-4096, 4094] |
(...skipping 22 matching lines...) Expand all Loading... |
96 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); | 100 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); |
97 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); | 101 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); |
98 STORE(0, 0, a + d); | 102 STORE(0, 0, a + d); |
99 STORE(1, 0, b + c); | 103 STORE(1, 0, b + c); |
100 STORE(2, 0, b - c); | 104 STORE(2, 0, b - c); |
101 STORE(3, 0, a - d); | 105 STORE(3, 0, a - d); |
102 tmp++; | 106 tmp++; |
103 dst += BPS; | 107 dst += BPS; |
104 } | 108 } |
105 } | 109 } |
| 110 |
| 111 // Simplified transform when only in[0], in[1] and in[4] are non-zero |
// Only in[0], in[1] and in[4] are read; every other coefficient is ignored.
// The 4x4 block of pixels starting at 'dst' (rows BPS bytes apart, see STORE)
// is updated in place: each pixel gets a per-row term plus a per-column term
// added to it, and the result is clamped by STORE().
| 112 static void TransformAC3(const int16_t* in, uint8_t* dst) { |
// The +4 is the rounding bias for the '>> 3' applied inside STORE().
| 113 const int a = in[0] + 4; |
// Terms derived from in[4]; they only change from one row to the next.
| 114 const int c4 = MUL(in[4], kC2); |
| 115 const int d4 = MUL(in[4], kC1); |
// Terms derived from in[1]; they are the same for every row and are spread
// across the four columns by STORE2() as +d1, +c1, -c1, -d1.
| 116 const int c1 = MUL(in[1], kC2); |
| 117 const int d1 = MUL(in[1], kC1); |
// Rows 0..3 use a + d4, a + c4, a - c4 and a - d4 as their base value.
| 118 STORE2(0, a + d4, d1, c1); |
| 119 STORE2(1, a + c4, d1, c1); |
| 120 STORE2(2, a - c4, d1, c1); |
| 121 STORE2(3, a - d4, d1, c1); |
| 122 } |
106 #undef MUL | 123 #undef MUL |
| 124 #undef STORE2 |
107 | 125 |
// Runs TransformOne() on the coefficients at 'in', writing to 'dst'. When
// 'do_two' is non-zero, a second TransformOne() call handles the next 16
// coefficients (in + 16) and writes to dst + 4.
108 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { | 126 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { |
109 TransformOne(in, dst); | 127 TransformOne(in, dst); |
110 if (do_two) { | 128 if (do_two) { |
111 TransformOne(in + 16, dst + 4); | 129 TransformOne(in + 16, dst + 4); |
112 } | 130 } |
113 } | 131 } |
114 | 132 |
115 static void TransformUV(const int16_t* in, uint8_t* dst) { | 133 static void TransformUV(const int16_t* in, uint8_t* dst) { |
116 VP8Transform(in + 0 * 16, dst, 1); | 134 VP8Transform(in + 0 * 16, dst, 1); |
(...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
672 | 690 |
// Calls FilterLoop24() twice with identical parameters: once on the 'u' plane
// and once on the 'v' plane, each time starting 4 bytes past the pointer that
// was passed in. 'thresh', 'ithresh' and 'hev_thresh' are forwarded untouched.
// NOTE(review): FilterLoop24() is not visible in this excerpt; going by the
// names, this is presumably the horizontal inner-edge loop filter for the two
// chroma planes -- confirm against its definition.
673 static void HFilter8i(uint8_t* u, uint8_t* v, int stride, | 691 static void HFilter8i(uint8_t* u, uint8_t* v, int stride, |
674 int thresh, int ithresh, int hev_thresh) { | 692 int thresh, int ithresh, int hev_thresh) { |
675 FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); | 693 FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh); |
676 FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); | 694 FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh); |
677 } | 695 } |
678 | 696 |
679 //------------------------------------------------------------------------------ | 697 //------------------------------------------------------------------------------ |
680 | 698 |
// Definitions of the dispatch pointers for the transform and loop-filter
// routines. They carry no initializer here; VP8DspInit() assigns each of them.
// The NEW side of the diff introduces VP8TransformAC3.
681 VP8DecIdct2 VP8Transform; | 699 VP8DecIdct2 VP8Transform; |
| 700 VP8DecIdct VP8TransformAC3; |
682 VP8DecIdct VP8TransformUV; | 701 VP8DecIdct VP8TransformUV; |
683 VP8DecIdct VP8TransformDC; | 702 VP8DecIdct VP8TransformDC; |
684 VP8DecIdct VP8TransformDCUV; | 703 VP8DecIdct VP8TransformDCUV; |
685 | 704 |
686 VP8LumaFilterFunc VP8VFilter16; | 705 VP8LumaFilterFunc VP8VFilter16; |
687 VP8LumaFilterFunc VP8HFilter16; | 706 VP8LumaFilterFunc VP8HFilter16; |
688 VP8ChromaFilterFunc VP8VFilter8; | 707 VP8ChromaFilterFunc VP8VFilter8; |
689 VP8ChromaFilterFunc VP8HFilter8; | 708 VP8ChromaFilterFunc VP8HFilter8; |
690 VP8LumaFilterFunc VP8VFilter16i; | 709 VP8LumaFilterFunc VP8VFilter16i; |
691 VP8LumaFilterFunc VP8HFilter16i; | 710 VP8LumaFilterFunc VP8HFilter16i; |
692 VP8ChromaFilterFunc VP8VFilter8i; | 711 VP8ChromaFilterFunc VP8VFilter8i; |
693 VP8ChromaFilterFunc VP8HFilter8i; | 712 VP8ChromaFilterFunc VP8HFilter8i; |
694 VP8SimpleFilterFunc VP8SimpleVFilter16; | 713 VP8SimpleFilterFunc VP8SimpleVFilter16; |
695 VP8SimpleFilterFunc VP8SimpleHFilter16; | 714 VP8SimpleFilterFunc VP8SimpleHFilter16; |
696 VP8SimpleFilterFunc VP8SimpleVFilter16i; | 715 VP8SimpleFilterFunc VP8SimpleVFilter16i; |
697 VP8SimpleFilterFunc VP8SimpleHFilter16i; | 716 VP8SimpleFilterFunc VP8SimpleHFilter16i; |
698 | 717 |
// Declarations of the SIMD-specific initializers, which are defined outside
// this file. On the OLD side the NEON declaration is guarded by WEBP_USE_NEON;
// the NEW side declares it unconditionally. VP8DspInit() still only calls it
// from inside a WEBP_USE_NEON branch.
699 extern void VP8DspInitSSE2(void); | 718 extern void VP8DspInitSSE2(void); |
700 #if defined(WEBP_USE_NEON) | |
701 extern void VP8DspInitNEON(void); | 719 extern void VP8DspInitNEON(void); |
702 #endif | |
703 | 720 |
// Sets up the decoder DSP layer: calls DspInitTables(), points every dispatch
// pointer at the function named on the right-hand side below, and finally
// gives a CPU-specific initializer the chance to replace some of them.
704 void VP8DspInit(void) { | 721 void VP8DspInit(void) { |
705 DspInitTables(); | 722 DspInitTables(); |
706 | 723 |
// Default transform implementations.
707 VP8Transform = TransformTwo; | 724 VP8Transform = TransformTwo; |
708 VP8TransformUV = TransformUV; | 725 VP8TransformUV = TransformUV; |
709 VP8TransformDC = TransformDC; | 726 VP8TransformDC = TransformDC; |
710 VP8TransformDCUV = TransformDCUV; | 727 VP8TransformDCUV = TransformDCUV; |
// Added on the NEW side only: registers the three-coefficient transform.
| 728 VP8TransformAC3 = TransformAC3; |
711 | 729 |
// Default loop-filter implementations.
712 VP8VFilter16 = VFilter16; | 730 VP8VFilter16 = VFilter16; |
713 VP8HFilter16 = HFilter16; | 731 VP8HFilter16 = HFilter16; |
714 VP8VFilter8 = VFilter8; | 732 VP8VFilter8 = VFilter8; |
715 VP8HFilter8 = HFilter8; | 733 VP8HFilter8 = HFilter8; |
716 VP8VFilter16i = VFilter16i; | 734 VP8VFilter16i = VFilter16i; |
717 VP8HFilter16i = HFilter16i; | 735 VP8HFilter16i = HFilter16i; |
718 VP8VFilter8i = VFilter8i; | 736 VP8VFilter8i = VFilter8i; |
719 VP8HFilter8i = HFilter8i; | 737 VP8HFilter8i = HFilter8i; |
720 VP8SimpleVFilter16 = SimpleVFilter16; | 738 VP8SimpleVFilter16 = SimpleVFilter16; |
721 VP8SimpleHFilter16 = SimpleHFilter16; | 739 VP8SimpleHFilter16 = SimpleHFilter16; |
722 VP8SimpleVFilter16i = SimpleVFilter16i; | 740 VP8SimpleVFilter16i = SimpleVFilter16i; |
723 VP8SimpleHFilter16i = SimpleHFilter16i; | 741 VP8SimpleHFilter16i = SimpleHFilter16i; |
724 | 742 |
725 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 743 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
// VP8GetCPUInfo is checked for being non-null before it is called. The
// #if / #elif chain compiles in at most one of the SSE2 and NEON branches, and
// the matching initializer only runs when the runtime query succeeds.
726 if (VP8GetCPUInfo) { | 744 if (VP8GetCPUInfo) { |
727 #if defined(WEBP_USE_SSE2) | 745 #if defined(WEBP_USE_SSE2) |
728 if (VP8GetCPUInfo(kSSE2)) { | 746 if (VP8GetCPUInfo(kSSE2)) { |
729 VP8DspInitSSE2(); | 747 VP8DspInitSSE2(); |
730 } | 748 } |
731 #elif defined(WEBP_USE_NEON) | 749 #elif defined(WEBP_USE_NEON) |
732 if (VP8GetCPUInfo(kNEON)) { | 750 if (VP8GetCPUInfo(kNEON)) { |
733 VP8DspInitNEON(); | 751 VP8DspInitNEON(); |
734 } | 752 } |
735 #endif | 753 #endif |
736 } | 754 } |
737 } | 755 } |
738 | 756 |
739 #if defined(__cplusplus) || defined(c_plusplus) | |
740 } // extern "C" | |
741 #endif | |
OLD | NEW |