OLD | NEW |
1 // Copyright 2010 Google Inc. All Rights Reserved. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Speed-critical decoding functions. | 10 // Speed-critical decoding functions, default plain-C implementations. |
11 // | 11 // |
12 // Author: Skal (pascal.massimino@gmail.com) | 12 // Author: Skal (pascal.massimino@gmail.com) |
13 | 13 |
14 #include "./dsp.h" | 14 #include "./dsp.h" |
15 #include "../dec/vp8i.h" | 15 #include "../dec/vp8i.h" |
16 | 16 |
17 //------------------------------------------------------------------------------ | 17 //------------------------------------------------------------------------------ |
18 | 18 |
// Clamps an int to the 8-bit range: returns v unchanged when no bit outside
// the low 8 is set, 0 when v is negative, and 255 otherwise.
// Identical on both sides of the diff.
19 static WEBP_INLINE uint8_t clip_8b(int v) { | 19 static WEBP_INLINE uint8_t clip_8b(int v) { |
20 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; | 20 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
21 } | 21 } |
22 | 22 |
23 //------------------------------------------------------------------------------ | 23 //------------------------------------------------------------------------------ |
24 // Transforms (Paragraph 14.4) | 24 // Transforms (Paragraph 14.4) |
25 | 25 |
// STORE(x, y, v): adds (v >> 3) to the byte at dst[x + y * BPS] and writes
// back the sum clamped by clip_8b(). Uses whatever `dst` is in scope at the
// expansion site.
// NOTE(review): x and y are not parenthesized in the expansion, so only
// simple expressions should be passed for them.
26 #define STORE(x, y, v) \ | 26 #define STORE(x, y, v) \ |
27 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) | 27 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) |
28 | 28 |
// STORE2(y, dc, d, c): stores one row (row y) of four pixels through STORE().
// Columns 0..3 receive DC + d, DC + c, DC - c and DC - d respectively, where
// DC is `dc` evaluated once into a local constant.
29 #define STORE2(y, dc, d, c) do { \ | 29 #define STORE2(y, dc, d, c) do { \ |
30 const int DC = (dc); \ | 30 const int DC = (dc); \ |
31 STORE(0, y, DC + (d)); \ | 31 STORE(0, y, DC + (d)); \ |
32 STORE(1, y, DC + (c)); \ | 32 STORE(1, y, DC + (c)); \ |
33 STORE(2, y, DC - (c)); \ | 33 STORE(2, y, DC - (c)); \ |
34 STORE(3, y, DC - (d)); \ | 34 STORE(3, y, DC - (d)); \ |
35 } while (0) | 35 } while (0) |
36 | 36 |
// Fixed-point multipliers used by the transforms below.
// OLD side: constants kC1 = 20091 + (1 << 16) and kC2 = 35468, applied with
//           MUL(a, b) = (a * b) >> 16.
// NEW side: the constants are folded into two macros:
//           MUL1(a) = ((a * 20091) >> 16) + a   (replaces MUL(a, kC1))
//           MUL2(a) = (a * 35468) >> 16          (replaces MUL(a, kC2))
// NOTE(review): MUL1 multiplies by the smaller constant 20091 and adds `a`
// separately instead of multiplying by 20091 + 65536.
37 static const int kC1 = 20091 + (1 << 16); | 37 #define MUL1(a) ((((a) * 20091) >> 16) + (a)) |
38 static const int kC2 = 35468; | 38 #define MUL2(a) (((a) * 35468) >> 16) |
39 #define MUL(a, b) (((a) * (b)) >> 16) | |
40 | 39 |
// Transforms one 4x4 block of coefficients and adds the result to dst.
//   in  - 16 coefficients; the first pass reads in[0], in[4], in[8], in[12]
//         and advances `in` by one per iteration.
//   dst - destination pixels, rows BPS bytes apart; updated in place via
//         STORE() (add, shift right by 3, clamp to [0, 255]).
// Works in two passes through the 16-entry temporary C[].
// The only change between OLD and NEW is MUL(x, kC1/kC2) -> MUL1(x)/MUL2(x).
41 static void TransformOne(const int16_t* in, uint8_t* dst) { | 40 static void TransformOne(const int16_t* in, uint8_t* dst) { |
42 int C[4 * 4], *tmp; | 41 int C[4 * 4], *tmp; |
43 int i; | 42 int i; |
44 tmp = C; | 43 tmp = C; |
45 for (i = 0; i < 4; ++i) { // vertical pass | 44 for (i = 0; i < 4; ++i) { // vertical pass |
46 const int a = in[0] + in[8]; // [-4096, 4094] | 45 const int a = in[0] + in[8]; // [-4096, 4094] |
47 const int b = in[0] - in[8]; // [-4095, 4095] | 46 const int b = in[0] - in[8]; // [-4095, 4095] |
48 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); // [-3783, 3783] | 47 const int c = MUL2(in[4]) - MUL1(in[12]); // [-3783, 3783] |
49 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); // [-3785, 3781] | 48 const int d = MUL1(in[4]) + MUL2(in[12]); // [-3785, 3781] |
50 tmp[0] = a + d; // [-7881, 7875] | 49 tmp[0] = a + d; // [-7881, 7875] |
51 tmp[1] = b + c; // [-7878, 7878] | 50 tmp[1] = b + c; // [-7878, 7878] |
52 tmp[2] = b - c; // [-7878, 7878] | 51 tmp[2] = b - c; // [-7878, 7878] |
53 tmp[3] = a - d; // [-7877, 7879] | 52 tmp[3] = a - d; // [-7877, 7879] |
54 tmp += 4; | 53 tmp += 4; |
55 in++; | 54 in++; |
56 } | 55 } |
57 // Each pass is expanding the dynamic range by ~3.85 (upper bound). | 56 // Each pass is expanding the dynamic range by ~3.85 (upper bound). |
58 // The exact value is (2. + (kC1 + kC2) / 65536). | 57 // The exact value is (2. + (20091 + 35468) / 65536). |
59 // After the second pass, maximum interval is [-3794, 3794], assuming | 58 // After the second pass, maximum interval is [-3794, 3794], assuming |
60 // an input in [-2048, 2047] interval. We then need to add a dst value | 59 // an input in [-2048, 2047] interval. We then need to add a dst value |
61 // in the [0, 255] range. | 60 // in the [0, 255] range. |
62 // In the worst case scenario, the input to clip_8b() can be as large as | 61 // In the worst case scenario, the input to clip_8b() can be as large as |
63 // [-60713, 60968]. | 62 // [-60713, 60968]. |
// Second pass: each iteration reads tmp[0], tmp[4], tmp[8], tmp[12], writes
// one row of four pixels (y is always 0 because dst itself advances by BPS),
// then moves tmp forward by one. The "+ 4" added to dc is the rounding term
// for the ">> 3" performed inside STORE().
64 tmp = C; | 63 tmp = C; |
65 for (i = 0; i < 4; ++i) { // horizontal pass | 64 for (i = 0; i < 4; ++i) { // horizontal pass |
66 const int dc = tmp[0] + 4; | 65 const int dc = tmp[0] + 4; |
67 const int a = dc + tmp[8]; | 66 const int a = dc + tmp[8]; |
68 const int b = dc - tmp[8]; | 67 const int b = dc - tmp[8]; |
69 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); | 68 const int c = MUL2(tmp[4]) - MUL1(tmp[12]); |
70 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); | 69 const int d = MUL1(tmp[4]) + MUL2(tmp[12]); |
71 STORE(0, 0, a + d); | 70 STORE(0, 0, a + d); |
72 STORE(1, 0, b + c); | 71 STORE(1, 0, b + c); |
73 STORE(2, 0, b - c); | 72 STORE(2, 0, b - c); |
74 STORE(3, 0, a - d); | 73 STORE(3, 0, a - d); |
75 tmp++; | 74 tmp++; |
76 dst += BPS; | 75 dst += BPS; |
77 } | 76 } |
78 } | 77 } |
79 | 78 |
80 // Simplified transform when only in[0], in[1] and in[4] are non-zero | 79 // Simplified transform when only in[0], in[1] and in[4] are non-zero |
// Reads only in[0], in[1] and in[4] and writes the four rows of the 4x4
// block at dst through STORE2(). Row offsets come from in[4] (d4, c4) and
// column offsets from in[1] (d1, c1); the "+ 4" on in[0] is the rounding
// term for the ">> 3" inside STORE().
// OLD uses MUL(x, kC1/kC2); NEW uses MUL1(x)/MUL2(x).
81 static void TransformAC3(const int16_t* in, uint8_t* dst) { | 80 static void TransformAC3(const int16_t* in, uint8_t* dst) { |
82 const int a = in[0] + 4; | 81 const int a = in[0] + 4; |
83 const int c4 = MUL(in[4], kC2); | 82 const int c4 = MUL2(in[4]); |
84 const int d4 = MUL(in[4], kC1); | 83 const int d4 = MUL1(in[4]); |
85 const int c1 = MUL(in[1], kC2); | 84 const int c1 = MUL2(in[1]); |
86 const int d1 = MUL(in[1], kC1); | 85 const int d1 = MUL1(in[1]); |
87 STORE2(0, a + d4, d1, c1); | 86 STORE2(0, a + d4, d1, c1); |
88 STORE2(1, a + c4, d1, c1); | 87 STORE2(1, a + c4, d1, c1); |
89 STORE2(2, a - c4, d1, c1); | 88 STORE2(2, a - c4, d1, c1); |
90 STORE2(3, a - d4, d1, c1); | 89 STORE2(3, a - d4, d1, c1); |
91 } | 90 } |
// The multiply macros and STORE2 are not needed past this point. STORE stays
// defined because TransformDC below still uses it.
92 #undef MUL | 91 #undef MUL1 |
| 92 #undef MUL2 |
93 #undef STORE2 | 93 #undef STORE2 |
94 | 94 |
// Runs TransformOne() on in/dst and, when do_two is non-zero, a second time
// on the next 16 coefficients (in + 16) with the output placed 4 pixels to
// the right (dst + 4). Unchanged by the diff.
95 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { | 95 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { |
96 TransformOne(in, dst); | 96 TransformOne(in, dst); |
97 if (do_two) { | 97 if (do_two) { |
98 TransformOne(in + 16, dst + 4); | 98 TransformOne(in + 16, dst + 4); |
99 } | 99 } |
100 } | 100 } |
101 | 101 |
// Calls VP8Transform twice with do_two = 1: once on in + 0 * 16 at dst and
// once on in + 2 * 16 at dst + 4 * BPS (four rows lower). The call goes
// through the VP8Transform pointer, not directly to TransformTwo().
102 static void TransformUV(const int16_t* in, uint8_t* dst) { | 102 static void TransformUV(const int16_t* in, uint8_t* dst) { |
103 VP8Transform(in + 0 * 16, dst, 1); | 103 VP8Transform(in + 0 * 16, dst, 1); |
104 VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); | 104 VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); |
105 } | 105 } |
106 | 106 |
// Applies the same offset to all 16 pixels of the 4x4 block at dst: each
// pixel gets ((in[0] + 4) >> 3) added and is clamped by STORE().
// Only in[0] is read. The diff changes just the pointer spelling
// (int16_t *in -> int16_t* in).
107 static void TransformDC(const int16_t *in, uint8_t* dst) { | 107 static void TransformDC(const int16_t* in, uint8_t* dst) { |
108 const int DC = in[0] + 4; | 108 const int DC = in[0] + 4; |
109 int i, j; | 109 int i, j; |
110 for (j = 0; j < 4; ++j) { | 110 for (j = 0; j < 4; ++j) { |
111 for (i = 0; i < 4; ++i) { | 111 for (i = 0; i < 4; ++i) { |
112 STORE(i, j, DC); | 112 STORE(i, j, DC); |
113 } | 113 } |
114 } | 114 } |
115 } | 115 } |
116 | 116 |
117 static void TransformDCUV(const int16_t* in, uint8_t* dst) { | 117 static void TransformDCUV(const int16_t* in, uint8_t* dst) { |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
153 } | 153 } |
154 } | 154 } |
155 | 155 |
// Function pointer definition; VP8DspInit() assigns TransformWHT to it.
156 void (*VP8TransformWHT)(const int16_t* in, int16_t* out); | 156 void (*VP8TransformWHT)(const int16_t* in, int16_t* out); |
157 | 157 |
158 //------------------------------------------------------------------------------ | 158 //------------------------------------------------------------------------------ |
159 // Intra predictions | 159 // Intra predictions |
160 | 160 |
// DST(x, y): the pixel at column x, row y relative to the `dst` pointer in
// scope, with rows BPS bytes apart. Usable as an assignment target.
161 #define DST(x, y) dst[(x) + (y) * BPS] | 161 #define DST(x, y) dst[(x) + (y) * BPS] |
162 | 162 |
// Fills a size x size block at dst. For every pixel the result is a lookup
// in VP8kclip1 at index top[x] + left - topleft, where top is the row above
// the block (dst - BPS), left is the pixel just left of the current row
// (dst[-1]) and topleft is top[-1].
// NOTE(review): VP8kclip1 is defined outside this view; presumably a
// clamping table that accepts negative indices -- confirm.
// The diff changes only the pointer spelling (uint8_t *dst -> uint8_t* dst).
163 static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) { | 163 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) { |
164 const uint8_t* top = dst - BPS; | 164 const uint8_t* top = dst - BPS; |
165 const uint8_t* const clip0 = VP8kclip1 - top[-1]; | 165 const uint8_t* const clip0 = VP8kclip1 - top[-1]; |
166 int y; | 166 int y; |
167 for (y = 0; y < size; ++y) { | 167 for (y = 0; y < size; ++y) { |
168 const uint8_t* const clip = clip0 + dst[-1]; | 168 const uint8_t* const clip = clip0 + dst[-1]; |
169 int x; | 169 int x; |
170 for (x = 0; x < size; ++x) { | 170 for (x = 0; x < size; ++x) { |
171 dst[x] = clip[top[x]]; | 171 dst[x] = clip[top[x]]; |
172 } | 172 } |
173 dst += BPS; | 173 dst += BPS; |
174 } | 174 } |
175 } | 175 } |
// Fixed-size wrappers around TrueMotion() for block sizes 4, 8 and 16.
176 static void TM4(uint8_t *dst) { TrueMotion(dst, 4); } | 176 static void TM4(uint8_t* dst) { TrueMotion(dst, 4); } |
177 static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); } | 177 static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); } |
178 static void TM16(uint8_t *dst) { TrueMotion(dst, 16); } | 178 static void TM16(uint8_t* dst) { TrueMotion(dst, 16); } |
179 | 179 |
180 //------------------------------------------------------------------------------ | 180 //------------------------------------------------------------------------------ |
181 // 16x16 | 181 // 16x16 |
182 | 182 |
// Copies the 16 bytes of the row above the block (dst - BPS) into each of
// the block's 16 rows.
183 static void VE16(uint8_t *dst) { // vertical | 183 static void VE16(uint8_t* dst) { // vertical |
184 int j; | 184 int j; |
185 for (j = 0; j < 16; ++j) { | 185 for (j = 0; j < 16; ++j) { |
186 memcpy(dst + j * BPS, dst - BPS, 16); | 186 memcpy(dst + j * BPS, dst - BPS, 16); |
187 } | 187 } |
188 } | 188 } |
189 | 189 |
// Fills each of the 16 rows with the byte immediately left of that row
// (dst[-1]), 16 bytes per row.
190 static void HE16(uint8_t *dst) { // horizontal | 190 static void HE16(uint8_t* dst) { // horizontal |
191 int j; | 191 int j; |
192 for (j = 16; j > 0; --j) { | 192 for (j = 16; j > 0; --j) { |
193 memset(dst, dst[-1], 16); | 193 memset(dst, dst[-1], 16); |
194 dst += BPS; | 194 dst += BPS; |
195 } | 195 } |
196 } | 196 } |
197 | 197 |
// Fills a 16x16 block at dst with the value v, one memset per row, rows
// BPS bytes apart. Shared by the DC16* predictors below.
198 static WEBP_INLINE void Put16(int v, uint8_t* dst) { | 198 static WEBP_INLINE void Put16(int v, uint8_t* dst) { |
199 int j; | 199 int j; |
200 for (j = 0; j < 16; ++j) { | 200 for (j = 0; j < 16; ++j) { |
201 memset(dst + j * BPS, v, 16); | 201 memset(dst + j * BPS, v, 16); |
202 } | 202 } |
203 } | 203 } |
204 | 204 |
// Fills the 16x16 block with the rounded average of the 16 pixels to the
// left and the 16 pixels above: (sum of 32 samples + 16) >> 5.
205 static void DC16(uint8_t *dst) { // DC | 205 static void DC16(uint8_t* dst) { // DC |
206 int DC = 16; | 206 int DC = 16; |
207 int j; | 207 int j; |
208 for (j = 0; j < 16; ++j) { | 208 for (j = 0; j < 16; ++j) { |
209 DC += dst[-1 + j * BPS] + dst[j - BPS]; | 209 DC += dst[-1 + j * BPS] + dst[j - BPS]; |
210 } | 210 } |
211 Put16(DC >> 5, dst); | 211 Put16(DC >> 5, dst); |
212 } | 212 } |
213 | 213 |
// Uses only the 16 pixels of the left column: fills the block with
// (sum + 8) >> 4.
214 static void DC16NoTop(uint8_t *dst) { // DC with top samples not available | 214 static void DC16NoTop(uint8_t* dst) { // DC with top samples not available |
215 int DC = 8; | 215 int DC = 8; |
216 int j; | 216 int j; |
217 for (j = 0; j < 16; ++j) { | 217 for (j = 0; j < 16; ++j) { |
218 DC += dst[-1 + j * BPS]; | 218 DC += dst[-1 + j * BPS]; |
219 } | 219 } |
220 Put16(DC >> 4, dst); | 220 Put16(DC >> 4, dst); |
221 } | 221 } |
222 | 222 |
// Uses only the 16 pixels of the row above: fills the block with
// (sum + 8) >> 4.
223 static void DC16NoLeft(uint8_t *dst) { // DC with left samples not available | 223 static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available |
224 int DC = 8; | 224 int DC = 8; |
225 int i; | 225 int i; |
226 for (i = 0; i < 16; ++i) { | 226 for (i = 0; i < 16; ++i) { |
227 DC += dst[i - BPS]; | 227 DC += dst[i - BPS]; |
228 } | 228 } |
229 Put16(DC >> 4, dst); | 229 Put16(DC >> 4, dst); |
230 } | 230 } |
231 | 231 |
// Reads no neighbouring pixels: fills the block with the constant 0x80.
232 static void DC16NoTopLeft(uint8_t *dst) { // DC with no top and left samples | 232 static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples |
233 Put16(0x80, dst); | 233 Put16(0x80, dst); |
234 } | 234 } |
235 | 235 |
// NEW side only: the 16x16 luma predictor table is now a non-const array
// defined here without an initializer; its entries are assigned in
// VP8DspInit(). The OLD side defined it as a const initialized table.
| 236 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; |
| 237 |
236 //------------------------------------------------------------------------------ | 238 //------------------------------------------------------------------------------ |
237 // 4x4 | 239 // 4x4 |
238 | 240 |
// AVG3(a, b, c): rounded weighted average (a + 2*b + c + 2) >> 2.
// AVG2(a, b):    rounded average (a + b + 1) >> 1.
239 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) | 241 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) |
240 #define AVG2(a, b) (((a) + (b) + 1) >> 1) | 242 #define AVG2(a, b) (((a) + (b) + 1) >> 1) |
241 | 243 |
// Builds one row of four values, each the AVG3 of three neighbouring pixels
// from the row above (top[-1] .. top[4]), then copies that row into all
// four rows of the block.
242 static void VE4(uint8_t *dst) { // vertical | 244 static void VE4(uint8_t* dst) { // vertical |
243 const uint8_t* top = dst - BPS; | 245 const uint8_t* top = dst - BPS; |
244 const uint8_t vals[4] = { | 246 const uint8_t vals[4] = { |
245 AVG3(top[-1], top[0], top[1]), | 247 AVG3(top[-1], top[0], top[1]), |
246 AVG3(top[ 0], top[1], top[2]), | 248 AVG3(top[ 0], top[1], top[2]), |
247 AVG3(top[ 1], top[2], top[3]), | 249 AVG3(top[ 1], top[2], top[3]), |
248 AVG3(top[ 2], top[3], top[4]) | 250 AVG3(top[ 2], top[3], top[4]) |
249 }; | 251 }; |
250 int i; | 252 int i; |
251 for (i = 0; i < 4; ++i) { | 253 for (i = 0; i < 4; ++i) { |
252 memcpy(dst + i * BPS, vals, sizeof(vals)); | 254 memcpy(dst + i * BPS, vals, sizeof(vals)); |
253 } | 255 } |
254 } | 256 } |
255 | 257 |
// Fills each of the four rows with a single value: the AVG3 of three
// vertically adjacent pixels from the column left of the block (A is the
// top-left corner, B..E the four left pixels; the last row repeats E).
// Multiplying by 0x01010101U replicates the byte into all four bytes of a
// 32-bit word so a row is written in one store.
// OLD side: stores through a (uint32_t*) cast of dst.
// NEW side: stores through WebPUint32ToMem(), defined outside this view.
256 static void HE4(uint8_t *dst) { // horizontal | 258 static void HE4(uint8_t* dst) { // horizontal |
257 const int A = dst[-1 - BPS]; | 259 const int A = dst[-1 - BPS]; |
258 const int B = dst[-1]; | 260 const int B = dst[-1]; |
259 const int C = dst[-1 + BPS]; | 261 const int C = dst[-1 + BPS]; |
260 const int D = dst[-1 + 2 * BPS]; | 262 const int D = dst[-1 + 2 * BPS]; |
261 const int E = dst[-1 + 3 * BPS]; | 263 const int E = dst[-1 + 3 * BPS]; |
262 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C); | 264 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C)); |
263 *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D); | 265 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D)); |
264 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E); | 266 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E)); |
265 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E); | 267 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E)); |
266 } | 268 } |
267 | 269 |
// Fills the 4x4 block with the rounded average of the 4 pixels above and
// the 4 pixels to the left: (sum of 8 samples + 4) >> 3.
268 static void DC4(uint8_t *dst) { // DC | 270 static void DC4(uint8_t* dst) { // DC |
269 uint32_t dc = 4; | 271 uint32_t dc = 4; |
270 int i; | 272 int i; |
271 for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; | 273 for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; |
272 dc >>= 3; | 274 dc >>= 3; |
273 for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); | 275 for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); |
274 } | 276 } |
275 | 277 |
// Reads the left column (I..L), the top-left corner (X) and the row above
// (A..D). Every pixel on the same diagonal (x - y constant) receives the
// same AVG3 of three consecutive edge samples, walking the edge from L up
// through X and along to D.
// The NEW side reverses the order of the chained assignments on four lines;
// the same pixels receive the same values.
276 static void RD4(uint8_t *dst) { // Down-right | 278 static void RD4(uint8_t* dst) { // Down-right |
277 const int I = dst[-1 + 0 * BPS]; | 279 const int I = dst[-1 + 0 * BPS]; |
278 const int J = dst[-1 + 1 * BPS]; | 280 const int J = dst[-1 + 1 * BPS]; |
279 const int K = dst[-1 + 2 * BPS]; | 281 const int K = dst[-1 + 2 * BPS]; |
280 const int L = dst[-1 + 3 * BPS]; | 282 const int L = dst[-1 + 3 * BPS]; |
281 const int X = dst[-1 - BPS]; | 283 const int X = dst[-1 - BPS]; |
282 const int A = dst[0 - BPS]; | 284 const int A = dst[0 - BPS]; |
283 const int B = dst[1 - BPS]; | 285 const int B = dst[1 - BPS]; |
284 const int C = dst[2 - BPS]; | 286 const int C = dst[2 - BPS]; |
285 const int D = dst[3 - BPS]; | 287 const int D = dst[3 - BPS]; |
286 DST(0, 3) = AVG3(J, K, L); | 288 DST(0, 3) = AVG3(J, K, L); |
287 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); | 289 DST(1, 3) = DST(0, 2) = AVG3(I, J, K); |
288 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); | 290 DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); |
289 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); | 291 DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); |
290 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); | 292 DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); |
291 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); | 293 DST(3, 1) = DST(2, 0) = AVG3(C, B, A); |
292 DST(3, 0) = AVG3(D, C, B); | 294 DST(3, 0) = AVG3(D, C, B); |
293 } | 295 } |
294 | 296 |
// Reads only the eight pixels of the row above, columns 0..7 (A..H).
// Every pixel on the same anti-diagonal (x + y constant) receives the same
// AVG3 of three consecutive top samples; the last pixel (3, 3) repeats H.
295 static void LD4(uint8_t *dst) { // Down-Left | 297 static void LD4(uint8_t* dst) { // Down-Left |
296 const int A = dst[0 - BPS]; | 298 const int A = dst[0 - BPS]; |
297 const int B = dst[1 - BPS]; | 299 const int B = dst[1 - BPS]; |
298 const int C = dst[2 - BPS]; | 300 const int C = dst[2 - BPS]; |
299 const int D = dst[3 - BPS]; | 301 const int D = dst[3 - BPS]; |
300 const int E = dst[4 - BPS]; | 302 const int E = dst[4 - BPS]; |
301 const int F = dst[5 - BPS]; | 303 const int F = dst[5 - BPS]; |
302 const int G = dst[6 - BPS]; | 304 const int G = dst[6 - BPS]; |
303 const int H = dst[7 - BPS]; | 305 const int H = dst[7 - BPS]; |
304 DST(0, 0) = AVG3(A, B, C); | 306 DST(0, 0) = AVG3(A, B, C); |
305 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); | 307 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); |
306 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); | 308 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); |
307 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); | 309 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); |
308 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); | 310 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); |
309 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); | 311 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); |
310 DST(3, 3) = AVG3(G, H, H); | 312 DST(3, 3) = AVG3(G, H, H); |
311 } | 313 } |
312 | 314 |
// Reads three left pixels (I, J, K), the top-left corner (X) and the row
// above (A..D). The first group of assignments uses two-sample averages
// (AVG2) of adjacent top samples; the second group uses three-sample
// averages (AVG3), with column 0 of rows 2 and 3 also drawing on the left
// samples.
313 static void VR4(uint8_t *dst) { // Vertical-Right | 315 static void VR4(uint8_t* dst) { // Vertical-Right |
314 const int I = dst[-1 + 0 * BPS]; | 316 const int I = dst[-1 + 0 * BPS]; |
315 const int J = dst[-1 + 1 * BPS]; | 317 const int J = dst[-1 + 1 * BPS]; |
316 const int K = dst[-1 + 2 * BPS]; | 318 const int K = dst[-1 + 2 * BPS]; |
317 const int X = dst[-1 - BPS]; | 319 const int X = dst[-1 - BPS]; |
318 const int A = dst[0 - BPS]; | 320 const int A = dst[0 - BPS]; |
319 const int B = dst[1 - BPS]; | 321 const int B = dst[1 - BPS]; |
320 const int C = dst[2 - BPS]; | 322 const int C = dst[2 - BPS]; |
321 const int D = dst[3 - BPS]; | 323 const int D = dst[3 - BPS]; |
322 DST(0, 0) = DST(1, 2) = AVG2(X, A); | 324 DST(0, 0) = DST(1, 2) = AVG2(X, A); |
323 DST(1, 0) = DST(2, 2) = AVG2(A, B); | 325 DST(1, 0) = DST(2, 2) = AVG2(A, B); |
324 DST(2, 0) = DST(3, 2) = AVG2(B, C); | 326 DST(2, 0) = DST(3, 2) = AVG2(B, C); |
325 DST(3, 0) = AVG2(C, D); | 327 DST(3, 0) = AVG2(C, D); |
326 | 328 |
327 DST(0, 3) = AVG3(K, J, I); | 329 DST(0, 3) = AVG3(K, J, I); |
328 DST(0, 2) = AVG3(J, I, X); | 330 DST(0, 2) = AVG3(J, I, X); |
329 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); | 331 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); |
330 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); | 332 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); |
331 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); | 333 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); |
332 DST(3, 1) = AVG3(B, C, D); | 334 DST(3, 1) = AVG3(B, C, D); |
333 } | 335 } |
334 | 336 |
// Reads only the eight pixels of the row above, columns 0..7 (A..H).
// The first group of assignments uses two-sample averages (AVG2) of
// adjacent top samples; the second group uses three-sample averages (AVG3).
335 static void VL4(uint8_t *dst) { // Vertical-Left | 337 static void VL4(uint8_t* dst) { // Vertical-Left |
336 const int A = dst[0 - BPS]; | 338 const int A = dst[0 - BPS]; |
337 const int B = dst[1 - BPS]; | 339 const int B = dst[1 - BPS]; |
338 const int C = dst[2 - BPS]; | 340 const int C = dst[2 - BPS]; |
339 const int D = dst[3 - BPS]; | 341 const int D = dst[3 - BPS]; |
340 const int E = dst[4 - BPS]; | 342 const int E = dst[4 - BPS]; |
341 const int F = dst[5 - BPS]; | 343 const int F = dst[5 - BPS]; |
342 const int G = dst[6 - BPS]; | 344 const int G = dst[6 - BPS]; |
343 const int H = dst[7 - BPS]; | 345 const int H = dst[7 - BPS]; |
344 DST(0, 0) = AVG2(A, B); | 346 DST(0, 0) = AVG2(A, B); |
345 DST(1, 0) = DST(0, 2) = AVG2(B, C); | 347 DST(1, 0) = DST(0, 2) = AVG2(B, C); |
346 DST(2, 0) = DST(1, 2) = AVG2(C, D); | 348 DST(2, 0) = DST(1, 2) = AVG2(C, D); |
347 DST(3, 0) = DST(2, 2) = AVG2(D, E); | 349 DST(3, 0) = DST(2, 2) = AVG2(D, E); |
348 | 350 |
349 DST(0, 1) = AVG3(A, B, C); | 351 DST(0, 1) = AVG3(A, B, C); |
350 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); | 352 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); |
351 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); | 353 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); |
352 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); | 354 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); |
353 DST(3, 2) = AVG3(E, F, G); | 355 DST(3, 2) = AVG3(E, F, G); |
354 DST(3, 3) = AVG3(F, G, H); | 356 DST(3, 3) = AVG3(F, G, H); |
355 } | 357 } |
356 | 358 |
// Reads only the four pixels of the left column (I..L). The upper part of
// the block mixes AVG2 and AVG3 of adjacent left samples; pixels (2, 2),
// (3, 2) and the whole bottom row are set to L unchanged.
357 static void HU4(uint8_t *dst) { // Horizontal-Up | 359 static void HU4(uint8_t* dst) { // Horizontal-Up |
358 const int I = dst[-1 + 0 * BPS]; | 360 const int I = dst[-1 + 0 * BPS]; |
359 const int J = dst[-1 + 1 * BPS]; | 361 const int J = dst[-1 + 1 * BPS]; |
360 const int K = dst[-1 + 2 * BPS]; | 362 const int K = dst[-1 + 2 * BPS]; |
361 const int L = dst[-1 + 3 * BPS]; | 363 const int L = dst[-1 + 3 * BPS]; |
362 DST(0, 0) = AVG2(I, J); | 364 DST(0, 0) = AVG2(I, J); |
363 DST(2, 0) = DST(0, 1) = AVG2(J, K); | 365 DST(2, 0) = DST(0, 1) = AVG2(J, K); |
364 DST(2, 1) = DST(0, 2) = AVG2(K, L); | 366 DST(2, 1) = DST(0, 2) = AVG2(K, L); |
365 DST(1, 0) = AVG3(I, J, K); | 367 DST(1, 0) = AVG3(I, J, K); |
366 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); | 368 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); |
367 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); | 369 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); |
368 DST(3, 2) = DST(2, 2) = | 370 DST(3, 2) = DST(2, 2) = |
369 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; | 371 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; |
370 } | 372 } |
371 | 373 |
// Reads the left column (I..L), the top-left corner (X) and three pixels of
// the row above (A..C). The first group of assignments uses two-sample
// averages (AVG2) of adjacent left/corner samples; the second group uses
// three-sample averages (AVG3).
372 static void HD4(uint8_t *dst) { // Horizontal-Down | 374 static void HD4(uint8_t* dst) { // Horizontal-Down |
373 const int I = dst[-1 + 0 * BPS]; | 375 const int I = dst[-1 + 0 * BPS]; |
374 const int J = dst[-1 + 1 * BPS]; | 376 const int J = dst[-1 + 1 * BPS]; |
375 const int K = dst[-1 + 2 * BPS]; | 377 const int K = dst[-1 + 2 * BPS]; |
376 const int L = dst[-1 + 3 * BPS]; | 378 const int L = dst[-1 + 3 * BPS]; |
377 const int X = dst[-1 - BPS]; | 379 const int X = dst[-1 - BPS]; |
378 const int A = dst[0 - BPS]; | 380 const int A = dst[0 - BPS]; |
379 const int B = dst[1 - BPS]; | 381 const int B = dst[1 - BPS]; |
380 const int C = dst[2 - BPS]; | 382 const int C = dst[2 - BPS]; |
381 | 383 |
382 DST(0, 0) = DST(2, 1) = AVG2(I, X); | 384 DST(0, 0) = DST(2, 1) = AVG2(I, X); |
383 DST(0, 1) = DST(2, 2) = AVG2(J, I); | 385 DST(0, 1) = DST(2, 2) = AVG2(J, I); |
384 DST(0, 2) = DST(2, 3) = AVG2(K, J); | 386 DST(0, 2) = DST(2, 3) = AVG2(K, J); |
385 DST(0, 3) = AVG2(L, K); | 387 DST(0, 3) = AVG2(L, K); |
386 | 388 |
387 DST(3, 0) = AVG3(A, B, C); | 389 DST(3, 0) = AVG3(A, B, C); |
388 DST(2, 0) = AVG3(X, A, B); | 390 DST(2, 0) = AVG3(X, A, B); |
389 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); | 391 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); |
390 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); | 392 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); |
391 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); | 393 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); |
392 DST(1, 3) = AVG3(L, K, J); | 394 DST(1, 3) = AVG3(L, K, J); |
393 } | 395 } |
394 | 396 |
// The 4x4 helper macros are retired here, after their last use above.
395 #undef DST | 397 #undef DST |
396 #undef AVG3 | 398 #undef AVG3 |
397 #undef AVG2 | 399 #undef AVG2 |
398 | 400 |
// NEW side only: the 4x4 luma predictor table is now a non-const array
// defined here without an initializer; its ten entries are assigned in
// VP8DspInit(). The OLD side defined it as a const initialized table.
| 401 VP8PredFunc VP8PredLuma4[NUM_BMODES]; |
| 402 |
399 //------------------------------------------------------------------------------ | 403 //------------------------------------------------------------------------------ |
400 // Chroma | 404 // Chroma |
401 | 405 |
// Copies the 8 bytes of the row above the block (dst - BPS) into each of
// the block's 8 rows.
402 static void VE8uv(uint8_t *dst) { // vertical | 406 static void VE8uv(uint8_t* dst) { // vertical |
403 int j; | 407 int j; |
404 for (j = 0; j < 8; ++j) { | 408 for (j = 0; j < 8; ++j) { |
405 memcpy(dst + j * BPS, dst - BPS, 8); | 409 memcpy(dst + j * BPS, dst - BPS, 8); |
406 } | 410 } |
407 } | 411 } |
408 | 412 |
// Fills each of the 8 rows with the byte immediately left of that row
// (dst[-1]), 8 bytes per row.
409 static void HE8uv(uint8_t *dst) { // horizontal | 413 static void HE8uv(uint8_t* dst) { // horizontal |
410 int j; | 414 int j; |
411 for (j = 0; j < 8; ++j) { | 415 for (j = 0; j < 8; ++j) { |
412 memset(dst, dst[-1], 8); | 416 memset(dst, dst[-1], 8); |
413 dst += BPS; | 417 dst += BPS; |
414 } | 418 } |
415 } | 419 } |
416 | 420 |
417 // helper for chroma-DC predictions | 421 // helper for chroma-DC predictions |
// Fills an 8x8 block at dst with `value`, one memset per row, rows BPS
// bytes apart.
418 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { | 422 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { |
419 int j; | 423 int j; |
420 for (j = 0; j < 8; ++j) { | 424 for (j = 0; j < 8; ++j) { |
421 memset(dst + j * BPS, value, 8); | 425 memset(dst + j * BPS, value, 8); |
422 } | 426 } |
423 } | 427 } |
424 | 428 |
// Fills the 8x8 block with the rounded average of the 8 pixels above and
// the 8 pixels to the left: (sum of 16 samples + 8) >> 4.
425 static void DC8uv(uint8_t *dst) { // DC | 429 static void DC8uv(uint8_t* dst) { // DC |
426 int dc0 = 8; | 430 int dc0 = 8; |
427 int i; | 431 int i; |
428 for (i = 0; i < 8; ++i) { | 432 for (i = 0; i < 8; ++i) { |
429 dc0 += dst[i - BPS] + dst[-1 + i * BPS]; | 433 dc0 += dst[i - BPS] + dst[-1 + i * BPS]; |
430 } | 434 } |
431 Put8x8uv(dc0 >> 4, dst); | 435 Put8x8uv(dc0 >> 4, dst); |
432 } | 436 } |
433 | 437 |
// Uses only the 8 pixels of the row above: fills the block with
// (sum + 4) >> 3.
434 static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples | 438 static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples |
435 int dc0 = 4; | 439 int dc0 = 4; |
436 int i; | 440 int i; |
437 for (i = 0; i < 8; ++i) { | 441 for (i = 0; i < 8; ++i) { |
438 dc0 += dst[i - BPS]; | 442 dc0 += dst[i - BPS]; |
439 } | 443 } |
440 Put8x8uv(dc0 >> 3, dst); | 444 Put8x8uv(dc0 >> 3, dst); |
441 } | 445 } |
442 | 446 |
// Uses only the 8 pixels of the left column: fills the block with
// (sum + 4) >> 3.
443 static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples | 447 static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples |
444 int dc0 = 4; | 448 int dc0 = 4; |
445 int i; | 449 int i; |
446 for (i = 0; i < 8; ++i) { | 450 for (i = 0; i < 8; ++i) { |
447 dc0 += dst[-1 + i * BPS]; | 451 dc0 += dst[-1 + i * BPS]; |
448 } | 452 } |
449 Put8x8uv(dc0 >> 3, dst); | 453 Put8x8uv(dc0 >> 3, dst); |
450 } | 454 } |
451 | 455 |
// Reads no neighbouring pixels: fills the block with the constant 0x80.
452 static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing | 456 static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing |
453 Put8x8uv(0x80, dst); | 457 Put8x8uv(0x80, dst); |
454 } | 458 } |
455 | 459 |
// OLD side: the three predictor tables (VP8PredLuma4, VP8PredLuma16,
// VP8PredChroma8) are const arrays statically initialized with the plain-C
// predictors defined above.
// NEW side: only the non-const VP8PredChroma8 definition remains at this
// spot; the Luma4 and Luma16 tables are defined after their respective
// sections, and all three are filled in by VP8DspInit() in the same order
// as the old initializers.
456 //------------------------------------------------------------------------------ | 460 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; |
457 // default C implementations | |
458 | |
459 const VP8PredFunc VP8PredLuma4[NUM_BMODES] = { | |
460 DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4 | |
461 }; | |
462 | |
463 const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = { | |
464 DC16, TM16, VE16, HE16, | |
465 DC16NoTop, DC16NoLeft, DC16NoTopLeft | |
466 }; | |
467 | |
468 const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = { | |
469 DC8uv, TM8uv, VE8uv, HE8uv, | |
470 DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft | |
471 }; | |
472 | 461 |
473 //------------------------------------------------------------------------------ | 462 //------------------------------------------------------------------------------ |
474 // Edge filtering functions | 463 // Edge filtering functions |
475 | 464 |
476 // 4 pixels in, 2 pixels out | 465 // 4 pixels in, 2 pixels out |
477 static WEBP_INLINE void do_filter2(uint8_t* p, int step) { | 466 static WEBP_INLINE void do_filter2(uint8_t* p, int step) { |
478 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 467 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
479 const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] | 468 const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] |
480 const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] | 469 const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] |
481 const int a2 = VP8ksclip2[(a + 3) >> 3]; | 470 const int a2 = VP8ksclip2[(a + 3) >> 3]; |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Definitions of filter function pointers (the start of this list is in
// the collapsed part of the diff). Each pointer shown here is assigned its
// plain-C implementation in VP8DspInit().
678 VP8LumaFilterFunc VP8VFilter16i; | 667 VP8LumaFilterFunc VP8VFilter16i; |
679 VP8LumaFilterFunc VP8HFilter16i; | 668 VP8LumaFilterFunc VP8HFilter16i; |
680 VP8ChromaFilterFunc VP8VFilter8i; | 669 VP8ChromaFilterFunc VP8VFilter8i; |
681 VP8ChromaFilterFunc VP8HFilter8i; | 670 VP8ChromaFilterFunc VP8HFilter8i; |
682 VP8SimpleFilterFunc VP8SimpleVFilter16; | 671 VP8SimpleFilterFunc VP8SimpleVFilter16; |
683 VP8SimpleFilterFunc VP8SimpleHFilter16; | 672 VP8SimpleFilterFunc VP8SimpleHFilter16; |
684 VP8SimpleFilterFunc VP8SimpleVFilter16i; | 673 VP8SimpleFilterFunc VP8SimpleVFilter16i; |
685 VP8SimpleFilterFunc VP8SimpleHFilter16i; | 674 VP8SimpleFilterFunc VP8SimpleHFilter16i; |
686 | 675 |
// Declarations of the platform-specific init routines called from
// VP8DspInit(). The NEW side adds VP8DspInitSSE41 and VP8DspInitMIPSdspR2.
687 extern void VP8DspInitSSE2(void); | 676 extern void VP8DspInitSSE2(void); |
| 677 extern void VP8DspInitSSE41(void); |
688 extern void VP8DspInitNEON(void); | 678 extern void VP8DspInitNEON(void); |
689 extern void VP8DspInitMIPS32(void); | 679 extern void VP8DspInitMIPS32(void); |
| 680 extern void VP8DspInitMIPSdspR2(void); |
690 | 681 |
// Records the VP8GetCPUInfo value seen by the last completed VP8DspInit()
// call. It starts out holding its own address, presumably a value
// VP8GetCPUInfo never takes, so that the first call always runs -- confirm.
691 static volatile VP8CPUInfo dec_last_cpuinfo_used = | 682 static volatile VP8CPUInfo dec_last_cpuinfo_used = |
692 (VP8CPUInfo)&dec_last_cpuinfo_used; | 683 (VP8CPUInfo)&dec_last_cpuinfo_used; |
693 | 684 |
// Installs the decoder DSP function pointers.
//  1. Returns immediately if VP8GetCPUInfo is unchanged since the last run.
//  2. Calls VP8InitClipTables() and assigns the plain-C transforms and
//     filters to the global pointers.
//  3. NEW side only: fills the VP8PredLuma4 / VP8PredLuma16 / VP8PredChroma8
//     tables at run time (they were const initialized tables on the OLD
//     side), in the same order as the old initializers.
//  4. If VP8GetCPUInfo is non-NULL, calls the platform init routines whose
//     compile-time macro is defined and whose CPU feature is reported.
//  5. Stores VP8GetCPUInfo in dec_last_cpuinfo_used.
// Changes on the NEW side besides step 3: the function is tagged with
// WEBP_TSAN_IGNORE_FUNCTION (defined outside this view); the #elif chain
// becomes independent #if blocks, so more than one platform init can run;
// an SSE4.1 init is nested inside the SSE2 branch; a MIPS DSP R2 init is
// added.
694 void VP8DspInit(void) { | 685 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { |
695 if (dec_last_cpuinfo_used == VP8GetCPUInfo) return; | 686 if (dec_last_cpuinfo_used == VP8GetCPUInfo) return; |
696 | 687 |
697 VP8InitClipTables(); | 688 VP8InitClipTables(); |
698 | 689 |
699 VP8TransformWHT = TransformWHT; | 690 VP8TransformWHT = TransformWHT; |
700 VP8Transform = TransformTwo; | 691 VP8Transform = TransformTwo; |
701 VP8TransformUV = TransformUV; | 692 VP8TransformUV = TransformUV; |
702 VP8TransformDC = TransformDC; | 693 VP8TransformDC = TransformDC; |
703 VP8TransformDCUV = TransformDCUV; | 694 VP8TransformDCUV = TransformDCUV; |
704 VP8TransformAC3 = TransformAC3; | 695 VP8TransformAC3 = TransformAC3; |
705 | 696 |
706 VP8VFilter16 = VFilter16; | 697 VP8VFilter16 = VFilter16; |
707 VP8HFilter16 = HFilter16; | 698 VP8HFilter16 = HFilter16; |
708 VP8VFilter8 = VFilter8; | 699 VP8VFilter8 = VFilter8; |
709 VP8HFilter8 = HFilter8; | 700 VP8HFilter8 = HFilter8; |
710 VP8VFilter16i = VFilter16i; | 701 VP8VFilter16i = VFilter16i; |
711 VP8HFilter16i = HFilter16i; | 702 VP8HFilter16i = HFilter16i; |
712 VP8VFilter8i = VFilter8i; | 703 VP8VFilter8i = VFilter8i; |
713 VP8HFilter8i = HFilter8i; | 704 VP8HFilter8i = HFilter8i; |
714 VP8SimpleVFilter16 = SimpleVFilter16; | 705 VP8SimpleVFilter16 = SimpleVFilter16; |
715 VP8SimpleHFilter16 = SimpleHFilter16; | 706 VP8SimpleHFilter16 = SimpleHFilter16; |
716 VP8SimpleVFilter16i = SimpleVFilter16i; | 707 VP8SimpleVFilter16i = SimpleVFilter16i; |
717 VP8SimpleHFilter16i = SimpleHFilter16i; | 708 VP8SimpleHFilter16i = SimpleHFilter16i; |
718 | 709 |
| 710 VP8PredLuma4[0] = DC4; |
| 711 VP8PredLuma4[1] = TM4; |
| 712 VP8PredLuma4[2] = VE4; |
| 713 VP8PredLuma4[3] = HE4; |
| 714 VP8PredLuma4[4] = RD4; |
| 715 VP8PredLuma4[5] = VR4; |
| 716 VP8PredLuma4[6] = LD4; |
| 717 VP8PredLuma4[7] = VL4; |
| 718 VP8PredLuma4[8] = HD4; |
| 719 VP8PredLuma4[9] = HU4; |
| 720 |
| 721 VP8PredLuma16[0] = DC16; |
| 722 VP8PredLuma16[1] = TM16; |
| 723 VP8PredLuma16[2] = VE16; |
| 724 VP8PredLuma16[3] = HE16; |
| 725 VP8PredLuma16[4] = DC16NoTop; |
| 726 VP8PredLuma16[5] = DC16NoLeft; |
| 727 VP8PredLuma16[6] = DC16NoTopLeft; |
| 728 |
| 729 VP8PredChroma8[0] = DC8uv; |
| 730 VP8PredChroma8[1] = TM8uv; |
| 731 VP8PredChroma8[2] = VE8uv; |
| 732 VP8PredChroma8[3] = HE8uv; |
| 733 VP8PredChroma8[4] = DC8uvNoTop; |
| 734 VP8PredChroma8[5] = DC8uvNoLeft; |
| 735 VP8PredChroma8[6] = DC8uvNoTopLeft; |
| 736 |
719 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 737 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
720 if (VP8GetCPUInfo != NULL) { | 738 if (VP8GetCPUInfo != NULL) { |
721 #if defined(WEBP_USE_SSE2) | 739 #if defined(WEBP_USE_SSE2) |
722 if (VP8GetCPUInfo(kSSE2)) { | 740 if (VP8GetCPUInfo(kSSE2)) { |
723 VP8DspInitSSE2(); | 741 VP8DspInitSSE2(); |
| 742 #if defined(WEBP_USE_SSE41) |
| 743 if (VP8GetCPUInfo(kSSE4_1)) { |
| 744 VP8DspInitSSE41(); |
| 745 } |
| 746 #endif |
724 } | 747 } |
725 #elif defined(WEBP_USE_NEON) | 748 #endif |
| 749 #if defined(WEBP_USE_NEON) |
726 if (VP8GetCPUInfo(kNEON)) { | 750 if (VP8GetCPUInfo(kNEON)) { |
727 VP8DspInitNEON(); | 751 VP8DspInitNEON(); |
728 } | 752 } |
729 #elif defined(WEBP_USE_MIPS32) | 753 #endif |
| 754 #if defined(WEBP_USE_MIPS32) |
730 if (VP8GetCPUInfo(kMIPS32)) { | 755 if (VP8GetCPUInfo(kMIPS32)) { |
731 VP8DspInitMIPS32(); | 756 VP8DspInitMIPS32(); |
732 } | 757 } |
733 #endif | 758 #endif |
| 759 #if defined(WEBP_USE_MIPS_DSP_R2) |
| 760 if (VP8GetCPUInfo(kMIPSdspR2)) { |
| 761 VP8DspInitMIPSdspR2(); |
| 762 } |
| 763 #endif |
734 } | 764 } |
735 dec_last_cpuinfo_used = VP8GetCPUInfo; | 765 dec_last_cpuinfo_used = VP8GetCPUInfo; |
736 } | 766 } |
OLD | NEW |