Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Side by Side Diff: third_party/libwebp/dsp/dec.c

Issue 1546003002: libwebp: update to 0.5.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase around clang-cl fix Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2010 Google Inc. All Rights Reserved. 1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Speed-critical decoding functions. 10 // Speed-critical decoding functions, default plain-C implementations.
11 // 11 //
12 // Author: Skal (pascal.massimino@gmail.com) 12 // Author: Skal (pascal.massimino@gmail.com)
13 13
14 #include "./dsp.h" 14 #include "./dsp.h"
15 #include "../dec/vp8i.h" 15 #include "../dec/vp8i.h"
16 16
17 //------------------------------------------------------------------------------ 17 //------------------------------------------------------------------------------
18 18
19 static WEBP_INLINE uint8_t clip_8b(int v) { 19 static WEBP_INLINE uint8_t clip_8b(int v) {
20 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; 20 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
21 } 21 }
22 22
23 //------------------------------------------------------------------------------ 23 //------------------------------------------------------------------------------
24 // Transforms (Paragraph 14.4) 24 // Transforms (Paragraph 14.4)
25 25
26 #define STORE(x, y, v) \ 26 #define STORE(x, y, v) \
27 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) 27 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
28 28
29 #define STORE2(y, dc, d, c) do { \ 29 #define STORE2(y, dc, d, c) do { \
30 const int DC = (dc); \ 30 const int DC = (dc); \
31 STORE(0, y, DC + (d)); \ 31 STORE(0, y, DC + (d)); \
32 STORE(1, y, DC + (c)); \ 32 STORE(1, y, DC + (c)); \
33 STORE(2, y, DC - (c)); \ 33 STORE(2, y, DC - (c)); \
34 STORE(3, y, DC - (d)); \ 34 STORE(3, y, DC - (d)); \
35 } while (0) 35 } while (0)
36 36
37 static const int kC1 = 20091 + (1 << 16); 37 #define MUL1(a) ((((a) * 20091) >> 16) + (a))
38 static const int kC2 = 35468; 38 #define MUL2(a) (((a) * 35468) >> 16)
39 #define MUL(a, b) (((a) * (b)) >> 16)
40 39
41 static void TransformOne(const int16_t* in, uint8_t* dst) { 40 static void TransformOne(const int16_t* in, uint8_t* dst) {
42 int C[4 * 4], *tmp; 41 int C[4 * 4], *tmp;
43 int i; 42 int i;
44 tmp = C; 43 tmp = C;
45 for (i = 0; i < 4; ++i) { // vertical pass 44 for (i = 0; i < 4; ++i) { // vertical pass
46 const int a = in[0] + in[8]; // [-4096, 4094] 45 const int a = in[0] + in[8]; // [-4096, 4094]
47 const int b = in[0] - in[8]; // [-4095, 4095] 46 const int b = in[0] - in[8]; // [-4095, 4095]
48 const int c = MUL(in[4], kC2) - MUL(in[12], kC1); // [-3783, 3783] 47 const int c = MUL2(in[4]) - MUL1(in[12]); // [-3783, 3783]
49 const int d = MUL(in[4], kC1) + MUL(in[12], kC2); // [-3785, 3781] 48 const int d = MUL1(in[4]) + MUL2(in[12]); // [-3785, 3781]
50 tmp[0] = a + d; // [-7881, 7875] 49 tmp[0] = a + d; // [-7881, 7875]
51 tmp[1] = b + c; // [-7878, 7878] 50 tmp[1] = b + c; // [-7878, 7878]
52 tmp[2] = b - c; // [-7878, 7878] 51 tmp[2] = b - c; // [-7878, 7878]
53 tmp[3] = a - d; // [-7877, 7879] 52 tmp[3] = a - d; // [-7877, 7879]
54 tmp += 4; 53 tmp += 4;
55 in++; 54 in++;
56 } 55 }
57 // Each pass is expanding the dynamic range by ~3.85 (upper bound). 56 // Each pass is expanding the dynamic range by ~3.85 (upper bound).
58 // The exact value is (2. + (kC1 + kC2) / 65536). 57 // The exact value is (2. + (20091 + 35468) / 65536).
59 // After the second pass, maximum interval is [-3794, 3794], assuming 58 // After the second pass, maximum interval is [-3794, 3794], assuming
60 // an input in [-2048, 2047] interval. We then need to add a dst value 59 // an input in [-2048, 2047] interval. We then need to add a dst value
61 // in the [0, 255] range. 60 // in the [0, 255] range.
62 // In the worst case scenario, the input to clip_8b() can be as large as 61 // In the worst case scenario, the input to clip_8b() can be as large as
63 // [-60713, 60968]. 62 // [-60713, 60968].
64 tmp = C; 63 tmp = C;
65 for (i = 0; i < 4; ++i) { // horizontal pass 64 for (i = 0; i < 4; ++i) { // horizontal pass
66 const int dc = tmp[0] + 4; 65 const int dc = tmp[0] + 4;
67 const int a = dc + tmp[8]; 66 const int a = dc + tmp[8];
68 const int b = dc - tmp[8]; 67 const int b = dc - tmp[8];
69 const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); 68 const int c = MUL2(tmp[4]) - MUL1(tmp[12]);
70 const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); 69 const int d = MUL1(tmp[4]) + MUL2(tmp[12]);
71 STORE(0, 0, a + d); 70 STORE(0, 0, a + d);
72 STORE(1, 0, b + c); 71 STORE(1, 0, b + c);
73 STORE(2, 0, b - c); 72 STORE(2, 0, b - c);
74 STORE(3, 0, a - d); 73 STORE(3, 0, a - d);
75 tmp++; 74 tmp++;
76 dst += BPS; 75 dst += BPS;
77 } 76 }
78 } 77 }
79 78
80 // Simplified transform when only in[0], in[1] and in[4] are non-zero 79 // Simplified transform when only in[0], in[1] and in[4] are non-zero
81 static void TransformAC3(const int16_t* in, uint8_t* dst) { 80 static void TransformAC3(const int16_t* in, uint8_t* dst) {
82 const int a = in[0] + 4; 81 const int a = in[0] + 4;
83 const int c4 = MUL(in[4], kC2); 82 const int c4 = MUL2(in[4]);
84 const int d4 = MUL(in[4], kC1); 83 const int d4 = MUL1(in[4]);
85 const int c1 = MUL(in[1], kC2); 84 const int c1 = MUL2(in[1]);
86 const int d1 = MUL(in[1], kC1); 85 const int d1 = MUL1(in[1]);
87 STORE2(0, a + d4, d1, c1); 86 STORE2(0, a + d4, d1, c1);
88 STORE2(1, a + c4, d1, c1); 87 STORE2(1, a + c4, d1, c1);
89 STORE2(2, a - c4, d1, c1); 88 STORE2(2, a - c4, d1, c1);
90 STORE2(3, a - d4, d1, c1); 89 STORE2(3, a - d4, d1, c1);
91 } 90 }
92 #undef MUL 91 #undef MUL1
92 #undef MUL2
93 #undef STORE2 93 #undef STORE2
94 94
95 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) { 95 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
96 TransformOne(in, dst); 96 TransformOne(in, dst);
97 if (do_two) { 97 if (do_two) {
98 TransformOne(in + 16, dst + 4); 98 TransformOne(in + 16, dst + 4);
99 } 99 }
100 } 100 }
101 101
102 static void TransformUV(const int16_t* in, uint8_t* dst) { 102 static void TransformUV(const int16_t* in, uint8_t* dst) {
103 VP8Transform(in + 0 * 16, dst, 1); 103 VP8Transform(in + 0 * 16, dst, 1);
104 VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); 104 VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
105 } 105 }
106 106
107 static void TransformDC(const int16_t *in, uint8_t* dst) { 107 static void TransformDC(const int16_t* in, uint8_t* dst) {
108 const int DC = in[0] + 4; 108 const int DC = in[0] + 4;
109 int i, j; 109 int i, j;
110 for (j = 0; j < 4; ++j) { 110 for (j = 0; j < 4; ++j) {
111 for (i = 0; i < 4; ++i) { 111 for (i = 0; i < 4; ++i) {
112 STORE(i, j, DC); 112 STORE(i, j, DC);
113 } 113 }
114 } 114 }
115 } 115 }
116 116
117 static void TransformDCUV(const int16_t* in, uint8_t* dst) { 117 static void TransformDCUV(const int16_t* in, uint8_t* dst) {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
153 } 153 }
154 } 154 }
155 155
156 void (*VP8TransformWHT)(const int16_t* in, int16_t* out); 156 void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
157 157
158 //------------------------------------------------------------------------------ 158 //------------------------------------------------------------------------------
159 // Intra predictions 159 // Intra predictions
160 160
161 #define DST(x, y) dst[(x) + (y) * BPS] 161 #define DST(x, y) dst[(x) + (y) * BPS]
162 162
163 static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) { 163 static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
164 const uint8_t* top = dst - BPS; 164 const uint8_t* top = dst - BPS;
165 const uint8_t* const clip0 = VP8kclip1 - top[-1]; 165 const uint8_t* const clip0 = VP8kclip1 - top[-1];
166 int y; 166 int y;
167 for (y = 0; y < size; ++y) { 167 for (y = 0; y < size; ++y) {
168 const uint8_t* const clip = clip0 + dst[-1]; 168 const uint8_t* const clip = clip0 + dst[-1];
169 int x; 169 int x;
170 for (x = 0; x < size; ++x) { 170 for (x = 0; x < size; ++x) {
171 dst[x] = clip[top[x]]; 171 dst[x] = clip[top[x]];
172 } 172 }
173 dst += BPS; 173 dst += BPS;
174 } 174 }
175 } 175 }
176 static void TM4(uint8_t *dst) { TrueMotion(dst, 4); } 176 static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
177 static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); } 177 static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
178 static void TM16(uint8_t *dst) { TrueMotion(dst, 16); } 178 static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
179 179
180 //------------------------------------------------------------------------------ 180 //------------------------------------------------------------------------------
181 // 16x16 181 // 16x16
182 182
183 static void VE16(uint8_t *dst) { // vertical 183 static void VE16(uint8_t* dst) { // vertical
184 int j; 184 int j;
185 for (j = 0; j < 16; ++j) { 185 for (j = 0; j < 16; ++j) {
186 memcpy(dst + j * BPS, dst - BPS, 16); 186 memcpy(dst + j * BPS, dst - BPS, 16);
187 } 187 }
188 } 188 }
189 189
190 static void HE16(uint8_t *dst) { // horizontal 190 static void HE16(uint8_t* dst) { // horizontal
191 int j; 191 int j;
192 for (j = 16; j > 0; --j) { 192 for (j = 16; j > 0; --j) {
193 memset(dst, dst[-1], 16); 193 memset(dst, dst[-1], 16);
194 dst += BPS; 194 dst += BPS;
195 } 195 }
196 } 196 }
197 197
198 static WEBP_INLINE void Put16(int v, uint8_t* dst) { 198 static WEBP_INLINE void Put16(int v, uint8_t* dst) {
199 int j; 199 int j;
200 for (j = 0; j < 16; ++j) { 200 for (j = 0; j < 16; ++j) {
201 memset(dst + j * BPS, v, 16); 201 memset(dst + j * BPS, v, 16);
202 } 202 }
203 } 203 }
204 204
205 static void DC16(uint8_t *dst) { // DC 205 static void DC16(uint8_t* dst) { // DC
206 int DC = 16; 206 int DC = 16;
207 int j; 207 int j;
208 for (j = 0; j < 16; ++j) { 208 for (j = 0; j < 16; ++j) {
209 DC += dst[-1 + j * BPS] + dst[j - BPS]; 209 DC += dst[-1 + j * BPS] + dst[j - BPS];
210 } 210 }
211 Put16(DC >> 5, dst); 211 Put16(DC >> 5, dst);
212 } 212 }
213 213
214 static void DC16NoTop(uint8_t *dst) { // DC with top samples not available 214 static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
215 int DC = 8; 215 int DC = 8;
216 int j; 216 int j;
217 for (j = 0; j < 16; ++j) { 217 for (j = 0; j < 16; ++j) {
218 DC += dst[-1 + j * BPS]; 218 DC += dst[-1 + j * BPS];
219 } 219 }
220 Put16(DC >> 4, dst); 220 Put16(DC >> 4, dst);
221 } 221 }
222 222
223 static void DC16NoLeft(uint8_t *dst) { // DC with left samples not available 223 static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
224 int DC = 8; 224 int DC = 8;
225 int i; 225 int i;
226 for (i = 0; i < 16; ++i) { 226 for (i = 0; i < 16; ++i) {
227 DC += dst[i - BPS]; 227 DC += dst[i - BPS];
228 } 228 }
229 Put16(DC >> 4, dst); 229 Put16(DC >> 4, dst);
230 } 230 }
231 231
232 static void DC16NoTopLeft(uint8_t *dst) { // DC with no top and left samples 232 static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
233 Put16(0x80, dst); 233 Put16(0x80, dst);
234 } 234 }
235 235
236 VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
237
236 //------------------------------------------------------------------------------ 238 //------------------------------------------------------------------------------
237 // 4x4 239 // 4x4
238 240
239 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) 241 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
240 #define AVG2(a, b) (((a) + (b) + 1) >> 1) 242 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
241 243
242 static void VE4(uint8_t *dst) { // vertical 244 static void VE4(uint8_t* dst) { // vertical
243 const uint8_t* top = dst - BPS; 245 const uint8_t* top = dst - BPS;
244 const uint8_t vals[4] = { 246 const uint8_t vals[4] = {
245 AVG3(top[-1], top[0], top[1]), 247 AVG3(top[-1], top[0], top[1]),
246 AVG3(top[ 0], top[1], top[2]), 248 AVG3(top[ 0], top[1], top[2]),
247 AVG3(top[ 1], top[2], top[3]), 249 AVG3(top[ 1], top[2], top[3]),
248 AVG3(top[ 2], top[3], top[4]) 250 AVG3(top[ 2], top[3], top[4])
249 }; 251 };
250 int i; 252 int i;
251 for (i = 0; i < 4; ++i) { 253 for (i = 0; i < 4; ++i) {
252 memcpy(dst + i * BPS, vals, sizeof(vals)); 254 memcpy(dst + i * BPS, vals, sizeof(vals));
253 } 255 }
254 } 256 }
255 257
256 static void HE4(uint8_t *dst) { // horizontal 258 static void HE4(uint8_t* dst) { // horizontal
257 const int A = dst[-1 - BPS]; 259 const int A = dst[-1 - BPS];
258 const int B = dst[-1]; 260 const int B = dst[-1];
259 const int C = dst[-1 + BPS]; 261 const int C = dst[-1 + BPS];
260 const int D = dst[-1 + 2 * BPS]; 262 const int D = dst[-1 + 2 * BPS];
261 const int E = dst[-1 + 3 * BPS]; 263 const int E = dst[-1 + 3 * BPS];
262 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C); 264 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C));
263 *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D); 265 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D));
264 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E); 266 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E));
265 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E); 267 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
266 } 268 }
267 269
268 static void DC4(uint8_t *dst) { // DC 270 static void DC4(uint8_t* dst) { // DC
269 uint32_t dc = 4; 271 uint32_t dc = 4;
270 int i; 272 int i;
271 for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; 273 for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
272 dc >>= 3; 274 dc >>= 3;
273 for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); 275 for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
274 } 276 }
275 277
276 static void RD4(uint8_t *dst) { // Down-right 278 static void RD4(uint8_t* dst) { // Down-right
277 const int I = dst[-1 + 0 * BPS]; 279 const int I = dst[-1 + 0 * BPS];
278 const int J = dst[-1 + 1 * BPS]; 280 const int J = dst[-1 + 1 * BPS];
279 const int K = dst[-1 + 2 * BPS]; 281 const int K = dst[-1 + 2 * BPS];
280 const int L = dst[-1 + 3 * BPS]; 282 const int L = dst[-1 + 3 * BPS];
281 const int X = dst[-1 - BPS]; 283 const int X = dst[-1 - BPS];
282 const int A = dst[0 - BPS]; 284 const int A = dst[0 - BPS];
283 const int B = dst[1 - BPS]; 285 const int B = dst[1 - BPS];
284 const int C = dst[2 - BPS]; 286 const int C = dst[2 - BPS];
285 const int D = dst[3 - BPS]; 287 const int D = dst[3 - BPS];
286 DST(0, 3) = AVG3(J, K, L); 288 DST(0, 3) = AVG3(J, K, L);
287 DST(0, 2) = DST(1, 3) = AVG3(I, J, K); 289 DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
288 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X, I, J); 290 DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
289 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I); 291 DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
290 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B, A, X); 292 DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
291 DST(2, 0) = DST(3, 1) = AVG3(C, B, A); 293 DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
292 DST(3, 0) = AVG3(D, C, B); 294 DST(3, 0) = AVG3(D, C, B);
293 } 295 }
294 296
295 static void LD4(uint8_t *dst) { // Down-Left 297 static void LD4(uint8_t* dst) { // Down-Left
296 const int A = dst[0 - BPS]; 298 const int A = dst[0 - BPS];
297 const int B = dst[1 - BPS]; 299 const int B = dst[1 - BPS];
298 const int C = dst[2 - BPS]; 300 const int C = dst[2 - BPS];
299 const int D = dst[3 - BPS]; 301 const int D = dst[3 - BPS];
300 const int E = dst[4 - BPS]; 302 const int E = dst[4 - BPS];
301 const int F = dst[5 - BPS]; 303 const int F = dst[5 - BPS];
302 const int G = dst[6 - BPS]; 304 const int G = dst[6 - BPS];
303 const int H = dst[7 - BPS]; 305 const int H = dst[7 - BPS];
304 DST(0, 0) = AVG3(A, B, C); 306 DST(0, 0) = AVG3(A, B, C);
305 DST(1, 0) = DST(0, 1) = AVG3(B, C, D); 307 DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
306 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); 308 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
307 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); 309 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
308 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); 310 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
309 DST(3, 2) = DST(2, 3) = AVG3(F, G, H); 311 DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
310 DST(3, 3) = AVG3(G, H, H); 312 DST(3, 3) = AVG3(G, H, H);
311 } 313 }
312 314
313 static void VR4(uint8_t *dst) { // Vertical-Right 315 static void VR4(uint8_t* dst) { // Vertical-Right
314 const int I = dst[-1 + 0 * BPS]; 316 const int I = dst[-1 + 0 * BPS];
315 const int J = dst[-1 + 1 * BPS]; 317 const int J = dst[-1 + 1 * BPS];
316 const int K = dst[-1 + 2 * BPS]; 318 const int K = dst[-1 + 2 * BPS];
317 const int X = dst[-1 - BPS]; 319 const int X = dst[-1 - BPS];
318 const int A = dst[0 - BPS]; 320 const int A = dst[0 - BPS];
319 const int B = dst[1 - BPS]; 321 const int B = dst[1 - BPS];
320 const int C = dst[2 - BPS]; 322 const int C = dst[2 - BPS];
321 const int D = dst[3 - BPS]; 323 const int D = dst[3 - BPS];
322 DST(0, 0) = DST(1, 2) = AVG2(X, A); 324 DST(0, 0) = DST(1, 2) = AVG2(X, A);
323 DST(1, 0) = DST(2, 2) = AVG2(A, B); 325 DST(1, 0) = DST(2, 2) = AVG2(A, B);
324 DST(2, 0) = DST(3, 2) = AVG2(B, C); 326 DST(2, 0) = DST(3, 2) = AVG2(B, C);
325 DST(3, 0) = AVG2(C, D); 327 DST(3, 0) = AVG2(C, D);
326 328
327 DST(0, 3) = AVG3(K, J, I); 329 DST(0, 3) = AVG3(K, J, I);
328 DST(0, 2) = AVG3(J, I, X); 330 DST(0, 2) = AVG3(J, I, X);
329 DST(0, 1) = DST(1, 3) = AVG3(I, X, A); 331 DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
330 DST(1, 1) = DST(2, 3) = AVG3(X, A, B); 332 DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
331 DST(2, 1) = DST(3, 3) = AVG3(A, B, C); 333 DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
332 DST(3, 1) = AVG3(B, C, D); 334 DST(3, 1) = AVG3(B, C, D);
333 } 335 }
334 336
335 static void VL4(uint8_t *dst) { // Vertical-Left 337 static void VL4(uint8_t* dst) { // Vertical-Left
336 const int A = dst[0 - BPS]; 338 const int A = dst[0 - BPS];
337 const int B = dst[1 - BPS]; 339 const int B = dst[1 - BPS];
338 const int C = dst[2 - BPS]; 340 const int C = dst[2 - BPS];
339 const int D = dst[3 - BPS]; 341 const int D = dst[3 - BPS];
340 const int E = dst[4 - BPS]; 342 const int E = dst[4 - BPS];
341 const int F = dst[5 - BPS]; 343 const int F = dst[5 - BPS];
342 const int G = dst[6 - BPS]; 344 const int G = dst[6 - BPS];
343 const int H = dst[7 - BPS]; 345 const int H = dst[7 - BPS];
344 DST(0, 0) = AVG2(A, B); 346 DST(0, 0) = AVG2(A, B);
345 DST(1, 0) = DST(0, 2) = AVG2(B, C); 347 DST(1, 0) = DST(0, 2) = AVG2(B, C);
346 DST(2, 0) = DST(1, 2) = AVG2(C, D); 348 DST(2, 0) = DST(1, 2) = AVG2(C, D);
347 DST(3, 0) = DST(2, 2) = AVG2(D, E); 349 DST(3, 0) = DST(2, 2) = AVG2(D, E);
348 350
349 DST(0, 1) = AVG3(A, B, C); 351 DST(0, 1) = AVG3(A, B, C);
350 DST(1, 1) = DST(0, 3) = AVG3(B, C, D); 352 DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
351 DST(2, 1) = DST(1, 3) = AVG3(C, D, E); 353 DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
352 DST(3, 1) = DST(2, 3) = AVG3(D, E, F); 354 DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
353 DST(3, 2) = AVG3(E, F, G); 355 DST(3, 2) = AVG3(E, F, G);
354 DST(3, 3) = AVG3(F, G, H); 356 DST(3, 3) = AVG3(F, G, H);
355 } 357 }
356 358
357 static void HU4(uint8_t *dst) { // Horizontal-Up 359 static void HU4(uint8_t* dst) { // Horizontal-Up
358 const int I = dst[-1 + 0 * BPS]; 360 const int I = dst[-1 + 0 * BPS];
359 const int J = dst[-1 + 1 * BPS]; 361 const int J = dst[-1 + 1 * BPS];
360 const int K = dst[-1 + 2 * BPS]; 362 const int K = dst[-1 + 2 * BPS];
361 const int L = dst[-1 + 3 * BPS]; 363 const int L = dst[-1 + 3 * BPS];
362 DST(0, 0) = AVG2(I, J); 364 DST(0, 0) = AVG2(I, J);
363 DST(2, 0) = DST(0, 1) = AVG2(J, K); 365 DST(2, 0) = DST(0, 1) = AVG2(J, K);
364 DST(2, 1) = DST(0, 2) = AVG2(K, L); 366 DST(2, 1) = DST(0, 2) = AVG2(K, L);
365 DST(1, 0) = AVG3(I, J, K); 367 DST(1, 0) = AVG3(I, J, K);
366 DST(3, 0) = DST(1, 1) = AVG3(J, K, L); 368 DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
367 DST(3, 1) = DST(1, 2) = AVG3(K, L, L); 369 DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
368 DST(3, 2) = DST(2, 2) = 370 DST(3, 2) = DST(2, 2) =
369 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; 371 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
370 } 372 }
371 373
372 static void HD4(uint8_t *dst) { // Horizontal-Down 374 static void HD4(uint8_t* dst) { // Horizontal-Down
373 const int I = dst[-1 + 0 * BPS]; 375 const int I = dst[-1 + 0 * BPS];
374 const int J = dst[-1 + 1 * BPS]; 376 const int J = dst[-1 + 1 * BPS];
375 const int K = dst[-1 + 2 * BPS]; 377 const int K = dst[-1 + 2 * BPS];
376 const int L = dst[-1 + 3 * BPS]; 378 const int L = dst[-1 + 3 * BPS];
377 const int X = dst[-1 - BPS]; 379 const int X = dst[-1 - BPS];
378 const int A = dst[0 - BPS]; 380 const int A = dst[0 - BPS];
379 const int B = dst[1 - BPS]; 381 const int B = dst[1 - BPS];
380 const int C = dst[2 - BPS]; 382 const int C = dst[2 - BPS];
381 383
382 DST(0, 0) = DST(2, 1) = AVG2(I, X); 384 DST(0, 0) = DST(2, 1) = AVG2(I, X);
383 DST(0, 1) = DST(2, 2) = AVG2(J, I); 385 DST(0, 1) = DST(2, 2) = AVG2(J, I);
384 DST(0, 2) = DST(2, 3) = AVG2(K, J); 386 DST(0, 2) = DST(2, 3) = AVG2(K, J);
385 DST(0, 3) = AVG2(L, K); 387 DST(0, 3) = AVG2(L, K);
386 388
387 DST(3, 0) = AVG3(A, B, C); 389 DST(3, 0) = AVG3(A, B, C);
388 DST(2, 0) = AVG3(X, A, B); 390 DST(2, 0) = AVG3(X, A, B);
389 DST(1, 0) = DST(3, 1) = AVG3(I, X, A); 391 DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
390 DST(1, 1) = DST(3, 2) = AVG3(J, I, X); 392 DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
391 DST(1, 2) = DST(3, 3) = AVG3(K, J, I); 393 DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
392 DST(1, 3) = AVG3(L, K, J); 394 DST(1, 3) = AVG3(L, K, J);
393 } 395 }
394 396
395 #undef DST 397 #undef DST
396 #undef AVG3 398 #undef AVG3
397 #undef AVG2 399 #undef AVG2
398 400
401 VP8PredFunc VP8PredLuma4[NUM_BMODES];
402
399 //------------------------------------------------------------------------------ 403 //------------------------------------------------------------------------------
400 // Chroma 404 // Chroma
401 405
402 static void VE8uv(uint8_t *dst) { // vertical 406 static void VE8uv(uint8_t* dst) { // vertical
403 int j; 407 int j;
404 for (j = 0; j < 8; ++j) { 408 for (j = 0; j < 8; ++j) {
405 memcpy(dst + j * BPS, dst - BPS, 8); 409 memcpy(dst + j * BPS, dst - BPS, 8);
406 } 410 }
407 } 411 }
408 412
409 static void HE8uv(uint8_t *dst) { // horizontal 413 static void HE8uv(uint8_t* dst) { // horizontal
410 int j; 414 int j;
411 for (j = 0; j < 8; ++j) { 415 for (j = 0; j < 8; ++j) {
412 memset(dst, dst[-1], 8); 416 memset(dst, dst[-1], 8);
413 dst += BPS; 417 dst += BPS;
414 } 418 }
415 } 419 }
416 420
417 // helper for chroma-DC predictions 421 // helper for chroma-DC predictions
418 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { 422 static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
419 int j; 423 int j;
420 for (j = 0; j < 8; ++j) { 424 for (j = 0; j < 8; ++j) {
421 memset(dst + j * BPS, value, 8); 425 memset(dst + j * BPS, value, 8);
422 } 426 }
423 } 427 }
424 428
425 static void DC8uv(uint8_t *dst) { // DC 429 static void DC8uv(uint8_t* dst) { // DC
426 int dc0 = 8; 430 int dc0 = 8;
427 int i; 431 int i;
428 for (i = 0; i < 8; ++i) { 432 for (i = 0; i < 8; ++i) {
429 dc0 += dst[i - BPS] + dst[-1 + i * BPS]; 433 dc0 += dst[i - BPS] + dst[-1 + i * BPS];
430 } 434 }
431 Put8x8uv(dc0 >> 4, dst); 435 Put8x8uv(dc0 >> 4, dst);
432 } 436 }
433 437
434 static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples 438 static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
435 int dc0 = 4; 439 int dc0 = 4;
436 int i; 440 int i;
437 for (i = 0; i < 8; ++i) { 441 for (i = 0; i < 8; ++i) {
438 dc0 += dst[i - BPS]; 442 dc0 += dst[i - BPS];
439 } 443 }
440 Put8x8uv(dc0 >> 3, dst); 444 Put8x8uv(dc0 >> 3, dst);
441 } 445 }
442 446
443 static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples 447 static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
444 int dc0 = 4; 448 int dc0 = 4;
445 int i; 449 int i;
446 for (i = 0; i < 8; ++i) { 450 for (i = 0; i < 8; ++i) {
447 dc0 += dst[-1 + i * BPS]; 451 dc0 += dst[-1 + i * BPS];
448 } 452 }
449 Put8x8uv(dc0 >> 3, dst); 453 Put8x8uv(dc0 >> 3, dst);
450 } 454 }
451 455
452 static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing 456 static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
453 Put8x8uv(0x80, dst); 457 Put8x8uv(0x80, dst);
454 } 458 }
455 459
456 //------------------------------------------------------------------------------ 460 VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
457 // default C implementations
458
459 const VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
460 DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4
461 };
462
463 const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
464 DC16, TM16, VE16, HE16,
465 DC16NoTop, DC16NoLeft, DC16NoTopLeft
466 };
467
468 const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
469 DC8uv, TM8uv, VE8uv, HE8uv,
470 DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
471 };
472 461
473 //------------------------------------------------------------------------------ 462 //------------------------------------------------------------------------------
474 // Edge filtering functions 463 // Edge filtering functions
475 464
476 // 4 pixels in, 2 pixels out 465 // 4 pixels in, 2 pixels out
477 static WEBP_INLINE void do_filter2(uint8_t* p, int step) { 466 static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
478 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; 467 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
479 const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] 468 const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
480 const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] 469 const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
481 const int a2 = VP8ksclip2[(a + 3) >> 3]; 470 const int a2 = VP8ksclip2[(a + 3) >> 3];
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 VP8LumaFilterFunc VP8VFilter16i; 667 VP8LumaFilterFunc VP8VFilter16i;
679 VP8LumaFilterFunc VP8HFilter16i; 668 VP8LumaFilterFunc VP8HFilter16i;
680 VP8ChromaFilterFunc VP8VFilter8i; 669 VP8ChromaFilterFunc VP8VFilter8i;
681 VP8ChromaFilterFunc VP8HFilter8i; 670 VP8ChromaFilterFunc VP8HFilter8i;
682 VP8SimpleFilterFunc VP8SimpleVFilter16; 671 VP8SimpleFilterFunc VP8SimpleVFilter16;
683 VP8SimpleFilterFunc VP8SimpleHFilter16; 672 VP8SimpleFilterFunc VP8SimpleHFilter16;
684 VP8SimpleFilterFunc VP8SimpleVFilter16i; 673 VP8SimpleFilterFunc VP8SimpleVFilter16i;
685 VP8SimpleFilterFunc VP8SimpleHFilter16i; 674 VP8SimpleFilterFunc VP8SimpleHFilter16i;
686 675
687 extern void VP8DspInitSSE2(void); 676 extern void VP8DspInitSSE2(void);
677 extern void VP8DspInitSSE41(void);
688 extern void VP8DspInitNEON(void); 678 extern void VP8DspInitNEON(void);
689 extern void VP8DspInitMIPS32(void); 679 extern void VP8DspInitMIPS32(void);
680 extern void VP8DspInitMIPSdspR2(void);
690 681
691 static volatile VP8CPUInfo dec_last_cpuinfo_used = 682 static volatile VP8CPUInfo dec_last_cpuinfo_used =
692 (VP8CPUInfo)&dec_last_cpuinfo_used; 683 (VP8CPUInfo)&dec_last_cpuinfo_used;
693 684
694 void VP8DspInit(void) { 685 WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
695 if (dec_last_cpuinfo_used == VP8GetCPUInfo) return; 686 if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
696 687
697 VP8InitClipTables(); 688 VP8InitClipTables();
698 689
699 VP8TransformWHT = TransformWHT; 690 VP8TransformWHT = TransformWHT;
700 VP8Transform = TransformTwo; 691 VP8Transform = TransformTwo;
701 VP8TransformUV = TransformUV; 692 VP8TransformUV = TransformUV;
702 VP8TransformDC = TransformDC; 693 VP8TransformDC = TransformDC;
703 VP8TransformDCUV = TransformDCUV; 694 VP8TransformDCUV = TransformDCUV;
704 VP8TransformAC3 = TransformAC3; 695 VP8TransformAC3 = TransformAC3;
705 696
706 VP8VFilter16 = VFilter16; 697 VP8VFilter16 = VFilter16;
707 VP8HFilter16 = HFilter16; 698 VP8HFilter16 = HFilter16;
708 VP8VFilter8 = VFilter8; 699 VP8VFilter8 = VFilter8;
709 VP8HFilter8 = HFilter8; 700 VP8HFilter8 = HFilter8;
710 VP8VFilter16i = VFilter16i; 701 VP8VFilter16i = VFilter16i;
711 VP8HFilter16i = HFilter16i; 702 VP8HFilter16i = HFilter16i;
712 VP8VFilter8i = VFilter8i; 703 VP8VFilter8i = VFilter8i;
713 VP8HFilter8i = HFilter8i; 704 VP8HFilter8i = HFilter8i;
714 VP8SimpleVFilter16 = SimpleVFilter16; 705 VP8SimpleVFilter16 = SimpleVFilter16;
715 VP8SimpleHFilter16 = SimpleHFilter16; 706 VP8SimpleHFilter16 = SimpleHFilter16;
716 VP8SimpleVFilter16i = SimpleVFilter16i; 707 VP8SimpleVFilter16i = SimpleVFilter16i;
717 VP8SimpleHFilter16i = SimpleHFilter16i; 708 VP8SimpleHFilter16i = SimpleHFilter16i;
718 709
710 VP8PredLuma4[0] = DC4;
711 VP8PredLuma4[1] = TM4;
712 VP8PredLuma4[2] = VE4;
713 VP8PredLuma4[3] = HE4;
714 VP8PredLuma4[4] = RD4;
715 VP8PredLuma4[5] = VR4;
716 VP8PredLuma4[6] = LD4;
717 VP8PredLuma4[7] = VL4;
718 VP8PredLuma4[8] = HD4;
719 VP8PredLuma4[9] = HU4;
720
721 VP8PredLuma16[0] = DC16;
722 VP8PredLuma16[1] = TM16;
723 VP8PredLuma16[2] = VE16;
724 VP8PredLuma16[3] = HE16;
725 VP8PredLuma16[4] = DC16NoTop;
726 VP8PredLuma16[5] = DC16NoLeft;
727 VP8PredLuma16[6] = DC16NoTopLeft;
728
729 VP8PredChroma8[0] = DC8uv;
730 VP8PredChroma8[1] = TM8uv;
731 VP8PredChroma8[2] = VE8uv;
732 VP8PredChroma8[3] = HE8uv;
733 VP8PredChroma8[4] = DC8uvNoTop;
734 VP8PredChroma8[5] = DC8uvNoLeft;
735 VP8PredChroma8[6] = DC8uvNoTopLeft;
736
719 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 737 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
720 if (VP8GetCPUInfo != NULL) { 738 if (VP8GetCPUInfo != NULL) {
721 #if defined(WEBP_USE_SSE2) 739 #if defined(WEBP_USE_SSE2)
722 if (VP8GetCPUInfo(kSSE2)) { 740 if (VP8GetCPUInfo(kSSE2)) {
723 VP8DspInitSSE2(); 741 VP8DspInitSSE2();
742 #if defined(WEBP_USE_SSE41)
743 if (VP8GetCPUInfo(kSSE4_1)) {
744 VP8DspInitSSE41();
745 }
746 #endif
724 } 747 }
725 #elif defined(WEBP_USE_NEON) 748 #endif
749 #if defined(WEBP_USE_NEON)
726 if (VP8GetCPUInfo(kNEON)) { 750 if (VP8GetCPUInfo(kNEON)) {
727 VP8DspInitNEON(); 751 VP8DspInitNEON();
728 } 752 }
729 #elif defined(WEBP_USE_MIPS32) 753 #endif
754 #if defined(WEBP_USE_MIPS32)
730 if (VP8GetCPUInfo(kMIPS32)) { 755 if (VP8GetCPUInfo(kMIPS32)) {
731 VP8DspInitMIPS32(); 756 VP8DspInitMIPS32();
732 } 757 }
733 #endif 758 #endif
759 #if defined(WEBP_USE_MIPS_DSP_R2)
760 if (VP8GetCPUInfo(kMIPSdspR2)) {
761 VP8DspInitMIPSdspR2();
762 }
763 #endif
734 } 764 }
735 dec_last_cpuinfo_used = VP8GetCPUInfo; 765 dec_last_cpuinfo_used = VP8GetCPUInfo;
736 } 766 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698