OLD | NEW |
1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Speed-critical encoding functions. | 10 // Speed-critical encoding functions. |
(...skipping 22 matching lines...)
33 // Luma | 33 // Luma |
34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, | 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, |
35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, | 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, |
36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, | 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, |
37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, | 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, |
38 | 38 |
39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U | 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U |
40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V | 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V |
41 }; | 41 }; |
42 | 42 |
| 43 // general-purpose util function |
| 44 void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], |
| 45 VP8Histogram* const histo) { |
| 46 int max_value = 0, last_non_zero = 1; |
| 47 int k; |
| 48 for (k = 0; k <= MAX_COEFF_THRESH; ++k) { |
| 49 const int value = distribution[k]; |
| 50 if (value > 0) { |
| 51 if (value > max_value) max_value = value; |
| 52 last_non_zero = k; |
| 53 } |
| 54 } |
| 55 histo->max_value = max_value; |
| 56 histo->last_non_zero = last_non_zero; |
| 57 } |
| 58 |
43 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, | 59 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, |
44 int start_block, int end_block, | 60 int start_block, int end_block, |
45 VP8Histogram* const histo) { | 61 VP8Histogram* const histo) { |
46 int j; | 62 int j; |
| 63 int distribution[MAX_COEFF_THRESH + 1] = { 0 }; |
47 for (j = start_block; j < end_block; ++j) { | 64 for (j = start_block; j < end_block; ++j) { |
48 int k; | 65 int k; |
49 int16_t out[16]; | 66 int16_t out[16]; |
50 | 67 |
51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); | 68 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); |
52 | 69 |
53 // Convert coefficients to bin. | 70 // Convert coefficients to bin. |
54 for (k = 0; k < 16; ++k) { | 71 for (k = 0; k < 16; ++k) { |
55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? | 72 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? |
56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); | 73 const int clipped_value = clip_max(v, MAX_COEFF_THRESH); |
57 histo->distribution[clipped_value]++; | 74 ++distribution[clipped_value]; |
58 } | 75 } |
59 } | 76 } |
| 77 VP8SetHistogramData(distribution, histo); |
60 } | 78 } |
61 | 79 |
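For context, a minimal sketch of how the new VP8SetHistogramData helper is driven: a caller tallies coefficients into a local distribution[] and then folds it into the two summary fields the encoder keeps. The bin counts below are made up; VP8Histogram and MAX_COEFF_THRESH come from the encoder headers.

    // Hypothetical caller: tally a few coefficient bins, then summarize them.
    int distribution[MAX_COEFF_THRESH + 1] = { 0 };
    VP8Histogram histo;
    distribution[0] = 12;   // twelve coefficients landed in bin 0
    distribution[5] = 3;    // three coefficients landed in bin 5
    VP8SetHistogramData(distribution, &histo);
    // now histo.max_value == 12 and histo.last_non_zero == 5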
62 //------------------------------------------------------------------------------ | 80 //------------------------------------------------------------------------------ |
63 // run-time tables (~4k) | 81 // run-time tables (~4k) |
64 | 82 |
65 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] | 83 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] |
66 | 84 |
67 // We declare this variable 'volatile' to prevent instruction reordering | 85 // We declare this variable 'volatile' to prevent instruction reordering |
68 // and make sure it's set to true _last_ (so as to be thread-safe) | 86 // and make sure it's set to true _last_ (so as to be thread-safe) |
69 static volatile int tables_ok = 0; | 87 static volatile int tables_ok = 0; |
70 | 88 |
71 static void InitTables(void) { | 89 static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) { |
72 if (!tables_ok) { | 90 if (!tables_ok) { |
73 int i; | 91 int i; |
74 for (i = -255; i <= 255 + 255; ++i) { | 92 for (i = -255; i <= 255 + 255; ++i) { |
75 clip1[255 + i] = clip_8b(i); | 93 clip1[255 + i] = clip_8b(i); |
76 } | 94 } |
77 tables_ok = 1; | 95 tables_ok = 1; |
78 } | 96 } |
79 } | 97 } |
80 | 98 |
81 | 99 |
(...skipping 70 matching lines...)
152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); | 170 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); |
153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); | 171 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); |
154 const int a3 = (tmp[0 + i] - tmp[12 + i]); | 172 const int a3 = (tmp[0 + i] - tmp[12 + i]); |
155 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b | 173 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b |
156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); | 174 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); |
157 out[8 + i] = (a0 - a1 + 7) >> 4; | 175 out[8 + i] = (a0 - a1 + 7) >> 4; |
158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); | 176 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); |
159 } | 177 } |
160 } | 178 } |
161 | 179 |
| 180 static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) { |
| 181 VP8FTransform(src, ref, out); |
| 182 VP8FTransform(src + 4, ref + 4, out + 16); |
| 183 } |
| 184 |
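For orientation, FTransform2 simply covers two horizontally adjacent 4x4 blocks in one call (src + 4 pixels right, out + 16 coefficients). A hypothetical caller walking a 16-pixel-wide strip might look like this; src and ref are placeholder pointers into BPS-strided buffers.

    // Transform one 16x4 strip as four 4x4 blocks, two at a time.
    int16_t coeffs[4 * 16];
    VP8FTransform2(src + 0, ref + 0, coeffs + 0 * 16);   // blocks 0 and 1
    VP8FTransform2(src + 8, ref + 8, coeffs + 2 * 16);   // blocks 2 and 3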
162 static void FTransformWHT(const int16_t* in, int16_t* out) { | 185 static void FTransformWHT(const int16_t* in, int16_t* out) { |
163 // input is 12b signed | 186 // input is 12b signed |
164 int32_t tmp[16]; | 187 int32_t tmp[16]; |
165 int i; | 188 int i; |
166 for (i = 0; i < 4; ++i, in += 64) { | 189 for (i = 0; i < 4; ++i, in += 64) { |
167 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b | 190 const int a0 = (in[0 * 16] + in[2 * 16]); // 13b |
168 const int a1 = (in[1 * 16] + in[3 * 16]); | 191 const int a1 = (in[1 * 16] + in[3 * 16]); |
169 const int a2 = (in[1 * 16] - in[3 * 16]); | 192 const int a2 = (in[1 * 16] - in[3 * 16]); |
170 const int a3 = (in[0 * 16] - in[2 * 16]); | 193 const int a3 = (in[0 * 16] - in[2 * 16]); |
171 tmp[0 + i * 4] = a0 + a1; // 14b | 194 tmp[0 + i * 4] = a0 + a1; // 14b |
(...skipping 16 matching lines...)
188 out[12 + i] = b3 >> 1; | 211 out[12 + i] = b3 >> 1; |
189 } | 212 } |
190 } | 213 } |
191 | 214 |
192 #undef MUL | 215 #undef MUL |
193 #undef STORE | 216 #undef STORE |
194 | 217 |
195 //------------------------------------------------------------------------------ | 218 //------------------------------------------------------------------------------ |
196 // Intra predictions | 219 // Intra predictions |
197 | 220 |
198 #define DST(x, y) dst[(x) + (y) * BPS] | |
199 | |
200 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { | 221 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) { |
201 int j; | 222 int j; |
202 for (j = 0; j < size; ++j) { | 223 for (j = 0; j < size; ++j) { |
203 memset(dst + j * BPS, value, size); | 224 memset(dst + j * BPS, value, size); |
204 } | 225 } |
205 } | 226 } |
206 | 227 |
207 static WEBP_INLINE void VerticalPred(uint8_t* dst, | 228 static WEBP_INLINE void VerticalPred(uint8_t* dst, |
208 const uint8_t* top, int size) { | 229 const uint8_t* top, int size) { |
209 int j; | 230 int j; |
210 if (top) { | 231 if (top != NULL) { |
211 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); | 232 for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size); |
212 } else { | 233 } else { |
213 Fill(dst, 127, size); | 234 Fill(dst, 127, size); |
214 } | 235 } |
215 } | 236 } |
216 | 237 |
217 static WEBP_INLINE void HorizontalPred(uint8_t* dst, | 238 static WEBP_INLINE void HorizontalPred(uint8_t* dst, |
218 const uint8_t* left, int size) { | 239 const uint8_t* left, int size) { |
219 if (left) { | 240 if (left != NULL) { |
220 int j; | 241 int j; |
221 for (j = 0; j < size; ++j) { | 242 for (j = 0; j < size; ++j) { |
222 memset(dst + j * BPS, left[j], size); | 243 memset(dst + j * BPS, left[j], size); |
223 } | 244 } |
224 } else { | 245 } else { |
225 Fill(dst, 129, size); | 246 Fill(dst, 129, size); |
226 } | 247 } |
227 } | 248 } |
228 | 249 |
229 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, | 250 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left, |
230 const uint8_t* top, int size) { | 251 const uint8_t* top, int size) { |
231 int y; | 252 int y; |
232 if (left) { | 253 if (left != NULL) { |
233 if (top) { | 254 if (top != NULL) { |
234 const uint8_t* const clip = clip1 + 255 - left[-1]; | 255 const uint8_t* const clip = clip1 + 255 - left[-1]; |
235 for (y = 0; y < size; ++y) { | 256 for (y = 0; y < size; ++y) { |
236 const uint8_t* const clip_table = clip + left[y]; | 257 const uint8_t* const clip_table = clip + left[y]; |
237 int x; | 258 int x; |
238 for (x = 0; x < size; ++x) { | 259 for (x = 0; x < size; ++x) { |
239 dst[x] = clip_table[top[x]]; | 260 dst[x] = clip_table[top[x]]; |
240 } | 261 } |
241 dst += BPS; | 262 dst += BPS; |
242 } | 263 } |
243 } else { | 264 } else { |
244 HorizontalPred(dst, left, size); | 265 HorizontalPred(dst, left, size); |
245 } | 266 } |
246 } else { | 267 } else { |
247 // true motion without left samples (hence: with default 129 value) | 268 // true motion without left samples (hence: with default 129 value) |
248 // is equivalent to VE prediction where you just copy the top samples. | 269 // is equivalent to VE prediction where you just copy the top samples. |
249 // Note that if top samples are not available, the default value is | 270 // Note that if top samples are not available, the default value is |
250 // then 129, and not 127 as in the VerticalPred case. | 271 // then 129, and not 127 as in the VerticalPred case. |
251 if (top) { | 272 if (top != NULL) { |
252 VerticalPred(dst, top, size); | 273 VerticalPred(dst, top, size); |
253 } else { | 274 } else { |
254 Fill(dst, 129, size); | 275 Fill(dst, 129, size); |
255 } | 276 } |
256 } | 277 } |
257 } | 278 } |
258 | 279 |
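As a side note on the clip1 lookup used above: clip1[255 + v] holds clip_8b(v) for v in [-255, 510], so indexing clip1 + 255 - left[-1] with left[y] and then top[x] yields clip(top[x] + left[y] - left[-1]), the TM prediction, with no per-pixel branch. A small worked example, assuming InitTables() has already filled the table (the sample values are illustrative):

    // One TM pixel, written as the table lookup TrueMotion() performs.
    const int T = 200, L = 180, TL = 250;               // top, left, top-left samples
    const uint8_t via_table = clip1[255 + T + L - TL];  // == clip_8b(130) == 130
    // direct form: clip(200 + 180 - 250) = 130, same result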
259 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, | 280 static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left, |
260 const uint8_t* top, | 281 const uint8_t* top, |
261 int size, int round, int shift) { | 282 int size, int round, int shift) { |
262 int DC = 0; | 283 int DC = 0; |
263 int j; | 284 int j; |
264 if (top) { | 285 if (top != NULL) { |
265 for (j = 0; j < size; ++j) DC += top[j]; | 286 for (j = 0; j < size; ++j) DC += top[j]; |
266 if (left) { // top and left present | 287 if (left != NULL) { // top and left present |
267 for (j = 0; j < size; ++j) DC += left[j]; | 288 for (j = 0; j < size; ++j) DC += left[j]; |
268 } else { // top, but no left | 289 } else { // top, but no left |
269 DC += DC; | 290 DC += DC; |
270 } | 291 } |
271 DC = (DC + round) >> shift; | 292 DC = (DC + round) >> shift; |
272 } else if (left) { // left but no top | 293 } else if (left != NULL) { // left but no top |
273 for (j = 0; j < size; ++j) DC += left[j]; | 294 for (j = 0; j < size; ++j) DC += left[j]; |
274 DC += DC; | 295 DC += DC; |
275 DC = (DC + round) >> shift; | 296 DC = (DC + round) >> shift; |
276 } else { // no top, no left, nothing. | 297 } else { // no top, no left, nothing. |
277 DC = 0x80; | 298 DC = 0x80; |
278 } | 299 } |
279 Fill(dst, DC, size); | 300 Fill(dst, DC, size); |
280 } | 301 } |
281 | 302 |
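To make the round/shift parameters above concrete: for the 16x16 case the caller passes round=16, shift=5, so with both edges present the 32 boundary samples are averaged, and with only one edge the 16-sample sum is doubled first so the same shift still divides by 32. A worked example with flat edges (values are illustrative):

    // 16 top samples of 100 and 16 left samples of 120:
    //   DC = 16*100 + 16*120 = 3520;  (3520 + 16) >> 5 = 110  (the mean)
    // top edge only (16 samples of 100):
    //   DC = 1600; DC += DC -> 3200;  (3200 + 16) >> 5 = 100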
282 //------------------------------------------------------------------------------ | 303 //------------------------------------------------------------------------------ |
283 // Chroma 8x8 prediction (paragraph 12.2) | 304 // Chroma 8x8 prediction (paragraph 12.2) |
284 | 305 |
285 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, | 306 static void IntraChromaPreds(uint8_t* dst, const uint8_t* left, |
286 const uint8_t* top) { | 307 const uint8_t* top) { |
287 // U block | 308 // U block |
288 DCMode(C8DC8 + dst, left, top, 8, 8, 4); | 309 DCMode(C8DC8 + dst, left, top, 8, 8, 4); |
289 VerticalPred(C8VE8 + dst, top, 8); | 310 VerticalPred(C8VE8 + dst, top, 8); |
290 HorizontalPred(C8HE8 + dst, left, 8); | 311 HorizontalPred(C8HE8 + dst, left, 8); |
291 TrueMotion(C8TM8 + dst, left, top, 8); | 312 TrueMotion(C8TM8 + dst, left, top, 8); |
292 // V block | 313 // V block |
293 dst += 8; | 314 dst += 8; |
294 if (top) top += 8; | 315 if (top != NULL) top += 8; |
295 if (left) left += 16; | 316 if (left != NULL) left += 16; |
296 DCMode(C8DC8 + dst, left, top, 8, 8, 4); | 317 DCMode(C8DC8 + dst, left, top, 8, 8, 4); |
297 VerticalPred(C8VE8 + dst, top, 8); | 318 VerticalPred(C8VE8 + dst, top, 8); |
298 HorizontalPred(C8HE8 + dst, left, 8); | 319 HorizontalPred(C8HE8 + dst, left, 8); |
299 TrueMotion(C8TM8 + dst, left, top, 8); | 320 TrueMotion(C8TM8 + dst, left, top, 8); |
300 } | 321 } |
301 | 322 |
302 //------------------------------------------------------------------------------ | 323 //------------------------------------------------------------------------------ |
303 // luma 16x16 prediction (paragraph 12.3) | 324 // luma 16x16 prediction (paragraph 12.3) |
304 | 325 |
305 static void Intra16Preds(uint8_t* dst, | 326 static void Intra16Preds(uint8_t* dst, |
306 const uint8_t* left, const uint8_t* top) { | 327 const uint8_t* left, const uint8_t* top) { |
307 DCMode(I16DC16 + dst, left, top, 16, 16, 5); | 328 DCMode(I16DC16 + dst, left, top, 16, 16, 5); |
308 VerticalPred(I16VE16 + dst, top, 16); | 329 VerticalPred(I16VE16 + dst, top, 16); |
309 HorizontalPred(I16HE16 + dst, left, 16); | 330 HorizontalPred(I16HE16 + dst, left, 16); |
310 TrueMotion(I16TM16 + dst, left, top, 16); | 331 TrueMotion(I16TM16 + dst, left, top, 16); |
311 } | 332 } |
312 | 333 |
313 //------------------------------------------------------------------------------ | 334 //------------------------------------------------------------------------------ |
314 // luma 4x4 prediction | 335 // luma 4x4 prediction |
315 | 336 |
| 337 #define DST(x, y) dst[(x) + (y) * BPS] |
316 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) | 338 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) |
317 #define AVG2(a, b) (((a) + (b) + 1) >> 1) | 339 #define AVG2(a, b) (((a) + (b) + 1) >> 1) |
318 | 340 |
319 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical | 341 static void VE4(uint8_t* dst, const uint8_t* top) { // vertical |
320 const uint8_t vals[4] = { | 342 const uint8_t vals[4] = { |
321 AVG3(top[-1], top[0], top[1]), | 343 AVG3(top[-1], top[0], top[1]), |
322 AVG3(top[ 0], top[1], top[2]), | 344 AVG3(top[ 0], top[1], top[2]), |
323 AVG3(top[ 1], top[2], top[3]), | 345 AVG3(top[ 1], top[2], top[3]), |
324 AVG3(top[ 2], top[3], top[4]) | 346 AVG3(top[ 2], top[3], top[4]) |
325 }; | 347 }; |
326 int i; | 348 int i; |
327 for (i = 0; i < 4; ++i) { | 349 for (i = 0; i < 4; ++i) { |
328 memcpy(dst + i * BPS, vals, 4); | 350 memcpy(dst + i * BPS, vals, 4); |
329 } | 351 } |
330 } | 352 } |
331 | 353 |
332 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal | 354 static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal |
333 const int X = top[-1]; | 355 const int X = top[-1]; |
334 const int I = top[-2]; | 356 const int I = top[-2]; |
335 const int J = top[-3]; | 357 const int J = top[-3]; |
336 const int K = top[-4]; | 358 const int K = top[-4]; |
337 const int L = top[-5]; | 359 const int L = top[-5]; |
338 *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J); | 360 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J)); |
339 *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K); | 361 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K)); |
340 *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L); | 362 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L)); |
341 *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L); | 363 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L)); |
342 } | 364 } |
343 | 365 |
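The 0x01010101U multiply above is just byte replication: it broadcasts one 8-bit average into all four lanes of the 32-bit word stored per row. For instance:

    // AVG3(4, 5, 9) = (4 + 2*5 + 9 + 2) >> 2 = 25 >> 2 = 6
    // 0x01010101U * 6 = 0x06060606  ->  the 4-pixel row becomes {6, 6, 6, 6}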
344 static void DC4(uint8_t* dst, const uint8_t* top) { | 366 static void DC4(uint8_t* dst, const uint8_t* top) { |
345 uint32_t dc = 4; | 367 uint32_t dc = 4; |
346 int i; | 368 int i; |
347 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; | 369 for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i]; |
348 Fill(dst, dc >> 3, 4); | 370 Fill(dst, dc >> 3, 4); |
349 } | 371 } |
350 | 372 |
351 static void RD4(uint8_t* dst, const uint8_t* top) { | 373 static void RD4(uint8_t* dst, const uint8_t* top) { |
(...skipping 266 matching lines...)
618 out[n] = level; | 640 out[n] = level; |
619 if (level) last = n; | 641 if (level) last = n; |
620 } else { | 642 } else { |
621 out[n] = 0; | 643 out[n] = 0; |
622 in[j] = 0; | 644 in[j] = 0; |
623 } | 645 } |
624 } | 646 } |
625 return (last >= 0); | 647 return (last >= 0); |
626 } | 648 } |
627 | 649 |
| 650 static int Quantize2Blocks(int16_t in[32], int16_t out[32], |
| 651 const VP8Matrix* const mtx) { |
| 652 int nz; |
| 653 nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; |
| 654 nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; |
| 655 return nz; |
| 656 } |
| 657 |
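A brief sketch of how the two-bit return value of Quantize2Blocks might be consumed through the new VP8EncQuantize2Blocks pointer. The caller shown here is hypothetical: in[]/out[] would hold two consecutive 4x4 coefficient blocks and mtx an already-initialized VP8Matrix.

    int16_t in[32], out[32];   // two 4x4 blocks of DCT coefficients (placeholders)
    const int nz = VP8EncQuantize2Blocks(in, out, mtx);
    if (nz & 1) { /* block #0 has at least one non-zero level */ }
    if (nz & 2) { /* block #1 has at least one non-zero level */ }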
628 static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], | 658 static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], |
629 const VP8Matrix* const mtx) { | 659 const VP8Matrix* const mtx) { |
630 int n, last = -1; | 660 int n, last = -1; |
631 for (n = 0; n < 16; ++n) { | 661 for (n = 0; n < 16; ++n) { |
632 const int j = kZigzag[n]; | 662 const int j = kZigzag[n]; |
633 const int sign = (in[j] < 0); | 663 const int sign = (in[j] < 0); |
634 const uint32_t coeff = sign ? -in[j] : in[j]; | 664 const uint32_t coeff = sign ? -in[j] : in[j]; |
635 assert(mtx->sharpen_[j] == 0); | 665 assert(mtx->sharpen_[j] == 0); |
636 if (coeff > mtx->zthresh_[j]) { | 666 if (coeff > mtx->zthresh_[j]) { |
637 const uint32_t Q = mtx->q_[j]; | 667 const uint32_t Q = mtx->q_[j]; |
638 const uint32_t iQ = mtx->iq_[j]; | 668 const uint32_t iQ = mtx->iq_[j]; |
639 const uint32_t B = mtx->bias_[j]; | 669 const uint32_t B = mtx->bias_[j]; |
640 int level = QUANTDIV(coeff, iQ, B); | 670 int level = QUANTDIV(coeff, iQ, B); |
641 if (level > MAX_LEVEL) level = MAX_LEVEL; | 671 if (level > MAX_LEVEL) level = MAX_LEVEL; |
642 if (sign) level = -level; | 672 if (sign) level = -level; |
643 in[j] = level * Q; | 673 in[j] = level * Q; |
644 out[n] = level; | 674 out[n] = level; |
645 if (level) last = n; | 675 if (level) last = n; |
646 } else { | 676 } else { |
647 out[n] = 0; | 677 out[n] = 0; |
648 in[j] = 0; | 678 in[j] = 0; |
649 } | 679 } |
650 } | 680 } |
651 return (last >= 0); | 681 return (last >= 0); |
652 } | 682 } |
653 | 683 |
654 //------------------------------------------------------------------------------ | 684 //------------------------------------------------------------------------------ |
655 // Block copy | 685 // Block copy |
656 | 686 |
657 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) { | 687 static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) { |
658 int y; | 688 int y; |
659 for (y = 0; y < size; ++y) { | 689 for (y = 0; y < h; ++y) { |
660 memcpy(dst, src, size); | 690 memcpy(dst, src, w); |
661 src += BPS; | 691 src += BPS; |
662 dst += BPS; | 692 dst += BPS; |
663 } | 693 } |
664 } | 694 } |
665 | 695 |
666 static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); } | 696 static void Copy4x4(const uint8_t* src, uint8_t* dst) { |
| 697 Copy(src, dst, 4, 4); |
| 698 } |
| 699 |
| 700 static void Copy16x8(const uint8_t* src, uint8_t* dst) { |
| 701 Copy(src, dst, 16, 8); |
| 702 } |
667 | 703 |
668 //------------------------------------------------------------------------------ | 704 //------------------------------------------------------------------------------ |
669 // Initialization | 705 // Initialization |
670 | 706 |
671 // Speed-critical function pointers. We have to initialize them to the default | 707 // Speed-critical function pointers. We have to initialize them to the default |
672 // implementations within VP8EncDspInit(). | 708 // implementations within VP8EncDspInit(). |
673 VP8CHisto VP8CollectHistogram; | 709 VP8CHisto VP8CollectHistogram; |
674 VP8Idct VP8ITransform; | 710 VP8Idct VP8ITransform; |
675 VP8Fdct VP8FTransform; | 711 VP8Fdct VP8FTransform; |
| 712 VP8Fdct VP8FTransform2; |
676 VP8WHT VP8FTransformWHT; | 713 VP8WHT VP8FTransformWHT; |
677 VP8Intra4Preds VP8EncPredLuma4; | 714 VP8Intra4Preds VP8EncPredLuma4; |
678 VP8IntraPreds VP8EncPredLuma16; | 715 VP8IntraPreds VP8EncPredLuma16; |
679 VP8IntraPreds VP8EncPredChroma8; | 716 VP8IntraPreds VP8EncPredChroma8; |
680 VP8Metric VP8SSE16x16; | 717 VP8Metric VP8SSE16x16; |
681 VP8Metric VP8SSE8x8; | 718 VP8Metric VP8SSE8x8; |
682 VP8Metric VP8SSE16x8; | 719 VP8Metric VP8SSE16x8; |
683 VP8Metric VP8SSE4x4; | 720 VP8Metric VP8SSE4x4; |
684 VP8WMetric VP8TDisto4x4; | 721 VP8WMetric VP8TDisto4x4; |
685 VP8WMetric VP8TDisto16x16; | 722 VP8WMetric VP8TDisto16x16; |
686 VP8QuantizeBlock VP8EncQuantizeBlock; | 723 VP8QuantizeBlock VP8EncQuantizeBlock; |
| 724 VP8Quantize2Blocks VP8EncQuantize2Blocks; |
687 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; | 725 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; |
688 VP8BlockCopy VP8Copy4x4; | 726 VP8BlockCopy VP8Copy4x4; |
| 727 VP8BlockCopy VP8Copy16x8; |
689 | 728 |
690 extern void VP8EncDspInitSSE2(void); | 729 extern void VP8EncDspInitSSE2(void); |
| 730 extern void VP8EncDspInitSSE41(void); |
691 extern void VP8EncDspInitAVX2(void); | 731 extern void VP8EncDspInitAVX2(void); |
692 extern void VP8EncDspInitNEON(void); | 732 extern void VP8EncDspInitNEON(void); |
693 extern void VP8EncDspInitMIPS32(void); | 733 extern void VP8EncDspInitMIPS32(void); |
| 734 extern void VP8EncDspInitMIPSdspR2(void); |
694 | 735 |
695 static volatile VP8CPUInfo enc_last_cpuinfo_used = | 736 static volatile VP8CPUInfo enc_last_cpuinfo_used = |
696 (VP8CPUInfo)&enc_last_cpuinfo_used; | 737 (VP8CPUInfo)&enc_last_cpuinfo_used; |
697 | 738 |
698 void VP8EncDspInit(void) { | 739 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { |
699 if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; | 740 if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; |
700 | 741 |
701 VP8DspInit(); // common inverse transforms | 742 VP8DspInit(); // common inverse transforms |
702 InitTables(); | 743 InitTables(); |
703 | 744 |
704 // default C implementations | 745 // default C implementations |
705 VP8CollectHistogram = CollectHistogram; | 746 VP8CollectHistogram = CollectHistogram; |
706 VP8ITransform = ITransform; | 747 VP8ITransform = ITransform; |
707 VP8FTransform = FTransform; | 748 VP8FTransform = FTransform; |
| 749 VP8FTransform2 = FTransform2; |
708 VP8FTransformWHT = FTransformWHT; | 750 VP8FTransformWHT = FTransformWHT; |
709 VP8EncPredLuma4 = Intra4Preds; | 751 VP8EncPredLuma4 = Intra4Preds; |
710 VP8EncPredLuma16 = Intra16Preds; | 752 VP8EncPredLuma16 = Intra16Preds; |
711 VP8EncPredChroma8 = IntraChromaPreds; | 753 VP8EncPredChroma8 = IntraChromaPreds; |
712 VP8SSE16x16 = SSE16x16; | 754 VP8SSE16x16 = SSE16x16; |
713 VP8SSE8x8 = SSE8x8; | 755 VP8SSE8x8 = SSE8x8; |
714 VP8SSE16x8 = SSE16x8; | 756 VP8SSE16x8 = SSE16x8; |
715 VP8SSE4x4 = SSE4x4; | 757 VP8SSE4x4 = SSE4x4; |
716 VP8TDisto4x4 = Disto4x4; | 758 VP8TDisto4x4 = Disto4x4; |
717 VP8TDisto16x16 = Disto16x16; | 759 VP8TDisto16x16 = Disto16x16; |
718 VP8EncQuantizeBlock = QuantizeBlock; | 760 VP8EncQuantizeBlock = QuantizeBlock; |
| 761 VP8EncQuantize2Blocks = Quantize2Blocks; |
719 VP8EncQuantizeBlockWHT = QuantizeBlockWHT; | 762 VP8EncQuantizeBlockWHT = QuantizeBlockWHT; |
720 VP8Copy4x4 = Copy4x4; | 763 VP8Copy4x4 = Copy4x4; |
| 764 VP8Copy16x8 = Copy16x8; |
721 | 765 |
722 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 766 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
723 if (VP8GetCPUInfo != NULL) { | 767 if (VP8GetCPUInfo != NULL) { |
724 #if defined(WEBP_USE_SSE2) | 768 #if defined(WEBP_USE_SSE2) |
725 if (VP8GetCPUInfo(kSSE2)) { | 769 if (VP8GetCPUInfo(kSSE2)) { |
726 VP8EncDspInitSSE2(); | 770 VP8EncDspInitSSE2(); |
| 771 #if defined(WEBP_USE_SSE41) |
| 772 if (VP8GetCPUInfo(kSSE4_1)) { |
| 773 VP8EncDspInitSSE41(); |
| 774 } |
| 775 #endif |
727 } | 776 } |
728 #endif | 777 #endif |
729 #if defined(WEBP_USE_AVX2) | 778 #if defined(WEBP_USE_AVX2) |
730 if (VP8GetCPUInfo(kAVX2)) { | 779 if (VP8GetCPUInfo(kAVX2)) { |
731 VP8EncDspInitAVX2(); | 780 VP8EncDspInitAVX2(); |
732 } | 781 } |
733 #endif | 782 #endif |
734 #if defined(WEBP_USE_NEON) | 783 #if defined(WEBP_USE_NEON) |
735 if (VP8GetCPUInfo(kNEON)) { | 784 if (VP8GetCPUInfo(kNEON)) { |
736 VP8EncDspInitNEON(); | 785 VP8EncDspInitNEON(); |
737 } | 786 } |
738 #endif | 787 #endif |
739 #if defined(WEBP_USE_MIPS32) | 788 #if defined(WEBP_USE_MIPS32) |
740 if (VP8GetCPUInfo(kMIPS32)) { | 789 if (VP8GetCPUInfo(kMIPS32)) { |
741 VP8EncDspInitMIPS32(); | 790 VP8EncDspInitMIPS32(); |
742 } | 791 } |
743 #endif | 792 #endif |
| 793 #if defined(WEBP_USE_MIPS_DSP_R2) |
| 794 if (VP8GetCPUInfo(kMIPSdspR2)) { |
| 795 VP8EncDspInitMIPSdspR2(); |
| 796 } |
| 797 #endif |
744 } | 798 } |
745 enc_last_cpuinfo_used = VP8GetCPUInfo; | 799 enc_last_cpuinfo_used = VP8GetCPUInfo; |
746 } | 800 } |
747 | |
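A minimal sketch of the intended call pattern, assuming the encoder headers are included; the buffers named in the comment are placeholders.

    VP8EncDspInit();   // installs the C defaults, then any SSE2/SSE4.1/AVX2/NEON/MIPS overrides
    // After this the function pointers are safe to call, e.g.:
    //   VP8FTransform(ref_block, pred_block, coeffs);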