OLD | NEW |
1 // Copyright 2011 Google Inc. All Rights Reserved. | 1 // Copyright 2011 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // Speed-critical functions. | 10 // Speed-critical functions. |
11 // | 11 // |
12 // Author: Skal (pascal.massimino@gmail.com) | 12 // Author: Skal (pascal.massimino@gmail.com) |
13 | 13 |
14 #ifndef WEBP_DSP_DSP_H_ | 14 #ifndef WEBP_DSP_DSP_H_ |
15 #define WEBP_DSP_DSP_H_ | 15 #define WEBP_DSP_DSP_H_ |
16 | 16 |
| 17 #ifdef HAVE_CONFIG_H |
| 18 #include "../webp/config.h" |
| 19 #endif |
| 20 |
17 #include "../webp/types.h" | 21 #include "../webp/types.h" |
18 #include "../utils/utils.h" | |
19 | 22 |
20 #ifdef __cplusplus | 23 #ifdef __cplusplus |
21 extern "C" { | 24 extern "C" { |
22 #endif | 25 #endif |
23 | 26 |
24 #define BPS 32 // this is the common stride for enc/dec | 27 #define BPS 32 // this is the common stride for enc/dec |
25 | 28 |
26 //------------------------------------------------------------------------------ | 29 //------------------------------------------------------------------------------ |
27 // CPU detection | 30 // CPU detection |
28 | 31 |
(...skipping 36 matching lines...) |
65 #define WEBP_USE_AVX2 | 68 #define WEBP_USE_AVX2 |
66 #endif | 69 #endif |
67 | 70 |
68 #if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) | 71 #if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) |
69 #define WEBP_ANDROID_NEON // Android targets that might support NEON | 72 #define WEBP_ANDROID_NEON // Android targets that might support NEON |
70 #endif | 73 #endif |
71 | 74 |
72 // The intrinsics currently cause compiler errors with arm-nacl-gcc and the | 75 // The intrinsics currently cause compiler errors with arm-nacl-gcc and the |
73 // inline assembly would need to be modified for use with Native Client. | 76 // inline assembly would need to be modified for use with Native Client. |
74 #if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \ | 77 #if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \ |
75 defined(__aarch64__)) && !defined(__native_client__) | 78 defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \ |
| 79 !defined(__native_client__) |
76 #define WEBP_USE_NEON | 80 #define WEBP_USE_NEON |
77 #endif | 81 #endif |
78 | 82 |
79 #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) | 83 #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) |
80 #define WEBP_USE_NEON | 84 #define WEBP_USE_NEON |
81 #define WEBP_USE_INTRINSICS | 85 #define WEBP_USE_INTRINSICS |
82 #endif | 86 #endif |
83 | 87 |
84 #if defined(__mips__) && !defined(__mips64) && \ | 88 #if defined(__mips__) && !defined(__mips64) && \ |
85 defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) | 89 defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) |
86 #define WEBP_USE_MIPS32 | 90 #define WEBP_USE_MIPS32 |
87 #if (__mips_isa_rev >= 2) | 91 #if (__mips_isa_rev >= 2) |
88 #define WEBP_USE_MIPS32_R2 | 92 #define WEBP_USE_MIPS32_R2 |
89 #if defined(__mips_dspr2) || (__mips_dsp_rev >= 2) | 93 #if defined(__mips_dspr2) || (__mips_dsp_rev >= 2) |
90 #define WEBP_USE_MIPS_DSP_R2 | 94 #define WEBP_USE_MIPS_DSP_R2 |
91 #endif | 95 #endif |
92 #endif | 96 #endif |
93 #endif | 97 #endif |
94 | 98 |
| 99 #if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) |
| 100 #define WEBP_USE_MSA |
| 101 #endif |
| 102 |
95 // This macro prevents thread_sanitizer from reporting known concurrent writes. | 103 // This macro prevents thread_sanitizer from reporting known concurrent writes. |
96 #define WEBP_TSAN_IGNORE_FUNCTION | 104 #define WEBP_TSAN_IGNORE_FUNCTION |
97 #if defined(__has_feature) | 105 #if defined(__has_feature) |
98 #if __has_feature(thread_sanitizer) | 106 #if __has_feature(thread_sanitizer) |
99 #undef WEBP_TSAN_IGNORE_FUNCTION | 107 #undef WEBP_TSAN_IGNORE_FUNCTION |
100 #define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) | 108 #define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) |
101 #endif | 109 #endif |
102 #endif | 110 #endif |
103 | 111 |
| 112 #define WEBP_UBSAN_IGNORE_UNDEF |
| 113 #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW |
| 114 #if !defined(WEBP_FORCE_ALIGNED) && defined(__clang__) && \ |
| 115 defined(__has_attribute) |
| 116 #if __has_attribute(no_sanitize) |
| 117 // This macro prevents the undefined behavior sanitizer from reporting |
| 118 // failures. This is only meant to silence unaligned loads on platforms that |
| 119 // are known to support them. |
| 120 #undef WEBP_UBSAN_IGNORE_UNDEF |
| 121 #define WEBP_UBSAN_IGNORE_UNDEF \ |
| 122 __attribute__((no_sanitize("undefined"))) |
| 123 |
| 124 // This macro prevents the undefined behavior sanitizer from reporting |
| 125 // failures related to unsigned integer overflows. This is only meant to |
| 126 // silence cases where this well defined behavior is expected. |
| 127 #undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW |
| 128 #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ |
| 129 __attribute__((no_sanitize("unsigned-integer-overflow"))) |
| 130 #endif |
| 131 #endif |
| 132 |
104 typedef enum { | 133 typedef enum { |
105 kSSE2, | 134 kSSE2, |
106 kSSE3, | 135 kSSE3, |
107 kSSE4_1, | 136 kSSE4_1, |
108 kAVX, | 137 kAVX, |
109 kAVX2, | 138 kAVX2, |
110 kNEON, | 139 kNEON, |
111 kMIPS32, | 140 kMIPS32, |
112 kMIPSdspR2 | 141 kMIPSdspR2, |
| 142 kMSA |
113 } CPUFeature; | 143 } CPUFeature; |
114 // returns true if the CPU supports the feature. | 144 // returns true if the CPU supports the feature. |
115 typedef int (*VP8CPUInfo)(CPUFeature feature); | 145 typedef int (*VP8CPUInfo)(CPUFeature feature); |
116 extern VP8CPUInfo VP8GetCPUInfo; | 146 extern VP8CPUInfo VP8GetCPUInfo; |
117 | 147 |
118 //------------------------------------------------------------------------------ | 148 //------------------------------------------------------------------------------ |
119 // Init stub generator | 149 // Init stub generator |
120 | 150 |
121 // Defines an init function stub to ensure each module exposes a symbol, | 151 // Defines an init function stub to ensure each module exposes a symbol, |
122 // avoiding a compiler warning. | 152 // avoiding a compiler warning. |
(...skipping 21 matching lines...) |
144 const uint8_t* top); | 174 const uint8_t* top); |
145 typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top); | 175 typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top); |
146 extern VP8Intra4Preds VP8EncPredLuma4; | 176 extern VP8Intra4Preds VP8EncPredLuma4; |
147 extern VP8IntraPreds VP8EncPredLuma16; | 177 extern VP8IntraPreds VP8EncPredLuma16; |
148 extern VP8IntraPreds VP8EncPredChroma8; | 178 extern VP8IntraPreds VP8EncPredChroma8; |
149 | 179 |
150 typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref); | 180 typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref); |
151 extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4; | 181 extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4; |
152 typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref, | 182 typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref, |
153 const uint16_t* const weights); | 183 const uint16_t* const weights); |
| 184 // The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major |
| 185 // 4 by 4 symmetric matrix. |
154 extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16; | 186 extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16; |
155 | 187 |
156 typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst); | 188 typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst); |
157 extern VP8BlockCopy VP8Copy4x4; | 189 extern VP8BlockCopy VP8Copy4x4; |
158 extern VP8BlockCopy VP8Copy16x8; | 190 extern VP8BlockCopy VP8Copy16x8; |
159 // Quantization | 191 // Quantization |
160 struct VP8Matrix; // forward declaration | 192 struct VP8Matrix; // forward declaration |
161 typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], | 193 typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], |
162 const struct VP8Matrix* const mtx); | 194 const struct VP8Matrix* const mtx); |
163 // Same as VP8QuantizeBlock, but quantizes two consecutive blocks. | 195 // Same as VP8QuantizeBlock, but quantizes two consecutive blocks. |
(...skipping 43 matching lines...) |
207 | 239 |
208 // Cost calculation function. | 240 // Cost calculation function. |
209 typedef int (*VP8GetResidualCostFunc)(int ctx0, | 241 typedef int (*VP8GetResidualCostFunc)(int ctx0, |
210 const struct VP8Residual* const res); | 242 const struct VP8Residual* const res); |
211 extern VP8GetResidualCostFunc VP8GetResidualCost; | 243 extern VP8GetResidualCostFunc VP8GetResidualCost; |
212 | 244 |
213 // must be called before anything using the above | 245 // must be called before anything using the above |
214 void VP8EncDspCostInit(void); | 246 void VP8EncDspCostInit(void); |
215 | 247 |
216 //------------------------------------------------------------------------------ | 248 //------------------------------------------------------------------------------ |
| 249 // SSIM utils |
| 250 |
| 251 // struct for accumulating statistical moments |
| 252 typedef struct { |
| 253 double w; // sum(w_i) : sum of weights |
| 254 double xm, ym; // sum(w_i * x_i), sum(w_i * y_i) |
| 255 double xxm, xym, yym; // sum(w_i * x_i * x_i), etc. |
| 256 } VP8DistoStats; |
| 257 |
| 258 #define VP8_SSIM_KERNEL 3 // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1 |
| 259 typedef void (*VP8SSIMAccumulateClippedFunc)(const uint8_t* src1, int stride1, |
| 260 const uint8_t* src2, int stride2, |
| 261 int xo, int yo, // center position |
| 262 int W, int H, // plane dimension |
| 263 VP8DistoStats* const stats); |
| 264 |
| 265 // This version is called with the guarantee that you can load 8 bytes and |
| 266 // 8 rows at offset src1 and src2 |
| 267 typedef void (*VP8SSIMAccumulateFunc)(const uint8_t* src1, int stride1, |
| 268 const uint8_t* src2, int stride2, |
| 269 VP8DistoStats* const stats); |
| 270 |
| 271 extern VP8SSIMAccumulateFunc VP8SSIMAccumulate; // unclipped / unchecked |
| 272 extern VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; // with clipping |
| 273 |
| 274 // must be called before using any of the above directly |
| 275 void VP8SSIMDspInit(void); |
| 276 |
| 277 //------------------------------------------------------------------------------ |
217 // Decoding | 278 // Decoding |
218 | 279 |
219 typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst); | 280 typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst); |
220 // when doing two transforms, coeffs is actually int16_t[2][16]. | 281 // when doing two transforms, coeffs is actually int16_t[2][16]. |
221 typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two); | 282 typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two); |
222 extern VP8DecIdct2 VP8Transform; | 283 extern VP8DecIdct2 VP8Transform; |
223 extern VP8DecIdct VP8TransformAC3; | 284 extern VP8DecIdct VP8TransformAC3; |
224 extern VP8DecIdct VP8TransformUV; | 285 extern VP8DecIdct VP8TransformUV; |
225 extern VP8DecIdct VP8TransformDC; | 286 extern VP8DecIdct VP8TransformDC; |
226 extern VP8DecIdct VP8TransformDCUV; | 287 extern VP8DecIdct VP8TransformDCUV; |
(...skipping 31 matching lines...) |
258 extern VP8LumaFilterFunc VP8HFilter16; | 319 extern VP8LumaFilterFunc VP8HFilter16; |
259 extern VP8ChromaFilterFunc VP8VFilter8; | 320 extern VP8ChromaFilterFunc VP8VFilter8; |
260 extern VP8ChromaFilterFunc VP8HFilter8; | 321 extern VP8ChromaFilterFunc VP8HFilter8; |
261 | 322 |
262 // on inner edge | 323 // on inner edge |
263 extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether | 324 extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether |
264 extern VP8LumaFilterFunc VP8HFilter16i; | 325 extern VP8LumaFilterFunc VP8HFilter16i; |
265 extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether | 326 extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether |
266 extern VP8ChromaFilterFunc VP8HFilter8i; | 327 extern VP8ChromaFilterFunc VP8HFilter8i; |
267 | 328 |
| 329 // Dithering. Combines dithering values (centered around 128) with dst[], |
| 330 // according to: dst[] = clip(dst[] + (((dither[]-128) + 8) >> 4)) |
| 331 #define VP8_DITHER_DESCALE 4 |
| 332 #define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1)) |
| 333 #define VP8_DITHER_AMP_BITS 7 |
| 334 #define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS) |
| 335 extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst, |
| 336 int dst_stride); |
| 337 |
268 // must be called before anything using the above | 338 // must be called before anything using the above |
269 void VP8DspInit(void); | 339 void VP8DspInit(void); |
270 | 340 |
271 //------------------------------------------------------------------------------ | 341 //------------------------------------------------------------------------------ |
272 // WebP I/O | 342 // WebP I/O |
273 | 343 |
274 #define FANCY_UPSAMPLING // undefined to remove fancy upsampling support | 344 #define FANCY_UPSAMPLING // undefined to remove fancy upsampling support |
275 | 345 |
276 // Convert a pair of y/u/v lines together to the output rgb/a colorspace. | 346 // Convert a pair of y/u/v lines together to the output rgb/a colorspace. |
277 // bottom_y can be NULL if only one line of output is needed (at top/bottom). | 347 // bottom_y can be NULL if only one line of output is needed (at top/bottom). |
(...skipping 187 matching lines...) |
465 WEBP_FILTER_HORIZONTAL, | 535 WEBP_FILTER_HORIZONTAL, |
466 WEBP_FILTER_VERTICAL, | 536 WEBP_FILTER_VERTICAL, |
467 WEBP_FILTER_GRADIENT, | 537 WEBP_FILTER_GRADIENT, |
468 WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker | 538 WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker |
469 WEBP_FILTER_BEST, // meta-types | 539 WEBP_FILTER_BEST, // meta-types |
470 WEBP_FILTER_FAST | 540 WEBP_FILTER_FAST |
471 } WEBP_FILTER_TYPE; | 541 } WEBP_FILTER_TYPE; |
472 | 542 |
473 typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height, | 543 typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height, |
474 int stride, uint8_t* out); | 544 int stride, uint8_t* out); |
475 typedef void (*WebPUnfilterFunc)(int width, int height, int stride, | 545 // In-place un-filtering. |
476 int row, int num_rows, uint8_t* data); | 546 // Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'. |
| 547 typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds, |
| 548 uint8_t* cur_line, int width); |
477 | 549 |
478 // Filter the given data using the given predictor. | 550 // Filter the given data using the given predictor. |
479 // 'in' corresponds to a 2-dimensional pixel array of size (stride * height) | 551 // 'in' corresponds to a 2-dimensional pixel array of size (stride * height) |
480 // in raster order. | 552 // in raster order. |
481 // 'stride' is number of bytes per scan line (with possible padding). | 553 // 'stride' is number of bytes per scan line (with possible padding). |
482 // 'out' should be pre-allocated. | 554 // 'out' should be pre-allocated. |
483 extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; | 555 extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; |
484 | 556 |
485 // In-place reconstruct the original data from the given filtered data. | 557 // In-place reconstruct the original data from the given filtered data. |
486 // The reconstruction will be done for 'num_rows' rows starting from 'row' | 558 // The reconstruction will be done for 'num_rows' rows starting from 'row' |
487 // (assuming rows up to 'row - 1' are already reconstructed). | 559 // (assuming rows up to 'row - 1' are already reconstructed). |
488 extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; | 560 extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; |
489 | 561 |
490 // To be called first before using the above. | 562 // To be called first before using the above. |
491 void VP8FiltersInit(void); | 563 void VP8FiltersInit(void); |
492 | 564 |
493 #ifdef __cplusplus | 565 #ifdef __cplusplus |
494 } // extern "C" | 566 } // extern "C" |
495 #endif | 567 #endif |
496 | 568 |
497 #endif /* WEBP_DSP_DSP_H_ */ | 569 #endif /* WEBP_DSP_DSP_H_ */ |
OLD | NEW |