OLD | NEW |
1 // Copyright 2012 Google Inc. All Rights Reserved. | 1 // Copyright 2012 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // This code is licensed under the same terms as WebM: | 3 // This code is licensed under the same terms as WebM: |
4 // Software License Agreement: http://www.webmproject.org/license/software/ | 4 // Software License Agreement: http://www.webmproject.org/license/software/ |
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ | 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ |
6 // ----------------------------------------------------------------------------- | 6 // ----------------------------------------------------------------------------- |
7 // | 7 // |
8 // Image transforms and color space conversion methods for lossless decoder. | 8 // Image transforms and color space conversion methods for lossless decoder. |
9 // | 9 // |
10 // Authors: Vikas Arora (vikaas.arora@gmail.com) | 10 // Authors: Vikas Arora (vikaas.arora@gmail.com) |
11 // Jyrki Alakuijala (jyrki@google.com) | 11 // Jyrki Alakuijala (jyrki@google.com) |
12 // Urvang Joshi (urvang@google.com) | 12 // Urvang Joshi (urvang@google.com) |
13 | 13 |
| 14 #include "./dsp.h" |
| 15 |
| 16 // Define the following if target arch is sure to have SSE2 |
| 17 // #define WEBP_TARGET_HAS_SSE2 |
| 18 |
14 #if defined(__cplusplus) || defined(c_plusplus) | 19 #if defined(__cplusplus) || defined(c_plusplus) |
15 extern "C" { | 20 extern "C" { |
16 #endif | 21 #endif |
17 | 22 |
| 23 #if defined(WEBP_TARGET_HAS_SSE2) |
| 24 #include <emmintrin.h> |
| 25 #endif |
| 26 |
18 #include <math.h> | 27 #include <math.h> |
19 #include <stdlib.h> | 28 #include <stdlib.h> |
20 #include "./lossless.h" | 29 #include "./lossless.h" |
21 #include "../dec/vp8li.h" | 30 #include "../dec/vp8li.h" |
22 #include "../dsp/yuv.h" | 31 #include "./yuv.h" |
23 #include "../dsp/dsp.h" | |
24 #include "../enc/histogram.h" | |
25 | 32 |
26 #define MAX_DIFF_COST (1e30f) | 33 #define MAX_DIFF_COST (1e30f) |
27 | 34 |
28 // lookup table for small values of log2(int) | 35 // lookup table for small values of log2(int) |
29 #define APPROX_LOG_MAX 4096 | 36 #define APPROX_LOG_MAX 4096 |
30 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 | 37 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 |
31 #define LOG_LOOKUP_IDX_MAX 256 | 38 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { |
32 static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { | |
33 0.0000000000000000f, 0.0000000000000000f, | 39 0.0000000000000000f, 0.0000000000000000f, |
34 1.0000000000000000f, 1.5849625007211560f, | 40 1.0000000000000000f, 1.5849625007211560f, |
35 2.0000000000000000f, 2.3219280948873621f, | 41 2.0000000000000000f, 2.3219280948873621f, |
36 2.5849625007211560f, 2.8073549220576041f, | 42 2.5849625007211560f, 2.8073549220576041f, |
37 3.0000000000000000f, 3.1699250014423121f, | 43 3.0000000000000000f, 3.1699250014423121f, |
38 3.3219280948873621f, 3.4594316186372973f, | 44 3.3219280948873621f, 3.4594316186372973f, |
39 3.5849625007211560f, 3.7004397181410921f, | 45 3.5849625007211560f, 3.7004397181410921f, |
40 3.8073549220576041f, 3.9068905956085187f, | 46 3.8073549220576041f, 3.9068905956085187f, |
41 4.0000000000000000f, 4.0874628412503390f, | 47 4.0000000000000000f, 4.0874628412503390f, |
42 4.1699250014423121f, 4.2479275134435852f, | 48 4.1699250014423121f, 4.2479275134435852f, |
(...skipping 110 matching lines...) |
153 7.9068905956085187f, 7.9128893362299619f, | 159 7.9068905956085187f, 7.9128893362299619f, |
154 7.9188632372745946f, 7.9248125036057812f, | 160 7.9188632372745946f, 7.9248125036057812f, |
155 7.9307373375628866f, 7.9366379390025709f, | 161 7.9307373375628866f, 7.9366379390025709f, |
156 7.9425145053392398f, 7.9483672315846778f, | 162 7.9425145053392398f, 7.9483672315846778f, |
157 7.9541963103868749f, 7.9600019320680805f, | 163 7.9541963103868749f, 7.9600019320680805f, |
158 7.9657842846620869f, 7.9715435539507719f, | 164 7.9657842846620869f, 7.9715435539507719f, |
159 7.9772799234999167f, 7.9829935746943103f, | 165 7.9772799234999167f, 7.9829935746943103f, |
160 7.9886846867721654f, 7.9943534368588577f | 166 7.9886846867721654f, 7.9943534368588577f |
161 }; | 167 }; |
162 | 168 |
163 float VP8LFastLog2(int v) { | 169 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = { |
164 if (v < LOG_LOOKUP_IDX_MAX) { | 170 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f, |
165 return kLog2Table[v]; | 171 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f, |
166 } else if (v < APPROX_LOG_MAX) { | 172 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f, |
| 173 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f, |
| 174 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f, |
| 175 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f, |
| 176 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f, |
| 177 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f, |
| 178 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f, |
| 179 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f, |
| 180 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f, |
| 181 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f, |
| 182 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f, |
| 183 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f, |
| 184 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f, |
| 185 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f, |
| 186 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f, |
| 187 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f, |
| 188 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f, |
| 189 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f, |
| 190 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f, |
| 191 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f, |
| 192 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f, |
| 193 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f, |
| 194 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f, |
| 195 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f, |
| 196 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f, |
| 197 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f, |
| 198 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f, |
| 199 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f, |
| 200 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f, |
| 201 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f, |
| 202 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f, |
| 203 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f, |
| 204 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f, |
| 205 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f, |
| 206 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f, |
| 207 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f, |
| 208 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f, |
| 209 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f, |
| 210 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f, |
| 211 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f, |
| 212 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f, |
| 213 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f, |
| 214 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f, |
| 215 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f, |
| 216 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f, |
| 217 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f, |
| 218 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f, |
| 219 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f, |
| 220 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f, |
| 221 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f, |
| 222 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f, |
| 223 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f, |
| 224 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f, |
| 225 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f, |
| 226 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f, |
| 227 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f, |
| 228 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f, |
| 229 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f, |
| 230 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f, |
| 231 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f, |
| 232 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f, |
| 233 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f |
| 234 }; |
| 235 |
| 236 float VP8LFastSLog2Slow(int v) { |
| 237 assert(v >= LOG_LOOKUP_IDX_MAX); |
| 238 if (v < APPROX_LOG_MAX) { |
| 239 int log_cnt = 0; |
| 240 const float v_f = (float)v; |
| 241 while (v >= LOG_LOOKUP_IDX_MAX) { |
| 242 ++log_cnt; |
| 243 v = v >> 1; |
| 244 } |
| 245 return v_f * (kLog2Table[v] + log_cnt); |
| 246 } else { |
| 247 return (float)(LOG_2_RECIPROCAL * v * log((double)v)); |
| 248 } |
| 249 } |
| 250 |
| 251 float VP8LFastLog2Slow(int v) { |
| 252 assert(v >= LOG_LOOKUP_IDX_MAX); |
| 253 if (v < APPROX_LOG_MAX) { |
167 int log_cnt = 0; | 254 int log_cnt = 0; |
168 while (v >= LOG_LOOKUP_IDX_MAX) { | 255 while (v >= LOG_LOOKUP_IDX_MAX) { |
169 ++log_cnt; | 256 ++log_cnt; |
170 v = v >> 1; | 257 v = v >> 1; |
171 } | 258 } |
172 return kLog2Table[v] + (float)log_cnt; | 259 return kLog2Table[v] + log_cnt; |
173 } else { | 260 } else { |
174 return (float)(LOG_2_RECIPROCAL * log((double)v)); | 261 return (float)(LOG_2_RECIPROCAL * log((double)v)); |
175 } | 262 } |
176 } | 263 } |
177 | 264 |
178 //------------------------------------------------------------------------------ | 265 //------------------------------------------------------------------------------ |
179 // Image transforms. | 266 // Image transforms. |
180 | 267 |
181 // In-place sum of each component with mod 256. | 268 // In-place sum of each component with mod 256. |
182 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { | 269 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { |
183 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); | 270 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); |
184 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); | 271 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); |
185 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); | 272 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); |
186 } | 273 } |
187 | 274 |
188 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { | 275 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { |
189 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); | 276 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); |
190 } | 277 } |
191 | 278 |
192 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { | 279 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { |
193 return Average2(Average2(a0, a2), a1); | 280 return Average2(Average2(a0, a2), a1); |
194 } | 281 } |
195 | 282 |
196 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, | 283 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, |
197 uint32_t a2, uint32_t a3) { | 284 uint32_t a2, uint32_t a3) { |
198 return Average2(Average2(a0, a1), Average2(a2, a3)); | 285 return Average2(Average2(a0, a1), Average2(a2, a3)); |
199 } | 286 } |
200 | 287 |
| 288 #if defined(WEBP_TARGET_HAS_SSE2) |
| 289 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, |
| 290 uint32_t c2) { |
| 291 const __m128i zero = _mm_setzero_si128(); |
| 292 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero); |
| 293 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero); |
| 294 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); |
| 295 const __m128i V1 = _mm_add_epi16(C0, C1); |
| 296 const __m128i V2 = _mm_sub_epi16(V1, C2); |
| 297 const __m128i b = _mm_packus_epi16(V2, V2); |
| 298 const uint32_t output = _mm_cvtsi128_si32(b); |
| 299 return output; |
| 300 } |
| 301 |
| 302 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, |
| 303 uint32_t c2) { |
| 304 const uint32_t ave = Average2(c0, c1); |
| 305 const __m128i zero = _mm_setzero_si128(); |
| 306 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero); |
| 307 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); |
| 308 const __m128i A1 = _mm_sub_epi16(A0, B0); |
| 309 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0); |
| 310 const __m128i A2 = _mm_sub_epi16(A1, BgtA); |
| 311 const __m128i A3 = _mm_srai_epi16(A2, 1); |
| 312 const __m128i A4 = _mm_add_epi16(A0, A3); |
| 313 const __m128i A5 = _mm_packus_epi16(A4, A4); |
| 314 const uint32_t output = _mm_cvtsi128_si32(A5); |
| 315 return output; |
| 316 } |
| 317 |
| 318 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { |
| 319 int pa_minus_pb; |
| 320 const __m128i zero = _mm_setzero_si128(); |
| 321 const __m128i A0 = _mm_cvtsi32_si128(a); |
| 322 const __m128i B0 = _mm_cvtsi32_si128(b); |
| 323 const __m128i C0 = _mm_cvtsi32_si128(c); |
| 324 const __m128i AC0 = _mm_subs_epu8(A0, C0); |
| 325 const __m128i CA0 = _mm_subs_epu8(C0, A0); |
| 326 const __m128i BC0 = _mm_subs_epu8(B0, C0); |
| 327 const __m128i CB0 = _mm_subs_epu8(C0, B0); |
| 328 const __m128i AC = _mm_or_si128(AC0, CA0); |
| 329 const __m128i BC = _mm_or_si128(BC0, CB0); |
| 330 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c| |
| 331 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c| |
| 332 const __m128i diff = _mm_sub_epi16(pb, pa); |
| 333 { |
| 334 int16_t out[8]; |
| 335 _mm_storeu_si128((__m128i*)out, diff); |
| 336 pa_minus_pb = out[0] + out[1] + out[2] + out[3]; |
| 337 } |
| 338 return (pa_minus_pb <= 0) ? a : b; |
| 339 } |
| 340 |
| 341 #else |
| 342 |
201 static WEBP_INLINE uint32_t Clip255(uint32_t a) { | 343 static WEBP_INLINE uint32_t Clip255(uint32_t a) { |
202 if (a < 256) { | 344 if (a < 256) { |
203 return a; | 345 return a; |
204 } | 346 } |
205 // return 0, when a is a negative integer. | 347 // return 0, when a is a negative integer. |
206 // return 255, when a is positive. | 348 // return 255, when a is positive. |
207 return ~a >> 24; | 349 return ~a >> 24; |
208 } | 350 } |
209 | 351 |
210 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { | 352 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { |
(...skipping 21 matching lines...) |
232 uint32_t c2) { | 374 uint32_t c2) { |
233 const uint32_t ave = Average2(c0, c1); | 375 const uint32_t ave = Average2(c0, c1); |
234 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); | 376 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); |
235 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); | 377 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); |
236 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); | 378 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); |
237 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); | 379 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); |
238 return (a << 24) | (r << 16) | (g << 8) | b; | 380 return (a << 24) | (r << 16) | (g << 8) | b; |
239 } | 381 } |
240 | 382 |
241 static WEBP_INLINE int Sub3(int a, int b, int c) { | 383 static WEBP_INLINE int Sub3(int a, int b, int c) { |
242 const int pa = b - c; | 384 const int pb = b - c; |
243 const int pb = a - c; | 385 const int pa = a - c; |
244 return abs(pa) - abs(pb); | 386 return abs(pb) - abs(pa); |
245 } | 387 } |
246 | 388 |
247 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { | 389 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { |
248 const int pa_minus_pb = | 390 const int pa_minus_pb = |
249 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + | 391 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + |
250 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + | 392 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + |
251 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + | 393 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + |
252 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); | 394 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); |
253 | |
254 return (pa_minus_pb <= 0) ? a : b; | 395 return (pa_minus_pb <= 0) ? a : b; |
255 } | 396 } |
| 397 #endif |
256 | 398 |
257 //------------------------------------------------------------------------------ | 399 //------------------------------------------------------------------------------ |
258 // Predictors | 400 // Predictors |
259 | 401 |
260 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { | 402 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { |
261 (void)top; | 403 (void)top; |
262 (void)left; | 404 (void)left; |
263 return ARGB_BLACK; | 405 return ARGB_BLACK; |
264 } | 406 } |
265 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { | 407 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { |
(...skipping 67 matching lines...) |
333 const double exp_decay_factor = 0.6; | 475 const double exp_decay_factor = 0.6; |
334 double bits = weight_0 * counts[0]; | 476 double bits = weight_0 * counts[0]; |
335 int i; | 477 int i; |
336 for (i = 1; i < significant_symbols; ++i) { | 478 for (i = 1; i < significant_symbols; ++i) { |
337 bits += exp_val * (counts[i] + counts[256 - i]); | 479 bits += exp_val * (counts[i] + counts[256 - i]); |
338 exp_val *= exp_decay_factor; | 480 exp_val *= exp_decay_factor; |
339 } | 481 } |
340 return (float)(-0.1 * bits); | 482 return (float)(-0.1 * bits); |
341 } | 483 } |
342 | 484 |
343 // Compute the Shannon entropy: Sum(p*log2(p)) | 485 // Compute the combined Shannon entropy for distributions {X} and {X+Y} |
344 static float ShannonEntropy(const int* const array, int n) { | 486 static float CombinedShannonEntropy(const int* const X, |
| 487 const int* const Y, int n) { |
345 int i; | 488 int i; |
346 float retval = 0.f; | 489 double retval = 0.; |
347 int sum = 0; | 490 int sumX = 0, sumXY = 0; |
348 for (i = 0; i < n; ++i) { | 491 for (i = 0; i < n; ++i) { |
349 if (array[i] != 0) { | 492 const int x = X[i]; |
350 sum += array[i]; | 493 const int xy = X[i] + Y[i]; |
351 retval -= VP8LFastSLog2(array[i]); | 494 if (x != 0) { |
| 495 sumX += x; |
| 496 retval -= VP8LFastSLog2(x); |
| 497 } |
| 498 if (xy != 0) { |
| 499 sumXY += xy; |
| 500 retval -= VP8LFastSLog2(xy); |
352 } | 501 } |
353 } | 502 } |
354 retval += VP8LFastSLog2(sum); | 503 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); |
355 return retval; | 504 return (float)retval; |
356 } | 505 } |
357 | 506 |
358 static float PredictionCostSpatialHistogram(int accumulated[4][256], | 507 static float PredictionCostSpatialHistogram(int accumulated[4][256], |
359 int tile[4][256]) { | 508 int tile[4][256]) { |
360 int i; | 509 int i; |
361 int k; | |
362 int combo[256]; | |
363 double retval = 0; | 510 double retval = 0; |
364 for (i = 0; i < 4; ++i) { | 511 for (i = 0; i < 4; ++i) { |
365 const double exp_val = 0.94; | 512 const double kExpValue = 0.94; |
366 retval += PredictionCostSpatial(&tile[i][0], 1, exp_val); | 513 retval += PredictionCostSpatial(tile[i], 1, kExpValue); |
367 retval += ShannonEntropy(&tile[i][0], 256); | 514 retval += CombinedShannonEntropy(tile[i], accumulated[i], 256); |
368 for (k = 0; k < 256; ++k) { | |
369 combo[k] = accumulated[i][k] + tile[i][k]; | |
370 } | |
371 retval += ShannonEntropy(&combo[0], 256); | |
372 } | 515 } |
373 return (float)retval; | 516 return (float)retval; |
374 } | 517 } |
375 | 518 |
376 static int GetBestPredictorForTile(int width, int height, | 519 static int GetBestPredictorForTile(int width, int height, |
377 int tile_x, int tile_y, int bits, | 520 int tile_x, int tile_y, int bits, |
378 int accumulated[4][256], | 521 int accumulated[4][256], |
379 const uint32_t* const argb_scratch) { | 522 const uint32_t* const argb_scratch) { |
380 const int kNumPredModes = 14; | 523 const int kNumPredModes = 14; |
381 const int col_start = tile_x << bits; | 524 const int col_start = tile_x << bits; |
(...skipping 183 matching lines...) |
565 data += width; | 708 data += width; |
566 ++y; | 709 ++y; |
567 if ((y & mask) == 0) { // Use the same mask, since tiles are squares. | 710 if ((y & mask) == 0) { // Use the same mask, since tiles are squares. |
568 pred_mode_base += tiles_per_row; | 711 pred_mode_base += tiles_per_row; |
569 } | 712 } |
570 } | 713 } |
571 } | 714 } |
572 } | 715 } |
573 | 716 |
574 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) { | 717 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) { |
575 int i; | 718 int i = 0; |
576 for (i = 0; i < num_pixs; ++i) { | 719 #if defined(WEBP_TARGET_HAS_SSE2) |
| 720 const __m128i mask = _mm_set1_epi32(0x0000ff00); |
| 721 for (; i + 4 < num_pixs; i += 4) { |
| 722 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); |
| 723 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|... |
| 724 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|... |
| 725 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|... |
| 726 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g); |
| 727 const __m128i out = _mm_sub_epi8(in, in_0g0g); |
| 728 _mm_storeu_si128((__m128i*)&argb_data[i], out); |
| 729 } |
| 730 // fallthrough and finish off with plain-C |
| 731 #endif |
| 732 for (; i < num_pixs; ++i) { |
577 const uint32_t argb = argb_data[i]; | 733 const uint32_t argb = argb_data[i]; |
578 const uint32_t green = (argb >> 8) & 0xff; | 734 const uint32_t green = (argb >> 8) & 0xff; |
579 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; | 735 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; |
580 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; | 736 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; |
581 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; | 737 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; |
582 } | 738 } |
583 } | 739 } |
584 | 740 |
585 // Add green to blue and red channels (i.e. perform the inverse transform of | 741 // Add green to blue and red channels (i.e. perform the inverse transform of |
586 // 'subtract green'). | 742 // 'subtract green'). |
587 static void AddGreenToBlueAndRed(const VP8LTransform* const transform, | 743 static void AddGreenToBlueAndRed(const VP8LTransform* const transform, |
588 int y_start, int y_end, uint32_t* data) { | 744 int y_start, int y_end, uint32_t* data) { |
589 const int width = transform->xsize_; | 745 const int width = transform->xsize_; |
590 const uint32_t* const data_end = data + (y_end - y_start) * width; | 746 const uint32_t* const data_end = data + (y_end - y_start) * width; |
| 747 #if defined(WEBP_TARGET_HAS_SSE2) |
| 748 const __m128i mask = _mm_set1_epi32(0x0000ff00); |
| 749 for (; data + 4 < data_end; data += 4) { |
| 750 const __m128i in = _mm_loadu_si128((__m128i*)data); |
| 751 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|... |
| 752 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|... |
| 753 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|... |
| 754 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g); |
| 755 const __m128i out = _mm_add_epi8(in, in_0g0g); |
| 756 _mm_storeu_si128((__m128i*)data, out); |
| 757 } |
| 758 // fallthrough and finish off with plain-C |
| 759 #endif |
591 while (data < data_end) { | 760 while (data < data_end) { |
592 const uint32_t argb = *data; | 761 const uint32_t argb = *data; |
593 // "* 0x00010001u" is equivalent to "(green << 16) + green" | |
594 const uint32_t green = ((argb >> 8) & 0xff); | 762 const uint32_t green = ((argb >> 8) & 0xff); |
595 uint32_t red_blue = (argb & 0x00ff00ffu); | 763 uint32_t red_blue = (argb & 0x00ff00ffu); |
596 red_blue += (green << 16) | green; | 764 red_blue += (green << 16) | green; |
597 red_blue &= 0x00ff00ffu; | 765 red_blue &= 0x00ff00ffu; |
598 *data++ = (argb & 0xff00ff00u) | red_blue; | 766 *data++ = (argb & 0xff00ff00u) | red_blue; |
599 } | 767 } |
600 } | 768 } |
601 | 769 |
602 typedef struct { | 770 typedef struct { |
603 // Note: the members are uint8_t, so that any negative values are | 771 // Note: the members are uint8_t, so that any negative values are |
(...skipping 44 matching lines...) |
648 } else { | 816 } else { |
649 new_red -= ColorTransformDelta(m->green_to_red_, green); | 817 new_red -= ColorTransformDelta(m->green_to_red_, green); |
650 new_red &= 0xff; | 818 new_red &= 0xff; |
651 new_blue -= ColorTransformDelta(m->green_to_blue_, green); | 819 new_blue -= ColorTransformDelta(m->green_to_blue_, green); |
652 new_blue -= ColorTransformDelta(m->red_to_blue_, red); | 820 new_blue -= ColorTransformDelta(m->red_to_blue_, red); |
653 new_blue &= 0xff; | 821 new_blue &= 0xff; |
654 } | 822 } |
655 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); | 823 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); |
656 } | 824 } |
657 | 825 |
| 826 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, |
| 827 uint32_t argb) { |
| 828 const uint32_t green = argb >> 8; |
| 829 uint32_t new_red = argb >> 16; |
| 830 new_red -= ColorTransformDelta(green_to_red, green); |
| 831 return (new_red & 0xff); |
| 832 } |
| 833 |
| 834 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, |
| 835 uint8_t red_to_blue, |
| 836 uint32_t argb) { |
| 837 const uint32_t green = argb >> 8; |
| 838 const uint32_t red = argb >> 16; |
| 839 uint8_t new_blue = argb; |
| 840 new_blue -= ColorTransformDelta(green_to_blue, green); |
| 841 new_blue -= ColorTransformDelta(red_to_blue, red); |
| 842 return (new_blue & 0xff); |
| 843 } |
| 844 |
658 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb, | 845 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb, |
659 int ix, int xsize) { | 846 int ix, int xsize) { |
660 const uint32_t v = argb[ix]; | 847 const uint32_t v = argb[ix]; |
661 if (ix >= xsize + 3) { | 848 if (ix >= xsize + 3) { |
662 if (v == argb[ix - xsize] && | 849 if (v == argb[ix - xsize] && |
663 argb[ix - 1] == argb[ix - xsize - 1] && | 850 argb[ix - 1] == argb[ix - xsize - 1] && |
664 argb[ix - 2] == argb[ix - xsize - 2] && | 851 argb[ix - 2] == argb[ix - xsize - 2] && |
665 argb[ix - 3] == argb[ix - xsize - 3]) { | 852 argb[ix - 3] == argb[ix - xsize - 3]) { |
666 return 1; | 853 return 1; |
667 } | 854 } |
668 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; | 855 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; |
669 } else if (ix >= 3) { | 856 } else if (ix >= 3) { |
670 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; | 857 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; |
671 } | 858 } |
672 return 0; | 859 return 0; |
673 } | 860 } |
674 | 861 |
675 static float PredictionCostCrossColor(const int accumulated[256], | 862 static float PredictionCostCrossColor(const int accumulated[256], |
676 const int counts[256]) { | 863 const int counts[256]) { |
677 // Favor low entropy, locally and globally. | 864 // Favor low entropy, locally and globally. |
678 int i; | 865 // Favor small absolute values for PredictionCostSpatial |
679 int combo[256]; | 866 static const double kExpValue = 2.4; |
680 for (i = 0; i < 256; ++i) { | 867 return CombinedShannonEntropy(counts, accumulated, 256) + |
681 combo[i] = accumulated[i] + counts[i]; | 868 PredictionCostSpatial(counts, 3, kExpValue); |
682 } | |
683 return ShannonEntropy(combo, 256) + | |
684 ShannonEntropy(counts, 256) + | |
685 PredictionCostSpatial(counts, 3, 2.4); // Favor small absolute values. | |
686 } | 869 } |
687 | 870 |
688 static Multipliers GetBestColorTransformForTile( | 871 static Multipliers GetBestColorTransformForTile( |
689 int tile_x, int tile_y, int bits, | 872 int tile_x, int tile_y, int bits, |
690 Multipliers prevX, | 873 Multipliers prevX, |
691 Multipliers prevY, | 874 Multipliers prevY, |
692 int step, int xsize, int ysize, | 875 int step, int xsize, int ysize, |
693 int* accumulated_red_histo, | 876 int* accumulated_red_histo, |
694 int* accumulated_blue_histo, | 877 int* accumulated_blue_histo, |
695 const uint32_t* const argb) { | 878 const uint32_t* const argb) { |
696 float best_diff = MAX_DIFF_COST; | 879 float best_diff = MAX_DIFF_COST; |
697 float cur_diff; | 880 float cur_diff; |
698 const int halfstep = step / 2; | 881 const int halfstep = step / 2; |
699 const int max_tile_size = 1 << bits; | 882 const int max_tile_size = 1 << bits; |
700 const int tile_y_offset = tile_y * max_tile_size; | 883 const int tile_y_offset = tile_y * max_tile_size; |
701 const int tile_x_offset = tile_x * max_tile_size; | 884 const int tile_x_offset = tile_x * max_tile_size; |
702 int green_to_red; | 885 int green_to_red; |
703 int green_to_blue; | 886 int green_to_blue; |
704 int red_to_blue; | 887 int red_to_blue; |
705 int all_x_max = tile_x_offset + max_tile_size; | 888 int all_x_max = tile_x_offset + max_tile_size; |
706 int all_y_max = tile_y_offset + max_tile_size; | 889 int all_y_max = tile_y_offset + max_tile_size; |
707 Multipliers best_tx; | 890 Multipliers best_tx; |
708 MultipliersClear(&best_tx); | 891 MultipliersClear(&best_tx); |
709 if (all_x_max > xsize) { | 892 if (all_x_max > xsize) { |
710 all_x_max = xsize; | 893 all_x_max = xsize; |
711 } | 894 } |
712 if (all_y_max > ysize) { | 895 if (all_y_max > ysize) { |
713 all_y_max = ysize; | 896 all_y_max = ysize; |
714 } | 897 } |
| 898 |
715 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) { | 899 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) { |
716 int histo[256] = { 0 }; | 900 int histo[256] = { 0 }; |
717 int all_y; | 901 int all_y; |
718 Multipliers tx; | |
719 MultipliersClear(&tx); | |
720 tx.green_to_red_ = green_to_red & 0xff; | |
721 | 902 |
722 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { | 903 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { |
723 uint32_t predict; | |
724 int ix = all_y * xsize + tile_x_offset; | 904 int ix = all_y * xsize + tile_x_offset; |
725 int all_x; | 905 int all_x; |
726 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { | 906 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { |
727 if (SkipRepeatedPixels(argb, ix, xsize)) { | 907 if (SkipRepeatedPixels(argb, ix, xsize)) { |
728 continue; | 908 continue; |
729 } | 909 } |
730 predict = TransformColor(&tx, argb[ix], 0); | 910 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red. |
731 ++histo[(predict >> 16) & 0xff]; // red. | |
732 } | 911 } |
733 } | 912 } |
734 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]); | 913 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]); |
735 if (tx.green_to_red_ == prevX.green_to_red_) { | 914 if ((uint8_t)green_to_red == prevX.green_to_red_) { |
736 cur_diff -= 3; // favor keeping the areas locally similar | 915 cur_diff -= 3; // favor keeping the areas locally similar |
737 } | 916 } |
738 if (tx.green_to_red_ == prevY.green_to_red_) { | 917 if ((uint8_t)green_to_red == prevY.green_to_red_) { |
739 cur_diff -= 3; // favor keeping the areas locally similar | 918 cur_diff -= 3; // favor keeping the areas locally similar |
740 } | 919 } |
741 if (tx.green_to_red_ == 0) { | 920 if (green_to_red == 0) { |
742 cur_diff -= 3; | 921 cur_diff -= 3; |
743 } | 922 } |
744 if (cur_diff < best_diff) { | 923 if (cur_diff < best_diff) { |
745 best_diff = cur_diff; | 924 best_diff = cur_diff; |
746 best_tx = tx; | 925 best_tx.green_to_red_ = green_to_red; |
747 } | 926 } |
748 } | 927 } |
749 best_diff = MAX_DIFF_COST; | 928 best_diff = MAX_DIFF_COST; |
750 green_to_red = best_tx.green_to_red_; | |
751 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) { | 929 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) { |
752 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) { | 930 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) { |
753 int all_y; | 931 int all_y; |
754 int histo[256] = { 0 }; | 932 int histo[256] = { 0 }; |
755 Multipliers tx; | |
756 tx.green_to_red_ = green_to_red; | |
757 tx.green_to_blue_ = green_to_blue; | |
758 tx.red_to_blue_ = red_to_blue; | |
759 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { | 933 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { |
760 uint32_t predict; | |
761 int all_x; | 934 int all_x; |
762 int ix = all_y * xsize + tile_x_offset; | 935 int ix = all_y * xsize + tile_x_offset; |
763 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { | 936 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { |
764 if (SkipRepeatedPixels(argb, ix, xsize)) { | 937 if (SkipRepeatedPixels(argb, ix, xsize)) { |
765 continue; | 938 continue; |
766 } | 939 } |
767 predict = TransformColor(&tx, argb[ix], 0); | 940 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])]; |
768 ++histo[predict & 0xff]; // blue. | |
769 } | 941 } |
770 } | 942 } |
771 cur_diff = | 943 cur_diff = |
772 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]); | 944 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]); |
773 if (tx.green_to_blue_ == prevX.green_to_blue_) { | 945 if ((uint8_t)green_to_blue == prevX.green_to_blue_) { |
774 cur_diff -= 3; // favor keeping the areas locally similar | 946 cur_diff -= 3; // favor keeping the areas locally similar |
775 } | 947 } |
776 if (tx.green_to_blue_ == prevY.green_to_blue_) { | 948 if ((uint8_t)green_to_blue == prevY.green_to_blue_) { |
777 cur_diff -= 3; // favor keeping the areas locally similar | 949 cur_diff -= 3; // favor keeping the areas locally similar |
778 } | 950 } |
779 if (tx.red_to_blue_ == prevX.red_to_blue_) { | 951 if ((uint8_t)red_to_blue == prevX.red_to_blue_) { |
780 cur_diff -= 3; // favor keeping the areas locally similar | 952 cur_diff -= 3; // favor keeping the areas locally similar |
781 } | 953 } |
782 if (tx.red_to_blue_ == prevY.red_to_blue_) { | 954 if ((uint8_t)red_to_blue == prevY.red_to_blue_) { |
783 cur_diff -= 3; // favor keeping the areas locally similar | 955 cur_diff -= 3; // favor keeping the areas locally similar |
784 } | 956 } |
785 if (tx.green_to_blue_ == 0) { | 957 if (green_to_blue == 0) { |
786 cur_diff -= 3; | 958 cur_diff -= 3; |
787 } | 959 } |
788 if (tx.red_to_blue_ == 0) { | 960 if (red_to_blue == 0) { |
789 cur_diff -= 3; | 961 cur_diff -= 3; |
790 } | 962 } |
791 if (cur_diff < best_diff) { | 963 if (cur_diff < best_diff) { |
792 best_diff = cur_diff; | 964 best_diff = cur_diff; |
793 best_tx = tx; | 965 best_tx.green_to_blue_ = green_to_blue; |
| 966 best_tx.red_to_blue_ = red_to_blue; |
794 } | 967 } |
795 } | 968 } |
796 } | 969 } |
797 return best_tx; | 970 return best_tx; |
798 } | 971 } |
799 | 972 |
800 static void CopyTileWithColorTransform(int xsize, int ysize, | 973 static void CopyTileWithColorTransform(int xsize, int ysize, |
801 int tile_x, int tile_y, int bits, | 974 int tile_x, int tile_y, int bits, |
802 Multipliers color_transform, | 975 Multipliers color_transform, |
803 uint32_t* const argb) { | 976 uint32_t* const argb) { |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
928 const int width = transform->xsize_; | 1101 const int width = transform->xsize_; |
929 const uint32_t* const color_map = transform->data_; | 1102 const uint32_t* const color_map = transform->data_; |
930 if (bits_per_pixel < 8) { | 1103 if (bits_per_pixel < 8) { |
931 const int pixels_per_byte = 1 << transform->bits_; | 1104 const int pixels_per_byte = 1 << transform->bits_; |
932 const int count_mask = pixels_per_byte - 1; | 1105 const int count_mask = pixels_per_byte - 1; |
933 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; | 1106 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; |
934 for (y = y_start; y < y_end; ++y) { | 1107 for (y = y_start; y < y_end; ++y) { |
935 uint32_t packed_pixels = 0; | 1108 uint32_t packed_pixels = 0; |
936 int x; | 1109 int x; |
937 for (x = 0; x < width; ++x) { | 1110 for (x = 0; x < width; ++x) { |
938 // We need to load fresh 'packed_pixels' once every 'bytes_per_pixels' | 1111 // We need to load fresh 'packed_pixels' once every 'pixels_per_byte' |
939 // increments of x. Fortunately, pixels_per_byte is a power of 2, so | 1112 // increments of x. Fortunately, pixels_per_byte is a power of 2, so |
940 // can just use a mask for that, instead of decrementing a counter. | 1113 // can just use a mask for that, instead of decrementing a counter. |
941 if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff; | 1114 if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff; |
942 *dst++ = color_map[packed_pixels & bit_mask]; | 1115 *dst++ = color_map[packed_pixels & bit_mask]; |
943 packed_pixels >>= bits_per_pixel; | 1116 packed_pixels >>= bits_per_pixel; |
944 } | 1117 } |
945 } | 1118 } |
946 } else { | 1119 } else { |
947 for (y = y_start; y < y_end; ++y) { | 1120 for (y = y_start; y < y_end; ++y) { |
948 int x; | 1121 int x; |
(...skipping 20 matching lines...) Expand all Loading... |
969 // for the first row in next iteration. | 1142 // for the first row in next iteration. |
970 const int width = transform->xsize_; | 1143 const int width = transform->xsize_; |
971 memcpy(out - width, out + (row_end - row_start - 1) * width, | 1144 memcpy(out - width, out + (row_end - row_start - 1) * width, |
972 width * sizeof(*out)); | 1145 width * sizeof(*out)); |
973 } | 1146 } |
974 break; | 1147 break; |
975 case CROSS_COLOR_TRANSFORM: | 1148 case CROSS_COLOR_TRANSFORM: |
976 ColorSpaceInverseTransform(transform, row_start, row_end, out); | 1149 ColorSpaceInverseTransform(transform, row_start, row_end, out); |
977 break; | 1150 break; |
978 case COLOR_INDEXING_TRANSFORM: | 1151 case COLOR_INDEXING_TRANSFORM: |
979 ColorIndexInverseTransform(transform, row_start, row_end, in, out); | 1152 if (in == out && transform->bits_ > 0) { |
| 1153 // Move packed pixels to the end of unpacked region, so that unpacking |
| 1154 // can occur seamlessly. |
| 1155 // Also, note that this is the only transform that applies on |
| 1156 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other |
| 1157 // transforms work on effective width of xsize_. |
| 1158 const int out_stride = (row_end - row_start) * transform->xsize_; |
| 1159 const int in_stride = (row_end - row_start) * |
| 1160 VP8LSubSampleSize(transform->xsize_, transform->bits_); |
| 1161 uint32_t* const src = out + out_stride - in_stride; |
| 1162 memmove(src, out, in_stride * sizeof(*src)); |
| 1163 ColorIndexInverseTransform(transform, row_start, row_end, src, out); |
| 1164 } else { |
| 1165 ColorIndexInverseTransform(transform, row_start, row_end, in, out); |
| 1166 } |
980 break; | 1167 break; |
981 } | 1168 } |
982 } | 1169 } |
983 | 1170 |
984 //------------------------------------------------------------------------------ | 1171 //------------------------------------------------------------------------------ |
985 // Color space conversion. | 1172 // Color space conversion. |
986 | 1173 |
987 static int is_big_endian(void) { | 1174 static int is_big_endian(void) { |
988 static const union { | 1175 static const union { |
989 uint16_t w; | 1176 uint16_t w; |
(...skipping 23 matching lines...) Expand all Loading... |
1013 *dst++ = (argb >> 0) & 0xff; | 1200 *dst++ = (argb >> 0) & 0xff; |
1014 *dst++ = (argb >> 24) & 0xff; | 1201 *dst++ = (argb >> 24) & 0xff; |
1015 } | 1202 } |
1016 } | 1203 } |
1017 | 1204 |
1018 static void ConvertBGRAToRGBA4444(const uint32_t* src, | 1205 static void ConvertBGRAToRGBA4444(const uint32_t* src, |
1019 int num_pixels, uint8_t* dst) { | 1206 int num_pixels, uint8_t* dst) { |
1020 const uint32_t* const src_end = src + num_pixels; | 1207 const uint32_t* const src_end = src + num_pixels; |
1021 while (src < src_end) { | 1208 while (src < src_end) { |
1022 const uint32_t argb = *src++; | 1209 const uint32_t argb = *src++; |
1023 *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); | 1210 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); |
1024 *dst++ = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); | 1211 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); |
| 1212 #ifdef WEBP_SWAP_16BIT_CSP |
| 1213 *dst++ = ba; |
| 1214 *dst++ = rg; |
| 1215 #else |
| 1216 *dst++ = rg; |
| 1217 *dst++ = ba; |
| 1218 #endif |
1025 } | 1219 } |
1026 } | 1220 } |
1027 | 1221 |
1028 static void ConvertBGRAToRGB565(const uint32_t* src, | 1222 static void ConvertBGRAToRGB565(const uint32_t* src, |
1029 int num_pixels, uint8_t* dst) { | 1223 int num_pixels, uint8_t* dst) { |
1030 const uint32_t* const src_end = src + num_pixels; | 1224 const uint32_t* const src_end = src + num_pixels; |
1031 while (src < src_end) { | 1225 while (src < src_end) { |
1032 const uint32_t argb = *src++; | 1226 const uint32_t argb = *src++; |
1033 *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); | 1227 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); |
1034 *dst++ = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); | 1228 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); |
| 1229 #ifdef WEBP_SWAP_16BIT_CSP |
| 1230 *dst++ = gb; |
| 1231 *dst++ = rg; |
| 1232 #else |
| 1233 *dst++ = rg; |
| 1234 *dst++ = gb; |
| 1235 #endif |
1035 } | 1236 } |
1036 } | 1237 } |
1037 | 1238 |
1038 static void ConvertBGRAToBGR(const uint32_t* src, | 1239 static void ConvertBGRAToBGR(const uint32_t* src, |
1039 int num_pixels, uint8_t* dst) { | 1240 int num_pixels, uint8_t* dst) { |
1040 const uint32_t* const src_end = src + num_pixels; | 1241 const uint32_t* const src_end = src + num_pixels; |
1041 while (src < src_end) { | 1242 while (src < src_end) { |
1042 const uint32_t argb = *src++; | 1243 const uint32_t argb = *src++; |
1043 *dst++ = (argb >> 0) & 0xff; | 1244 *dst++ = (argb >> 0) & 0xff; |
1044 *dst++ = (argb >> 8) & 0xff; | 1245 *dst++ = (argb >> 8) & 0xff; |
1045 *dst++ = (argb >> 16) & 0xff; | 1246 *dst++ = (argb >> 16) & 0xff; |
1046 } | 1247 } |
1047 } | 1248 } |
1048 | 1249 |
1049 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, | 1250 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, |
1050 int swap_on_big_endian) { | 1251 int swap_on_big_endian) { |
1051 if (is_big_endian() == swap_on_big_endian) { | 1252 if (is_big_endian() == swap_on_big_endian) { |
1052 const uint32_t* const src_end = src + num_pixels; | 1253 const uint32_t* const src_end = src + num_pixels; |
1053 while (src < src_end) { | 1254 while (src < src_end) { |
1054 uint32_t argb = *src++; | 1255 uint32_t argb = *src++; |
| 1256 |
| 1257 #if !defined(WEBP_REFERENCE_IMPLEMENTATION) |
1055 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__)) | 1258 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__)) |
1056 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb)); | 1259 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb)); |
1057 *(uint32_t*)dst = argb; | 1260 *(uint32_t*)dst = argb; |
1058 dst += sizeof(argb); | |
1059 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER) | 1261 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER) |
1060 argb = _byteswap_ulong(argb); | 1262 argb = _byteswap_ulong(argb); |
1061 *(uint32_t*)dst = argb; | 1263 *(uint32_t*)dst = argb; |
| 1264 #else |
| 1265 dst[0] = (argb >> 24) & 0xff; |
| 1266 dst[1] = (argb >> 16) & 0xff; |
| 1267 dst[2] = (argb >> 8) & 0xff; |
| 1268 dst[3] = (argb >> 0) & 0xff; |
| 1269 #endif |
| 1270 #else // WEBP_REFERENCE_IMPLEMENTATION |
| 1271 dst[0] = (argb >> 24) & 0xff; |
| 1272 dst[1] = (argb >> 16) & 0xff; |
| 1273 dst[2] = (argb >> 8) & 0xff; |
| 1274 dst[3] = (argb >> 0) & 0xff; |
| 1275 #endif |
1062 dst += sizeof(argb); | 1276 dst += sizeof(argb); |
1063 #else | |
1064 *dst++ = (argb >> 24) & 0xff; | |
1065 *dst++ = (argb >> 16) & 0xff; | |
1066 *dst++ = (argb >> 8) & 0xff; | |
1067 *dst++ = (argb >> 0) & 0xff; | |
1068 #endif | |
1069 } | 1277 } |
1070 } else { | 1278 } else { |
1071 memcpy(dst, src, num_pixels * sizeof(*src)); | 1279 memcpy(dst, src, num_pixels * sizeof(*src)); |
1072 } | 1280 } |
1073 } | 1281 } |
1074 | 1282 |
1075 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, | 1283 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, |
1076 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { | 1284 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { |
1077 switch (out_colorspace) { | 1285 switch (out_colorspace) { |
1078 case MODE_RGB: | 1286 case MODE_RGB: |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1115 default: | 1323 default: |
1116 assert(0); // Code flow should not reach here. | 1324 assert(0); // Code flow should not reach here. |
1117 } | 1325 } |
1118 } | 1326 } |
1119 | 1327 |
1120 //------------------------------------------------------------------------------ | 1328 //------------------------------------------------------------------------------ |
1121 | 1329 |
1122 #if defined(__cplusplus) || defined(c_plusplus) | 1330 #if defined(__cplusplus) || defined(c_plusplus) |
1123 } // extern "C" | 1331 } // extern "C" |
1124 #endif | 1332 #endif |
OLD | NEW |