Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(484)

Side by Side Diff: third_party/libwebp/dsp/lossless.c

Issue 12942006: libwebp: update snapshot to v0.3.0-rc6 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: local webkit layout expectations Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD NEW
1 // Copyright 2012 Google Inc. All Rights Reserved. 1 // Copyright 2012 Google Inc. All Rights Reserved.
2 // 2 //
3 // This code is licensed under the same terms as WebM: 3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/ 4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // ----------------------------------------------------------------------------- 6 // -----------------------------------------------------------------------------
7 // 7 //
8 // Image transforms and color space conversion methods for lossless decoder. 8 // Image transforms and color space conversion methods for lossless decoder.
9 // 9 //
10 // Authors: Vikas Arora (vikaas.arora@gmail.com) 10 // Authors: Vikas Arora (vikaas.arora@gmail.com)
11 // Jyrki Alakuijala (jyrki@google.com) 11 // Jyrki Alakuijala (jyrki@google.com)
12 // Urvang Joshi (urvang@google.com) 12 // Urvang Joshi (urvang@google.com)
13 13
14 #include "./dsp.h"
15
16 // Define the following if target arch is sure to have SSE2
17 // #define WEBP_TARGET_HAS_SSE2
18
14 #if defined(__cplusplus) || defined(c_plusplus) 19 #if defined(__cplusplus) || defined(c_plusplus)
15 extern "C" { 20 extern "C" {
16 #endif 21 #endif
17 22
23 #if defined(WEBP_TARGET_HAS_SSE2)
24 #include <emmintrin.h>
25 #endif
26
18 #include <math.h> 27 #include <math.h>
19 #include <stdlib.h> 28 #include <stdlib.h>
20 #include "./lossless.h" 29 #include "./lossless.h"
21 #include "../dec/vp8li.h" 30 #include "../dec/vp8li.h"
22 #include "../dsp/yuv.h" 31 #include "./yuv.h"
23 #include "../dsp/dsp.h"
24 #include "../enc/histogram.h"
25 32
26 #define MAX_DIFF_COST (1e30f) 33 #define MAX_DIFF_COST (1e30f)
27 34
28 // lookup table for small values of log2(int) 35 // lookup table for small values of log2(int)
29 #define APPROX_LOG_MAX 4096 36 #define APPROX_LOG_MAX 4096
30 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 37 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
31 #define LOG_LOOKUP_IDX_MAX 256 38 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
32 static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
33 0.0000000000000000f, 0.0000000000000000f, 39 0.0000000000000000f, 0.0000000000000000f,
34 1.0000000000000000f, 1.5849625007211560f, 40 1.0000000000000000f, 1.5849625007211560f,
35 2.0000000000000000f, 2.3219280948873621f, 41 2.0000000000000000f, 2.3219280948873621f,
36 2.5849625007211560f, 2.8073549220576041f, 42 2.5849625007211560f, 2.8073549220576041f,
37 3.0000000000000000f, 3.1699250014423121f, 43 3.0000000000000000f, 3.1699250014423121f,
38 3.3219280948873621f, 3.4594316186372973f, 44 3.3219280948873621f, 3.4594316186372973f,
39 3.5849625007211560f, 3.7004397181410921f, 45 3.5849625007211560f, 3.7004397181410921f,
40 3.8073549220576041f, 3.9068905956085187f, 46 3.8073549220576041f, 3.9068905956085187f,
41 4.0000000000000000f, 4.0874628412503390f, 47 4.0000000000000000f, 4.0874628412503390f,
42 4.1699250014423121f, 4.2479275134435852f, 48 4.1699250014423121f, 4.2479275134435852f,
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
153 7.9068905956085187f, 7.9128893362299619f, 159 7.9068905956085187f, 7.9128893362299619f,
154 7.9188632372745946f, 7.9248125036057812f, 160 7.9188632372745946f, 7.9248125036057812f,
155 7.9307373375628866f, 7.9366379390025709f, 161 7.9307373375628866f, 7.9366379390025709f,
156 7.9425145053392398f, 7.9483672315846778f, 162 7.9425145053392398f, 7.9483672315846778f,
157 7.9541963103868749f, 7.9600019320680805f, 163 7.9541963103868749f, 7.9600019320680805f,
158 7.9657842846620869f, 7.9715435539507719f, 164 7.9657842846620869f, 7.9715435539507719f,
159 7.9772799234999167f, 7.9829935746943103f, 165 7.9772799234999167f, 7.9829935746943103f,
160 7.9886846867721654f, 7.9943534368588577f 166 7.9886846867721654f, 7.9943534368588577f
161 }; 167 };
162 168
163 float VP8LFastLog2(int v) { 169 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
164 if (v < LOG_LOOKUP_IDX_MAX) { 170 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
165 return kLog2Table[v]; 171 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
166 } else if (v < APPROX_LOG_MAX) { 172 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
173 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
174 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
175 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
176 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
177 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
178 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
179 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
180 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
181 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
182 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
183 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
184 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
185 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
186 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
187 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
188 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
189 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
190 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
191 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
192 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
193 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
194 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
195 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
196 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
197 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
198 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
199 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
200 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
201 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
202 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
203 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
204 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
205 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
206 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
207 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
208 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
209 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
210 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
211 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
212 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
213 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
214 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
215 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
216 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
217 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
218 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
219 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
220 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
221 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
222 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
223 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
224 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
225 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
226 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
227 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
228 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
229 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
230 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
231 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
232 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
233 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
234 };
235
236 float VP8LFastSLog2Slow(int v) {
237 assert(v >= LOG_LOOKUP_IDX_MAX);
238 if (v < APPROX_LOG_MAX) {
239 int log_cnt = 0;
240 const float v_f = (float)v;
241 while (v >= LOG_LOOKUP_IDX_MAX) {
242 ++log_cnt;
243 v = v >> 1;
244 }
245 return v_f * (kLog2Table[v] + log_cnt);
246 } else {
247 return (float)(LOG_2_RECIPROCAL * v * log((double)v));
248 }
249 }
250
251 float VP8LFastLog2Slow(int v) {
252 assert(v >= LOG_LOOKUP_IDX_MAX);
253 if (v < APPROX_LOG_MAX) {
167 int log_cnt = 0; 254 int log_cnt = 0;
168 while (v >= LOG_LOOKUP_IDX_MAX) { 255 while (v >= LOG_LOOKUP_IDX_MAX) {
169 ++log_cnt; 256 ++log_cnt;
170 v = v >> 1; 257 v = v >> 1;
171 } 258 }
172 return kLog2Table[v] + (float)log_cnt; 259 return kLog2Table[v] + log_cnt;
173 } else { 260 } else {
174 return (float)(LOG_2_RECIPROCAL * log((double)v)); 261 return (float)(LOG_2_RECIPROCAL * log((double)v));
175 } 262 }
176 } 263 }
177 264
178 //------------------------------------------------------------------------------ 265 //------------------------------------------------------------------------------
179 // Image transforms. 266 // Image transforms.
180 267
181 // In-place sum of each component with mod 256. 268 // In-place sum of each component with mod 256.
182 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { 269 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
183 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); 270 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
184 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); 271 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
185 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); 272 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
186 } 273 }
187 274
188 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { 275 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
189 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); 276 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
190 } 277 }
191 278
192 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { 279 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
193 return Average2(Average2(a0, a2), a1); 280 return Average2(Average2(a0, a2), a1);
194 } 281 }
195 282
196 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, 283 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
197 uint32_t a2, uint32_t a3) { 284 uint32_t a2, uint32_t a3) {
198 return Average2(Average2(a0, a1), Average2(a2, a3)); 285 return Average2(Average2(a0, a1), Average2(a2, a3));
199 } 286 }
200 287
288 #if defined(WEBP_TARGET_HAS_SSE2)
289 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
290 uint32_t c2) {
291 const __m128i zero = _mm_setzero_si128();
292 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
293 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
294 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
295 const __m128i V1 = _mm_add_epi16(C0, C1);
296 const __m128i V2 = _mm_sub_epi16(V1, C2);
297 const __m128i b = _mm_packus_epi16(V2, V2);
298 const uint32_t output = _mm_cvtsi128_si32(b);
299 return output;
300 }
301
302 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
303 uint32_t c2) {
304 const uint32_t ave = Average2(c0, c1);
305 const __m128i zero = _mm_setzero_si128();
306 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
307 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
308 const __m128i A1 = _mm_sub_epi16(A0, B0);
309 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
310 const __m128i A2 = _mm_sub_epi16(A1, BgtA);
311 const __m128i A3 = _mm_srai_epi16(A2, 1);
312 const __m128i A4 = _mm_add_epi16(A0, A3);
313 const __m128i A5 = _mm_packus_epi16(A4, A4);
314 const uint32_t output = _mm_cvtsi128_si32(A5);
315 return output;
316 }
317
318 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
319 int pa_minus_pb;
320 const __m128i zero = _mm_setzero_si128();
321 const __m128i A0 = _mm_cvtsi32_si128(a);
322 const __m128i B0 = _mm_cvtsi32_si128(b);
323 const __m128i C0 = _mm_cvtsi32_si128(c);
324 const __m128i AC0 = _mm_subs_epu8(A0, C0);
325 const __m128i CA0 = _mm_subs_epu8(C0, A0);
326 const __m128i BC0 = _mm_subs_epu8(B0, C0);
327 const __m128i CB0 = _mm_subs_epu8(C0, B0);
328 const __m128i AC = _mm_or_si128(AC0, CA0);
329 const __m128i BC = _mm_or_si128(BC0, CB0);
330 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
331 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
332 const __m128i diff = _mm_sub_epi16(pb, pa);
333 {
334 int16_t out[8];
335 _mm_storeu_si128((__m128i*)out, diff);
336 pa_minus_pb = out[0] + out[1] + out[2] + out[3];
337 }
338 return (pa_minus_pb <= 0) ? a : b;
339 }
340
341 #else
342
201 static WEBP_INLINE uint32_t Clip255(uint32_t a) { 343 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
202 if (a < 256) { 344 if (a < 256) {
203 return a; 345 return a;
204 } 346 }
205 // return 0, when a is a negative integer. 347 // return 0, when a is a negative integer.
206 // return 255, when a is positive. 348 // return 255, when a is positive.
207 return ~a >> 24; 349 return ~a >> 24;
208 } 350 }
209 351
210 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { 352 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
(...skipping 21 matching lines...) Expand all
232 uint32_t c2) { 374 uint32_t c2) {
233 const uint32_t ave = Average2(c0, c1); 375 const uint32_t ave = Average2(c0, c1);
234 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); 376 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
235 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); 377 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
236 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); 378 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
237 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); 379 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
238 return (a << 24) | (r << 16) | (g << 8) | b; 380 return (a << 24) | (r << 16) | (g << 8) | b;
239 } 381 }
240 382
241 static WEBP_INLINE int Sub3(int a, int b, int c) { 383 static WEBP_INLINE int Sub3(int a, int b, int c) {
242 const int pa = b - c; 384 const int pb = b - c;
243 const int pb = a - c; 385 const int pa = a - c;
244 return abs(pa) - abs(pb); 386 return abs(pb) - abs(pa);
245 } 387 }
246 388
247 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { 389 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
248 const int pa_minus_pb = 390 const int pa_minus_pb =
249 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + 391 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
250 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + 392 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
251 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + 393 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
252 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); 394 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
253
254 return (pa_minus_pb <= 0) ? a : b; 395 return (pa_minus_pb <= 0) ? a : b;
255 } 396 }
397 #endif
256 398
257 //------------------------------------------------------------------------------ 399 //------------------------------------------------------------------------------
258 // Predictors 400 // Predictors
259 401
260 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { 402 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
261 (void)top; 403 (void)top;
262 (void)left; 404 (void)left;
263 return ARGB_BLACK; 405 return ARGB_BLACK;
264 } 406 }
265 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { 407 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
333 const double exp_decay_factor = 0.6; 475 const double exp_decay_factor = 0.6;
334 double bits = weight_0 * counts[0]; 476 double bits = weight_0 * counts[0];
335 int i; 477 int i;
336 for (i = 1; i < significant_symbols; ++i) { 478 for (i = 1; i < significant_symbols; ++i) {
337 bits += exp_val * (counts[i] + counts[256 - i]); 479 bits += exp_val * (counts[i] + counts[256 - i]);
338 exp_val *= exp_decay_factor; 480 exp_val *= exp_decay_factor;
339 } 481 }
340 return (float)(-0.1 * bits); 482 return (float)(-0.1 * bits);
341 } 483 }
342 484
343 // Compute the Shannon entropy: Sum(p*log2(p)) 485 // Compute the combined Shannon entropy for distribution {X} and {X+Y}
344 static float ShannonEntropy(const int* const array, int n) { 486 static float CombinedShannonEntropy(const int* const X,
487 const int* const Y, int n) {
345 int i; 488 int i;
346 float retval = 0.f; 489 double retval = 0.;
347 int sum = 0; 490 int sumX = 0, sumXY = 0;
348 for (i = 0; i < n; ++i) { 491 for (i = 0; i < n; ++i) {
349 if (array[i] != 0) { 492 const int x = X[i];
350 sum += array[i]; 493 const int xy = X[i] + Y[i];
351 retval -= VP8LFastSLog2(array[i]); 494 if (x != 0) {
495 sumX += x;
496 retval -= VP8LFastSLog2(x);
497 }
498 if (xy != 0) {
499 sumXY += xy;
500 retval -= VP8LFastSLog2(xy);
352 } 501 }
353 } 502 }
354 retval += VP8LFastSLog2(sum); 503 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
355 return retval; 504 return (float)retval;
356 } 505 }
357 506
358 static float PredictionCostSpatialHistogram(int accumulated[4][256], 507 static float PredictionCostSpatialHistogram(int accumulated[4][256],
359 int tile[4][256]) { 508 int tile[4][256]) {
360 int i; 509 int i;
361 int k;
362 int combo[256];
363 double retval = 0; 510 double retval = 0;
364 for (i = 0; i < 4; ++i) { 511 for (i = 0; i < 4; ++i) {
365 const double exp_val = 0.94; 512 const double kExpValue = 0.94;
366 retval += PredictionCostSpatial(&tile[i][0], 1, exp_val); 513 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
367 retval += ShannonEntropy(&tile[i][0], 256); 514 retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
368 for (k = 0; k < 256; ++k) {
369 combo[k] = accumulated[i][k] + tile[i][k];
370 }
371 retval += ShannonEntropy(&combo[0], 256);
372 } 515 }
373 return (float)retval; 516 return (float)retval;
374 } 517 }
375 518
376 static int GetBestPredictorForTile(int width, int height, 519 static int GetBestPredictorForTile(int width, int height,
377 int tile_x, int tile_y, int bits, 520 int tile_x, int tile_y, int bits,
378 int accumulated[4][256], 521 int accumulated[4][256],
379 const uint32_t* const argb_scratch) { 522 const uint32_t* const argb_scratch) {
380 const int kNumPredModes = 14; 523 const int kNumPredModes = 14;
381 const int col_start = tile_x << bits; 524 const int col_start = tile_x << bits;
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
565 data += width; 708 data += width;
566 ++y; 709 ++y;
567 if ((y & mask) == 0) { // Use the same mask, since tiles are squares. 710 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
568 pred_mode_base += tiles_per_row; 711 pred_mode_base += tiles_per_row;
569 } 712 }
570 } 713 }
571 } 714 }
572 } 715 }
573 716
574 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) { 717 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
575 int i; 718 int i = 0;
576 for (i = 0; i < num_pixs; ++i) { 719 #if defined(WEBP_TARGET_HAS_SSE2)
720 const __m128i mask = _mm_set1_epi32(0x0000ff00);
721 for (; i + 4 < num_pixs; i += 4) {
722 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
723 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
724 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
725 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
726 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
727 const __m128i out = _mm_sub_epi8(in, in_0g0g);
728 _mm_storeu_si128((__m128i*)&argb_data[i], out);
729 }
730 // fallthrough and finish off with plain-C
731 #endif
732 for (; i < num_pixs; ++i) {
577 const uint32_t argb = argb_data[i]; 733 const uint32_t argb = argb_data[i];
578 const uint32_t green = (argb >> 8) & 0xff; 734 const uint32_t green = (argb >> 8) & 0xff;
579 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; 735 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
580 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; 736 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
581 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; 737 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
582 } 738 }
583 } 739 }
584 740
585 // Add green to blue and red channels (i.e. perform the inverse transform of 741 // Add green to blue and red channels (i.e. perform the inverse transform of
586 // 'subtract green'). 742 // 'subtract green').
587 static void AddGreenToBlueAndRed(const VP8LTransform* const transform, 743 static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
588 int y_start, int y_end, uint32_t* data) { 744 int y_start, int y_end, uint32_t* data) {
589 const int width = transform->xsize_; 745 const int width = transform->xsize_;
590 const uint32_t* const data_end = data + (y_end - y_start) * width; 746 const uint32_t* const data_end = data + (y_end - y_start) * width;
747 #if defined(WEBP_TARGET_HAS_SSE2)
748 const __m128i mask = _mm_set1_epi32(0x0000ff00);
749 for (; data + 4 < data_end; data += 4) {
750 const __m128i in = _mm_loadu_si128((__m128i*)data);
751 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
752 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
753 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
754 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
755 const __m128i out = _mm_add_epi8(in, in_0g0g);
756 _mm_storeu_si128((__m128i*)data, out);
757 }
758 // fallthrough and finish off with plain-C
759 #endif
591 while (data < data_end) { 760 while (data < data_end) {
592 const uint32_t argb = *data; 761 const uint32_t argb = *data;
593 // "* 0001001u" is equivalent to "(green << 16) + green)"
594 const uint32_t green = ((argb >> 8) & 0xff); 762 const uint32_t green = ((argb >> 8) & 0xff);
595 uint32_t red_blue = (argb & 0x00ff00ffu); 763 uint32_t red_blue = (argb & 0x00ff00ffu);
596 red_blue += (green << 16) | green; 764 red_blue += (green << 16) | green;
597 red_blue &= 0x00ff00ffu; 765 red_blue &= 0x00ff00ffu;
598 *data++ = (argb & 0xff00ff00u) | red_blue; 766 *data++ = (argb & 0xff00ff00u) | red_blue;
599 } 767 }
600 } 768 }
601 769
602 typedef struct { 770 typedef struct {
603 // Note: the members are uint8_t, so that any negative values are 771 // Note: the members are uint8_t, so that any negative values are
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
648 } else { 816 } else {
649 new_red -= ColorTransformDelta(m->green_to_red_, green); 817 new_red -= ColorTransformDelta(m->green_to_red_, green);
650 new_red &= 0xff; 818 new_red &= 0xff;
651 new_blue -= ColorTransformDelta(m->green_to_blue_, green); 819 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
652 new_blue -= ColorTransformDelta(m->red_to_blue_, red); 820 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
653 new_blue &= 0xff; 821 new_blue &= 0xff;
654 } 822 }
655 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); 823 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
656 } 824 }
657 825
826 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
827 uint32_t argb) {
828 const uint32_t green = argb >> 8;
829 uint32_t new_red = argb >> 16;
830 new_red -= ColorTransformDelta(green_to_red, green);
831 return (new_red & 0xff);
832 }
833
834 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
835 uint8_t red_to_blue,
836 uint32_t argb) {
837 const uint32_t green = argb >> 8;
838 const uint32_t red = argb >> 16;
839 uint8_t new_blue = argb;
840 new_blue -= ColorTransformDelta(green_to_blue, green);
841 new_blue -= ColorTransformDelta(red_to_blue, red);
842 return (new_blue & 0xff);
843 }
844
658 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb, 845 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
659 int ix, int xsize) { 846 int ix, int xsize) {
660 const uint32_t v = argb[ix]; 847 const uint32_t v = argb[ix];
661 if (ix >= xsize + 3) { 848 if (ix >= xsize + 3) {
662 if (v == argb[ix - xsize] && 849 if (v == argb[ix - xsize] &&
663 argb[ix - 1] == argb[ix - xsize - 1] && 850 argb[ix - 1] == argb[ix - xsize - 1] &&
664 argb[ix - 2] == argb[ix - xsize - 2] && 851 argb[ix - 2] == argb[ix - xsize - 2] &&
665 argb[ix - 3] == argb[ix - xsize - 3]) { 852 argb[ix - 3] == argb[ix - xsize - 3]) {
666 return 1; 853 return 1;
667 } 854 }
668 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; 855 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
669 } else if (ix >= 3) { 856 } else if (ix >= 3) {
670 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; 857 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
671 } 858 }
672 return 0; 859 return 0;
673 } 860 }
674 861
675 static float PredictionCostCrossColor(const int accumulated[256], 862 static float PredictionCostCrossColor(const int accumulated[256],
676 const int counts[256]) { 863 const int counts[256]) {
677 // Favor low entropy, locally and globally. 864 // Favor low entropy, locally and globally.
678 int i; 865 // Favor small absolute values for PredictionCostSpatial
679 int combo[256]; 866 static const double kExpValue = 2.4;
680 for (i = 0; i < 256; ++i) { 867 return CombinedShannonEntropy(counts, accumulated, 256) +
681 combo[i] = accumulated[i] + counts[i]; 868 PredictionCostSpatial(counts, 3, kExpValue);
682 }
683 return ShannonEntropy(combo, 256) +
684 ShannonEntropy(counts, 256) +
685 PredictionCostSpatial(counts, 3, 2.4); // Favor small absolute values.
686 } 869 }
687 870
688 static Multipliers GetBestColorTransformForTile( 871 static Multipliers GetBestColorTransformForTile(
689 int tile_x, int tile_y, int bits, 872 int tile_x, int tile_y, int bits,
690 Multipliers prevX, 873 Multipliers prevX,
691 Multipliers prevY, 874 Multipliers prevY,
692 int step, int xsize, int ysize, 875 int step, int xsize, int ysize,
693 int* accumulated_red_histo, 876 int* accumulated_red_histo,
694 int* accumulated_blue_histo, 877 int* accumulated_blue_histo,
695 const uint32_t* const argb) { 878 const uint32_t* const argb) {
696 float best_diff = MAX_DIFF_COST; 879 float best_diff = MAX_DIFF_COST;
697 float cur_diff; 880 float cur_diff;
698 const int halfstep = step / 2; 881 const int halfstep = step / 2;
699 const int max_tile_size = 1 << bits; 882 const int max_tile_size = 1 << bits;
700 const int tile_y_offset = tile_y * max_tile_size; 883 const int tile_y_offset = tile_y * max_tile_size;
701 const int tile_x_offset = tile_x * max_tile_size; 884 const int tile_x_offset = tile_x * max_tile_size;
702 int green_to_red; 885 int green_to_red;
703 int green_to_blue; 886 int green_to_blue;
704 int red_to_blue; 887 int red_to_blue;
705 int all_x_max = tile_x_offset + max_tile_size; 888 int all_x_max = tile_x_offset + max_tile_size;
706 int all_y_max = tile_y_offset + max_tile_size; 889 int all_y_max = tile_y_offset + max_tile_size;
707 Multipliers best_tx; 890 Multipliers best_tx;
708 MultipliersClear(&best_tx); 891 MultipliersClear(&best_tx);
709 if (all_x_max > xsize) { 892 if (all_x_max > xsize) {
710 all_x_max = xsize; 893 all_x_max = xsize;
711 } 894 }
712 if (all_y_max > ysize) { 895 if (all_y_max > ysize) {
713 all_y_max = ysize; 896 all_y_max = ysize;
714 } 897 }
898
715 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) { 899 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
716 int histo[256] = { 0 }; 900 int histo[256] = { 0 };
717 int all_y; 901 int all_y;
718 Multipliers tx;
719 MultipliersClear(&tx);
720 tx.green_to_red_ = green_to_red & 0xff;
721 902
722 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 903 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
723 uint32_t predict;
724 int ix = all_y * xsize + tile_x_offset; 904 int ix = all_y * xsize + tile_x_offset;
725 int all_x; 905 int all_x;
726 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 906 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
727 if (SkipRepeatedPixels(argb, ix, xsize)) { 907 if (SkipRepeatedPixels(argb, ix, xsize)) {
728 continue; 908 continue;
729 } 909 }
730 predict = TransformColor(&tx, argb[ix], 0); 910 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
731 ++histo[(predict >> 16) & 0xff]; // red.
732 } 911 }
733 } 912 }
734 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]); 913 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
735 if (tx.green_to_red_ == prevX.green_to_red_) { 914 if ((uint8_t)green_to_red == prevX.green_to_red_) {
736 cur_diff -= 3; // favor keeping the areas locally similar 915 cur_diff -= 3; // favor keeping the areas locally similar
737 } 916 }
738 if (tx.green_to_red_ == prevY.green_to_red_) { 917 if ((uint8_t)green_to_red == prevY.green_to_red_) {
739 cur_diff -= 3; // favor keeping the areas locally similar 918 cur_diff -= 3; // favor keeping the areas locally similar
740 } 919 }
741 if (tx.green_to_red_ == 0) { 920 if (green_to_red == 0) {
742 cur_diff -= 3; 921 cur_diff -= 3;
743 } 922 }
744 if (cur_diff < best_diff) { 923 if (cur_diff < best_diff) {
745 best_diff = cur_diff; 924 best_diff = cur_diff;
746 best_tx = tx; 925 best_tx.green_to_red_ = green_to_red;
747 } 926 }
748 } 927 }
749 best_diff = MAX_DIFF_COST; 928 best_diff = MAX_DIFF_COST;
750 green_to_red = best_tx.green_to_red_;
751 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) { 929 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
752 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) { 930 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
753 int all_y; 931 int all_y;
754 int histo[256] = { 0 }; 932 int histo[256] = { 0 };
755 Multipliers tx;
756 tx.green_to_red_ = green_to_red;
757 tx.green_to_blue_ = green_to_blue;
758 tx.red_to_blue_ = red_to_blue;
759 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 933 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
760 uint32_t predict;
761 int all_x; 934 int all_x;
762 int ix = all_y * xsize + tile_x_offset; 935 int ix = all_y * xsize + tile_x_offset;
763 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 936 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
764 if (SkipRepeatedPixels(argb, ix, xsize)) { 937 if (SkipRepeatedPixels(argb, ix, xsize)) {
765 continue; 938 continue;
766 } 939 }
767 predict = TransformColor(&tx, argb[ix], 0); 940 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
768 ++histo[predict & 0xff]; // blue.
769 } 941 }
770 } 942 }
771 cur_diff = 943 cur_diff =
772 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]); 944 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
773 if (tx.green_to_blue_ == prevX.green_to_blue_) { 945 if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
774 cur_diff -= 3; // favor keeping the areas locally similar 946 cur_diff -= 3; // favor keeping the areas locally similar
775 } 947 }
776 if (tx.green_to_blue_ == prevY.green_to_blue_) { 948 if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
777 cur_diff -= 3; // favor keeping the areas locally similar 949 cur_diff -= 3; // favor keeping the areas locally similar
778 } 950 }
779 if (tx.red_to_blue_ == prevX.red_to_blue_) { 951 if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
780 cur_diff -= 3; // favor keeping the areas locally similar 952 cur_diff -= 3; // favor keeping the areas locally similar
781 } 953 }
782 if (tx.red_to_blue_ == prevY.red_to_blue_) { 954 if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
783 cur_diff -= 3; // favor keeping the areas locally similar 955 cur_diff -= 3; // favor keeping the areas locally similar
784 } 956 }
785 if (tx.green_to_blue_ == 0) { 957 if (green_to_blue == 0) {
786 cur_diff -= 3; 958 cur_diff -= 3;
787 } 959 }
788 if (tx.red_to_blue_ == 0) { 960 if (red_to_blue == 0) {
789 cur_diff -= 3; 961 cur_diff -= 3;
790 } 962 }
791 if (cur_diff < best_diff) { 963 if (cur_diff < best_diff) {
792 best_diff = cur_diff; 964 best_diff = cur_diff;
793 best_tx = tx; 965 best_tx.green_to_blue_ = green_to_blue;
966 best_tx.red_to_blue_ = red_to_blue;
794 } 967 }
795 } 968 }
796 } 969 }
797 return best_tx; 970 return best_tx;
798 } 971 }
799 972
800 static void CopyTileWithColorTransform(int xsize, int ysize, 973 static void CopyTileWithColorTransform(int xsize, int ysize,
801 int tile_x, int tile_y, int bits, 974 int tile_x, int tile_y, int bits,
802 Multipliers color_transform, 975 Multipliers color_transform,
803 uint32_t* const argb) { 976 uint32_t* const argb) {
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
928 const int width = transform->xsize_; 1101 const int width = transform->xsize_;
929 const uint32_t* const color_map = transform->data_; 1102 const uint32_t* const color_map = transform->data_;
930 if (bits_per_pixel < 8) { 1103 if (bits_per_pixel < 8) {
931 const int pixels_per_byte = 1 << transform->bits_; 1104 const int pixels_per_byte = 1 << transform->bits_;
932 const int count_mask = pixels_per_byte - 1; 1105 const int count_mask = pixels_per_byte - 1;
933 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; 1106 const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
934 for (y = y_start; y < y_end; ++y) { 1107 for (y = y_start; y < y_end; ++y) {
935 uint32_t packed_pixels = 0; 1108 uint32_t packed_pixels = 0;
936 int x; 1109 int x;
937 for (x = 0; x < width; ++x) { 1110 for (x = 0; x < width; ++x) {
938 // We need to load fresh 'packed_pixels' once every 'bytes_per_pixels' 1111 // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
939 // increments of x. Fortunately, pixels_per_byte is a power of 2, so 1112 // increments of x. Fortunately, pixels_per_byte is a power of 2, so
940 // can just use a mask for that, instead of decrementing a counter. 1113 // can just use a mask for that, instead of decrementing a counter.
941 if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff; 1114 if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
942 *dst++ = color_map[packed_pixels & bit_mask]; 1115 *dst++ = color_map[packed_pixels & bit_mask];
943 packed_pixels >>= bits_per_pixel; 1116 packed_pixels >>= bits_per_pixel;
944 } 1117 }
945 } 1118 }
946 } else { 1119 } else {
947 for (y = y_start; y < y_end; ++y) { 1120 for (y = y_start; y < y_end; ++y) {
948 int x; 1121 int x;
(...skipping 20 matching lines...) Expand all
969 // for the first row in next iteration. 1142 // for the first row in next iteration.
970 const int width = transform->xsize_; 1143 const int width = transform->xsize_;
971 memcpy(out - width, out + (row_end - row_start - 1) * width, 1144 memcpy(out - width, out + (row_end - row_start - 1) * width,
972 width * sizeof(*out)); 1145 width * sizeof(*out));
973 } 1146 }
974 break; 1147 break;
975 case CROSS_COLOR_TRANSFORM: 1148 case CROSS_COLOR_TRANSFORM:
976 ColorSpaceInverseTransform(transform, row_start, row_end, out); 1149 ColorSpaceInverseTransform(transform, row_start, row_end, out);
977 break; 1150 break;
978 case COLOR_INDEXING_TRANSFORM: 1151 case COLOR_INDEXING_TRANSFORM:
979 ColorIndexInverseTransform(transform, row_start, row_end, in, out); 1152 if (in == out && transform->bits_ > 0) {
1153 // Move packed pixels to the end of unpacked region, so that unpacking
1154 // can occur seamlessly.
1155 // Also, note that this is the only transform that applies on
1156 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1157 // transforms work on effective width of xsize_.
1158 const int out_stride = (row_end - row_start) * transform->xsize_;
1159 const int in_stride = (row_end - row_start) *
1160 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1161 uint32_t* const src = out + out_stride - in_stride;
1162 memmove(src, out, in_stride * sizeof(*src));
1163 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1164 } else {
1165 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1166 }
980 break; 1167 break;
981 } 1168 }
982 } 1169 }
983 1170
984 //------------------------------------------------------------------------------ 1171 //------------------------------------------------------------------------------
985 // Color space conversion. 1172 // Color space conversion.
986 1173
987 static int is_big_endian(void) { 1174 static int is_big_endian(void) {
988 static const union { 1175 static const union {
989 uint16_t w; 1176 uint16_t w;
(...skipping 23 matching lines...) Expand all
1013 *dst++ = (argb >> 0) & 0xff; 1200 *dst++ = (argb >> 0) & 0xff;
1014 *dst++ = (argb >> 24) & 0xff; 1201 *dst++ = (argb >> 24) & 0xff;
1015 } 1202 }
1016 } 1203 }
1017 1204
1018 static void ConvertBGRAToRGBA4444(const uint32_t* src, 1205 static void ConvertBGRAToRGBA4444(const uint32_t* src,
1019 int num_pixels, uint8_t* dst) { 1206 int num_pixels, uint8_t* dst) {
1020 const uint32_t* const src_end = src + num_pixels; 1207 const uint32_t* const src_end = src + num_pixels;
1021 while (src < src_end) { 1208 while (src < src_end) {
1022 const uint32_t argb = *src++; 1209 const uint32_t argb = *src++;
1023 *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); 1210 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1024 *dst++ = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); 1211 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
1212 #ifdef WEBP_SWAP_16BIT_CSP
1213 *dst++ = ba;
1214 *dst++ = rg;
1215 #else
1216 *dst++ = rg;
1217 *dst++ = ba;
1218 #endif
1025 } 1219 }
1026 } 1220 }
1027 1221
1028 static void ConvertBGRAToRGB565(const uint32_t* src, 1222 static void ConvertBGRAToRGB565(const uint32_t* src,
1029 int num_pixels, uint8_t* dst) { 1223 int num_pixels, uint8_t* dst) {
1030 const uint32_t* const src_end = src + num_pixels; 1224 const uint32_t* const src_end = src + num_pixels;
1031 while (src < src_end) { 1225 while (src < src_end) {
1032 const uint32_t argb = *src++; 1226 const uint32_t argb = *src++;
1033 *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); 1227 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1034 *dst++ = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); 1228 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
1229 #ifdef WEBP_SWAP_16BIT_CSP
1230 *dst++ = gb;
1231 *dst++ = rg;
1232 #else
1233 *dst++ = rg;
1234 *dst++ = gb;
1235 #endif
1035 } 1236 }
1036 } 1237 }
1037 1238
1038 static void ConvertBGRAToBGR(const uint32_t* src, 1239 static void ConvertBGRAToBGR(const uint32_t* src,
1039 int num_pixels, uint8_t* dst) { 1240 int num_pixels, uint8_t* dst) {
1040 const uint32_t* const src_end = src + num_pixels; 1241 const uint32_t* const src_end = src + num_pixels;
1041 while (src < src_end) { 1242 while (src < src_end) {
1042 const uint32_t argb = *src++; 1243 const uint32_t argb = *src++;
1043 *dst++ = (argb >> 0) & 0xff; 1244 *dst++ = (argb >> 0) & 0xff;
1044 *dst++ = (argb >> 8) & 0xff; 1245 *dst++ = (argb >> 8) & 0xff;
1045 *dst++ = (argb >> 16) & 0xff; 1246 *dst++ = (argb >> 16) & 0xff;
1046 } 1247 }
1047 } 1248 }
1048 1249
1049 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, 1250 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1050 int swap_on_big_endian) { 1251 int swap_on_big_endian) {
1051 if (is_big_endian() == swap_on_big_endian) { 1252 if (is_big_endian() == swap_on_big_endian) {
1052 const uint32_t* const src_end = src + num_pixels; 1253 const uint32_t* const src_end = src + num_pixels;
1053 while (src < src_end) { 1254 while (src < src_end) {
1054 uint32_t argb = *src++; 1255 uint32_t argb = *src++;
1256
1257 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1055 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__)) 1258 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
1056 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb)); 1259 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
1057 *(uint32_t*)dst = argb; 1260 *(uint32_t*)dst = argb;
1058 dst += sizeof(argb);
1059 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER) 1261 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
1060 argb = _byteswap_ulong(argb); 1262 argb = _byteswap_ulong(argb);
1061 *(uint32_t*)dst = argb; 1263 *(uint32_t*)dst = argb;
1264 #else
1265 dst[0] = (argb >> 24) & 0xff;
1266 dst[1] = (argb >> 16) & 0xff;
1267 dst[2] = (argb >> 8) & 0xff;
1268 dst[3] = (argb >> 0) & 0xff;
1269 #endif
1270 #else // WEBP_REFERENCE_IMPLEMENTATION
1271 dst[0] = (argb >> 24) & 0xff;
1272 dst[1] = (argb >> 16) & 0xff;
1273 dst[2] = (argb >> 8) & 0xff;
1274 dst[3] = (argb >> 0) & 0xff;
1275 #endif
1062 dst += sizeof(argb); 1276 dst += sizeof(argb);
1063 #else
1064 *dst++ = (argb >> 24) & 0xff;
1065 *dst++ = (argb >> 16) & 0xff;
1066 *dst++ = (argb >> 8) & 0xff;
1067 *dst++ = (argb >> 0) & 0xff;
1068 #endif
1069 } 1277 }
1070 } else { 1278 } else {
1071 memcpy(dst, src, num_pixels * sizeof(*src)); 1279 memcpy(dst, src, num_pixels * sizeof(*src));
1072 } 1280 }
1073 } 1281 }
1074 1282
1075 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, 1283 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1076 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { 1284 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1077 switch (out_colorspace) { 1285 switch (out_colorspace) {
1078 case MODE_RGB: 1286 case MODE_RGB:
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1115 default: 1323 default:
1116 assert(0); // Code flow should not reach here. 1324 assert(0); // Code flow should not reach here.
1117 } 1325 }
1118 } 1326 }
1119 1327
1120 //------------------------------------------------------------------------------ 1328 //------------------------------------------------------------------------------
1121 1329
1122 #if defined(__cplusplus) || defined(c_plusplus) 1330 #if defined(__cplusplus) || defined(c_plusplus)
1123 } // extern "C" 1331 } // extern "C"
1124 #endif 1332 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698