Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: third_party/libwebp/dsp/lossless.c

Issue 116213006: Update libwebp to 0.4.0 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: After Blink Roll Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/libwebp/dsp/lossless.h ('k') | third_party/libwebp/dsp/upsampling.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 Google Inc. All Rights Reserved. 1 // Copyright 2012 Google Inc. All Rights Reserved.
2 // 2 //
3 // Use of this source code is governed by a BSD-style license 3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source 4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may 6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree. 7 // be found in the AUTHORS file in the root of the source tree.
8 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
9 // 9 //
10 // Image transforms and color space conversion methods for lossless decoder. 10 // Image transforms and color space conversion methods for lossless decoder.
11 // 11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com) 12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 // Jyrki Alakuijala (jyrki@google.com) 13 // Jyrki Alakuijala (jyrki@google.com)
14 // Urvang Joshi (urvang@google.com) 14 // Urvang Joshi (urvang@google.com)
15 15
16 #include "./dsp.h" 16 #include "./dsp.h"
17 17
18 // Define the following if target arch is sure to have SSE2 18 #if defined(WEBP_USE_SSE2)
19 // #define WEBP_TARGET_HAS_SSE2
20
21 #if defined(__cplusplus) || defined(c_plusplus)
22 extern "C" {
23 #endif
24
25 #if defined(WEBP_TARGET_HAS_SSE2)
26 #include <emmintrin.h> 19 #include <emmintrin.h>
27 #endif 20 #endif
28 21
29 #include <math.h> 22 #include <math.h>
30 #include <stdlib.h> 23 #include <stdlib.h>
31 #include "./lossless.h" 24 #include "./lossless.h"
32 #include "../dec/vp8li.h" 25 #include "../dec/vp8li.h"
33 #include "./yuv.h" 26 #include "./yuv.h"
34 27
35 #define MAX_DIFF_COST (1e30f) 28 #define MAX_DIFF_COST (1e30f)
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f, 221 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
229 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f, 222 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
230 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f, 223 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
231 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f, 224 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
232 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f, 225 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
233 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f, 226 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
234 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f, 227 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
235 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f 228 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
236 }; 229 };
237 230
231 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
232 { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
233 { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
234 { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
235 { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
236 { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
237 {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
238 {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
239 {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
240 {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
241 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
242 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
243 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
244 {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
245 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
246 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
247 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
248 {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
249 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
250 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
251 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
252 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
253 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
254 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
255 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
256 {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
257 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
258 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
259 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
260 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
261 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
262 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
263 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
264 {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
265 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
266 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
267 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
268 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
269 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
270 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
271 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
272 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
273 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
274 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
275 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
276 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
277 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
278 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
279 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
280 {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
281 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
282 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
283 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
284 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
285 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
286 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
287 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
288 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
289 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
290 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
291 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
292 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
293 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
294 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
295 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
296 };
297
298 const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
299 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3,
300 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
301 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
302 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
303 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
304 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
305 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
306 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
307 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
308 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
309 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
310 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
311 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
312 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
313 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
314 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
315 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
316 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
317 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
318 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
319 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
320 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
321 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
322 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
323 127,
324 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
325 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
326 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
327 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
328 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
329 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
330 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
331 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
332 };
333
238 float VP8LFastSLog2Slow(int v) { 334 float VP8LFastSLog2Slow(int v) {
239 assert(v >= LOG_LOOKUP_IDX_MAX); 335 assert(v >= LOG_LOOKUP_IDX_MAX);
240 if (v < APPROX_LOG_MAX) { 336 if (v < APPROX_LOG_MAX) {
241 int log_cnt = 0; 337 int log_cnt = 0;
242 const float v_f = (float)v; 338 const float v_f = (float)v;
243 while (v >= LOG_LOOKUP_IDX_MAX) { 339 while (v >= LOG_LOOKUP_IDX_MAX) {
244 ++log_cnt; 340 ++log_cnt;
245 v = v >> 1; 341 v = v >> 1;
246 } 342 }
247 return v_f * (kLog2Table[v] + log_cnt); 343 return v_f * (kLog2Table[v] + log_cnt);
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
280 376
281 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { 377 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
282 return Average2(Average2(a0, a2), a1); 378 return Average2(Average2(a0, a2), a1);
283 } 379 }
284 380
285 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, 381 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
286 uint32_t a2, uint32_t a3) { 382 uint32_t a2, uint32_t a3) {
287 return Average2(Average2(a0, a1), Average2(a2, a3)); 383 return Average2(Average2(a0, a1), Average2(a2, a3));
288 } 384 }
289 385
290 #if defined(WEBP_TARGET_HAS_SSE2)
291 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
292 uint32_t c2) {
293 const __m128i zero = _mm_setzero_si128();
294 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
295 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
296 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
297 const __m128i V1 = _mm_add_epi16(C0, C1);
298 const __m128i V2 = _mm_sub_epi16(V1, C2);
299 const __m128i b = _mm_packus_epi16(V2, V2);
300 const uint32_t output = _mm_cvtsi128_si32(b);
301 return output;
302 }
303
304 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
305 uint32_t c2) {
306 const uint32_t ave = Average2(c0, c1);
307 const __m128i zero = _mm_setzero_si128();
308 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
309 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
310 const __m128i A1 = _mm_sub_epi16(A0, B0);
311 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
312 const __m128i A2 = _mm_sub_epi16(A1, BgtA);
313 const __m128i A3 = _mm_srai_epi16(A2, 1);
314 const __m128i A4 = _mm_add_epi16(A0, A3);
315 const __m128i A5 = _mm_packus_epi16(A4, A4);
316 const uint32_t output = _mm_cvtsi128_si32(A5);
317 return output;
318 }
319
320 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
321 int pa_minus_pb;
322 const __m128i zero = _mm_setzero_si128();
323 const __m128i A0 = _mm_cvtsi32_si128(a);
324 const __m128i B0 = _mm_cvtsi32_si128(b);
325 const __m128i C0 = _mm_cvtsi32_si128(c);
326 const __m128i AC0 = _mm_subs_epu8(A0, C0);
327 const __m128i CA0 = _mm_subs_epu8(C0, A0);
328 const __m128i BC0 = _mm_subs_epu8(B0, C0);
329 const __m128i CB0 = _mm_subs_epu8(C0, B0);
330 const __m128i AC = _mm_or_si128(AC0, CA0);
331 const __m128i BC = _mm_or_si128(BC0, CB0);
332 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
333 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
334 const __m128i diff = _mm_sub_epi16(pb, pa);
335 {
336 int16_t out[8];
337 _mm_storeu_si128((__m128i*)out, diff);
338 pa_minus_pb = out[0] + out[1] + out[2] + out[3];
339 }
340 return (pa_minus_pb <= 0) ? a : b;
341 }
342
343 #else
344
345 static WEBP_INLINE uint32_t Clip255(uint32_t a) { 386 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
346 if (a < 256) { 387 if (a < 256) {
347 return a; 388 return a;
348 } 389 }
349 // return 0, when a is a negative integer. 390 // return 0, when a is a negative integer.
350 // return 255, when a is positive. 391 // return 255, when a is positive.
351 return ~a >> 24; 392 return ~a >> 24;
352 } 393 }
353 394
354 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { 395 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 } 430 }
390 431
391 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { 432 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
392 const int pa_minus_pb = 433 const int pa_minus_pb =
393 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + 434 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
394 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + 435 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
395 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + 436 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
396 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); 437 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
397 return (pa_minus_pb <= 0) ? a : b; 438 return (pa_minus_pb <= 0) ? a : b;
398 } 439 }
399 #endif
400 440
401 //------------------------------------------------------------------------------ 441 //------------------------------------------------------------------------------
402 // Predictors 442 // Predictors
403 443
404 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { 444 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
405 (void)top; 445 (void)top;
406 (void)left; 446 (void)left;
407 return ARGB_BLACK; 447 return ARGB_BLACK;
408 } 448 }
409 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { 449 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
442 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { 482 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
443 const uint32_t pred = Average2(top[0], top[1]); 483 const uint32_t pred = Average2(top[0], top[1]);
444 (void)left; 484 (void)left;
445 return pred; 485 return pred;
446 } 486 }
447 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { 487 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
448 const uint32_t pred = Average4(left, top[-1], top[0], top[1]); 488 const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
449 return pred; 489 return pred;
450 } 490 }
451 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { 491 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
452 const uint32_t pred = Select(top[0], left, top[-1]); 492 const uint32_t pred = VP8LSelect(top[0], left, top[-1]);
453 return pred; 493 return pred;
454 } 494 }
455 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { 495 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
456 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); 496 const uint32_t pred = VP8LClampedAddSubtractFull(left, top[0], top[-1]);
457 return pred; 497 return pred;
458 } 498 }
459 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { 499 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
460 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); 500 const uint32_t pred = VP8LClampedAddSubtractHalf(left, top[0], top[-1]);
461 return pred; 501 return pred;
462 } 502 }
463 503
504 // TODO(vikasa): Export the predictor array, to allow SSE2 variants.
464 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top); 505 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
465 static const PredictorFunc kPredictors[16] = { 506 static const PredictorFunc kPredictors[16] = {
466 Predictor0, Predictor1, Predictor2, Predictor3, 507 Predictor0, Predictor1, Predictor2, Predictor3,
467 Predictor4, Predictor5, Predictor6, Predictor7, 508 Predictor4, Predictor5, Predictor6, Predictor7,
468 Predictor8, Predictor9, Predictor10, Predictor11, 509 Predictor8, Predictor9, Predictor10, Predictor11,
469 Predictor12, Predictor13, 510 Predictor12, Predictor13,
470 Predictor0, Predictor0 // <- padding security sentinels 511 Predictor0, Predictor0 // <- padding security sentinels
471 }; 512 };
472 513
473 // TODO(vikasa): Replace 256 etc with defines. 514 // TODO(vikasa): Replace 256 etc with defines.
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after
709 } 750 }
710 data += width; 751 data += width;
711 ++y; 752 ++y;
712 if ((y & mask) == 0) { // Use the same mask, since tiles are squares. 753 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
713 pred_mode_base += tiles_per_row; 754 pred_mode_base += tiles_per_row;
714 } 755 }
715 } 756 }
716 } 757 }
717 } 758 }
718 759
719 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) { 760 static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
720 int i = 0; 761 int i = 0;
721 #if defined(WEBP_TARGET_HAS_SSE2)
722 const __m128i mask = _mm_set1_epi32(0x0000ff00);
723 for (; i + 4 < num_pixs; i += 4) {
724 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
725 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
726 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
727 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
728 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
729 const __m128i out = _mm_sub_epi8(in, in_0g0g);
730 _mm_storeu_si128((__m128i*)&argb_data[i], out);
731 }
732 // fallthrough and finish off with plain-C
733 #endif
734 for (; i < num_pixs; ++i) { 762 for (; i < num_pixs; ++i) {
735 const uint32_t argb = argb_data[i]; 763 const uint32_t argb = argb_data[i];
736 const uint32_t green = (argb >> 8) & 0xff; 764 const uint32_t green = (argb >> 8) & 0xff;
737 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; 765 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
738 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; 766 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
739 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; 767 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
740 } 768 }
741 } 769 }
742 770
743 // Add green to blue and red channels (i.e. perform the inverse transform of 771 // Add green to blue and red channels (i.e. perform the inverse transform of
744 // 'subtract green'). 772 // 'subtract green').
745 static void AddGreenToBlueAndRed(const VP8LTransform* const transform, 773 static void AddGreenToBlueAndRed(uint32_t* data, const uint32_t* data_end) {
746 int y_start, int y_end, uint32_t* data) {
747 const int width = transform->xsize_;
748 const uint32_t* const data_end = data + (y_end - y_start) * width;
749 #if defined(WEBP_TARGET_HAS_SSE2)
750 const __m128i mask = _mm_set1_epi32(0x0000ff00);
751 for (; data + 4 < data_end; data += 4) {
752 const __m128i in = _mm_loadu_si128((__m128i*)data);
753 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
754 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
755 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
756 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
757 const __m128i out = _mm_add_epi8(in, in_0g0g);
758 _mm_storeu_si128((__m128i*)data, out);
759 }
760 // fallthrough and finish off with plain-C
761 #endif
762 while (data < data_end) { 774 while (data < data_end) {
763 const uint32_t argb = *data; 775 const uint32_t argb = *data;
764 const uint32_t green = ((argb >> 8) & 0xff); 776 const uint32_t green = ((argb >> 8) & 0xff);
765 uint32_t red_blue = (argb & 0x00ff00ffu); 777 uint32_t red_blue = (argb & 0x00ff00ffu);
766 red_blue += (green << 16) | green; 778 red_blue += (green << 16) | green;
767 red_blue &= 0x00ff00ffu; 779 red_blue &= 0x00ff00ffu;
768 *data++ = (argb & 0xff00ff00u) | red_blue; 780 *data++ = (argb & 0xff00ff00u) | red_blue;
769 } 781 }
770 } 782 }
771 783
(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after
1149 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex, 1161 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1150 GetARGBValue) 1162 GetARGBValue)
1151 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex, 1163 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1152 GetAlphaValue) 1164 GetAlphaValue)
1153 1165
1154 #undef COLOR_INDEX_INVERSE 1166 #undef COLOR_INDEX_INVERSE
1155 1167
1156 void VP8LInverseTransform(const VP8LTransform* const transform, 1168 void VP8LInverseTransform(const VP8LTransform* const transform,
1157 int row_start, int row_end, 1169 int row_start, int row_end,
1158 const uint32_t* const in, uint32_t* const out) { 1170 const uint32_t* const in, uint32_t* const out) {
1171 const int width = transform->xsize_;
1159 assert(row_start < row_end); 1172 assert(row_start < row_end);
1160 assert(row_end <= transform->ysize_); 1173 assert(row_end <= transform->ysize_);
1161 switch (transform->type_) { 1174 switch (transform->type_) {
1162 case SUBTRACT_GREEN: 1175 case SUBTRACT_GREEN:
1163 AddGreenToBlueAndRed(transform, row_start, row_end, out); 1176 VP8LAddGreenToBlueAndRed(out, out + (row_end - row_start) * width);
1164 break; 1177 break;
1165 case PREDICTOR_TRANSFORM: 1178 case PREDICTOR_TRANSFORM:
1166 PredictorInverseTransform(transform, row_start, row_end, out); 1179 PredictorInverseTransform(transform, row_start, row_end, out);
1167 if (row_end != transform->ysize_) { 1180 if (row_end != transform->ysize_) {
1168 // The last predicted row in this iteration will be the top-pred row 1181 // The last predicted row in this iteration will be the top-pred row
1169 // for the first row in next iteration. 1182 // for the first row in next iteration.
1170 const int width = transform->xsize_;
1171 memcpy(out - width, out + (row_end - row_start - 1) * width, 1183 memcpy(out - width, out + (row_end - row_start - 1) * width,
1172 width * sizeof(*out)); 1184 width * sizeof(*out));
1173 } 1185 }
1174 break; 1186 break;
1175 case CROSS_COLOR_TRANSFORM: 1187 case CROSS_COLOR_TRANSFORM:
1176 ColorSpaceInverseTransform(transform, row_start, row_end, out); 1188 ColorSpaceInverseTransform(transform, row_start, row_end, out);
1177 break; 1189 break;
1178 case COLOR_INDEXING_TRANSFORM: 1190 case COLOR_INDEXING_TRANSFORM:
1179 if (in == out && transform->bits_ > 0) { 1191 if (in == out && transform->bits_ > 0) {
1180 // Move packed pixels to the end of unpacked region, so that unpacking 1192 // Move packed pixels to the end of unpacked region, so that unpacking
1181 // can occur seamlessly. 1193 // can occur seamlessly.
1182 // Also, note that this is the only transform that applies on 1194 // Also, note that this is the only transform that applies on
1183 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other 1195 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1184 // transforms work on effective width of xsize_. 1196 // transforms work on effective width of xsize_.
1185 const int out_stride = (row_end - row_start) * transform->xsize_; 1197 const int out_stride = (row_end - row_start) * width;
1186 const int in_stride = (row_end - row_start) * 1198 const int in_stride = (row_end - row_start) *
1187 VP8LSubSampleSize(transform->xsize_, transform->bits_); 1199 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1188 uint32_t* const src = out + out_stride - in_stride; 1200 uint32_t* const src = out + out_stride - in_stride;
1189 memmove(src, out, in_stride * sizeof(*src)); 1201 memmove(src, out, in_stride * sizeof(*src));
1190 ColorIndexInverseTransform(transform, row_start, row_end, src, out); 1202 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1191 } else { 1203 } else {
1192 ColorIndexInverseTransform(transform, row_start, row_end, in, out); 1204 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1193 } 1205 }
1194 break; 1206 break;
1195 } 1207 }
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after
1375 code |= row[x] << (8 + bit_depth * xsub); 1387 code |= row[x] << (8 + bit_depth * xsub);
1376 dst[x >> xbits] = code; 1388 dst[x >> xbits] = code;
1377 } 1389 }
1378 } else { 1390 } else {
1379 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8); 1391 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1380 } 1392 }
1381 } 1393 }
1382 1394
1383 //------------------------------------------------------------------------------ 1395 //------------------------------------------------------------------------------
1384 1396
1385 #if defined(__cplusplus) || defined(c_plusplus) 1397 // TODO(vikasa): Move the SSE2 functions to lossless_dsp.c (new file), once
1386 } // extern "C" 1398 // color-space conversion methods (ConvertFromBGRA) are also updated for SSE2.
1399 #if defined(WEBP_USE_SSE2)
1400 static WEBP_INLINE uint32_t ClampedAddSubtractFullSSE2(uint32_t c0, uint32_t c1,
1401 uint32_t c2) {
1402 const __m128i zero = _mm_setzero_si128();
1403 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
1404 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
1405 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
1406 const __m128i V1 = _mm_add_epi16(C0, C1);
1407 const __m128i V2 = _mm_sub_epi16(V1, C2);
1408 const __m128i b = _mm_packus_epi16(V2, V2);
1409 const uint32_t output = _mm_cvtsi128_si32(b);
1410 return output;
1411 }
1412
1413 static WEBP_INLINE uint32_t ClampedAddSubtractHalfSSE2(uint32_t c0, uint32_t c1,
1414 uint32_t c2) {
1415 const uint32_t ave = Average2(c0, c1);
1416 const __m128i zero = _mm_setzero_si128();
1417 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
1418 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
1419 const __m128i A1 = _mm_sub_epi16(A0, B0);
1420 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
1421 const __m128i A2 = _mm_sub_epi16(A1, BgtA);
1422 const __m128i A3 = _mm_srai_epi16(A2, 1);
1423 const __m128i A4 = _mm_add_epi16(A0, A3);
1424 const __m128i A5 = _mm_packus_epi16(A4, A4);
1425 const uint32_t output = _mm_cvtsi128_si32(A5);
1426 return output;
1427 }
1428
1429 static WEBP_INLINE uint32_t SelectSSE2(uint32_t a, uint32_t b, uint32_t c) {
1430 int pa_minus_pb;
1431 const __m128i zero = _mm_setzero_si128();
1432 const __m128i A0 = _mm_cvtsi32_si128(a);
1433 const __m128i B0 = _mm_cvtsi32_si128(b);
1434 const __m128i C0 = _mm_cvtsi32_si128(c);
1435 const __m128i AC0 = _mm_subs_epu8(A0, C0);
1436 const __m128i CA0 = _mm_subs_epu8(C0, A0);
1437 const __m128i BC0 = _mm_subs_epu8(B0, C0);
1438 const __m128i CB0 = _mm_subs_epu8(C0, B0);
1439 const __m128i AC = _mm_or_si128(AC0, CA0);
1440 const __m128i BC = _mm_or_si128(BC0, CB0);
1441 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
1442 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
1443 const __m128i diff = _mm_sub_epi16(pb, pa);
1444 {
1445 int16_t out[8];
1446 _mm_storeu_si128((__m128i*)out, diff);
1447 pa_minus_pb = out[0] + out[1] + out[2] + out[3];
1448 }
1449 return (pa_minus_pb <= 0) ? a : b;
1450 }
1451
1452 static void SubtractGreenFromBlueAndRedSSE2(uint32_t* argb_data, int num_pixs) {
1453 int i = 0;
1454 const __m128i mask = _mm_set1_epi32(0x0000ff00);
1455 for (; i + 4 < num_pixs; i += 4) {
1456 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
1457 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
1458 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
1459 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
1460 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
1461 const __m128i out = _mm_sub_epi8(in, in_0g0g);
1462 _mm_storeu_si128((__m128i*)&argb_data[i], out);
1463 }
1464 // fallthrough and finish off with plain-C
1465 for (; i < num_pixs; ++i) {
1466 const uint32_t argb = argb_data[i];
1467 const uint32_t green = (argb >> 8) & 0xff;
1468 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
1469 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
1470 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
1471 }
1472 }
1473
1474 static void AddGreenToBlueAndRedSSE2(uint32_t* data, const uint32_t* data_end) {
1475 const __m128i mask = _mm_set1_epi32(0x0000ff00);
1476 for (; data + 4 < data_end; data += 4) {
1477 const __m128i in = _mm_loadu_si128((__m128i*)data);
1478 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
1479 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
1480 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
1481 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
1482 const __m128i out = _mm_add_epi8(in, in_0g0g);
1483 _mm_storeu_si128((__m128i*)data, out);
1484 }
1485 // fallthrough and finish off with plain-C
1486 while (data < data_end) {
1487 const uint32_t argb = *data;
1488 const uint32_t green = ((argb >> 8) & 0xff);
1489 uint32_t red_blue = (argb & 0x00ff00ffu);
1490 red_blue += (green << 16) | green;
1491 red_blue &= 0x00ff00ffu;
1492 *data++ = (argb & 0xff00ff00u) | red_blue;
1493 }
1494 }
1495
1496 extern void VP8LDspInitSSE2(void);
1497
1498 void VP8LDspInitSSE2(void) {
1499 VP8LClampedAddSubtractFull = ClampedAddSubtractFullSSE2;
1500 VP8LClampedAddSubtractHalf = ClampedAddSubtractHalfSSE2;
1501 VP8LSelect = SelectSSE2;
1502 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRedSSE2;
1503 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRedSSE2;
1504 }
1387 #endif 1505 #endif
1506 //------------------------------------------------------------------------------
1507
1508 VP8LPredClampedAddSubFunc VP8LClampedAddSubtractFull;
1509 VP8LPredClampedAddSubFunc VP8LClampedAddSubtractHalf;
1510 VP8LPredSelectFunc VP8LSelect;
1511 VP8LSubtractGreenFromBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
1512 VP8LAddGreenToBlueAndRedFunc VP8LAddGreenToBlueAndRed;
1513
1514 void VP8LDspInit(void) {
1515 VP8LClampedAddSubtractFull = ClampedAddSubtractFull;
1516 VP8LClampedAddSubtractHalf = ClampedAddSubtractHalf;
1517 VP8LSelect = Select;
1518 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
1519 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
1520
1521 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
1522 if (VP8GetCPUInfo != NULL) {
1523 #if defined(WEBP_USE_SSE2)
1524 if (VP8GetCPUInfo(kSSE2)) {
1525 VP8LDspInitSSE2();
1526 }
1527 #endif
1528 }
1529 }
1530
1531 //------------------------------------------------------------------------------
1532
OLDNEW
« no previous file with comments | « third_party/libwebp/dsp/lossless.h ('k') | third_party/libwebp/dsp/upsampling.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698