OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkPngFilters.h" | 8 #include "SkPngFilters.h" |
9 #include "SkTypes.h" | |
10 | 9 |
11 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). | 10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). |
12 // They're positioned like this: | 11 // They're positioned like this: |
13 // prev: c b | 12 // prev: c b |
14 // row: a d | 13 // row: a d |
15 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be whichever | 14 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be whichever |
16 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.) | 15 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.) |
17 | 16 |
18 #if defined(__SSE2__) | 17 #if defined(__SSE2__) |
19 | 18 |
20 template <int bpp> | 19 template <int bpp> |
21 static __m128i load(const void* p) { | 20 static __m128i load(const void* p) { |
22 static_assert(bpp <= 4, ""); | 21 static_assert(bpp <= 4, ""); |
23 | 22 |
24 uint32_t packed; | 23 uint32_t packed; |
25 memcpy(&packed, p, bpp); | 24 memcpy(&packed, p, bpp); |
26 return _mm_cvtsi32_si128(packed); | 25 return _mm_cvtsi32_si128(packed); |
27 } | 26 } |
28 | 27 |
29 template <int bpp> | 28 template <int bpp> |
30 static void store(void* p, __m128i v) { | 29 static void store(void* p, __m128i v) { |
31 static_assert(bpp <= 4, ""); | 30 static_assert(bpp <= 4, ""); |
32 | 31 |
33 uint32_t packed = _mm_cvtsi128_si32(v); | 32 uint32_t packed = _mm_cvtsi128_si32(v); |
34 memcpy(p, &packed, bpp); | 33 memcpy(p, &packed, bpp); |
35 } | 34 } |
36 | 35 |
37 template <int bpp> | 36 template <int bpp> |
38 static void sk_sub_sse2(png_row_infop row_info, png_bytep row, png_const_bytep) { | 37 static void sk_sub_sse2(png_row_infop row_info, uint8_t* row, const uint8_t*) { |
39 // The Sub filter predicts each pixel as the previous pixel, a. | 38 // The Sub filter predicts each pixel as the previous pixel, a. |
40 // There is no pixel to the left of the first pixel. It's encoded directly. | 39 // There is no pixel to the left of the first pixel. It's encoded directly. |
41 // That works with our main loop if we just say that left pixel was zero. | 40 // That works with our main loop if we just say that left pixel was zero. |
42 __m128i a, d = _mm_setzero_si128(); | 41 __m128i a, d = _mm_setzero_si128(); |
43 | 42 |
44 int rb = row_info->rowbytes; | 43 int rb = row_info->rowbytes; |
45 while (rb > 0) { | 44 while (rb > 0) { |
46 a = d; d = load<bpp>(row); | 45 a = d; d = load<bpp>(row); |
47 d = _mm_add_epi8(d, a); | 46 d = _mm_add_epi8(d, a); |
48 store<bpp>(row, d); | 47 store<bpp>(row, d); |
49 | 48 |
50 row += bpp; | 49 row += bpp; |
51 rb -= bpp; | 50 rb -= bpp; |
52 } | 51 } |
53 } | 52 } |
54 | 53 |
55 template <int bpp> | 54 template <int bpp> |
56 void sk_avg_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 55 void sk_avg_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
57 // The Avg filter predicts each pixel as the (truncated) average of a and b. | 56 // The Avg filter predicts each pixel as the (truncated) average of a and b. |
58 // There's no pixel to the left of the first pixel. Luckily, it's | 57 // There's no pixel to the left of the first pixel. Luckily, it's |
59 // predicted to be half of the pixel above it. So again, this works | 58 // predicted to be half of the pixel above it. So again, this works |
60 // perfectly with our loop if we make sure a starts at zero. | 59 // perfectly with our loop if we make sure a starts at zero. |
61 const __m128i zero = _mm_setzero_si128(); | 60 const __m128i zero = _mm_setzero_si128(); |
62 __m128i b; | 61 __m128i b; |
63 __m128i a, d = zero; | 62 __m128i a, d = zero; |
64 | 63 |
65 int rb = row_info->rowbytes; | 64 int rb = row_info->rowbytes; |
66 while (rb > 0) { | 65 while (rb > 0) { |
(...skipping 20 matching lines...) Expand all Loading... |
87 return _mm_or_si128(_mm_subs_epu8(x,y), _mm_subs_epu8(y,x)); | 86 return _mm_or_si128(_mm_subs_epu8(x,y), _mm_subs_epu8(y,x)); |
88 } | 87 } |
89 | 88 |
90 // Bytewise c ? t : e. | 89 // Bytewise c ? t : e. |
91 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { | 90 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { |
92 // SSE 4.1+ would be: return _mm_blendv_epi8(e,t,c); | 91 // SSE 4.1+ would be: return _mm_blendv_epi8(e,t,c); |
93 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); | 92 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); |
94 } | 93 } |
95 | 94 |
96 template <int bpp> | 95 template <int bpp> |
97 void sk_paeth_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 96 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
98 // Paeth tries to predict pixel d using the pixel to the left of it, a, | 97 // Paeth tries to predict pixel d using the pixel to the left of it, a, |
99 // and two pixels from the previous row, b and c: | 98 // and two pixels from the previous row, b and c: |
100 // prev: c b | 99 // prev: c b |
101 // row: a d | 100 // row: a d |
102 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. | 101 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. |
103 | 102 |
104 // The first pixel has no left context, and so uses an Up filter, p = b. | 103 // The first pixel has no left context, and so uses an Up filter, p = b. |
105 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. | 104 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. |
106 // Here we zero b and d, which become c and a respectively at the start of the loop. | 105 // Here we zero b and d, which become c and a respectively at the start of the loop. |
107 __m128i c, b = _mm_setzero_si128(), | 106 __m128i c, b = _mm_setzero_si128(), |
(...skipping 29 matching lines...) Expand all Loading... |
137 // We've reconstructed d! Leave it for next round to become a, and write it out. | 136 // We've reconstructed d! Leave it for next round to become a, and write it out. |
138 d = _mm_add_epi8(d, nearest); | 137 d = _mm_add_epi8(d, nearest); |
139 store<bpp>(row, d); | 138 store<bpp>(row, d); |
140 | 139 |
141 prev += bpp; | 140 prev += bpp; |
142 row += bpp; | 141 row += bpp; |
143 rb -= bpp; | 142 rb -= bpp; |
144 } | 143 } |
145 } | 144 } |
146 | 145 |
147 void sk_sub3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 146 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
148 sk_sub_sse2<3>(row_info, row, prev); | 147 sk_sub_sse2<3>(row_info, row, prev); |
149 } | 148 } |
150 void sk_sub4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 149 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
151 sk_sub_sse2<4>(row_info, row, prev); | 150 sk_sub_sse2<4>(row_info, row, prev); |
152 } | 151 } |
153 | 152 |
154 void sk_avg3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 153 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
155 sk_avg_sse2<3>(row_info, row, prev); | 154 sk_avg_sse2<3>(row_info, row, prev); |
156 } | 155 } |
157 void sk_avg4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 156 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
158 sk_avg_sse2<4>(row_info, row, prev); | 157 sk_avg_sse2<4>(row_info, row, prev); |
159 } | 158 } |
160 | 159 |
161 void sk_paeth3_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 160 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
162 sk_paeth_sse2<3>(row_info, row, prev); | 161 sk_paeth_sse2<3>(row_info, row, prev); |
163 } | 162 } |
164 void sk_paeth4_sse2(png_row_infop row_info, png_bytep row, png_const_bytep prev) { | 163 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { |
165 sk_paeth_sse2<4>(row_info, row, prev); | 164 sk_paeth_sse2<4>(row_info, row, prev); |
166 } | 165 } |
167 | 166 |
168 #endif | 167 #endif |
OLD | NEW |