Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(478)

Side by Side Diff: src/codec/SkPngFilters.cpp

Issue 1699953002: Make png filter functions compatible with libpng (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Add loadX() and storeX() functions Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkPngFilters.h" 8 #include "SkPngFilters.h"
9 9
10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). 10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d).
11 // They're positioned like this: 11 // They're positioned like this:
12 // prev: c b 12 // prev: c b
13 // row: a d 13 // row: a d
14 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be which ever 14 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be which ever
15 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.) 15 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.)
16 16
17 #if defined(__SSE2__) 17 #if defined(__SSE2__)
18 18
19 template <int bpp> 19 static __m128i load3(const void* p) {
20 static __m128i load(const void* p) { 20 uint32_t packed;
21 static_assert(bpp <= 4, ""); 21 memcpy(&packed, p, 3);
22 22 return _mm_cvtsi32_si128(packed);
23 uint32_t packed; 23 }
24 memcpy(&packed, p, bpp); 24
25 return _mm_cvtsi32_si128(packed); 25 static __m128i load4(const void* p) {
26 } 26 uint32_t packed;
27 27 memcpy(&packed, p, 4);
28 template <int bpp> 28 return _mm_cvtsi32_si128(packed);
mtklein 2016/02/16 14:11:57 Now that we've split these apart, we might conside
29 static void store(void* p, __m128i v) { 29 }
30 static_assert(bpp <= 4, ""); 30
31 31 static void store3(void* p, __m128i v) {
32 uint32_t packed = _mm_cvtsi128_si32(v); 32 uint32_t packed = _mm_cvtsi128_si32(v);
33 memcpy(p, &packed, bpp); 33 memcpy(p, &packed, 3);
34 } 34 }
35 35
36 template <int bpp> 36 static void store4(void* p, __m128i v) {
37 static void sk_sub_sse2(png_row_infop row_info, uint8_t* row, const uint8_t*) { 37 uint32_t packed = _mm_cvtsi128_si32(v);
38 // The Sub filter predicts each pixel as the previous pixel, a. 38 memcpy(p, &packed, 4);
39 // There is no pixel to the left of the first pixel. It's encoded directly. 39 }
40 // That works with our main loop if we just say that left pixel was zero. 40
41 __m128i a, d = _mm_setzero_si128(); 41 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row,
mtklein 2016/02/16 14:11:57 Why do these guys go to two lines? Wouldn't it on
msarett 2016/02/16 14:48:59 Not sure. I copied the style of the signatures fr
42 42 const uint8_t* prev)
43 int rb = row_info->rowbytes; 43 {
44 while (rb > 0) { 44 // The Sub filter predicts each pixel as the previous pixel, a.
45 a = d; d = load<bpp>(row); 45 // There is no pixel to the left of the first pixel. It's encoded directly.
46 d = _mm_add_epi8(d, a); 46 // That works with our main loop if we just say that left pixel was zero.
47 store<bpp>(row, d); 47 __m128i a, d = _mm_setzero_si128();
48 48
49 row += bpp; 49 int rb = row_info->rowbytes;
50 rb -= bpp; 50 while (rb > 0) {
51 } 51 a = d; d = load3(row);
52 } 52 d = _mm_add_epi8(d, a);
53 53 store3(row, d);
54 template <int bpp> 54
55 void sk_avg_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 55 row += 3;
56 // The Avg filter predicts each pixel as the (truncated) average of a and b. 56 rb -= 3;
57 // There's no pixel to the left of the first pixel. Luckily, it's 57 }
58 // predicted to be half of the pixel above it. So again, this works 58 }
59 // perfectly with our loop if we make sure a starts at zero. 59
60 const __m128i zero = _mm_setzero_si128(); 60 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row,
61 __m128i b; 61 const uint8_t* prev)
62 __m128i a, d = zero; 62 {
63 63 // The Sub filter predicts each pixel as the previous pixel, a.
64 int rb = row_info->rowbytes; 64 // There is no pixel to the left of the first pixel. It's encoded directly.
65 while (rb > 0) { 65 // That works with our main loop if we just say that left pixel was zero.
66 b = load<bpp>(prev); 66 __m128i a, d = _mm_setzero_si128();
67 a = d; d = load<bpp>(row ); 67
68 68 int rb = row_info->rowbytes;
69 // PNG requires a truncating average here, so sadly we can't just use _mm_avg_epu8... 69 while (rb > 0) {
70 __m128i avg = _mm_avg_epu8(a,b); 70 a = d; d = load4(row);
71 // ...but we can fix it up by subtracting off 1 if it rounded up. 71 d = _mm_add_epi8(d, a);
72 avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b), _mm_set1_epi8(1))); 72 store4(row, d);
73 73
74 d = _mm_add_epi8(d, avg); 74 row += 4;
75 store<bpp>(row, d); 75 rb -= 4;
76 76 }
77 prev += bpp; 77 }
78 row += bpp; 78
79 rb -= bpp; 79 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row,
80 } 80 const uint8_t* prev)
81 } 81 {
82 82 // The Avg filter predicts each pixel as the (truncated) average of a and b.
83 // Returns |x| for 16-bit lanes. 83 // There's no pixel to the left of the first pixel. Luckily, it's
84 static __m128i abs_i16(__m128i x) { 84 // predicted to be half of the pixel above it. So again, this works
85 #if defined(__SSSE3__) 85 // perfectly with our loop if we make sure a starts at zero.
86 return _mm_abs_epi16(x); 86 const __m128i zero = _mm_setzero_si128();
87 #else 87 __m128i b;
mtklein 2016/02/16 14:11:57 We might want to scoot b over a few columns right
msarett 2016/02/16 14:48:59 Done.
88 // Read this all as, return x<0 ? -x : x. 88 __m128i a, d = zero;
89 // To negate two's complement, you flip all the bits then add 1. 89
90 __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128()); 90 int rb = row_info->rowbytes;
91 x = _mm_xor_si128(x, is_negative); // Flip negative lanes. 91 while (rb > 0) {
92 x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15)); // +1 to negative lanes, else +0. 92 b = load3(prev);
93 return x; 93 a = d; d = load3(row);
94 #endif 94
95 } 95 // PNG requires a truncating average here, so sadly we can't just use
96 96 // _mm_avg_epu8...
97 // Bytewise c ? t : e. 97 __m128i avg = _mm_avg_epu8(a,b);
98 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { 98 // ...but we can fix it up by subtracting off 1 if it rounded up.
99 #if 0 && defined(__SSE4_1__) // Make sure we have a bot testing this before enabling. 99 avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
100 return _mm_blendv_epi8(e,t,c); 100 _mm_set1_epi8(1)));
101 #else 101
102 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); 102 d = _mm_add_epi8(d, avg);
103 #endif 103 store3(row, d);
104 } 104
105 105 prev += 3;
106 template <int bpp> 106 row += 3;
107 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 107 rb -= 3;
108 // Paeth tries to predict pixel d using the pixel to the left of it, a, 108 }
109 // and two pixels from the previous row, b and c: 109 }
110 // prev: c b 110 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row,
111 // row: a d 111 const uint8_t* prev)
112 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. 112 {
113 113 // The Avg filter predicts each pixel as the (truncated) average of a and b.
114 // The first pixel has no left context, and so uses an Up filter, p = b. 114 // There's no pixel to the left of the first pixel. Luckily, it's
115 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. 115 // predicted to be half of the pixel above it. So again, this works
116 // Here we zero b and d, which become c and a respectively at the start of the loop. 116 // perfectly with our loop if we make sure a starts at zero.
117 const __m128i zero = _mm_setzero_si128(); 117 const __m128i zero = _mm_setzero_si128();
118 __m128i c, b = zero, 118 __m128i b;
119 a, d = zero; 119 __m128i a, d = zero;
120 120
121 int rb = row_info->rowbytes; 121 int rb = row_info->rowbytes;
122 while (rb > 0) { 122 while (rb > 0) {
123 // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates. 123 b = load4(prev);
124 c = b; b = _mm_unpacklo_epi8(load<bpp>(prev), zero); 124 a = d; d = load4(row);
125 a = d; d = _mm_unpacklo_epi8(load<bpp>(row ), zero); 125
126 126 // PNG requires a truncating average here, so sadly we can't just use
127 __m128i pa = _mm_sub_epi16(b,c), // (p-a) == (a+b-c - a) == (b-c) 127 // _mm_avg_epu8...
128 pb = _mm_sub_epi16(a,c), // (p-b) == (a+b-c - b) == (a-c) 128 __m128i avg = _mm_avg_epu8(a,b);
129 pc = _mm_add_epi16(pa,pb); // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) 129 // ...but we can fix it up by subtracting off 1 if it rounded up.
130 130 avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
131 pa = abs_i16(pa); // |p-a| 131 _mm_set1_epi8(1)));
132 pb = abs_i16(pb); // |p-b| 132
133 pc = abs_i16(pc); // |p-c| 133 d = _mm_add_epi8(d, avg);
134 134 store4(row, d);
135 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); 135
136 136 prev += 4;
137 // Paeth breaks ties favoring a over b over c. 137 row += 4;
138 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, 138 rb -= 4;
139 if_then_else(_mm_cmpeq_epi16(smallest, pb), b, 139 }
140 c)); 140 }
141 141
142 d = _mm_add_epi8(d, nearest); // Note `_epi8`: we need addition to wrap modulo 255. 142 // Returns |x| for 16-bit lanes.
143 store<bpp>(row, _mm_packus_epi16(d,d)); 143 static __m128i abs_i16(__m128i x) {
144 144 #if defined(__SSSE3__)
145 prev += bpp; 145 return _mm_abs_epi16(x);
146 row += bpp; 146 #else
147 rb -= bpp; 147 // Read this all as, return x<0 ? -x : x.
148 } 148 // To negate two's complement, you flip all the bits then add 1.
149 } 149 __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
150 150 // Flip negative lanes.
151 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 151 x = _mm_xor_si128(x, is_negative);
152 sk_sub_sse2<3>(row_info, row, prev); 152 // +1 to negative lanes, else +0.
153 } 153 x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15));
154 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 154 return x;
155 sk_sub_sse2<4>(row_info, row, prev);
156 }
157
158 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
159 sk_avg_sse2<3>(row_info, row, prev);
160 }
161 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
162 sk_avg_sse2<4>(row_info, row, prev);
163 }
164
165 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
166 sk_paeth_sse2<3>(row_info, row, prev);
167 }
168 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
169 sk_paeth_sse2<4>(row_info, row, prev);
170 }
171
172 #endif 155 #endif
156 }
157
158 // Bytewise c ? t : e.
159 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
160 #if defined(__SSE4_1__)
161 return _mm_blendv_epi8(e,t,c);
162 #else
163 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
164 #endif
165 }
166
167 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row,
168 const uint8_t* prev)
169 {
170 // Paeth tries to predict pixel d using the pixel to the left of it, a,
171 // and two pixels from the previous row, b and c:
172 // prev: c b
173 // row: a d
174 // The Paeth function predicts d to be whichever of a, b, or c is nearest to
175 // p=a+b-c. The first pixel has no left context, and so uses an Up filter,
176 // p = b. This works naturally with our main loop's p = a+b-c if we force a
177 // and c to zero. Here we zero b and d, which become c and a respectively
178 // at the start of the loop.
179 const __m128i zero = _mm_setzero_si128();
180 __m128i c, b = zero,
181 a, d = zero;
182
183 int rb = row_info->rowbytes;
184 while (rb > 0) {
185 // It's easiest to do this math (particularly, deal with pc) with 16-bit
186 // intermediates.
187 b = load3(prev);
188 d = load3(row);
mtklein 2016/02/16 14:11:57 I think this breaks things by loading new values f
msarett 2016/02/16 14:48:59 Done.
189 c = b; b = _mm_unpacklo_epi8(b, zero);
190 a = d; d = _mm_unpacklo_epi8(d, zero);
191 __m128i pa = _mm_sub_epi16(b,c),
192 // (p-a) == (a+b-c - a) == (b-c)
mtklein 2016/02/16 14:11:57 Moving these comments around and changing the alig
msarett 2016/02/16 14:48:59 Done.
193 pb = _mm_sub_epi16(a,c),
194 // (p-b) == (a+b-c - b) == (a-c)
195 pc = _mm_add_epi16(pa,pb);
196 // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c)
197
198 pa = abs_i16(pa);// |p-a|
199 pb = abs_i16(pb);// |p-b|
200 pc = abs_i16(pc);// |p-c|
201
202 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
203
204 // Paeth breaks ties favoring a over b over c.
205 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
206 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
mtklein 2016/02/16 14:11:57 I think this hurts readability to indent like this
msarett 2016/02/16 14:48:58 Done.
207 c));
208
209 // Note `_epi8`: we need addition to wrap modulo 255.
210 d = _mm_add_epi8(d, nearest);
211 store3(row, _mm_packus_epi16(d,d));
212 prev += 3;
213 row += 3;
214 rb -= 3;
215 }
216 }
217
218 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row,
219 const uint8_t* prev)
220 {
221 // Paeth tries to predict pixel d using the pixel to the left of it, a,
222 // and two pixels from the previous row, b and c:
223 // prev: c b
224 // row: a d
225 // The Paeth function predicts d to be whichever of a, b, or c is nearest to
226 // p=a+b-c. The first pixel has no left context, and so uses an Up filter,
227 // p = b. This works naturally with our main loop's p = a+b-c if we force a
228 // and c to zero. Here we zero b and d, which become c and a respectively
229 // at the start of the loop.
230 const __m128i zero = _mm_setzero_si128();
231 __m128i c, b = zero,
232 a, d = zero;
233
234 int rb = row_info->rowbytes;
235 while (rb > 0) {
236 // It's easiest to do this math (particularly, deal with pc) with 16-bit
237 // intermediates.
238 b = load4(prev);
239 d = load4(row);
240 c = b; b = _mm_unpacklo_epi8(b, zero);
241 a = d; d = _mm_unpacklo_epi8(d, zero);
242 __m128i pa = _mm_sub_epi16(b,c),
243 // (p-a) == (a+b-c - a) == (b-c)
244 pb = _mm_sub_epi16(a,c),
245 // (p-b) == (a+b-c - b) == (a-c)
246 pc = _mm_add_epi16(pa,pb);
247 // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c)
248
249 pa = abs_i16(pa);// |p-a|
250 pb = abs_i16(pb);// |p-b|
251 pc = abs_i16(pc);// |p-c|
252
253 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
254
255 // Paeth breaks ties favoring a over b over c.
256 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
257 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
258 c));
259
260 // Note `_epi8`: we need addition to wrap modulo 255.
261 d = _mm_add_epi8(d, nearest);
262 store4(row, _mm_packus_epi16(d,d));
263 prev += 4;
264 row += 4;
265 rb -= 4;
266 }
267 }
268
269 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698