Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/codec/SkPngFilters.cpp

Issue 1699953002: Make png filter functions compatible with libpng (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkPngFilters.h" 8 #include "SkPngFilters.h"
9 9
10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). 10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d).
11 // They're positioned like this: 11 // They're positioned like this:
12 // prev: c b 12 // prev: c b
13 // row: a d 13 // row: a d
14 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be whichever 14 // The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be whichever
15 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.) 15 // of a, b, or c is closest to p=a+b-c. (Up also exists, predicting d=b.)
16 16
17 #if defined(__SSE2__) 17 #if defined(__SSE2__)
18 18
19 template <int bpp> 19 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row,
20 static __m128i load(const void* p) { 20 const uint8_t* prev)
21 static_assert(bpp <= 4, ""); 21 {
22 22 // The Sub filter predicts each pixel as the previous pixel, a.
23 uint32_t packed; 23 // There is no pixel to the left of the first pixel. It's encoded directly.
24 memcpy(&packed, p, bpp); 24 // That works with our main loop if we just say that left pixel was zero.
25 return _mm_cvtsi32_si128(packed); 25 __m128i a, d = _mm_setzero_si128();
26 } 26
27 27 int rb = row_info->rowbytes;
28 template <int bpp> 28 while (rb > 0) {
29 static void store(void* p, __m128i v) { 29 a = d; memcpy(&d, row, 3);
mtklein 2016/02/15 20:16:32 Seems worth keeping load() and store() for readabi
msarett 2016/02/16 13:42:35 Done.
30 static_assert(bpp <= 4, ""); 30 d = _mm_add_epi8(d, a);
31 31 memcpy(row, &d, 3);
32 uint32_t packed = _mm_cvtsi128_si32(v); 32
33 memcpy(p, &packed, bpp); 33 row += 3;
34 } 34 rb -= 3;
35 35 }
36 template <int bpp> 36 }
37 static void sk_sub_sse2(png_row_infop row_info, uint8_t* row, const uint8_t*) { 37
38 // The Sub filter predicts each pixel as the previous pixel, a. 38 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row,
39 // There is no pixel to the left of the first pixel. It's encoded directly. 39 const uint8_t* prev)
40 // That works with our main loop if we just say that left pixel was zero. 40 {
41 __m128i a, d = _mm_setzero_si128(); 41 // The Sub filter predicts each pixel as the previous pixel, a.
42 42 // There is no pixel to the left of the first pixel. It's encoded directly.
43 int rb = row_info->rowbytes; 43 // That works with our main loop if we just say that left pixel was zero.
44 while (rb > 0) { 44 __m128i a, d = _mm_setzero_si128();
45 a = d; d = load<bpp>(row); 45
46 d = _mm_add_epi8(d, a); 46 int rb = row_info->rowbytes;
47 store<bpp>(row, d); 47 while (rb > 0) {
48 48 a = d; memcpy(&d, row, 4);
49 row += bpp; 49 d = _mm_add_epi8(d, a);
50 rb -= bpp; 50 memcpy(row, &d, 4);
51 } 51
52 } 52 row += 4;
53 53 rb -= 4;
54 template <int bpp> 54 }
55 void sk_avg_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 55 }
56 // The Avg filter predicts each pixel as the (truncated) average of a and b. 56 // There's no pixel to the left of the first pixel. Luckily, it's
57 // There's no pixel to the left of the first pixel. Luckily, it's 57 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row,
58 // predicted to be half of the pixel above it. So again, this works 58 const uint8_t* prev)
59 // perfectly with our loop if we make sure a starts at zero. 59 {
60 const __m128i zero = _mm_setzero_si128(); 60 // The Avg filter predicts each pixel as the (truncated) average of a and b.
61 __m128i b; 61 // There's no pixel to the left of the first pixel. Luckily, it's
62 __m128i a, d = zero; 62 // predicted to be half of the pixel above it. So again, this works
63 63 // perfectly with our loop if we make sure a starts at zero.
64 int rb = row_info->rowbytes; 64 const __m128i zero = _mm_setzero_si128();
65 while (rb > 0) { 65 __m128i b;
66 b = load<bpp>(prev); 66 __m128i a, d = zero;
67 a = d; d = load<bpp>(row ); 67
68 68 int rb = row_info->rowbytes;
69 // PNG requires a truncating average here, so sadly we can't just use _mm_avg_epu8... 69 while (rb > 0) {
70 __m128i avg = _mm_avg_epu8(a,b); 70 memcpy(&b, prev, 3);
71 // ...but we can fix it up by subtracting off 1 if it rounded up. 71 a = d; memcpy(&d, row, 3);
72 avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b), _mm_set1_epi8(1))); 72
73 73 // PNG requires a truncating average here, so sadly we can't just use
74 d = _mm_add_epi8(d, avg); 74 // _mm_avg_epu8...
75 store<bpp>(row, d); 75 __m128i avg = _mm_avg_epu8(a,b);
76 76 // ...but we can fix it up by subtracting off 1 if it rounded up.
77 prev += bpp; 77 avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
78 row += bpp; 78 _mm_set1_epi8(1)));
79 rb -= bpp; 79
80 } 80 d = _mm_add_epi8(d, avg);
81 } 81 memcpy(row, &d, 3);
82 82
83 // Returns |x| for 16-bit lanes. 83 prev += 3;
84 static __m128i abs_i16(__m128i x) { 84 row += 3;
85 #if defined(__SSSE3__) 85 rb -= 3;
86 return _mm_abs_epi16(x); 86 }
87 #else 87 }
88 // Read this all as, return x<0 ? -x : x. 88 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row,
89 // To negate two's complement, you flip all the bits then add 1. 89 const uint8_t* prev)
90 __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128()); 90 {
91 x = _mm_xor_si128(x, is_negative); // Flip negative lanes. 91 // The Avg filter predicts each pixel as the (truncated) average of a and b.
92 x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15)); // +1 to negative lanes, else +0. 92 // There's no pixel to the left of the first pixel. Luckily, it's
93 return x; 93 // predicted to be half of the pixel above it. So again, this works
94 #endif 94 // perfectly with our loop if we make sure a starts at zero.
95 } 95 const __m128i zero = _mm_setzero_si128();
96 96 __m128i b;
97 // Bytewise c ? t : e. 97 __m128i a, d = zero;
98 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { 98
99 #if 0 && defined(__SSE4_1__) // Make sure we have a bot testing this before enabling. 99 int rb = row_info->rowbytes;
100 return _mm_blendv_epi8(e,t,c); 100 while (rb > 0) {
101 #else 101 memcpy(&b, prev, 4);
102 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); 102 a = d; memcpy(&d, row, 4);
103 #endif 103
104 } 104 // PNG requires a truncating average here, so sadly we can't just use
105 105 // _mm_avg_epu8...
106 template <int bpp> 106 __m128i avg = _mm_avg_epu8(a,b);
107 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 107 // ...but we can fix it up by subtracting off 1 if it rounded up.
108 // Paeth tries to predict pixel d using the pixel to the left of it, a, 108 avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
109 // and two pixels from the previous row, b and c: 109 _mm_set1_epi8(1)));
110 // prev: c b 110
111 // row: a d 111 d = _mm_add_epi8(d, avg);
112 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. 112 memcpy(row, &d, 4);
113 113
114 // The first pixel has no left context, and so uses an Up filter, p = b. 114 prev += 4;
115 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. 115 row += 4;
116 // Here we zero b and d, which become c and a respectively at the start of the loop. 116 rb -= 4;
117 const __m128i zero = _mm_setzero_si128(); 117 }
118 __m128i c, b = zero, 118 }
119 a, d = zero; 119
120 120 // Returns |x| for 16-bit lanes.
121 int rb = row_info->rowbytes; 121 static __m128i abs_i16(__m128i x) {
122 while (rb > 0) { 122 #if defined(__SSSE3__)
123 // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates. 123 return _mm_abs_epi16(x);
124 c = b; b = _mm_unpacklo_epi8(load<bpp>(prev), zero); 124 #else
125 a = d; d = _mm_unpacklo_epi8(load<bpp>(row ), zero); 125 // Read this all as, return x<0 ? -x : x.
126 126 // To negate two's complement, you flip all the bits then add 1.
127 __m128i pa = _mm_sub_epi16(b,c), // (p-a) == (a+b-c - a) == (b-c) 127 __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
128 pb = _mm_sub_epi16(a,c), // (p-b) == (a+b-c - b) == (a-c) 128 // Flip negative lanes.
129 pc = _mm_add_epi16(pa,pb); // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) 129 x = _mm_xor_si128(x, is_negative);
130 130 // +1 to negative lanes, else +0.
131 pa = abs_i16(pa); // |p-a| 131 x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15));
132 pb = abs_i16(pb); // |p-b| 132 return x;
133 pc = abs_i16(pc); // |p-c|
134
135 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
136
137 // Paeth breaks ties favoring a over b over c.
138 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
139 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
140 c));
141
142 d = _mm_add_epi8(d, nearest); // Note `_epi8`: we need addition to wrap modulo 256.
143 store<bpp>(row, _mm_packus_epi16(d,d));
144
145 prev += bpp;
146 row += bpp;
147 rb -= bpp;
148 }
149 }
150
151 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
152 sk_sub_sse2<3>(row_info, row, prev);
153 }
154 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
155 sk_sub_sse2<4>(row_info, row, prev);
156 }
157
158 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
159 sk_avg_sse2<3>(row_info, row, prev);
160 }
161 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
162 sk_avg_sse2<4>(row_info, row, prev);
163 }
164
165 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
166 sk_paeth_sse2<3>(row_info, row, prev);
167 }
168 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
169 sk_paeth_sse2<4>(row_info, row, prev);
170 }
171
172 #endif 133 #endif
134 }
135
136 // Bytewise c ? t : e.
137 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
138 #if 0 && defined(__SSE4_1__) // Make sure we have a bot testing this before enabling.
msarett 2016/02/15 19:50:20 I dropped the SSE4 code for libpng patch.
mtklein 2016/02/15 20:27:33 Let's set up a bot to test this tomorrow. It'd ac
msarett 2016/02/16 13:42:35 Dropping the #if 0 in order to actually run this c
139 return _mm_blendv_epi8(e,t,c);
140 #else
141 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
142 #endif
143 }
144
145 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row,
146 const uint8_t* prev)
147 {
148 // Paeth tries to predict pixel d using the pixel to the left of it, a,
149 // and two pixels from the previous row, b and c:
150 // prev: c b
151 // row: a d
152 // The Paeth function predicts d to be whichever of a, b, or c is nearest to
153 // p=a+b-c. The first pixel has no left context, and so uses an Up filter,
154 // p = b. This works naturally with our main loop's p = a+b-c if we force a
155 // and c to zero. Here we zero b and d, which become c and a respectively
156 // at the start of the loop.
157 const __m128i zero = _mm_setzero_si128();
158 __m128i c, b = zero,
159 a, d = zero;
160
161 int rb = row_info->rowbytes;
162 while (rb > 0) {
163 // It's easiest to do this math (particularly, deal with pc) with 16-bit
164 // intermediates.
165 memcpy(&b, prev, 3);
166 memcpy(&d, row, 3);
167 c = b; b = _mm_unpacklo_epi8(b, zero);
168 a = d; d = _mm_unpacklo_epi8(d, zero);
169 __m128i pa = _mm_sub_epi16(b,c),
170 // (p-a) == (a+b-c - a) == (b-c)
171 pb = _mm_sub_epi16(a,c),
172 // (p-b) == (a+b-c - b) == (a-c)
173 pc = _mm_add_epi16(pa,pb);
174 // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c)
175
176 pa = abs_i16(pa);// |p-a|
177 pb = abs_i16(pb);// |p-b|
178 pc = abs_i16(pc);// |p-c|
179
180 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
181
182 // Paeth breaks ties favoring a over b over c.
183 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
184 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
185 c));
186
187 // Note `_epi8`: we need addition to wrap modulo 256.
188 d = _mm_add_epi8(d, nearest);
189 __m128i r = _mm_packus_epi16(d,d);
190 memcpy(row, &r, 3);
191 prev += 3;
192 row += 3;
193 rb -= 3;
194 }
195 }
196
197 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row,
198 const uint8_t* prev)
199 {
200 // Paeth tries to predict pixel d using the pixel to the left of it, a,
201 // and two pixels from the previous row, b and c:
202 // prev: c b
203 // row: a d
204 // The Paeth function predicts d to be whichever of a, b, or c is nearest to
205 // p=a+b-c. The first pixel has no left context, and so uses an Up filter,
206 // p = b. This works naturally with our main loop's p = a+b-c if we force a
207 // and c to zero. Here we zero b and d, which become c and a respectively
208 // at the start of the loop.
209 const __m128i zero = _mm_setzero_si128();
210 __m128i c, b = zero,
211 a, d = zero;
212
213 int rb = row_info->rowbytes;
214 while (rb > 0) {
215 // It's easiest to do this math (particularly, deal with pc) with 16-bit
216 // intermediates.
217 memcpy(&b, prev, 4);
218 memcpy(&d, row, 4);
219 c = b; b = _mm_unpacklo_epi8(b, zero);
220 a = d; d = _mm_unpacklo_epi8(d, zero);
221 __m128i pa = _mm_sub_epi16(b,c),
222 // (p-a) == (a+b-c - a) == (b-c)
223 pb = _mm_sub_epi16(a,c),
224 // (p-b) == (a+b-c - b) == (a-c)
225 pc = _mm_add_epi16(pa,pb);
226 // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c)
227
228 pa = abs_i16(pa);// |p-a|
229 pb = abs_i16(pb);// |p-b|
230 pc = abs_i16(pc);// |p-c|
231
232 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
233
234 // Paeth breaks ties favoring a over b over c.
235 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
236 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
237 c));
238
239 // Note `_epi8`: we need addition to wrap modulo 256.
240 d = _mm_add_epi8(d, nearest);
241 __m128i r = _mm_packus_epi16(d,d);
242 memcpy(row, &r, 4);
243 prev += 4;
244 row += 4;
245 rb -= 4;
246 }
247 }
248
249 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698