Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(415)

Side by Side Diff: src/codec/SkPngFilters.cpp

Issue 1657503002: Look beyond SSE2 for Paeth (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: kill sse4.1 Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkPngFilters.h" 8 #include "SkPngFilters.h"
9 9
10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d). 10 // Functions in this file look at most 3 pixels (a,b,c) to predict the fourth (d).
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
73 73
74 d = _mm_add_epi8(d, avg); 74 d = _mm_add_epi8(d, avg);
75 store<bpp>(row, d); 75 store<bpp>(row, d);
76 76
77 prev += bpp; 77 prev += bpp;
78 row += bpp; 78 row += bpp;
79 rb -= bpp; 79 rb -= bpp;
80 } 80 }
81 } 81 }
82 82
83 // Returns bytewise |x-y|. 83 // Returns |x| for 16-bit lanes.
84 static __m128i absdiff_u8(__m128i x, __m128i y) { 84 static __m128i abs_i16(__m128i x) {
85 // One of these two saturated subtractions will be the answer, the other zero. 85 #if defined(__SSSE3__)
86 return _mm_or_si128(_mm_subs_epu8(x,y), _mm_subs_epu8(y,x)); 86 return _mm_abs_epi16(x);
87 #else
88 // Read this all as, return x<0 ? -x : x.
89 // To negate two's complement, you flip all the bits then add 1.
90 __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
91 x = _mm_xor_si128(x, is_negative); // Flip negative lanes.
92 x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15)); // +1 to negative lanes, else +0.
93 return x;
94 #endif
87 } 95 }
88 96
89 // Bytewise c ? t : e. 97 // Bytewise c ? t : e.
90 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) { 98 static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
91 // SSE 4.1+ would be: return _mm_blendv_epi8(e,t,c); 99 #if 0 && defined(__SSE4_1__) // Make sure we have a bot testing this before enabling.
100 return _mm_blendv_epi8(e,t,c);
101 #else
92 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e)); 102 return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
103 #endif
93 } 104 }
94 105
95 template <int bpp> 106 template <int bpp>
96 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 107 void sk_paeth_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
97 // Paeth tries to predict pixel d using the pixel to the left of it, a, 108 // Paeth tries to predict pixel d using the pixel to the left of it, a,
98 // and two pixels from the previous row, b and c: 109 // and two pixels from the previous row, b and c:
99 // prev: c b 110 // prev: c b
100 // row: a d 111 // row: a d
101 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c. 112 // The Paeth function predicts d to be whichever of a, b, or c is nearest to p=a+b-c.
102 113
103 // The first pixel has no left context, and so uses an Up filter, p = b. 114 // The first pixel has no left context, and so uses an Up filter, p = b.
104 // This works naturally with our main loop's p = a+b-c if we force a and c to zero. 115 // This works naturally with our main loop's p = a+b-c if we force a and c to zero.
105 // Here we zero b and d, which become c and a respectively at the start of the loop. 116 // Here we zero b and d, which become c and a respectively at the start of the loop.
106 __m128i c, b = _mm_setzero_si128(), 117 const __m128i zero = _mm_setzero_si128();
107 a, d = _mm_setzero_si128(); 118 __m128i c, b = zero,
119 a, d = zero;
108 120
109 int rb = row_info->rowbytes; 121 int rb = row_info->rowbytes;
110 while (rb > 0) { 122 while (rb > 0) {
111 c = b; b = load<bpp>(prev); 123 // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates.
112 a = d; d = load<bpp>(row ); 124 c = b; b = _mm_unpacklo_epi8(load<bpp>(prev), zero);
125 a = d; d = _mm_unpacklo_epi8(load<bpp>(row ), zero);
113 126
114 // We can't express p in 8 bits, but luckily we can use this faux p instead. 127 __m128i pa = _mm_sub_epi16(b,c), // (p-a) == (a+b-c - a) == (b-c)
115 // (I have no deep insight here... I just proved this with brute force.) 128 pb = _mm_sub_epi16(a,c), // (p-b) == (a+b-c - b) == (a-c)
116 __m128i min = _mm_min_epu8(a,b), 129 pc = _mm_add_epi16(pa,pb); // (p-c) == (a+b-c - c) == (b-c)+(a-c)
117 max = _mm_max_epu8(a,b),
118 faux_p = _mm_adds_epu8(min, _mm_subs_epu8(max, c));
119 130
120 // We could use faux_p for calculating all three of pa, pb, and pc, 131 pa = abs_i16(pa); // |p-a|
121 // but it's a little quicker to calculate the correct pa and pb directly, 132 pb = abs_i16(pb); // |p-b|
122 // and the predictor remains the same. (Again, brute force.) 133 pc = abs_i16(pc); // |p-c|
123 __m128i pa = absdiff_u8(b,c), // |a+b-c - a| == |b-c|
124 pb = absdiff_u8(a,c), // |a+b-c - b| == |a-c|
125 faux_pc = absdiff_u8(faux_p, c);
126 134
127 // From here, things are straightforward. Find the smallest distance to p... 135 __m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
128 __m128i smallest = _mm_min_epu8(_mm_min_epu8(pa, pb), faux_pc);
129 136
130 // ... then the predictor is the input corresponding to that smallest distance, 137 // Paeth breaks ties favoring a over b over c.
131 // breaking ties in favor of a over b over c. 138 __m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
132 __m128i nearest = if_then_else(_mm_cmpeq_epi8(smallest, pa), a, 139 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
133 if_then_else(_mm_cmpeq_epi8(smallest, pb), b, 140 c));
134 c));
135 141
136 // We've reconstructed d! Leave it for next round to become a, and write it out. 142 d = _mm_add_epi8(d, nearest); // Note `_epi8`: we need addition to wrap modulo 256.
137 d = _mm_add_epi8(d, nearest); 143 store<bpp>(row, _mm_packus_epi16(d,d));
138 store<bpp>(row, d);
139 144
140 prev += bpp; 145 prev += bpp;
141 row += bpp; 146 row += bpp;
142 rb -= bpp; 147 rb -= bpp;
143 } 148 }
144 } 149 }
145 150
146 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 151 void sk_sub3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
147 sk_sub_sse2<3>(row_info, row, prev); 152 sk_sub_sse2<3>(row_info, row, prev);
148 } 153 }
149 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 154 void sk_sub4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
150 sk_sub_sse2<4>(row_info, row, prev); 155 sk_sub_sse2<4>(row_info, row, prev);
151 } 156 }
152 157
153 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 158 void sk_avg3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
154 sk_avg_sse2<3>(row_info, row, prev); 159 sk_avg_sse2<3>(row_info, row, prev);
155 } 160 }
156 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 161 void sk_avg4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
157 sk_avg_sse2<4>(row_info, row, prev); 162 sk_avg_sse2<4>(row_info, row, prev);
158 } 163 }
159 164
160 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 165 void sk_paeth3_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
161 sk_paeth_sse2<3>(row_info, row, prev); 166 sk_paeth_sse2<3>(row_info, row, prev);
162 } 167 }
163 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) { 168 void sk_paeth4_sse2(png_row_infop row_info, uint8_t* row, const uint8_t* prev) {
164 sk_paeth_sse2<4>(row_info, row, prev); 169 sk_paeth_sse2<4>(row_info, row, prev);
165 } 170 }
166 171
167 #endif 172 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698