Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(596)

Side by Side Diff: src/opts/SkBlend_opts.h

Issue 1998373002: Improve srcover_srgb_srgb implementation. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 /* 8 /*
9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q
10 */ 10 */
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
120 120
121 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 121 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
122 122
123 void srcover_srgb_srgb( 123 void srcover_srgb_srgb(
124 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int n src) { 124 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int n src) {
125 const __m128i alphaMask = _mm_set1_epi32(0xFF000000); 125 const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
126 while (ndst > 0) { 126 while (ndst > 0) {
127 int count = SkTMin(ndst, nsrc); 127 int count = SkTMin(ndst, nsrc);
128 ndst -= count; 128 ndst -= count;
129 const uint32_t* src = srcStart; 129 const uint32_t* src = srcStart;
130 const uint32_t* end = src + (count & ~3); 130 const uint32_t* end = dst + (count & ~3);
131 ptrdiff_t delta = src - dst;
131 132
132 while (src < end) { 133 while (dst < end) {
133 __m128i pixels = load(src); 134 __m128i pixels = load(src);
134 if (_mm_testc_si128(pixels, alphaMask)) { 135 if (_mm_testc_si128(pixels, alphaMask)) {
136 uint32_t* start = dst;
135 do { 137 do {
136 store(dst, pixels); 138 store(dst, pixels);
137 dst += 4; 139 dst += 4;
138 src += 4; 140 } while (dst < end
139 } while (src < end && _mm_testc_si128(pixels = load(src) , alphaMask)); 141 && _mm_testc_si128(pixels = load(dst + delta), alphaMask));
142 src += dst - start;
140 } else if (_mm_testz_si128(pixels, alphaMask)) { 143 } else if (_mm_testz_si128(pixels, alphaMask)) {
141 do { 144 do {
142 dst += 4; 145 dst += 4;
143 src += 4; 146 src += 4;
144 } while (src < end && _mm_testz_si128(pixels = load(src) , alphaMask)); 147 } while (dst < end
148 && _mm_testz_si128(pixels = load(src), alphaMas k));
145 } else { 149 } else {
150 uint32_t* start = dst;
146 do { 151 do {
147 srcover_srgb_srgb_4(dst, src); 152 srcover_srgb_srgb_4(dst, dst + delta);
148 dst += 4; 153 dst += 4;
149 src += 4; 154 } while (dst < end
150 } while (src < end && _mm_testnzc_si128(pixels = load(sr c), alphaMask)); 155 && _mm_testnzc_si128(pixels = load(dst + delta) , alphaMask));
156 src += dst - start;
151 } 157 }
152 } 158 }
153 159
154 count = count & 3; 160 count = count & 3;
155 while (count-- > 0) { 161 while (count-- > 0) {
156 srcover_srgb_srgb_1(dst++, *src++); 162 srcover_srgb_srgb_1(dst++, *src++);
157 } 163 }
158 } 164 }
159 } 165 }
160 #else 166 #else
161 // SSE2 versions 167 // SSE2 versions
168
169 // Note: In the next three comparisons a group of 4 pixels is converted to a group of
170 // "signed" pixels because SSE2 does not have an unsigned comparison.
171 // Make it so that we can use the signed comparison operators by biasing
172 // 0x00xxxxxx to 0x80xxxxxx, which are the smallest values, and biasing 0xffxxxxxx to
173 // 0x7fxxxxxx, which are the largest values.
162 static inline bool check_opaque_alphas(__m128i pixels) { 174 static inline bool check_opaque_alphas(__m128i pixels) {
175 __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x800000 00));
163 int mask = 176 int mask =
164 _mm_movemask_epi8( 177 _mm_movemask_epi8(
165 _mm_cmpeq_epi32( 178 _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000)));
166 _mm_andnot_si128(pixels, _mm_set1_epi32(0xFF000000)), 179 return mask == 0;
167 _mm_setzero_si128()));
168 return mask == 0xFFFF;
169 } 180 }
170 181
171 static inline bool check_transparent_alphas(__m128i pixels) { 182 static inline bool check_transparent_alphas(__m128i pixels) {
183 __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x800000 00));
172 int mask = 184 int mask =
173 _mm_movemask_epi8( 185 _mm_movemask_epi8(
174 _mm_cmpeq_epi32( 186 _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF)));
mtklein 2016/05/24 12:15:37 Can't we trim the xor here by exploiting the const
175 _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)), 187 return mask == 0;
176 _mm_setzero_si128()));
177 return mask == 0xFFFF;
178 } 188 }
179 189
180 static inline bool check_partial_alphas(__m128i pixels) { 190 static inline bool check_partial_alphas(__m128i pixels) {
181 __m128i alphas = _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)); 191 __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x800000 00));
182 int mask = 192 __m128i opaque = _mm_cmplt_epi32(signedPixels, _mm_set1_epi32( 0x7F000000));
mtklein 2016/05/24 13:00:13 I think we can make this logic clearer. To start,
183 _mm_movemask_epi8( 193 __m128i transparent = _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32( 0x80FFFFFF));
184 _mm_cmpeq_epi8( 194 int mask = _mm_movemask_epi8(_mm_xor_si128(opaque, trans parent));
185 _mm_srai_epi32(alphas, 8), 195 return mask == 0;
186 alphas));
187 return mask == 0xFFFF;
188 } 196 }
189 197
190 void srcover_srgb_srgb( 198 void srcover_srgb_srgb(
191 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int n src) { 199 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int n src) {
192 while (ndst > 0) { 200 while (ndst > 0) {
193 int count = SkTMin(ndst, nsrc); 201 int count = SkTMin(ndst, nsrc);
194 ndst -= count; 202 ndst -= count;
195 const uint32_t* src = srcStart; 203 const uint32_t* src = srcStart;
196 const uint32_t* end = src + (count & ~3); 204 const uint32_t* end = dst + (count & ~3);
205 const ptrdiff_t delta = src - dst;
197 206
198 __m128i pixels = load(src); 207 __m128i pixels = load(src);
199 do { 208 do {
200 if (check_opaque_alphas(pixels)) { 209 if (check_opaque_alphas(pixels)) {
210 uint32_t* start = dst;
201 do { 211 do {
202 store(dst, pixels); 212 store(dst, pixels);
203 dst += 4; 213 dst += 4;
204 src += 4; 214 } while (dst < end && check_opaque_alphas((pixels = load (dst + delta))));
205 } while (src < end && check_opaque_alphas(pixels = load( src))); 215 src += dst - start;
206 } else if (check_transparent_alphas(pixels)) { 216 } else if (check_transparent_alphas(pixels)) {
207 const uint32_t* start = src; 217 const uint32_t* start = dst;
208 do { 218 do {
209 src += 4; 219 dst += 4;
210 } while (src < end && check_transparent_alphas(pixels = load(src))); 220 } while (dst < end && check_transparent_alphas(pixels = load(dst + delta)));
211 dst += src - start; 221 src += dst - start;
212 } else { 222 } else {
223 const uint32_t* start = dst;
213 do { 224 do {
214 srcover_srgb_srgb_4(dst, src); 225 srcover_srgb_srgb_4(dst, dst + delta);
215 dst += 4; 226 dst += 4;
216 src += 4; 227 } while (dst < end && check_partial_alphas(pixels = load (dst + delta)));
217 } while (src < end && check_partial_alphas(pixels = load (src))); 228 src += dst - start;
218 } 229 }
219 } while (src < end); 230 } while (dst < end);
220 231
221 count = count & 3; 232 count = count & 3;
222 while (count-- > 0) { 233 while (count-- > 0) {
223 srcover_srgb_srgb_1(dst++, *src++); 234 srcover_srgb_srgb_1(dst++, *src++);
224 } 235 }
225 } 236 }
226 } 237 }
227 #endif 238 #endif
228 #else 239 #else
229 240
230 void srcover_srgb_srgb( 241 void srcover_srgb_srgb(
231 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { 242 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
232 trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); 243 trivial_srcover_srgb_srgb(dst, src, ndst, nsrc);
233 } 244 }
234 245
235 #endif 246 #endif
236 247
237 } // namespace SK_OPTS_NS 248 } // namespace SK_OPTS_NS
238 249
239 #endif//SkBlend_opts_DEFINED 250 #endif//SkBlend_opts_DEFINED
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698