Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(400)

Side by Side Diff: src/opts/SkXfermode_opts_SSE2.cpp

Issue 234433003: Xfermode: SSE2 implementation of difference_modeproc (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: rebase master Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 #include "SkColorPriv.h" 1 #include "SkColorPriv.h"
2 #include "SkColor_opts_SSE2.h" 2 #include "SkColor_opts_SSE2.h"
3 #include "SkMathPriv.h" 3 #include "SkMathPriv.h"
4 #include "SkXfermode.h" 4 #include "SkXfermode.h"
5 #include "SkXfermode_opts_SSE2.h" 5 #include "SkXfermode_opts_SSE2.h"
6 #include "SkXfermode_proccoeff.h" 6 #include "SkXfermode_proccoeff.h"
7 7
8 //////////////////////////////////////////////////////////////////////////////// 8 ////////////////////////////////////////////////////////////////////////////////
9 // 4 pixels SSE2 version functions 9 // 4 pixels SSE2 version functions
10 //////////////////////////////////////////////////////////////////////////////// 10 ////////////////////////////////////////////////////////////////////////////////
11 11
12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { 12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) {
13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; 13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128;
14 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) 14 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8)
15 prod = _mm_srli_epi32(prod, 8); // >> 8 15 prod = _mm_srli_epi32(prod, 8); // >> 8
16 16
17 return prod; 17 return prod;
18 } 18 }
19 19
20 static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) { 20 static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) {
21 __m128i sum = _mm_add_epi32(a, b); 21 __m128i sum = _mm_add_epi32(a, b);
22 __m128i cmp = _mm_cmpgt_epi32(sum, _mm_set1_epi32(255)); 22 __m128i cmp = _mm_cmpgt_epi32(sum, _mm_set1_epi32(255));
23 23
24 sum = _mm_or_si128(_mm_and_si128(cmp, _mm_set1_epi32(255)), 24 sum = _mm_or_si128(_mm_and_si128(cmp, _mm_set1_epi32(255)),
25 _mm_andnot_si128(cmp, sum)); 25 _mm_andnot_si128(cmp, sum));
26 return sum; 26 return sum;
27 } 27 }
28 28
29 static inline __m128i clamp_signed_byte_SSE2(const __m128i& n) {
30 __m128i cmp1 = _mm_cmplt_epi32(n, _mm_setzero_si128());
31 __m128i cmp2 = _mm_cmpgt_epi32(n, _mm_set1_epi32(255));
32 __m128i ret = _mm_and_si128(cmp2, _mm_set1_epi32(255));
33
34 __m128i cmp = _mm_or_si128(cmp1, cmp2);
35 ret = _mm_or_si128(_mm_and_si128(cmp, ret), _mm_andnot_si128(cmp, n));
36
37 return ret;
38 }
39
29 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { 40 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) {
30 // test if > 0 41 // test if > 0
31 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); 42 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128());
32 // test if < 255*255 43 // test if < 255*255
33 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); 44 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255));
34 45
35 __m128i ret = _mm_setzero_si128(); 46 __m128i ret = _mm_setzero_si128();
36 47
37 // if value >= 255*255, value = 255 48 // if value >= 255*255, value = 255
38 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); 49 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255));
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 SkGetPackedA32_SSE2(dst)); 175 SkGetPackedA32_SSE2(dst));
165 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), 176 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src),
166 SkGetPackedR32_SSE2(dst)); 177 SkGetPackedR32_SSE2(dst));
167 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), 178 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src),
168 SkGetPackedG32_SSE2(dst)); 179 SkGetPackedG32_SSE2(dst));
169 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), 180 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src),
170 SkGetPackedB32_SSE2(dst)); 181 SkGetPackedB32_SSE2(dst));
171 return SkPackARGB32_SSE2(a, r, g, b); 182 return SkPackARGB32_SSE2(a, r, g, b);
172 } 183 }
173 184
185 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) {
186 __m128i cmp = _mm_cmplt_epi32(a, b);
187 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b));
188 }
189
174 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { 190 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) {
175 // a + b - SkAlphaMulAlpha(a, b); 191 // a + b - SkAlphaMulAlpha(a, b);
176 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); 192 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b));
177 193
178 } 194 }
179 195
180 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc, 196 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc,
181 const __m128i& sa, const __m128i& da) { 197 const __m128i& sa, const __m128i& da) {
182 // sc * (255 - da) 198 // sc * (255 - da)
183 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); 199 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da);
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 __m128i a = srcover_byte_SSE2(sa, da); 317 __m128i a = srcover_byte_SSE2(sa, da);
302 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), 318 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src),
303 SkGetPackedR32_SSE2(dst), sa, da); 319 SkGetPackedR32_SSE2(dst), sa, da);
304 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), 320 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src),
305 SkGetPackedG32_SSE2(dst), sa, da); 321 SkGetPackedG32_SSE2(dst), sa, da);
306 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), 322 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src),
307 SkGetPackedB32_SSE2(dst), sa, da); 323 SkGetPackedB32_SSE2(dst), sa, da);
308 return SkPackARGB32_SSE2(a, r, g, b); 324 return SkPackARGB32_SSE2(a, r, g, b);
309 } 325 }
310 326
327
328 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc,
329 const __m128i& sa, const __m128i& da) {
330 __m128i tmp1 = _mm_mullo_epi16(sc, da);
331 __m128i tmp2 = _mm_mullo_epi16(dc, sa);
332 __m128i tmp = SkMin32_SSE2(tmp1, tmp2);
333
334 __m128i ret1 = _mm_add_epi32(sc, dc);
335 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1);
336 __m128i ret = _mm_sub_epi32(ret1, ret2);
337
338 ret = clamp_signed_byte_SSE2(ret);
339 return ret;
340 }
341
342 static __m128i difference_modeproc_SSE2(const __m128i& src,
343 const __m128i& dst) {
344 __m128i sa = SkGetPackedA32_SSE2(src);
345 __m128i da = SkGetPackedA32_SSE2(dst);
346
347 __m128i a = srcover_byte_SSE2(sa, da);
348 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src),
349 SkGetPackedR32_SSE2(dst), sa, da);
350 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src),
351 SkGetPackedG32_SSE2(dst), sa, da);
352 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src),
353 SkGetPackedB32_SSE2(dst), sa, da);
354 return SkPackARGB32_SSE2(a, r, g, b);
355 }
311 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, 356 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc,
312 const __m128i&, __m128i&) { 357 const __m128i&, __m128i&) {
313 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc 358 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc
314 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc 359 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc
315 tmp1 = _mm_add_epi32(tmp1, tmp2); 360 tmp1 = _mm_add_epi32(tmp1, tmp2);
316 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc 361 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc
317 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc 362 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc
318 363
319 __m128i r = _mm_sub_epi32(tmp1, tmp2); 364 __m128i r = _mm_sub_epi32(tmp1, tmp2);
320 return clamp_div255round_SSE2(r); 365 return clamp_div255round_SSE2(r);
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
489 modulate_modeproc_SSE2, 534 modulate_modeproc_SSE2,
490 screen_modeproc_SSE2, 535 screen_modeproc_SSE2,
491 536
492 overlay_modeproc_SSE2, 537 overlay_modeproc_SSE2,
493 NULL, // kDarken_Mode 538 NULL, // kDarken_Mode
494 NULL, // kLighten_Mode 539 NULL, // kLighten_Mode
495 NULL, // kColorDodge_Mode 540 NULL, // kColorDodge_Mode
496 NULL, // kColorBurn_Mode 541 NULL, // kColorBurn_Mode
497 hardlight_modeproc_SSE2, 542 hardlight_modeproc_SSE2,
498 NULL, // kSoftLight_Mode 543 NULL, // kSoftLight_Mode
499 NULL, // kDifference_Mode 544 difference_modeproc_SSE2,
500 exclusion_modeproc_SSE2, 545 exclusion_modeproc_SSE2,
501 multiply_modeproc_SSE2, 546 multiply_modeproc_SSE2,
502 547
503 NULL, // kHue_Mode 548 NULL, // kHue_Mode
504 NULL, // kSaturation_Mode 549 NULL, // kSaturation_Mode
505 NULL, // kColor_Mode 550 NULL, // kColor_Mode
506 NULL, // kLuminosity_Mode 551 NULL, // kLuminosity_Mode
507 }; 552 };
508 553
509 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, 554 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
510 SkXfermode::Mode mode) { 555 SkXfermode::Mode mode) {
511 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); 556 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]);
512 557
513 if (procSIMD != NULL) { 558 if (procSIMD != NULL) {
514 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); 559 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD));
515 } 560 }
516 return NULL; 561 return NULL;
517 } 562 }
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698