Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(216)

Side by Side Diff: src/opts/SkXfermode_opts_SSE2.cpp

Issue 236363012: Xfermode: SSE2 implementation of softlight_modeproc (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: rebase and move SkSqrtBits_SSE2 to SkMath_opts_SSE2.h Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkMath_opts_SSE2.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #include "SkColorPriv.h" 1 #include "SkColorPriv.h"
2 #include "SkColor_opts_SSE2.h" 2 #include "SkColor_opts_SSE2.h"
3 #include "SkMathPriv.h" 3 #include "SkMathPriv.h"
4 #include "SkMath_opts_SSE2.h"
4 #include "SkXfermode.h" 5 #include "SkXfermode.h"
5 #include "SkXfermode_opts_SSE2.h" 6 #include "SkXfermode_opts_SSE2.h"
6 #include "SkXfermode_proccoeff.h" 7 #include "SkXfermode_proccoeff.h"
7 8
8 //////////////////////////////////////////////////////////////////////////////// 9 ////////////////////////////////////////////////////////////////////////////////
9 // 4 pixels SSE2 version functions 10 // 4 pixels SSE2 version functions
10 //////////////////////////////////////////////////////////////////////////////// 11 ////////////////////////////////////////////////////////////////////////////////
11 12
12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { 13 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) {
13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; 14 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128;
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 __m128i a = srcover_byte_SSE2(sa, da); 318 __m128i a = srcover_byte_SSE2(sa, da);
318 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), 319 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src),
319 SkGetPackedR32_SSE2(dst), sa, da); 320 SkGetPackedR32_SSE2(dst), sa, da);
320 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), 321 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src),
321 SkGetPackedG32_SSE2(dst), sa, da); 322 SkGetPackedG32_SSE2(dst), sa, da);
322 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), 323 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src),
323 SkGetPackedB32_SSE2(dst), sa, da); 324 SkGetPackedB32_SSE2(dst), sa, da);
324 return SkPackARGB32_SSE2(a, r, g, b); 325 return SkPackARGB32_SSE2(a, r, g, b);
325 } 326 }
326 327
// Thin wrapper that forwards n to SkSqrtBits_SSE2 with a fixed bit count of 15+4.
// NOTE(review): SkSqrtBits_SSE2 is not defined in this view; presumably this is the
// vector counterpart of a scalar "sqrt of a byte-scaled unit value" helper -- confirm
// the meaning of the 15+4 argument against its definition.
328 static __m128i sqrt_unit_byte_SSE2(const __m128i& n) {
329 return SkSqrtBits_SSE2(n, 15+4);
330 }
331
// Soft-light blend of one color channel, evaluated on the 32-bit lanes of the inputs.
//   sc, dc - source / destination channel values (the caller passes unpacked R, G or B)
//   sa, da - source / destination alpha
// The scalar logic is a three-way if / else-if / else. Here all three candidate results
// are computed for every lane and the right one is selected with compare masks, so the
// function is branch-free. The returned value is clamp_div255round_SSE2 of
//   rc + sc * (255 - da) + dc * (255 - sa)
// where rc is the selected branch result.
// NOTE(review): Multiply32_SSE2 and clamp_div255round_SSE2 are defined outside this view;
// the comments below take Multiply32_SSE2 to be a lane-wise 32-bit multiply -- confirm.
332 static inline __m128i softlight_byte_SSE2(const __m128i& sc, const __m128i& dc,
333 const __m128i& sa, const __m128i& da) {
334 __m128i tmp1, tmp2, tmp3;
335
336 // int m = da ? dc * 256 / da : 0;
// cmp is all-ones in lanes where da == 0.
337 __m128i cmp = _mm_cmpeq_epi32(da, _mm_setzero_si128());
338 __m128i m = _mm_slli_epi32(dc, 8);
// The division is done in single-precision float and truncated back to int.
339 __m128 x = _mm_cvtepi32_ps(m);
340 __m128 y = _mm_cvtepi32_ps(da);
341 m = _mm_cvttps_epi32(_mm_div_ps(x, y));
// Force m to 0 in the da == 0 lanes, discarding whatever the divide-by-zero produced.
342 m = _mm_andnot_si128(cmp, m);
343
344 // if (2 * sc <= sa)
345 tmp1 = _mm_slli_epi32(sc, 1); // 2 * sc
// cmp1 is all-ones where 2 * sc > sa, i.e. where this first branch does NOT apply.
346 __m128i cmp1 = _mm_cmpgt_epi32(tmp1, sa);
347 tmp1 = _mm_sub_epi32(tmp1, sa); // 2 * sc - sa
348 tmp2 = _mm_sub_epi32(_mm_set1_epi32(256), m); // 256 - m
349 tmp1 = Multiply32_SSE2(tmp1, tmp2);
// Arithmetic shift: (2 * sc - sa) * (256 - m) >> 8, sign preserved.
350 tmp1 = _mm_srai_epi32(tmp1, 8);
351 tmp1 = _mm_add_epi32(sa, tmp1);
352 tmp1 = Multiply32_SSE2(dc, tmp1);
// Keep the first-branch result only in lanes where cmp1 is clear (2 * sc <= sa).
353 __m128i rc1 = _mm_andnot_si128(cmp1, tmp1);
354
355 // else if (4 * dc <= da)
356 tmp2 = _mm_slli_epi32(dc, 2); // dc * 4
// cmp2 is all-ones where 4 * dc > da, i.e. where this second branch does NOT apply.
357 __m128i cmp2 = _mm_cmpgt_epi32(tmp2, da);
358 __m128i i = _mm_slli_epi32(m, 2); // 4 * m
359 __m128i j = _mm_add_epi32(i, _mm_set1_epi32(256)); // 4 * m + 256
360 __m128i k = Multiply32_SSE2(i, j); // 4 * m * (4 * m + 256)
361 __m128i t = _mm_sub_epi32(m, _mm_set1_epi32(256)); // m - 256
362 i = Multiply32_SSE2(k, t); // 4 * m * (4 * m + 256) * (m - 256)
363 i = _mm_srai_epi32(i, 16); // >> 16
364 j = Multiply32_SSE2(_mm_set1_epi32(7), m); // 7 * m
// tmp2 = (4 * m * (4 * m + 256) * (m - 256) >> 16) + 7 * m
365 tmp2 = _mm_add_epi32(i, j);
// From here on i and j are repurposed: i = dc * sa and j = da * (2 * sc - sa).
// Both values are reused unchanged by the final "else" branch below.
366 i = Multiply32_SSE2(dc, sa); // dc * sa
367 j = _mm_slli_epi32(sc, 1); // 2 * sc
368 j = _mm_sub_epi32(j, sa); // 2 * sc - sa
369 j = Multiply32_SSE2(da, j); // da * (2 * sc - sa)
370 tmp2 = Multiply32_SSE2(j, tmp2); // * tmp
371 tmp2 = _mm_srai_epi32(tmp2, 8); // >> 8
372 tmp2 = _mm_add_epi32(i, tmp2);
// Second-branch mask: first branch not taken (cmp1 set) AND 4 * dc <= da (cmp2 clear).
373 cmp = _mm_andnot_si128(cmp2, cmp1);
374 __m128i rc2 = _mm_and_si128(cmp, tmp2);
375 __m128i rc = _mm_or_si128(rc1, rc2);
376
377 // else
378 tmp3 = sqrt_unit_byte_SSE2(m);
379 tmp3 = _mm_sub_epi32(tmp3, m);
380 tmp3 = Multiply32_SSE2(j, tmp3); // j = da * (2 * sc - sa)
381 tmp3 = _mm_srai_epi32(tmp3, 8);
382 tmp3 = _mm_add_epi32(i, tmp3); // i = dc * sa
// Third-branch mask: neither earlier condition held (both cmp1 and cmp2 set).
// The three masks are mutually exclusive, so OR-ing the masked results acts as a select.
383 cmp = _mm_and_si128(cmp1, cmp2);
384 __m128i rc3 = _mm_and_si128(cmp, tmp3);
385 rc = _mm_or_si128(rc, rc3);
386
// Add the sc * (255 - da) and dc * (255 - sa) terms that are common to every branch.
// NOTE(review): these two products use the 16-bit multiply _mm_mullo_epi16, unlike the
// Multiply32_SSE2 calls above; presumably that relies on each operand being a byte value
// (0..255) so the product fits in the low 16 bits of the lane -- confirm.
387 tmp1 = _mm_sub_epi32(_mm_set1_epi32(255), da); // 255 - da
388 tmp1 = _mm_mullo_epi16(sc, tmp1);
389 tmp2 = _mm_sub_epi32(_mm_set1_epi32(255), sa); // 255 - sa
390 tmp2 = _mm_mullo_epi16(dc, tmp2);
391 rc = _mm_add_epi32(rc, tmp1);
392 rc = _mm_add_epi32(rc, tmp2);
393 return clamp_div255round_SSE2(rc);
394 }
395
// Soft-light mode proc for packed pixels held in src and dst.
// Alpha is extracted from both inputs; the result alpha comes from srcover_byte_SSE2(sa, da),
// and each of R, G and B is produced by softlight_byte_SSE2 from the matching source and
// destination channel. The four results are repacked with SkPackARGB32_SSE2.
396 static __m128i softlight_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
397 __m128i sa = SkGetPackedA32_SSE2(src);
398 __m128i da = SkGetPackedA32_SSE2(dst);
399
400 __m128i a = srcover_byte_SSE2(sa, da);
401 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src),
402 SkGetPackedR32_SSE2(dst), sa, da);
403 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src),
404 SkGetPackedG32_SSE2(dst), sa, da);
405 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src),
406 SkGetPackedB32_SSE2(dst), sa, da);
407 return SkPackARGB32_SSE2(a, r, g, b);
408 }
327 409
// Difference blend of one color channel (unchanged context; each line shows the old and new column).
// Computes sc + dc - 2 * SkDiv255Round_SSE2(min(sc * da, dc * sa)) and passes the result
// through clamp_signed_byte_SSE2 before returning it.
328 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, 410 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc,
329 const __m128i& sa, const __m128i& da) { 411 const __m128i& sa, const __m128i& da) {
330 __m128i tmp1 = _mm_mullo_epi16(sc, da); 412 __m128i tmp1 = _mm_mullo_epi16(sc, da);
331 __m128i tmp2 = _mm_mullo_epi16(dc, sa); 413 __m128i tmp2 = _mm_mullo_epi16(dc, sa);
332 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); 414 __m128i tmp = SkMin32_SSE2(tmp1, tmp2);
333 415
334 __m128i ret1 = _mm_add_epi32(sc, dc); 416 __m128i ret1 = _mm_add_epi32(sc, dc);
335 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); 417 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1);
336 __m128i ret = _mm_sub_epi32(ret1, ret2); 418 __m128i ret = _mm_sub_epi32(ret1, ret2);
337 419
338 ret = clamp_signed_byte_SSE2(ret); 420 ret = clamp_signed_byte_SSE2(ret);
339 return ret; 421 return ret;
340 } 422 }
341 423
// Difference mode proc for packed pixels (unchanged context; each line shows the old and new column).
// Result alpha comes from srcover_byte_SSE2(sa, da); R, G and B each come from
// difference_byte_SSE2 on the matching channels, and the four are repacked with SkPackARGB32_SSE2.
342 static __m128i difference_modeproc_SSE2(const __m128i& src, 424 static __m128i difference_modeproc_SSE2(const __m128i& src,
343 const __m128i& dst) { 425 const __m128i& dst) {
344 __m128i sa = SkGetPackedA32_SSE2(src); 426 __m128i sa = SkGetPackedA32_SSE2(src);
345 __m128i da = SkGetPackedA32_SSE2(dst); 427 __m128i da = SkGetPackedA32_SSE2(dst);
346 428
347 __m128i a = srcover_byte_SSE2(sa, da); 429 __m128i a = srcover_byte_SSE2(sa, da);
348 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), 430 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src),
349 SkGetPackedR32_SSE2(dst), sa, da); 431 SkGetPackedR32_SSE2(dst), sa, da);
350 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), 432 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src),
351 SkGetPackedG32_SSE2(dst), sa, da); 433 SkGetPackedG32_SSE2(dst), sa, da);
352 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), 434 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src),
353 SkGetPackedB32_SSE2(dst), sa, da); 435 SkGetPackedB32_SSE2(dst), sa, da);
354 return SkPackARGB32_SSE2(a, r, g, b); 436 return SkPackARGB32_SSE2(a, r, g, b);
355 } 437 }
438
356 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, 439 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc,
357 const __m128i&, __m128i&) { 440 const __m128i&, __m128i&) {
358 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc 441 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc
359 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc 442 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc
360 tmp1 = _mm_add_epi32(tmp1, tmp2); 443 tmp1 = _mm_add_epi32(tmp1, tmp2);
361 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc 444 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc
362 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc 445 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc
363 446
364 __m128i r = _mm_sub_epi32(tmp1, tmp2); 447 __m128i r = _mm_sub_epi32(tmp1, tmp2);
365 return clamp_div255round_SSE2(r); 448 return clamp_div255round_SSE2(r);
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after
533 plus_modeproc_SSE2, 616 plus_modeproc_SSE2,
534 modulate_modeproc_SSE2, 617 modulate_modeproc_SSE2,
535 screen_modeproc_SSE2, 618 screen_modeproc_SSE2,
536 619
537 overlay_modeproc_SSE2, 620 overlay_modeproc_SSE2,
538 NULL, // kDarken_Mode 621 NULL, // kDarken_Mode
539 NULL, // kLighten_Mode 622 NULL, // kLighten_Mode
540 NULL, // kColorDodge_Mode 623 NULL, // kColorDodge_Mode
541 NULL, // kColorBurn_Mode 624 NULL, // kColorBurn_Mode
542 hardlight_modeproc_SSE2, 625 hardlight_modeproc_SSE2,
543 NULL, // kSoftLight_Mode 626 softlight_modeproc_SSE2,
544 difference_modeproc_SSE2, 627 difference_modeproc_SSE2,
545 exclusion_modeproc_SSE2, 628 exclusion_modeproc_SSE2,
546 multiply_modeproc_SSE2, 629 multiply_modeproc_SSE2,
547 630
548 NULL, // kHue_Mode 631 NULL, // kHue_Mode
549 NULL, // kSaturation_Mode 632 NULL, // kSaturation_Mode
550 NULL, // kColor_Mode 633 NULL, // kColor_Mode
551 NULL, // kLuminosity_Mode 634 NULL, // kLuminosity_Mode
552 }; 635 };
553 636
// Factory for the SSE2 xfermode (unchanged context; each line shows the old and new column).
// Looks up gSSE2XfermodeProcs[mode]. When the entry is non-NULL, returns a new
// SkSSE2ProcCoeffXfermode built from (rec, mode, proc); otherwise returns NULL.
// NOTE(review): mode indexes the table directly with no range check visible here --
// presumably callers only pass modes covered by the table; confirm.
554 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, 637 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
555 SkXfermode::Mode mode) { 638 SkXfermode::Mode mode) {
556 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); 639 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]);
557 640
558 if (procSIMD != NULL) { 641 if (procSIMD != NULL) {
559 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); 642 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD));
560 } 643 }
561 return NULL; 644 return NULL;
562 } 645 }
OLDNEW
« no previous file with comments | « src/opts/SkMath_opts_SSE2.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698