OLD | NEW |
1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
2 #include "SkColor_opts_SSE2.h" | 2 #include "SkColor_opts_SSE2.h" |
3 #include "SkMathPriv.h" | 3 #include "SkMathPriv.h" |
| 4 #include "SkMath_opts_SSE2.h" |
4 #include "SkXfermode.h" | 5 #include "SkXfermode.h" |
5 #include "SkXfermode_opts_SSE2.h" | 6 #include "SkXfermode_opts_SSE2.h" |
6 #include "SkXfermode_proccoeff.h" | 7 #include "SkXfermode_proccoeff.h" |
7 | 8 |
8 //////////////////////////////////////////////////////////////////////////////// | 9 //////////////////////////////////////////////////////////////////////////////// |
9 // 4 pixels SSE2 version functions | 10 // 4 pixels SSE2 version functions |
10 //////////////////////////////////////////////////////////////////////////////// | 11 //////////////////////////////////////////////////////////////////////////////// |
11 | 12 |
12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { | 13 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { |
13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; | 14 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; |
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 __m128i a = srcover_byte_SSE2(sa, da); | 318 __m128i a = srcover_byte_SSE2(sa, da); |
318 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), | 319 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), |
319 SkGetPackedR32_SSE2(dst), sa, da); | 320 SkGetPackedR32_SSE2(dst), sa, da); |
320 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), | 321 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), |
321 SkGetPackedG32_SSE2(dst), sa, da); | 322 SkGetPackedG32_SSE2(dst), sa, da); |
322 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), | 323 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), |
323 SkGetPackedB32_SSE2(dst), sa, da); | 324 SkGetPackedB32_SSE2(dst), sa, da); |
324 return SkPackARGB32_SSE2(a, r, g, b); | 325 return SkPackARGB32_SSE2(a, r, g, b); |
325 } | 326 } |
326 | 327 |
| 328 static __m128i sqrt_unit_byte_SSE2(const __m128i& n) { |
| 329 return SkSqrtBits_SSE2(n, 15+4); |
| 330 } |
| 331 |
| 332 static inline __m128i softlight_byte_SSE2(const __m128i& sc, const __m128i& dc, |
| 333 const __m128i& sa, const __m128i& da)
{ |
| 334 __m128i tmp1, tmp2, tmp3; |
| 335 |
| 336 // int m = da ? dc * 256 / da : 0; |
| 337 __m128i cmp = _mm_cmpeq_epi32(da, _mm_setzero_si128()); |
| 338 __m128i m = _mm_slli_epi32(dc, 8); |
| 339 __m128 x = _mm_cvtepi32_ps(m); |
| 340 __m128 y = _mm_cvtepi32_ps(da); |
| 341 m = _mm_cvttps_epi32(_mm_div_ps(x, y)); |
| 342 m = _mm_andnot_si128(cmp, m); |
| 343 |
| 344 // if (2 * sc <= sa) |
| 345 tmp1 = _mm_slli_epi32(sc, 1); // 2 * sc |
| 346 __m128i cmp1 = _mm_cmpgt_epi32(tmp1, sa); |
| 347 tmp1 = _mm_sub_epi32(tmp1, sa); // 2 * sc - sa |
| 348 tmp2 = _mm_sub_epi32(_mm_set1_epi32(256), m); // 256 - m |
| 349 tmp1 = Multiply32_SSE2(tmp1, tmp2); |
| 350 tmp1 = _mm_srai_epi32(tmp1, 8); |
| 351 tmp1 = _mm_add_epi32(sa, tmp1); |
| 352 tmp1 = Multiply32_SSE2(dc, tmp1); |
| 353 __m128i rc1 = _mm_andnot_si128(cmp1, tmp1); |
| 354 |
| 355 // else if (4 * dc <= da) |
| 356 tmp2 = _mm_slli_epi32(dc, 2); // dc * 4 |
| 357 __m128i cmp2 = _mm_cmpgt_epi32(tmp2, da); |
| 358 __m128i i = _mm_slli_epi32(m, 2); // 4 * m |
| 359 __m128i j = _mm_add_epi32(i, _mm_set1_epi32(256)); // 4 * m + 256 |
| 360 __m128i k = Multiply32_SSE2(i, j); // 4 * m * (4 * m + 256) |
| 361 __m128i t = _mm_sub_epi32(m, _mm_set1_epi32(256)); // m - 256 |
| 362 i = Multiply32_SSE2(k, t); // 4 * m * (4 * m + 256)
* (m - 256) |
| 363 i = _mm_srai_epi32(i, 16); // >> 16 |
| 364 j = Multiply32_SSE2(_mm_set1_epi32(7), m); // 7 * m |
| 365 tmp2 = _mm_add_epi32(i, j); |
| 366 i = Multiply32_SSE2(dc, sa); // dc * sa |
| 367 j = _mm_slli_epi32(sc, 1); // 2 * sc |
| 368 j = _mm_sub_epi32(j, sa); // 2 * sc - sa |
| 369 j = Multiply32_SSE2(da, j); // da * (2 * sc - sa) |
| 370 tmp2 = Multiply32_SSE2(j, tmp2); // * tmp |
| 371 tmp2 = _mm_srai_epi32(tmp2, 8); // >> 8 |
| 372 tmp2 = _mm_add_epi32(i, tmp2); |
| 373 cmp = _mm_andnot_si128(cmp2, cmp1); |
| 374 __m128i rc2 = _mm_and_si128(cmp, tmp2); |
| 375 __m128i rc = _mm_or_si128(rc1, rc2); |
| 376 |
| 377 // else |
| 378 tmp3 = sqrt_unit_byte_SSE2(m); |
| 379 tmp3 = _mm_sub_epi32(tmp3, m); |
| 380 tmp3 = Multiply32_SSE2(j, tmp3); // j = da * (2 * sc - sa) |
| 381 tmp3 = _mm_srai_epi32(tmp3, 8); |
| 382 tmp3 = _mm_add_epi32(i, tmp3); // i = dc * sa |
| 383 cmp = _mm_and_si128(cmp1, cmp2); |
| 384 __m128i rc3 = _mm_and_si128(cmp, tmp3); |
| 385 rc = _mm_or_si128(rc, rc3); |
| 386 |
| 387 tmp1 = _mm_sub_epi32(_mm_set1_epi32(255), da); // 255 - da |
| 388 tmp1 = _mm_mullo_epi16(sc, tmp1); |
| 389 tmp2 = _mm_sub_epi32(_mm_set1_epi32(255), sa); // 255 - sa |
| 390 tmp2 = _mm_mullo_epi16(dc, tmp2); |
| 391 rc = _mm_add_epi32(rc, tmp1); |
| 392 rc = _mm_add_epi32(rc, tmp2); |
| 393 return clamp_div255round_SSE2(rc); |
| 394 } |
| 395 |
| 396 static __m128i softlight_modeproc_SSE2(const __m128i& src, const __m128i& dst) { |
| 397 __m128i sa = SkGetPackedA32_SSE2(src); |
| 398 __m128i da = SkGetPackedA32_SSE2(dst); |
| 399 |
| 400 __m128i a = srcover_byte_SSE2(sa, da); |
| 401 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src), |
| 402 SkGetPackedR32_SSE2(dst), sa, da); |
| 403 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src), |
| 404 SkGetPackedG32_SSE2(dst), sa, da); |
| 405 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src), |
| 406 SkGetPackedB32_SSE2(dst), sa, da); |
| 407 return SkPackARGB32_SSE2(a, r, g, b); |
| 408 } |
327 | 409 |
328 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, | 410 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, |
329 const __m128i& sa, const __m128i& da)
{ | 411 const __m128i& sa, const __m128i& da)
{ |
330 __m128i tmp1 = _mm_mullo_epi16(sc, da); | 412 __m128i tmp1 = _mm_mullo_epi16(sc, da); |
331 __m128i tmp2 = _mm_mullo_epi16(dc, sa); | 413 __m128i tmp2 = _mm_mullo_epi16(dc, sa); |
332 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); | 414 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); |
333 | 415 |
334 __m128i ret1 = _mm_add_epi32(sc, dc); | 416 __m128i ret1 = _mm_add_epi32(sc, dc); |
335 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); | 417 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); |
336 __m128i ret = _mm_sub_epi32(ret1, ret2); | 418 __m128i ret = _mm_sub_epi32(ret1, ret2); |
337 | 419 |
338 ret = clamp_signed_byte_SSE2(ret); | 420 ret = clamp_signed_byte_SSE2(ret); |
339 return ret; | 421 return ret; |
340 } | 422 } |
341 | 423 |
342 static __m128i difference_modeproc_SSE2(const __m128i& src, | 424 static __m128i difference_modeproc_SSE2(const __m128i& src, |
343 const __m128i& dst) { | 425 const __m128i& dst) { |
344 __m128i sa = SkGetPackedA32_SSE2(src); | 426 __m128i sa = SkGetPackedA32_SSE2(src); |
345 __m128i da = SkGetPackedA32_SSE2(dst); | 427 __m128i da = SkGetPackedA32_SSE2(dst); |
346 | 428 |
347 __m128i a = srcover_byte_SSE2(sa, da); | 429 __m128i a = srcover_byte_SSE2(sa, da); |
348 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), | 430 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), |
349 SkGetPackedR32_SSE2(dst), sa, da); | 431 SkGetPackedR32_SSE2(dst), sa, da); |
350 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), | 432 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), |
351 SkGetPackedG32_SSE2(dst), sa, da); | 433 SkGetPackedG32_SSE2(dst), sa, da); |
352 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), | 434 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), |
353 SkGetPackedB32_SSE2(dst), sa, da); | 435 SkGetPackedB32_SSE2(dst), sa, da); |
354 return SkPackARGB32_SSE2(a, r, g, b); | 436 return SkPackARGB32_SSE2(a, r, g, b); |
355 } | 437 } |
| 438 |
356 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, | 439 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, |
357 const __m128i&, __m128i&) { | 440 const __m128i&, __m128i&) { |
358 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc | 441 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc |
359 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc | 442 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc |
360 tmp1 = _mm_add_epi32(tmp1, tmp2); | 443 tmp1 = _mm_add_epi32(tmp1, tmp2); |
361 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc | 444 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc |
362 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc | 445 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc |
363 | 446 |
364 __m128i r = _mm_sub_epi32(tmp1, tmp2); | 447 __m128i r = _mm_sub_epi32(tmp1, tmp2); |
365 return clamp_div255round_SSE2(r); | 448 return clamp_div255round_SSE2(r); |
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
533 plus_modeproc_SSE2, | 616 plus_modeproc_SSE2, |
534 modulate_modeproc_SSE2, | 617 modulate_modeproc_SSE2, |
535 screen_modeproc_SSE2, | 618 screen_modeproc_SSE2, |
536 | 619 |
537 overlay_modeproc_SSE2, | 620 overlay_modeproc_SSE2, |
538 NULL, // kDarken_Mode | 621 NULL, // kDarken_Mode |
539 NULL, // kLighten_Mode | 622 NULL, // kLighten_Mode |
540 NULL, // kColorDodge_Mode | 623 NULL, // kColorDodge_Mode |
541 NULL, // kColorBurn_Mode | 624 NULL, // kColorBurn_Mode |
542 hardlight_modeproc_SSE2, | 625 hardlight_modeproc_SSE2, |
543 NULL, // kSoftLight_Mode | 626 softlight_modeproc_SSE2, |
544 difference_modeproc_SSE2, | 627 difference_modeproc_SSE2, |
545 exclusion_modeproc_SSE2, | 628 exclusion_modeproc_SSE2, |
546 multiply_modeproc_SSE2, | 629 multiply_modeproc_SSE2, |
547 | 630 |
548 NULL, // kHue_Mode | 631 NULL, // kHue_Mode |
549 NULL, // kSaturation_Mode | 632 NULL, // kSaturation_Mode |
550 NULL, // kColor_Mode | 633 NULL, // kColor_Mode |
551 NULL, // kLuminosity_Mode | 634 NULL, // kLuminosity_Mode |
552 }; | 635 }; |
553 | 636 |
554 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 637 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
555 SkXfermode::Mode mode)
{ | 638 SkXfermode::Mode mode)
{ |
556 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 639 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); |
557 | 640 |
558 if (procSIMD != NULL) { | 641 if (procSIMD != NULL) { |
559 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 642 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); |
560 } | 643 } |
561 return NULL; | 644 return NULL; |
562 } | 645 } |
OLD | NEW |