| OLD | NEW |
| 1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
| 2 #include "SkColor_opts_SSE2.h" | 2 #include "SkColor_opts_SSE2.h" |
| 3 #include "SkMathPriv.h" | 3 #include "SkMathPriv.h" |
| 4 #include "SkXfermode.h" | 4 #include "SkXfermode.h" |
| 5 #include "SkXfermode_opts_SSE2.h" | 5 #include "SkXfermode_opts_SSE2.h" |
| 6 #include "SkXfermode_proccoeff.h" | 6 #include "SkXfermode_proccoeff.h" |
| 7 | 7 |
| 8 //////////////////////////////////////////////////////////////////////////////// | 8 //////////////////////////////////////////////////////////////////////////////// |
| 9 // SSE2 versions of the functions, processing 4 pixels at a time | 9 // SSE2 versions of the functions, processing 4 pixels at a time |
| 10 //////////////////////////////////////////////////////////////////////////////// | 10 //////////////////////////////////////////////////////////////////////////////// |
| 11 | 11 |
// Rounded divide-by-255 applied independently to each 32-bit lane of `a`,
// using the shift-based form ((x + 128) + ((x + 128) >> 8)) >> 8.
// The shifts are logical, so lanes are treated as unsigned bit patterns.
static inline __m128i SkDiv255Round_SSE2(const __m128i& a) {
    const __m128i biased = _mm_add_epi32(a, _mm_set1_epi32(128));
    const __m128i folded = _mm_add_epi32(biased, _mm_srli_epi32(biased, 8));
    return _mm_srli_epi32(folded, 8);
}
| 19 | 19 |
// Adds `a` and `b` lane by lane (32-bit lanes) and caps every lane whose sum
// exceeds 255 at 255. Lanes at or below 255 pass through unchanged.
static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) {
    const __m128i limit = _mm_set1_epi32(255);
    const __m128i total = _mm_add_epi32(a, b);

    // All-ones in lanes that overflowed the byte range (signed compare).
    const __m128i overflowed = _mm_cmpgt_epi32(total, limit);

    // Select 255 where overflowed, the raw sum elsewhere.
    const __m128i capped = _mm_and_si128(overflowed, limit);
    const __m128i kept = _mm_andnot_si128(overflowed, total);
    return _mm_or_si128(capped, kept);
}
| 28 | 28 |
// Clamps each signed 32-bit lane of `n` to the range [0, 255]:
// negative lanes become 0, lanes above 255 become 255, others are unchanged.
static inline __m128i clamp_signed_byte_SSE2(const __m128i& n) {
    const __m128i upper = _mm_set1_epi32(255);

    const __m128i tooLow = _mm_cmplt_epi32(n, _mm_setzero_si128());
    const __m128i tooHigh = _mm_cmpgt_epi32(n, upper);
    const __m128i outOfRange = _mm_or_si128(tooLow, tooHigh);

    // 255 in lanes that were too high; zero everywhere else, which is also
    // the value wanted for lanes that were too low.
    const __m128i saturated = _mm_and_si128(tooHigh, upper);

    // In-range lanes keep their original value.
    const __m128i untouched = _mm_andnot_si128(outOfRange, n);

    return _mm_or_si128(saturated, untouched);
}
| 39 |
| 29 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { | 40 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { |
| 30 // test if > 0 | 41 // test if > 0 |
| 31 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); | 42 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); |
| 32 // test if < 255*255 | 43 // test if < 255*255 |
| 33 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); | 44 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); |
| 34 | 45 |
| 35 __m128i ret = _mm_setzero_si128(); | 46 __m128i ret = _mm_setzero_si128(); |
| 36 | 47 |
| 37 // if value >= 255*255, value = 255 | 48 // if value >= 255*255, value = 255 |
| 38 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); | 49 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 SkGetPackedA32_SSE2(dst)); | 175 SkGetPackedA32_SSE2(dst)); |
| 165 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), | 176 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), |
| 166 SkGetPackedR32_SSE2(dst)); | 177 SkGetPackedR32_SSE2(dst)); |
| 167 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), | 178 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), |
| 168 SkGetPackedG32_SSE2(dst)); | 179 SkGetPackedG32_SSE2(dst)); |
| 169 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), | 180 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), |
| 170 SkGetPackedB32_SSE2(dst)); | 181 SkGetPackedB32_SSE2(dst)); |
| 171 return SkPackARGB32_SSE2(a, r, g, b); | 182 return SkPackARGB32_SSE2(a, r, g, b); |
| 172 } | 183 } |
| 173 | 184 |
// Per-lane minimum of two vectors of signed 32-bit integers, built from a
// compare and a bitwise select.
static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) {
    // All-ones in lanes where a < b.
    const __m128i aIsSmaller = _mm_cmplt_epi32(a, b);

    const __m128i pickedFromA = _mm_and_si128(aIsSmaller, a);
    const __m128i pickedFromB = _mm_andnot_si128(aIsSmaller, b);
    return _mm_or_si128(pickedFromA, pickedFromB);
}
| 189 |
| 174 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { | 190 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { |
| 175 // a + b - SkAlphaMulAlpha(a, b); | 191 // a + b - SkAlphaMulAlpha(a, b); |
| 176 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); | 192 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); |
| 177 | 193 |
| 178 } | 194 } |
| 179 | 195 |
| 180 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m1
28i& dc, | 196 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m1
28i& dc, |
| 181 const __m128i& sa, const __m1
28i& da) { | 197 const __m128i& sa, const __m1
28i& da) { |
| 182 // sc * (255 - da) | 198 // sc * (255 - da) |
| 183 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); | 199 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 301 __m128i a = srcover_byte_SSE2(sa, da); | 317 __m128i a = srcover_byte_SSE2(sa, da); |
| 302 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), | 318 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), |
| 303 SkGetPackedR32_SSE2(dst), sa, da); | 319 SkGetPackedR32_SSE2(dst), sa, da); |
| 304 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), | 320 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), |
| 305 SkGetPackedG32_SSE2(dst), sa, da); | 321 SkGetPackedG32_SSE2(dst), sa, da); |
| 306 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), | 322 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), |
| 307 SkGetPackedB32_SSE2(dst), sa, da); | 323 SkGetPackedB32_SSE2(dst), sa, da); |
| 308 return SkPackARGB32_SSE2(a, r, g, b); | 324 return SkPackARGB32_SSE2(a, r, g, b); |
| 309 } | 325 } |
| 310 | 326 |
| 327 |
| 328 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, |
| 329 const __m128i& sa, const __m128i& da)
{ |
| 330 __m128i tmp1 = _mm_mullo_epi16(sc, da); |
| 331 __m128i tmp2 = _mm_mullo_epi16(dc, sa); |
| 332 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); |
| 333 |
| 334 __m128i ret1 = _mm_add_epi32(sc, dc); |
| 335 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); |
| 336 __m128i ret = _mm_sub_epi32(ret1, ret2); |
| 337 |
| 338 ret = clamp_signed_byte_SSE2(ret); |
| 339 return ret; |
| 340 } |
| 341 |
| 342 static __m128i difference_modeproc_SSE2(const __m128i& src, |
| 343 const __m128i& dst) { |
| 344 __m128i sa = SkGetPackedA32_SSE2(src); |
| 345 __m128i da = SkGetPackedA32_SSE2(dst); |
| 346 |
| 347 __m128i a = srcover_byte_SSE2(sa, da); |
| 348 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), |
| 349 SkGetPackedR32_SSE2(dst), sa, da); |
| 350 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), |
| 351 SkGetPackedG32_SSE2(dst), sa, da); |
| 352 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), |
| 353 SkGetPackedB32_SSE2(dst), sa, da); |
| 354 return SkPackARGB32_SSE2(a, r, g, b); |
| 355 } |
| 311 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, | 356 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, |
| 312 const __m128i&, __m128i&) { | 357 const __m128i&, __m128i&) { |
| 313 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc | 358 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc |
| 314 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc | 359 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc |
| 315 tmp1 = _mm_add_epi32(tmp1, tmp2); | 360 tmp1 = _mm_add_epi32(tmp1, tmp2); |
| 316 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc | 361 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc |
| 317 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc | 362 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc |
| 318 | 363 |
| 319 __m128i r = _mm_sub_epi32(tmp1, tmp2); | 364 __m128i r = _mm_sub_epi32(tmp1, tmp2); |
| 320 return clamp_div255round_SSE2(r); | 365 return clamp_div255round_SSE2(r); |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 489 modulate_modeproc_SSE2, | 534 modulate_modeproc_SSE2, |
| 490 screen_modeproc_SSE2, | 535 screen_modeproc_SSE2, |
| 491 | 536 |
| 492 overlay_modeproc_SSE2, | 537 overlay_modeproc_SSE2, |
| 493 NULL, // kDarken_Mode | 538 NULL, // kDarken_Mode |
| 494 NULL, // kLighten_Mode | 539 NULL, // kLighten_Mode |
| 495 NULL, // kColorDodge_Mode | 540 NULL, // kColorDodge_Mode |
| 496 NULL, // kColorBurn_Mode | 541 NULL, // kColorBurn_Mode |
| 497 hardlight_modeproc_SSE2, | 542 hardlight_modeproc_SSE2, |
| 498 NULL, // kSoftLight_Mode | 543 NULL, // kSoftLight_Mode |
| 499 NULL, // kDifference_Mode | 544 difference_modeproc_SSE2, |
| 500 exclusion_modeproc_SSE2, | 545 exclusion_modeproc_SSE2, |
| 501 multiply_modeproc_SSE2, | 546 multiply_modeproc_SSE2, |
| 502 | 547 |
| 503 NULL, // kHue_Mode | 548 NULL, // kHue_Mode |
| 504 NULL, // kSaturation_Mode | 549 NULL, // kSaturation_Mode |
| 505 NULL, // kColor_Mode | 550 NULL, // kColor_Mode |
| 506 NULL, // kLuminosity_Mode | 551 NULL, // kLuminosity_Mode |
| 507 }; | 552 }; |
| 508 | 553 |
| 509 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 554 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
| 510 SkXfermode::Mode mode)
{ | 555 SkXfermode::Mode mode)
{ |
| 511 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 556 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); |
| 512 | 557 |
| 513 if (procSIMD != NULL) { | 558 if (procSIMD != NULL) { |
| 514 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 559 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); |
| 515 } | 560 } |
| 516 return NULL; | 561 return NULL; |
| 517 } | 562 } |
| OLD | NEW |