OLD | NEW |
1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
2 #include "SkColor_opts_SSE2.h" | 2 #include "SkColor_opts_SSE2.h" |
3 #include "SkMathPriv.h" | 3 #include "SkMathPriv.h" |
4 #include "SkXfermode.h" | 4 #include "SkXfermode.h" |
5 #include "SkXfermode_opts_SSE2.h" | 5 #include "SkXfermode_opts_SSE2.h" |
6 #include "SkXfermode_proccoeff.h" | 6 #include "SkXfermode_proccoeff.h" |
7 | 7 |
8 //////////////////////////////////////////////////////////////////////////////// | 8 //////////////////////////////////////////////////////////////////////////////// |
9 // 4 pixels SSE2 version functions | 9 // 4 pixels SSE2 version functions |
10 //////////////////////////////////////////////////////////////////////////////// | 10 //////////////////////////////////////////////////////////////////////////////// |
11 | 11 |
// Per 32-bit lane, computes ((a + 128) + ((a + 128) >> 8)) >> 8.
// This is the shift-based form of a rounded divide by 255; it is intended for
// lanes holding the product of two byte-range values.
static inline __m128i SkDiv255Round_SSE2(const __m128i& a) {
    // Bias by 128 so the final shift rounds instead of truncating.
    const __m128i biased = _mm_add_epi32(a, _mm_set1_epi32(128));
    // Fold the high byte back in: biased + (biased >> 8).
    const __m128i folded = _mm_add_epi32(biased, _mm_srli_epi32(biased, 8));
    return _mm_srli_epi32(folded, 8);
}
19 | 19 |
// Adds a and b per 32-bit lane and caps every lane whose sum exceeds 255
// at 255. Lanes with a sum of 255 or less keep the plain sum.
static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) {
    const __m128i limit = _mm_set1_epi32(255);
    const __m128i total = _mm_add_epi32(a, b);
    // All-ones mask in lanes where the sum is above the limit.
    const __m128i over = _mm_cmpgt_epi32(total, limit);

    // Branch-free select: limit where 'over' is set, otherwise the sum.
    const __m128i capped = _mm_and_si128(over, limit);
    const __m128i passed = _mm_andnot_si128(over, total);
    return _mm_or_si128(capped, passed);
}
28 | 28 |
// Clamps each signed 32-bit lane of n to the range [0, 255]:
// negative lanes become 0, lanes above 255 become 255, others are unchanged.
static inline __m128i clamp_signed_byte_SSE2(const __m128i& n) {
    const __m128i maxByte = _mm_set1_epi32(255);
    const __m128i isNegative = _mm_cmplt_epi32(n, _mm_setzero_si128());
    const __m128i isTooBig = _mm_cmpgt_epi32(n, maxByte);
    const __m128i outOfRange = _mm_or_si128(isNegative, isTooBig);

    // Out-of-range lanes: 255 when too big, 0 when negative.
    const __m128i replaced = _mm_and_si128(isTooBig, maxByte);
    // In-range lanes pass through untouched.
    const __m128i kept = _mm_andnot_si128(outOfRange, n);

    return _mm_or_si128(replaced, kept);
}
| 39 |
29 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { | 40 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { |
30 // test if > 0 | 41 // test if > 0 |
31 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); | 42 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); |
32 // test if < 255*255 | 43 // test if < 255*255 |
33 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); | 44 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); |
34 | 45 |
35 __m128i ret = _mm_setzero_si128(); | 46 __m128i ret = _mm_setzero_si128(); |
36 | 47 |
37 // if value >= 255*255, value = 255 | 48 // if value >= 255*255, value = 255 |
38 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); | 49 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
164 SkGetPackedA32_SSE2(dst)); | 175 SkGetPackedA32_SSE2(dst)); |
165 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), | 176 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), |
166 SkGetPackedR32_SSE2(dst)); | 177 SkGetPackedR32_SSE2(dst)); |
167 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), | 178 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), |
168 SkGetPackedG32_SSE2(dst)); | 179 SkGetPackedG32_SSE2(dst)); |
169 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), | 180 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), |
170 SkGetPackedB32_SSE2(dst)); | 181 SkGetPackedB32_SSE2(dst)); |
171 return SkPackARGB32_SSE2(a, r, g, b); | 182 return SkPackARGB32_SSE2(a, r, g, b); |
172 } | 183 } |
173 | 184 |
// Per-lane signed 32-bit minimum of a and b, built from a compare and a
// mask select.
static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) {
    // All-ones mask in lanes where a < b (signed compare).
    const __m128i aIsSmaller = _mm_cmplt_epi32(a, b);
    const __m128i fromA = _mm_and_si128(aIsSmaller, a);
    const __m128i fromB = _mm_andnot_si128(aIsSmaller, b);
    return _mm_or_si128(fromA, fromB);
}
| 189 |
174 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { | 190 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { |
175 // a + b - SkAlphaMulAlpha(a, b); | 191 // a + b - SkAlphaMulAlpha(a, b); |
176 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); | 192 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); |
177 | 193 |
178 } | 194 } |
179 | 195 |
180 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m1
28i& dc, | 196 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m1
28i& dc, |
181 const __m128i& sa, const __m1
28i& da) { | 197 const __m128i& sa, const __m1
28i& da) { |
182 // sc * (255 - da) | 198 // sc * (255 - da) |
183 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); | 199 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
301 __m128i a = srcover_byte_SSE2(sa, da); | 317 __m128i a = srcover_byte_SSE2(sa, da); |
302 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), | 318 __m128i r = hardlight_byte_SSE2(SkGetPackedR32_SSE2(src), |
303 SkGetPackedR32_SSE2(dst), sa, da); | 319 SkGetPackedR32_SSE2(dst), sa, da); |
304 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), | 320 __m128i g = hardlight_byte_SSE2(SkGetPackedG32_SSE2(src), |
305 SkGetPackedG32_SSE2(dst), sa, da); | 321 SkGetPackedG32_SSE2(dst), sa, da); |
306 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), | 322 __m128i b = hardlight_byte_SSE2(SkGetPackedB32_SSE2(src), |
307 SkGetPackedB32_SSE2(dst), sa, da); | 323 SkGetPackedB32_SSE2(dst), sa, da); |
308 return SkPackARGB32_SSE2(a, r, g, b); | 324 return SkPackARGB32_SSE2(a, r, g, b); |
309 } | 325 } |
310 | 326 |
| 327 |
| 328 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, |
| 329 const __m128i& sa, const __m128i& da)
{ |
| 330 __m128i tmp1 = _mm_mullo_epi16(sc, da); |
| 331 __m128i tmp2 = _mm_mullo_epi16(dc, sa); |
| 332 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); |
| 333 |
| 334 __m128i ret1 = _mm_add_epi32(sc, dc); |
| 335 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); |
| 336 __m128i ret = _mm_sub_epi32(ret1, ret2); |
| 337 |
| 338 ret = clamp_signed_byte_SSE2(ret); |
| 339 return ret; |
| 340 } |
| 341 |
| 342 static __m128i difference_modeproc_SSE2(const __m128i& src, |
| 343 const __m128i& dst) { |
| 344 __m128i sa = SkGetPackedA32_SSE2(src); |
| 345 __m128i da = SkGetPackedA32_SSE2(dst); |
| 346 |
| 347 __m128i a = srcover_byte_SSE2(sa, da); |
| 348 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), |
| 349 SkGetPackedR32_SSE2(dst), sa, da); |
| 350 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), |
| 351 SkGetPackedG32_SSE2(dst), sa, da); |
| 352 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), |
| 353 SkGetPackedB32_SSE2(dst), sa, da); |
| 354 return SkPackARGB32_SSE2(a, r, g, b); |
| 355 } |
311 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, | 356 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, |
312 const __m128i&, __m128i&) { | 357 const __m128i&, __m128i&) { |
313 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc | 358 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc |
314 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc | 359 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc |
315 tmp1 = _mm_add_epi32(tmp1, tmp2); | 360 tmp1 = _mm_add_epi32(tmp1, tmp2); |
316 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc | 361 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc |
317 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc | 362 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc |
318 | 363 |
319 __m128i r = _mm_sub_epi32(tmp1, tmp2); | 364 __m128i r = _mm_sub_epi32(tmp1, tmp2); |
320 return clamp_div255round_SSE2(r); | 365 return clamp_div255round_SSE2(r); |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
489 modulate_modeproc_SSE2, | 534 modulate_modeproc_SSE2, |
490 screen_modeproc_SSE2, | 535 screen_modeproc_SSE2, |
491 | 536 |
492 overlay_modeproc_SSE2, | 537 overlay_modeproc_SSE2, |
493 NULL, // kDarken_Mode | 538 NULL, // kDarken_Mode |
494 NULL, // kLighten_Mode | 539 NULL, // kLighten_Mode |
495 NULL, // kColorDodge_Mode | 540 NULL, // kColorDodge_Mode |
496 NULL, // kColorBurn_Mode | 541 NULL, // kColorBurn_Mode |
497 hardlight_modeproc_SSE2, | 542 hardlight_modeproc_SSE2, |
498 NULL, // kSoftLight_Mode | 543 NULL, // kSoftLight_Mode |
499 NULL, // kDifference_Mode | 544 difference_modeproc_SSE2, |
500 exclusion_modeproc_SSE2, | 545 exclusion_modeproc_SSE2, |
501 multiply_modeproc_SSE2, | 546 multiply_modeproc_SSE2, |
502 | 547 |
503 NULL, // kHue_Mode | 548 NULL, // kHue_Mode |
504 NULL, // kSaturation_Mode | 549 NULL, // kSaturation_Mode |
505 NULL, // kColor_Mode | 550 NULL, // kColor_Mode |
506 NULL, // kLuminosity_Mode | 551 NULL, // kLuminosity_Mode |
507 }; | 552 }; |
508 | 553 |
509 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 554 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
510 SkXfermode::Mode mode)
{ | 555 SkXfermode::Mode mode)
{ |
511 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 556 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); |
512 | 557 |
513 if (procSIMD != NULL) { | 558 if (procSIMD != NULL) { |
514 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 559 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); |
515 } | 560 } |
516 return NULL; | 561 return NULL; |
517 } | 562 } |
OLD | NEW |