OLD | NEW |
---|---|
1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
2 #include "SkColor_opts_SSE2.h" | 2 #include "SkColor_opts_SSE2.h" |
3 #include "SkMathPriv.h" | 3 #include "SkMathPriv.h" |
4 #include "SkXfermode.h" | 4 #include "SkXfermode.h" |
5 #include "SkXfermode_opts_SSE2.h" | 5 #include "SkXfermode_opts_SSE2.h" |
6 #include "SkXfermode_proccoeff.h" | 6 #include "SkXfermode_proccoeff.h" |
7 | 7 |
8 //////////////////////////////////////////////////////////////////////////////// | 8 //////////////////////////////////////////////////////////////////////////////// |
9 // 4 pixels SSE2 version functions | 9 // 4 pixels SSE2 version functions |
10 //////////////////////////////////////////////////////////////////////////////// | 10 //////////////////////////////////////////////////////////////////////////////// |
11 | 11 |
12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { | 12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { |
13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; | 13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; |
14 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) | 14 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) |
15 prod = _mm_srli_epi32(prod, 8); // >> 8 | 15 prod = _mm_srli_epi32(prod, 8); // >> 8 |
16 | 16 |
17 return prod; | 17 return prod; |
18 } | 18 } |
19 | 19 |
20 static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) { | |
21 __m128i sum = _mm_add_epi32(a, b); | |
22 __m128i cmp = _mm_cmpgt_epi32(sum, _mm_set1_epi32(255)); | |
23 | |
24 sum = _mm_or_si128(_mm_and_si128(cmp, _mm_set1_epi32(255)), | |
25 _mm_andnot_si128(cmp, sum)); | |
26 return sum; | |
27 } | |
28 | |
20 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { | 29 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { |
21 // test if > 0 | 30 // test if > 0 |
22 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); | 31 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); |
23 // test if < 255*255 | 32 // test if < 255*255 |
24 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); | 33 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); |
25 | 34 |
26 __m128i ret = _mm_setzero_si128(); | 35 __m128i ret = _mm_setzero_si128(); |
27 | 36 |
28 // if value >= 255*255, value = 255 | 37 // if value >= 255*255, value = 255 |
29 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); | 38 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); |
30 | 39 |
31 __m128i div = SkDiv255Round_SSE2(prod); | 40 __m128i div = SkDiv255Round_SSE2(prod); |
32 | 41 |
33 // test if > 0 && < 255*255 | 42 // test if > 0 && < 255*255 |
34 __m128i cmp = _mm_and_si128(cmp1, cmp2); | 43 __m128i cmp = _mm_and_si128(cmp1, cmp2); |
35 | 44 |
36 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); | 45 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); |
37 | 46 |
38 return ret; | 47 return ret; |
39 } | 48 } |
40 | 49 |
50 static __m128i srcover_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
51 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src)); | |
52 return _mm_add_epi32(src, SkAlphaMulQ_SSE2(dst, isa)); | |
53 } | |
54 | |
55 static __m128i dstover_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
56 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst)); | |
57 return _mm_add_epi32(dst, SkAlphaMulQ_SSE2(src, ida)); | |
58 } | |
59 | |
60 static __m128i srcin_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
61 __m128i da = SkGetPackedA32_SSE2(dst); | |
62 return SkAlphaMulQ_SSE2(src, SkAlpha255To256_SSE2(da)); | |
63 } | |
64 | |
65 static __m128i dstin_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
66 __m128i sa = SkGetPackedA32_SSE2(src); | |
67 return SkAlphaMulQ_SSE2(dst, SkAlpha255To256_SSE2(sa)); | |
68 } | |
69 | |
70 static __m128i srcout_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
71 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst)); | |
72 return SkAlphaMulQ_SSE2(src, ida); | |
73 } | |
74 | |
75 static __m128i dstout_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
76 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src)); | |
77 return SkAlphaMulQ_SSE2(dst, isa); | |
78 } | |
79 | |
80 static __m128i srcatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
81 __m128i sa = SkGetPackedA32_SSE2(src); | |
82 __m128i da = SkGetPackedA32_SSE2(dst); | |
83 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
84 | |
85 __m128i a = da; | |
86 | |
87 __m128i r1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedR32_SSE2(src)); | |
88 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst)); | |
89 __m128i r = _mm_add_epi32(r1, r2); | |
90 | |
91 __m128i g1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedG32_SSE2(src)); | |
92 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst)); | |
93 __m128i g = _mm_add_epi32(g1, g2); | |
94 | |
95 __m128i b1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedB32_SSE2(src)); | |
96 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst)); | |
97 __m128i b = _mm_add_epi32(b1, b2); | |
98 | |
99 return SkPackARGB32_SSE2(a, r, g, b); | |
100 } | |
101 | |
102 static __m128i dstatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
103 __m128i sa = SkGetPackedA32_SSE2(src); | |
104 __m128i da = SkGetPackedA32_SSE2(dst); | |
105 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
106 | |
107 __m128i a = sa; | |
108 | |
109 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src)); | |
110 __m128i r2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedR32_SSE2(dst)); | |
111 __m128i r = _mm_add_epi32(r1, r2); | |
112 | |
113 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src)); | |
114 __m128i g2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedG32_SSE2(dst)); | |
115 __m128i g = _mm_add_epi32(g1, g2); | |
116 | |
117 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src)); | |
118 __m128i b2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedB32_SSE2(dst)); | |
119 __m128i b = _mm_add_epi32(b1, b2); | |
120 | |
121 return SkPackARGB32_SSE2(a, r, g, b); | |
122 } | |
123 | |
124 static __m128i xor_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
125 __m128i sa = SkGetPackedA32_SSE2(src); | |
126 __m128i da = SkGetPackedA32_SSE2(dst); | |
127 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
128 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
129 | |
130 __m128i a1 = _mm_add_epi32(sa, da); | |
131 __m128i a2 = SkAlphaMulAlpha_SSE2(sa, da); | |
132 a2 = _mm_slli_epi32(a2, 1); | |
133 __m128i a = _mm_sub_epi32(a1, a2); | |
134 | |
135 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src)); | |
136 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst)); | |
137 __m128i r = _mm_add_epi32(r1, r2); | |
138 | |
139 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src)); | |
140 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst)); | |
141 __m128i g = _mm_add_epi32(g1, g2); | |
142 | |
143 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src)); | |
144 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst)); | |
145 __m128i b = _mm_add_epi32(b1, b2); | |
146 | |
147 return SkPackARGB32_SSE2(a, r, g, b); | |
148 } | |
149 | |
150 static __m128i plus_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
151 __m128i b = saturated_add_SSE2(SkGetPackedB32_SSE2(src), | |
mtklein
2014/04/11 18:41:54
Any reason to do this one in b-g-r-a order instead
qiankun
2014/04/14 02:33:01
No special reason for the b-g-r-a order, I just fo
| |
152 SkGetPackedB32_SSE2(dst)); | |
153 __m128i g = saturated_add_SSE2(SkGetPackedG32_SSE2(src), | |
154 SkGetPackedG32_SSE2(dst)); | |
155 __m128i r = saturated_add_SSE2(SkGetPackedR32_SSE2(src), | |
156 SkGetPackedR32_SSE2(dst)); | |
157 __m128i a = saturated_add_SSE2(SkGetPackedA32_SSE2(src), | |
158 SkGetPackedA32_SSE2(dst)); | |
159 return SkPackARGB32_SSE2(a, r, g, b); | |
160 } | |
161 | |
162 static __m128i modulate_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
163 __m128i a = SkAlphaMulAlpha_SSE2(SkGetPackedA32_SSE2(src), | |
164 SkGetPackedA32_SSE2(dst)); | |
165 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), | |
166 SkGetPackedR32_SSE2(dst)); | |
167 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), | |
168 SkGetPackedG32_SSE2(dst)); | |
169 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), | |
170 SkGetPackedB32_SSE2(dst)); | |
171 return SkPackARGB32_SSE2(a, r, g, b); | |
172 } | |
173 | |
41 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { | 174 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { |
42 // a + b - SkAlphaMulAlpha(a, b); | 175 // a + b - SkAlphaMulAlpha(a, b); |
43 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); | 176 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); |
44 | 177 |
45 } | 178 } |
46 | 179 |
47 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc, | 180 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc, |
48 const __m128i& sa, const __m128i& da) { | 181 const __m128i& sa, const __m128i& da) { |
49 // sc * (255 - da) | 182 // sc * (255 - da) |
50 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); | 183 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); |
(...skipping 26 matching lines...) Expand all Loading... | |
77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); | 210 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); |
78 | 211 |
79 | 212 |
80 __m128i sb = SkGetPackedB32_SSE2(src); | 213 __m128i sb = SkGetPackedB32_SSE2(src); |
81 __m128i db = SkGetPackedB32_SSE2(dst); | 214 __m128i db = SkGetPackedB32_SSE2(dst); |
82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); | 215 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); |
83 | 216 |
84 return SkPackARGB32_SSE2(a, r, g, b); | 217 return SkPackARGB32_SSE2(a, r, g, b); |
85 } | 218 } |
86 | 219 |
220 static __m128i screen_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
221 __m128i a = srcover_byte_SSE2(SkGetPackedA32_SSE2(src), | |
222 SkGetPackedA32_SSE2(dst)); | |
223 __m128i r = srcover_byte_SSE2(SkGetPackedR32_SSE2(src), | |
224 SkGetPackedR32_SSE2(dst)); | |
225 __m128i g = srcover_byte_SSE2(SkGetPackedG32_SSE2(src), | |
226 SkGetPackedG32_SSE2(dst)); | |
227 __m128i b = srcover_byte_SSE2(SkGetPackedB32_SSE2(src), | |
228 SkGetPackedB32_SSE2(dst)); | |
229 return SkPackARGB32_SSE2(a, r, g, b); | |
230 } | |
231 | |
87 //////////////////////////////////////////////////////////////////////////////// | 232 //////////////////////////////////////////////////////////////////////////////// |
88 | 233 |
89 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); | 234 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); |
90 | 235 |
91 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; | 236 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; |
92 | 237 |
93 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) | 238 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) |
94 : INHERITED(buffer) { | 239 : INHERITED(buffer) { |
95 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); | 240 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); |
96 } | 241 } |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
219 } | 364 } |
220 #endif | 365 #endif |
221 | 366 |
222 //////////////////////////////////////////////////////////////////////////////// | 367 //////////////////////////////////////////////////////////////////////////////// |
223 | 368 |
224 // 4 pixels modeprocs with SSE2 | 369 // 4 pixels modeprocs with SSE2 |
225 SkXfermodeProcSIMD gSSE2XfermodeProcs[] = { | 370 SkXfermodeProcSIMD gSSE2XfermodeProcs[] = { |
226 NULL, // kClear_Mode | 371 NULL, // kClear_Mode |
227 NULL, // kSrc_Mode | 372 NULL, // kSrc_Mode |
228 NULL, // kDst_Mode | 373 NULL, // kDst_Mode |
229 NULL, // kSrcOver_Mode | 374 srcover_modeproc_SSE2, |
230 NULL, // kDstOver_Mode | 375 dstover_modeproc_SSE2, |
231 NULL, // kSrcIn_Mode | 376 srcin_modeproc_SSE2, |
232 NULL, // kDstIn_Mode | 377 dstin_modeproc_SSE2, |
233 NULL, // kSrcOut_Mode | 378 srcout_modeproc_SSE2, |
234 NULL, // kDstOut_Mode | 379 dstout_modeproc_SSE2, |
235 NULL, // kSrcATop_Mode | 380 srcatop_modeproc_SSE2, |
236 NULL, // kDstATop_Mode | 381 dstatop_modeproc_SSE2, |
237 NULL, // kXor_Mode | 382 xor_modeproc_SSE2, |
238 NULL, // kPlus_Mode | 383 plus_modeproc_SSE2, |
239 NULL, // kModulate_Mode | 384 modulate_modeproc_SSE2, |
240 NULL, // kScreen_Mode | 385 screen_modeproc_SSE2, |
241 | 386 |
242 NULL, // kOverlay_Mode | 387 NULL, // kOverlay_Mode |
243 NULL, // kDarken_Mode | 388 NULL, // kDarken_Mode |
244 NULL, // kLighten_Mode | 389 NULL, // kLighten_Mode |
245 NULL, // kColorDodge_Mode | 390 NULL, // kColorDodge_Mode |
246 NULL, // kColorBurn_Mode | 391 NULL, // kColorBurn_Mode |
247 NULL, // kHardLight_Mode | 392 NULL, // kHardLight_Mode |
248 NULL, // kSoftLight_Mode | 393 NULL, // kSoftLight_Mode |
249 NULL, // kDifference_Mode | 394 NULL, // kDifference_Mode |
250 NULL, // kExclusion_Mode | 395 NULL, // kExclusion_Mode |
251 multiply_modeproc_SSE2, | 396 multiply_modeproc_SSE2, |
252 | 397 |
253 NULL, // kHue_Mode | 398 NULL, // kHue_Mode |
254 NULL, // kSaturation_Mode | 399 NULL, // kSaturation_Mode |
255 NULL, // kColor_Mode | 400 NULL, // kColor_Mode |
256 NULL, // kLuminosity_Mode | 401 NULL, // kLuminosity_Mode |
257 }; | 402 }; |
258 | 403 |
259 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 404 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
260 SkXfermode::Mode mode) { | 405 SkXfermode::Mode mode) { |
261 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 406 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); |
262 | 407 |
263 if (procSIMD != NULL) { | 408 if (procSIMD != NULL) { |
264 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 409 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); |
265 } | 410 } |
266 return NULL; | 411 return NULL; |
267 } | 412 } |
OLD | NEW |