OLD | NEW |
---|---|
1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
2 #include "SkColor_opts_SSE2.h" | 2 #include "SkColor_opts_SSE2.h" |
3 #include "SkMathPriv.h" | 3 #include "SkMathPriv.h" |
4 #include "SkXfermode.h" | 4 #include "SkXfermode.h" |
5 #include "SkXfermode_opts_SSE2.h" | 5 #include "SkXfermode_opts_SSE2.h" |
6 #include "SkXfermode_proccoeff.h" | 6 #include "SkXfermode_proccoeff.h" |
7 | 7 |
8 //////////////////////////////////////////////////////////////////////////////// | 8 //////////////////////////////////////////////////////////////////////////////// |
9 // 4 pixels SSE2 version functions | 9 // 4 pixels SSE2 version functions |
10 //////////////////////////////////////////////////////////////////////////////// | 10 //////////////////////////////////////////////////////////////////////////////// |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); | 77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); |
78 | 78 |
79 | 79 |
80 __m128i sb = SkGetPackedB32_SSE2(src); | 80 __m128i sb = SkGetPackedB32_SSE2(src); |
81 __m128i db = SkGetPackedB32_SSE2(dst); | 81 __m128i db = SkGetPackedB32_SSE2(dst); |
82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); | 82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); |
83 | 83 |
84 return SkPackARGB32_SSE2(a, r, g, b); | 84 return SkPackARGB32_SSE2(a, r, g, b); |
85 } | 85 } |
86 | 86 |
// Lane-wise signed minimum of two vectors holding four 32-bit integers.
// _mm_cmplt_epi32 produces an all-ones lane wherever a < b; that mask keeps a
// in those lanes and b in the remaining ones. SSE2 has no packed 32-bit min
// instruction, hence the compare-and-select form.
87 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) { | |
88 __m128i cmp = _mm_cmplt_epi32(a, b); | |
89 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b)); | |
90 } | |
91 | |
// Lane-wise 32-bit multiply of a and b, returning the low 32 bits of each of
// the four products in their original lane order.
// _mm_mul_epu32 only multiplies lanes 0 and 2 (giving two 64-bit products), so
// the work is done in two halves and then re-interleaved.
92 static inline __m128i Multiply32_SSE2(const __m128i& a, const __m128i& b) { | |
// r1 holds the 64-bit products of lanes 0 and 2.
93 __m128i r1 = _mm_mul_epu32(a, b); | |
// Shifting both inputs right by 4 bytes moves lanes 1 and 3 into the even
// positions, so r2 holds the 64-bit products of lanes 1 and 3.
94 __m128i r2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); | |
// Each shuffle packs the low 32 bits of its two products into lanes 0 and 1;
// unpacklo then interleaves them as r1[0], r2[0], r1[1], r2[1].
95 __m128i r = _mm_unpacklo_epi32(_mm_shuffle_epi32(r1, _MM_SHUFFLE(0,0,2,0)), | |
96 _mm_shuffle_epi32(r2, _MM_SHUFFLE(0,0,2,0))); | |
97 return r; | |
98 } | |
99 | |
// Color-dodge blend of one color channel for four pixels at once.
//   sc, dc : source and destination channel values, one per 32-bit lane
//   sa, da : source and destination alpha values, one per 32-bit lane
// The three scalar cases are all evaluated without branching and each result
// is masked so that exactly one survives per lane:
//   dc == 0        -> SkAlphaMulAlpha_SSE2(sc, 255 - da)
//   else sa == sc  -> clamp_div255round(sa*da + sc*(255-da) + dc*(255-sa))
//   else           -> clamp_div255round(sa*min(da, dc*sa/(sa-sc))
//                                       + sc*(255-da) + dc*(255-sa))
// The masked results rc1, rc2, rc3 are finally merged with bitwise OR.
// NOTE(review): the _mm_mullo_epi16 products below equal the full 32-bit
// product only while each lane holds a byte-sized value (0..255); presumably
// the SkGetPacked*32_SSE2 extractors guarantee that - confirm.
100 static inline __m128i colordodge_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
101 const __m128i& sa, const __m128i& da) { | |
102 __m128i diff = _mm_sub_epi32(sa, sc); | |
103 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
104 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
105 | |
106 // if (0 == dc) | |
107 __m128i cmp1 = _mm_cmpeq_epi32(dc, _mm_setzero_si128()); | |
108 __m128i rc1 = _mm_and_si128(cmp1, SkAlphaMulAlpha_SSE2(sc, ida)); | |
109 | |
110 // else if (0 == diff) | |
111 __m128i cmp2 = _mm_cmpeq_epi32(diff, _mm_setzero_si128()); | |
// cmp selects lanes where diff == 0 but dc != 0, i.e. the first case did not
// already claim the lane.
112 __m128i cmp = _mm_andnot_si128(cmp1, cmp2); | |
113 __m128i tmp1 = _mm_mullo_epi16(sa, da); | |
// tmp2 and tmp3 are reused by the third case further down.
114 __m128i tmp2 = _mm_mullo_epi16(sc, ida); | |
115 __m128i tmp3 = _mm_mullo_epi16(dc, isa); | |
116 __m128i rc2 = _mm_add_epi32(tmp1, tmp2); | |
117 rc2 = _mm_add_epi32(rc2, tmp3); | |
118 rc2 = clamp_div255round_SSE2(rc2); | |
119 rc2 = _mm_and_si128(cmp, rc2); | |
120 | |
121 // else | |
// cmp3 marks every lane already handled by one of the first two cases; the
// third result is kept only where cmp3 is clear.
122 __m128i cmp3 = _mm_or_si128(cmp1, cmp2); | |
123 __m128i value = _mm_mullo_epi16(dc, sa); | |
// Integer division dc*sa / (sa-sc) is done in single-precision float and
// truncated back to int. Lanes with diff == 0 divide by zero here, but those
// lanes are set in cmp3 and therefore discarded when rc3 is masked.
124 __m128 x = _mm_cvtepi32_ps(value); | |
125 __m128 y = _mm_cvtepi32_ps(diff); | |
126 diff = _mm_cvttps_epi32(_mm_div_ps(x, y)); | |
mtklein
2014/04/24 18:13:31
Think it's worth pulling these three lines out as
qiankun
2014/04/25 09:24:38
Done.
| |
127 | |
128 __m128i tmp4 = SkMin32_SSE2(da, diff); | |
129 tmp4 = Multiply32_SSE2(sa, tmp4); | |
130 __m128i rc3 = _mm_add_epi32(tmp4, tmp2); | |
131 rc3 = _mm_add_epi32(rc3, tmp3); | |
132 rc3 = clamp_div255round_SSE2(rc3); | |
133 rc3 = _mm_andnot_si128(cmp3, rc3); | |
134 | |
135 __m128i rc = _mm_or_si128(rc1, rc2); | |
136 rc = _mm_or_si128(rc, rc3); | |
137 | |
138 return rc; | |
139 } | |
140 | |
// Color-dodge mode proc for a vector of packed pixels (src over dst).
// Extracts both alphas, computes the result alpha with srcover_byte_SSE2,
// runs colordodge_byte_SSE2 separately on the R, G and B channels, and repacks
// the four channel vectors with SkPackARGB32_SSE2.
141 static __m128i colordodge_modeproc_SSE2(const __m128i& src, | |
142 const __m128i& dst) { | |
143 __m128i sa = SkGetPackedA32_SSE2(src); | |
144 __m128i da = SkGetPackedA32_SSE2(dst); | |
145 | |
146 __m128i a = srcover_byte_SSE2(sa, da); | |
147 __m128i r = colordodge_byte_SSE2(SkGetPackedR32_SSE2(src), | |
148 SkGetPackedR32_SSE2(dst), sa, da); | |
149 __m128i g = colordodge_byte_SSE2(SkGetPackedG32_SSE2(src), | |
150 SkGetPackedG32_SSE2(dst), sa, da); | |
151 __m128i b = colordodge_byte_SSE2(SkGetPackedB32_SSE2(src), | |
152 SkGetPackedB32_SSE2(dst), sa, da); | |
153 return SkPackARGB32_SSE2(a, r, g, b); | |
154 } | |
155 | |
// Color-burn blend of one color channel for four pixels at once.
//   sc, dc : source and destination channel values, one per 32-bit lane
//   sa, da : source and destination alpha values, one per 32-bit lane
// The three scalar cases are all evaluated without branching and each result
// is masked so that exactly one survives per lane:
//   dc == da      -> clamp_div255round(sa*da + sc*(255-da) + dc*(255-sa))
//   else sc == 0  -> SkAlphaMulAlpha_SSE2(dc, 255 - sa)
//   else          -> clamp_div255round(sa*(da - min(da, (da-dc)*sa/sc))
//                                      + sc*(255-da) + dc*(255-sa))
// NOTE(review): the _mm_mullo_epi16 products below equal the full 32-bit
// product only while each lane holds a byte-sized value (0..255); presumably
// the SkGetPacked*32_SSE2 extractors guarantee that - confirm.
156 static inline __m128i colorburn_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
157 const __m128i& sa, const __m128i& da) { | |
158 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
159 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
160 | |
161 // if (dc == da) | |
162 __m128i cmp1 = _mm_cmpeq_epi32(dc, da); | |
163 __m128i tmp1 = _mm_mullo_epi16(sa, da); | |
// tmp2 and tmp3 are reused by the third case further down.
164 __m128i tmp2 = _mm_mullo_epi16(sc, ida); | |
165 __m128i tmp3 = _mm_mullo_epi16(dc, isa); | |
166 __m128i rc1 = _mm_add_epi32(tmp1, tmp2); | |
167 rc1 = _mm_add_epi32(rc1, tmp3); | |
168 rc1 = clamp_div255round_SSE2(rc1); | |
169 rc1 = _mm_and_si128(cmp1, rc1); | |
170 | |
171 // else if (0 == sc) | |
172 __m128i cmp2 = _mm_cmpeq_epi32(sc, _mm_setzero_si128()); | |
173 __m128i rc2 = SkAlphaMulAlpha_SSE2(dc, isa); | |
// cmp selects lanes where sc == 0 but dc != da, i.e. the first case did not
// already claim the lane.
174 __m128i cmp = _mm_andnot_si128(cmp1, cmp2); | |
175 rc2 = _mm_and_si128(cmp, rc2); | |
176 | |
177 // else | |
// cmp3 marks every lane already handled by one of the first two cases; the
// third result is kept only where cmp3 is clear.
178 __m128i cmp3 = _mm_or_si128(cmp1, cmp2); | |
179 __m128i tmp4 = _mm_sub_epi32(da, dc); | |
180 tmp4 = Multiply32_SSE2(tmp4, sa); | |
// Integer division (da-dc)*sa / sc is done in single-precision float and
// truncated back to int. Lanes with sc == 0 divide by zero here, but those
// lanes are set in cmp3 and therefore discarded when rc3 is masked.
181 __m128 x = _mm_cvtepi32_ps(tmp4); | |
182 __m128 y = _mm_cvtepi32_ps(sc); | |
183 tmp4 = _mm_cvttps_epi32(_mm_div_ps(x, y)); | |
184 | |
185 __m128i tmp5 = _mm_sub_epi32(da, SkMin32_SSE2(da, tmp4)); | |
186 tmp5 = Multiply32_SSE2(sa, tmp5); | |
187 __m128i rc3 = _mm_add_epi32(tmp5, tmp2); | |
188 rc3 = _mm_add_epi32(rc3, tmp3); | |
189 rc3 = clamp_div255round_SSE2(rc3); | |
190 rc3 = _mm_andnot_si128(cmp3, rc3); | |
191 | |
// The three masks are mutually exclusive, so OR merges the per-lane results.
192 __m128i rc = _mm_or_si128(rc1, rc2); | |
193 rc = _mm_or_si128(rc, rc3); | |
194 | |
195 return rc; | |
196 } | |
197 | |
// Color-burn mode proc for a vector of packed pixels (src over dst).
// Extracts both alphas, computes the result alpha with srcover_byte_SSE2,
// runs colorburn_byte_SSE2 separately on the R, G and B channels, and repacks
// the four channel vectors with SkPackARGB32_SSE2.
198 static __m128i colorburn_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
199 __m128i sa = SkGetPackedA32_SSE2(src); | |
200 __m128i da = SkGetPackedA32_SSE2(dst); | |
201 | |
202 __m128i a = srcover_byte_SSE2(sa, da); | |
203 __m128i r = colorburn_byte_SSE2(SkGetPackedR32_SSE2(src), | |
204 SkGetPackedR32_SSE2(dst), sa, da); | |
205 __m128i g = colorburn_byte_SSE2(SkGetPackedG32_SSE2(src), | |
206 SkGetPackedG32_SSE2(dst), sa, da); | |
207 __m128i b = colorburn_byte_SSE2(SkGetPackedB32_SSE2(src), | |
208 SkGetPackedB32_SSE2(dst), sa, da); | |
209 return SkPackARGB32_SSE2(a, r, g, b); | |
210 } | |
211 | |
87 //////////////////////////////////////////////////////////////////////////////// | 212 //////////////////////////////////////////////////////////////////////////////// |
88 | 213 |
// Signature shared by the SSE2 mode procs in this file: takes a vector of
// source pixels and a vector of destination pixels and returns the blended
// vector.
89 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); | 214 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); |
90 | 215 |
// Table of SSE2 mode procs indexed by xfermode; defined further down in this
// file. Entries are NULL for modes that have no SSE2 implementation.
91 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; | 216 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; |
92 | 217 |
// Constructs the xfermode from an SkReadBuffer. The buffer is forwarded to the
// base-class (INHERITED) constructor; afterwards the SIMD proc for the
// resulting mode is looked up in gSSE2XfermodeProcs and stored type-erased as
// a void pointer in fProcSIMD. The stored pointer is NULL when the table has
// no SSE2 proc for that mode.
// NOTE(review): getMode() is used as a table index without a range check
// here - confirm the base class only yields modes covered by the table.
93 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) | 218 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) |
94 : INHERITED(buffer) { | 219 : INHERITED(buffer) { |
95 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); | 220 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); |
96 } | 221 } |
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
235 NULL, // kSrcATop_Mode | 360 NULL, // kSrcATop_Mode |
236 NULL, // kDstATop_Mode | 361 NULL, // kDstATop_Mode |
237 NULL, // kXor_Mode | 362 NULL, // kXor_Mode |
238 NULL, // kPlus_Mode | 363 NULL, // kPlus_Mode |
239 NULL, // kModulate_Mode | 364 NULL, // kModulate_Mode |
240 NULL, // kScreen_Mode | 365 NULL, // kScreen_Mode |
241 | 366 |
242 NULL, // kOverlay_Mode | 367 NULL, // kOverlay_Mode |
243 NULL, // kDarken_Mode | 368 NULL, // kDarken_Mode |
244 NULL, // kLighten_Mode | 369 NULL, // kLighten_Mode |
245 NULL, // kColorDodge_Mode | 370 colordodge_modeproc_SSE2, |
246 NULL, // kColorBurn_Mode | 371 colorburn_modeproc_SSE2, |
247 NULL, // kHardLight_Mode | 372 NULL, // kHardLight_Mode |
248 NULL, // kSoftLight_Mode | 373 NULL, // kSoftLight_Mode |
249 NULL, // kDifference_Mode | 374 NULL, // kDifference_Mode |
250 NULL, // kExclusion_Mode | 375 NULL, // kExclusion_Mode |
251 multiply_modeproc_SSE2, | 376 multiply_modeproc_SSE2, |
252 | 377 |
253 NULL, // kHue_Mode | 378 NULL, // kHue_Mode |
254 NULL, // kSaturation_Mode | 379 NULL, // kSaturation_Mode |
255 NULL, // kColor_Mode | 380 NULL, // kColor_Mode |
256 NULL, // kLuminosity_Mode | 381 NULL, // kLuminosity_Mode |
257 }; | 382 }; |
258 | 383 |
// Factory for SSE2-accelerated xfermodes.
// Looks up the SIMD proc registered for mode in gSSE2XfermodeProcs. When one
// exists, returns a newly allocated SkSSE2ProcCoeffXfermode built from rec,
// mode and that proc; otherwise returns NULL so the caller can tell that no
// SSE2 implementation is available for this mode.
259 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 384 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
260 SkXfermode::Mode mode) { | 385 SkXfermode::Mode mode) { |
261 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 386 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); |
262 | 387 |
263 if (procSIMD != NULL) { | 388 if (procSIMD != NULL) { |
264 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 389 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); |
265 } | 390 } |
266 return NULL; | 391 return NULL; |
267 } | 392 } |
OLD | NEW |