Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(231)

Side by Side Diff: src/opts/SkXfermode_opts_SSE2.cpp

Issue 224823004: Xfermode: SSE2 implementation of colordodge&colorburn modes (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 #include "SkColorPriv.h" 1 #include "SkColorPriv.h"
2 #include "SkColor_opts_SSE2.h" 2 #include "SkColor_opts_SSE2.h"
3 #include "SkMathPriv.h" 3 #include "SkMathPriv.h"
4 #include "SkXfermode.h" 4 #include "SkXfermode.h"
5 #include "SkXfermode_opts_SSE2.h" 5 #include "SkXfermode_opts_SSE2.h"
6 #include "SkXfermode_proccoeff.h" 6 #include "SkXfermode_proccoeff.h"
7 7
8 //////////////////////////////////////////////////////////////////////////////// 8 ////////////////////////////////////////////////////////////////////////////////
9 // 4 pixels SSE2 version functions 9 // 4 pixels SSE2 version functions
10 //////////////////////////////////////////////////////////////////////////////// 10 ////////////////////////////////////////////////////////////////////////////////
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); 77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da);
78 78
79 79
80 __m128i sb = SkGetPackedB32_SSE2(src); 80 __m128i sb = SkGetPackedB32_SSE2(src);
81 __m128i db = SkGetPackedB32_SSE2(dst); 81 __m128i db = SkGetPackedB32_SSE2(dst);
82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); 82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da);
83 83
84 return SkPackARGB32_SSE2(a, r, g, b); 84 return SkPackARGB32_SSE2(a, r, g, b);
85 } 85 }
86 86
// Per-lane minimum of a and b, comparing each 32-bit lane as a signed integer.
// Built from compare + mask-select, using only SSE2 intrinsics.
87 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) {
// cmp is all-ones in every lane where a < b and zero elsewhere.
88 __m128i cmp = _mm_cmplt_epi32(a, b);
// Take a where the mask is set, b where it is clear.
89 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b));
90 }
91
// Per-lane 32-bit multiply: returns the low 32 bits of a[i] * b[i] for each
// of the four lanes.
// _mm_mul_epu32 only multiplies lanes 0 and 2 (yielding two 64-bit products),
// so the work is done in two passes and the results are re-interleaved.
92 static inline __m128i Multiply32_SSE2(const __m128i& a, const __m128i& b) {
// Products of lanes 0 and 2.
93 __m128i r1 = _mm_mul_epu32(a, b);
// Shift both inputs right by 4 bytes so lanes 1 and 3 land in positions 0 and 2.
94 __m128i r2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
// Each shuffle moves the low 32 bits of both 64-bit products into the two
// lowest lanes; unpacklo then interleaves them back into lane order 0,1,2,3.
95 __m128i r = _mm_unpacklo_epi32(_mm_shuffle_epi32(r1, _MM_SHUFFLE(0,0,2,0)),
96 _mm_shuffle_epi32(r2, _MM_SHUFFLE(0,0,2,0)));
97 return r;
98 }
99
// Color-dodge blend of a single colour channel, evaluated for four lanes at once.
//   sc, dc - source / destination channel value, one per 32-bit lane
//   sa, da - source / destination alpha, one per 32-bit lane
// Returns the blended channel value per lane.
// NOTE(review): the _mm_mullo_epi16 products below only equal the full 32-bit
// product if every lane holds a value in 0..255 - presumably guaranteed by the
// SkGetPacked*32_SSE2 extractors; confirm.
// There is no per-lane branching. The three cases marked by the
// if / else if / else comments are each computed for every lane:
//   rc1 (dc == 0), rc2 (sa == sc), rc3 (everything else)
// and each is ANDed with its own mask (cmp1, cmp = cmp2 & ~cmp1, ~cmp3) so
// that exactly one of them is non-zero per lane before they are ORed together.
// In the else case the quotient (dc * sa) / (sa - sc) is obtained through a
// float division truncated back to int32 and overwrites diff. Lanes where
// diff is zero still go through that division, but cmp3 masks them out of rc3,
// so the meaningless quotient never reaches the result. The quotient is then
// clamped to da with SkMin32_SSE2, scaled by sa, and combined with the shared
// tmp2 and tmp3 terms.
100 static inline __m128i colordodge_byte_SSE2(const __m128i& sc, const __m128i& dc,
101 const __m128i& sa, const __m128i& da) {
102 __m128i diff = _mm_sub_epi32(sa, sc);
// ida / isa are the inverse alphas, 255 - da and 255 - sa.
103 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da);
104 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa);
105
106 // if (0 == dc)
107 __m128i cmp1 = _mm_cmpeq_epi32(dc, _mm_setzero_si128());
108 __m128i rc1 = _mm_and_si128(cmp1, SkAlphaMulAlpha_SSE2(sc, ida));
109
110 // else if (0 == diff)
111 __m128i cmp2 = _mm_cmpeq_epi32(diff, _mm_setzero_si128());
// Exclude lanes already claimed by the first case.
112 __m128i cmp = _mm_andnot_si128(cmp1, cmp2);
113 __m128i tmp1 = _mm_mullo_epi16(sa, da);
// tmp2 and tmp3 are reused by the else case further down.
114 __m128i tmp2 = _mm_mullo_epi16(sc, ida);
115 __m128i tmp3 = _mm_mullo_epi16(dc, isa);
116 __m128i rc2 = _mm_add_epi32(tmp1, tmp2);
117 rc2 = _mm_add_epi32(rc2, tmp3);
118 rc2 = clamp_div255round_SSE2(rc2);
119 rc2 = _mm_and_si128(cmp, rc2);
120
121 // else
// cmp3 marks lanes handled by either earlier case; rc3 keeps the rest.
122 __m128i cmp3 = _mm_or_si128(cmp1, cmp2);
123 __m128i value = _mm_mullo_epi16(dc, sa);
// Integer division emulated in float: convert, divide, truncate to int32.
124 __m128 x = _mm_cvtepi32_ps(value);
125 __m128 y = _mm_cvtepi32_ps(diff);
126 diff = _mm_cvttps_epi32(_mm_div_ps(x, y));
mtklein 2014/04/24 18:13:31 Think it's worth pulling these three lines out as
qiankun 2014/04/25 09:24:38 Done.
127
128 __m128i tmp4 = SkMin32_SSE2(da, diff);
129 tmp4 = Multiply32_SSE2(sa, tmp4);
130 __m128i rc3 = _mm_add_epi32(tmp4, tmp2);
131 rc3 = _mm_add_epi32(rc3, tmp3);
132 rc3 = clamp_div255round_SSE2(rc3);
133 rc3 = _mm_andnot_si128(cmp3, rc3);
134
135 __m128i rc = _mm_or_si128(rc1, rc2);
136 rc = _mm_or_si128(rc, rc3);
137
138 return rc;
139 }
140
// Color-dodge mode proc operating on whole packed pixels in src and dst
// (presumably four 32-bit pixels per vector, per the section banner above).
// Extracts the alpha lanes once, gets the result alpha from srcover_byte_SSE2,
// runs R, G and B independently through colordodge_byte_SSE2, and repacks the
// four channels with SkPackARGB32_SSE2.
// Installed in the kColorDodge_Mode slot of gSSE2XfermodeProcs.
141 static __m128i colordodge_modeproc_SSE2(const __m128i& src,
142 const __m128i& dst) {
143 __m128i sa = SkGetPackedA32_SSE2(src);
144 __m128i da = SkGetPackedA32_SSE2(dst);
145
146 __m128i a = srcover_byte_SSE2(sa, da);
147 __m128i r = colordodge_byte_SSE2(SkGetPackedR32_SSE2(src),
148 SkGetPackedR32_SSE2(dst), sa, da);
149 __m128i g = colordodge_byte_SSE2(SkGetPackedG32_SSE2(src),
150 SkGetPackedG32_SSE2(dst), sa, da);
151 __m128i b = colordodge_byte_SSE2(SkGetPackedB32_SSE2(src),
152 SkGetPackedB32_SSE2(dst), sa, da);
153 return SkPackARGB32_SSE2(a, r, g, b);
154 }
155
// Color-burn blend of a single colour channel, evaluated for four lanes at once.
//   sc, dc - source / destination channel value, one per 32-bit lane
//   sa, da - source / destination alpha, one per 32-bit lane
// Returns the blended channel value per lane.
// NOTE(review): the _mm_mullo_epi16 products below only equal the full 32-bit
// product if every lane holds a value in 0..255 - presumably guaranteed by the
// SkGetPacked*32_SSE2 extractors; confirm.
// As in the color-dodge helper, all three cases (dc == da, sc == 0, otherwise)
// are computed for every lane and then combined through masks so that exactly
// one candidate is non-zero per lane.
156 static inline __m128i colorburn_byte_SSE2(const __m128i& sc, const __m128i& dc,
157 const __m128i& sa, const __m128i& da) {
// ida / isa are the inverse alphas, 255 - da and 255 - sa.
158 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da);
159 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa);
160
161 // if (dc == da)
162 __m128i cmp1 = _mm_cmpeq_epi32(dc, da);
163 __m128i tmp1 = _mm_mullo_epi16(sa, da);
// tmp2 and tmp3 are reused by the else case further down.
164 __m128i tmp2 = _mm_mullo_epi16(sc, ida);
165 __m128i tmp3 = _mm_mullo_epi16(dc, isa);
166 __m128i rc1 = _mm_add_epi32(tmp1, tmp2);
167 rc1 = _mm_add_epi32(rc1, tmp3);
168 rc1 = clamp_div255round_SSE2(rc1);
169 rc1 = _mm_and_si128(cmp1, rc1);
170
171 // else if (0 == sc)
172 __m128i cmp2 = _mm_cmpeq_epi32(sc, _mm_setzero_si128());
173 __m128i rc2 = SkAlphaMulAlpha_SSE2(dc, isa);
// Exclude lanes already claimed by the first case.
174 __m128i cmp = _mm_andnot_si128(cmp1, cmp2);
175 rc2 = _mm_and_si128(cmp, rc2);
176
177 // else
// cmp3 marks lanes handled by either earlier case; rc3 keeps the rest.
178 __m128i cmp3 = _mm_or_si128(cmp1, cmp2);
179 __m128i tmp4 = _mm_sub_epi32(da, dc);
180 tmp4 = Multiply32_SSE2(tmp4, sa);
// Integer division (da - dc) * sa / sc emulated in float: convert, divide,
// truncate back to int32. Lanes with sc == 0 divide by zero here, but they
// are removed from rc3 by the cmp3 mask below.
181 __m128 x = _mm_cvtepi32_ps(tmp4);
182 __m128 y = _mm_cvtepi32_ps(sc);
183 tmp4 = _mm_cvttps_epi32(_mm_div_ps(x, y));
184
// Clamp the quotient to da before subtracting, so tmp5 is da - min(da, q).
185 __m128i tmp5 = _mm_sub_epi32(da, SkMin32_SSE2(da, tmp4));
186 tmp5 = Multiply32_SSE2(sa, tmp5);
187 __m128i rc3 = _mm_add_epi32(tmp5, tmp2);
188 rc3 = _mm_add_epi32(rc3, tmp3);
189 rc3 = clamp_div255round_SSE2(rc3);
190 rc3 = _mm_andnot_si128(cmp3, rc3);
191
// The masks are mutually exclusive, so OR merges the three candidates.
192 __m128i rc = _mm_or_si128(rc1, rc2);
193 rc = _mm_or_si128(rc, rc3);
194
195 return rc;
196 }
197
// Color-burn mode proc operating on whole packed pixels in src and dst
// (presumably four 32-bit pixels per vector, per the section banner above).
// Extracts the alpha lanes once, gets the result alpha from srcover_byte_SSE2,
// runs R, G and B independently through colorburn_byte_SSE2, and repacks the
// four channels with SkPackARGB32_SSE2.
// Installed in the kColorBurn_Mode slot of gSSE2XfermodeProcs.
198 static __m128i colorburn_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
199 __m128i sa = SkGetPackedA32_SSE2(src);
200 __m128i da = SkGetPackedA32_SSE2(dst);
201
202 __m128i a = srcover_byte_SSE2(sa, da);
203 __m128i r = colorburn_byte_SSE2(SkGetPackedR32_SSE2(src),
204 SkGetPackedR32_SSE2(dst), sa, da);
205 __m128i g = colorburn_byte_SSE2(SkGetPackedG32_SSE2(src),
206 SkGetPackedG32_SSE2(dst), sa, da);
207 __m128i b = colorburn_byte_SSE2(SkGetPackedB32_SSE2(src),
208 SkGetPackedB32_SSE2(dst), sa, da);
209 return SkPackARGB32_SSE2(a, r, g, b);
210 }
211
87 //////////////////////////////////////////////////////////////////////////////// 212 ////////////////////////////////////////////////////////////////////////////////
88 213
// Signature shared by every SIMD mode proc in this file: takes packed src and
// dst pixel vectors and returns the blended vector.
89 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); 214 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst);
90 215
// Table of SSE2 procs indexed by xfermode; defined further down in this file.
// A NULL entry means no SSE2 implementation is provided for that mode.
91 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; 216 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[];
92 217
// Constructs from an SkReadBuffer by delegating to INHERITED(buffer), then
// caches the gSSE2XfermodeProcs entry for this->getMode() in fProcSIMD as an
// untyped pointer. The stored value is NULL when the table has no SSE2 proc
// for that mode.
// NOTE(review): the table index is not range-checked here - presumably
// getMode() is always a valid mode; confirm.
93 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) 218 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer)
94 : INHERITED(buffer) { 219 : INHERITED(buffer) {
95 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); 220 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]);
96 } 221 }
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
235 NULL, // kSrcATop_Mode 360 NULL, // kSrcATop_Mode
236 NULL, // kDstATop_Mode 361 NULL, // kDstATop_Mode
237 NULL, // kXor_Mode 362 NULL, // kXor_Mode
238 NULL, // kPlus_Mode 363 NULL, // kPlus_Mode
239 NULL, // kModulate_Mode 364 NULL, // kModulate_Mode
240 NULL, // kScreen_Mode 365 NULL, // kScreen_Mode
241 366
242 NULL, // kOverlay_Mode 367 NULL, // kOverlay_Mode
243 NULL, // kDarken_Mode 368 NULL, // kDarken_Mode
244 NULL, // kLighten_Mode 369 NULL, // kLighten_Mode
245 NULL, // kColorDodge_Mode 370 colordodge_modeproc_SSE2,
246 NULL, // kColorBurn_Mode 371 colorburn_modeproc_SSE2,
247 NULL, // kHardLight_Mode 372 NULL, // kHardLight_Mode
248 NULL, // kSoftLight_Mode 373 NULL, // kSoftLight_Mode
249 NULL, // kDifference_Mode 374 NULL, // kDifference_Mode
250 NULL, // kExclusion_Mode 375 NULL, // kExclusion_Mode
251 multiply_modeproc_SSE2, 376 multiply_modeproc_SSE2,
252 377
253 NULL, // kHue_Mode 378 NULL, // kHue_Mode
254 NULL, // kSaturation_Mode 379 NULL, // kSaturation_Mode
255 NULL, // kColor_Mode 380 NULL, // kColor_Mode
256 NULL, // kLuminosity_Mode 381 NULL, // kLuminosity_Mode
257 }; 382 };
258 383
// Factory for the SSE2-accelerated xfermode.
//   rec  - forwarded unchanged to the SkSSE2ProcCoeffXfermode constructor
//   mode - used as the index into gSSE2XfermodeProcs
// Returns a newly allocated SkSSE2ProcCoeffXfermode when the table holds a
// non-NULL proc for mode, and NULL otherwise.
// NOTE(review): mode is not range-checked before indexing the table.
259 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, 384 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
260 SkXfermode::Mode mode) { 385 SkXfermode::Mode mode) {
261 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); 386 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]);
262 387
263 if (procSIMD != NULL) { 388 if (procSIMD != NULL) {
264 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); 389 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD));
265 } 390 }
266 return NULL; 391 return NULL;
267 } 392 }
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698