Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/opts/SkXfermode_opts_SSE2.cpp

Issue 232793002: Xfermode: SSE2 implementation of a number of simple transfer modes (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/opts/SkColor_opts_SSE2.h ('K') | « src/opts/SkColor_opts_SSE2.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 #include "SkColorPriv.h" 1 #include "SkColorPriv.h"
2 #include "SkColor_opts_SSE2.h" 2 #include "SkColor_opts_SSE2.h"
3 #include "SkMathPriv.h" 3 #include "SkMathPriv.h"
4 #include "SkXfermode.h" 4 #include "SkXfermode.h"
5 #include "SkXfermode_opts_SSE2.h" 5 #include "SkXfermode_opts_SSE2.h"
6 #include "SkXfermode_proccoeff.h" 6 #include "SkXfermode_proccoeff.h"
7 7
8 //////////////////////////////////////////////////////////////////////////////// 8 ////////////////////////////////////////////////////////////////////////////////
9 // 4 pixels SSE2 version functions 9 // 4 pixels SSE2 version functions
10 //////////////////////////////////////////////////////////////////////////////// 10 ////////////////////////////////////////////////////////////////////////////////
11 11
12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { 12 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) {
13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; 13 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128;
14 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) 14 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8)
15 prod = _mm_srli_epi32(prod, 8); // >> 8 15 prod = _mm_srli_epi32(prod, 8); // >> 8
16 16
17 return prod; 17 return prod;
18 } 18 }
19 19
20 static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) {
21 __m128i sum = _mm_add_epi32(a, b);
22 __m128i cmp = _mm_cmpgt_epi32(sum, _mm_set1_epi32(255));
23
24 sum = _mm_or_si128(_mm_and_si128(cmp, _mm_set1_epi32(255)),
25 _mm_andnot_si128(cmp, sum));
26 return sum;
27 }
28
20 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { 29 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) {
21 // test if > 0 30 // test if > 0
22 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); 31 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128());
23 // test if < 255*255 32 // test if < 255*255
24 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); 33 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255));
25 34
26 __m128i ret = _mm_setzero_si128(); 35 __m128i ret = _mm_setzero_si128();
27 36
28 // if value >= 255*255, value = 255 37 // if value >= 255*255, value = 255
29 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); 38 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255));
30 39
31 __m128i div = SkDiv255Round_SSE2(prod); 40 __m128i div = SkDiv255Round_SSE2(prod);
32 41
33 // test if > 0 && < 255*255 42 // test if > 0 && < 255*255
34 __m128i cmp = _mm_and_si128(cmp1, cmp2); 43 __m128i cmp = _mm_and_si128(cmp1, cmp2);
35 44
36 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); 45 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret));
37 46
38 return ret; 47 return ret;
39 } 48 }
40 49
50 static __m128i srcover_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
51 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src));
52 return _mm_add_epi32(src, SkAlphaMulQ_SSE2(dst, isa));
53 }
54
55 static __m128i dstover_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
56 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst));
57 return _mm_add_epi32(dst, SkAlphaMulQ_SSE2(src, ida));
58 }
59
60 static __m128i srcin_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
61 __m128i da = SkGetPackedA32_SSE2(dst);
62 return SkAlphaMulQ_SSE2(src, SkAlpha255To256_SSE2(da));
63 }
64
65 static __m128i dstin_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
66 __m128i sa = SkGetPackedA32_SSE2(src);
67 return SkAlphaMulQ_SSE2(dst, SkAlpha255To256_SSE2(sa));
68 }
69
70 static __m128i srcout_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
71 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst));
72 return SkAlphaMulQ_SSE2(src, ida);
73 }
74
75 static __m128i dstout_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
76 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src));
77 return SkAlphaMulQ_SSE2(dst, isa);
78 }
79
80 static __m128i srcatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
81 __m128i sa = SkGetPackedA32_SSE2(src);
82 __m128i da = SkGetPackedA32_SSE2(dst);
83 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa);
84
85 __m128i a = da;
86
87 __m128i r1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedR32_SSE2(src));
88 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst));
89 __m128i r = _mm_add_epi32(r1, r2);
90
91 __m128i g1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedG32_SSE2(src));
92 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst));
93 __m128i g = _mm_add_epi32(g1, g2);
94
95 __m128i b1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedB32_SSE2(src));
96 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst));
97 __m128i b = _mm_add_epi32(b1, b2);
98
99 return SkPackARGB32_SSE2(a, r, g, b);
100 }
101
102 static __m128i dstatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
103 __m128i sa = SkGetPackedA32_SSE2(src);
104 __m128i da = SkGetPackedA32_SSE2(dst);
105 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da);
106
107 __m128i a = sa;
108
109 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src));
110 __m128i r2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedR32_SSE2(dst));
111 __m128i r = _mm_add_epi32(r1, r2);
112
113 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src));
114 __m128i g2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedG32_SSE2(dst));
115 __m128i g = _mm_add_epi32(g1, g2);
116
117 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src));
118 __m128i b2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedB32_SSE2(dst));
119 __m128i b = _mm_add_epi32(b1, b2);
120
121 return SkPackARGB32_SSE2(a, r, g, b);
122 }
123
124 static __m128i xor_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
125 __m128i sa = SkGetPackedA32_SSE2(src);
126 __m128i da = SkGetPackedA32_SSE2(dst);
127 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa);
128 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da);
129
130 __m128i a1 = _mm_add_epi32(sa, da);
131 __m128i a2 = SkAlphaMulAlpha_SSE2(sa, da);
132 a2 = _mm_slli_epi32(a2, 1);
133 __m128i a = _mm_sub_epi32(a1, a2);
134
135 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src));
136 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst));
137 __m128i r = _mm_add_epi32(r1, r2);
138
139 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src));
140 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst));
141 __m128i g = _mm_add_epi32(g1, g2);
142
143 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src));
144 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst));
145 __m128i b = _mm_add_epi32(b1, b2);
146
147 return SkPackARGB32_SSE2(a, r, g, b);
148 }
149
150 static __m128i plus_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
151 __m128i b = saturated_add_SSE2(SkGetPackedB32_SSE2(src),
mtklein 2014/04/11 18:41:54 Any reason to do this one in b-g-r-a order instead
qiankun 2014/04/14 02:33:01 No special reason for the b-g-r-a order, I just fo
152 SkGetPackedB32_SSE2(dst));
153 __m128i g = saturated_add_SSE2(SkGetPackedG32_SSE2(src),
154 SkGetPackedG32_SSE2(dst));
155 __m128i r = saturated_add_SSE2(SkGetPackedR32_SSE2(src),
156 SkGetPackedR32_SSE2(dst));
157 __m128i a = saturated_add_SSE2(SkGetPackedA32_SSE2(src),
158 SkGetPackedA32_SSE2(dst));
159 return SkPackARGB32_SSE2(a, r, g, b);
160 }
161
162 static __m128i modulate_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
163 __m128i a = SkAlphaMulAlpha_SSE2(SkGetPackedA32_SSE2(src),
164 SkGetPackedA32_SSE2(dst));
165 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src),
166 SkGetPackedR32_SSE2(dst));
167 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src),
168 SkGetPackedG32_SSE2(dst));
169 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src),
170 SkGetPackedB32_SSE2(dst));
171 return SkPackARGB32_SSE2(a, r, g, b);
172 }
173
41 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { 174 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) {
42 // a + b - SkAlphaMulAlpha(a, b); 175 // a + b - SkAlphaMulAlpha(a, b);
43 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); 176 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b));
44 177
45 } 178 }
46 179
47 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc, 180 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc,
48 const __m128i& sa, const __m128i& da) { 181 const __m128i& sa, const __m128i& da) {
49 // sc * (255 - da) 182 // sc * (255 - da)
50 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); 183 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da);
(...skipping 26 matching lines...) Expand all
77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); 210 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da);
78 211
79 212
80 __m128i sb = SkGetPackedB32_SSE2(src); 213 __m128i sb = SkGetPackedB32_SSE2(src);
81 __m128i db = SkGetPackedB32_SSE2(dst); 214 __m128i db = SkGetPackedB32_SSE2(dst);
82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); 215 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da);
83 216
84 return SkPackARGB32_SSE2(a, r, g, b); 217 return SkPackARGB32_SSE2(a, r, g, b);
85 } 218 }
86 219
220 static __m128i screen_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
221 __m128i a = srcover_byte_SSE2(SkGetPackedA32_SSE2(src),
222 SkGetPackedA32_SSE2(dst));
223 __m128i r = srcover_byte_SSE2(SkGetPackedR32_SSE2(src),
224 SkGetPackedR32_SSE2(dst));
225 __m128i g = srcover_byte_SSE2(SkGetPackedG32_SSE2(src),
226 SkGetPackedG32_SSE2(dst));
227 __m128i b = srcover_byte_SSE2(SkGetPackedB32_SSE2(src),
228 SkGetPackedB32_SSE2(dst));
229 return SkPackARGB32_SSE2(a, r, g, b);
230 }
231
87 //////////////////////////////////////////////////////////////////////////////// 232 ////////////////////////////////////////////////////////////////////////////////
88 233
89 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); 234 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst);
90 235
91 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; 236 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[];
92 237
93 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) 238 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer)
94 : INHERITED(buffer) { 239 : INHERITED(buffer) {
95 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); 240 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]);
96 } 241 }
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 } 364 }
220 #endif 365 #endif
221 366
222 //////////////////////////////////////////////////////////////////////////////// 367 ////////////////////////////////////////////////////////////////////////////////
223 368
224 // 4 pixels modeprocs with SSE2 369 // 4 pixels modeprocs with SSE2
225 SkXfermodeProcSIMD gSSE2XfermodeProcs[] = { 370 SkXfermodeProcSIMD gSSE2XfermodeProcs[] = {
226 NULL, // kClear_Mode 371 NULL, // kClear_Mode
227 NULL, // kSrc_Mode 372 NULL, // kSrc_Mode
228 NULL, // kDst_Mode 373 NULL, // kDst_Mode
229 NULL, // kSrcOver_Mode 374 srcover_modeproc_SSE2,
230 NULL, // kDstOver_Mode 375 dstover_modeproc_SSE2,
231 NULL, // kSrcIn_Mode 376 srcin_modeproc_SSE2,
232 NULL, // kDstIn_Mode 377 dstin_modeproc_SSE2,
233 NULL, // kSrcOut_Mode 378 srcout_modeproc_SSE2,
234 NULL, // kDstOut_Mode 379 dstout_modeproc_SSE2,
235 NULL, // kSrcATop_Mode 380 srcatop_modeproc_SSE2,
236 NULL, // kDstATop_Mode 381 dstatop_modeproc_SSE2,
237 NULL, // kXor_Mode 382 xor_modeproc_SSE2,
238 NULL, // kPlus_Mode 383 plus_modeproc_SSE2,
239 NULL, // kModulate_Mode 384 modulate_modeproc_SSE2,
240 NULL, // kScreen_Mode 385 screen_modeproc_SSE2,
241 386
242 NULL, // kOverlay_Mode 387 NULL, // kOverlay_Mode
243 NULL, // kDarken_Mode 388 NULL, // kDarken_Mode
244 NULL, // kLighten_Mode 389 NULL, // kLighten_Mode
245 NULL, // kColorDodge_Mode 390 NULL, // kColorDodge_Mode
246 NULL, // kColorBurn_Mode 391 NULL, // kColorBurn_Mode
247 NULL, // kHardLight_Mode 392 NULL, // kHardLight_Mode
248 NULL, // kSoftLight_Mode 393 NULL, // kSoftLight_Mode
249 NULL, // kDifference_Mode 394 NULL, // kDifference_Mode
250 NULL, // kExclusion_Mode 395 NULL, // kExclusion_Mode
251 multiply_modeproc_SSE2, 396 multiply_modeproc_SSE2,
252 397
253 NULL, // kHue_Mode 398 NULL, // kHue_Mode
254 NULL, // kSaturation_Mode 399 NULL, // kSaturation_Mode
255 NULL, // kColor_Mode 400 NULL, // kColor_Mode
256 NULL, // kLuminosity_Mode 401 NULL, // kLuminosity_Mode
257 }; 402 };
258 403
259 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, 404 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
260 SkXfermode::Mode mode) { 405 SkXfermode::Mode mode) {
261 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); 406 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]);
262 407
263 if (procSIMD != NULL) { 408 if (procSIMD != NULL) {
264 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); 409 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD));
265 } 410 }
266 return NULL; 411 return NULL;
267 } 412 }
OLD | NEW
« src/opts/SkColor_opts_SSE2.h ('K') | « src/opts/SkColor_opts_SSE2.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698