Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/opts/SkXfermode_opts_SSE2.cpp

Issue 236363012: Xfermode: SSE2 implementation of softlight_modeproc (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #include "SkColorPriv.h" 1 #include "SkColorPriv.h"
2 #include "SkColor_opts_SSE2.h" 2 #include "SkColor_opts_SSE2.h"
3 #include "SkMathPriv.h" 3 #include "SkMathPriv.h"
4 #include "SkXfermode.h" 4 #include "SkXfermode.h"
5 #include "SkXfermode_opts_SSE2.h" 5 #include "SkXfermode_opts_SSE2.h"
6 #include "SkXfermode_proccoeff.h" 6 #include "SkXfermode_proccoeff.h"
7 7
8 //////////////////////////////////////////////////////////////////////////////// 8 ////////////////////////////////////////////////////////////////////////////////
9 // 4 pixels SSE2 version functions 9 // 4 pixels SSE2 version functions
10 //////////////////////////////////////////////////////////////////////////////// 10 ////////////////////////////////////////////////////////////////////////////////
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); 77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da);
78 78
79 79
80 __m128i sb = SkGetPackedB32_SSE2(src); 80 __m128i sb = SkGetPackedB32_SSE2(src);
81 __m128i db = SkGetPackedB32_SSE2(dst); 81 __m128i db = SkGetPackedB32_SSE2(dst);
82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); 82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da);
83 83
84 return SkPackARGB32_SSE2(a, r, g, b); 84 return SkPackARGB32_SSE2(a, r, g, b);
85 } 85 }
86 86
// Multiply32_SSE2: lane-wise multiply of the four 32-bit lanes of a and b,
// keeping the low 32 bits of each product.
// _mm_mul_epu32 only multiplies lanes 0 and 2 (producing two 64-bit results),
// so it is run twice: once on the inputs as-is and once with both inputs
// shifted right by 4 bytes so that lanes 1 and 3 land in positions 0 and 2.
87 //////////////////////////////////////////////////////////////////////////////// 87 static inline __m128i Multiply32_SSE2(const __m128i& a, const __m128i& b) {
// r1: 64-bit products of lanes 0 and 2.
88 __m128i r1 = _mm_mul_epu32(a, b);
// r2: 64-bit products of lanes 1 and 3.
89 __m128i r2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
// _MM_SHUFFLE(0,0,2,0) moves dwords 0 and 2 (the low half of each 64-bit
// product) into the two lowest positions; unpacklo then interleaves r1/r2
// so the results come out in lane order 0,1,2,3.
90 __m128i r = _mm_unpacklo_epi32(_mm_shuffle_epi32(r1, _MM_SHUFFLE(0,0,2,0)),
91 _mm_shuffle_epi32(r2, _MM_SHUFFLE(0,0,2,0)));
92 return r;
93 }
94
// SkSqrtBits_SSE2: bit-by-bit integer square root applied independently to
// each 32-bit lane of x. The do/while below tests (--count >= 0) after the
// body, so it executes count + 1 iterations; each iteration produces one more
// bit of the root.
95 // Portable version of SkSqrtBits is in SkMath.cpp.
96 static inline __m128i SkSqrtBits_SSE2(const __m128i& x, int count) {
mtklein 2014/04/24 17:24:11 Think it's worth spinning off an SkMath_SSE2?
qiankun 2014/04/25 08:13:06 Done. Create a new file SkMath_opts_SSE2.h to hold
97 __m128i root = _mm_setzero_si128();
98 __m128i remHi = _mm_setzero_si128();
99 __m128i remLo = x;
100 __m128i one128 = _mm_set1_epi32(1);
101
102 do {
// Make room for the next result bit.
103 root = _mm_slli_epi32(root, 1);
104
// Shift the top two bits of remLo into the bottom of remHi.
105 remHi = _mm_or_si128(_mm_slli_epi32(remHi, 2),
106 _mm_srli_epi32(remLo, 30));
107 remLo = _mm_slli_epi32(remLo, 2);
108
// testDiv = 2 * root + 1
109 __m128i testDiv = _mm_slli_epi32(root, 1);
110 testDiv = _mm_add_epi32(testDiv, _mm_set1_epi32(1));
111
// Branchless per-lane select. cmp is all-ones where remHi < testDiv (signed
// compare): those lanes keep remHi and root unchanged (remHi1/root1). The
// other lanes subtract testDiv from remHi and add one to root (remHi2/root2).
112 __m128i cmp = _mm_cmplt_epi32(remHi, testDiv);
113 __m128i remHi1 = _mm_and_si128(cmp, remHi);
114 __m128i root1 = _mm_and_si128(cmp, root);
115 __m128i remHi2 = _mm_andnot_si128(cmp, _mm_sub_epi32(remHi, testDiv));
116 __m128i root2 = _mm_andnot_si128(cmp, _mm_add_epi32(root, one128));
117
// The two masked halves are disjoint, so OR merges them.
118 remHi = _mm_or_si128(remHi1, remHi2);
119 root = _mm_or_si128(root1, root2);
120 } while (--count >= 0);
121
122 return root;
123 }
124
// sqrt_unit_byte_SSE2: runs SkSqrtBits_SSE2 on n with count = 15+4, i.e. 20
// loop iterations per lane.
// NOTE(review): the only caller visible here passes m (dc * 256 / da);
// presumably this mirrors a scalar sqrt_unit_byte helper - confirm.
125 static __m128i sqrt_unit_byte_SSE2(const __m128i& n) {
126 return SkSqrtBits_SSE2(n, 15+4);
127 }
128
// softlight_byte_SSE2: soft-light blend of one colour channel, computed for
// four values at once (one per 32-bit lane).
//   sc, dc - source / destination channel values
//   sa, da - source / destination alpha values
// All three branches of the scalar if / else-if / else (quoted in the inline
// comments) are evaluated for every lane; compare masks then select the one
// that applies. The selected value plus sc*(255-da) + dc*(255-sa) is passed to
// clamp_div255round_SSE2 (defined outside this view).
129 static inline __m128i softlight_byte_SSE2(const __m128i& sc, const __m128i& dc,
130 const __m128i& sa, const __m128i& da) {
131 __m128i tmp1, tmp2, tmp3;
132
133 // int m = da ? dc * 256 / da : 0;
// cmp marks lanes where da == 0. The division is done in float and truncated
// back to int; lanes with da == 0 are then forced to 0 by the andnot.
134 __m128i cmp = _mm_cmpeq_epi32(da, _mm_setzero_si128());
135 __m128i m = _mm_slli_epi32(dc, 8);
136 __m128 x = _mm_cvtepi32_ps(m);
137 __m128 y = _mm_cvtepi32_ps(da);
138 m = _mm_cvttps_epi32(_mm_div_ps(x, y));
139 m = _mm_andnot_si128(cmp, m);
140
141 // if (2 * sc <= sa)
// cmp1 is set where 2*sc > sa, i.e. where this first branch does NOT apply.
// Branch value: dc * (sa + (((2*sc - sa) * (256 - m)) >> 8)).
142 tmp1 = _mm_slli_epi32(sc, 1); // 2 * sc
143 __m128i cmp1 = _mm_cmpgt_epi32(tmp1, sa);
144 tmp1 = _mm_sub_epi32(tmp1, sa); // 2*sc - sa
145 tmp2 = _mm_sub_epi32(_mm_set1_epi32(256), m); // 256 - m
146 tmp1 = Multiply32_SSE2(tmp1, tmp2);
147 tmp1 = _mm_srai_epi32(tmp1, 8);
148 tmp1 = _mm_add_epi32(sa, tmp1);
149 tmp1 = Multiply32_SSE2(dc, tmp1);
// Keep the first-branch result only in lanes where 2*sc <= sa.
150 __m128i rc1 = _mm_andnot_si128(cmp1, tmp1);
151
152 // else if (4 * dc <= da)
// cmp2 is set where 4*dc > da, i.e. where this second branch does NOT apply.
153 tmp2 = _mm_slli_epi32(dc, 2); // dc * 4
154 __m128i cmp2 = _mm_cmpgt_epi32(tmp2, da);
155 __m128i i = _mm_slli_epi32(m, 2); // 4 * m
156 __m128i j = _mm_add_epi32(i, _mm_set1_epi32(256)); // 4 * m + 256
157 __m128i k = Multiply32_SSE2(i, j); // 4 * m * (4 * m + 256)
158 __m128i t = _mm_sub_epi32(m, _mm_set1_epi32(256)); // m - 256
159 i = Multiply32_SSE2(k, t); // 4 * m * (4 * m + 256) * (m - 256)
160 i = _mm_srai_epi32(i, 16); // >> 16
161 j = Multiply32_SSE2(_mm_set1_epi32(7), m); // 7 * m
162 tmp2 = _mm_add_epi32(i, j);
// From here on i and j are reused: i = dc * sa and j = da * (2*sc - sa).
// Both are read again by the final "else" section below.
163 i = Multiply32_SSE2(dc, sa); // dc * sa
164 j = _mm_slli_epi32(sc, 1); // 2 * sc
165 j = _mm_sub_epi32(j, sa); // 2 * sc - sa
166 j = Multiply32_SSE2(da, j); // da * (2 * sc - sa)
167 tmp2 = Multiply32_SSE2(j, tmp2); // * tmp
168 tmp2 = _mm_srai_epi32(tmp2, 8); // >> 8
169 tmp2 = _mm_add_epi32(i, tmp2);
// Select lanes where the first test failed (cmp1) and this one holds (~cmp2).
170 cmp = _mm_andnot_si128(cmp2, cmp1);
171 __m128i rc2 = _mm_and_si128(cmp, tmp2);
172 __m128i rc = _mm_or_si128(rc1, rc2);
173
174 // else
// Branch value: dc*sa + ((da * (2*sc - sa) * (sqrt_unit_byte(m) - m)) >> 8).
175 tmp3 = sqrt_unit_byte_SSE2(m);
176 tmp3 = _mm_sub_epi32(tmp3, m);
177 tmp3 = Multiply32_SSE2(j, tmp3); // j = da * (2 * sc - sa)
178 tmp3 = _mm_srai_epi32(tmp3, 8);
179 tmp3 = _mm_add_epi32(i, tmp3); // i = dc * sa
// Select lanes where both earlier tests failed (cmp1 and cmp2 both set).
180 cmp = _mm_and_si128(cmp1, cmp2);
181 __m128i rc3 = _mm_and_si128(cmp, tmp3);
182 rc = _mm_or_si128(rc, rc3);
183
// Add sc * (255 - da) + dc * (255 - sa).
// _mm_mullo_epi16 multiplies 16-bit halves, so it matches the 32-bit product
// only when both operands and the product fit in 16 bits.
// NOTE(review): assumes channel and alpha values are in 0..255 - confirm.
184 tmp1 = _mm_sub_epi32(_mm_set1_epi32(255), da); // 255 - da
185 tmp1 = _mm_mullo_epi16(sc, tmp1);
186 tmp2 = _mm_sub_epi32(_mm_set1_epi32(255), sa); // 255 - sa
187 tmp2 = _mm_mullo_epi16(dc, tmp2);
188 rc = _mm_add_epi32(rc, tmp1);
189 rc = _mm_add_epi32(rc, tmp2);
// NOTE(review): clamp_div255round_SSE2 is not visible here; by its name it
// presumably clamps and divides by 255 with rounding - confirm.
190 return clamp_div255round_SSE2(rc);
191 }
192
// softlight_modeproc_SSE2: soft-light xfermode proc for the pixels packed in
// src and dst. The result alpha comes from srcover_byte_SSE2(sa, da); the R, G
// and B channels are each blended with softlight_byte_SSE2 using the same
// sa/da, and the four channels are repacked with SkPackARGB32_SSE2.
193 static __m128i softlight_modeproc_SSE2(const __m128i& src, const __m128i& dst) {
194 __m128i sa = SkGetPackedA32_SSE2(src);
195 __m128i da = SkGetPackedA32_SSE2(dst);
196
197 __m128i a = srcover_byte_SSE2(sa, da);
198 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src),
199 SkGetPackedR32_SSE2(dst), sa, da);
200 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src),
201 SkGetPackedG32_SSE2(dst), sa, da);
202 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src),
203 SkGetPackedB32_SSE2(dst), sa, da);
204 return SkPackARGB32_SSE2(a, r, g, b);
205 }
206 ///////////////////////////////////////////////////////////////////////////////
88 207
// Function-pointer type for the SIMD xfermode procs: takes src and dst as
// __m128i values by const reference and returns the blended __m128i.
89 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); 208 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst);
90 209
// Table of SIMD procs; the code below indexes it by xfermode mode and treats a
// NULL entry as "no SIMD implementation".
91 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; 210 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[];
92 211
// Constructor taking an SkReadBuffer: the base class (INHERITED) is
// initialised from the buffer, then the SIMD proc for this->getMode() is
// looked up in gSSE2XfermodeProcs and stored in fProcSIMD as a void*.
93 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) 212 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer)
94 : INHERITED(buffer) { 213 : INHERITED(buffer) {
95 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); 214 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]);
96 } 215 }
97 216
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 NULL, // kPlus_Mode 357 NULL, // kPlus_Mode
239 NULL, // kModulate_Mode 358 NULL, // kModulate_Mode
240 NULL, // kScreen_Mode 359 NULL, // kScreen_Mode
241 360
242 NULL, // kOverlay_Mode 361 NULL, // kOverlay_Mode
243 NULL, // kDarken_Mode 362 NULL, // kDarken_Mode
244 NULL, // kLighten_Mode 363 NULL, // kLighten_Mode
245 NULL, // kColorDodge_Mode 364 NULL, // kColorDodge_Mode
246 NULL, // kColorBurn_Mode 365 NULL, // kColorBurn_Mode
247 NULL, // kHardLight_Mode 366 NULL, // kHardLight_Mode
248 NULL, // kSoftLight_Mode 367 softlight_modeproc_SSE2,
249 NULL, // kDifference_Mode 368 NULL, // kDifference_Mode
250 NULL, // kExclusion_Mode 369 NULL, // kExclusion_Mode
251 multiply_modeproc_SSE2, 370 multiply_modeproc_SSE2,
252 371
253 NULL, // kHue_Mode 372 NULL, // kHue_Mode
254 NULL, // kSaturation_Mode 373 NULL, // kSaturation_Mode
255 NULL, // kColor_Mode 374 NULL, // kColor_Mode
256 NULL, // kLuminosity_Mode 375 NULL, // kLuminosity_Mode
257 }; 376 };
258 377
// SkPlatformXfermodeFactory_impl_SSE2: returns a newly allocated
// SkSSE2ProcCoeffXfermode for mode when gSSE2XfermodeProcs has a non-NULL
// entry for it; otherwise returns NULL.
259 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, 378 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
260 SkXfermode::Mode mode) { 379 SkXfermode::Mode mode) {
// The proc is passed around type-erased as a void*.
261 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); 380 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]);
262 381
263 if (procSIMD != NULL) { 382 if (procSIMD != NULL) {
264 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); 383 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD));
265 } 384 }
266 return NULL; 385 return NULL;
267 } 386 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698