OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkColorPriv.h" | 8 #include "SkColorPriv.h" |
9 #include "SkColor_opts_SSE2.h" | 9 #include "SkColor_opts_SSE2.h" |
10 #include "SkMathPriv.h" | 10 #include "SkMathPriv.h" |
11 #include "SkMath_opts_SSE2.h" | 11 #include "SkMath_opts_SSE2.h" |
12 #include "SkXfermode.h" | 12 #include "SkXfermode.h" |
13 #include "SkXfermode_opts_SSE2.h" | 13 #include "SkXfermode_opts_SSE2.h" |
14 #include "SkXfermode_proccoeff.h" | 14 #include "SkXfermode_proccoeff.h" |
15 | 15 |
16 //////////////////////////////////////////////////////////////////////////////// | 16 //////////////////////////////////////////////////////////////////////////////// |
17 // 4 pixels SSE2 version functions | 17 // 4 pixels SSE2 version functions |
18 //////////////////////////////////////////////////////////////////////////////// | 18 //////////////////////////////////////////////////////////////////////////////// |
19 | 19 |
20 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { | 20 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { |
21 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; | 21 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; |
22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) | 22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) |
23 prod = _mm_srli_epi32(prod, 8); // >> 8 | 23 prod = _mm_srli_epi32(prod, 8); // >> 8 |
24 | 24 |
25 return prod; | 25 return prod; |
26 } | 26 } |
27 | 27 |
28 static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) { | |
29 __m128i sum = _mm_add_epi32(a, b); | |
30 __m128i cmp = _mm_cmpgt_epi32(sum, _mm_set1_epi32(255)); | |
31 | |
32 sum = _mm_or_si128(_mm_and_si128(cmp, _mm_set1_epi32(255)), | |
33 _mm_andnot_si128(cmp, sum)); | |
34 return sum; | |
35 } | |
36 | |
37 static inline __m128i clamp_signed_byte_SSE2(const __m128i& n) { | |
38 __m128i cmp1 = _mm_cmplt_epi32(n, _mm_setzero_si128()); | |
39 __m128i cmp2 = _mm_cmpgt_epi32(n, _mm_set1_epi32(255)); | |
40 __m128i ret = _mm_and_si128(cmp2, _mm_set1_epi32(255)); | |
41 | |
42 __m128i cmp = _mm_or_si128(cmp1, cmp2); | |
43 ret = _mm_or_si128(_mm_and_si128(cmp, ret), _mm_andnot_si128(cmp, n)); | |
44 | |
45 return ret; | |
46 } | |
47 | |
48 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { | 28 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { |
49 // test if > 0 | 29 // test if > 0 |
50 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); | 30 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); |
51 // test if < 255*255 | 31 // test if < 255*255 |
52 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); | 32 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); |
53 | 33 |
54 __m128i ret = _mm_setzero_si128(); | 34 __m128i ret = _mm_setzero_si128(); |
55 | 35 |
56 // if value >= 255*255, value = 255 | 36 // if value >= 255*255, value = 255 |
57 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); | 37 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); |
58 | 38 |
59 __m128i div = SkDiv255Round_SSE2(prod); | 39 __m128i div = SkDiv255Round_SSE2(prod); |
60 | 40 |
61 // test if > 0 && < 255*255 | 41 // test if > 0 && < 255*255 |
62 __m128i cmp = _mm_and_si128(cmp1, cmp2); | 42 __m128i cmp = _mm_and_si128(cmp1, cmp2); |
63 | 43 |
64 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); | 44 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); |
65 | 45 |
66 return ret; | 46 return ret; |
67 } | 47 } |
68 | |
69 static __m128i srcover_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
70 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src)); | |
71 return _mm_add_epi32(src, SkAlphaMulQ_SSE2(dst, isa)); | |
72 } | |
73 | |
74 static __m128i dstover_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
75 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst)); | |
76 return _mm_add_epi32(dst, SkAlphaMulQ_SSE2(src, ida)); | |
77 } | |
78 | |
79 static __m128i srcin_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
80 __m128i da = SkGetPackedA32_SSE2(dst); | |
81 return SkAlphaMulQ_SSE2(src, SkAlpha255To256_SSE2(da)); | |
82 } | |
83 | |
84 static __m128i dstin_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
85 __m128i sa = SkGetPackedA32_SSE2(src); | |
86 return SkAlphaMulQ_SSE2(dst, SkAlpha255To256_SSE2(sa)); | |
87 } | |
88 | |
89 static __m128i srcout_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
90 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst)); | |
91 return SkAlphaMulQ_SSE2(src, ida); | |
92 } | |
93 | |
94 static __m128i dstout_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
95 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src)); | |
96 return SkAlphaMulQ_SSE2(dst, isa); | |
97 } | |
98 | |
99 static __m128i srcatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
100 __m128i sa = SkGetPackedA32_SSE2(src); | |
101 __m128i da = SkGetPackedA32_SSE2(dst); | |
102 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
103 | |
104 __m128i a = da; | |
105 | |
106 __m128i r1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedR32_SSE2(src)); | |
107 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst)); | |
108 __m128i r = _mm_add_epi32(r1, r2); | |
109 | |
110 __m128i g1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedG32_SSE2(src)); | |
111 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst)); | |
112 __m128i g = _mm_add_epi32(g1, g2); | |
113 | |
114 __m128i b1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedB32_SSE2(src)); | |
115 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst)); | |
116 __m128i b = _mm_add_epi32(b1, b2); | |
117 | |
118 return SkPackARGB32_SSE2(a, r, g, b); | |
119 } | |
120 | |
121 static __m128i dstatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
122 __m128i sa = SkGetPackedA32_SSE2(src); | |
123 __m128i da = SkGetPackedA32_SSE2(dst); | |
124 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
125 | |
126 __m128i a = sa; | |
127 | |
128 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src)); | |
129 __m128i r2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedR32_SSE2(dst)); | |
130 __m128i r = _mm_add_epi32(r1, r2); | |
131 | |
132 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src)); | |
133 __m128i g2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedG32_SSE2(dst)); | |
134 __m128i g = _mm_add_epi32(g1, g2); | |
135 | |
136 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src)); | |
137 __m128i b2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedB32_SSE2(dst)); | |
138 __m128i b = _mm_add_epi32(b1, b2); | |
139 | |
140 return SkPackARGB32_SSE2(a, r, g, b); | |
141 } | |
142 | |
143 static __m128i xor_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
144 __m128i sa = SkGetPackedA32_SSE2(src); | |
145 __m128i da = SkGetPackedA32_SSE2(dst); | |
146 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
147 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
148 | |
149 __m128i a1 = _mm_add_epi32(sa, da); | |
150 __m128i a2 = SkAlphaMulAlpha_SSE2(sa, da); | |
151 a2 = _mm_slli_epi32(a2, 1); | |
152 __m128i a = _mm_sub_epi32(a1, a2); | |
153 | |
154 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src)); | |
155 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst)); | |
156 __m128i r = _mm_add_epi32(r1, r2); | |
157 | |
158 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src)); | |
159 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst)); | |
160 __m128i g = _mm_add_epi32(g1, g2); | |
161 | |
162 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src)); | |
163 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst)); | |
164 __m128i b = _mm_add_epi32(b1, b2); | |
165 | |
166 return SkPackARGB32_SSE2(a, r, g, b); | |
167 } | |
168 | |
169 static __m128i plus_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
170 __m128i b = saturated_add_SSE2(SkGetPackedB32_SSE2(src), | |
171 SkGetPackedB32_SSE2(dst)); | |
172 __m128i g = saturated_add_SSE2(SkGetPackedG32_SSE2(src), | |
173 SkGetPackedG32_SSE2(dst)); | |
174 __m128i r = saturated_add_SSE2(SkGetPackedR32_SSE2(src), | |
175 SkGetPackedR32_SSE2(dst)); | |
176 __m128i a = saturated_add_SSE2(SkGetPackedA32_SSE2(src), | |
177 SkGetPackedA32_SSE2(dst)); | |
178 return SkPackARGB32_SSE2(a, r, g, b); | |
179 } | |
180 | |
181 static __m128i modulate_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
182 __m128i a = SkAlphaMulAlpha_SSE2(SkGetPackedA32_SSE2(src), | |
183 SkGetPackedA32_SSE2(dst)); | |
184 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), | |
185 SkGetPackedR32_SSE2(dst)); | |
186 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), | |
187 SkGetPackedG32_SSE2(dst)); | |
188 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), | |
189 SkGetPackedB32_SSE2(dst)); | |
190 return SkPackARGB32_SSE2(a, r, g, b); | |
191 } | |
192 | |
193 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) { | 48 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) { |
194 __m128i cmp = _mm_cmplt_epi32(a, b); | 49 __m128i cmp = _mm_cmplt_epi32(a, b); |
195 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b)); | 50 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b)); |
196 } | 51 } |
197 | 52 |
198 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { | 53 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { |
199 // a + b - SkAlphaMulAlpha(a, b); | 54 // a + b - SkAlphaMulAlpha(a, b); |
200 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); | 55 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); |
201 | 56 |
202 } | 57 } |
203 | 58 |
204 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
205 const __m128i& sa, const __m128i& da) { | |
206 // sc * (255 - da) | |
207 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
208 ret1 = _mm_mullo_epi16(sc, ret1); | |
209 | |
210 // dc * (255 - sa) | |
211 __m128i ret2 = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
212 ret2 = _mm_mullo_epi16(dc, ret2); | |
213 | |
214 // sc * dc | |
215 __m128i ret3 = _mm_mullo_epi16(sc, dc); | |
216 | |
217 __m128i ret = _mm_add_epi32(ret1, ret2); | |
218 ret = _mm_add_epi32(ret, ret3); | |
219 | |
220 return clamp_div255round_SSE2(ret); | |
221 } | |
222 | |
223 static __m128i multiply_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
224 __m128i sa = SkGetPackedA32_SSE2(src); | |
225 __m128i da = SkGetPackedA32_SSE2(dst); | |
226 __m128i a = srcover_byte_SSE2(sa, da); | |
227 | |
228 __m128i sr = SkGetPackedR32_SSE2(src); | |
229 __m128i dr = SkGetPackedR32_SSE2(dst); | |
230 __m128i r = blendfunc_multiply_byte_SSE2(sr, dr, sa, da); | |
231 | |
232 __m128i sg = SkGetPackedG32_SSE2(src); | |
233 __m128i dg = SkGetPackedG32_SSE2(dst); | |
234 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); | |
235 | |
236 | |
237 __m128i sb = SkGetPackedB32_SSE2(src); | |
238 __m128i db = SkGetPackedB32_SSE2(dst); | |
239 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); | |
240 | |
241 return SkPackARGB32_SSE2(a, r, g, b); | |
242 } | |
243 | |
244 static __m128i screen_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
245 __m128i a = srcover_byte_SSE2(SkGetPackedA32_SSE2(src), | |
246 SkGetPackedA32_SSE2(dst)); | |
247 __m128i r = srcover_byte_SSE2(SkGetPackedR32_SSE2(src), | |
248 SkGetPackedR32_SSE2(dst)); | |
249 __m128i g = srcover_byte_SSE2(SkGetPackedG32_SSE2(src), | |
250 SkGetPackedG32_SSE2(dst)); | |
251 __m128i b = srcover_byte_SSE2(SkGetPackedB32_SSE2(src), | |
252 SkGetPackedB32_SSE2(dst)); | |
253 return SkPackARGB32_SSE2(a, r, g, b); | |
254 } | |
255 | |
256 // Portable version overlay_byte() is in SkXfermode.cpp. | 59 // Portable version overlay_byte() is in SkXfermode.cpp. |
257 static inline __m128i overlay_byte_SSE2(const __m128i& sc, const __m128i& dc, | 60 static inline __m128i overlay_byte_SSE2(const __m128i& sc, const __m128i& dc, |
258 const __m128i& sa, const __m128i& da) { | 61 const __m128i& sa, const __m128i& da) { |
259 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | 62 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); |
260 __m128i tmp1 = _mm_mullo_epi16(sc, ida); | 63 __m128i tmp1 = _mm_mullo_epi16(sc, ida); |
261 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | 64 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); |
262 __m128i tmp2 = _mm_mullo_epi16(dc, isa); | 65 __m128i tmp2 = _mm_mullo_epi16(dc, isa); |
263 __m128i tmp = _mm_add_epi32(tmp1, tmp2); | 66 __m128i tmp = _mm_add_epi32(tmp1, tmp2); |
264 | 67 |
265 __m128i cmp = _mm_cmpgt_epi32(_mm_slli_epi32(dc, 1), da); | 68 __m128i cmp = _mm_cmpgt_epi32(_mm_slli_epi32(dc, 1), da); |
(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
573 __m128i a = srcover_byte_SSE2(sa, da); | 376 __m128i a = srcover_byte_SSE2(sa, da); |
574 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src), | 377 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src), |
575 SkGetPackedR32_SSE2(dst), sa, da); | 378 SkGetPackedR32_SSE2(dst), sa, da); |
576 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src), | 379 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src), |
577 SkGetPackedG32_SSE2(dst), sa, da); | 380 SkGetPackedG32_SSE2(dst), sa, da); |
578 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src), | 381 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src), |
579 SkGetPackedB32_SSE2(dst), sa, da); | 382 SkGetPackedB32_SSE2(dst), sa, da); |
580 return SkPackARGB32_SSE2(a, r, g, b); | 383 return SkPackARGB32_SSE2(a, r, g, b); |
581 } | 384 } |
582 | 385 |
583 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
584 const __m128i& sa, const __m128i& da) { | |
585 __m128i tmp1 = _mm_mullo_epi16(sc, da); | |
586 __m128i tmp2 = _mm_mullo_epi16(dc, sa); | |
587 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); | |
588 | |
589 __m128i ret1 = _mm_add_epi32(sc, dc); | |
590 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); | |
591 __m128i ret = _mm_sub_epi32(ret1, ret2); | |
592 | |
593 ret = clamp_signed_byte_SSE2(ret); | |
594 return ret; | |
595 } | |
596 | |
597 static __m128i difference_modeproc_SSE2(const __m128i& src, | |
598 const __m128i& dst) { | |
599 __m128i sa = SkGetPackedA32_SSE2(src); | |
600 __m128i da = SkGetPackedA32_SSE2(dst); | |
601 | |
602 __m128i a = srcover_byte_SSE2(sa, da); | |
603 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), | |
604 SkGetPackedR32_SSE2(dst), sa, da); | |
605 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), | |
606 SkGetPackedG32_SSE2(dst), sa, da); | |
607 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), | |
608 SkGetPackedB32_SSE2(dst), sa, da); | |
609 return SkPackARGB32_SSE2(a, r, g, b); | |
610 } | |
611 | |
612 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
613 const __m128i&, __m128i&) { | |
614 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc | |
615 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc | |
616 tmp1 = _mm_add_epi32(tmp1, tmp2); | |
617 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc | |
618 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc | |
619 | |
620 __m128i r = _mm_sub_epi32(tmp1, tmp2); | |
621 return clamp_div255round_SSE2(r); | |
622 } | |
623 | |
624 static __m128i exclusion_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
625 __m128i sa = SkGetPackedA32_SSE2(src); | |
626 __m128i da = SkGetPackedA32_SSE2(dst); | |
627 | |
628 __m128i a = srcover_byte_SSE2(sa, da); | |
629 __m128i r = exclusion_byte_SSE2(SkGetPackedR32_SSE2(src), | |
630 SkGetPackedR32_SSE2(dst), sa, da); | |
631 __m128i g = exclusion_byte_SSE2(SkGetPackedG32_SSE2(src), | |
632 SkGetPackedG32_SSE2(dst), sa, da); | |
633 __m128i b = exclusion_byte_SSE2(SkGetPackedB32_SSE2(src), | |
634 SkGetPackedB32_SSE2(dst), sa, da); | |
635 return SkPackARGB32_SSE2(a, r, g, b); | |
636 } | |
637 | 386 |
638 //////////////////////////////////////////////////////////////////////////////// | 387 //////////////////////////////////////////////////////////////////////////////// |
639 | 388 |
640 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); | 389 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); |
641 | 390 |
642 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; | |
643 | |
644 void SkSSE2ProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], | 391 void SkSSE2ProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], |
645 int count, const SkAlpha aa[]) const { | 392 int count, const SkAlpha aa[]) const { |
646 SkASSERT(dst && src && count >= 0); | 393 SkASSERT(dst && src && count >= 0); |
647 | 394 |
648 SkXfermodeProc proc = this->getProc(); | 395 SkXfermodeProc proc = this->getProc(); |
649 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD); | 396 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD); |
650 SkASSERT(procSIMD != NULL); | 397 SkASSERT(procSIMD != NULL); |
651 | 398 |
652 if (NULL == aa) { | 399 if (NULL == aa) { |
653 if (count >= 4) { | 400 if (count >= 4) { |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
758 } | 505 } |
759 } | 506 } |
760 } | 507 } |
761 | 508 |
762 #ifndef SK_IGNORE_TO_STRING | 509 #ifndef SK_IGNORE_TO_STRING |
763 void SkSSE2ProcCoeffXfermode::toString(SkString* str) const { | 510 void SkSSE2ProcCoeffXfermode::toString(SkString* str) const { |
764 this->INHERITED::toString(str); | 511 this->INHERITED::toString(str); |
765 } | 512 } |
766 #endif | 513 #endif |
767 | 514 |
768 //////////////////////////////////////////////////////////////////////////////// | |
769 | |
770 // 4 pixels modeprocs with SSE2 | |
771 SkXfermodeProcSIMD gSSE2XfermodeProcs[] = { | |
772 NULL, // kClear_Mode | |
773 NULL, // kSrc_Mode | |
774 NULL, // kDst_Mode | |
775 srcover_modeproc_SSE2, | |
776 dstover_modeproc_SSE2, | |
777 srcin_modeproc_SSE2, | |
778 dstin_modeproc_SSE2, | |
779 srcout_modeproc_SSE2, | |
780 dstout_modeproc_SSE2, | |
781 srcatop_modeproc_SSE2, | |
782 dstatop_modeproc_SSE2, | |
783 xor_modeproc_SSE2, | |
784 plus_modeproc_SSE2, | |
785 modulate_modeproc_SSE2, | |
786 screen_modeproc_SSE2, | |
787 | |
788 overlay_modeproc_SSE2, | |
789 darken_modeproc_SSE2, | |
790 lighten_modeproc_SSE2, | |
791 colordodge_modeproc_SSE2, | |
792 colorburn_modeproc_SSE2, | |
793 hardlight_modeproc_SSE2, | |
794 softlight_modeproc_SSE2, | |
795 difference_modeproc_SSE2, | |
796 exclusion_modeproc_SSE2, | |
797 multiply_modeproc_SSE2, | |
798 | |
799 NULL, // kHue_Mode | |
800 NULL, // kSaturation_Mode | |
801 NULL, // kColor_Mode | |
802 NULL, // kLuminosity_Mode | |
803 }; | |
804 | |
805 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 515 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
806 SkXfermode::Mode mode) { | 516 SkXfermode::Mode mode) { |
807 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 517 SkXfermodeProcSIMD proc = nullptr; |
808 | 518 // TODO(mtklein): implement these Sk4px. |
809 if (procSIMD != NULL) { | 519 switch (mode) { |
810 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 520 case SkProcCoeffXfermode::kOverlay_Mode: proc = overlay_modeproc_SSE2; break; |
| 521 case SkProcCoeffXfermode::kDarken_Mode: proc = darken_modeproc_SSE2; break; |
| 522 case SkProcCoeffXfermode::kLighten_Mode: proc = lighten_modeproc_SSE2; break; |
| 523 case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break; |
| 524 case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break; |
| 525 case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break; |
| 526 case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break; |
| 527 default: break; |
811 } | 528 } |
812 return NULL; | 529 return proc ? SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, (void*)proc)) : nullptr; |
813 } | 530 } |
OLD | NEW |