| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkColorPriv.h" | 8 #include "SkColorPriv.h" |
| 9 #include "SkColor_opts_SSE2.h" | 9 #include "SkColor_opts_SSE2.h" |
| 10 #include "SkMathPriv.h" | 10 #include "SkMathPriv.h" |
| 11 #include "SkMath_opts_SSE2.h" | 11 #include "SkMath_opts_SSE2.h" |
| 12 #include "SkXfermode.h" | 12 #include "SkXfermode.h" |
| 13 #include "SkXfermode_opts_SSE2.h" | 13 #include "SkXfermode_opts_SSE2.h" |
| 14 #include "SkXfermode_proccoeff.h" | 14 #include "SkXfermode_proccoeff.h" |
| 15 | 15 |
| 16 //////////////////////////////////////////////////////////////////////////////// | 16 //////////////////////////////////////////////////////////////////////////////// |
| 17 // 4 pixels SSE2 version functions | 17 // 4 pixels SSE2 version functions |
| 18 //////////////////////////////////////////////////////////////////////////////// | 18 //////////////////////////////////////////////////////////////////////////////// |
| 19 | 19 |
| 20 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { | 20 static inline __m128i SkDiv255Round_SSE2(const __m128i& a) { |
| 21 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; | 21 __m128i prod = _mm_add_epi32(a, _mm_set1_epi32(128)); // prod += 128; |
| 22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) | 22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); // prod + (prod >> 8) |
| 23 prod = _mm_srli_epi32(prod, 8); // >> 8 | 23 prod = _mm_srli_epi32(prod, 8); // >> 8 |
| 24 | 24 |
| 25 return prod; | 25 return prod; |
| 26 } | 26 } |
| 27 | 27 |
| 28 static inline __m128i saturated_add_SSE2(const __m128i& a, const __m128i& b) { | |
| 29 __m128i sum = _mm_add_epi32(a, b); | |
| 30 __m128i cmp = _mm_cmpgt_epi32(sum, _mm_set1_epi32(255)); | |
| 31 | |
| 32 sum = _mm_or_si128(_mm_and_si128(cmp, _mm_set1_epi32(255)), | |
| 33 _mm_andnot_si128(cmp, sum)); | |
| 34 return sum; | |
| 35 } | |
| 36 | |
| 37 static inline __m128i clamp_signed_byte_SSE2(const __m128i& n) { | |
| 38 __m128i cmp1 = _mm_cmplt_epi32(n, _mm_setzero_si128()); | |
| 39 __m128i cmp2 = _mm_cmpgt_epi32(n, _mm_set1_epi32(255)); | |
| 40 __m128i ret = _mm_and_si128(cmp2, _mm_set1_epi32(255)); | |
| 41 | |
| 42 __m128i cmp = _mm_or_si128(cmp1, cmp2); | |
| 43 ret = _mm_or_si128(_mm_and_si128(cmp, ret), _mm_andnot_si128(cmp, n)); | |
| 44 | |
| 45 return ret; | |
| 46 } | |
| 47 | |
| 48 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { | 28 static inline __m128i clamp_div255round_SSE2(const __m128i& prod) { |
| 49 // test if > 0 | 29 // test if > 0 |
| 50 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); | 30 __m128i cmp1 = _mm_cmpgt_epi32(prod, _mm_setzero_si128()); |
| 51 // test if < 255*255 | 31 // test if < 255*255 |
| 52 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); | 32 __m128i cmp2 = _mm_cmplt_epi32(prod, _mm_set1_epi32(255*255)); |
| 53 | 33 |
| 54 __m128i ret = _mm_setzero_si128(); | 34 __m128i ret = _mm_setzero_si128(); |
| 55 | 35 |
| 56 // if value >= 255*255, value = 255 | 36 // if value >= 255*255, value = 255 |
| 57 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); | 37 ret = _mm_andnot_si128(cmp2, _mm_set1_epi32(255)); |
| 58 | 38 |
| 59 __m128i div = SkDiv255Round_SSE2(prod); | 39 __m128i div = SkDiv255Round_SSE2(prod); |
| 60 | 40 |
| 61 // test if > 0 && < 255*255 | 41 // test if > 0 && < 255*255 |
| 62 __m128i cmp = _mm_and_si128(cmp1, cmp2); | 42 __m128i cmp = _mm_and_si128(cmp1, cmp2); |
| 63 | 43 |
| 64 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); | 44 ret = _mm_or_si128(_mm_and_si128(cmp, div), _mm_andnot_si128(cmp, ret)); |
| 65 | 45 |
| 66 return ret; | 46 return ret; |
| 67 } | 47 } |
| 68 | |
| 69 static __m128i srcover_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 70 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src)); | |
| 71 return _mm_add_epi32(src, SkAlphaMulQ_SSE2(dst, isa)); | |
| 72 } | |
| 73 | |
| 74 static __m128i dstover_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 75 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst)); | |
| 76 return _mm_add_epi32(dst, SkAlphaMulQ_SSE2(src, ida)); | |
| 77 } | |
| 78 | |
| 79 static __m128i srcin_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 80 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 81 return SkAlphaMulQ_SSE2(src, SkAlpha255To256_SSE2(da)); | |
| 82 } | |
| 83 | |
| 84 static __m128i dstin_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 85 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 86 return SkAlphaMulQ_SSE2(dst, SkAlpha255To256_SSE2(sa)); | |
| 87 } | |
| 88 | |
| 89 static __m128i srcout_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 90 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(dst)); | |
| 91 return SkAlphaMulQ_SSE2(src, ida); | |
| 92 } | |
| 93 | |
| 94 static __m128i dstout_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 95 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(256), SkGetPackedA32_SSE2(src)); | |
| 96 return SkAlphaMulQ_SSE2(dst, isa); | |
| 97 } | |
| 98 | |
| 99 static __m128i srcatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 100 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 101 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 102 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
| 103 | |
| 104 __m128i a = da; | |
| 105 | |
| 106 __m128i r1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedR32_SSE2(src)); | |
| 107 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst)); | |
| 108 __m128i r = _mm_add_epi32(r1, r2); | |
| 109 | |
| 110 __m128i g1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedG32_SSE2(src)); | |
| 111 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst)); | |
| 112 __m128i g = _mm_add_epi32(g1, g2); | |
| 113 | |
| 114 __m128i b1 = SkAlphaMulAlpha_SSE2(da, SkGetPackedB32_SSE2(src)); | |
| 115 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst)); | |
| 116 __m128i b = _mm_add_epi32(b1, b2); | |
| 117 | |
| 118 return SkPackARGB32_SSE2(a, r, g, b); | |
| 119 } | |
| 120 | |
| 121 static __m128i dstatop_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 122 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 123 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 124 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
| 125 | |
| 126 __m128i a = sa; | |
| 127 | |
| 128 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src)); | |
| 129 __m128i r2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedR32_SSE2(dst)); | |
| 130 __m128i r = _mm_add_epi32(r1, r2); | |
| 131 | |
| 132 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src)); | |
| 133 __m128i g2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedG32_SSE2(dst)); | |
| 134 __m128i g = _mm_add_epi32(g1, g2); | |
| 135 | |
| 136 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src)); | |
| 137 __m128i b2 = SkAlphaMulAlpha_SSE2(sa, SkGetPackedB32_SSE2(dst)); | |
| 138 __m128i b = _mm_add_epi32(b1, b2); | |
| 139 | |
| 140 return SkPackARGB32_SSE2(a, r, g, b); | |
| 141 } | |
| 142 | |
| 143 static __m128i xor_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 144 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 145 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 146 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
| 147 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
| 148 | |
| 149 __m128i a1 = _mm_add_epi32(sa, da); | |
| 150 __m128i a2 = SkAlphaMulAlpha_SSE2(sa, da); | |
| 151 a2 = _mm_slli_epi32(a2, 1); | |
| 152 __m128i a = _mm_sub_epi32(a1, a2); | |
| 153 | |
| 154 __m128i r1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedR32_SSE2(src)); | |
| 155 __m128i r2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedR32_SSE2(dst)); | |
| 156 __m128i r = _mm_add_epi32(r1, r2); | |
| 157 | |
| 158 __m128i g1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedG32_SSE2(src)); | |
| 159 __m128i g2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedG32_SSE2(dst)); | |
| 160 __m128i g = _mm_add_epi32(g1, g2); | |
| 161 | |
| 162 __m128i b1 = SkAlphaMulAlpha_SSE2(ida, SkGetPackedB32_SSE2(src)); | |
| 163 __m128i b2 = SkAlphaMulAlpha_SSE2(isa, SkGetPackedB32_SSE2(dst)); | |
| 164 __m128i b = _mm_add_epi32(b1, b2); | |
| 165 | |
| 166 return SkPackARGB32_SSE2(a, r, g, b); | |
| 167 } | |
| 168 | |
| 169 static __m128i plus_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 170 __m128i b = saturated_add_SSE2(SkGetPackedB32_SSE2(src), | |
| 171 SkGetPackedB32_SSE2(dst)); | |
| 172 __m128i g = saturated_add_SSE2(SkGetPackedG32_SSE2(src), | |
| 173 SkGetPackedG32_SSE2(dst)); | |
| 174 __m128i r = saturated_add_SSE2(SkGetPackedR32_SSE2(src), | |
| 175 SkGetPackedR32_SSE2(dst)); | |
| 176 __m128i a = saturated_add_SSE2(SkGetPackedA32_SSE2(src), | |
| 177 SkGetPackedA32_SSE2(dst)); | |
| 178 return SkPackARGB32_SSE2(a, r, g, b); | |
| 179 } | |
| 180 | |
| 181 static __m128i modulate_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 182 __m128i a = SkAlphaMulAlpha_SSE2(SkGetPackedA32_SSE2(src), | |
| 183 SkGetPackedA32_SSE2(dst)); | |
| 184 __m128i r = SkAlphaMulAlpha_SSE2(SkGetPackedR32_SSE2(src), | |
| 185 SkGetPackedR32_SSE2(dst)); | |
| 186 __m128i g = SkAlphaMulAlpha_SSE2(SkGetPackedG32_SSE2(src), | |
| 187 SkGetPackedG32_SSE2(dst)); | |
| 188 __m128i b = SkAlphaMulAlpha_SSE2(SkGetPackedB32_SSE2(src), | |
| 189 SkGetPackedB32_SSE2(dst)); | |
| 190 return SkPackARGB32_SSE2(a, r, g, b); | |
| 191 } | |
| 192 | |
| 193 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) { | 48 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) { |
| 194 __m128i cmp = _mm_cmplt_epi32(a, b); | 49 __m128i cmp = _mm_cmplt_epi32(a, b); |
| 195 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b)); | 50 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b)); |
| 196 } | 51 } |
| 197 | 52 |
| 198 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { | 53 static inline __m128i srcover_byte_SSE2(const __m128i& a, const __m128i& b) { |
| 199 // a + b - SkAlphaMulAlpha(a, b); | 54 // a + b - SkAlphaMulAlpha(a, b); |
| 200 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); | 55 return _mm_sub_epi32(_mm_add_epi32(a, b), SkAlphaMulAlpha_SSE2(a, b)); |
| 201 | 56 |
| 202 } | 57 } |
| 203 | 58 |
| 204 static inline __m128i blendfunc_multiply_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
| 205                                                    const __m128i& sa, const __m128i& da) { | |
| 206 // sc * (255 - da) | |
| 207 __m128i ret1 = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
| 208 ret1 = _mm_mullo_epi16(sc, ret1); | |
| 209 | |
| 210 // dc * (255 - sa) | |
| 211 __m128i ret2 = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
| 212 ret2 = _mm_mullo_epi16(dc, ret2); | |
| 213 | |
| 214 // sc * dc | |
| 215 __m128i ret3 = _mm_mullo_epi16(sc, dc); | |
| 216 | |
| 217 __m128i ret = _mm_add_epi32(ret1, ret2); | |
| 218 ret = _mm_add_epi32(ret, ret3); | |
| 219 | |
| 220 return clamp_div255round_SSE2(ret); | |
| 221 } | |
| 222 | |
| 223 static __m128i multiply_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 224 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 225 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 226 __m128i a = srcover_byte_SSE2(sa, da); | |
| 227 | |
| 228 __m128i sr = SkGetPackedR32_SSE2(src); | |
| 229 __m128i dr = SkGetPackedR32_SSE2(dst); | |
| 230 __m128i r = blendfunc_multiply_byte_SSE2(sr, dr, sa, da); | |
| 231 | |
| 232 __m128i sg = SkGetPackedG32_SSE2(src); | |
| 233 __m128i dg = SkGetPackedG32_SSE2(dst); | |
| 234 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); | |
| 235 | |
| 236 | |
| 237 __m128i sb = SkGetPackedB32_SSE2(src); | |
| 238 __m128i db = SkGetPackedB32_SSE2(dst); | |
| 239 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); | |
| 240 | |
| 241 return SkPackARGB32_SSE2(a, r, g, b); | |
| 242 } | |
| 243 | |
| 244 static __m128i screen_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 245 __m128i a = srcover_byte_SSE2(SkGetPackedA32_SSE2(src), | |
| 246 SkGetPackedA32_SSE2(dst)); | |
| 247 __m128i r = srcover_byte_SSE2(SkGetPackedR32_SSE2(src), | |
| 248 SkGetPackedR32_SSE2(dst)); | |
| 249 __m128i g = srcover_byte_SSE2(SkGetPackedG32_SSE2(src), | |
| 250 SkGetPackedG32_SSE2(dst)); | |
| 251 __m128i b = srcover_byte_SSE2(SkGetPackedB32_SSE2(src), | |
| 252 SkGetPackedB32_SSE2(dst)); | |
| 253 return SkPackARGB32_SSE2(a, r, g, b); | |
| 254 } | |
| 255 | |
| 256 // Portable version overlay_byte() is in SkXfermode.cpp. | 59 // Portable version overlay_byte() is in SkXfermode.cpp. |
| 257 static inline __m128i overlay_byte_SSE2(const __m128i& sc, const __m128i& dc, | 60 static inline __m128i overlay_byte_SSE2(const __m128i& sc, const __m128i& dc, |
| 258 const __m128i& sa, const __m128i& da) { | 61 const __m128i& sa, const __m128i& da) { |
| 259 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | 62 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); |
| 260 __m128i tmp1 = _mm_mullo_epi16(sc, ida); | 63 __m128i tmp1 = _mm_mullo_epi16(sc, ida); |
| 261 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | 64 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); |
| 262 __m128i tmp2 = _mm_mullo_epi16(dc, isa); | 65 __m128i tmp2 = _mm_mullo_epi16(dc, isa); |
| 263 __m128i tmp = _mm_add_epi32(tmp1, tmp2); | 66 __m128i tmp = _mm_add_epi32(tmp1, tmp2); |
| 264 | 67 |
| 265 __m128i cmp = _mm_cmpgt_epi32(_mm_slli_epi32(dc, 1), da); | 68 __m128i cmp = _mm_cmpgt_epi32(_mm_slli_epi32(dc, 1), da); |
| (...skipping 307 matching lines...) |
| 573 __m128i a = srcover_byte_SSE2(sa, da); | 376 __m128i a = srcover_byte_SSE2(sa, da); |
| 574 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src), | 377 __m128i r = softlight_byte_SSE2(SkGetPackedR32_SSE2(src), |
| 575 SkGetPackedR32_SSE2(dst), sa, da); | 378 SkGetPackedR32_SSE2(dst), sa, da); |
| 576 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src), | 379 __m128i g = softlight_byte_SSE2(SkGetPackedG32_SSE2(src), |
| 577 SkGetPackedG32_SSE2(dst), sa, da); | 380 SkGetPackedG32_SSE2(dst), sa, da); |
| 578 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src), | 381 __m128i b = softlight_byte_SSE2(SkGetPackedB32_SSE2(src), |
| 579 SkGetPackedB32_SSE2(dst), sa, da); | 382 SkGetPackedB32_SSE2(dst), sa, da); |
| 580 return SkPackARGB32_SSE2(a, r, g, b); | 383 return SkPackARGB32_SSE2(a, r, g, b); |
| 581 } | 384 } |
| 582 | 385 |
| 583 static inline __m128i difference_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
| 584                                            const __m128i& sa, const __m128i& da) { | |
| 585 __m128i tmp1 = _mm_mullo_epi16(sc, da); | |
| 586 __m128i tmp2 = _mm_mullo_epi16(dc, sa); | |
| 587 __m128i tmp = SkMin32_SSE2(tmp1, tmp2); | |
| 588 | |
| 589 __m128i ret1 = _mm_add_epi32(sc, dc); | |
| 590 __m128i ret2 = _mm_slli_epi32(SkDiv255Round_SSE2(tmp), 1); | |
| 591 __m128i ret = _mm_sub_epi32(ret1, ret2); | |
| 592 | |
| 593 ret = clamp_signed_byte_SSE2(ret); | |
| 594 return ret; | |
| 595 } | |
| 596 | |
| 597 static __m128i difference_modeproc_SSE2(const __m128i& src, | |
| 598 const __m128i& dst) { | |
| 599 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 600 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 601 | |
| 602 __m128i a = srcover_byte_SSE2(sa, da); | |
| 603 __m128i r = difference_byte_SSE2(SkGetPackedR32_SSE2(src), | |
| 604 SkGetPackedR32_SSE2(dst), sa, da); | |
| 605 __m128i g = difference_byte_SSE2(SkGetPackedG32_SSE2(src), | |
| 606 SkGetPackedG32_SSE2(dst), sa, da); | |
| 607 __m128i b = difference_byte_SSE2(SkGetPackedB32_SSE2(src), | |
| 608 SkGetPackedB32_SSE2(dst), sa, da); | |
| 609 return SkPackARGB32_SSE2(a, r, g, b); | |
| 610 } | |
| 611 | |
| 612 static inline __m128i exclusion_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
| 613 const __m128i&, __m128i&) { | |
| 614 __m128i tmp1 = _mm_mullo_epi16(_mm_set1_epi32(255), sc); // 255 * sc | |
| 615 __m128i tmp2 = _mm_mullo_epi16(_mm_set1_epi32(255), dc); // 255 * dc | |
| 616 tmp1 = _mm_add_epi32(tmp1, tmp2); | |
| 617 tmp2 = _mm_mullo_epi16(sc, dc); // sc * dc | |
| 618 tmp2 = _mm_slli_epi32(tmp2, 1); // 2 * sc * dc | |
| 619 | |
| 620 __m128i r = _mm_sub_epi32(tmp1, tmp2); | |
| 621 return clamp_div255round_SSE2(r); | |
| 622 } | |
| 623 | |
| 624 static __m128i exclusion_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 625 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 626 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 627 | |
| 628 __m128i a = srcover_byte_SSE2(sa, da); | |
| 629 __m128i r = exclusion_byte_SSE2(SkGetPackedR32_SSE2(src), | |
| 630 SkGetPackedR32_SSE2(dst), sa, da); | |
| 631 __m128i g = exclusion_byte_SSE2(SkGetPackedG32_SSE2(src), | |
| 632 SkGetPackedG32_SSE2(dst), sa, da); | |
| 633 __m128i b = exclusion_byte_SSE2(SkGetPackedB32_SSE2(src), | |
| 634 SkGetPackedB32_SSE2(dst), sa, da); | |
| 635 return SkPackARGB32_SSE2(a, r, g, b); | |
| 636 } | |
| 637 | 386 |
| 638 //////////////////////////////////////////////////////////////////////////////// | 387 //////////////////////////////////////////////////////////////////////////////// |
| 639 | 388 |
| 640 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); | 389 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); |
| 641 | 390 |
| 642 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; | |
| 643 | |
| 644 void SkSSE2ProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], | 391 void SkSSE2ProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[], |
| 645 int count, const SkAlpha aa[]) const { | 392 int count, const SkAlpha aa[]) const { |
| 646 SkASSERT(dst && src && count >= 0); | 393 SkASSERT(dst && src && count >= 0); |
| 647 | 394 |
| 648 SkXfermodeProc proc = this->getProc(); | 395 SkXfermodeProc proc = this->getProc(); |
| 649 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD); | 396 SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD); |
| 650 SkASSERT(procSIMD != NULL); | 397 SkASSERT(procSIMD != NULL); |
| 651 | 398 |
| 652 if (NULL == aa) { | 399 if (NULL == aa) { |
| 653 if (count >= 4) { | 400 if (count >= 4) { |
| (...skipping 104 matching lines...) |
| 758 } | 505 } |
| 759 } | 506 } |
| 760 } | 507 } |
| 761 | 508 |
| 762 #ifndef SK_IGNORE_TO_STRING | 509 #ifndef SK_IGNORE_TO_STRING |
| 763 void SkSSE2ProcCoeffXfermode::toString(SkString* str) const { | 510 void SkSSE2ProcCoeffXfermode::toString(SkString* str) const { |
| 764 this->INHERITED::toString(str); | 511 this->INHERITED::toString(str); |
| 765 } | 512 } |
| 766 #endif | 513 #endif |
| 767 | 514 |
| 768 //////////////////////////////////////////////////////////////////////////////// | |
| 769 | |
| 770 // 4 pixels modeprocs with SSE2 | |
| 771 SkXfermodeProcSIMD gSSE2XfermodeProcs[] = { | |
| 772 NULL, // kClear_Mode | |
| 773 NULL, // kSrc_Mode | |
| 774 NULL, // kDst_Mode | |
| 775 srcover_modeproc_SSE2, | |
| 776 dstover_modeproc_SSE2, | |
| 777 srcin_modeproc_SSE2, | |
| 778 dstin_modeproc_SSE2, | |
| 779 srcout_modeproc_SSE2, | |
| 780 dstout_modeproc_SSE2, | |
| 781 srcatop_modeproc_SSE2, | |
| 782 dstatop_modeproc_SSE2, | |
| 783 xor_modeproc_SSE2, | |
| 784 plus_modeproc_SSE2, | |
| 785 modulate_modeproc_SSE2, | |
| 786 screen_modeproc_SSE2, | |
| 787 | |
| 788 overlay_modeproc_SSE2, | |
| 789 darken_modeproc_SSE2, | |
| 790 lighten_modeproc_SSE2, | |
| 791 colordodge_modeproc_SSE2, | |
| 792 colorburn_modeproc_SSE2, | |
| 793 hardlight_modeproc_SSE2, | |
| 794 softlight_modeproc_SSE2, | |
| 795 difference_modeproc_SSE2, | |
| 796 exclusion_modeproc_SSE2, | |
| 797 multiply_modeproc_SSE2, | |
| 798 | |
| 799 NULL, // kHue_Mode | |
| 800 NULL, // kSaturation_Mode | |
| 801 NULL, // kColor_Mode | |
| 802 NULL, // kLuminosity_Mode | |
| 803 }; | |
| 804 | |
| 805 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 515 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
| 806                                                          SkXfermode::Mode mode) { | 516                                                          SkXfermode::Mode mode) { |
| 807 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 517 SkXfermodeProcSIMD proc = nullptr; |
| 808 | 518 // TODO(mtklein): implement these Sk4px. |
| 809 if (procSIMD != NULL) { | 519 switch (mode) { |
| 810 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 520 case SkProcCoeffXfermode::kOverlay_Mode: proc = overlay_modeproc_SSE2; break; |
| 521 case SkProcCoeffXfermode::kDarken_Mode: proc = darken_modeproc_SSE2; break; |
| 522 case SkProcCoeffXfermode::kLighten_Mode: proc = lighten_modeproc_SSE2; break; |
| 523 case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break; |
| 524 case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break; |
| 525 case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break; |
| 526 case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break; |
| 527 default: break; |
| 811 } | 528 } |
| 812 return NULL; | 529 return proc ? SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, (void*)proc)) : nullptr; |
| 813 } | 530 } |
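Note on the rounded divide-by-255 that survives the CL: SkDiv255Round_SSE2 near the top of the file biases each 32-bit product by 128, adds the result shifted right by 8, then shifts right by 8 again, which equals round(x / 255.0) for any product in [0, 255*255]. The short scalar sketch below (hypothetical, not part of this CL or the Skia tree) checks that identity exhaustively; the SSE2 version simply applies the same three steps to four packed lanes with _mm_add_epi32 and _mm_srli_epi32.

    // div255_round_check.cpp -- hypothetical standalone sketch, not in this CL
    #include <cassert>
    #include <cstdint>

    // Scalar form of the trick SkDiv255Round_SSE2 applies per 32-bit lane.
    static uint32_t div255_round(uint32_t x) {
        uint32_t t = x + 128;            // bias so the shifts round to nearest
        return (t + (t >> 8)) >> 8;      // == round(x / 255.0) for 0 <= x <= 255*255
    }

    int main() {
        for (uint32_t x = 0; x <= 255u * 255u; x++) {
            uint32_t expected = (x + 127) / 255;  // round(x / 255.0); no ties, 255 is odd
            assert(div255_round(x) == expected);
        }
        return 0;
    }

The same identity is what clamp_div255round_SSE2 falls back on for in-range values, so the per-channel blend helpers kept by this CL (overlay, darken/lighten, dodge/burn, hard/soft light) all share one rounding behavior.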