Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
| 2 #include "SkColor_opts_SSE2.h" | 2 #include "SkColor_opts_SSE2.h" |
| 3 #include "SkMathPriv.h" | 3 #include "SkMathPriv.h" |
| 4 #include "SkXfermode.h" | 4 #include "SkXfermode.h" |
| 5 #include "SkXfermode_opts_SSE2.h" | 5 #include "SkXfermode_opts_SSE2.h" |
| 6 #include "SkXfermode_proccoeff.h" | 6 #include "SkXfermode_proccoeff.h" |
| 7 | 7 |
| 8 //////////////////////////////////////////////////////////////////////////////// | 8 //////////////////////////////////////////////////////////////////////////////// |
| 9 // 4 pixels SSE2 version functions | 9 // 4 pixels SSE2 version functions |
| 10 //////////////////////////////////////////////////////////////////////////////// | 10 //////////////////////////////////////////////////////////////////////////////// |
| (...skipping 66 matching lines...) | |
| 77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); | 77 __m128i g = blendfunc_multiply_byte_SSE2(sg, dg, sa, da); |
| 78 | 78 |
| 79 | 79 |
| 80 __m128i sb = SkGetPackedB32_SSE2(src); | 80 __m128i sb = SkGetPackedB32_SSE2(src); |
| 81 __m128i db = SkGetPackedB32_SSE2(dst); | 81 __m128i db = SkGetPackedB32_SSE2(dst); |
| 82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); | 82 __m128i b = blendfunc_multiply_byte_SSE2(sb, db, sa, da); |
| 83 | 83 |
| 84 return SkPackARGB32_SSE2(a, r, g, b); | 84 return SkPackARGB32_SSE2(a, r, g, b); |
| 85 } | 85 } |
| 86 | 86 |
| 87 static inline __m128i SkMin32_SSE2(const __m128i& a, const __m128i& b) { | |
| 88 __m128i cmp = _mm_cmplt_epi32(a, b); | |
| 89 return _mm_or_si128(_mm_and_si128(cmp, a), _mm_andnot_si128(cmp, b)); | |
| 90 } | |
| 91 | |
| 92 static inline __m128i Multiply32_SSE2(const __m128i& a, const __m128i& b) { | |
| 93 __m128i r1 = _mm_mul_epu32(a, b); | |
| 94 __m128i r2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); | |
| 95 __m128i r = _mm_unpacklo_epi32(_mm_shuffle_epi32(r1, _MM_SHUFFLE(0,0,2,0)), | |
| 96 _mm_shuffle_epi32(r2, _MM_SHUFFLE(0,0,2,0))); | |
| 97 return r; | |
| 98 } | |
| 99 | |
| 100 static inline __m128i colordodge_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
| 101 const __m128i& sa, const __m128i& da) { | |
| 102 __m128i diff = _mm_sub_epi32(sa, sc); | |
| 103 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
| 104 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
| 105 | |
| 106 // if (0 == dc) | |
| 107 __m128i cmp1 = _mm_cmpeq_epi32(dc, _mm_setzero_si128()); | |
| 108 __m128i rc1 = _mm_and_si128(cmp1, SkAlphaMulAlpha_SSE2(sc, ida)); | |
| 109 | |
| 110 // else if (0 == diff) | |
| 111 __m128i cmp2 = _mm_cmpeq_epi32(diff, _mm_setzero_si128()); | |
| 112 __m128i cmp = _mm_andnot_si128(cmp1, cmp2); | |
| 113 __m128i tmp1 = _mm_mullo_epi16(sa, da); | |
| 114 __m128i tmp2 = _mm_mullo_epi16(sc, ida); | |
| 115 __m128i tmp3 = _mm_mullo_epi16(dc, isa); | |
| 116 __m128i rc2 = _mm_add_epi32(tmp1, tmp2); | |
| 117 rc2 = _mm_add_epi32(rc2, tmp3); | |
| 118 rc2 = clamp_div255round_SSE2(rc2); | |
| 119 rc2 = _mm_and_si128(cmp, rc2); | |
| 120 | |
| 121 // else | |
| 122 __m128i cmp3 = _mm_or_si128(cmp1, cmp2); | |
| 123 __m128i value = _mm_mullo_epi16(dc, sa); | |
| 124 __m128 x = _mm_cvtepi32_ps(value); | |
| 125 __m128 y = _mm_cvtepi32_ps(diff); | |
| 126 diff = _mm_cvttps_epi32(_mm_div_ps(x, y)); | |
|
> **mtklein** (2014/04/24 18:13:31): Think it's worth pulling these three lines out as … [comment preview truncated in the captured page]
>
> **qiankun** (2014/04/25 09:24:38): Done.
| |
| 127 | |
| 128 __m128i tmp4 = SkMin32_SSE2(da, diff); | |
| 129 tmp4 = Multiply32_SSE2(sa, tmp4); | |
| 130 __m128i rc3 = _mm_add_epi32(tmp4, tmp2); | |
| 131 rc3 = _mm_add_epi32(rc3, tmp3); | |
| 132 rc3 = clamp_div255round_SSE2(rc3); | |
| 133 rc3 = _mm_andnot_si128(cmp3, rc3); | |
| 134 | |
| 135 __m128i rc = _mm_or_si128(rc1, rc2); | |
| 136 rc = _mm_or_si128(rc, rc3); | |
| 137 | |
| 138 return rc; | |
| 139 } | |
| 140 | |
| 141 static __m128i colordodge_modeproc_SSE2(const __m128i& src, | |
| 142 const __m128i& dst) { | |
| 143 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 144 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 145 | |
| 146 __m128i a = srcover_byte_SSE2(sa, da); | |
| 147 __m128i r = colordodge_byte_SSE2(SkGetPackedR32_SSE2(src), | |
| 148 SkGetPackedR32_SSE2(dst), sa, da); | |
| 149 __m128i g = colordodge_byte_SSE2(SkGetPackedG32_SSE2(src), | |
| 150 SkGetPackedG32_SSE2(dst), sa, da); | |
| 151 __m128i b = colordodge_byte_SSE2(SkGetPackedB32_SSE2(src), | |
| 152 SkGetPackedB32_SSE2(dst), sa, da); | |
| 153 return SkPackARGB32_SSE2(a, r, g, b); | |
| 154 } | |
| 155 | |
| 156 static inline __m128i colorburn_byte_SSE2(const __m128i& sc, const __m128i& dc, | |
| 157 const __m128i& sa, const __m128i& da) { | |
| 158 __m128i ida = _mm_sub_epi32(_mm_set1_epi32(255), da); | |
| 159 __m128i isa = _mm_sub_epi32(_mm_set1_epi32(255), sa); | |
| 160 | |
| 161 // if (dc == da) | |
| 162 __m128i cmp1 = _mm_cmpeq_epi32(dc, da); | |
| 163 __m128i tmp1 = _mm_mullo_epi16(sa, da); | |
| 164 __m128i tmp2 = _mm_mullo_epi16(sc, ida); | |
| 165 __m128i tmp3 = _mm_mullo_epi16(dc, isa); | |
| 166 __m128i rc1 = _mm_add_epi32(tmp1, tmp2); | |
| 167 rc1 = _mm_add_epi32(rc1, tmp3); | |
| 168 rc1 = clamp_div255round_SSE2(rc1); | |
| 169 rc1 = _mm_and_si128(cmp1, rc1); | |
| 170 | |
| 171 // else if (0 == sc) | |
| 172 __m128i cmp2 = _mm_cmpeq_epi32(sc, _mm_setzero_si128()); | |
| 173 __m128i rc2 = SkAlphaMulAlpha_SSE2(dc, isa); | |
| 174 __m128i cmp = _mm_andnot_si128(cmp1, cmp2); | |
| 175 rc2 = _mm_and_si128(cmp, rc2); | |
| 176 | |
| 177 // else | |
| 178 __m128i cmp3 = _mm_or_si128(cmp1, cmp2); | |
| 179 __m128i tmp4 = _mm_sub_epi32(da, dc); | |
| 180 tmp4 = Multiply32_SSE2(tmp4, sa); | |
| 181 __m128 x = _mm_cvtepi32_ps(tmp4); | |
| 182 __m128 y = _mm_cvtepi32_ps(sc); | |
| 183 tmp4 = _mm_cvttps_epi32(_mm_div_ps(x, y)); | |
| 184 | |
| 185 __m128i tmp5 = _mm_sub_epi32(da, SkMin32_SSE2(da, tmp4)); | |
| 186 tmp5 = Multiply32_SSE2(sa, tmp5); | |
| 187 __m128i rc3 = _mm_add_epi32(tmp5, tmp2); | |
| 188 rc3 = _mm_add_epi32(rc3, tmp3); | |
| 189 rc3 = clamp_div255round_SSE2(rc3); | |
| 190 rc3 = _mm_andnot_si128(cmp3, rc3); | |
| 191 | |
| 192 __m128i rc = _mm_or_si128(rc1, rc2); | |
| 193 rc = _mm_or_si128(rc, rc3); | |
| 194 | |
| 195 return rc; | |
| 196 } | |
| 197 | |
| 198 static __m128i colorburn_modeproc_SSE2(const __m128i& src, const __m128i& dst) { | |
| 199 __m128i sa = SkGetPackedA32_SSE2(src); | |
| 200 __m128i da = SkGetPackedA32_SSE2(dst); | |
| 201 | |
| 202 __m128i a = srcover_byte_SSE2(sa, da); | |
| 203 __m128i r = colorburn_byte_SSE2(SkGetPackedR32_SSE2(src), | |
| 204 SkGetPackedR32_SSE2(dst), sa, da); | |
| 205 __m128i g = colorburn_byte_SSE2(SkGetPackedG32_SSE2(src), | |
| 206 SkGetPackedG32_SSE2(dst), sa, da); | |
| 207 __m128i b = colorburn_byte_SSE2(SkGetPackedB32_SSE2(src), | |
| 208 SkGetPackedB32_SSE2(dst), sa, da); | |
| 209 return SkPackARGB32_SSE2(a, r, g, b); | |
| 210 } | |
| 211 | |
| 87 //////////////////////////////////////////////////////////////////////////////// | 212 //////////////////////////////////////////////////////////////////////////////// |
| 88 | 213 |
| 89 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); | 214 typedef __m128i (*SkXfermodeProcSIMD)(const __m128i& src, const __m128i& dst); |
| 90 | 215 |
| 91 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; | 216 extern SkXfermodeProcSIMD gSSE2XfermodeProcs[]; |
| 92 | 217 |
| 93 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) | 218 SkSSE2ProcCoeffXfermode::SkSSE2ProcCoeffXfermode(SkReadBuffer& buffer) |
| 94 : INHERITED(buffer) { | 219 : INHERITED(buffer) { |
| 95 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); | 220 fProcSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[this->getMode()]); |
| 96 } | 221 } |
| (...skipping 138 matching lines...) | |
| 235 NULL, // kSrcATop_Mode | 360 NULL, // kSrcATop_Mode |
| 236 NULL, // kDstATop_Mode | 361 NULL, // kDstATop_Mode |
| 237 NULL, // kXor_Mode | 362 NULL, // kXor_Mode |
| 238 NULL, // kPlus_Mode | 363 NULL, // kPlus_Mode |
| 239 NULL, // kModulate_Mode | 364 NULL, // kModulate_Mode |
| 240 NULL, // kScreen_Mode | 365 NULL, // kScreen_Mode |
| 241 | 366 |
| 242 NULL, // kOverlay_Mode | 367 NULL, // kOverlay_Mode |
| 243 NULL, // kDarken_Mode | 368 NULL, // kDarken_Mode |
| 244 NULL, // kLighten_Mode | 369 NULL, // kLighten_Mode |
| 245 NULL, // kColorDodge_Mode | 370 colordodge_modeproc_SSE2, |
| 246 NULL, // kColorBurn_Mode | 371 colorburn_modeproc_SSE2, |
| 247 NULL, // kHardLight_Mode | 372 NULL, // kHardLight_Mode |
| 248 NULL, // kSoftLight_Mode | 373 NULL, // kSoftLight_Mode |
| 249 NULL, // kDifference_Mode | 374 NULL, // kDifference_Mode |
| 250 NULL, // kExclusion_Mode | 375 NULL, // kExclusion_Mode |
| 251 multiply_modeproc_SSE2, | 376 multiply_modeproc_SSE2, |
| 252 | 377 |
| 253 NULL, // kHue_Mode | 378 NULL, // kHue_Mode |
| 254 NULL, // kSaturation_Mode | 379 NULL, // kSaturation_Mode |
| 255 NULL, // kColor_Mode | 380 NULL, // kColor_Mode |
| 256 NULL, // kLuminosity_Mode | 381 NULL, // kLuminosity_Mode |
| 257 }; | 382 }; |
| 258 | 383 |
| 259 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, | 384 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec, |
| 260 SkXfermode::Mode mode) { | 385 SkXfermode::Mode mode) { |
| 261 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); | 386 void* procSIMD = reinterpret_cast<void*>(gSSE2XfermodeProcs[mode]); |
| 262 | 387 |
| 263 if (procSIMD != NULL) { | 388 if (procSIMD != NULL) { |
| 264 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); | 389 return SkNEW_ARGS(SkSSE2ProcCoeffXfermode, (rec, mode, procSIMD)); |
| 265 } | 390 } |
| 266 return NULL; | 391 return NULL; |
| 267 } | 392 } |
| OLD | NEW |