| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2009 The Android Open Source Project | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license that can be | |
| 5 * found in the LICENSE file. | |
| 6 */ | |
| 7 | |
| 8 #include "SkBitmapProcState_opts_SSE2.h" | |
| 9 #include "SkBitmapProcState_opts_SSSE3.h" | |
| 10 #include "SkBitmapFilter_opts_SSE2.h" | |
| 11 #include "SkBlitMask.h" | |
| 12 #include "SkBlitRow.h" | |
| 13 #include "SkBlitRect_opts_SSE2.h" | |
| 14 #include "SkBlitRow_opts_SSE2.h" | |
| 15 #include "SkBlurImage_opts_SSE2.h" | |
| 16 #include "SkUtils_opts_SSE2.h" | |
| 17 #include "SkUtils.h" | |
| 18 #include "SkMorphology_opts.h" | |
| 19 #include "SkMorphology_opts_SSE2.h" | |
| 20 #include "SkXfermode.h" | |
| 21 #include "SkXfermode_proccoeff.h" | |
| 22 | |
| 23 #include "SkRTConf.h" | |
| 24 | |
| 25 #if defined(_MSC_VER) && defined(_WIN64) | |
| 26 #include <intrin.h> | |
| 27 #endif | |
| 28 | |
| 29 /* This file must *not* be compiled with -msse or -msse2, otherwise | |
| 30 gcc may generate sse2 even for scalar ops (and thus give an invalid | |
| 31 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp | |
| 32 in this directory should be compiled with -msse2. */ | |
| 33 | |
| 34 | |
/*  getcpuid(info_type, info)
 *
 *  Executes the x86 CPUID instruction with EAX = info_type and stores the
 *  resulting EAX, EBX, ECX and EDX values in info[0], info[1], info[2] and
 *  info[3] respectively. One implementation is selected per compiler/target.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit MSVC builds go through the compiler intrinsic from <intrin.h>.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#elif defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    __asm__ __volatile__ (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX must be preserved so this code stays compatible with -fPIC.
    //
    // The EBX result is handed back in EDI ("=D") rather than in a
    // compiler-chosen register: with a plain "=r" the compiler is free to
    // pick EBX itself in non-PIC builds, and restoring EBX afterwards would
    // then overwrite the value we are trying to return.
    //
    //   movl  ebx -> edi     park the caller's EBX in EDI
    //   cpuid                EBX now holds the CPUID result
    //   xchgl edi <-> ebx    EDI = CPUID result, EBX = caller's value again
    __asm__ __volatile__ (
        "movl  %%ebx, %%edi \n\t"
        "cpuid              \n\t"
        "xchgl %%edi, %%ebx \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
| 74 | |
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
/*  SSE2 is guaranteed here: either the target is 64-bit x86 (where SSE2 is
 *  always present) or the build configuration already requires it, so no
 *  runtime probe is needed.
 */
static inline bool hasSSE2() {
    return true;
}
#else
/*  Runtime probe: queries CPUID leaf 1 and reports whether bit 26 of the
 *  returned EDX value (info[3]) is set.
 */
static inline bool hasSSE2() {
    int regs[4] = { 0 };
    getcpuid(1, regs);

    const int kSSE2Mask = 1 << 26;
    return 0 != (regs[3] & kSSE2Mask);
}
#endif
| 88 | |
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
/*  The build configuration already requires SSSE3; skip the runtime probe. */
static inline bool hasSSSE3() {
    return true;
}
#elif defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
/*  Android framework builds are expected to know at compile time whether the
 *  target device supports SSSE3. The exception is the emulator: it is built
 *  without the -msse3 option (so no SSSE3 procs exist in the binary) yet may
 *  run on a host CPU that does support SSSE3. Reporting "no SSSE3" here keeps
 *  those missing procs from being selected.
 */
static inline bool hasSSSE3() {
    return false;
}
#else
/*  Runtime probe: queries CPUID leaf 1 and reports whether bit 9 (0x200) of
 *  the returned ECX value (info[2]) is set.
 */
static inline bool hasSSSE3() {
    int regs[4] = { 0 };
    getcpuid(1, regs);

    const int kSSSE3Mask = 0x200;
    return 0 != (regs[2] & kSSSE3Mask);
}
#endif
| 112 | |
| 113 static bool cachedHasSSE2() { | |
| 114 static bool gHasSSE2 = hasSSE2(); | |
| 115 return gHasSSE2; | |
| 116 } | |
| 117 | |
| 118 static bool cachedHasSSSE3() { | |
| 119 static bool gHasSSSE3 = hasSSSE3(); | |
| 120 return gHasSSSE3; | |
| 121 } | |
| 122 | |
| 123 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "U
se SSE optimized version of high quality image filters"); | |
| 124 | |
| 125 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { | |
| 126 if (cachedHasSSE2()) { | |
| 127 procs->fExtraHorizontalReads = 3; | |
| 128 procs->fConvolveVertically = &convolveVertically_SSE2; | |
| 129 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; | |
| 130 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; | |
| 131 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 void SkBitmapProcState::platformProcs() { | |
| 136 /* Every optimization in the function requires at least SSE2 */ | |
| 137 if (!cachedHasSSE2()) { | |
| 138 return; | |
| 139 } | |
| 140 | |
| 141 /* Check fSampleProc32 */ | |
| 142 if (fSampleProc32 == S32_opaque_D32_filter_DX) { | |
| 143 if (cachedHasSSSE3()) { | |
| 144 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; | |
| 145 } else { | |
| 146 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; | |
| 147 } | |
| 148 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { | |
| 149 if (cachedHasSSSE3()) { | |
| 150 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; | |
| 151 } | |
| 152 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { | |
| 153 if (cachedHasSSSE3()) { | |
| 154 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; | |
| 155 } else { | |
| 156 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; | |
| 157 } | |
| 158 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { | |
| 159 if (cachedHasSSSE3()) { | |
| 160 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; | |
| 161 } | |
| 162 } | |
| 163 | |
| 164 /* Check fSampleProc16 */ | |
| 165 if (fSampleProc16 == S32_D16_filter_DX) { | |
| 166 fSampleProc16 = S32_D16_filter_DX_SSE2; | |
| 167 } | |
| 168 | |
| 169 /* Check fMatrixProc */ | |
| 170 if (fMatrixProc == ClampX_ClampY_filter_scale) { | |
| 171 fMatrixProc = ClampX_ClampY_filter_scale_SSE2; | |
| 172 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { | |
| 173 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; | |
| 174 } else if (fMatrixProc == ClampX_ClampY_filter_affine) { | |
| 175 fMatrixProc = ClampX_ClampY_filter_affine_SSE2; | |
| 176 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { | |
| 177 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; | |
| 178 } | |
| 179 | |
| 180 /* Check fShaderProc32 */ | |
| 181 if (c_hqfilter_sse) { | |
| 182 if (fShaderProc32 == highQualityFilter32) { | |
| 183 fShaderProc32 = highQualityFilter_SSE2; | |
| 184 } | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 static SkBlitRow::Proc platform_16_procs[] = { | |
| 189 S32_D565_Opaque_SSE2, // S32_D565_Opaque | |
| 190 NULL, // S32_D565_Blend | |
| 191 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque | |
| 192 NULL, // S32A_D565_Blend | |
| 193 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither | |
| 194 NULL, // S32_D565_Blend_Dither | |
| 195 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither | |
| 196 NULL, // S32A_D565_Blend_Dither | |
| 197 }; | |
| 198 | |
| 199 static SkBlitRow::Proc32 platform_32_procs[] = { | |
| 200 NULL, // S32_Opaque, | |
| 201 S32_Blend_BlitRow32_SSE2, // S32_Blend, | |
| 202 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque | |
| 203 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, | |
| 204 }; | |
| 205 | |
| 206 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { | |
| 207 if (cachedHasSSE2()) { | |
| 208 return platform_16_procs[flags]; | |
| 209 } else { | |
| 210 return NULL; | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { | |
| 215 if (cachedHasSSE2()) { | |
| 216 return Color32_SSE2; | |
| 217 } else { | |
| 218 return NULL; | |
| 219 } | |
| 220 } | |
| 221 | |
| 222 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { | |
| 223 if (cachedHasSSE2()) { | |
| 224 return platform_32_procs[flags]; | |
| 225 } else { | |
| 226 return NULL; | |
| 227 } | |
| 228 } | |
| 229 | |
| 230 | |
| 231 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, | |
| 232 SkMask::Format maskFormat, | |
| 233 SkColor color) { | |
| 234 if (SkMask::kA8_Format != maskFormat) { | |
| 235 return NULL; | |
| 236 } | |
| 237 | |
| 238 ColorProc proc = NULL; | |
| 239 if (cachedHasSSE2()) { | |
| 240 switch (dstConfig) { | |
| 241 case SkBitmap::kARGB_8888_Config: | |
| 242 // The SSE2 version is not (yet) faster for black, so we check | |
| 243 // for that. | |
| 244 if (SK_ColorBLACK != color) { | |
| 245 proc = SkARGB32_A8_BlitMask_SSE2; | |
| 246 } | |
| 247 break; | |
| 248 default: | |
| 249 break; | |
| 250 } | |
| 251 } | |
| 252 return proc; | |
| 253 } | |
| 254 | |
| 255 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { | |
| 256 if (cachedHasSSE2()) { | |
| 257 if (isOpaque) { | |
| 258 return SkBlitLCD16OpaqueRow_SSE2; | |
| 259 } else { | |
| 260 return SkBlitLCD16Row_SSE2; | |
| 261 } | |
| 262 } else { | |
| 263 return NULL; | |
| 264 } | |
| 265 | |
| 266 } | |
| 267 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, | |
| 268 SkMask::Format maskFormat, | |
| 269 RowFlags flags) { | |
| 270 return NULL; | |
| 271 } | |
| 272 | |
| 273 SkMemset16Proc SkMemset16GetPlatformProc() { | |
| 274 if (cachedHasSSE2()) { | |
| 275 return sk_memset16_SSE2; | |
| 276 } else { | |
| 277 return NULL; | |
| 278 } | |
| 279 } | |
| 280 | |
| 281 SkMemset32Proc SkMemset32GetPlatformProc() { | |
| 282 if (cachedHasSSE2()) { | |
| 283 return sk_memset32_SSE2; | |
| 284 } else { | |
| 285 return NULL; | |
| 286 } | |
| 287 } | |
| 288 | |
| 289 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType t
ype) { | |
| 290 if (!cachedHasSSE2()) { | |
| 291 return NULL; | |
| 292 } | |
| 293 switch (type) { | |
| 294 case kDilateX_SkMorphologyProcType: | |
| 295 return SkDilateX_SSE2; | |
| 296 case kDilateY_SkMorphologyProcType: | |
| 297 return SkDilateY_SSE2; | |
| 298 case kErodeX_SkMorphologyProcType: | |
| 299 return SkErodeX_SSE2; | |
| 300 case kErodeY_SkMorphologyProcType: | |
| 301 return SkErodeY_SSE2; | |
| 302 default: | |
| 303 return NULL; | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, | |
| 308 SkBoxBlurProc* boxBlurY, | |
| 309 SkBoxBlurProc* boxBlurXY, | |
| 310 SkBoxBlurProc* boxBlurYX) { | |
| 311 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | |
| 312 return false; | |
| 313 #else | |
| 314 if (!cachedHasSSE2()) { | |
| 315 return false; | |
| 316 } | |
| 317 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlur
YX); | |
| 318 #endif | |
| 319 } | |
| 320 | |
| 321 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning | |
| 322 | |
| 323 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { | |
| 324 if (cachedHasSSE2()) { | |
| 325 return ColorRect32_SSE2; | |
| 326 } else { | |
| 327 return NULL; | |
| 328 } | |
| 329 } | |
| 330 | |
| 331 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff&
rec, | |
| 332 SkXfermode::Mode
mode); | |
| 333 | |
| 334 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, | |
| 335 SkXfermode::Mode mode); | |
| 336 | |
| 337 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, | |
| 338 SkXfermode::Mode mode) { | |
| 339 return NULL; | |
| 340 } | |
| 341 | |
| 342 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, | |
| 343 SkXfermode::Mode mode); | |
| 344 | |
| 345 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, | |
| 346 SkXfermode::Mode mode) { | |
| 347 if (cachedHasSSE2()) { | |
| 348 return SkPlatformXfermodeFactory_impl_SSE2(rec, mode); | |
| 349 } else { | |
| 350 return SkPlatformXfermodeFactory_impl(rec, mode); | |
| 351 } | |
| 352 } | |
| 353 | |
| 354 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); | |
| 355 | |
| 356 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { | |
| 357 return NULL; | |
| 358 } | |
| OLD | NEW |