| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkBitmapFilter_opts_SSE2.h" | 8 #include "SkBitmapFilter_opts_SSE2.h" |
| 9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
| 10 #include "SkBitmapProcState_opts_SSSE3.h" | 10 #include "SkBitmapProcState_opts_SSSE3.h" |
| 11 #include "SkBitmapScaler.h" | 11 #include "SkBitmapScaler.h" |
| 12 #include "SkBlitMask.h" | 12 #include "SkBlitMask.h" |
| 13 #include "SkBlitRow.h" | 13 #include "SkBlitRow.h" |
| 14 #include "SkBlitRow_opts_SSE2.h" | 14 #include "SkBlitRow_opts_SSE2.h" |
| 15 #include "SkCpu.h" |
| 15 #include "SkOncePtr.h" | 16 #include "SkOncePtr.h" |
| 16 #include "SkRTConf.h" | 17 #include "SkRTConf.h" |
| 17 | 18 |
| 18 | 19 |
| 19 /* | 20 /* |
| 20 ***************************************** | 21 ***************************************** |
| 21 *********This file is deprecated********* | 22 *********This file is deprecated********* |
| 22 ***************************************** | 23 ***************************************** |
| 23 * New CPU-specific work should be done in | 24 * New CPU-specific work should be done in |
| 24 * SkOpts framework. Run-time detection of | 25 * SkOpts framework. Run-time detection of |
| 25 * available instruction set extensions is | 26 * available instruction set extensions is |
| 26 * implemented in src/core/SkOpts.cpp file | 27 * implemented in src/core/SkOpts.cpp file |
| 27 ***************************************** | 28 ***************************************** |
| 28 */ | 29 */ |
| 29 | 30 |
| 30 | 31 |
| 31 #if defined(_MSC_VER) && defined(_WIN64) | |
| 32 #include <intrin.h> | |
| 33 #endif | |
| 34 | |
| 35 /* This file must *not* be compiled with -msse or any other optional SIMD | 32 /* This file must *not* be compiled with -msse or any other optional SIMD |
| 36 extension, otherwise gcc may generate SIMD instructions even for scalar ops | 33 extension, otherwise gcc may generate SIMD instructions even for scalar ops |
| 37 (and thus give an invalid instruction on Pentium3 on the code below). | 34 (and thus give an invalid instruction on Pentium3 on the code below). |
| 38 For example, only files named *_SSE2.cpp in this directory should be | 35 For example, only files named *_SSE2.cpp in this directory should be |
| 39 compiled with -msse2 or higher. */ | 36 compiled with -msse2 or higher. */ |
| 40 | 37 |
| 41 | |
| 42 /* Function to get the CPU SSE-level in runtime, for different compilers. */ | |
| 43 #ifdef _MSC_VER | |
| 44 static inline void getcpuid(int info_type, int info[4]) { | |
| 45 #if defined(_WIN64) | |
| 46 __cpuid(info, info_type); | |
| 47 #else | |
| 48 __asm { | |
| 49 mov eax, [info_type] | |
| 50 cpuid | |
| 51 mov edi, [info] | |
| 52 mov [edi], eax | |
| 53 mov [edi+4], ebx | |
| 54 mov [edi+8], ecx | |
| 55 mov [edi+12], edx | |
| 56 } | |
| 57 #endif | |
| 58 } | |
| 59 #elif defined(__x86_64__) | |
| 60 static inline void getcpuid(int info_type, int info[4]) { | |
| 61 asm volatile ( | |
| 62 "cpuid \n\t" | |
| 63 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) | |
| 64 : "a"(info_type) | |
| 65 ); | |
| 66 } | |
| 67 #else | |
| 68 static inline void getcpuid(int info_type, int info[4]) { | |
| 69 // We save and restore ebx, so this code can be compatible with -fPIC | |
| 70 asm volatile ( | |
| 71 "pushl %%ebx \n\t" | |
| 72 "cpuid \n\t" | |
| 73 "movl %%ebx, %1 \n\t" | |
| 74 "popl %%ebx \n\t" | |
| 75 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) | |
| 76 : "a"(info_type) | |
| 77 ); | |
| 78 } | |
| 79 #endif | |
| 80 | |
| 81 //////////////////////////////////////////////////////////////////////////////// | |
| 82 | |
| 83 /* Fetch the SIMD level directly from the CPU, at run-time. | |
| 84 * Only checks the levels needed by the optimizations in this file. | |
| 85 */ | |
| 86 static int* get_SIMD_level() { | |
| 87 int cpu_info[4] = { 0, 0, 0, 0 }; | |
| 88 getcpuid(1, cpu_info); | |
| 89 | |
| 90 int* level = new int; | |
| 91 | |
| 92 if ((cpu_info[2] & (1<<20)) != 0) { | |
| 93 *level = SK_CPU_SSE_LEVEL_SSE42; | |
| 94 } else if ((cpu_info[2] & (1<<19)) != 0) { | |
| 95 *level = SK_CPU_SSE_LEVEL_SSE41; | |
| 96 } else if ((cpu_info[2] & (1<<9)) != 0) { | |
| 97 *level = SK_CPU_SSE_LEVEL_SSSE3; | |
| 98 } else if ((cpu_info[3] & (1<<26)) != 0) { | |
| 99 *level = SK_CPU_SSE_LEVEL_SSE2; | |
| 100 } else { | |
| 101 *level = 0; | |
| 102 } | |
| 103 return level; | |
| 104 } | |
| 105 | |
| 106 SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel); | |
| 107 | |
| 108 /* Verify that the requested SIMD level is supported in the build. | |
| 109 * If not, check if the platform supports it. | |
| 110 */ | |
| 111 static inline bool supports_simd(int minLevel) { | |
| 112 #if defined(SK_CPU_SSE_LEVEL) | |
| 113 if (minLevel <= SK_CPU_SSE_LEVEL) { | |
| 114 return true; | |
| 115 } else | |
| 116 #endif | |
| 117 { | |
| 118 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) | |
| 119 /* For the Android framework we should always know at compile time if the device | |
| 120 * we are building for supports SSSE3. The one exception to this rule is on the | |
| 121 * emulator where we are compiled without the -mssse3 option (so we have no | |
| 122 * SSSE3 procs) but can be run on a host machine that supports SSSE3 | |
| 123 * instructions. So for that particular case we disable our SSSE3 options. | |
| 124 */ | |
| 125 return false; | |
| 126 #else | |
| 127 return minLevel <= *gSIMDLevel.get(get_SIMD_level); | |
| 128 #endif | |
| 129 } | |
| 130 } | |
| 131 | |
| 132 //////////////////////////////////////////////////////////////////////////////// | 38 //////////////////////////////////////////////////////////////////////////////// |
| 133 | 39 |
| 134 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { | 40 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { |
| 135 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 41 if (SkCpu::Supports(SkCpu::SSE2)) { |
| 136 procs->fExtraHorizontalReads = 3; | 42 procs->fExtraHorizontalReads = 3; |
| 137 procs->fConvolveVertically = &convolveVertically_SSE2; | 43 procs->fConvolveVertically = &convolveVertically_SSE2; |
| 138 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; | 44 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; |
| 139 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; | 45 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; |
| 140 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; | 46 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; |
| 141 } | 47 } |
| 142 } | 48 } |
| 143 | 49 |
| 144 //////////////////////////////////////////////////////////////////////////////// | 50 //////////////////////////////////////////////////////////////////////////////// |
| 145 | 51 |
| 146 void SkBitmapProcState::platformProcs() { | 52 void SkBitmapProcState::platformProcs() { |
| 147 /* Every optimization in the function requires at least SSE2 */ | 53 /* Every optimization in the function requires at least SSE2 */ |
| 148 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 54 if (!SkCpu::Supports(SkCpu::SSE2)) { |
| 149 return; | 55 return; |
| 150 } | 56 } |
| 151 const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3); | 57 const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3); |
| 152 | 58 |
| 153 /* Check fSampleProc32 */ | 59 /* Check fSampleProc32 */ |
| 154 if (fSampleProc32 == S32_opaque_D32_filter_DX) { | 60 if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
| 155 if (ssse3) { | 61 if (ssse3) { |
| 156 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; | 62 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; |
| 157 } else { | 63 } else { |
| 158 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; | 64 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; |
| 159 } | 65 } |
| 160 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { | 66 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { |
| 161 if (ssse3) { | 67 if (ssse3) { |
| (...skipping 30 matching lines...) Expand all Loading... |
| 192 nullptr, // S32_D565_Blend | 98 nullptr, // S32_D565_Blend |
| 193 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque | 99 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque |
| 194 nullptr, // S32A_D565_Blend | 100 nullptr, // S32A_D565_Blend |
| 195 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither | 101 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither |
| 196 nullptr, // S32_D565_Blend_Dither | 102 nullptr, // S32_D565_Blend_Dither |
| 197 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither | 103 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither |
| 198 nullptr, // S32A_D565_Blend_Dither | 104 nullptr, // S32A_D565_Blend_Dither |
| 199 }; | 105 }; |
| 200 | 106 |
| 201 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { | 107 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { |
| 202 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 108 if (SkCpu::Supports(SkCpu::SSE2)) { |
| 203 return platform_16_procs[flags]; | 109 return platform_16_procs[flags]; |
| 204 } else { | 110 } else { |
| 205 return nullptr; | 111 return nullptr; |
| 206 } | 112 } |
| 207 } | 113 } |
| 208 | 114 |
| 209 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { | 115 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { |
| 210 Color32A_D565_SSE2, // Color32A_D565, | 116 Color32A_D565_SSE2, // Color32A_D565, |
| 211 nullptr, // Color32A_D565_Dither | 117 nullptr, // Color32A_D565_Dither |
| 212 }; | 118 }; |
| 213 | 119 |
| 214 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { | 120 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { |
| 215 /* If you're thinking about writing an SSE4 version of this, do check it's | 121 /* If you're thinking about writing an SSE4 version of this, do check it's |
| 216 * actually faster on Atom. Our original SSE4 version was slower than this | 122 * actually faster on Atom. Our original SSE4 version was slower than this |
| 217 * SSE2 version on Silvermont, and only marginally faster on a Core i7, | 123 * SSE2 version on Silvermont, and only marginally faster on a Core i7, |
| 218 * mainly due to the MULLD timings. | 124 * mainly due to the MULLD timings. |
| 219 */ | 125 */ |
| 220 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 126 if (SkCpu::Supports(SkCpu::SSE2)) { |
| 221 return platform_565_colorprocs_SSE2[flags]; | 127 return platform_565_colorprocs_SSE2[flags]; |
| 222 } else { | 128 } else { |
| 223 return nullptr; | 129 return nullptr; |
| 224 } | 130 } |
| 225 } | 131 } |
| 226 | 132 |
| 227 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { | 133 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { |
| 228 nullptr, // S32_Opaque, | 134 nullptr, // S32_Opaque, |
| 229 S32_Blend_BlitRow32_SSE2, // S32_Blend, | 135 S32_Blend_BlitRow32_SSE2, // S32_Blend, |
| 230 nullptr, // Ported to SkOpts | 136 nullptr, // Ported to SkOpts |
| 231 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, | 137 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
| 232 }; | 138 }; |
| 233 | 139 |
| 234 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { | 140 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { |
| 235 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 141 if (SkCpu::Supports(SkCpu::SSE2)) { |
| 236 return platform_32_procs_SSE2[flags]; | 142 return platform_32_procs_SSE2[flags]; |
| 237 } else { | 143 } else { |
| 238 return nullptr; | 144 return nullptr; |
| 239 } | 145 } |
| 240 } | 146 } |
| 241 | 147 |
| 242 //////////////////////////////////////////////////////////////////////////////// | 148 //////////////////////////////////////////////////////////////////////////////// |
| 243 | 149 |
| 244 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { | 150 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { |
| 245 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 151 if (SkCpu::Supports(SkCpu::SSE2)) { |
| 246 if (isOpaque) { | 152 if (isOpaque) { |
| 247 return SkBlitLCD16OpaqueRow_SSE2; | 153 return SkBlitLCD16OpaqueRow_SSE2; |
| 248 } else { | 154 } else { |
| 249 return SkBlitLCD16Row_SSE2; | 155 return SkBlitLCD16Row_SSE2; |
| 250 } | 156 } |
| 251 } else { | 157 } else { |
| 252 return nullptr; | 158 return nullptr; |
| 253 } | 159 } |
| 254 | 160 |
| 255 } | 161 } |
| 256 | 162 |
| 257 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) { | 163 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) { |
| 258 return nullptr; | 164 return nullptr; |
| 259 } | 165 } |
| OLD | NEW |