OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBitmapFilter_opts_SSE2.h" | 8 #include "SkBitmapFilter_opts_SSE2.h" |
9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
10 #include "SkBitmapProcState_opts_SSSE3.h" | 10 #include "SkBitmapProcState_opts_SSSE3.h" |
11 #include "SkBitmapScaler.h" | 11 #include "SkBitmapScaler.h" |
12 #include "SkBlitMask.h" | 12 #include "SkBlitMask.h" |
13 #include "SkBlitRow.h" | 13 #include "SkBlitRow.h" |
14 #include "SkBlitRow_opts_SSE2.h" | 14 #include "SkBlitRow_opts_SSE2.h" |
15 #include "SkCpu.h" | |
16 #include "SkOncePtr.h" | 15 #include "SkOncePtr.h" |
17 #include "SkRTConf.h" | 16 #include "SkRTConf.h" |
18 | 17 |
19 | 18 |
20 /* | 19 /* |
21 ***************************************** | 20 ***************************************** |
22 *********This file is deprecated********* | 21 *********This file is deprecated********* |
23 ***************************************** | 22 ***************************************** |
24 * New CPU-specific work should be done in | 23 * New CPU-specific work should be done in |
25 * SkOpts framework. Run-time detection of | 24 * SkOpts framework. Run-time detection of |
26 * available instruction set extensions is | 25 * available instruction set extensions is |
27 * implemented in src/core/SkOpts.cpp file | 26 * implemented in src/core/SkOpts.cpp file |
28 ***************************************** | 27 ***************************************** |
29 */ | 28 */ |
30 | 29 |
31 | 30 |
| 31 #if defined(_MSC_VER) && defined(_WIN64) |
| 32 #include <intrin.h> |
| 33 #endif |
| 34 |
32 /* This file must *not* be compiled with -msse or any other optional SIMD | 35 /* This file must *not* be compiled with -msse or any other optional SIMD |
33 extension, otherwise gcc may generate SIMD instructions even for scalar ops | 36 extension, otherwise gcc may generate SIMD instructions even for scalar ops |
34 (and thus give an invalid instruction on Pentium3 on the code below). | 37 (and thus give an invalid instruction on Pentium3 on the code below). |
35 For example, only files named *_SSE2.cpp in this directory should be | 38 For example, only files named *_SSE2.cpp in this directory should be |
36 compiled with -msse2 or higher. */ | 39 compiled with -msse2 or higher. */ |
37 | 40 |
| 41 |
/* Executes the CPUID instruction for the leaf given in `info_type` and stores
 * the resulting EAX, EBX, ECX and EDX register values in info[0], info[1],
 * info[2] and info[3] respectively. One variant is provided per
 * compiler/target combination.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit MSVC builds use the compiler intrinsic instead of inline assembly.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#elif defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    __asm__ volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX must hold its original value when the statement ends so this code
    // stays compatible with -fPIC. Swapping EBX with the scratch operand
    // before and after CPUID preserves it, and (unlike push/mov/pop) remains
    // correct even if the compiler assigns EBX itself to %1: both exchanges
    // then degenerate to no-ops and %1 simply keeps CPUID's EBX result.
    __asm__ volatile (
        "xchgl %%ebx, %1 \n\t"
        "cpuid           \n\t"
        "xchgl %%ebx, %1 \n\t"
        // "&": %1 is written before the asm has finished reading its inputs.
        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
| 80 |
| 81 //////////////////////////////////////////////////////////////////////////////// |
| 82 |
| 83 /* Fetch the SIMD level directly from the CPU, at run-time. |
| 84 * Only checks the levels needed by the optimizations in this file. |
| 85 */ |
| 86 static int* get_SIMD_level() { |
| 87 int cpu_info[4] = { 0, 0, 0, 0 }; |
| 88 getcpuid(1, cpu_info); |
| 89 |
| 90 int* level = new int; |
| 91 |
| 92 if ((cpu_info[2] & (1<<20)) != 0) { |
| 93 *level = SK_CPU_SSE_LEVEL_SSE42; |
| 94 } else if ((cpu_info[2] & (1<<19)) != 0) { |
| 95 *level = SK_CPU_SSE_LEVEL_SSE41; |
| 96 } else if ((cpu_info[2] & (1<<9)) != 0) { |
| 97 *level = SK_CPU_SSE_LEVEL_SSSE3; |
| 98 } else if ((cpu_info[3] & (1<<26)) != 0) { |
| 99 *level = SK_CPU_SSE_LEVEL_SSE2; |
| 100 } else { |
| 101 *level = 0; |
| 102 } |
| 103 return level; |
| 104 } |
| 105 |
| 106 SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel); |
| 107 |
| 108 /* Verify that the requested SIMD level is supported in the build. |
| 109 * If not, check if the platform supports it. |
| 110 */ |
| 111 static inline bool supports_simd(int minLevel) { |
| 112 #if defined(SK_CPU_SSE_LEVEL) |
| 113 if (minLevel <= SK_CPU_SSE_LEVEL) { |
| 114 return true; |
| 115 } else |
| 116 #endif |
| 117 { |
| 118 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) |
| 119 /* For the Android framework we should always know at compile time if th
e device |
| 120 * we are building for supports SSSE3. The one exception to this rule i
s on the |
| 121 * emulator where we are compiled without the -mssse3 option (so we have
no |
| 122 * SSSE3 procs) but can be run on a host machine that supports SSSE3 |
| 123 * instructions. So for that particular case we disable our SSSE3 option
s. |
| 124 */ |
| 125 return false; |
| 126 #else |
| 127 return minLevel <= *gSIMDLevel.get(get_SIMD_level); |
| 128 #endif |
| 129 } |
| 130 } |
| 131 |
38 //////////////////////////////////////////////////////////////////////////////// | 132 //////////////////////////////////////////////////////////////////////////////// |
39 | 133 |
40 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { | 134 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { |
41 if (SkCpu::Supports(SkCpu::SSE2)) { | 135 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
42 procs->fExtraHorizontalReads = 3; | 136 procs->fExtraHorizontalReads = 3; |
43 procs->fConvolveVertically = &convolveVertically_SSE2; | 137 procs->fConvolveVertically = &convolveVertically_SSE2; |
44 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; | 138 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; |
45 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; | 139 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; |
46 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; | 140 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; |
47 } | 141 } |
48 } | 142 } |
49 | 143 |
50 //////////////////////////////////////////////////////////////////////////////// | 144 //////////////////////////////////////////////////////////////////////////////// |
51 | 145 |
52 void SkBitmapProcState::platformProcs() { | 146 void SkBitmapProcState::platformProcs() { |
53 /* Every optimization in the function requires at least SSE2 */ | 147 /* Every optimization in the function requires at least SSE2 */ |
54 if (!SkCpu::Supports(SkCpu::SSE2)) { | 148 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
55 return; | 149 return; |
56 } | 150 } |
57 const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3); | 151 const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3); |
58 | 152 |
59 /* Check fSampleProc32 */ | 153 /* Check fSampleProc32 */ |
60 if (fSampleProc32 == S32_opaque_D32_filter_DX) { | 154 if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
61 if (ssse3) { | 155 if (ssse3) { |
62 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; | 156 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; |
63 } else { | 157 } else { |
64 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; | 158 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; |
65 } | 159 } |
66 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { | 160 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { |
67 if (ssse3) { | 161 if (ssse3) { |
(...skipping 30 matching lines...) Expand all Loading... |
98 nullptr, // S32_D565_Blend | 192 nullptr, // S32_D565_Blend |
99 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque | 193 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque |
100 nullptr, // S32A_D565_Blend | 194 nullptr, // S32A_D565_Blend |
101 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither | 195 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither |
102 nullptr, // S32_D565_Blend_Dither | 196 nullptr, // S32_D565_Blend_Dither |
103 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither | 197 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither |
104 nullptr, // S32A_D565_Blend_Dither | 198 nullptr, // S32A_D565_Blend_Dither |
105 }; | 199 }; |
106 | 200 |
107 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { | 201 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { |
108 if (SkCpu::Supports(SkCpu::SSE2)) { | 202 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
109 return platform_16_procs[flags]; | 203 return platform_16_procs[flags]; |
110 } else { | 204 } else { |
111 return nullptr; | 205 return nullptr; |
112 } | 206 } |
113 } | 207 } |
114 | 208 |
115 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { | 209 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { |
116 Color32A_D565_SSE2, // Color32A_D565, | 210 Color32A_D565_SSE2, // Color32A_D565, |
117 nullptr, // Color32A_D565_Dither | 211 nullptr, // Color32A_D565_Dither |
118 }; | 212 }; |
119 | 213 |
120 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { | 214 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { |
121 /* If you're thinking about writing an SSE4 version of this, do check it's | 215 /* If you're thinking about writing an SSE4 version of this, do check it's |
122 * actually faster on Atom. Our original SSE4 version was slower than this | 216 * actually faster on Atom. Our original SSE4 version was slower than this |
123 * SSE2 version on Silvermont, and only marginally faster on a Core i7, | 217 * SSE2 version on Silvermont, and only marginally faster on a Core i7, |
124 * mainly due to the MULLD timings. | 218 * mainly due to the MULLD timings. |
125 */ | 219 */ |
126 if (SkCpu::Supports(SkCpu::SSE2)) { | 220 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
127 return platform_565_colorprocs_SSE2[flags]; | 221 return platform_565_colorprocs_SSE2[flags]; |
128 } else { | 222 } else { |
129 return nullptr; | 223 return nullptr; |
130 } | 224 } |
131 } | 225 } |
132 | 226 |
133 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { | 227 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { |
134 nullptr, // S32_Opaque, | 228 nullptr, // S32_Opaque, |
135 S32_Blend_BlitRow32_SSE2, // S32_Blend, | 229 S32_Blend_BlitRow32_SSE2, // S32_Blend, |
136 nullptr, // Ported to SkOpts | 230 nullptr, // Ported to SkOpts |
137 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, | 231 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
138 }; | 232 }; |
139 | 233 |
140 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { | 234 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { |
141 if (SkCpu::Supports(SkCpu::SSE2)) { | 235 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
142 return platform_32_procs_SSE2[flags]; | 236 return platform_32_procs_SSE2[flags]; |
143 } else { | 237 } else { |
144 return nullptr; | 238 return nullptr; |
145 } | 239 } |
146 } | 240 } |
147 | 241 |
148 //////////////////////////////////////////////////////////////////////////////// | 242 //////////////////////////////////////////////////////////////////////////////// |
149 | 243 |
150 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { | 244 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { |
151 if (SkCpu::Supports(SkCpu::SSE2)) { | 245 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
152 if (isOpaque) { | 246 if (isOpaque) { |
153 return SkBlitLCD16OpaqueRow_SSE2; | 247 return SkBlitLCD16OpaqueRow_SSE2; |
154 } else { | 248 } else { |
155 return SkBlitLCD16Row_SSE2; | 249 return SkBlitLCD16Row_SSE2; |
156 } | 250 } |
157 } else { | 251 } else { |
158 return nullptr; | 252 return nullptr; |
159 } | 253 } |
160 | 254 |
161 } | 255 } |
162 | 256 |
163 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, Ro
wFlags) { | 257 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, Ro
wFlags) { |
164 return nullptr; | 258 return nullptr; |
165 } | 259 } |
OLD | NEW |