OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBitmapFilter_opts_SSE2.h" | 8 #include "SkBitmapFilter_opts_SSE2.h" |
9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
10 #include "SkBitmapProcState_opts_SSSE3.h" | 10 #include "SkBitmapProcState_opts_SSSE3.h" |
11 #include "SkBitmapScaler.h" | 11 #include "SkBitmapScaler.h" |
12 #include "SkBlitMask.h" | 12 #include "SkBlitMask.h" |
13 #include "SkBlitRow.h" | 13 #include "SkBlitRow.h" |
14 #include "SkBlitRow_opts_SSE2.h" | 14 #include "SkBlitRow_opts_SSE2.h" |
| 15 #include "SkCpu.h" |
15 #include "SkOncePtr.h" | 16 #include "SkOncePtr.h" |
16 #include "SkRTConf.h" | 17 #include "SkRTConf.h" |
17 | 18 |
18 | 19 |
19 /* | 20 /* |
20 ***************************************** | 21 ***************************************** |
21 *********This file is deprecated********* | 22 *********This file is deprecated********* |
22 ***************************************** | 23 ***************************************** |
23 * New CPU-specific work should be done in | 24 * New CPU-specific work should be done in |
24 * SkOpts framework. Run-time detection of | 25 * SkOpts framework. Run-time detection of |
25 * available instruction set extensions is | 26 * available instruction set extensions is |
26 * implemented in src/core/SkOpts.cpp file | 27 * implemented in src/core/SkOpts.cpp file |
27 ***************************************** | 28 ***************************************** |
28 */ | 29 */ |
29 | 30 |
30 | 31 |
31 #if defined(_MSC_VER) && defined(_WIN64) | |
32 #include <intrin.h> | |
33 #endif | |
34 | |
35 /* This file must *not* be compiled with -msse or any other optional SIMD | 32 /* This file must *not* be compiled with -msse or any other optional SIMD |
36 extension, otherwise gcc may generate SIMD instructions even for scalar ops | 33 extension, otherwise gcc may generate SIMD instructions even for scalar ops |
37 (and thus give an invalid instruction on Pentium3 on the code below). | 34 (and thus give an invalid instruction on Pentium3 on the code below). |
38 For example, only files named *_SSE2.cpp in this directory should be | 35 For example, only files named *_SSE2.cpp in this directory should be |
39 compiled with -msse2 or higher. */ | 36 compiled with -msse2 or higher. */ |
40 | 37 |
41 | |
42 /* Function to get the CPU SSE-level in runtime, for different compilers. */ | |
43 #ifdef _MSC_VER | |
44 static inline void getcpuid(int info_type, int info[4]) { | |
45 #if defined(_WIN64) | |
46 __cpuid(info, info_type); | |
47 #else | |
48 __asm { | |
49 mov eax, [info_type] | |
50 cpuid | |
51 mov edi, [info] | |
52 mov [edi], eax | |
53 mov [edi+4], ebx | |
54 mov [edi+8], ecx | |
55 mov [edi+12], edx | |
56 } | |
57 #endif | |
58 } | |
59 #elif defined(__x86_64__) | |
60 static inline void getcpuid(int info_type, int info[4]) { | |
61 asm volatile ( | |
62 "cpuid \n\t" | |
63 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) | |
64 : "a"(info_type) | |
65 ); | |
66 } | |
67 #else | |
68 static inline void getcpuid(int info_type, int info[4]) { | |
69 // We save and restore ebx, so this code can be compatible with -fPIC | |
70 asm volatile ( | |
71 "pushl %%ebx \n\t" | |
72 "cpuid \n\t" | |
73 "movl %%ebx, %1 \n\t" | |
74 "popl %%ebx \n\t" | |
75 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) | |
76 : "a"(info_type) | |
77 ); | |
78 } | |
79 #endif | |
80 | |
81 //////////////////////////////////////////////////////////////////////////////// | |
82 | |
83 /* Fetch the SIMD level directly from the CPU, at run-time. | |
84 * Only checks the levels needed by the optimizations in this file. | |
85 */ | |
86 static int* get_SIMD_level() { | |
87 int cpu_info[4] = { 0, 0, 0, 0 }; | |
88 getcpuid(1, cpu_info); | |
89 | |
90 int* level = new int; | |
91 | |
92 if ((cpu_info[2] & (1<<20)) != 0) { | |
93 *level = SK_CPU_SSE_LEVEL_SSE42; | |
94 } else if ((cpu_info[2] & (1<<19)) != 0) { | |
95 *level = SK_CPU_SSE_LEVEL_SSE41; | |
96 } else if ((cpu_info[2] & (1<<9)) != 0) { | |
97 *level = SK_CPU_SSE_LEVEL_SSSE3; | |
98 } else if ((cpu_info[3] & (1<<26)) != 0) { | |
99 *level = SK_CPU_SSE_LEVEL_SSE2; | |
100 } else { | |
101 *level = 0; | |
102 } | |
103 return level; | |
104 } | |
105 | |
106 SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel); | |
107 | |
108 /* Verify that the requested SIMD level is supported in the build. | |
109 * If not, check if the platform supports it. | |
110 */ | |
111 static inline bool supports_simd(int minLevel) { | |
112 #if defined(SK_CPU_SSE_LEVEL) | |
113 if (minLevel <= SK_CPU_SSE_LEVEL) { | |
114 return true; | |
115 } else | |
116 #endif | |
117 { | |
118 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) | |
119 /* For the Android framework we should always know at compile time if the device | |
120 * we are building for supports SSSE3. The one exception to this rule is on the | |
121 * emulator where we are compiled without the -mssse3 option (so we have no | |
122 * SSSE3 procs) but can be run on a host machine that supports SSSE3 | |
123 * instructions. So for that particular case we disable our SSSE3 options. | |
124 */ | |
125 return false; | |
126 #else | |
127 return minLevel <= *gSIMDLevel.get(get_SIMD_level); | |
128 #endif | |
129 } | |
130 } | |
131 | |
132 //////////////////////////////////////////////////////////////////////////////// | 38 //////////////////////////////////////////////////////////////////////////////// |
133 | 39 |
134 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { | 40 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { |
135 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 41 if (SkCpu::Supports(SkCpu::SSE2)) { |
136 procs->fExtraHorizontalReads = 3; | 42 procs->fExtraHorizontalReads = 3; |
137 procs->fConvolveVertically = &convolveVertically_SSE2; | 43 procs->fConvolveVertically = &convolveVertically_SSE2; |
138 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; | 44 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; |
139 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; | 45 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; |
140 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; | 46 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; |
141 } | 47 } |
142 } | 48 } |
143 | 49 |
144 //////////////////////////////////////////////////////////////////////////////// | 50 //////////////////////////////////////////////////////////////////////////////// |
145 | 51 |
146 void SkBitmapProcState::platformProcs() { | 52 void SkBitmapProcState::platformProcs() { |
147 /* Every optimization in the function requires at least SSE2 */ | 53 /* Every optimization in the function requires at least SSE2 */ |
148 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 54 if (!SkCpu::Supports(SkCpu::SSE2)) { |
149 return; | 55 return; |
150 } | 56 } |
151 const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3); | 57 const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3); |
152 | 58 |
153 /* Check fSampleProc32 */ | 59 /* Check fSampleProc32 */ |
154 if (fSampleProc32 == S32_opaque_D32_filter_DX) { | 60 if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
155 if (ssse3) { | 61 if (ssse3) { |
156 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; | 62 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; |
157 } else { | 63 } else { |
158 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; | 64 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; |
159 } | 65 } |
160 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { | 66 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { |
161 if (ssse3) { | 67 if (ssse3) { |
(...skipping 30 matching lines...) Expand all Loading... |
192 nullptr, // S32_D565_Blend | 98 nullptr, // S32_D565_Blend |
193 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque | 99 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque |
194 nullptr, // S32A_D565_Blend | 100 nullptr, // S32A_D565_Blend |
195 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither | 101 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither |
196 nullptr, // S32_D565_Blend_Dither | 102 nullptr, // S32_D565_Blend_Dither |
197 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither | 103 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither |
198 nullptr, // S32A_D565_Blend_Dither | 104 nullptr, // S32A_D565_Blend_Dither |
199 }; | 105 }; |
200 | 106 |
201 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { | 107 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { |
202 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 108 if (SkCpu::Supports(SkCpu::SSE2)) { |
203 return platform_16_procs[flags]; | 109 return platform_16_procs[flags]; |
204 } else { | 110 } else { |
205 return nullptr; | 111 return nullptr; |
206 } | 112 } |
207 } | 113 } |
208 | 114 |
209 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { | 115 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { |
210 Color32A_D565_SSE2, // Color32A_D565, | 116 Color32A_D565_SSE2, // Color32A_D565, |
211 nullptr, // Color32A_D565_Dither | 117 nullptr, // Color32A_D565_Dither |
212 }; | 118 }; |
213 | 119 |
214 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { | 120 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { |
215 /* If you're thinking about writing an SSE4 version of this, do check it's | 121 /* If you're thinking about writing an SSE4 version of this, do check it's |
216 * actually faster on Atom. Our original SSE4 version was slower than this | 122 * actually faster on Atom. Our original SSE4 version was slower than this |
217 * SSE2 version on Silvermont, and only marginally faster on a Core i7, | 123 * SSE2 version on Silvermont, and only marginally faster on a Core i7, |
218 * mainly due to the MULLD timings. | 124 * mainly due to the MULLD timings. |
219 */ | 125 */ |
220 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 126 if (SkCpu::Supports(SkCpu::SSE2)) { |
221 return platform_565_colorprocs_SSE2[flags]; | 127 return platform_565_colorprocs_SSE2[flags]; |
222 } else { | 128 } else { |
223 return nullptr; | 129 return nullptr; |
224 } | 130 } |
225 } | 131 } |
226 | 132 |
227 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { | 133 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { |
228 nullptr, // S32_Opaque, | 134 nullptr, // S32_Opaque, |
229 S32_Blend_BlitRow32_SSE2, // S32_Blend, | 135 S32_Blend_BlitRow32_SSE2, // S32_Blend, |
230 nullptr, // Ported to SkOpts | 136 nullptr, // Ported to SkOpts |
231 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, | 137 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
232 }; | 138 }; |
233 | 139 |
234 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { | 140 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { |
235 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 141 if (SkCpu::Supports(SkCpu::SSE2)) { |
236 return platform_32_procs_SSE2[flags]; | 142 return platform_32_procs_SSE2[flags]; |
237 } else { | 143 } else { |
238 return nullptr; | 144 return nullptr; |
239 } | 145 } |
240 } | 146 } |
241 | 147 |
242 //////////////////////////////////////////////////////////////////////////////// | 148 //////////////////////////////////////////////////////////////////////////////// |
243 | 149 |
244 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { | 150 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { |
245 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 151 if (SkCpu::Supports(SkCpu::SSE2)) { |
246 if (isOpaque) { | 152 if (isOpaque) { |
247 return SkBlitLCD16OpaqueRow_SSE2; | 153 return SkBlitLCD16OpaqueRow_SSE2; |
248 } else { | 154 } else { |
249 return SkBlitLCD16Row_SSE2; | 155 return SkBlitLCD16Row_SSE2; |
250 } | 156 } |
251 } else { | 157 } else { |
252 return nullptr; | 158 return nullptr; |
253 } | 159 } |
254 | 160 |
255 } | 161 } |
256 | 162 |
257 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) { | 163 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) { |
258 return nullptr; | 164 return nullptr; |
259 } | 165 } |
OLD | NEW |