Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Side by Side Diff: src/opts/opts_check_x86.cpp

Issue 1890483002: Move CPU feature detection to its own file. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: link Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkUtilsArm.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2009 The Android Open Source Project 2 * Copyright 2009 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBitmapFilter_opts_SSE2.h" 8 #include "SkBitmapFilter_opts_SSE2.h"
9 #include "SkBitmapProcState_opts_SSE2.h" 9 #include "SkBitmapProcState_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSSE3.h" 10 #include "SkBitmapProcState_opts_SSSE3.h"
11 #include "SkBitmapScaler.h" 11 #include "SkBitmapScaler.h"
12 #include "SkBlitMask.h" 12 #include "SkBlitMask.h"
13 #include "SkBlitRow.h" 13 #include "SkBlitRow.h"
14 #include "SkBlitRow_opts_SSE2.h" 14 #include "SkBlitRow_opts_SSE2.h"
15 #include "SkCpu.h"
15 #include "SkOncePtr.h" 16 #include "SkOncePtr.h"
16 #include "SkRTConf.h" 17 #include "SkRTConf.h"
17 18
18 19
19 /* 20 /*
20 ***************************************** 21 *****************************************
21 *********This file is deprecated********* 22 *********This file is deprecated*********
22 ***************************************** 23 *****************************************
23 * New CPU-specific work should be done in 24 * New CPU-specific work should be done in
24 * SkOpts framework. Run-time detection of 25 * SkOpts framework. Run-time detection of
25 * available instruction set extensions is 26 * available instruction set extensions is
26 * implemented in src/core/SkOpts.cpp file 27 * implemented in src/core/SkOpts.cpp file
27 ***************************************** 28 *****************************************
28 */ 29 */
29 30
30 31
31 #if defined(_MSC_VER) && defined(_WIN64)
32 #include <intrin.h>
33 #endif
34
35 /* This file must *not* be compiled with -msse or any other optional SIMD 32 /* This file must *not* be compiled with -msse or any other optional SIMD
36 extension, otherwise gcc may generate SIMD instructions even for scalar ops 33 extension, otherwise gcc may generate SIMD instructions even for scalar ops
37 (and thus give an invalid instruction on Pentium3 on the code below). 34 (and thus give an invalid instruction on Pentium3 on the code below).
38 For example, only files named *_SSE2.cpp in this directory should be 35 For example, only files named *_SSE2.cpp in this directory should be
39 compiled with -msse2 or higher. */ 36 compiled with -msse2 or higher. */
40 37
41
42 /* Function to get the CPU SSE-level in runtime, for different compilers. */
43 #ifdef _MSC_VER
44 static inline void getcpuid(int info_type, int info[4]) {
45 #if defined(_WIN64)
46 __cpuid(info, info_type);
47 #else
48 __asm {
49 mov eax, [info_type]
50 cpuid
51 mov edi, [info]
52 mov [edi], eax
53 mov [edi+4], ebx
54 mov [edi+8], ecx
55 mov [edi+12], edx
56 }
57 #endif
58 }
59 #elif defined(__x86_64__)
60 static inline void getcpuid(int info_type, int info[4]) {
61 asm volatile (
62 "cpuid \n\t"
63 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
64 : "a"(info_type)
65 );
66 }
67 #else
68 static inline void getcpuid(int info_type, int info[4]) {
69 // We save and restore ebx, so this code can be compatible with -fPIC
70 asm volatile (
71 "pushl %%ebx \n\t"
72 "cpuid \n\t"
73 "movl %%ebx, %1 \n\t"
74 "popl %%ebx \n\t"
75 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
76 : "a"(info_type)
77 );
78 }
79 #endif
80
81 ////////////////////////////////////////////////////////////////////////////////
82
83 /* Fetch the SIMD level directly from the CPU, at run-time.
84 * Only checks the levels needed by the optimizations in this file.
85 */
86 static int* get_SIMD_level() {
87 int cpu_info[4] = { 0, 0, 0, 0 };
88 getcpuid(1, cpu_info);
89
90 int* level = new int;
91
92 if ((cpu_info[2] & (1<<20)) != 0) {
93 *level = SK_CPU_SSE_LEVEL_SSE42;
94 } else if ((cpu_info[2] & (1<<19)) != 0) {
95 *level = SK_CPU_SSE_LEVEL_SSE41;
96 } else if ((cpu_info[2] & (1<<9)) != 0) {
97 *level = SK_CPU_SSE_LEVEL_SSSE3;
98 } else if ((cpu_info[3] & (1<<26)) != 0) {
99 *level = SK_CPU_SSE_LEVEL_SSE2;
100 } else {
101 *level = 0;
102 }
103 return level;
104 }
105
106 SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel);
107
108 /* Verify that the requested SIMD level is supported in the build.
109 * If not, check if the platform supports it.
110 */
111 static inline bool supports_simd(int minLevel) {
112 #if defined(SK_CPU_SSE_LEVEL)
113 if (minLevel <= SK_CPU_SSE_LEVEL) {
114 return true;
115 } else
116 #endif
117 {
118 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
119 /* For the Android framework we should always know at compile time if the device
120 * we are building for supports SSSE3. The one exception to this rule is on the
121 * emulator where we are compiled without the -mssse3 option (so we have no
122 * SSSE3 procs) but can be run on a host machine that supports SSSE3
123 * instructions. So for that particular case we disable our SSSE3 options.
124 */
125 return false;
126 #else
127 return minLevel <= *gSIMDLevel.get(get_SIMD_level);
128 #endif
129 }
130 }
131
132 //////////////////////////////////////////////////////////////////////////////// 38 ////////////////////////////////////////////////////////////////////////////////
133 39
134 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) { 40 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
135 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 41 if (SkCpu::Supports(SkCpu::SSE2)) {
136 procs->fExtraHorizontalReads = 3; 42 procs->fExtraHorizontalReads = 3;
137 procs->fConvolveVertically = &convolveVertically_SSE2; 43 procs->fConvolveVertically = &convolveVertically_SSE2;
138 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; 44 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
139 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; 45 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
140 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; 46 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
141 } 47 }
142 } 48 }
143 49
144 //////////////////////////////////////////////////////////////////////////////// 50 ////////////////////////////////////////////////////////////////////////////////
145 51
146 void SkBitmapProcState::platformProcs() { 52 void SkBitmapProcState::platformProcs() {
147 /* Every optimization in the function requires at least SSE2 */ 53 /* Every optimization in the function requires at least SSE2 */
148 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 54 if (!SkCpu::Supports(SkCpu::SSE2)) {
149 return; 55 return;
150 } 56 }
151 const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3); 57 const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3);
152 58
153 /* Check fSampleProc32 */ 59 /* Check fSampleProc32 */
154 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 60 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
155 if (ssse3) { 61 if (ssse3) {
156 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 62 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
157 } else { 63 } else {
158 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 64 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
159 } 65 }
160 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { 66 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
161 if (ssse3) { 67 if (ssse3) {
(...skipping 30 matching lines...) Expand all
192 nullptr, // S32_D565_Blend 98 nullptr, // S32_D565_Blend
193 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque 99 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
194 nullptr, // S32A_D565_Blend 100 nullptr, // S32A_D565_Blend
195 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither 101 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither
196 nullptr, // S32_D565_Blend_Dither 102 nullptr, // S32_D565_Blend_Dither
197 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither 103 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither
198 nullptr, // S32A_D565_Blend_Dither 104 nullptr, // S32A_D565_Blend_Dither
199 }; 105 };
200 106
201 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { 107 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
202 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 108 if (SkCpu::Supports(SkCpu::SSE2)) {
203 return platform_16_procs[flags]; 109 return platform_16_procs[flags];
204 } else { 110 } else {
205 return nullptr; 111 return nullptr;
206 } 112 }
207 } 113 }
208 114
209 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { 115 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = {
210 Color32A_D565_SSE2, // Color32A_D565, 116 Color32A_D565_SSE2, // Color32A_D565,
211 nullptr, // Color32A_D565_Dither 117 nullptr, // Color32A_D565_Dither
212 }; 118 };
213 119
214 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { 120 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
215 /* If you're thinking about writing an SSE4 version of this, do check it's 121 /* If you're thinking about writing an SSE4 version of this, do check it's
216 * actually faster on Atom. Our original SSE4 version was slower than this 122 * actually faster on Atom. Our original SSE4 version was slower than this
217 * SSE2 version on Silvermont, and only marginally faster on a Core i7, 123 * SSE2 version on Silvermont, and only marginally faster on a Core i7,
218 * mainly due to the MULLD timings. 124 * mainly due to the MULLD timings.
219 */ 125 */
220 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 126 if (SkCpu::Supports(SkCpu::SSE2)) {
221 return platform_565_colorprocs_SSE2[flags]; 127 return platform_565_colorprocs_SSE2[flags];
222 } else { 128 } else {
223 return nullptr; 129 return nullptr;
224 } 130 }
225 } 131 }
226 132
227 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { 133 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
228 nullptr, // S32_Opaque, 134 nullptr, // S32_Opaque,
229 S32_Blend_BlitRow32_SSE2, // S32_Blend, 135 S32_Blend_BlitRow32_SSE2, // S32_Blend,
230 nullptr, // Ported to SkOpts 136 nullptr, // Ported to SkOpts
231 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 137 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
232 }; 138 };
233 139
234 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 140 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
235 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 141 if (SkCpu::Supports(SkCpu::SSE2)) {
236 return platform_32_procs_SSE2[flags]; 142 return platform_32_procs_SSE2[flags];
237 } else { 143 } else {
238 return nullptr; 144 return nullptr;
239 } 145 }
240 } 146 }
241 147
242 //////////////////////////////////////////////////////////////////////////////// 148 ////////////////////////////////////////////////////////////////////////////////
243 149
244 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 150 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
245 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { 151 if (SkCpu::Supports(SkCpu::SSE2)) {
246 if (isOpaque) { 152 if (isOpaque) {
247 return SkBlitLCD16OpaqueRow_SSE2; 153 return SkBlitLCD16OpaqueRow_SSE2;
248 } else { 154 } else {
249 return SkBlitLCD16Row_SSE2; 155 return SkBlitLCD16Row_SSE2;
250 } 156 }
251 } else { 157 } else {
252 return nullptr; 158 return nullptr;
253 } 159 }
254 160
255 } 161 }
256 162
257 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) { 163 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
258 return nullptr; 164 return nullptr;
259 } 165 }
OLDNEW
« no previous file with comments | « src/core/SkUtilsArm.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698