Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(834)

Unified Diff: src/opts/opts_check_x86.cpp

Issue 277593004: Revert of Improved x86 SSE build and run-time checks. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « include/core/SkPreConfig.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/opts_check_x86.cpp
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index d6142931912f5ec1a07ffca2fb32b105d93a277d..0b0debb2708a80bd00cdc8e68b377917935f508b 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -25,11 +25,10 @@
#include <intrin.h>
#endif
-/* This file must *not* be compiled with -msse or any other optional SIMD
- extension, otherwise gcc may generate SIMD instructions even for scalar ops
- (and thus give an invalid instruction on Pentium3 on the code below).
- For example, only files named *_SSE2.cpp in this directory should be
- compiled with -msse2 or higher. */
+/* This file must *not* be compiled with -msse or -msse2, otherwise
+ gcc may generate sse2 even for scalar ops (and thus give an invalid
+ instruction on Pentium3 on the code below). Only files named *_SSE2.cpp
+ in this directory should be compiled with -msse2. */
/* Function to get the CPU SSE-level in runtime, for different compilers. */
@@ -49,7 +48,8 @@
}
#endif
}
-#elif defined(__x86_64__)
+#else
+#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
asm volatile (
"cpuid \n\t"
@@ -70,47 +70,56 @@
);
}
#endif
-
-////////////////////////////////////////////////////////////////////////////////
-
-/* Fetch the SIMD level directly from the CPU, at run-time.
- * Only checks the levels needed by the optimizations in this file.
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+/* All x86_64 machines have SSE2, or we know it's supported at compile time, so don't even bother checking. */
+static inline bool hasSSE2() {
+ return true;
+}
+#else
+
+static inline bool hasSSE2() {
+ int cpu_info[4] = { 0 };
+ getcpuid(1, cpu_info);
+ return (cpu_info[3] & (1<<26)) != 0;
+}
+#endif
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+/* If we know SSSE3 is supported at compile time, don't even bother checking. */
+static inline bool hasSSSE3() {
+ return true;
+}
+#elif defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+/* For the Android framework we should always know at compile time if the device
+ * we are building for supports SSSE3. The one exception to this rule is on the
+ * emulator where we are compiled without the -msse3 option (so we have no SSSE3
+ * procs) but can be run on a host machine that supports SSSE3 instructions. So
+ * for that particular case we disable our SSSE3 options.
*/
-static int get_SIMD_level() {
+static inline bool hasSSSE3() {
+ return false;
+}
+#else
+
+static inline bool hasSSSE3() {
int cpu_info[4] = { 0 };
-
getcpuid(1, cpu_info);
- if ((cpu_info[2] & (1<<20)) != 0) {
- return SK_CPU_SSE_LEVEL_SSE42;
- } else if ((cpu_info[2] & (1<<9)) != 0) {
- return SK_CPU_SSE_LEVEL_SSSE3;
- } else if ((cpu_info[3] & (1<<26)) != 0) {
- return SK_CPU_SSE_LEVEL_SSE2;
- } else {
- return 0;
- }
-}
-
-/* Verify that the requested SIMD level exists in the build.
- * If not, check if the platform supports it.
- */
-static inline bool supports_simd(int minLevel) {
- if (minLevel <= SK_CPU_SSE_LEVEL) {
- return true;
- } else {
-#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
- /* For the Android framework we should always know at compile time if the device
- * we are building for supports SSSE3. The one exception to this rule is on the
- * emulator where we are compiled without the -mssse3 option (so we have no
- * SSSE3 procs) but can be run on a host machine that supports SSSE3
- * instructions. So for that particular case we disable our SSSE3 options.
- */
- return false;
-#else
- static int gSIMDLevel = get_SIMD_level();
- return (minLevel <= gSIMDLevel);
-#endif
- }
+ return (cpu_info[2] & 0x200) != 0;
+}
+#endif
+
+static bool cachedHasSSE2() {
+ static bool gHasSSE2 = hasSSE2();
+ return gHasSSE2;
+}
+
+static bool cachedHasSSSE3() {
+ static bool gHasSSSE3 = hasSSSE3();
+ return gHasSSSE3;
}
////////////////////////////////////////////////////////////////////////////////
@@ -118,7 +127,7 @@
SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
procs->fExtraHorizontalReads = 3;
procs->fConvolveVertically = &convolveVertically_SSE2;
procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
@@ -131,29 +140,29 @@
void SkBitmapProcState::platformProcs() {
/* Every optimization in the function requires at least SSE2 */
- if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (!cachedHasSSE2()) {
return;
}
/* Check fSampleProc32 */
if (fSampleProc32 == S32_opaque_D32_filter_DX) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (cachedHasSSSE3()) {
fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
} else {
fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
}
} else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (cachedHasSSSE3()) {
fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
}
} else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (cachedHasSSSE3()) {
fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
} else {
fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
}
} else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (cachedHasSSSE3()) {
fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
}
}
@@ -196,7 +205,7 @@
};
SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return platform_16_procs[flags];
} else {
return NULL;
@@ -211,7 +220,7 @@
};
SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return platform_32_procs[flags];
} else {
return NULL;
@@ -219,7 +228,7 @@
}
SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return Color32_SSE2;
} else {
return NULL;
@@ -230,7 +239,7 @@
SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
/* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return ColorRect32_SSE2;
} else {
return NULL;
@@ -249,7 +258,7 @@
}
ColorProc proc = NULL;
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
switch (dstConfig) {
case SkBitmap::kARGB_8888_Config:
// The SSE2 version is not (yet) faster for black, so we check
@@ -266,7 +275,7 @@
}
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
if (isOpaque) {
return SkBlitLCD16OpaqueRow_SSE2;
} else {
@@ -287,7 +296,7 @@
////////////////////////////////////////////////////////////////////////////////
SkMemset16Proc SkMemset16GetPlatformProc() {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return sk_memset16_SSE2;
} else {
return NULL;
@@ -295,7 +304,7 @@
}
SkMemset32Proc SkMemset32GetPlatformProc() {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return sk_memset32_SSE2;
} else {
return NULL;
@@ -305,7 +314,7 @@
////////////////////////////////////////////////////////////////////////////////
SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
- if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (!cachedHasSSE2()) {
return NULL;
}
switch (type) {
@@ -331,7 +340,7 @@
#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
return false;
#else
- if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (!cachedHasSSE2()) {
return false;
}
return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
@@ -356,7 +365,7 @@
SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
SkXfermode::Mode mode) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ if (cachedHasSSE2()) {
return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
} else {
return SkPlatformXfermodeFactory_impl(rec, mode);
« no previous file with comments | « include/core/SkPreConfig.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698