Index: source/libvpx/vpx_ports/x86.h |
diff --git a/source/libvpx/vpx_ports/x86.h b/source/libvpx/vpx_ports/x86.h |
index 81c2b8b873f34c1814a9f7baad04da0d03ad625d..2a3ebbe22e8d942f3ef30c2a18799f57061a4c6a 100644 |
--- a/source/libvpx/vpx_ports/x86.h |
+++ b/source/libvpx/vpx_ports/x86.h |
@@ -13,6 +13,7 @@ |
#define VPX_PORTS_X86_H_ |
#include <stdlib.h> |
#include "vpx_config.h" |
+#include "vpx/vpx_integer.h" |
#ifdef __cplusplus |
extern "C" { |
@@ -104,6 +105,37 @@ void __cpuid(int CPUInfo[4], int info_type); |
#endif |
#endif /* end others */ |
+// NaCl has no support for xgetbv or the raw opcode. |
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) |
+static INLINE uint64_t xgetbv(void) { |
+ const uint32_t ecx = 0; |
+ uint32_t eax, edx; |
+ // Use the raw opcode for xgetbv for compatibility with older toolchains. |
+ __asm__ volatile ( |
+ ".byte 0x0f, 0x01, 0xd0\n" |
+ : "=a"(eax), "=d"(edx) : "c" (ecx)); |
+ return ((uint64_t)edx << 32) | eax; |
+} |
+#elif (defined(_M_X64) || defined(_M_IX86)) && \ |
+ defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 |
+#include <immintrin.h> |
+#define xgetbv() _xgetbv(0) |
+#elif defined(_MSC_VER) && defined(_M_IX86) |
+static INLINE uint64_t xgetbv(void) { |
+ uint32_t eax_, edx_; |
+ __asm { |
+ xor ecx, ecx // ecx = 0 |
+ // Use the raw opcode for xgetbv for compatibility with older toolchains. |
+ __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 |
+ mov eax_, eax |
+ mov edx_, edx |
+ } |
+ return ((uint64_t)edx_ << 32) | eax_; |
+} |
+#else |
+#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. |
+#endif |
+ |
#define HAS_MMX 0x01 |
#define HAS_SSE 0x02 |
#define HAS_SSE2 0x04 |
@@ -156,14 +188,17 @@ x86_simd_caps(void) { |
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
- if (reg_ecx & BIT(28)) flags |= HAS_AVX; |
+ // bits 27 (OSXSAVE) & 28 (256-bit AVX) |
+ if (reg_ecx & (BIT(27) | BIT(28))) { |
+ if ((xgetbv() & 0x6) == 0x6) { |
+ flags |= HAS_AVX; |
- /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ |
- reg_eax = 7; |
- reg_ecx = 0; |
- cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
+ /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ |
+ cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
- if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
+ if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
+ } |
+ } |
return flags & mask; |
} |