OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 | 11 |
12 #ifndef VPX_PORTS_X86_H_ | 12 #ifndef VPX_PORTS_X86_H_ |
13 #define VPX_PORTS_X86_H_ | 13 #define VPX_PORTS_X86_H_ |
14 #include <stdlib.h> | 14 #include <stdlib.h> |
15 #include "vpx_config.h" | 15 #include "vpx_config.h" |
| 16 #include "vpx/vpx_integer.h" |
16 | 17 |
17 #ifdef __cplusplus | 18 #ifdef __cplusplus |
18 extern "C" { | 19 extern "C" { |
19 #endif | 20 #endif |
20 | 21 |
21 typedef enum { | 22 typedef enum { |
22 VPX_CPU_UNKNOWN = -1, | 23 VPX_CPU_UNKNOWN = -1, |
23 VPX_CPU_AMD, | 24 VPX_CPU_AMD, |
24 VPX_CPU_AMD_OLD, | 25 VPX_CPU_AMD_OLD, |
25 VPX_CPU_CENTAUR, | 26 VPX_CPU_CENTAUR, |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
97 __asm mov eax, func\ | 98 __asm mov eax, func\ |
98 __asm mov ecx, func2\ | 99 __asm mov ecx, func2\ |
99 __asm cpuid\ | 100 __asm cpuid\ |
100 __asm mov a, eax\ | 101 __asm mov a, eax\ |
101 __asm mov b, ebx\ | 102 __asm mov b, ebx\ |
102 __asm mov c, ecx\ | 103 __asm mov c, ecx\ |
103 __asm mov d, edx | 104 __asm mov d, edx |
104 #endif | 105 #endif |
105 #endif /* end others */ | 106 #endif /* end others */ |
106 | 107 |
| 108 // NaCl has no support for xgetbv or the raw opcode. |
| 109 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) |
| 110 static INLINE uint64_t xgetbv(void) { |
| 111 const uint32_t ecx = 0; |
| 112 uint32_t eax, edx; |
| 113 // Use the raw opcode for xgetbv for compatibility with older toolchains. |
| 114 __asm__ volatile ( |
| 115 ".byte 0x0f, 0x01, 0xd0\n" |
| 116 : "=a"(eax), "=d"(edx) : "c" (ecx)); |
| 117 return ((uint64_t)edx << 32) | eax; |
| 118 } |
| 119 #elif (defined(_M_X64) || defined(_M_IX86)) && \ |
| 120 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 |
| 121 #include <immintrin.h> |
| 122 #define xgetbv() _xgetbv(0) |
| 123 #elif defined(_MSC_VER) && defined(_M_IX86) |
| 124 static INLINE uint64_t xgetbv(void) { |
| 125 uint32_t eax_, edx_; |
| 126 __asm { |
| 127 xor ecx, ecx // ecx = 0 |
| 128 // Use the raw opcode for xgetbv for compatibility with older toolchains. |
| 129 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 |
| 130 mov eax_, eax |
| 131 mov edx_, edx |
| 132 } |
| 133 return ((uint64_t)edx_ << 32) | eax_; |
| 134 } |
| 135 #else |
| 136 #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. |
| 137 #endif |
| 138 |
107 #define HAS_MMX 0x01 | 139 #define HAS_MMX 0x01 |
108 #define HAS_SSE 0x02 | 140 #define HAS_SSE 0x02 |
109 #define HAS_SSE2 0x04 | 141 #define HAS_SSE2 0x04 |
110 #define HAS_SSE3 0x08 | 142 #define HAS_SSE3 0x08 |
111 #define HAS_SSSE3 0x10 | 143 #define HAS_SSSE3 0x10 |
112 #define HAS_SSE4_1 0x20 | 144 #define HAS_SSE4_1 0x20 |
113 #define HAS_AVX 0x40 | 145 #define HAS_AVX 0x40 |
114 #define HAS_AVX2 0x80 | 146 #define HAS_AVX2 0x80 |
115 #ifndef BIT | 147 #ifndef BIT |
116 #define BIT(n) (1<<n) | 148 #define BIT(n) (1<<n) |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 181 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
150 | 182 |
151 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 183 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
152 | 184 |
153 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 185 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
154 | 186 |
155 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 187 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
156 | 188 |
157 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 189 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
158 | 190 |
159 if (reg_ecx & BIT(28)) flags |= HAS_AVX; | 191 // bits 27 (OSXSAVE) & 28 (256-bit AVX) |
| 192 if (reg_ecx & (BIT(27) | BIT(28))) { |
| 193 if ((xgetbv() & 0x6) == 0x6) { |
| 194 flags |= HAS_AVX; |
160 | 195 |
161 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ | 196 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ |
162 reg_eax = 7; | 197 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
163 reg_ecx = 0; | |
164 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | |
165 | 198 |
166 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; | 199 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
| 200 } |
| 201 } |
167 | 202 |
168 return flags & mask; | 203 return flags & mask; |
169 } | 204 } |
170 | 205 |
171 #if ARCH_X86_64 && defined(_MSC_VER) | 206 #if ARCH_X86_64 && defined(_MSC_VER) |
172 unsigned __int64 __rdtsc(void); | 207 unsigned __int64 __rdtsc(void); |
173 #pragma intrinsic(__rdtsc) | 208 #pragma intrinsic(__rdtsc) |
174 #endif | 209 #endif |
175 static INLINE unsigned int | 210 static INLINE unsigned int |
176 x86_readtsc(void) { | 211 x86_readtsc(void) { |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 } | 292 } |
258 | 293 |
259 | 294 |
260 extern void vpx_reset_mmx_state(void); | 295 extern void vpx_reset_mmx_state(void); |
261 | 296 |
262 #ifdef __cplusplus | 297 #ifdef __cplusplus |
263 } // extern "C" | 298 } // extern "C" |
264 #endif | 299 #endif |
265 | 300 |
266 #endif // VPX_PORTS_X86_H_ | 301 #endif // VPX_PORTS_X86_H_ |
OLD | NEW |