OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 | 11 |
12 #ifndef VPX_PORTS_X86_H_ | 12 #ifndef VPX_PORTS_X86_H_ |
13 #define VPX_PORTS_X86_H_ | 13 #define VPX_PORTS_X86_H_ |
14 #include <stdlib.h> | 14 #include <stdlib.h> |
15 #include "vpx_config.h" | 15 #include "vpx_config.h" |
16 #include "vpx/vpx_integer.h" | |
17 | 16 |
18 #ifdef __cplusplus | 17 #ifdef __cplusplus |
19 extern "C" { | 18 extern "C" { |
20 #endif | 19 #endif |
21 | 20 |
22 typedef enum { | 21 typedef enum { |
23 VPX_CPU_UNKNOWN = -1, | 22 VPX_CPU_UNKNOWN = -1, |
24 VPX_CPU_AMD, | 23 VPX_CPU_AMD, |
25 VPX_CPU_AMD_OLD, | 24 VPX_CPU_AMD_OLD, |
26 VPX_CPU_CENTAUR, | 25 VPX_CPU_CENTAUR, |
(...skipping 71 matching lines...) |
98 __asm mov eax, func\ | 97 __asm mov eax, func\ |
99 __asm mov ecx, func2\ | 98 __asm mov ecx, func2\ |
100 __asm cpuid\ | 99 __asm cpuid\ |
101 __asm mov a, eax\ | 100 __asm mov a, eax\ |
102 __asm mov b, ebx\ | 101 __asm mov b, ebx\ |
103 __asm mov c, ecx\ | 102 __asm mov c, ecx\ |
104 __asm mov d, edx | 103 __asm mov d, edx |
105 #endif | 104 #endif |
106 #endif /* end others */ | 105 #endif /* end others */ |
107 | 106 |
108 // NaCl has no support for xgetbv or the raw opcode. | |
109 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) | |
110 static INLINE uint64_t xgetbv(void) { | |
111 const uint32_t ecx = 0; | |
112 uint32_t eax, edx; | |
113 // Use the raw opcode for xgetbv for compatibility with older toolchains. | |
114 __asm__ volatile ( | |
115 ".byte 0x0f, 0x01, 0xd0\n" | |
116 : "=a"(eax), "=d"(edx) : "c" (ecx)); | |
117 return ((uint64_t)edx << 32) | eax; | |
118 } | |
119 #elif (defined(_M_X64) || defined(_M_IX86)) && \ | |
120 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 | |
121 #include <immintrin.h> | |
122 #define xgetbv() _xgetbv(0) | |
123 #elif defined(_MSC_VER) && defined(_M_IX86) | |
124 static INLINE uint64_t xgetbv(void) { | |
125 uint32_t eax_, edx_; | |
126 __asm { | |
127 xor ecx, ecx // ecx = 0 | |
128 // Use the raw opcode for xgetbv for compatibility with older toolchains. | |
129 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 | |
130 mov eax_, eax | |
131 mov edx_, edx | |
132 } | |
133 return ((uint64_t)edx_ << 32) | eax_; | |
134 } | |
135 #else | |
136 #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. | |
137 #endif | |
138 | |
139 #define HAS_MMX 0x01 | 107 #define HAS_MMX 0x01 |
140 #define HAS_SSE 0x02 | 108 #define HAS_SSE 0x02 |
141 #define HAS_SSE2 0x04 | 109 #define HAS_SSE2 0x04 |
142 #define HAS_SSE3 0x08 | 110 #define HAS_SSE3 0x08 |
143 #define HAS_SSSE3 0x10 | 111 #define HAS_SSSE3 0x10 |
144 #define HAS_SSE4_1 0x20 | 112 #define HAS_SSE4_1 0x20 |
145 #define HAS_AVX 0x40 | 113 #define HAS_AVX 0x40 |
146 #define HAS_AVX2 0x80 | 114 #define HAS_AVX2 0x80 |
147 #ifndef BIT | 115 #ifndef BIT |
148 #define BIT(n) (1<<n) | 116 #define BIT(n) (1<<n) |
(...skipping 32 matching lines...) |
181 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 149 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
182 | 150 |
183 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 151 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
184 | 152 |
185 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 153 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
186 | 154 |
187 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 155 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
188 | 156 |
189 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 157 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
190 | 158 |
191 // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 159 if (reg_ecx & BIT(28)) flags |= HAS_AVX; |
192 if (reg_ecx & (BIT(27) | BIT(28))) { | |
193 if ((xgetbv() & 0x6) == 0x6) { | |
194 flags |= HAS_AVX; | |
195 | 160 |
196 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ | 161 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ |
197 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 162 reg_eax = 7; |
| 163 reg_ecx = 0; |
| 164 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
198 | 165 |
199 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; | 166 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
200 } | |
201 } | |
202 | 167 |
203 return flags & mask; | 168 return flags & mask; |
204 } | 169 } |
205 | 170 |
206 #if ARCH_X86_64 && defined(_MSC_VER) | 171 #if ARCH_X86_64 && defined(_MSC_VER) |
207 unsigned __int64 __rdtsc(void); | 172 unsigned __int64 __rdtsc(void); |
208 #pragma intrinsic(__rdtsc) | 173 #pragma intrinsic(__rdtsc) |
209 #endif | 174 #endif |
210 static INLINE unsigned int | 175 static INLINE unsigned int |
211 x86_readtsc(void) { | 176 x86_readtsc(void) { |
(...skipping 80 matching lines...) |
292 } | 257 } |
293 | 258 |
294 | 259 |
295 extern void vpx_reset_mmx_state(void); | 260 extern void vpx_reset_mmx_state(void); |
296 | 261 |
297 #ifdef __cplusplus | 262 #ifdef __cplusplus |
298 } // extern "C" | 263 } // extern "C" |
299 #endif | 264 #endif |
300 | 265 |
301 #endif // VPX_PORTS_X86_H_ | 266 #endif // VPX_PORTS_X86_H_ |
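
Note on the AVX hunk: the OLD side only reports HAS_AVX/HAS_AVX2 after checking CPUID ECX bits 27 (OSXSAVE) and 28 and confirming via xgetbv that the OS saves XMM and YMM state ((xgetbv() & 0x6) == 0x6), while the NEW side sets HAS_AVX from CPUID ECX bit 28 alone. The following is a minimal standalone sketch of the OS-support pattern the OLD side relies on; the helper names read_xcr0() and os_supports_avx() are illustrative and not part of this patch, and the sketch requires both bits 27 and 28, which is slightly stricter than the OLD hunk's OR test.

/* Sketch: AVX is only safe to use when the OS saves the YMM registers.
 * CPUID reports the hardware capability (ECX bit 28), OSXSAVE (ECX bit 27)
 * indicates XGETBV may be executed, and XCR0 bits 1-2 confirm that XMM and
 * YMM state are enabled for context switches. Helper names are illustrative.
 */
#include <stdint.h>

#if defined(__i386__) || defined(__x86_64__)
static uint64_t read_xcr0(void) {
  uint32_t eax, edx;
  /* Raw opcode for xgetbv (as in the OLD hunk) so older assemblers that do
   * not know the mnemonic still accept it; ECX = 0 selects XCR0. */
  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
                   : "=a"(eax), "=d"(edx)
                   : "c"(0));
  return ((uint64_t)edx << 32) | eax;
}

static int os_supports_avx(uint32_t cpuid_ecx) {
  const uint32_t osxsave = 1u << 27; /* OS uses XSAVE/XGETBV */
  const uint32_t avx = 1u << 28;     /* hardware AVX support  */
  if ((cpuid_ecx & (osxsave | avx)) != (osxsave | avx)) return 0;
  /* XCR0 bit 1 = SSE (XMM) state, bit 2 = AVX (YMM) state. */
  return (read_xcr0() & 0x6) == 0x6;
}
#endif

A caller of this header would typically test the returned flags, e.g. (x86_simd_caps() & HAS_AVX2) != 0, before selecting an AVX2 code path, so whether the YMM-state check happens inside x86_simd_caps() determines whether such callers can trust HAS_AVX on systems where the OS does not enable AVX state.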