Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(338)

Side by Side Diff: source/libvpx/vpx_ports/x86.h

Issue 864393002: Cherry pick: fix AVX & AVX2 detection (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 11
12 #ifndef VPX_PORTS_X86_H_ 12 #ifndef VPX_PORTS_X86_H_
13 #define VPX_PORTS_X86_H_ 13 #define VPX_PORTS_X86_H_
14 #include <stdlib.h> 14 #include <stdlib.h>
15 #include "vpx_config.h" 15 #include "vpx_config.h"
16 #include "vpx/vpx_integer.h"
16 17
17 #ifdef __cplusplus 18 #ifdef __cplusplus
18 extern "C" { 19 extern "C" {
19 #endif 20 #endif
20 21
21 typedef enum { 22 typedef enum {
22 VPX_CPU_UNKNOWN = -1, 23 VPX_CPU_UNKNOWN = -1,
23 VPX_CPU_AMD, 24 VPX_CPU_AMD,
24 VPX_CPU_AMD_OLD, 25 VPX_CPU_AMD_OLD,
25 VPX_CPU_CENTAUR, 26 VPX_CPU_CENTAUR,
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
97 __asm mov eax, func\ 98 __asm mov eax, func\
98 __asm mov ecx, func2\ 99 __asm mov ecx, func2\
99 __asm cpuid\ 100 __asm cpuid\
100 __asm mov a, eax\ 101 __asm mov a, eax\
101 __asm mov b, ebx\ 102 __asm mov b, ebx\
102 __asm mov c, ecx\ 103 __asm mov c, ecx\
103 __asm mov d, edx 104 __asm mov d, edx
104 #endif 105 #endif
105 #endif /* end others */ 106 #endif /* end others */
106 107
108 // NaCl has no support for xgetbv or the raw opcode.
109 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
110 static INLINE uint64_t xgetbv(void) {
111 const uint32_t ecx = 0;
112 uint32_t eax, edx;
113 // Use the raw opcode for xgetbv for compatibility with older toolchains.
114 __asm__ volatile (
115 ".byte 0x0f, 0x01, 0xd0\n"
116 : "=a"(eax), "=d"(edx) : "c" (ecx));
117 return ((uint64_t)edx << 32) | eax;
118 }
119 #elif (defined(_M_X64) || defined(_M_IX86)) && \
120 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
121 #include <immintrin.h>
122 #define xgetbv() _xgetbv(0)
123 #elif defined(_MSC_VER) && defined(_M_IX86)
124 static INLINE uint64_t xgetbv(void) {
125 uint32_t eax_, edx_;
126 __asm {
127 xor ecx, ecx // ecx = 0
128 // Use the raw opcode for xgetbv for compatibility with older toolchains.
129 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
130 mov eax_, eax
131 mov edx_, edx
132 }
133 return ((uint64_t)edx_ << 32) | eax_;
134 }
135 #else
136 #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
137 #endif
138
107 #define HAS_MMX 0x01 139 #define HAS_MMX 0x01
108 #define HAS_SSE 0x02 140 #define HAS_SSE 0x02
109 #define HAS_SSE2 0x04 141 #define HAS_SSE2 0x04
110 #define HAS_SSE3 0x08 142 #define HAS_SSE3 0x08
111 #define HAS_SSSE3 0x10 143 #define HAS_SSSE3 0x10
112 #define HAS_SSE4_1 0x20 144 #define HAS_SSE4_1 0x20
113 #define HAS_AVX 0x40 145 #define HAS_AVX 0x40
114 #define HAS_AVX2 0x80 146 #define HAS_AVX2 0x80
115 #ifndef BIT 147 #ifndef BIT
116 #define BIT(n) (1<<n) 148 #define BIT(n) (1<<n)
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ 181 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
150 182
151 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ 183 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
152 184
153 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; 185 if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
154 186
155 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; 187 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
156 188
157 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; 189 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
158 190
159 if (reg_ecx & BIT(28)) flags |= HAS_AVX; 191 // bits 27 (OSXSAVE) & 28 (256-bit AVX)
192 if (reg_ecx & (BIT(27) | BIT(28))) {
193 if ((xgetbv() & 0x6) == 0x6) {
194 flags |= HAS_AVX;
160 195
161 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ 196 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
162 reg_eax = 7; 197 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
163 reg_ecx = 0;
164 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
165 198
166 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; 199 if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
200 }
201 }
167 202
168 return flags & mask; 203 return flags & mask;
169 } 204 }
170 205
171 #if ARCH_X86_64 && defined(_MSC_VER) 206 #if ARCH_X86_64 && defined(_MSC_VER)
172 unsigned __int64 __rdtsc(void); 207 unsigned __int64 __rdtsc(void);
173 #pragma intrinsic(__rdtsc) 208 #pragma intrinsic(__rdtsc)
174 #endif 209 #endif
175 static INLINE unsigned int 210 static INLINE unsigned int
176 x86_readtsc(void) { 211 x86_readtsc(void) {
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 } 292 }
258 293
259 294
260 extern void vpx_reset_mmx_state(void); 295 extern void vpx_reset_mmx_state(void);
261 296
262 #ifdef __cplusplus 297 #ifdef __cplusplus
263 } // extern "C" 298 } // extern "C"
264 #endif 299 #endif
265 300
266 #endif // VPX_PORTS_X86_H_ 301 #endif // VPX_PORTS_X86_H_
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698