OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 50 matching lines...)
61 cpuid | 61 cpuid |
62 mov [edi], eax | 62 mov [edi], eax |
63 mov [edi + 4], ebx | 63 mov [edi + 4], ebx |
64 mov [edi + 8], ecx | 64 mov [edi + 8], ecx |
65 mov [edi + 12], edx | 65 mov [edi + 12], edx |
66 } | 66 } |
67 #else // Visual C but not x86 | 67 #else // Visual C but not x86 |
68 if (info_ecx == 0) { | 68 if (info_ecx == 0) { |
69 __cpuid((int*)(cpu_info), info_eax); | 69 __cpuid((int*)(cpu_info), info_eax); |
70 } else { | 70 } else { |
71 cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; | 71 cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u; |
72 } | 72 } |
73 #endif | 73 #endif |
74 // GCC version uses inline x86 assembly. | 74 // GCC version uses inline x86 assembly. |
75 #else // defined(_MSC_VER) | 75 #else // defined(_MSC_VER) |
76 uint32 info_ebx, info_edx; | 76 uint32 info_ebx, info_edx; |
77 asm volatile ( | 77 asm volatile ( |
78 #if defined( __i386__) && defined(__PIC__) | 78 #if defined( __i386__) && defined(__PIC__) |
79 // Preserve ebx for fpic 32 bit. | 79 // Preserve ebx for fpic 32 bit. |
80 "mov %%ebx, %%edi \n" | 80 "mov %%ebx, %%edi \n" |
81 "cpuid \n" | 81 "cpuid \n" |
(...skipping 25 matching lines...)
107 // mov xcr0, eax | 107 // mov xcr0, eax |
108 // } | 108 // } |
109 // For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code. | 109 // For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code. |
110 // https://code.google.com/p/libyuv/issues/detail?id=529 | 110 // https://code.google.com/p/libyuv/issues/detail?id=529 |
111 #if defined(_M_IX86) && (_MSC_VER < 1900) | 111 #if defined(_M_IX86) && (_MSC_VER < 1900) |
112 #pragma optimize("g", off) | 112 #pragma optimize("g", off) |
113 #endif | 113 #endif |
114 #if (defined(_M_IX86) || defined(_M_X64) || \ | 114 #if (defined(_M_IX86) || defined(_M_X64) || \ |
115 defined(__i386__) || defined(__x86_64__)) && \ | 115 defined(__i386__) || defined(__x86_64__)) && \ |
116 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) | 116 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) |
117 #define HAS_XGETBV | |
118 // X86 CPUs have xgetbv to detect whether the OS saves high parts of ymm registers. | 117 // X86 CPUs have xgetbv to detect whether the OS saves high parts of ymm registers. |
119 int GetXCR0() { | 118 int GetXCR0() { |
120 uint32 xcr0 = 0u; | 119 uint32 xcr0 = 0u; |
121 #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) | 120 #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) |
122 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. | 121 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. |
123 #elif defined(__i386__) || defined(__x86_64__) | 122 #elif defined(__i386__) || defined(__x86_64__) |
124 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); | 123 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); |
125 #endif // defined(__i386__) || defined(__x86_64__) | 124 #endif // defined(__i386__) || defined(__x86_64__) |
126 return xcr0; | 125 return xcr0; |
127 } | 126 } |
| 127 #else |
| 128 // xgetbv is not available to query whether the OS saves extended registers. Return 0. |
| 129 #define GetXCR0() 0 |
128 #endif // defined(_M_IX86) || defined(_M_X64) .. | 130 #endif // defined(_M_IX86) || defined(_M_X64) .. |
129 // Return optimization to previous setting. | 131 // Return optimization to previous setting. |
130 #if defined(_M_IX86) && (_MSC_VER < 1900) | 132 #if defined(_M_IX86) && (_MSC_VER < 1900) |
131 #pragma optimize("g", on) | 133 #pragma optimize("g", on) |
132 #endif | 134 #endif |
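The XCR0 value returned by GetXCR0() is what the AVX and AVX-512 checks below compare against the masks 6 and 0xe0. A minimal reference sketch of those state-component bits, with illustrative constant names that are not part of this file:

    // Illustrative only: XCR0 state-component bits (per the Intel SDM) read via GetXCR0().
    enum {
      kXcr0Sse      = 0x02,  // bit 1: OS saves XMM (SSE) state.
      kXcr0Avx      = 0x04,  // bit 2: OS saves YMM (AVX) state.
      kXcr0Opmask   = 0x20,  // bit 5: AVX-512 opmask registers k0-k7.
      kXcr0ZmmHi256 = 0x40,  // bit 6: upper 256 bits of ZMM0-ZMM15.
      kXcr0Hi16Zmm  = 0x80   // bit 7: ZMM16-ZMM31.
    };
    // AVX usable:     (GetXCR0() & (kXcr0Sse | kXcr0Avx)) == 6
    // AVX-512 usable: (GetXCR0() & (kXcr0Opmask | kXcr0ZmmHi256 | kXcr0Hi16Zmm)) == 0xe0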
133 | 135 |
134 // based on libvpx arm_cpudetect.c | 136 // based on libvpx arm_cpudetect.c |
135 // For Arm, but public to allow testing on any CPU | 137 // For Arm, but public to allow testing on any CPU |
136 LIBYUV_API SAFEBUFFERS | 138 LIBYUV_API SAFEBUFFERS |
137 int ArmCpuCaps(const char* cpuinfo_name) { | 139 int ArmCpuCaps(const char* cpuinfo_name) { |
(...skipping 73 matching lines...)
211 return LIBYUV_FALSE; | 213 return LIBYUV_FALSE; |
212 } | 214 } |
213 #else // nacl does not support getenv(). | 215 #else // nacl does not support getenv(). |
214 static LIBYUV_BOOL TestEnv(const char*) { | 216 static LIBYUV_BOOL TestEnv(const char*) { |
215 return LIBYUV_FALSE; | 217 return LIBYUV_FALSE; |
216 } | 218 } |
217 #endif | 219 #endif |
218 | 220 |
219 LIBYUV_API SAFEBUFFERS | 221 LIBYUV_API SAFEBUFFERS |
220 int InitCpuFlags(void) { | 222 int InitCpuFlags(void) { |
221 // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized. | |
222 int cpu_info = 0; | 223 int cpu_info = 0; |
223 #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) | 224 #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) |
224 uint32 cpu_info0[4] = { 0, 0, 0, 0 }; | 225 uint32 cpu_info0[4] = { 0, 0, 0, 0 }; |
225 uint32 cpu_info1[4] = { 0, 0, 0, 0 }; | 226 uint32 cpu_info1[4] = { 0, 0, 0, 0 }; |
226 uint32 cpu_info7[4] = { 0, 0, 0, 0 }; | 227 uint32 cpu_info7[4] = { 0, 0, 0, 0 }; |
227 CpuId(0, 0, cpu_info0); | 228 CpuId(0, 0, cpu_info0); |
228 CpuId(1, 0, cpu_info1); | 229 CpuId(1, 0, cpu_info1); |
229 if (cpu_info0[0] >= 7) { | 230 if (cpu_info0[0] >= 7) { |
230 CpuId(7, 0, cpu_info7); | 231 CpuId(7, 0, cpu_info7); |
231 } | 232 } |
232 cpu_info = kCpuHasX86 | | 233 cpu_info = kCpuHasX86 | |
233 ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | | 234 ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | |
234 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | | 235 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | |
235 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | | 236 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | |
236 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | | 237 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | |
237 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0); | 238 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0); |
238 | 239 |
239 #ifdef HAS_XGETBV | 240 // AVX requires the OS to save YMM registers. |
240 // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv | |
241 if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave | 241 if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave |
242 ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers | 242 ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers |
243 cpu_info |= kCpuHasAVX | | 243 cpu_info |= kCpuHasAVX | |
244 ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | | 244 ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | |
245 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | | 245 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | |
246 ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); | 246 ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); |
247 | 247 |
248 // Detect AVX512bw | 248 // Detect AVX512bw |
249 if ((GetXCR0() & 0xe0) == 0xe0) { | 249 if ((GetXCR0() & 0xe0) == 0xe0) { |
250 cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; | 250 cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; |
251 } | 251 } |
252 } | 252 } |
253 #endif | |
254 | 253 |
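For reference, the hexadecimal masks above correspond to documented CPUID feature bits; the mapping below is a sketch based on the Intel SDM bit positions, not text from the file:

    // Leaf 1, ECX: bit 9  SSSE3   (0x00000200), bit 12 FMA3   (0x00001000),
    //              bit 19 SSE4.1  (0x00080000), bit 20 SSE4.2 (0x00100000),
    //              bits 26-28 XSAVE/OSXSAVE/AVX (0x1c000000), bit 29 F16C (0x20000000).
    // Leaf 1, EDX: bit 26 SSE2    (0x04000000).
    // Leaf 7, EBX: bit 5  AVX2    (0x00000020), bit 9  ERMS   (0x00000200),
    //              bit 30 AVX512BW (0x40000000), reported here as kCpuHasAVX3.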
255 // Environment variable overrides for testing. | 254 // Environment variable overrides for testing. |
256 if (TestEnv("LIBYUV_DISABLE_X86")) { | 255 if (TestEnv("LIBYUV_DISABLE_X86")) { |
257 cpu_info &= ~kCpuHasX86; | 256 cpu_info &= ~kCpuHasX86; |
258 } | 257 } |
259 if (TestEnv("LIBYUV_DISABLE_SSE2")) { | 258 if (TestEnv("LIBYUV_DISABLE_SSE2")) { |
260 cpu_info &= ~kCpuHasSSE2; | 259 cpu_info &= ~kCpuHasSSE2; |
261 } | 260 } |
262 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { | 261 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { |
263 cpu_info &= ~kCpuHasSSSE3; | 262 cpu_info &= ~kCpuHasSSSE3; |
(...skipping 71 matching lines...)
335 // Note that use of this function is not thread safe. | 334 // Note that use of this function is not thread safe. |
336 LIBYUV_API | 335 LIBYUV_API |
337 void MaskCpuFlags(int enable_flags) { | 336 void MaskCpuFlags(int enable_flags) { |
338 cpu_info_ = InitCpuFlags() & enable_flags; | 337 cpu_info_ = InitCpuFlags() & enable_flags; |
339 } | 338 } |
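A minimal usage sketch for the API above, assuming the TestCpuFlag() declaration and kCpuHas* constants from libyuv/cpu_id.h (not shown in this diff):

    #include "libyuv/cpu_id.h"

    // Restrict libyuv to its baseline x86 paths, e.g. to compare SIMD against
    // scalar code in a benchmark. Illustrative only; call before spawning
    // threads, since MaskCpuFlags is not thread safe (see the note above).
    void DisableSimdForTest(void) {
      libyuv::MaskCpuFlags(libyuv::kCpuHasX86);  // keep only the x86 flag
      if (!libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3)) {
        // SSSE3, AVX2, etc. fast paths are now skipped inside the library.
      }
    }

The LIBYUV_DISABLE_* environment variables tested in InitCpuFlags() give the same control without code changes, e.g. LIBYUV_DISABLE_SSSE3 for just the SSSE3 paths.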
340 | 339 |
341 #ifdef __cplusplus | 340 #ifdef __cplusplus |
342 } // extern "C" | 341 } // extern "C" |
343 } // namespace libyuv | 342 } // namespace libyuv |
344 #endif | 343 #endif |