OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
97 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; | 97 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; |
98 } | 98 } |
99 #endif | 99 #endif |
100 | 100 |
101 // TODO(fbarchard): Enable xgetbv when validator supports it. | 101 // TODO(fbarchard): Enable xgetbv when validator supports it. |
102 #if (defined(_M_IX86) || defined(_M_X64) || \ | 102 #if (defined(_M_IX86) || defined(_M_X64) || \ |
103 defined(__i386__) || defined(__x86_64__)) && \ | 103 defined(__i386__) || defined(__x86_64__)) && \ |
104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) | 104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) |
105 #define HAS_XGETBV | 105 #define HAS_XGETBV |
106 // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers. | 106 // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers. |
107 int TestOsSaveYmm() { | 107 int GetXCR0() { |
108 uint32 xcr0 = 0u; | 108 uint32 xcr0 = 0u; |
109 #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) | 109 #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) |
110 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. | 110 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. |
111 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) | 111 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) |
112 __asm { | 112 __asm { |
113 xor ecx, ecx // xcr 0 | 113 xor ecx, ecx // xcr 0 |
114 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. | 114 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. |
115 mov xcr0, eax | 115 mov xcr0, eax |
116 } | 116 } |
117 #elif defined(__i386__) || defined(__x86_64__) | 117 #elif defined(__i386__) || defined(__x86_64__) |
118 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); | 118 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); |
119 #endif // defined(__i386__) || defined(__x86_64__) | 119 #endif // defined(__i386__) || defined(__x86_64__) |
120 return((xcr0 & 6) == 6); // Is ymm saved? | 120 return xcr0; |
121 } | 121 } |
122 #endif // defined(_M_IX86) || defined(_M_X64) .. | 122 #endif // defined(_M_IX86) || defined(_M_X64) .. |
123 | 123 |
124 // based on libvpx arm_cpudetect.c | 124 // based on libvpx arm_cpudetect.c |
125 // For Arm, but public to allow testing on any CPU | 125 // For Arm, but public to allow testing on any CPU |
126 LIBYUV_API SAFEBUFFERS | 126 LIBYUV_API SAFEBUFFERS |
127 int ArmCpuCaps(const char* cpuinfo_name) { | 127 int ArmCpuCaps(const char* cpuinfo_name) { |
128 char cpuinfo_line[512]; | 128 char cpuinfo_line[512]; |
129 FILE* f = fopen(cpuinfo_name, "r"); | 129 FILE* f = fopen(cpuinfo_name, "r"); |
130 if (!f) { | 130 if (!f) { |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
212 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | | 212 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | |
213 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | | 213 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | |
214 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | | 214 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | |
215 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | | 215 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | |
216 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | | 216 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | |
217 kCpuHasX86; | 217 kCpuHasX86; |
218 | 218 |
219 #ifdef HAS_XGETBV | 219 #ifdef HAS_XGETBV |
220 // AVX requires that the CPU has AVX, XSAVE and OSXSave for xgetbv | 220 // AVX requires that the CPU has AVX, XSAVE and OSXSave for xgetbv |
221 if ((cpu_info1[2] & 0x1c000000) == 0x1c000000 && // AVX and OSXSave | 221 if ((cpu_info1[2] & 0x1c000000) == 0x1c000000 && // AVX and OSXSave |
222 !TestEnv("LIBYUV_DISABLE_AVX") && TestOsSaveYmm()) { // Saves YMM. | 222 (GetXCR0() & 6) == 6) { // Test OS saves YMM registers |
223 cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; | 223 cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; |
| 224 |
| 225 // Detect AVX512bw |
| 226 if ((GetXCR0() & 0xe0) == 0xe0) { |
| 227 cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; |
| 228 } |
224 } | 229 } |
225 #endif | 230 #endif |
| 231 |
226 // Environment variable overrides for testing. | 232 // Environment variable overrides for testing. |
227 if (TestEnv("LIBYUV_DISABLE_X86")) { | 233 if (TestEnv("LIBYUV_DISABLE_X86")) { |
228 cpu_info &= ~kCpuHasX86; | 234 cpu_info &= ~kCpuHasX86; |
229 } | 235 } |
230 if (TestEnv("LIBYUV_DISABLE_SSE2")) { | 236 if (TestEnv("LIBYUV_DISABLE_SSE2")) { |
231 cpu_info &= ~kCpuHasSSE2; | 237 cpu_info &= ~kCpuHasSSE2; |
232 } | 238 } |
233 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { | 239 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { |
234 cpu_info &= ~kCpuHasSSSE3; | 240 cpu_info &= ~kCpuHasSSSE3; |
235 } | 241 } |
236 if (TestEnv("LIBYUV_DISABLE_SSE41")) { | 242 if (TestEnv("LIBYUV_DISABLE_SSE41")) { |
237 cpu_info &= ~kCpuHasSSE41; | 243 cpu_info &= ~kCpuHasSSE41; |
238 } | 244 } |
239 if (TestEnv("LIBYUV_DISABLE_SSE42")) { | 245 if (TestEnv("LIBYUV_DISABLE_SSE42")) { |
240 cpu_info &= ~kCpuHasSSE42; | 246 cpu_info &= ~kCpuHasSSE42; |
241 } | 247 } |
| 248 if (TestEnv("LIBYUV_DISABLE_AVX")) { |
| 249 cpu_info &= ~kCpuHasAVX; |
| 250 } |
242 if (TestEnv("LIBYUV_DISABLE_AVX2")) { | 251 if (TestEnv("LIBYUV_DISABLE_AVX2")) { |
243 cpu_info &= ~kCpuHasAVX2; | 252 cpu_info &= ~kCpuHasAVX2; |
244 } | 253 } |
245 if (TestEnv("LIBYUV_DISABLE_ERMS")) { | 254 if (TestEnv("LIBYUV_DISABLE_ERMS")) { |
246 cpu_info &= ~kCpuHasERMS; | 255 cpu_info &= ~kCpuHasERMS; |
247 } | 256 } |
248 if (TestEnv("LIBYUV_DISABLE_FMA3")) { | 257 if (TestEnv("LIBYUV_DISABLE_FMA3")) { |
249 cpu_info &= ~kCpuHasFMA3; | 258 cpu_info &= ~kCpuHasFMA3; |
250 } | 259 } |
| 260 if (TestEnv("LIBYUV_DISABLE_AVX3")) { |
| 261 cpu_info &= ~kCpuHasAVX3; |
| 262 } |
251 #endif | 263 #endif |
252 #if defined(__mips__) && defined(__linux__) | 264 #if defined(__mips__) && defined(__linux__) |
253 // Linux mips parse text file for dsp detect. | 265 // Linux mips parse text file for dsp detect. |
254 cpu_info = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP. | 266 cpu_info = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP. |
255 #if defined(__mips_dspr2) | 267 #if defined(__mips_dspr2) |
256 cpu_info |= kCpuHasMIPS_DSPR2; | 268 cpu_info |= kCpuHasMIPS_DSPR2; |
257 #endif | 269 #endif |
258 cpu_info |= kCpuHasMIPS; | 270 cpu_info |= kCpuHasMIPS; |
259 | 271 |
260 if (getenv("LIBYUV_DISABLE_MIPS")) { | 272 if (getenv("LIBYUV_DISABLE_MIPS")) { |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
299 // Note that use of this function is not thread safe. | 311 // Note that use of this function is not thread safe. |
300 LIBYUV_API | 312 LIBYUV_API |
301 void MaskCpuFlags(int enable_flags) { | 313 void MaskCpuFlags(int enable_flags) { |
302 cpu_info_ = InitCpuFlags() & enable_flags; | 314 cpu_info_ = InitCpuFlags() & enable_flags; |
303 } | 315 } |
304 | 316 |
305 #ifdef __cplusplus | 317 #ifdef __cplusplus |
306 } // extern "C" | 318 } // extern "C" |
307 } // namespace libyuv | 319 } // namespace libyuv |
308 #endif | 320 #endif |
OLD | NEW |