| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 97 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; | 97 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; |
| 98 } | 98 } |
| 99 #endif | 99 #endif |
| 100 | 100 |
| 101 // TODO(fbarchard): Enable xgetbv when validator supports it. | 101 // TODO(fbarchard): Enable xgetbv when validator supports it. |
| 102 #if (defined(_M_IX86) || defined(_M_X64) || \ | 102 #if (defined(_M_IX86) || defined(_M_X64) || \ |
| 103 defined(__i386__) || defined(__x86_64__)) && \ | 103 defined(__i386__) || defined(__x86_64__)) && \ |
| 104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) | 104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) |
| 105 #define HAS_XGETBV | 105 #define HAS_XGETBV |
| 106 // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers. | 106 // X86 CPUs have xgetbv to detect whether the OS saves the high parts of ymm registers. |
| 107 int TestOsSaveYmm() { | 107 int GetXCR0() { |
| 108 uint32 xcr0 = 0u; | 108 uint32 xcr0 = 0u; |
| 109 #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) | 109 #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) |
| 110 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. | 110 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. |
| 111 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) | 111 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) |
| 112 __asm { | 112 __asm { |
| 113 xor ecx, ecx // xcr 0 | 113 xor ecx, ecx // xcr 0 |
| 114 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. | 114 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. |
| 115 mov xcr0, eax | 115 mov xcr0, eax |
| 116 } | 116 } |
| 117 #elif defined(__i386__) || defined(__x86_64__) | 117 #elif defined(__i386__) || defined(__x86_64__) |
| 118 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); | 118 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); |
| 119 #endif // defined(__i386__) || defined(__x86_64__) | 119 #endif // defined(__i386__) || defined(__x86_64__) |
| 120 return((xcr0 & 6) == 6); // Is ymm saved? | 120 return xcr0; |
| 121 } | 121 } |
| 122 #endif // defined(_M_IX86) || defined(_M_X64) .. | 122 #endif // defined(_M_IX86) || defined(_M_X64) .. |
| 123 | 123 |
| 124 // based on libvpx arm_cpudetect.c | 124 // based on libvpx arm_cpudetect.c |
| 125 // For Arm, but public to allow testing on any CPU | 125 // For Arm, but public to allow testing on any CPU |
| 126 LIBYUV_API SAFEBUFFERS | 126 LIBYUV_API SAFEBUFFERS |
| 127 int ArmCpuCaps(const char* cpuinfo_name) { | 127 int ArmCpuCaps(const char* cpuinfo_name) { |
| 128 char cpuinfo_line[512]; | 128 char cpuinfo_line[512]; |
| 129 FILE* f = fopen(cpuinfo_name, "r"); | 129 FILE* f = fopen(cpuinfo_name, "r"); |
| 130 if (!f) { | 130 if (!f) { |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 212 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | | 212 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | |
| 213 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | | 213 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | |
| 214 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | | 214 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | |
| 215 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | | 215 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | |
| 216 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | | 216 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | |
| 217 kCpuHasX86; | 217 kCpuHasX86; |
| 218 | 218 |
| 219 #ifdef HAS_XGETBV | 219 #ifdef HAS_XGETBV |
| 220 // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv | 220 // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv |
| 221 if ((cpu_info1[2] & 0x1c000000) == 0x1c000000 && // AVX and OSXSave | 221 if ((cpu_info1[2] & 0x1c000000) == 0x1c000000 && // AVX and OSXSave |
| 222 !TestEnv("LIBYUV_DISABLE_AVX") && TestOsSaveYmm()) { // Saves YMM. | 222 (GetXCR0() & 6) == 6) { // Test OS saves YMM registers |
| 223 cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; | 223 cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; |
| 224 |
| 225 // Detect AVX512bw |
| 226 if ((GetXCR0() & 0xe0) == 0xe0) { |
| 227 cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; |
| 228 } |
| 224 } | 229 } |
| 225 #endif | 230 #endif |
| 231 |
| 226 // Environment variable overrides for testing. | 232 // Environment variable overrides for testing. |
| 227 if (TestEnv("LIBYUV_DISABLE_X86")) { | 233 if (TestEnv("LIBYUV_DISABLE_X86")) { |
| 228 cpu_info &= ~kCpuHasX86; | 234 cpu_info &= ~kCpuHasX86; |
| 229 } | 235 } |
| 230 if (TestEnv("LIBYUV_DISABLE_SSE2")) { | 236 if (TestEnv("LIBYUV_DISABLE_SSE2")) { |
| 231 cpu_info &= ~kCpuHasSSE2; | 237 cpu_info &= ~kCpuHasSSE2; |
| 232 } | 238 } |
| 233 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { | 239 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { |
| 234 cpu_info &= ~kCpuHasSSSE3; | 240 cpu_info &= ~kCpuHasSSSE3; |
| 235 } | 241 } |
| 236 if (TestEnv("LIBYUV_DISABLE_SSE41")) { | 242 if (TestEnv("LIBYUV_DISABLE_SSE41")) { |
| 237 cpu_info &= ~kCpuHasSSE41; | 243 cpu_info &= ~kCpuHasSSE41; |
| 238 } | 244 } |
| 239 if (TestEnv("LIBYUV_DISABLE_SSE42")) { | 245 if (TestEnv("LIBYUV_DISABLE_SSE42")) { |
| 240 cpu_info &= ~kCpuHasSSE42; | 246 cpu_info &= ~kCpuHasSSE42; |
| 241 } | 247 } |
| 248 if (TestEnv("LIBYUV_DISABLE_AVX")) { |
| 249 cpu_info &= ~kCpuHasAVX; |
| 250 } |
| 242 if (TestEnv("LIBYUV_DISABLE_AVX2")) { | 251 if (TestEnv("LIBYUV_DISABLE_AVX2")) { |
| 243 cpu_info &= ~kCpuHasAVX2; | 252 cpu_info &= ~kCpuHasAVX2; |
| 244 } | 253 } |
| 245 if (TestEnv("LIBYUV_DISABLE_ERMS")) { | 254 if (TestEnv("LIBYUV_DISABLE_ERMS")) { |
| 246 cpu_info &= ~kCpuHasERMS; | 255 cpu_info &= ~kCpuHasERMS; |
| 247 } | 256 } |
| 248 if (TestEnv("LIBYUV_DISABLE_FMA3")) { | 257 if (TestEnv("LIBYUV_DISABLE_FMA3")) { |
| 249 cpu_info &= ~kCpuHasFMA3; | 258 cpu_info &= ~kCpuHasFMA3; |
| 250 } | 259 } |
| 260 if (TestEnv("LIBYUV_DISABLE_AVX3")) { |
| 261 cpu_info &= ~kCpuHasAVX3; |
| 262 } |
| 251 #endif | 263 #endif |
| 252 #if defined(__mips__) && defined(__linux__) | 264 #if defined(__mips__) && defined(__linux__) |
| 253 // Linux mips parse text file for dsp detect. | 265 // Linux mips parse text file for dsp detect. |
| 254 cpu_info = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP. | 266 cpu_info = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP. |
| 255 #if defined(__mips_dspr2) | 267 #if defined(__mips_dspr2) |
| 256 cpu_info |= kCpuHasMIPS_DSPR2; | 268 cpu_info |= kCpuHasMIPS_DSPR2; |
| 257 #endif | 269 #endif |
| 258 cpu_info |= kCpuHasMIPS; | 270 cpu_info |= kCpuHasMIPS; |
| 259 | 271 |
| 260 if (getenv("LIBYUV_DISABLE_MIPS")) { | 272 if (getenv("LIBYUV_DISABLE_MIPS")) { |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 299 // Note that use of this function is not thread safe. | 311 // Note that use of this function is not thread safe. |
| 300 LIBYUV_API | 312 LIBYUV_API |
| 301 void MaskCpuFlags(int enable_flags) { | 313 void MaskCpuFlags(int enable_flags) { |
| 302 cpu_info_ = InitCpuFlags() & enable_flags; | 314 cpu_info_ = InitCpuFlags() & enable_flags; |
| 303 } | 315 } |
| 304 | 316 |
| 305 #ifdef __cplusplus | 317 #ifdef __cplusplus |
| 306 } // extern "C" | 318 } // extern "C" |
| 307 } // namespace libyuv | 319 } // namespace libyuv |
| 308 #endif | 320 #endif |
| OLD | NEW |