OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 30 matching lines...) Expand all Loading... | |
41 #else | 41 #else |
42 #define SAFEBUFFERS | 42 #define SAFEBUFFERS |
43 #endif | 43 #endif |
44 | 44 |
45 // Low level cpuid for X86. | 45 // Low level cpuid for X86. |
46 #if (defined(_M_IX86) || defined(_M_X64) || \ | 46 #if (defined(_M_IX86) || defined(_M_X64) || \ |
47 defined(__i386__) || defined(__x86_64__)) && \ | 47 defined(__i386__) || defined(__x86_64__)) && \ |
48 !defined(__pnacl__) && !defined(__CLR_VER) | 48 !defined(__pnacl__) && !defined(__CLR_VER) |
49 LIBYUV_API | 49 LIBYUV_API |
50 void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { | 50 void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { |
51 #if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | 51 #if defined(_MSC_VER) && !defined(__clang__) |
52 // Visual C version uses intrinsic or inline x86 assembly. | 52 // Visual C version uses intrinsic or inline x86 assembly. |
53 #if (_MSC_FULL_VER >= 160040219) | 53 #if (_MSC_FULL_VER >= 160040219) |
brucedawson
2015/12/08 19:18:46
Off topic, but it would be nice to stop having thr
fbarchard
2015/12/08 20:39:23
Acknowledged.
| |
54 __cpuidex((int*)(cpu_info), info_eax, info_ecx); | 54 __cpuidex((int*)(cpu_info), info_eax, info_ecx); |
55 #elif defined(_M_IX86) | 55 #elif defined(_M_IX86) |
56 __asm { | 56 __asm { |
57 mov eax, info_eax | 57 mov eax, info_eax |
58 mov ecx, info_ecx | 58 mov ecx, info_ecx |
59 mov edi, cpu_info | 59 mov edi, cpu_info |
60 cpuid | 60 cpuid |
61 mov [edi], eax | 61 mov [edi], eax |
62 mov [edi + 4], ebx | 62 mov [edi + 4], ebx |
63 mov [edi + 8], ecx | 63 mov [edi + 8], ecx |
64 mov [edi + 12], edx | 64 mov [edi + 12], edx |
65 } | 65 } |
66 #else | 66 #else // Visual C but not 32-bit x86 (i.e. x64) |
67 if (info_ecx == 0) { | 67 if (info_ecx == 0) { |
68 __cpuid((int*)(cpu_info), info_eax); | 68 __cpuid((int*)(cpu_info), info_eax); |
69 } else { | 69 } else { |
70 cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; | 70 cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; |
71 } | 71 } |
72 #endif | 72 #endif |
73 // GCC version uses inline x86 assembly. | 73 // GCC version uses inline x86 assembly. |
74 #else // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | 74 #else // defined(_MSC_VER) && !defined(__clang__) |
75 uint32 info_ebx, info_edx; | 75 uint32 info_ebx, info_edx; |
76 asm volatile ( | 76 asm volatile ( |
77 #if defined( __i386__) && defined(__PIC__) | 77 #if defined( __i386__) && defined(__PIC__) |
78 // Preserve ebx for fpic 32 bit. | 78 // Preserve ebx for fpic 32 bit. |
79 "mov %%ebx, %%edi \n" | 79 "mov %%ebx, %%edi \n" |
80 "cpuid \n" | 80 "cpuid \n" |
81 "xchg %%edi, %%ebx \n" | 81 "xchg %%edi, %%ebx \n" |
82 : "=D" (info_ebx), | 82 : "=D" (info_ebx), |
83 #else | 83 #else |
84 "cpuid \n" | 84 "cpuid \n" |
85 : "=b" (info_ebx), | 85 : "=b" (info_ebx), |
86 #endif // defined( __i386__) && defined(__PIC__) | 86 #endif // defined( __i386__) && defined(__PIC__) |
87 "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx)); | 87 "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx)); |
88 cpu_info[0] = info_eax; | 88 cpu_info[0] = info_eax; |
89 cpu_info[1] = info_ebx; | 89 cpu_info[1] = info_ebx; |
90 cpu_info[2] = info_ecx; | 90 cpu_info[2] = info_ecx; |
91 cpu_info[3] = info_edx; | 91 cpu_info[3] = info_edx; |
92 #endif // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__) | 92 #endif // defined(_MSC_VER) && !defined(__clang__) |
93 } | 93 } |
94 #else // (defined(_M_IX86) || defined(_M_X64) ... | 94 #else // (defined(_M_IX86) || defined(_M_X64) ... |
95 LIBYUV_API | 95 LIBYUV_API |
96 void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { | 96 void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { |
97 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; | 97 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; |
98 } | 98 } |
99 #endif | 99 #endif |
100 | 100 |
101 // TODO(fbarchard): Enable xgetbv when validator supports it. | 101 // For VS2010 and earlier, _emit can be used: |
102 // _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. | |
103 // For VS2013 32 bit, the _xgetbv(0) optimizer bug affecting the return value can be worked around with: | |
104 // pragma optimize("g", off) | |
brucedawson
2015/12/08 19:18:46
I would recommend using #pragma optimize off/on to
fbarchard
2015/12/08 20:39:23
Acknowledged.
My concern with using xgetbv with o
fbarchard
2015/12/08 23:06:51
Done.
brucedawson
2015/12/09 00:10:18
Unoptimized code should be quite stable so if the
brucedawson
2015/12/09 00:10:18
Can you add a link to the bug in a comment in the
fbarchard
2015/12/09 01:08:57
Done. (already had link, but you're looking at old
fbarchard
2015/12/09 01:08:57
Done.
The 3 versions correspond to 3 compilers tha
| |
102 #if (defined(_M_IX86) || defined(_M_X64) || \ | 105 #if (defined(_M_IX86) || defined(_M_X64) || \ |
103 defined(__i386__) || defined(__x86_64__)) && \ | 106 defined(__i386__) || defined(__x86_64__)) && \ |
104 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) | 107 !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) |
105 #define HAS_XGETBV | 108 #define HAS_XGETBV |
106 // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. | 109 // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. |
107 int GetXCR0() { | 110 int GetXCR0() { |
108 uint32 xcr0 = 0u; | 111 uint32 xcr0 = 0u; |
109 #if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219) | 112 #if defined(_M_IX86) |
110 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. | |
111 #elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__) | |
112 __asm { | 113 __asm { |
113 xor ecx, ecx // xcr 0 | 114 xor ecx, ecx // xcr 0 |
114 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. | 115 xgetbv |
115 mov xcr0, eax | 116 mov xcr0, eax |
116 } | 117 } |
118 #elif (_MSC_FULL_VER >= 160040219) | |
119 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. | |
117 #elif defined(__i386__) || defined(__x86_64__) | 120 #elif defined(__i386__) || defined(__x86_64__) |
118 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); | 121 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); |
119 #endif // defined(__i386__) || defined(__x86_64__) | 122 #endif // defined(__i386__) || defined(__x86_64__) |
120 return xcr0; | 123 return xcr0; |
121 } | 124 } |
122 #endif // defined(_M_IX86) || defined(_M_X64) .. | 125 #endif // defined(_M_IX86) || defined(_M_X64) .. |
123 | 126 |
124 // based on libvpx arm_cpudetect.c | 127 // based on libvpx arm_cpudetect.c |
125 // For Arm, but public to allow testing on any CPU | 128 // For Arm, but public to allow testing on any CPU |
126 LIBYUV_API SAFEBUFFERS | 129 LIBYUV_API SAFEBUFFERS |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
285 // Note that use of this function is not thread safe. | 288 // Note that use of this function is not thread safe. |
286 LIBYUV_API | 289 LIBYUV_API |
287 void MaskCpuFlags(int enable_flags) { | 290 void MaskCpuFlags(int enable_flags) { |
288 cpu_info_ = InitCpuFlags() & enable_flags; | 291 cpu_info_ = InitCpuFlags() & enable_flags; |
289 } | 292 } |
290 | 293 |
291 #ifdef __cplusplus | 294 #ifdef __cplusplus |
292 } // extern "C" | 295 } // extern "C" |
293 } // namespace libyuv | 296 } // namespace libyuv |
294 #endif | 297 #endif |
OLD | NEW |