OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 | 11 |
12 #ifndef VPX_PORTS_X86_H | 12 #ifndef VPX_PORTS_X86_H |
13 #define VPX_PORTS_X86_H | 13 #define VPX_PORTS_X86_H |
14 #include <stdlib.h> | 14 #include <stdlib.h> |
15 #include "vpx_config.h" | 15 #include "vpx_config.h" |
16 | 16 |
17 typedef enum | 17 typedef enum { |
18 { | 18 VPX_CPU_UNKNOWN = -1, |
19 VPX_CPU_UNKNOWN = -1, | 19 VPX_CPU_AMD, |
20 VPX_CPU_AMD, | 20 VPX_CPU_AMD_OLD, |
21 VPX_CPU_AMD_OLD, | 21 VPX_CPU_CENTAUR, |
22 VPX_CPU_CENTAUR, | 22 VPX_CPU_CYRIX, |
23 VPX_CPU_CYRIX, | 23 VPX_CPU_INTEL, |
24 VPX_CPU_INTEL, | 24 VPX_CPU_NEXGEN, |
25 VPX_CPU_NEXGEN, | 25 VPX_CPU_NSC, |
26 VPX_CPU_NSC, | 26 VPX_CPU_RISE, |
27 VPX_CPU_RISE, | 27 VPX_CPU_SIS, |
28 VPX_CPU_SIS, | 28 VPX_CPU_TRANSMETA, |
29 VPX_CPU_TRANSMETA, | 29 VPX_CPU_TRANSMETA_OLD, |
30 VPX_CPU_TRANSMETA_OLD, | 30 VPX_CPU_UMC, |
31 VPX_CPU_UMC, | 31 VPX_CPU_VIA, |
32 VPX_CPU_VIA, | |
33 | 32 |
34 VPX_CPU_LAST | 33 VPX_CPU_LAST |
35 } vpx_cpu_t; | 34 } vpx_cpu_t; |
36 | 35 |
37 #if defined(__GNUC__) && __GNUC__ | 36 #if defined(__GNUC__) && __GNUC__ |
38 #if ARCH_X86_64 | 37 #if ARCH_X86_64 |
39 #define cpuid(func,ax,bx,cx,dx)\ | 38 #define cpuid(func,ax,bx,cx,dx)\ |
40 __asm__ __volatile__ (\ | 39 __asm__ __volatile__ (\ |
41 "cpuid \n\t" \ | 40 "cpuid \n\t" \ |
42 : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ | 41 : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ |
43 : "a" (func)); | 42 : "a" (func)); |
44 #else | 43 #else |
45 #define cpuid(func,ax,bx,cx,dx)\ | 44 #define cpuid(func,ax,bx,cx,dx)\ |
46 __asm__ __volatile__ (\ | 45 __asm__ __volatile__ (\ |
47 "mov %%ebx, %%edi \n\t" \ | 46 "mov %%ebx, %%edi \n\t" \ |
48 "cpuid \n\t" \ | 47 "cpuid \n\t" \ |
49 "xchg %%edi, %%ebx \n\t" \ | 48 "xchg %%edi, %%ebx \n\t" \ |
50 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ | 49 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ |
51 : "a" (func)); | 50 : "a" (func)); |
52 #endif | 51 #endif |
53 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 52 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
54 #if ARCH_X86_64 | 53 #if ARCH_X86_64 |
55 #define cpuid(func,ax,bx,cx,dx)\ | 54 #define cpuid(func,ax,bx,cx,dx)\ |
56 asm volatile (\ | 55 asm volatile (\ |
57 "xchg %rsi, %rbx \n\t" \ | 56 "xchg %rsi, %rbx \n\t" \ |
58 "cpuid \n\t" \ | 57 "cpuid \n\t" \ |
59 "movl %ebx, %edi \n\t" \ | 58 "movl %ebx, %edi \n\t" \ |
60 "xchg %rsi, %rbx \n\t" \ | 59 "xchg %rsi, %rbx \n\t" \ |
61 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ | 60 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ |
62 : "a" (func)); | 61 : "a" (func)); |
63 #else | 62 #else |
64 #define cpuid(func,ax,bx,cx,dx)\ | 63 #define cpuid(func,ax,bx,cx,dx)\ |
65 asm volatile (\ | 64 asm volatile (\ |
66 "pushl %ebx \n\t" \ | 65 "pushl %ebx \n\t" \ |
67 "cpuid \n\t" \ | 66 "cpuid \n\t" \ |
68 "movl %ebx, %edi \n\t" \ | 67 "movl %ebx, %edi \n\t" \ |
69 "popl %ebx \n\t" \ | 68 "popl %ebx \n\t" \ |
70 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ | 69 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ |
71 : "a" (func)); | 70 : "a" (func)); |
72 #endif | 71 #endif |
73 #else | 72 #else |
74 #if ARCH_X86_64 | 73 #if ARCH_X86_64 |
75 void __cpuid(int CPUInfo[4], int info_type); | 74 void __cpuid(int CPUInfo[4], int info_type); |
76 #pragma intrinsic(__cpuid) | 75 #pragma intrinsic(__cpuid) |
77 #define cpuid(func,a,b,c,d) do{\ | 76 #define cpuid(func,a,b,c,d) do{\ |
78 int regs[4];\ | 77 int regs[4];\ |
79 __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\ | 78 __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\ |
80 } while(0) | 79 } while(0) |
81 #else | 80 #else |
82 #define cpuid(func,a,b,c,d)\ | 81 #define cpuid(func,a,b,c,d)\ |
83 __asm mov eax, func\ | 82 __asm mov eax, func\ |
84 __asm cpuid\ | 83 __asm cpuid\ |
85 __asm mov a, eax\ | 84 __asm mov a, eax\ |
86 __asm mov b, ebx\ | 85 __asm mov b, ebx\ |
87 __asm mov c, ecx\ | 86 __asm mov c, ecx\ |
88 __asm mov d, edx | 87 __asm mov d, edx |
89 #endif | 88 #endif |
90 #endif | 89 #endif |
91 | 90 |
92 #define HAS_MMX 0x01 | 91 #define HAS_MMX 0x01 |
93 #define HAS_SSE 0x02 | 92 #define HAS_SSE 0x02 |
94 #define HAS_SSE2 0x04 | 93 #define HAS_SSE2 0x04 |
95 #define HAS_SSE3 0x08 | 94 #define HAS_SSE3 0x08 |
96 #define HAS_SSSE3 0x10 | 95 #define HAS_SSSE3 0x10 |
97 #define HAS_SSE4_1 0x20 | 96 #define HAS_SSE4_1 0x20 |
98 #ifndef BIT | 97 #ifndef BIT |
99 #define BIT(n) (1<<n) | 98 #define BIT(n) (1<<n) |
100 #endif | 99 #endif |
101 | 100 |
102 static int | 101 static int |
103 x86_simd_caps(void) | 102 x86_simd_caps(void) { |
104 { | 103 unsigned int flags = 0; |
105 unsigned int flags = 0; | 104 unsigned int mask = ~0; |
106 unsigned int mask = ~0; | 105 unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; |
107 unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; | 106 char *env; |
108 char *env; | 107 (void)reg_ebx; |
109 (void)reg_ebx; | |
110 | 108 |
111 /* See if the CPU capabilities are being overridden by the environment */ | 109 /* See if the CPU capabilities are being overridden by the environment */ |
112 env = getenv("VPX_SIMD_CAPS"); | 110 env = getenv("VPX_SIMD_CAPS"); |
113 | 111 |
114 if (env && *env) | 112 if (env && *env) |
115 return (int)strtol(env, NULL, 0); | 113 return (int)strtol(env, NULL, 0); |
116 | 114 |
117 env = getenv("VPX_SIMD_CAPS_MASK"); | 115 env = getenv("VPX_SIMD_CAPS_MASK"); |
118 | 116 |
119 if (env && *env) | 117 if (env && *env) |
120 mask = strtol(env, NULL, 0); | 118 mask = strtol(env, NULL, 0); |
121 | 119 |
122 /* Ensure that the CPUID instruction supports extended features */ | 120 /* Ensure that the CPUID instruction supports extended features */ |
123 cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 121 cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
124 | 122 |
125 if (reg_eax < 1) | 123 if (reg_eax < 1) |
126 return 0; | 124 return 0; |
127 | 125 |
128 /* Get the standard feature flags */ | 126 /* Get the standard feature flags */ |
129 cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); | 127 cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); |
130 | 128 |
131 if (reg_edx & BIT(23)) flags |= HAS_MMX; | 129 if (reg_edx & BIT(23)) flags |= HAS_MMX; |
132 | 130 |
133 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 131 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
134 | 132 |
135 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 133 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
136 | 134 |
137 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 135 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
138 | 136 |
139 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 137 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
140 | 138 |
141 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 139 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
142 | 140 |
143 return flags & mask; | 141 return flags & mask; |
144 } | 142 } |
145 | 143 |
146 vpx_cpu_t vpx_x86_vendor(void); | 144 vpx_cpu_t vpx_x86_vendor(void); |
147 | 145 |
148 #if ARCH_X86_64 && defined(_MSC_VER) | 146 #if ARCH_X86_64 && defined(_MSC_VER) |
149 unsigned __int64 __rdtsc(void); | 147 unsigned __int64 __rdtsc(void); |
150 #pragma intrinsic(__rdtsc) | 148 #pragma intrinsic(__rdtsc) |
151 #endif | 149 #endif |
152 static unsigned int | 150 static unsigned int |
153 x86_readtsc(void) | 151 x86_readtsc(void) { |
154 { | |
155 #if defined(__GNUC__) && __GNUC__ | 152 #if defined(__GNUC__) && __GNUC__ |
156 unsigned int tsc; | 153 unsigned int tsc; |
157 __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); | 154 __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); |
158 return tsc; | 155 return tsc; |
159 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 156 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
160 unsigned int tsc; | 157 unsigned int tsc; |
161 asm volatile("rdtsc\n\t":"=a"(tsc):); | 158 asm volatile("rdtsc\n\t":"=a"(tsc):); |
162 return tsc; | 159 return tsc; |
163 #else | 160 #else |
164 #if ARCH_X86_64 | 161 #if ARCH_X86_64 |
165 return (unsigned int)__rdtsc(); | 162 return (unsigned int)__rdtsc(); |
166 #else | 163 #else |
167 __asm rdtsc; | 164 __asm rdtsc; |
168 #endif | 165 #endif |
169 #endif | 166 #endif |
170 } | 167 } |
171 | 168 |
172 | 169 |
173 #if defined(__GNUC__) && __GNUC__ | 170 #if defined(__GNUC__) && __GNUC__ |
174 #define x86_pause_hint()\ | 171 #define x86_pause_hint()\ |
175 __asm__ __volatile__ ("pause \n\t") | 172 __asm__ __volatile__ ("pause \n\t") |
176 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 173 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
177 #define x86_pause_hint()\ | 174 #define x86_pause_hint()\ |
178 asm volatile ("pause \n\t") | 175 asm volatile ("pause \n\t") |
179 #else | 176 #else |
180 #if ARCH_X86_64 | 177 #if ARCH_X86_64 |
181 #define x86_pause_hint()\ | 178 #define x86_pause_hint()\ |
182 _mm_pause(); | 179 _mm_pause(); |
183 #else | 180 #else |
184 #define x86_pause_hint()\ | 181 #define x86_pause_hint()\ |
185 __asm pause | 182 __asm pause |
186 #endif | 183 #endif |
187 #endif | 184 #endif |
188 | 185 |
189 #if defined(__GNUC__) && __GNUC__ | 186 #if defined(__GNUC__) && __GNUC__ |
190 static void | 187 static void |
191 x87_set_control_word(unsigned short mode) | 188 x87_set_control_word(unsigned short mode) { |
192 { | 189 __asm__ __volatile__("fldcw %0" : : "m"( *&mode)); |
193 __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); | |
194 } | 190 } |
195 static unsigned short | 191 static unsigned short |
196 x87_get_control_word(void) | 192 x87_get_control_word(void) { |
197 { | 193 unsigned short mode; |
198 unsigned short mode; | 194 __asm__ __volatile__("fstcw %0\n\t":"=m"( *&mode):); |
199 __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); | |
200 return mode; | 195 return mode; |
201 } | 196 } |
202 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 197 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
203 static void | 198 static void |
204 x87_set_control_word(unsigned short mode) | 199 x87_set_control_word(unsigned short mode) { |
205 { | 200 asm volatile("fldcw %0" : : "m"( *&mode)); |
206 asm volatile("fldcw %0" : : "m"(*&mode)); | |
207 } | 201 } |
208 static unsigned short | 202 static unsigned short |
209 x87_get_control_word(void) | 203 x87_get_control_word(void) { |
210 { | 204 unsigned short mode; |
211 unsigned short mode; | 205 asm volatile("fstcw %0\n\t":"=m"( *&mode):); |
212 asm volatile("fstcw %0\n\t":"=m"(*&mode):); | |
213 return mode; | 206 return mode; |
214 } | 207 } |
215 #elif ARCH_X86_64 | 208 #elif ARCH_X86_64 |
216 /* No fldcw intrinsics on Windows x64, punt to external asm */ | 209 /* No fldcw intrinsics on Windows x64, punt to external asm */ |
217 extern void vpx_winx64_fldcw(unsigned short mode); | 210 extern void vpx_winx64_fldcw(unsigned short mode); |
218 extern unsigned short vpx_winx64_fstcw(void); | 211 extern unsigned short vpx_winx64_fstcw(void); |
219 #define x87_set_control_word vpx_winx64_fldcw | 212 #define x87_set_control_word vpx_winx64_fldcw |
220 #define x87_get_control_word vpx_winx64_fstcw | 213 #define x87_get_control_word vpx_winx64_fstcw |
221 #else | 214 #else |
222 static void | 215 static void |
223 x87_set_control_word(unsigned short mode) | 216 x87_set_control_word(unsigned short mode) { |
224 { | 217 __asm { fldcw mode } |
225 __asm { fldcw mode } | |
226 } | 218 } |
227 static unsigned short | 219 static unsigned short |
228 x87_get_control_word(void) | 220 x87_get_control_word(void) { |
229 { | 221 unsigned short mode; |
230 unsigned short mode; | 222 __asm { fstcw mode } |
231 __asm { fstcw mode } | 223 return mode; |
232 return mode; | |
233 } | 224 } |
234 #endif | 225 #endif |
235 | 226 |
236 static unsigned short | 227 static unsigned short |
237 x87_set_double_precision(void) | 228 x87_set_double_precision(void) { |
238 { | 229 unsigned short mode = x87_get_control_word(); |
239 unsigned short mode = x87_get_control_word(); | 230 x87_set_control_word((mode&~0x300) | 0x200); |
240 x87_set_control_word((mode&~0x300) | 0x200); | 231 return mode; |
241 return mode; | |
242 } | 232 } |
243 | 233 |
244 | 234 |
245 extern void vpx_reset_mmx_state(void); | 235 extern void vpx_reset_mmx_state(void); |
246 #endif | 236 #endif |
247 | 237 |