| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 | 11 |
| 12 #ifndef VPX_PORTS_X86_H | 12 #ifndef VPX_PORTS_X86_H |
| 13 #define VPX_PORTS_X86_H | 13 #define VPX_PORTS_X86_H |
| 14 #include <stdlib.h> | 14 #include <stdlib.h> |
| 15 #include "vpx_config.h" | 15 #include "vpx_config.h" |
| 16 | 16 |
| 17 typedef enum | 17 typedef enum { |
| 18 { | 18 VPX_CPU_UNKNOWN = -1, |
| 19 VPX_CPU_UNKNOWN = -1, | 19 VPX_CPU_AMD, |
| 20 VPX_CPU_AMD, | 20 VPX_CPU_AMD_OLD, |
| 21 VPX_CPU_AMD_OLD, | 21 VPX_CPU_CENTAUR, |
| 22 VPX_CPU_CENTAUR, | 22 VPX_CPU_CYRIX, |
| 23 VPX_CPU_CYRIX, | 23 VPX_CPU_INTEL, |
| 24 VPX_CPU_INTEL, | 24 VPX_CPU_NEXGEN, |
| 25 VPX_CPU_NEXGEN, | 25 VPX_CPU_NSC, |
| 26 VPX_CPU_NSC, | 26 VPX_CPU_RISE, |
| 27 VPX_CPU_RISE, | 27 VPX_CPU_SIS, |
| 28 VPX_CPU_SIS, | 28 VPX_CPU_TRANSMETA, |
| 29 VPX_CPU_TRANSMETA, | 29 VPX_CPU_TRANSMETA_OLD, |
| 30 VPX_CPU_TRANSMETA_OLD, | 30 VPX_CPU_UMC, |
| 31 VPX_CPU_UMC, | 31 VPX_CPU_VIA, |
| 32 VPX_CPU_VIA, | |
| 33 | 32 |
| 34 VPX_CPU_LAST | 33 VPX_CPU_LAST |
| 35 } vpx_cpu_t; | 34 } vpx_cpu_t; |
| 36 | 35 |
| 37 #if defined(__GNUC__) && __GNUC__ | 36 #if defined(__GNUC__) && __GNUC__ |
| 38 #if ARCH_X86_64 | 37 #if ARCH_X86_64 |
| 39 #define cpuid(func,ax,bx,cx,dx)\ | 38 #define cpuid(func,ax,bx,cx,dx)\ |
| 40 __asm__ __volatile__ (\ | 39 __asm__ __volatile__ (\ |
| 41 "cpuid \n\t" \ | 40 "cpuid \n\t" \ |
| 42 : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ | 41 : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ |
| 43 : "a" (func)); | 42 : "a" (func)); |
| 44 #else | 43 #else |
| 45 #define cpuid(func,ax,bx,cx,dx)\ | 44 #define cpuid(func,ax,bx,cx,dx)\ |
| 46 __asm__ __volatile__ (\ | 45 __asm__ __volatile__ (\ |
| 47 "mov %%ebx, %%edi \n\t" \ | 46 "mov %%ebx, %%edi \n\t" \ |
| 48 "cpuid \n\t" \ | 47 "cpuid \n\t" \ |
| 49 "xchg %%edi, %%ebx \n\t" \ | 48 "xchg %%edi, %%ebx \n\t" \ |
| 50 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ | 49 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ |
| 51 : "a" (func)); | 50 : "a" (func)); |
| 52 #endif | 51 #endif |
| 53 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 52 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
| 54 #if ARCH_X86_64 | 53 #if ARCH_X86_64 |
| 55 #define cpuid(func,ax,bx,cx,dx)\ | 54 #define cpuid(func,ax,bx,cx,dx)\ |
| 56 asm volatile (\ | 55 asm volatile (\ |
| 57 "xchg %rsi, %rbx \n\t" \ | 56 "xchg %rsi, %rbx \n\t" \ |
| 58 "cpuid \n\t" \ | 57 "cpuid \n\t" \ |
| 59 "movl %ebx, %edi \n\t" \ | 58 "movl %ebx, %edi \n\t" \ |
| 60 "xchg %rsi, %rbx \n\t" \ | 59 "xchg %rsi, %rbx \n\t" \ |
| 61 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ | 60 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ |
| 62 : "a" (func)); | 61 : "a" (func)); |
| 63 #else | 62 #else |
| 64 #define cpuid(func,ax,bx,cx,dx)\ | 63 #define cpuid(func,ax,bx,cx,dx)\ |
| 65 asm volatile (\ | 64 asm volatile (\ |
| 66 "pushl %ebx \n\t" \ | 65 "pushl %ebx \n\t" \ |
| 67 "cpuid \n\t" \ | 66 "cpuid \n\t" \ |
| 68 "movl %ebx, %edi \n\t" \ | 67 "movl %ebx, %edi \n\t" \ |
| 69 "popl %ebx \n\t" \ | 68 "popl %ebx \n\t" \ |
| 70 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ | 69 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ |
| 71 : "a" (func)); | 70 : "a" (func)); |
| 72 #endif | 71 #endif |
| 73 #else | 72 #else |
| 74 #if ARCH_X86_64 | 73 #if ARCH_X86_64 |
| 75 void __cpuid(int CPUInfo[4], int info_type); | 74 void __cpuid(int CPUInfo[4], int info_type); |
| 76 #pragma intrinsic(__cpuid) | 75 #pragma intrinsic(__cpuid) |
| 77 #define cpuid(func,a,b,c,d) do{\ | 76 #define cpuid(func,a,b,c,d) do{\ |
| 78 int regs[4];\ | 77 int regs[4];\ |
| 79 __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\ | 78 __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\ |
| 80 } while(0) | 79 } while(0) |
| 81 #else | 80 #else |
| 82 #define cpuid(func,a,b,c,d)\ | 81 #define cpuid(func,a,b,c,d)\ |
| 83 __asm mov eax, func\ | 82 __asm mov eax, func\ |
| 84 __asm cpuid\ | 83 __asm cpuid\ |
| 85 __asm mov a, eax\ | 84 __asm mov a, eax\ |
| 86 __asm mov b, ebx\ | 85 __asm mov b, ebx\ |
| 87 __asm mov c, ecx\ | 86 __asm mov c, ecx\ |
| 88 __asm mov d, edx | 87 __asm mov d, edx |
| 89 #endif | 88 #endif |
| 90 #endif | 89 #endif |
| 91 | 90 |
| 92 #define HAS_MMX 0x01 | 91 #define HAS_MMX 0x01 |
| 93 #define HAS_SSE 0x02 | 92 #define HAS_SSE 0x02 |
| 94 #define HAS_SSE2 0x04 | 93 #define HAS_SSE2 0x04 |
| 95 #define HAS_SSE3 0x08 | 94 #define HAS_SSE3 0x08 |
| 96 #define HAS_SSSE3 0x10 | 95 #define HAS_SSSE3 0x10 |
| 97 #define HAS_SSE4_1 0x20 | 96 #define HAS_SSE4_1 0x20 |
| 98 #ifndef BIT | 97 #ifndef BIT |
| 99 #define BIT(n) (1<<n) | 98 #define BIT(n) (1<<n) |
| 100 #endif | 99 #endif |
| 101 | 100 |
| 102 static int | 101 static int |
| 103 x86_simd_caps(void) | 102 x86_simd_caps(void) { |
| 104 { | 103 unsigned int flags = 0; |
| 105 unsigned int flags = 0; | 104 unsigned int mask = ~0; |
| 106 unsigned int mask = ~0; | 105 unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; |
| 107 unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; | 106 char *env; |
| 108 char *env; | 107 (void)reg_ebx; |
| 109 (void)reg_ebx; | |
| 110 | 108 |
| 111 /* See if the CPU capabilities are being overridden by the environment */ | 109 /* See if the CPU capabilities are being overridden by the environment */ |
| 112 env = getenv("VPX_SIMD_CAPS"); | 110 env = getenv("VPX_SIMD_CAPS"); |
| 113 | 111 |
| 114 if (env && *env) | 112 if (env && *env) |
| 115 return (int)strtol(env, NULL, 0); | 113 return (int)strtol(env, NULL, 0); |
| 116 | 114 |
| 117 env = getenv("VPX_SIMD_CAPS_MASK"); | 115 env = getenv("VPX_SIMD_CAPS_MASK"); |
| 118 | 116 |
| 119 if (env && *env) | 117 if (env && *env) |
| 120 mask = strtol(env, NULL, 0); | 118 mask = strtol(env, NULL, 0); |
| 121 | 119 |
| 122 /* Ensure that the CPUID instruction supports extended features */ | 120 /* Ensure that the CPUID instruction supports extended features */ |
| 123 cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); | 121 cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
| 124 | 122 |
| 125 if (reg_eax < 1) | 123 if (reg_eax < 1) |
| 126 return 0; | 124 return 0; |
| 127 | 125 |
| 128 /* Get the standard feature flags */ | 126 /* Get the standard feature flags */ |
| 129 cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); | 127 cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); |
| 130 | 128 |
| 131 if (reg_edx & BIT(23)) flags |= HAS_MMX; | 129 if (reg_edx & BIT(23)) flags |= HAS_MMX; |
| 132 | 130 |
| 133 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ | 131 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
| 134 | 132 |
| 135 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ | 133 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
| 136 | 134 |
| 137 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; | 135 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
| 138 | 136 |
| 139 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; | 137 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
| 140 | 138 |
| 141 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; | 139 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
| 142 | 140 |
| 143 return flags & mask; | 141 return flags & mask; |
| 144 } | 142 } |
| 145 | 143 |
| 146 vpx_cpu_t vpx_x86_vendor(void); | 144 vpx_cpu_t vpx_x86_vendor(void); |
| 147 | 145 |
| 148 #if ARCH_X86_64 && defined(_MSC_VER) | 146 #if ARCH_X86_64 && defined(_MSC_VER) |
| 149 unsigned __int64 __rdtsc(void); | 147 unsigned __int64 __rdtsc(void); |
| 150 #pragma intrinsic(__rdtsc) | 148 #pragma intrinsic(__rdtsc) |
| 151 #endif | 149 #endif |
| 152 static unsigned int | 150 static unsigned int |
| 153 x86_readtsc(void) | 151 x86_readtsc(void) { |
| 154 { | |
| 155 #if defined(__GNUC__) && __GNUC__ | 152 #if defined(__GNUC__) && __GNUC__ |
| 156 unsigned int tsc; | 153 unsigned int tsc; |
| 157 __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); | 154 __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); |
| 158 return tsc; | 155 return tsc; |
| 159 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 156 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
| 160 unsigned int tsc; | 157 unsigned int tsc; |
| 161 asm volatile("rdtsc\n\t":"=a"(tsc):); | 158 asm volatile("rdtsc\n\t":"=a"(tsc):); |
| 162 return tsc; | 159 return tsc; |
| 163 #else | 160 #else |
| 164 #if ARCH_X86_64 | 161 #if ARCH_X86_64 |
| 165 return (unsigned int)__rdtsc(); | 162 return (unsigned int)__rdtsc(); |
| 166 #else | 163 #else |
| 167 __asm rdtsc; | 164 __asm rdtsc; |
| 168 #endif | 165 #endif |
| 169 #endif | 166 #endif |
| 170 } | 167 } |
| 171 | 168 |
| 172 | 169 |
| 173 #if defined(__GNUC__) && __GNUC__ | 170 #if defined(__GNUC__) && __GNUC__ |
| 174 #define x86_pause_hint()\ | 171 #define x86_pause_hint()\ |
| 175 __asm__ __volatile__ ("pause \n\t") | 172 __asm__ __volatile__ ("pause \n\t") |
| 176 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 173 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
| 177 #define x86_pause_hint()\ | 174 #define x86_pause_hint()\ |
| 178 asm volatile ("pause \n\t") | 175 asm volatile ("pause \n\t") |
| 179 #else | 176 #else |
| 180 #if ARCH_X86_64 | 177 #if ARCH_X86_64 |
| 181 #define x86_pause_hint()\ | 178 #define x86_pause_hint()\ |
| 182 _mm_pause(); | 179 _mm_pause(); |
| 183 #else | 180 #else |
| 184 #define x86_pause_hint()\ | 181 #define x86_pause_hint()\ |
| 185 __asm pause | 182 __asm pause |
| 186 #endif | 183 #endif |
| 187 #endif | 184 #endif |
| 188 | 185 |
| 189 #if defined(__GNUC__) && __GNUC__ | 186 #if defined(__GNUC__) && __GNUC__ |
| 190 static void | 187 static void |
| 191 x87_set_control_word(unsigned short mode) | 188 x87_set_control_word(unsigned short mode) { |
| 192 { | 189 __asm__ __volatile__("fldcw %0" : : "m"( *&mode)); |
| 193 __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); | |
| 194 } | 190 } |
| 195 static unsigned short | 191 static unsigned short |
| 196 x87_get_control_word(void) | 192 x87_get_control_word(void) { |
| 197 { | 193 unsigned short mode; |
| 198 unsigned short mode; | 194 __asm__ __volatile__("fstcw %0\n\t":"=m"( *&mode):); |
| 199 __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); | |
| 200 return mode; | 195 return mode; |
| 201 } | 196 } |
| 202 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) | 197 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) |
| 203 static void | 198 static void |
| 204 x87_set_control_word(unsigned short mode) | 199 x87_set_control_word(unsigned short mode) { |
| 205 { | 200 asm volatile("fldcw %0" : : "m"( *&mode)); |
| 206 asm volatile("fldcw %0" : : "m"(*&mode)); | |
| 207 } | 201 } |
| 208 static unsigned short | 202 static unsigned short |
| 209 x87_get_control_word(void) | 203 x87_get_control_word(void) { |
| 210 { | 204 unsigned short mode; |
| 211 unsigned short mode; | 205 asm volatile("fstcw %0\n\t":"=m"( *&mode):); |
| 212 asm volatile("fstcw %0\n\t":"=m"(*&mode):); | |
| 213 return mode; | 206 return mode; |
| 214 } | 207 } |
| 215 #elif ARCH_X86_64 | 208 #elif ARCH_X86_64 |
| 216 /* No fldcw intrinsics on Windows x64, punt to external asm */ | 209 /* No fldcw intrinsics on Windows x64, punt to external asm */ |
| 217 extern void vpx_winx64_fldcw(unsigned short mode); | 210 extern void vpx_winx64_fldcw(unsigned short mode); |
| 218 extern unsigned short vpx_winx64_fstcw(void); | 211 extern unsigned short vpx_winx64_fstcw(void); |
| 219 #define x87_set_control_word vpx_winx64_fldcw | 212 #define x87_set_control_word vpx_winx64_fldcw |
| 220 #define x87_get_control_word vpx_winx64_fstcw | 213 #define x87_get_control_word vpx_winx64_fstcw |
| 221 #else | 214 #else |
| 222 static void | 215 static void |
| 223 x87_set_control_word(unsigned short mode) | 216 x87_set_control_word(unsigned short mode) { |
| 224 { | 217 __asm { fldcw mode } |
| 225 __asm { fldcw mode } | |
| 226 } | 218 } |
| 227 static unsigned short | 219 static unsigned short |
| 228 x87_get_control_word(void) | 220 x87_get_control_word(void) { |
| 229 { | 221 unsigned short mode; |
| 230 unsigned short mode; | 222 __asm { fstcw mode } |
| 231 __asm { fstcw mode } | 223 return mode; |
| 232 return mode; | |
| 233 } | 224 } |
| 234 #endif | 225 #endif |
| 235 | 226 |
| 236 static unsigned short | 227 static unsigned short |
| 237 x87_set_double_precision(void) | 228 x87_set_double_precision(void) { |
| 238 { | 229 unsigned short mode = x87_get_control_word(); |
| 239 unsigned short mode = x87_get_control_word(); | 230 x87_set_control_word((mode&~0x300) | 0x200); |
| 240 x87_set_control_word((mode&~0x300) | 0x200); | 231 return mode; |
| 241 return mode; | |
| 242 } | 232 } |
| 243 | 233 |
| 244 | 234 |
| 245 extern void vpx_reset_mmx_state(void); | 235 extern void vpx_reset_mmx_state(void); |
| 246 #endif | 236 #endif |
| 247 | 237 |
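For orientation, here is a sketch of how a caller might consume the flags this header computes. The `main` harness and the `"vpx_ports/x86.h"` include path are illustrative assumptions, not part of the diff; only `x86_simd_caps()`, the `HAS_*` flags, and the two environment variables come from the code above.

```c
/* Hypothetical harness (assumed include path and build setup). */
#include <stdio.h>
#include "vpx_ports/x86.h"

int main(void) {
  int caps = x86_simd_caps();

  /* Setting VPX_SIMD_CAPS in the environment replaces detection
   * outright: VPX_SIMD_CAPS=0x07 reports MMX|SSE|SSE2 on any CPU.
   * VPX_SIMD_CAPS_MASK is ANDed into the detected flags instead:
   * a mask of 0x03 hides SSE2 and everything above it. */
  if (caps & HAS_SSE2)   printf("SSE2 available\n");
  if (caps & HAS_SSE4_1) printf("SSE4.1 available\n");
  return 0;
}
```

The override path is what the in-code comment alludes to: it lets a test harness force reduced-SIMD code paths without rebuilding the library.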
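One non-obvious constant in `x87_set_double_precision()`: bits 8-9 of the x87 control word form the precision-control field, so `(mode & ~0x300) | 0x200` clears that field and writes `10b`, selecting 53-bit (double) precision, and the function returns the prior control word. A minimal sketch of the save/restore pattern this implies follows; the work between the two calls is a placeholder assumption, not code from libvpx.

```c
/* Sketch only: the body between the two calls is a placeholder. */
unsigned short saved = x87_set_double_precision(); /* returns the old word */
/* ... x87-sensitive floating-point work at 53-bit precision ... */
x87_set_control_word(saved);                       /* restore caller's mode */
```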