Index: gcc/gcc/config/i386/driver-i386.c |
diff --git a/gcc/gcc/config/i386/driver-i386.c b/gcc/gcc/config/i386/driver-i386.c |
index 9aa33d27cc7525c287741e6ed978ccba8d527b37..063279aa629e4e8a9879762732c5cbea817e79f5 100644 |
--- a/gcc/gcc/config/i386/driver-i386.c |
+++ b/gcc/gcc/config/i386/driver-i386.c |
@@ -46,12 +46,15 @@ describe_cache (struct cache_desc level1, struct cache_desc level2) |
/* At the moment, gcc does not use the information |
about the associativity of the cache. */ |
- sprintf (size, "--param l1-cache-size=%u", level1.sizekb); |
- sprintf (line, "--param l1-cache-line-size=%u", level1.line); |
+ snprintf (size, sizeof (size), |
+ "--param l1-cache-size=%u ", level1.sizekb); |
+ snprintf (line, sizeof (line), |
+ "--param l1-cache-line-size=%u ", level1.line); |
- sprintf (size2, "--param l2-cache-size=%u", level2.sizekb); |
+ snprintf (size2, sizeof (size2), |
+ "--param l2-cache-size=%u ", level2.sizekb); |
- return concat (size, " ", line, " ", size2, " ", NULL); |
+ return concat (size, line, size2, NULL); |
} |
/* Detect L2 cache parameters using CPUID extended function 0x80000006. */ |
@@ -261,7 +264,8 @@ enum cache_type |
}; |
static void |
-detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) |
+detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, |
+ struct cache_desc *level3) |
{ |
struct cache_desc *cache; |
@@ -286,6 +290,9 @@ detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) |
case 2: |
cache = level2; |
break; |
+ case 3: |
+ cache = level3; |
+ break; |
default: |
cache = NULL; |
} |
@@ -300,7 +307,7 @@ detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) |
cache->sizekb = (cache->assoc * part |
* cache->line * sets) / 1024; |
- } |
+ } |
} |
default: |
break; |
@@ -311,12 +318,13 @@ detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) |
/* Returns the description of caches for an Intel processor. */ |
static const char * |
-detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level) |
+detect_caches_intel (bool xeon_mp, unsigned max_level, |
+ unsigned max_ext_level, unsigned *l2sizekb) |
{ |
- struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}; |
+ struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; |
if (max_level >= 4) |
- detect_caches_cpuid4 (&level1, &level2); |
+ detect_caches_cpuid4 (&level1, &level2, &level3); |
else if (max_level >= 2) |
detect_caches_cpuid2 (xeon_mp, &level1, &level2); |
else |
@@ -325,11 +333,18 @@ detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level) |
if (level1.sizekb == 0) |
return ""; |
+ /* Let the L3 replace the L2. This assumes inclusive caches |
+ and single threaded program for now. */ |
+ if (level3.sizekb) |
+ level2 = level3; |
+ |
/* Intel CPUs are equipped with AMD style L2 cache info. Try this |
method if other methods fail to provide L2 cache parameters. */ |
if (level2.sizekb == 0 && max_ext_level >= 0x80000006) |
detect_l2_cache (&level2); |
+ *l2sizekb = level2.sizekb; |
+ |
return describe_cache (level1, level2); |
} |
@@ -378,12 +393,14 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
/* Extended features */ |
unsigned int has_lahf_lm = 0, has_sse4a = 0; |
unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; |
- unsigned int has_sse4_1 = 0, has_sse4_2 = 0; |
+ unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; |
unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; |
- unsigned int has_pclmul = 0; |
+ unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; |
bool arch; |
+ unsigned int l2sizekb = 0; |
+ |
if (argc < 1) |
return NULL; |
@@ -398,9 +415,22 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
__cpuid (1, eax, ebx, ecx, edx); |
- /* We don't care for extended family. */ |
model = (eax >> 4) & 0x0f; |
family = (eax >> 8) & 0x0f; |
+ if (vendor == SIG_INTEL) |
+ { |
+ unsigned int extended_model, extended_family; |
+ |
+ extended_model = (eax >> 12) & 0xf0; |
+ extended_family = (eax >> 20) & 0xff; |
+ if (family == 0x0f) |
+ { |
+ family += extended_family; |
+ model += extended_model; |
+ } |
+ else if (family == 0x06) |
+ model += extended_model; |
+ } |
has_sse3 = ecx & bit_SSE3; |
has_ssse3 = ecx & bit_SSSE3; |
@@ -408,6 +438,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
has_sse4_2 = ecx & bit_SSE4_2; |
has_avx = ecx & bit_AVX; |
has_cmpxchg16b = ecx & bit_CMPXCHG16B; |
+ has_movbe = ecx & bit_MOVBE; |
has_popcnt = ecx & bit_POPCNT; |
has_aes = ecx & bit_AES; |
has_pclmul = ecx & bit_PCLMUL; |
@@ -427,6 +458,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
has_lahf_lm = ecx & bit_LAHF_LM; |
has_sse4a = ecx & bit_SSE4a; |
+ has_abm = ecx & bit_ABM; |
+ has_lwp = ecx & bit_LWP; |
has_longmode = edx & bit_LM; |
has_3dnowp = edx & bit_3DNOWP; |
@@ -440,7 +473,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
else if (vendor == SIG_INTEL) |
{ |
bool xeon_mp = (family == 15 && model == 6); |
- cache = detect_caches_intel (xeon_mp, max_level, ext_level); |
+ cache = detect_caches_intel (xeon_mp, max_level, |
+ ext_level, &l2sizekb); |
} |
} |
@@ -504,30 +538,61 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
cpu = "pentium"; |
break; |
case PROCESSOR_PENTIUMPRO: |
- if (has_longmode) |
- /* It is Core 2 Duo. */ |
- cpu = "core2"; |
- else if (arch) |
+ switch (model) |
{ |
- if (has_sse3) |
- /* It is Core Duo. */ |
- cpu = "prescott"; |
- else if (has_sse2) |
- /* It is Pentium M. */ |
- cpu = "pentium-m"; |
- else if (has_sse) |
- /* It is Pentium III. */ |
- cpu = "pentium3"; |
- else if (has_mmx) |
- /* It is Pentium II. */ |
- cpu = "pentium2"; |
+ case 0x1c: |
+ case 0x26: |
+ /* Atom. */ |
+ cpu = "atom"; |
+ break; |
+ case 0x1a: |
+ case 0x1e: |
+ case 0x1f: |
+ case 0x2e: |
+ /* FIXME: Optimize for Nehalem. */ |
+ cpu = "core2"; |
+ break; |
+ case 0x25: |
+ case 0x2f: |
+ /* FIXME: Optimize for Westmere. */ |
+ cpu = "core2"; |
+ break; |
+ case 0x17: |
+ case 0x1d: |
+ /* Penryn. FIXME: -mtune=core2 is slower than -mtune=generic */ |
+ cpu = "core2"; |
+ break; |
+ case 0x0f: |
+ /* Merom. FIXME: -mtune=core2 is slower than -mtune=generic */ |
+ cpu = "core2"; |
+ break; |
+ default: |
+ if (arch) |
+ { |
+ if (has_ssse3) |
+ /* If it is an unknown CPU with SSSE3, assume Core 2. */ |
+ cpu = "core2"; |
+ else if (has_sse3) |
+ /* It is Core Duo. */ |
+ cpu = "pentium-m"; |
+ else if (has_sse2) |
+ /* It is Pentium M. */ |
+ cpu = "pentium-m"; |
+ else if (has_sse) |
+ /* It is Pentium III. */ |
+ cpu = "pentium3"; |
+ else if (has_mmx) |
+ /* It is Pentium II. */ |
+ cpu = "pentium2"; |
+ else |
+ /* Default to Pentium Pro. */ |
+ cpu = "pentiumpro"; |
+ } |
else |
- /* Default to Pentium Pro. */ |
- cpu = "pentiumpro"; |
+ /* For -mtune, we default to -mtune=generic. */ |
+ cpu = "generic"; |
+ break; |
} |
- else |
- /* For -mtune, we default to -mtune=generic. */ |
- cpu = "generic"; |
break; |
case PROCESSOR_PENTIUM4: |
if (has_sse3) |
@@ -594,53 +659,42 @@ const char *host_detect_local_cpu (int argc, const char **argv) |
if (arch) |
{ |
if (has_cmpxchg16b) |
- options = concat (options, "-mcx16 ", NULL); |
+ options = concat (options, " -mcx16", NULL); |
if (has_lahf_lm) |
- options = concat (options, "-msahf ", NULL); |
+ options = concat (options, " -msahf", NULL); |
+ if (has_movbe) |
+ options = concat (options, " -mmovbe", NULL); |
if (has_aes) |
- options = concat (options, "-maes ", NULL); |
+ options = concat (options, " -maes", NULL); |
if (has_pclmul) |
- options = concat (options, "-mpclmul ", NULL); |
+ options = concat (options, " -mpclmul", NULL); |
if (has_popcnt) |
- options = concat (options, "-mpopcnt ", NULL); |
+ options = concat (options, " -mpopcnt", NULL); |
+ if (has_abm) |
+ options = concat (options, " -mabm", NULL); |
+ if (has_lwp) |
+ options = concat (options, " -mlwp", NULL); |
+ |
if (has_avx) |
- options = concat (options, "-mavx ", NULL); |
+ options = concat (options, " -mavx", NULL); |
else if (has_sse4_2) |
- options = concat (options, "-msse4.2 ", NULL); |
+ options = concat (options, " -msse4.2", NULL); |
else if (has_sse4_1) |
- options = concat (options, "-msse4.1 ", NULL); |
+ options = concat (options, " -msse4.1", NULL); |
} |
done: |
- return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL); |
+ return concat (cache, "-m", argv[0], "=", cpu, options, NULL); |
} |
#else |
-/* If we aren't compiling with GCC we just provide a minimal |
- default value. */ |
+/* If we aren't compiling with GCC then the driver will just ignore |
+ -march and -mtune "native" target and will leave to the newly |
+ built compiler to generate code for its default target. */ |
-const char *host_detect_local_cpu (int argc, const char **argv) |
+const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED, |
+ const char **argv ATTRIBUTE_UNUSED) |
{ |
- const char *cpu; |
- bool arch; |
- |
- if (argc < 1) |
- return NULL; |
- |
- arch = !strcmp (argv[0], "arch"); |
- |
- if (!arch && strcmp (argv[0], "tune")) |
- return NULL; |
- |
- if (arch) |
- { |
- /* FIXME: i386 is wrong for 64bit compiler. How can we tell if |
- we are generating 64bit or 32bit code? */ |
- cpu = "i386"; |
- } |
- else |
- cpu = "generic"; |
- |
- return concat ("-m", argv[0], "=", cpu, NULL); |
+ return NULL; |
} |
#endif /* __GNUC__ */ |