OLD | NEW |
1 /* Subroutines for the gcc driver. | 1 /* Subroutines for the gcc driver. |
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. | 2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify | 6 GCC is free software; you can redistribute it and/or modify |
7 it under the terms of the GNU General Public License as published by | 7 it under the terms of the GNU General Public License as published by |
8 the Free Software Foundation; either version 3, or (at your option) | 8 the Free Software Foundation; either version 3, or (at your option) |
9 any later version. | 9 any later version. |
10 | 10 |
(...skipping 28 matching lines...) Expand all Loading... |
39 cache line size of the processor caches. */ | 39 cache line size of the processor caches. */ |
40 | 40 |
41 static char * | 41 static char * |
42 describe_cache (struct cache_desc level1, struct cache_desc level2) | 42 describe_cache (struct cache_desc level1, struct cache_desc level2) |
43 { | 43 { |
44 char size[100], line[100], size2[100]; | 44 char size[100], line[100], size2[100]; |
45 | 45 |
46 /* At the moment, gcc does not use the information | 46 /* At the moment, gcc does not use the information |
47 about the associativity of the cache. */ | 47 about the associativity of the cache. */ |
48 | 48 |
49 sprintf (size, "--param l1-cache-size=%u", level1.sizekb); | 49 snprintf (size, sizeof (size), |
50 sprintf (line, "--param l1-cache-line-size=%u", level1.line); | 50 » "--param l1-cache-size=%u ", level1.sizekb); |
| 51 snprintf (line, sizeof (line), |
| 52 » "--param l1-cache-line-size=%u ", level1.line); |
51 | 53 |
52 sprintf (size2, "--param l2-cache-size=%u", level2.sizekb); | 54 snprintf (size2, sizeof (size2), |
| 55 » "--param l2-cache-size=%u ", level2.sizekb); |
53 | 56 |
54 return concat (size, " ", line, " ", size2, " ", NULL); | 57 return concat (size, line, size2, NULL); |
55 } | 58 } |
56 | 59 |
57 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ | 60 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ |
58 | 61 |
59 static void | 62 static void |
60 detect_l2_cache (struct cache_desc *level2) | 63 detect_l2_cache (struct cache_desc *level2) |
61 { | 64 { |
62 unsigned eax, ebx, ecx, edx; | 65 unsigned eax, ebx, ecx, edx; |
63 unsigned assoc; | 66 unsigned assoc; |
64 | 67 |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
254 | 257 |
255 enum cache_type | 258 enum cache_type |
256 { | 259 { |
257 CACHE_END = 0, | 260 CACHE_END = 0, |
258 CACHE_DATA = 1, | 261 CACHE_DATA = 1, |
259 CACHE_INST = 2, | 262 CACHE_INST = 2, |
260 CACHE_UNIFIED = 3 | 263 CACHE_UNIFIED = 3 |
261 }; | 264 }; |
262 | 265 |
263 static void | 266 static void |
264 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) | 267 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, |
| 268 » » struct cache_desc *level3) |
265 { | 269 { |
266 struct cache_desc *cache; | 270 struct cache_desc *cache; |
267 | 271 |
268 unsigned eax, ebx, ecx, edx; | 272 unsigned eax, ebx, ecx, edx; |
269 int count; | 273 int count; |
270 | 274 |
271 for (count = 0;; count++) | 275 for (count = 0;; count++) |
272 { | 276 { |
273 __cpuid_count(4, count, eax, ebx, ecx, edx); | 277 __cpuid_count(4, count, eax, ebx, ecx, edx); |
274 switch (eax & 0x1f) | 278 switch (eax & 0x1f) |
275 { | 279 { |
276 case CACHE_END: | 280 case CACHE_END: |
277 return; | 281 return; |
278 case CACHE_DATA: | 282 case CACHE_DATA: |
279 case CACHE_UNIFIED: | 283 case CACHE_UNIFIED: |
280 { | 284 { |
281 switch ((eax >> 5) & 0x07) | 285 switch ((eax >> 5) & 0x07) |
282 { | 286 { |
283 case 1: | 287 case 1: |
284 cache = level1; | 288 cache = level1; |
285 break; | 289 break; |
286 case 2: | 290 case 2: |
287 cache = level2; | 291 cache = level2; |
288 break; | 292 break; |
| 293 case 3: |
| 294 cache = level3; |
| 295 break; |
289 default: | 296 default: |
290 cache = NULL; | 297 cache = NULL; |
291 } | 298 } |
292 | 299 |
293 if (cache) | 300 if (cache) |
294 { | 301 { |
295 unsigned sets = ecx + 1; | 302 unsigned sets = ecx + 1; |
296 unsigned part = ((ebx >> 12) & 0x03ff) + 1; | 303 unsigned part = ((ebx >> 12) & 0x03ff) + 1; |
297 | 304 |
298 cache->assoc = ((ebx >> 22) & 0x03ff) + 1; | 305 cache->assoc = ((ebx >> 22) & 0x03ff) + 1; |
299 cache->line = (ebx & 0x0fff) + 1; | 306 cache->line = (ebx & 0x0fff) + 1; |
300 | 307 |
301 cache->sizekb = (cache->assoc * part | 308 cache->sizekb = (cache->assoc * part |
302 * cache->line * sets) / 1024; | 309 * cache->line * sets) / 1024; |
303 » }» | 310 » } |
304 } | 311 } |
305 default: | 312 default: |
306 break; | 313 break; |
307 } | 314 } |
308 } | 315 } |
309 } | 316 } |
310 | 317 |
311 /* Returns the description of caches for an Intel processor. */ | 318 /* Returns the description of caches for an Intel processor. */ |
312 | 319 |
313 static const char * | 320 static const char * |
314 detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level) | 321 detect_caches_intel (bool xeon_mp, unsigned max_level, |
| 322 » » unsigned max_ext_level, unsigned *l2sizekb) |
315 { | 323 { |
316 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}; | 324 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; |
317 | 325 |
318 if (max_level >= 4) | 326 if (max_level >= 4) |
319 detect_caches_cpuid4 (&level1, &level2); | 327 detect_caches_cpuid4 (&level1, &level2, &level3); |
320 else if (max_level >= 2) | 328 else if (max_level >= 2) |
321 detect_caches_cpuid2 (xeon_mp, &level1, &level2); | 329 detect_caches_cpuid2 (xeon_mp, &level1, &level2); |
322 else | 330 else |
323 return ""; | 331 return ""; |
324 | 332 |
325 if (level1.sizekb == 0) | 333 if (level1.sizekb == 0) |
326 return ""; | 334 return ""; |
327 | 335 |
| 336 /* Let the L3 replace the L2. This assumes inclusive caches |
| 337 and single-threaded programs for now. */ |
| 338 if (level3.sizekb) |
| 339 level2 = level3; |
| 340 |
328 /* Intel CPUs are equipped with AMD style L2 cache info. Try this | 341 /* Intel CPUs are equipped with AMD style L2 cache info. Try this |
329 method if other methods fail to provide L2 cache parameters. */ | 342 method if other methods fail to provide L2 cache parameters. */ |
330 if (level2.sizekb == 0 && max_ext_level >= 0x80000006) | 343 if (level2.sizekb == 0 && max_ext_level >= 0x80000006) |
331 detect_l2_cache (&level2); | 344 detect_l2_cache (&level2); |
332 | 345 |
| 346 *l2sizekb = level2.sizekb; |
| 347 |
333 return describe_cache (level1, level2); | 348 return describe_cache (level1, level2); |
334 } | 349 } |
335 | 350 |
336 enum vendor_signatures | 351 enum vendor_signatures |
337 { | 352 { |
338 SIG_INTEL = 0x756e6547 /* Genu */, | 353 SIG_INTEL = 0x756e6547 /* Genu */, |
339 SIG_AMD = 0x68747541 /* Auth */ | 354 SIG_AMD = 0x68747541 /* Auth */ |
340 }; | 355 }; |
341 | 356 |
342 enum processor_signatures | 357 enum processor_signatures |
(...skipping 28 matching lines...) Expand all Loading... |
371 | 386 |
372 unsigned int vendor; | 387 unsigned int vendor; |
373 unsigned int model, family; | 388 unsigned int model, family; |
374 | 389 |
375 unsigned int has_sse3, has_ssse3, has_cmpxchg16b; | 390 unsigned int has_sse3, has_ssse3, has_cmpxchg16b; |
376 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; | 391 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; |
377 | 392 |
378 /* Extended features */ | 393 /* Extended features */ |
379 unsigned int has_lahf_lm = 0, has_sse4a = 0; | 394 unsigned int has_lahf_lm = 0, has_sse4a = 0; |
380 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; | 395 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; |
381 unsigned int has_sse4_1 = 0, has_sse4_2 = 0; | 396 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; |
382 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; | 397 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; |
383 unsigned int has_pclmul = 0; | 398 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; |
384 | 399 |
385 bool arch; | 400 bool arch; |
386 | 401 |
| 402 unsigned int l2sizekb = 0; |
| 403 |
387 if (argc < 1) | 404 if (argc < 1) |
388 return NULL; | 405 return NULL; |
389 | 406 |
390 arch = !strcmp (argv[0], "arch"); | 407 arch = !strcmp (argv[0], "arch"); |
391 | 408 |
392 if (!arch && strcmp (argv[0], "tune")) | 409 if (!arch && strcmp (argv[0], "tune")) |
393 return NULL; | 410 return NULL; |
394 | 411 |
395 max_level = __get_cpuid_max (0, &vendor); | 412 max_level = __get_cpuid_max (0, &vendor); |
396 if (max_level < 1) | 413 if (max_level < 1) |
397 goto done; | 414 goto done; |
398 | 415 |
399 __cpuid (1, eax, ebx, ecx, edx); | 416 __cpuid (1, eax, ebx, ecx, edx); |
400 | 417 |
401 /* We don't care for extended family. */ | |
402 model = (eax >> 4) & 0x0f; | 418 model = (eax >> 4) & 0x0f; |
403 family = (eax >> 8) & 0x0f; | 419 family = (eax >> 8) & 0x0f; |
| 420 if (vendor == SIG_INTEL) |
| 421 { |
| 422 unsigned int extended_model, extended_family; |
| 423 |
| 424 extended_model = (eax >> 12) & 0xf0; |
| 425 extended_family = (eax >> 20) & 0xff; |
| 426 if (family == 0x0f) |
| 427 { |
| 428 family += extended_family; |
| 429 model += extended_model; |
| 430 } |
| 431 else if (family == 0x06) |
| 432 model += extended_model; |
| 433 } |
404 | 434 |
405 has_sse3 = ecx & bit_SSE3; | 435 has_sse3 = ecx & bit_SSE3; |
406 has_ssse3 = ecx & bit_SSSE3; | 436 has_ssse3 = ecx & bit_SSSE3; |
407 has_sse4_1 = ecx & bit_SSE4_1; | 437 has_sse4_1 = ecx & bit_SSE4_1; |
408 has_sse4_2 = ecx & bit_SSE4_2; | 438 has_sse4_2 = ecx & bit_SSE4_2; |
409 has_avx = ecx & bit_AVX; | 439 has_avx = ecx & bit_AVX; |
410 has_cmpxchg16b = ecx & bit_CMPXCHG16B; | 440 has_cmpxchg16b = ecx & bit_CMPXCHG16B; |
| 441 has_movbe = ecx & bit_MOVBE; |
411 has_popcnt = ecx & bit_POPCNT; | 442 has_popcnt = ecx & bit_POPCNT; |
412 has_aes = ecx & bit_AES; | 443 has_aes = ecx & bit_AES; |
413 has_pclmul = ecx & bit_PCLMUL; | 444 has_pclmul = ecx & bit_PCLMUL; |
414 | 445 |
415 has_cmpxchg8b = edx & bit_CMPXCHG8B; | 446 has_cmpxchg8b = edx & bit_CMPXCHG8B; |
416 has_cmov = edx & bit_CMOV; | 447 has_cmov = edx & bit_CMOV; |
417 has_mmx = edx & bit_MMX; | 448 has_mmx = edx & bit_MMX; |
418 has_sse = edx & bit_SSE; | 449 has_sse = edx & bit_SSE; |
419 has_sse2 = edx & bit_SSE2; | 450 has_sse2 = edx & bit_SSE2; |
420 | 451 |
421 /* Check cpuid level of extended features. */ | 452 /* Check cpuid level of extended features. */ |
422 __cpuid (0x80000000, ext_level, ebx, ecx, edx); | 453 __cpuid (0x80000000, ext_level, ebx, ecx, edx); |
423 | 454 |
424 if (ext_level > 0x80000000) | 455 if (ext_level > 0x80000000) |
425 { | 456 { |
426 __cpuid (0x80000001, eax, ebx, ecx, edx); | 457 __cpuid (0x80000001, eax, ebx, ecx, edx); |
427 | 458 |
428 has_lahf_lm = ecx & bit_LAHF_LM; | 459 has_lahf_lm = ecx & bit_LAHF_LM; |
429 has_sse4a = ecx & bit_SSE4a; | 460 has_sse4a = ecx & bit_SSE4a; |
| 461 has_abm = ecx & bit_ABM; |
| 462 has_lwp = ecx & bit_LWP; |
430 | 463 |
431 has_longmode = edx & bit_LM; | 464 has_longmode = edx & bit_LM; |
432 has_3dnowp = edx & bit_3DNOWP; | 465 has_3dnowp = edx & bit_3DNOWP; |
433 has_3dnow = edx & bit_3DNOW; | 466 has_3dnow = edx & bit_3DNOW; |
434 } | 467 } |
435 | 468 |
436 if (!arch) | 469 if (!arch) |
437 { | 470 { |
438 if (vendor == SIG_AMD) | 471 if (vendor == SIG_AMD) |
439 cache = detect_caches_amd (ext_level); | 472 cache = detect_caches_amd (ext_level); |
440 else if (vendor == SIG_INTEL) | 473 else if (vendor == SIG_INTEL) |
441 { | 474 { |
442 bool xeon_mp = (family == 15 && model == 6); | 475 bool xeon_mp = (family == 15 && model == 6); |
443 » cache = detect_caches_intel (xeon_mp, max_level, ext_level); | 476 » cache = detect_caches_intel (xeon_mp, max_level, |
| 477 » » » » ext_level, &l2sizekb); |
444 } | 478 } |
445 } | 479 } |
446 | 480 |
447 if (vendor == SIG_AMD) | 481 if (vendor == SIG_AMD) |
448 { | 482 { |
449 unsigned int name; | 483 unsigned int name; |
450 | 484 |
451 /* Detect geode processor by its processor signature. */ | 485 /* Detect geode processor by its processor signature. */ |
452 if (ext_level > 0x80000001) | 486 if (ext_level > 0x80000001) |
453 __cpuid (0x80000002, name, ebx, ecx, edx); | 487 __cpuid (0x80000002, name, ebx, ecx, edx); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
497 case PROCESSOR_I486: | 531 case PROCESSOR_I486: |
498 cpu = "i486"; | 532 cpu = "i486"; |
499 break; | 533 break; |
500 case PROCESSOR_PENTIUM: | 534 case PROCESSOR_PENTIUM: |
501 if (arch && has_mmx) | 535 if (arch && has_mmx) |
502 cpu = "pentium-mmx"; | 536 cpu = "pentium-mmx"; |
503 else | 537 else |
504 cpu = "pentium"; | 538 cpu = "pentium"; |
505 break; | 539 break; |
506 case PROCESSOR_PENTIUMPRO: | 540 case PROCESSOR_PENTIUMPRO: |
507 if (has_longmode) | 541 switch (model) |
508 » /* It is Core 2 Duo. */ | |
509 » cpu = "core2"; | |
510 else if (arch) | |
511 { | 542 { |
512 » if (has_sse3) | 543 » case 0x1c: |
513 » /* It is Core Duo. */ | 544 » case 0x26: |
514 » cpu = "prescott"; | 545 » /* Atom. */ |
515 » else if (has_sse2) | 546 » cpu = "atom"; |
516 » /* It is Pentium M. */ | 547 » break; |
517 » cpu = "pentium-m"; | 548 » case 0x1a: |
518 » else if (has_sse) | 549 » case 0x1e: |
519 » /* It is Pentium III. */ | 550 » case 0x1f: |
520 » cpu = "pentium3"; | 551 » case 0x2e: |
521 » else if (has_mmx) | 552 » /* FIXME: Optimize for Nehalem. */ |
522 » /* It is Pentium II. */ | 553 » cpu = "core2"; |
523 » cpu = "pentium2"; | 554 » break; |
| 555 » case 0x25: |
| 556 » case 0x2f: |
| 557 » /* FIXME: Optimize for Westmere. */ |
| 558 » cpu = "core2"; |
| 559 » break; |
| 560 » case 0x17: |
| 561 » case 0x1d: |
| 562 » /* Penryn. FIXME: -mtune=core2 is slower than -mtune=generic */ |
| 563 » cpu = "core2"; |
| 564 » break; |
| 565 » case 0x0f: |
| 566 » /* Merom. FIXME: -mtune=core2 is slower than -mtune=generic */ |
| 567 » cpu = "core2"; |
| 568 » break; |
| 569 » default: |
| 570 » if (arch) |
| 571 » { |
| 572 » if (has_ssse3) |
| 573 » » /* If it is an unknown CPU with SSSE3, assume Core 2. */ |
| 574 » » cpu = "core2"; |
| 575 » else if (has_sse3) |
| 576 » » /* It is Core Duo. */ |
| 577 » » cpu = "pentium-m"; |
| 578 » else if (has_sse2) |
| 579 » » /* It is Pentium M. */ |
| 580 » » cpu = "pentium-m"; |
| 581 » else if (has_sse) |
| 582 » » /* It is Pentium III. */ |
| 583 » » cpu = "pentium3"; |
| 584 » else if (has_mmx) |
| 585 » » /* It is Pentium II. */ |
| 586 » » cpu = "pentium2"; |
| 587 » else |
| 588 » » /* Default to Pentium Pro. */ |
| 589 » » cpu = "pentiumpro"; |
| 590 » } |
524 else | 591 else |
525 » /* Default to Pentium Pro. */ | 592 » /* For -mtune, we default to -mtune=generic. */ |
526 » cpu = "pentiumpro"; | 593 » cpu = "generic"; |
| 594 » break; |
527 } | 595 } |
528 else | |
529 /* For -mtune, we default to -mtune=generic. */ | |
530 cpu = "generic"; | |
531 break; | 596 break; |
532 case PROCESSOR_PENTIUM4: | 597 case PROCESSOR_PENTIUM4: |
533 if (has_sse3) | 598 if (has_sse3) |
534 { | 599 { |
535 if (has_longmode) | 600 if (has_longmode) |
536 cpu = "nocona"; | 601 cpu = "nocona"; |
537 else | 602 else |
538 cpu = "prescott"; | 603 cpu = "prescott"; |
539 } | 604 } |
540 else | 605 else |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
587 else if (has_cmpxchg8b) | 652 else if (has_cmpxchg8b) |
588 cpu = "pentium"; | 653 cpu = "pentium"; |
589 } | 654 } |
590 else | 655 else |
591 cpu = "generic"; | 656 cpu = "generic"; |
592 } | 657 } |
593 | 658 |
594 if (arch) | 659 if (arch) |
595 { | 660 { |
596 if (has_cmpxchg16b) | 661 if (has_cmpxchg16b) |
597 » options = concat (options, "-mcx16 ", NULL); | 662 » options = concat (options, " -mcx16", NULL); |
598 if (has_lahf_lm) | 663 if (has_lahf_lm) |
599 » options = concat (options, "-msahf ", NULL); | 664 » options = concat (options, " -msahf", NULL); |
| 665 if (has_movbe) |
| 666 » options = concat (options, " -mmovbe", NULL); |
600 if (has_aes) | 667 if (has_aes) |
601 » options = concat (options, "-maes ", NULL); | 668 » options = concat (options, " -maes", NULL); |
602 if (has_pclmul) | 669 if (has_pclmul) |
603 » options = concat (options, "-mpclmul ", NULL); | 670 » options = concat (options, " -mpclmul", NULL); |
604 if (has_popcnt) | 671 if (has_popcnt) |
605 » options = concat (options, "-mpopcnt ", NULL); | 672 » options = concat (options, " -mpopcnt", NULL); |
| 673 if (has_abm) |
| 674 » options = concat (options, " -mabm", NULL); |
| 675 if (has_lwp) |
| 676 » options = concat (options, " -mlwp", NULL); |
| 677 |
606 if (has_avx) | 678 if (has_avx) |
607 » options = concat (options, "-mavx ", NULL); | 679 » options = concat (options, " -mavx", NULL); |
608 else if (has_sse4_2) | 680 else if (has_sse4_2) |
609 » options = concat (options, "-msse4.2 ", NULL); | 681 » options = concat (options, " -msse4.2", NULL); |
610 else if (has_sse4_1) | 682 else if (has_sse4_1) |
611 » options = concat (options, "-msse4.1 ", NULL); | 683 » options = concat (options, " -msse4.1", NULL); |
612 } | 684 } |
613 | 685 |
614 done: | 686 done: |
615 return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL); | 687 return concat (cache, "-m", argv[0], "=", cpu, options, NULL); |
616 } | 688 } |
617 #else | 689 #else |
618 | 690 |
619 /* If we aren't compiling with GCC we just provide a minimal | 691 /* If we aren't compiling with GCC then the driver will just ignore |
620 default value. */ | 692 the -march and -mtune "native" target and will leave it to the newly |
| 693 built compiler to generate code for its default target. */ |
621 | 694 |
622 const char *host_detect_local_cpu (int argc, const char **argv) | 695 const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED, |
| 696 » » » » const char **argv ATTRIBUTE_UNUSED) |
623 { | 697 { |
624 const char *cpu; | 698 return NULL; |
625 bool arch; | |
626 | |
627 if (argc < 1) | |
628 return NULL; | |
629 | |
630 arch = !strcmp (argv[0], "arch"); | |
631 | |
632 if (!arch && strcmp (argv[0], "tune")) | |
633 return NULL; | |
634 | |
635 if (arch) | |
636 { | |
637 /* FIXME: i386 is wrong for 64bit compiler. How can we tell if | |
638 » we are generating 64bit or 32bit code? */ | |
639 cpu = "i386"; | |
640 } | |
641 else | |
642 cpu = "generic"; | |
643 | |
644 return concat ("-m", argv[0], "=", cpu, NULL); | |
645 } | 699 } |
646 #endif /* __GNUC__ */ | 700 #endif /* __GNUC__ */ |
OLD | NEW |