Index: mozilla/security/nss/lib/freebl/mpi/mpcpucache.c |
=================================================================== |
--- mozilla/security/nss/lib/freebl/mpi/mpcpucache.c (revision 191424) |
+++ mozilla/security/nss/lib/freebl/mpi/mpcpucache.c (working copy) |
@@ -1,813 +0,0 @@ |
-/* This Source Code Form is subject to the terms of the Mozilla Public |
- * License, v. 2.0. If a copy of the MPL was not distributed with this |
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
- |
-#include "mpi.h" |
- |
-/* |
- * This file implements a single function: s_mpi_getProcessorLineSize(); |
- * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line |
- * if a cache exists, or zero if there is no cache. If more than one |
- * cache line exists, it should return the smallest line size (which is |
- * usually the L1 cache). |
- * |
- * mp_modexp uses this information to make sure that private key information |
- * isn't being leaked through the cache. |
- * |
- * Currently the file returns good data for most modern x86 processors, and |
- * reasonable data on 64-bit ppc processors. All other processors are assumed |
- * to have a cache line size of 32 bytes unless modified by target.mk. |
- * |
- */ |
- |
-#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
-/* X86 processors have special instructions that tell us about the cache */ |
-#include "string.h" |
- |
-#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
-#define AMD_64 1 |
-#endif |
- |
-/* Generic CPUID function */ |
-#if defined(AMD_64) |
- |
-#if defined(__GNUC__) |
- |
-void freebl_cpuid(unsigned long op, unsigned long *eax, |
- unsigned long *ebx, unsigned long *ecx, |
- unsigned long *edx) |
-{ |
- __asm__("cpuid\n\t" |
- : "=a" (*eax), |
- "=b" (*ebx), |
- "=c" (*ecx), |
- "=d" (*edx) |
- : "0" (op)); |
-} |
- |
-#elif defined(_MSC_VER) |
- |
-#include <intrin.h> |
- |
-void freebl_cpuid(unsigned long op, unsigned long *eax, |
- unsigned long *ebx, unsigned long *ecx, |
- unsigned long *edx) |
-{ |
- int intrinsic_out[4]; |
- |
- __cpuid(intrinsic_out, op); |
- *eax = intrinsic_out[0]; |
- *ebx = intrinsic_out[1]; |
- *ecx = intrinsic_out[2]; |
- *edx = intrinsic_out[3]; |
-} |
- |
-#endif |
- |
-#else /* !defined(AMD_64) */ |
- |
-/* x86 */ |
- |
-#if defined(__GNUC__) |
-void freebl_cpuid(unsigned long op, unsigned long *eax, |
- unsigned long *ebx, unsigned long *ecx, |
- unsigned long *edx) |
-{ |
-/* sigh GCC isn't smart enough to save the ebx PIC register on it's own |
- * in this case, so do it by hand. Use edi to store ebx and pass the |
- * value returned in ebx from cpuid through edi. */ |
- __asm__("mov %%ebx,%%edi\n\t" |
- "cpuid\n\t" |
- "xchgl %%ebx,%%edi\n\t" |
- : "=a" (*eax), |
- "=D" (*ebx), |
- "=c" (*ecx), |
- "=d" (*edx) |
- : "0" (op)); |
-} |
- |
-/* |
- * try flipping a processor flag to determine CPU type |
- */ |
-static unsigned long changeFlag(unsigned long flag) |
-{ |
- unsigned long changedFlags, originalFlags; |
- __asm__("pushfl\n\t" /* get the flags */ |
- "popl %0\n\t" |
- "movl %0,%1\n\t" /* save the original flags */ |
- "xorl %2,%0\n\t" /* flip the bit */ |
- "pushl %0\n\t" /* set the flags */ |
- "popfl\n\t" |
- "pushfl\n\t" /* get the flags again (for return) */ |
- "popl %0\n\t" |
- "pushl %1\n\t" /* restore the original flags */ |
- "popfl\n\t" |
- : "=r" (changedFlags), |
- "=r" (originalFlags), |
- "=r" (flag) |
- : "2" (flag)); |
- return changedFlags ^ originalFlags; |
-} |
- |
-#elif defined(_MSC_VER) |
- |
-/* |
- * windows versions of the above assembler |
- */ |
-#define wcpuid __asm __emit 0fh __asm __emit 0a2h |
-void freebl_cpuid(unsigned long op, unsigned long *Reax, |
- unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) |
-{ |
- unsigned long Leax, Lebx, Lecx, Ledx; |
- __asm { |
- pushad |
- mov eax,op |
- wcpuid |
- mov Leax,eax |
- mov Lebx,ebx |
- mov Lecx,ecx |
- mov Ledx,edx |
- popad |
- } |
- *Reax = Leax; |
- *Rebx = Lebx; |
- *Recx = Lecx; |
- *Redx = Ledx; |
-} |
- |
-static unsigned long changeFlag(unsigned long flag) |
-{ |
- unsigned long changedFlags, originalFlags; |
- __asm { |
- push eax |
- push ebx |
- pushfd /* get the flags */ |
- pop eax |
- push eax /* save the flags on the stack */ |
- mov originalFlags,eax /* save the original flags */ |
- mov ebx,flag |
- xor eax,ebx /* flip the bit */ |
- push eax /* set the flags */ |
- popfd |
- pushfd /* get the flags again (for return) */ |
- pop eax |
- popfd /* restore the original flags */ |
- mov changedFlags,eax |
- pop ebx |
- pop eax |
- } |
- return changedFlags ^ originalFlags; |
-} |
-#endif |
- |
-#endif |
- |
-#if !defined(AMD_64) |
-#define AC_FLAG 0x40000 |
-#define ID_FLAG 0x200000 |
- |
-/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ |
-static int is386() |
-{ |
- return changeFlag(AC_FLAG) == 0; |
-} |
- |
-/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ |
-static int is486() |
-{ |
- return changeFlag(ID_FLAG) == 0; |
-} |
-#endif |
- |
- |
-/* |
- * table for Intel Cache. |
- * See Intel Application Note AP-485 for more information |
- */ |
- |
-typedef unsigned char CacheTypeEntry; |
- |
-typedef enum { |
- Cache_NONE = 0, |
- Cache_UNKNOWN = 1, |
- Cache_TLB = 2, |
- Cache_TLBi = 3, |
- Cache_TLBd = 4, |
- Cache_Trace = 5, |
- Cache_L1 = 6, |
- Cache_L1i = 7, |
- Cache_L1d = 8, |
- Cache_L2 = 9 , |
- Cache_L2i = 10 , |
- Cache_L2d = 11 , |
- Cache_L3 = 12 , |
- Cache_L3i = 13, |
- Cache_L3d = 14 |
-} CacheType; |
- |
-struct _cache { |
- CacheTypeEntry type; |
- unsigned char lineSize; |
-}; |
-static const struct _cache CacheMap[256] = { |
-/* 00 */ {Cache_NONE, 0 }, |
-/* 01 */ {Cache_TLBi, 0 }, |
-/* 02 */ {Cache_TLBi, 0 }, |
-/* 03 */ {Cache_TLBd, 0 }, |
-/* 04 */ {Cache_TLBd, }, |
-/* 05 */ {Cache_UNKNOWN, 0 }, |
-/* 06 */ {Cache_L1i, 32 }, |
-/* 07 */ {Cache_UNKNOWN, 0 }, |
-/* 08 */ {Cache_L1i, 32 }, |
-/* 09 */ {Cache_UNKNOWN, 0 }, |
-/* 0a */ {Cache_L1d, 32 }, |
-/* 0b */ {Cache_UNKNOWN, 0 }, |
-/* 0c */ {Cache_L1d, 32 }, |
-/* 0d */ {Cache_UNKNOWN, 0 }, |
-/* 0e */ {Cache_UNKNOWN, 0 }, |
-/* 0f */ {Cache_UNKNOWN, 0 }, |
-/* 10 */ {Cache_UNKNOWN, 0 }, |
-/* 11 */ {Cache_UNKNOWN, 0 }, |
-/* 12 */ {Cache_UNKNOWN, 0 }, |
-/* 13 */ {Cache_UNKNOWN, 0 }, |
-/* 14 */ {Cache_UNKNOWN, 0 }, |
-/* 15 */ {Cache_UNKNOWN, 0 }, |
-/* 16 */ {Cache_UNKNOWN, 0 }, |
-/* 17 */ {Cache_UNKNOWN, 0 }, |
-/* 18 */ {Cache_UNKNOWN, 0 }, |
-/* 19 */ {Cache_UNKNOWN, 0 }, |
-/* 1a */ {Cache_UNKNOWN, 0 }, |
-/* 1b */ {Cache_UNKNOWN, 0 }, |
-/* 1c */ {Cache_UNKNOWN, 0 }, |
-/* 1d */ {Cache_UNKNOWN, 0 }, |
-/* 1e */ {Cache_UNKNOWN, 0 }, |
-/* 1f */ {Cache_UNKNOWN, 0 }, |
-/* 20 */ {Cache_UNKNOWN, 0 }, |
-/* 21 */ {Cache_UNKNOWN, 0 }, |
-/* 22 */ {Cache_L3, 64 }, |
-/* 23 */ {Cache_L3, 64 }, |
-/* 24 */ {Cache_UNKNOWN, 0 }, |
-/* 25 */ {Cache_L3, 64 }, |
-/* 26 */ {Cache_UNKNOWN, 0 }, |
-/* 27 */ {Cache_UNKNOWN, 0 }, |
-/* 28 */ {Cache_UNKNOWN, 0 }, |
-/* 29 */ {Cache_L3, 64 }, |
-/* 2a */ {Cache_UNKNOWN, 0 }, |
-/* 2b */ {Cache_UNKNOWN, 0 }, |
-/* 2c */ {Cache_L1d, 64 }, |
-/* 2d */ {Cache_UNKNOWN, 0 }, |
-/* 2e */ {Cache_UNKNOWN, 0 }, |
-/* 2f */ {Cache_UNKNOWN, 0 }, |
-/* 30 */ {Cache_L1i, 64 }, |
-/* 31 */ {Cache_UNKNOWN, 0 }, |
-/* 32 */ {Cache_UNKNOWN, 0 }, |
-/* 33 */ {Cache_UNKNOWN, 0 }, |
-/* 34 */ {Cache_UNKNOWN, 0 }, |
-/* 35 */ {Cache_UNKNOWN, 0 }, |
-/* 36 */ {Cache_UNKNOWN, 0 }, |
-/* 37 */ {Cache_UNKNOWN, 0 }, |
-/* 38 */ {Cache_UNKNOWN, 0 }, |
-/* 39 */ {Cache_L2, 64 }, |
-/* 3a */ {Cache_UNKNOWN, 0 }, |
-/* 3b */ {Cache_L2, 64 }, |
-/* 3c */ {Cache_L2, 64 }, |
-/* 3d */ {Cache_UNKNOWN, 0 }, |
-/* 3e */ {Cache_UNKNOWN, 0 }, |
-/* 3f */ {Cache_UNKNOWN, 0 }, |
-/* 40 */ {Cache_L2, 0 }, |
-/* 41 */ {Cache_L2, 32 }, |
-/* 42 */ {Cache_L2, 32 }, |
-/* 43 */ {Cache_L2, 32 }, |
-/* 44 */ {Cache_L2, 32 }, |
-/* 45 */ {Cache_L2, 32 }, |
-/* 46 */ {Cache_UNKNOWN, 0 }, |
-/* 47 */ {Cache_UNKNOWN, 0 }, |
-/* 48 */ {Cache_UNKNOWN, 0 }, |
-/* 49 */ {Cache_UNKNOWN, 0 }, |
-/* 4a */ {Cache_UNKNOWN, 0 }, |
-/* 4b */ {Cache_UNKNOWN, 0 }, |
-/* 4c */ {Cache_UNKNOWN, 0 }, |
-/* 4d */ {Cache_UNKNOWN, 0 }, |
-/* 4e */ {Cache_UNKNOWN, 0 }, |
-/* 4f */ {Cache_UNKNOWN, 0 }, |
-/* 50 */ {Cache_TLBi, 0 }, |
-/* 51 */ {Cache_TLBi, 0 }, |
-/* 52 */ {Cache_TLBi, 0 }, |
-/* 53 */ {Cache_UNKNOWN, 0 }, |
-/* 54 */ {Cache_UNKNOWN, 0 }, |
-/* 55 */ {Cache_UNKNOWN, 0 }, |
-/* 56 */ {Cache_UNKNOWN, 0 }, |
-/* 57 */ {Cache_UNKNOWN, 0 }, |
-/* 58 */ {Cache_UNKNOWN, 0 }, |
-/* 59 */ {Cache_UNKNOWN, 0 }, |
-/* 5a */ {Cache_UNKNOWN, 0 }, |
-/* 5b */ {Cache_TLBd, 0 }, |
-/* 5c */ {Cache_TLBd, 0 }, |
-/* 5d */ {Cache_TLBd, 0 }, |
-/* 5e */ {Cache_UNKNOWN, 0 }, |
-/* 5f */ {Cache_UNKNOWN, 0 }, |
-/* 60 */ {Cache_UNKNOWN, 0 }, |
-/* 61 */ {Cache_UNKNOWN, 0 }, |
-/* 62 */ {Cache_UNKNOWN, 0 }, |
-/* 63 */ {Cache_UNKNOWN, 0 }, |
-/* 64 */ {Cache_UNKNOWN, 0 }, |
-/* 65 */ {Cache_UNKNOWN, 0 }, |
-/* 66 */ {Cache_L1d, 64 }, |
-/* 67 */ {Cache_L1d, 64 }, |
-/* 68 */ {Cache_L1d, 64 }, |
-/* 69 */ {Cache_UNKNOWN, 0 }, |
-/* 6a */ {Cache_UNKNOWN, 0 }, |
-/* 6b */ {Cache_UNKNOWN, 0 }, |
-/* 6c */ {Cache_UNKNOWN, 0 }, |
-/* 6d */ {Cache_UNKNOWN, 0 }, |
-/* 6e */ {Cache_UNKNOWN, 0 }, |
-/* 6f */ {Cache_UNKNOWN, 0 }, |
-/* 70 */ {Cache_Trace, 1 }, |
-/* 71 */ {Cache_Trace, 1 }, |
-/* 72 */ {Cache_Trace, 1 }, |
-/* 73 */ {Cache_UNKNOWN, 0 }, |
-/* 74 */ {Cache_UNKNOWN, 0 }, |
-/* 75 */ {Cache_UNKNOWN, 0 }, |
-/* 76 */ {Cache_UNKNOWN, 0 }, |
-/* 77 */ {Cache_UNKNOWN, 0 }, |
-/* 78 */ {Cache_UNKNOWN, 0 }, |
-/* 79 */ {Cache_L2, 64 }, |
-/* 7a */ {Cache_L2, 64 }, |
-/* 7b */ {Cache_L2, 64 }, |
-/* 7c */ {Cache_L2, 64 }, |
-/* 7d */ {Cache_UNKNOWN, 0 }, |
-/* 7e */ {Cache_UNKNOWN, 0 }, |
-/* 7f */ {Cache_UNKNOWN, 0 }, |
-/* 80 */ {Cache_UNKNOWN, 0 }, |
-/* 81 */ {Cache_UNKNOWN, 0 }, |
-/* 82 */ {Cache_L2, 32 }, |
-/* 83 */ {Cache_L2, 32 }, |
-/* 84 */ {Cache_L2, 32 }, |
-/* 85 */ {Cache_L2, 32 }, |
-/* 86 */ {Cache_L2, 64 }, |
-/* 87 */ {Cache_L2, 64 }, |
-/* 88 */ {Cache_UNKNOWN, 0 }, |
-/* 89 */ {Cache_UNKNOWN, 0 }, |
-/* 8a */ {Cache_UNKNOWN, 0 }, |
-/* 8b */ {Cache_UNKNOWN, 0 }, |
-/* 8c */ {Cache_UNKNOWN, 0 }, |
-/* 8d */ {Cache_UNKNOWN, 0 }, |
-/* 8e */ {Cache_UNKNOWN, 0 }, |
-/* 8f */ {Cache_UNKNOWN, 0 }, |
-/* 90 */ {Cache_UNKNOWN, 0 }, |
-/* 91 */ {Cache_UNKNOWN, 0 }, |
-/* 92 */ {Cache_UNKNOWN, 0 }, |
-/* 93 */ {Cache_UNKNOWN, 0 }, |
-/* 94 */ {Cache_UNKNOWN, 0 }, |
-/* 95 */ {Cache_UNKNOWN, 0 }, |
-/* 96 */ {Cache_UNKNOWN, 0 }, |
-/* 97 */ {Cache_UNKNOWN, 0 }, |
-/* 98 */ {Cache_UNKNOWN, 0 }, |
-/* 99 */ {Cache_UNKNOWN, 0 }, |
-/* 9a */ {Cache_UNKNOWN, 0 }, |
-/* 9b */ {Cache_UNKNOWN, 0 }, |
-/* 9c */ {Cache_UNKNOWN, 0 }, |
-/* 9d */ {Cache_UNKNOWN, 0 }, |
-/* 9e */ {Cache_UNKNOWN, 0 }, |
-/* 9f */ {Cache_UNKNOWN, 0 }, |
-/* a0 */ {Cache_UNKNOWN, 0 }, |
-/* a1 */ {Cache_UNKNOWN, 0 }, |
-/* a2 */ {Cache_UNKNOWN, 0 }, |
-/* a3 */ {Cache_UNKNOWN, 0 }, |
-/* a4 */ {Cache_UNKNOWN, 0 }, |
-/* a5 */ {Cache_UNKNOWN, 0 }, |
-/* a6 */ {Cache_UNKNOWN, 0 }, |
-/* a7 */ {Cache_UNKNOWN, 0 }, |
-/* a8 */ {Cache_UNKNOWN, 0 }, |
-/* a9 */ {Cache_UNKNOWN, 0 }, |
-/* aa */ {Cache_UNKNOWN, 0 }, |
-/* ab */ {Cache_UNKNOWN, 0 }, |
-/* ac */ {Cache_UNKNOWN, 0 }, |
-/* ad */ {Cache_UNKNOWN, 0 }, |
-/* ae */ {Cache_UNKNOWN, 0 }, |
-/* af */ {Cache_UNKNOWN, 0 }, |
-/* b0 */ {Cache_TLBi, 0 }, |
-/* b1 */ {Cache_UNKNOWN, 0 }, |
-/* b2 */ {Cache_UNKNOWN, 0 }, |
-/* b3 */ {Cache_TLBd, 0 }, |
-/* b4 */ {Cache_UNKNOWN, 0 }, |
-/* b5 */ {Cache_UNKNOWN, 0 }, |
-/* b6 */ {Cache_UNKNOWN, 0 }, |
-/* b7 */ {Cache_UNKNOWN, 0 }, |
-/* b8 */ {Cache_UNKNOWN, 0 }, |
-/* b9 */ {Cache_UNKNOWN, 0 }, |
-/* ba */ {Cache_UNKNOWN, 0 }, |
-/* bb */ {Cache_UNKNOWN, 0 }, |
-/* bc */ {Cache_UNKNOWN, 0 }, |
-/* bd */ {Cache_UNKNOWN, 0 }, |
-/* be */ {Cache_UNKNOWN, 0 }, |
-/* bf */ {Cache_UNKNOWN, 0 }, |
-/* c0 */ {Cache_UNKNOWN, 0 }, |
-/* c1 */ {Cache_UNKNOWN, 0 }, |
-/* c2 */ {Cache_UNKNOWN, 0 }, |
-/* c3 */ {Cache_UNKNOWN, 0 }, |
-/* c4 */ {Cache_UNKNOWN, 0 }, |
-/* c5 */ {Cache_UNKNOWN, 0 }, |
-/* c6 */ {Cache_UNKNOWN, 0 }, |
-/* c7 */ {Cache_UNKNOWN, 0 }, |
-/* c8 */ {Cache_UNKNOWN, 0 }, |
-/* c9 */ {Cache_UNKNOWN, 0 }, |
-/* ca */ {Cache_UNKNOWN, 0 }, |
-/* cb */ {Cache_UNKNOWN, 0 }, |
-/* cc */ {Cache_UNKNOWN, 0 }, |
-/* cd */ {Cache_UNKNOWN, 0 }, |
-/* ce */ {Cache_UNKNOWN, 0 }, |
-/* cf */ {Cache_UNKNOWN, 0 }, |
-/* d0 */ {Cache_UNKNOWN, 0 }, |
-/* d1 */ {Cache_UNKNOWN, 0 }, |
-/* d2 */ {Cache_UNKNOWN, 0 }, |
-/* d3 */ {Cache_UNKNOWN, 0 }, |
-/* d4 */ {Cache_UNKNOWN, 0 }, |
-/* d5 */ {Cache_UNKNOWN, 0 }, |
-/* d6 */ {Cache_UNKNOWN, 0 }, |
-/* d7 */ {Cache_UNKNOWN, 0 }, |
-/* d8 */ {Cache_UNKNOWN, 0 }, |
-/* d9 */ {Cache_UNKNOWN, 0 }, |
-/* da */ {Cache_UNKNOWN, 0 }, |
-/* db */ {Cache_UNKNOWN, 0 }, |
-/* dc */ {Cache_UNKNOWN, 0 }, |
-/* dd */ {Cache_UNKNOWN, 0 }, |
-/* de */ {Cache_UNKNOWN, 0 }, |
-/* df */ {Cache_UNKNOWN, 0 }, |
-/* e0 */ {Cache_UNKNOWN, 0 }, |
-/* e1 */ {Cache_UNKNOWN, 0 }, |
-/* e2 */ {Cache_UNKNOWN, 0 }, |
-/* e3 */ {Cache_UNKNOWN, 0 }, |
-/* e4 */ {Cache_UNKNOWN, 0 }, |
-/* e5 */ {Cache_UNKNOWN, 0 }, |
-/* e6 */ {Cache_UNKNOWN, 0 }, |
-/* e7 */ {Cache_UNKNOWN, 0 }, |
-/* e8 */ {Cache_UNKNOWN, 0 }, |
-/* e9 */ {Cache_UNKNOWN, 0 }, |
-/* ea */ {Cache_UNKNOWN, 0 }, |
-/* eb */ {Cache_UNKNOWN, 0 }, |
-/* ec */ {Cache_UNKNOWN, 0 }, |
-/* ed */ {Cache_UNKNOWN, 0 }, |
-/* ee */ {Cache_UNKNOWN, 0 }, |
-/* ef */ {Cache_UNKNOWN, 0 }, |
-/* f0 */ {Cache_UNKNOWN, 0 }, |
-/* f1 */ {Cache_UNKNOWN, 0 }, |
-/* f2 */ {Cache_UNKNOWN, 0 }, |
-/* f3 */ {Cache_UNKNOWN, 0 }, |
-/* f4 */ {Cache_UNKNOWN, 0 }, |
-/* f5 */ {Cache_UNKNOWN, 0 }, |
-/* f6 */ {Cache_UNKNOWN, 0 }, |
-/* f7 */ {Cache_UNKNOWN, 0 }, |
-/* f8 */ {Cache_UNKNOWN, 0 }, |
-/* f9 */ {Cache_UNKNOWN, 0 }, |
-/* fa */ {Cache_UNKNOWN, 0 }, |
-/* fb */ {Cache_UNKNOWN, 0 }, |
-/* fc */ {Cache_UNKNOWN, 0 }, |
-/* fd */ {Cache_UNKNOWN, 0 }, |
-/* fe */ {Cache_UNKNOWN, 0 }, |
-/* ff */ {Cache_UNKNOWN, 0 } |
-}; |
- |
- |
-/* |
- * use the above table to determine the CacheEntryLineSize. |
- */ |
-static void |
-getIntelCacheEntryLineSize(unsigned long val, int *level, |
- unsigned long *lineSize) |
-{ |
- CacheType type; |
- |
- type = CacheMap[val].type; |
- /* only interested in data caches */ |
- /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. |
- * this data check has the side effect of rejecting that entry. If |
- * that wasn't the case, we could have to reject it explicitly */ |
- if (CacheMap[val].lineSize == 0) { |
- return; |
- } |
- /* look at the caches, skip types we aren't interested in. |
- * if we already have a value for a lower level cache, skip the |
- * current entry */ |
- if ((type == Cache_L1)|| (type == Cache_L1d)) { |
- *level = 1; |
- *lineSize = CacheMap[val].lineSize; |
- } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { |
- *level = 2; |
- *lineSize = CacheMap[val].lineSize; |
- } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { |
- *level = 3; |
- *lineSize = CacheMap[val].lineSize; |
- } |
- return; |
-} |
- |
- |
-static void |
-getIntelRegisterCacheLineSize(unsigned long val, |
- int *level, unsigned long *lineSize) |
-{ |
- getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); |
- getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); |
- getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); |
- getIntelCacheEntryLineSize(val & 0xff, level, lineSize); |
-} |
- |
-/* |
- * returns '0' if no recognized cache is found, or if the cache |
- * information is supported by this processor |
- */ |
-static unsigned long |
-getIntelCacheLineSize(int cpuidLevel) |
-{ |
- int level = 4; |
- unsigned long lineSize = 0; |
- unsigned long eax, ebx, ecx, edx; |
- int repeat, count; |
- |
- if (cpuidLevel < 2) { |
- return 0; |
- } |
- |
- /* command '2' of the cpuid is intel's cache info call. Each byte of the |
- * 4 registers contain a potential descriptor for the cache. The CacheMap |
- * table maps the cache entry with the processor cache. Register 'al' |
- * contains a count value that cpuid '2' needs to be called in order to |
- * find all the cache descriptors. Only registers with the high bit set |
- * to 'zero' have valid descriptors. This code loops through all the |
- * required calls to cpuid '2' and passes any valid descriptors it finds |
- * to the getIntelRegisterCacheLineSize code, which breaks the registers |
- * down into their component descriptors. In the end the lineSize of the |
- * lowest level cache data cache is returned. */ |
- freebl_cpuid(2, &eax, &ebx, &ecx, &edx); |
- repeat = eax & 0xf; |
- for (count = 0; count < repeat; count++) { |
- if ((eax & 0x80000000) == 0) { |
- getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); |
- } |
- if ((ebx & 0x80000000) == 0) { |
- getIntelRegisterCacheLineSize(ebx, &level, &lineSize); |
- } |
- if ((ecx & 0x80000000) == 0) { |
- getIntelRegisterCacheLineSize(ecx, &level, &lineSize); |
- } |
- if ((edx & 0x80000000) == 0) { |
- getIntelRegisterCacheLineSize(edx, &level, &lineSize); |
- } |
- if (count+1 != repeat) { |
- freebl_cpuid(2, &eax, &ebx, &ecx, &edx); |
- } |
- } |
- return lineSize; |
-} |
- |
-/* |
- * returns '0' if the cache info is not supported by this processor. |
- * This is based on the AMD extended cache commands for cpuid. |
- * (see "AMD Processor Recognition Application Note" Publication 20734). |
- * Some other processors use the identical scheme. |
- * (see "Processor Recognition, Transmeta Corporation"). |
- */ |
-static unsigned long |
-getOtherCacheLineSize(unsigned long cpuidLevel) |
-{ |
- unsigned long lineSize = 0; |
- unsigned long eax, ebx, ecx, edx; |
- |
- /* get the Extended CPUID level */ |
- freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); |
- cpuidLevel = eax; |
- |
- if (cpuidLevel >= 0x80000005) { |
- freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); |
- lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ |
- } |
- return lineSize; |
-} |
- |
-static const char * const manMap[] = { |
-#define INTEL 0 |
- "GenuineIntel", |
-#define AMD 1 |
- "AuthenticAMD", |
-#define CYRIX 2 |
- "CyrixInstead", |
-#define CENTAUR 2 |
- "CentaurHauls", |
-#define NEXGEN 3 |
- "NexGenDriven", |
-#define TRANSMETA 4 |
- "GenuineTMx86", |
-#define RISE 5 |
- "RiseRiseRise", |
-#define UMC 6 |
- "UMC UMC UMC ", |
-#define SIS 7 |
- "Sis Sis Sis ", |
-#define NATIONAL 8 |
- "Geode by NSC", |
-}; |
- |
-static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]); |
- |
- |
-#define MAN_UNKNOWN 9 |
- |
-#if !defined(AMD_64) |
-#define SSE2_FLAG (1<<26) |
-unsigned long |
-s_mpi_is_sse2() |
-{ |
- unsigned long eax, ebx, ecx, edx; |
- int manufacturer = MAN_UNKNOWN; |
- int i; |
- char string[13]; |
- |
- if (is386() || is486()) { |
- return 0; |
- } |
- freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
- /* string holds the CPU's manufacturer ID string - a twelve |
- * character ASCII string stored in ebx, edx, ecx, and |
- * the 32-bit extended feature flags are in edx, ecx. |
- */ |
- *(int *)string = ebx; |
- *(int *)&string[4] = (int)edx; |
- *(int *)&string[8] = (int)ecx; |
- string[12] = 0; |
- |
- /* has no SSE2 extensions */ |
- if (eax == 0) { |
- return 0; |
- } |
- |
- for (i=0; i < n_manufacturers; i++) { |
- if ( strcmp(manMap[i],string) == 0) { |
- manufacturer = i; |
- break; |
- } |
- } |
- |
- freebl_cpuid(1,&eax,&ebx,&ecx,&edx); |
- return (edx & SSE2_FLAG) == SSE2_FLAG; |
-} |
-#endif |
- |
-unsigned long |
-s_mpi_getProcessorLineSize() |
-{ |
- unsigned long eax, ebx, ecx, edx; |
- unsigned long cpuidLevel; |
- unsigned long cacheLineSize = 0; |
- int manufacturer = MAN_UNKNOWN; |
- int i; |
- char string[65]; |
- |
-#if !defined(AMD_64) |
- if (is386()) { |
- return 0; /* 386 had no cache */ |
- } if (is486()) { |
- return 32; /* really? need more info */ |
- } |
-#endif |
- |
- /* Pentium, cpuid command is available */ |
- freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
- cpuidLevel = eax; |
- /* string holds the CPU's manufacturer ID string - a twelve |
- * character ASCII string stored in ebx, edx, ecx, and |
- * the 32-bit extended feature flags are in edx, ecx. |
- */ |
- *(int *)string = ebx; |
- *(int *)&string[4] = (int)edx; |
- *(int *)&string[8] = (int)ecx; |
- string[12] = 0; |
- |
- manufacturer = MAN_UNKNOWN; |
- for (i=0; i < n_manufacturers; i++) { |
- if ( strcmp(manMap[i],string) == 0) { |
- manufacturer = i; |
- } |
- } |
- |
- if (manufacturer == INTEL) { |
- cacheLineSize = getIntelCacheLineSize(cpuidLevel); |
- } else { |
- cacheLineSize = getOtherCacheLineSize(cpuidLevel); |
- } |
- /* doesn't support cache info based on cpuid. This means |
- * an old pentium class processor, which have cache lines of |
- * 32. If we learn differently, we can use a switch based on |
- * the Manufacturer id */ |
- if (cacheLineSize == 0) { |
- cacheLineSize = 32; |
- } |
- return cacheLineSize; |
-} |
-#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
-#endif |
- |
-#if defined(__ppc64__) |
-/* |
- * Sigh, The PPC has some really nice features to help us determine cache |
- * size, since it had lots of direct control functions to do so. The POWER |
- * processor even has an instruction to do this, but it was dropped in |
- * PowerPC. Unfortunately most of them are not available in user mode. |
- * |
- * The dcbz function would be a great way to determine cache line size except |
- * 1) it only works on write-back memory (it throws an exception otherwise), |
- * and 2) because so many mac programs 'knew' the processor cache size was |
- * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new |
- * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep |
- * these programs happy. dcbzl work if 64 bit instructions are supported. |
- * If you know 64 bit instructions are supported, and that stack is |
- * write-back, you can use this code. |
- */ |
-#include "memory.h" |
- |
-/* clear the cache line that contains 'array' */ |
-static inline void dcbzl(char *array) |
-{ |
- register char *a asm("r2") = array; |
- __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) ); |
-} |
- |
- |
-#define PPC_DO_ALIGN(x,y) ((char *)\ |
- ((((long long) (x))+((y)-1))&~((y)-1))) |
- |
-#define PPC_MAX_LINE_SIZE 256 |
-unsigned long |
-s_mpi_getProcessorLineSize() |
-{ |
- char testArray[2*PPC_MAX_LINE_SIZE+1]; |
- char *test; |
- int i; |
- |
- /* align the array on a maximum line size boundary, so we |
- * know we are starting to clear from the first address */ |
- test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); |
- /* set all the values to 1's */ |
- memset(test, 0xff, PPC_MAX_LINE_SIZE); |
- /* clear one cache block starting at 'test' */ |
- dcbzl(test); |
- |
- /* find the size of the cleared area, that's our block size */ |
- for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) { |
- if (test[i-1] == 0) { |
- return i; |
- } |
- } |
- return 0; |
-} |
- |
-#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
-#endif |
- |
- |
-/* |
- * put other processor and platform specific cache code here |
- * return the smallest cache line size in bytes on the processor |
- * (usually the L1 cache). If the OS has a call, this would be |
- * a greate place to put it. |
- * |
- * If there is no cache, return 0; |
- * |
- * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions |
- * below aren't compiled. |
- * |
- */ |
- |
- |
-/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or |
- * OS */ |
-#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED) |
- |
-unsigned long |
-s_mpi_getProcessorLineSize() |
-{ |
- return MPI_CACHE_LINE_SIZE; |
-} |
-#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
-#endif |
- |
- |
-/* If no way to get the processor cache line size has been defined, assume |
- * it's 32 bytes (most common value, does not significantly impact performance) |
- */ |
-#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED |
-unsigned long |
-s_mpi_getProcessorLineSize() |
-{ |
- return 32; |
-} |
-#endif |
- |
-#ifdef TEST_IT |
-#include <stdio.h> |
- |
-main() |
-{ |
- printf("line size = %d\n", s_mpi_getProcessorLineSize()); |
-} |
-#endif |