| Index: mozilla/security/nss/lib/freebl/mpi/mpcpucache.c
|
| ===================================================================
|
| --- mozilla/security/nss/lib/freebl/mpi/mpcpucache.c (revision 191424)
|
| +++ mozilla/security/nss/lib/freebl/mpi/mpcpucache.c (working copy)
|
| @@ -1,813 +0,0 @@
|
| -/* This Source Code Form is subject to the terms of the Mozilla Public
|
| - * License, v. 2.0. If a copy of the MPL was not distributed with this
|
| - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
| -
|
| -#include "mpi.h"
|
| -
|
| -/*
|
| - * This file implements a single function: s_mpi_getProcessorLineSize();
|
| - * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
|
| - * if a cache exists, or zero if there is no cache. If more than one
|
| - * cache line exists, it should return the smallest line size (which is
|
| - * usually the L1 cache).
|
| - *
|
| - * mp_modexp uses this information to make sure that private key information
|
| - * isn't being leaked through the cache.
|
| - *
|
| - * Currently the file returns good data for most modern x86 processors, and
|
| - * reasonable data on 64-bit ppc processors. All other processors are assumed
|
| - * to have a cache line size of 32 bytes unless modified by target.mk.
|
| - *
|
| - */
|
| -
|
| -#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
|
| -/* X86 processors have special instructions that tell us about the cache */
|
| -#include "string.h"
|
| -
|
| -#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
|
| -#define AMD_64 1
|
| -#endif
|
| -
|
| -/* Generic CPUID function */
|
| -#if defined(AMD_64)
|
| -
|
| -#if defined(__GNUC__)
|
| -
|
| -void freebl_cpuid(unsigned long op, unsigned long *eax,
|
| - unsigned long *ebx, unsigned long *ecx,
|
| - unsigned long *edx)
|
| -{
|
| - __asm__("cpuid\n\t"
|
| - : "=a" (*eax),
|
| - "=b" (*ebx),
|
| - "=c" (*ecx),
|
| - "=d" (*edx)
|
| - : "0" (op));
|
| -}
|
| -
|
| -#elif defined(_MSC_VER)
|
| -
|
| -#include <intrin.h>
|
| -
|
| -void freebl_cpuid(unsigned long op, unsigned long *eax,
|
| - unsigned long *ebx, unsigned long *ecx,
|
| - unsigned long *edx)
|
| -{
|
| - int intrinsic_out[4];
|
| -
|
| - __cpuid(intrinsic_out, op);
|
| - *eax = intrinsic_out[0];
|
| - *ebx = intrinsic_out[1];
|
| - *ecx = intrinsic_out[2];
|
| - *edx = intrinsic_out[3];
|
| -}
|
| -
|
| -#endif
|
| -
|
| -#else /* !defined(AMD_64) */
|
| -
|
| -/* x86 */
|
| -
|
| -#if defined(__GNUC__)
|
| -void freebl_cpuid(unsigned long op, unsigned long *eax,
|
| - unsigned long *ebx, unsigned long *ecx,
|
| - unsigned long *edx)
|
| -{
|
| -/* sigh GCC isn't smart enough to save the ebx PIC register on it's own
|
| - * in this case, so do it by hand. Use edi to store ebx and pass the
|
| - * value returned in ebx from cpuid through edi. */
|
| - __asm__("mov %%ebx,%%edi\n\t"
|
| - "cpuid\n\t"
|
| - "xchgl %%ebx,%%edi\n\t"
|
| - : "=a" (*eax),
|
| - "=D" (*ebx),
|
| - "=c" (*ecx),
|
| - "=d" (*edx)
|
| - : "0" (op));
|
| -}
|
| -
|
| -/*
|
| - * try flipping a processor flag to determine CPU type
|
| - */
|
| -static unsigned long changeFlag(unsigned long flag)
|
| -{
|
| - unsigned long changedFlags, originalFlags;
|
| - __asm__("pushfl\n\t" /* get the flags */
|
| - "popl %0\n\t"
|
| - "movl %0,%1\n\t" /* save the original flags */
|
| - "xorl %2,%0\n\t" /* flip the bit */
|
| - "pushl %0\n\t" /* set the flags */
|
| - "popfl\n\t"
|
| - "pushfl\n\t" /* get the flags again (for return) */
|
| - "popl %0\n\t"
|
| - "pushl %1\n\t" /* restore the original flags */
|
| - "popfl\n\t"
|
| - : "=r" (changedFlags),
|
| - "=r" (originalFlags),
|
| - "=r" (flag)
|
| - : "2" (flag));
|
| - return changedFlags ^ originalFlags;
|
| -}
|
| -
|
| -#elif defined(_MSC_VER)
|
| -
|
| -/*
|
| - * windows versions of the above assembler
|
| - */
|
| -#define wcpuid __asm __emit 0fh __asm __emit 0a2h
|
| -void freebl_cpuid(unsigned long op, unsigned long *Reax,
|
| - unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
|
| -{
|
| - unsigned long Leax, Lebx, Lecx, Ledx;
|
| - __asm {
|
| - pushad
|
| - mov eax,op
|
| - wcpuid
|
| - mov Leax,eax
|
| - mov Lebx,ebx
|
| - mov Lecx,ecx
|
| - mov Ledx,edx
|
| - popad
|
| - }
|
| - *Reax = Leax;
|
| - *Rebx = Lebx;
|
| - *Recx = Lecx;
|
| - *Redx = Ledx;
|
| -}
|
| -
|
| -static unsigned long changeFlag(unsigned long flag)
|
| -{
|
| - unsigned long changedFlags, originalFlags;
|
| - __asm {
|
| - push eax
|
| - push ebx
|
| - pushfd /* get the flags */
|
| - pop eax
|
| - push eax /* save the flags on the stack */
|
| - mov originalFlags,eax /* save the original flags */
|
| - mov ebx,flag
|
| - xor eax,ebx /* flip the bit */
|
| - push eax /* set the flags */
|
| - popfd
|
| - pushfd /* get the flags again (for return) */
|
| - pop eax
|
| - popfd /* restore the original flags */
|
| - mov changedFlags,eax
|
| - pop ebx
|
| - pop eax
|
| - }
|
| - return changedFlags ^ originalFlags;
|
| -}
|
| -#endif
|
| -
|
| -#endif
|
| -
|
| -#if !defined(AMD_64)
|
| -#define AC_FLAG 0x40000
|
| -#define ID_FLAG 0x200000
|
| -
|
| -/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
|
| -static int is386()
|
| -{
|
| - return changeFlag(AC_FLAG) == 0;
|
| -}
|
| -
|
| -/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
|
| -static int is486()
|
| -{
|
| - return changeFlag(ID_FLAG) == 0;
|
| -}
|
| -#endif
|
| -
|
| -
|
| -/*
|
| - * table for Intel Cache.
|
| - * See Intel Application Note AP-485 for more information
|
| - */
|
| -
|
| -typedef unsigned char CacheTypeEntry;
|
| -
|
| -typedef enum {
|
| - Cache_NONE = 0,
|
| - Cache_UNKNOWN = 1,
|
| - Cache_TLB = 2,
|
| - Cache_TLBi = 3,
|
| - Cache_TLBd = 4,
|
| - Cache_Trace = 5,
|
| - Cache_L1 = 6,
|
| - Cache_L1i = 7,
|
| - Cache_L1d = 8,
|
| - Cache_L2 = 9 ,
|
| - Cache_L2i = 10 ,
|
| - Cache_L2d = 11 ,
|
| - Cache_L3 = 12 ,
|
| - Cache_L3i = 13,
|
| - Cache_L3d = 14
|
| -} CacheType;
|
| -
|
| -struct _cache {
|
| - CacheTypeEntry type;
|
| - unsigned char lineSize;
|
| -};
|
| -static const struct _cache CacheMap[256] = {
|
| -/* 00 */ {Cache_NONE, 0 },
|
| -/* 01 */ {Cache_TLBi, 0 },
|
| -/* 02 */ {Cache_TLBi, 0 },
|
| -/* 03 */ {Cache_TLBd, 0 },
|
| -/* 04 */ {Cache_TLBd, },
|
| -/* 05 */ {Cache_UNKNOWN, 0 },
|
| -/* 06 */ {Cache_L1i, 32 },
|
| -/* 07 */ {Cache_UNKNOWN, 0 },
|
| -/* 08 */ {Cache_L1i, 32 },
|
| -/* 09 */ {Cache_UNKNOWN, 0 },
|
| -/* 0a */ {Cache_L1d, 32 },
|
| -/* 0b */ {Cache_UNKNOWN, 0 },
|
| -/* 0c */ {Cache_L1d, 32 },
|
| -/* 0d */ {Cache_UNKNOWN, 0 },
|
| -/* 0e */ {Cache_UNKNOWN, 0 },
|
| -/* 0f */ {Cache_UNKNOWN, 0 },
|
| -/* 10 */ {Cache_UNKNOWN, 0 },
|
| -/* 11 */ {Cache_UNKNOWN, 0 },
|
| -/* 12 */ {Cache_UNKNOWN, 0 },
|
| -/* 13 */ {Cache_UNKNOWN, 0 },
|
| -/* 14 */ {Cache_UNKNOWN, 0 },
|
| -/* 15 */ {Cache_UNKNOWN, 0 },
|
| -/* 16 */ {Cache_UNKNOWN, 0 },
|
| -/* 17 */ {Cache_UNKNOWN, 0 },
|
| -/* 18 */ {Cache_UNKNOWN, 0 },
|
| -/* 19 */ {Cache_UNKNOWN, 0 },
|
| -/* 1a */ {Cache_UNKNOWN, 0 },
|
| -/* 1b */ {Cache_UNKNOWN, 0 },
|
| -/* 1c */ {Cache_UNKNOWN, 0 },
|
| -/* 1d */ {Cache_UNKNOWN, 0 },
|
| -/* 1e */ {Cache_UNKNOWN, 0 },
|
| -/* 1f */ {Cache_UNKNOWN, 0 },
|
| -/* 20 */ {Cache_UNKNOWN, 0 },
|
| -/* 21 */ {Cache_UNKNOWN, 0 },
|
| -/* 22 */ {Cache_L3, 64 },
|
| -/* 23 */ {Cache_L3, 64 },
|
| -/* 24 */ {Cache_UNKNOWN, 0 },
|
| -/* 25 */ {Cache_L3, 64 },
|
| -/* 26 */ {Cache_UNKNOWN, 0 },
|
| -/* 27 */ {Cache_UNKNOWN, 0 },
|
| -/* 28 */ {Cache_UNKNOWN, 0 },
|
| -/* 29 */ {Cache_L3, 64 },
|
| -/* 2a */ {Cache_UNKNOWN, 0 },
|
| -/* 2b */ {Cache_UNKNOWN, 0 },
|
| -/* 2c */ {Cache_L1d, 64 },
|
| -/* 2d */ {Cache_UNKNOWN, 0 },
|
| -/* 2e */ {Cache_UNKNOWN, 0 },
|
| -/* 2f */ {Cache_UNKNOWN, 0 },
|
| -/* 30 */ {Cache_L1i, 64 },
|
| -/* 31 */ {Cache_UNKNOWN, 0 },
|
| -/* 32 */ {Cache_UNKNOWN, 0 },
|
| -/* 33 */ {Cache_UNKNOWN, 0 },
|
| -/* 34 */ {Cache_UNKNOWN, 0 },
|
| -/* 35 */ {Cache_UNKNOWN, 0 },
|
| -/* 36 */ {Cache_UNKNOWN, 0 },
|
| -/* 37 */ {Cache_UNKNOWN, 0 },
|
| -/* 38 */ {Cache_UNKNOWN, 0 },
|
| -/* 39 */ {Cache_L2, 64 },
|
| -/* 3a */ {Cache_UNKNOWN, 0 },
|
| -/* 3b */ {Cache_L2, 64 },
|
| -/* 3c */ {Cache_L2, 64 },
|
| -/* 3d */ {Cache_UNKNOWN, 0 },
|
| -/* 3e */ {Cache_UNKNOWN, 0 },
|
| -/* 3f */ {Cache_UNKNOWN, 0 },
|
| -/* 40 */ {Cache_L2, 0 },
|
| -/* 41 */ {Cache_L2, 32 },
|
| -/* 42 */ {Cache_L2, 32 },
|
| -/* 43 */ {Cache_L2, 32 },
|
| -/* 44 */ {Cache_L2, 32 },
|
| -/* 45 */ {Cache_L2, 32 },
|
| -/* 46 */ {Cache_UNKNOWN, 0 },
|
| -/* 47 */ {Cache_UNKNOWN, 0 },
|
| -/* 48 */ {Cache_UNKNOWN, 0 },
|
| -/* 49 */ {Cache_UNKNOWN, 0 },
|
| -/* 4a */ {Cache_UNKNOWN, 0 },
|
| -/* 4b */ {Cache_UNKNOWN, 0 },
|
| -/* 4c */ {Cache_UNKNOWN, 0 },
|
| -/* 4d */ {Cache_UNKNOWN, 0 },
|
| -/* 4e */ {Cache_UNKNOWN, 0 },
|
| -/* 4f */ {Cache_UNKNOWN, 0 },
|
| -/* 50 */ {Cache_TLBi, 0 },
|
| -/* 51 */ {Cache_TLBi, 0 },
|
| -/* 52 */ {Cache_TLBi, 0 },
|
| -/* 53 */ {Cache_UNKNOWN, 0 },
|
| -/* 54 */ {Cache_UNKNOWN, 0 },
|
| -/* 55 */ {Cache_UNKNOWN, 0 },
|
| -/* 56 */ {Cache_UNKNOWN, 0 },
|
| -/* 57 */ {Cache_UNKNOWN, 0 },
|
| -/* 58 */ {Cache_UNKNOWN, 0 },
|
| -/* 59 */ {Cache_UNKNOWN, 0 },
|
| -/* 5a */ {Cache_UNKNOWN, 0 },
|
| -/* 5b */ {Cache_TLBd, 0 },
|
| -/* 5c */ {Cache_TLBd, 0 },
|
| -/* 5d */ {Cache_TLBd, 0 },
|
| -/* 5e */ {Cache_UNKNOWN, 0 },
|
| -/* 5f */ {Cache_UNKNOWN, 0 },
|
| -/* 60 */ {Cache_UNKNOWN, 0 },
|
| -/* 61 */ {Cache_UNKNOWN, 0 },
|
| -/* 62 */ {Cache_UNKNOWN, 0 },
|
| -/* 63 */ {Cache_UNKNOWN, 0 },
|
| -/* 64 */ {Cache_UNKNOWN, 0 },
|
| -/* 65 */ {Cache_UNKNOWN, 0 },
|
| -/* 66 */ {Cache_L1d, 64 },
|
| -/* 67 */ {Cache_L1d, 64 },
|
| -/* 68 */ {Cache_L1d, 64 },
|
| -/* 69 */ {Cache_UNKNOWN, 0 },
|
| -/* 6a */ {Cache_UNKNOWN, 0 },
|
| -/* 6b */ {Cache_UNKNOWN, 0 },
|
| -/* 6c */ {Cache_UNKNOWN, 0 },
|
| -/* 6d */ {Cache_UNKNOWN, 0 },
|
| -/* 6e */ {Cache_UNKNOWN, 0 },
|
| -/* 6f */ {Cache_UNKNOWN, 0 },
|
| -/* 70 */ {Cache_Trace, 1 },
|
| -/* 71 */ {Cache_Trace, 1 },
|
| -/* 72 */ {Cache_Trace, 1 },
|
| -/* 73 */ {Cache_UNKNOWN, 0 },
|
| -/* 74 */ {Cache_UNKNOWN, 0 },
|
| -/* 75 */ {Cache_UNKNOWN, 0 },
|
| -/* 76 */ {Cache_UNKNOWN, 0 },
|
| -/* 77 */ {Cache_UNKNOWN, 0 },
|
| -/* 78 */ {Cache_UNKNOWN, 0 },
|
| -/* 79 */ {Cache_L2, 64 },
|
| -/* 7a */ {Cache_L2, 64 },
|
| -/* 7b */ {Cache_L2, 64 },
|
| -/* 7c */ {Cache_L2, 64 },
|
| -/* 7d */ {Cache_UNKNOWN, 0 },
|
| -/* 7e */ {Cache_UNKNOWN, 0 },
|
| -/* 7f */ {Cache_UNKNOWN, 0 },
|
| -/* 80 */ {Cache_UNKNOWN, 0 },
|
| -/* 81 */ {Cache_UNKNOWN, 0 },
|
| -/* 82 */ {Cache_L2, 32 },
|
| -/* 83 */ {Cache_L2, 32 },
|
| -/* 84 */ {Cache_L2, 32 },
|
| -/* 85 */ {Cache_L2, 32 },
|
| -/* 86 */ {Cache_L2, 64 },
|
| -/* 87 */ {Cache_L2, 64 },
|
| -/* 88 */ {Cache_UNKNOWN, 0 },
|
| -/* 89 */ {Cache_UNKNOWN, 0 },
|
| -/* 8a */ {Cache_UNKNOWN, 0 },
|
| -/* 8b */ {Cache_UNKNOWN, 0 },
|
| -/* 8c */ {Cache_UNKNOWN, 0 },
|
| -/* 8d */ {Cache_UNKNOWN, 0 },
|
| -/* 8e */ {Cache_UNKNOWN, 0 },
|
| -/* 8f */ {Cache_UNKNOWN, 0 },
|
| -/* 90 */ {Cache_UNKNOWN, 0 },
|
| -/* 91 */ {Cache_UNKNOWN, 0 },
|
| -/* 92 */ {Cache_UNKNOWN, 0 },
|
| -/* 93 */ {Cache_UNKNOWN, 0 },
|
| -/* 94 */ {Cache_UNKNOWN, 0 },
|
| -/* 95 */ {Cache_UNKNOWN, 0 },
|
| -/* 96 */ {Cache_UNKNOWN, 0 },
|
| -/* 97 */ {Cache_UNKNOWN, 0 },
|
| -/* 98 */ {Cache_UNKNOWN, 0 },
|
| -/* 99 */ {Cache_UNKNOWN, 0 },
|
| -/* 9a */ {Cache_UNKNOWN, 0 },
|
| -/* 9b */ {Cache_UNKNOWN, 0 },
|
| -/* 9c */ {Cache_UNKNOWN, 0 },
|
| -/* 9d */ {Cache_UNKNOWN, 0 },
|
| -/* 9e */ {Cache_UNKNOWN, 0 },
|
| -/* 9f */ {Cache_UNKNOWN, 0 },
|
| -/* a0 */ {Cache_UNKNOWN, 0 },
|
| -/* a1 */ {Cache_UNKNOWN, 0 },
|
| -/* a2 */ {Cache_UNKNOWN, 0 },
|
| -/* a3 */ {Cache_UNKNOWN, 0 },
|
| -/* a4 */ {Cache_UNKNOWN, 0 },
|
| -/* a5 */ {Cache_UNKNOWN, 0 },
|
| -/* a6 */ {Cache_UNKNOWN, 0 },
|
| -/* a7 */ {Cache_UNKNOWN, 0 },
|
| -/* a8 */ {Cache_UNKNOWN, 0 },
|
| -/* a9 */ {Cache_UNKNOWN, 0 },
|
| -/* aa */ {Cache_UNKNOWN, 0 },
|
| -/* ab */ {Cache_UNKNOWN, 0 },
|
| -/* ac */ {Cache_UNKNOWN, 0 },
|
| -/* ad */ {Cache_UNKNOWN, 0 },
|
| -/* ae */ {Cache_UNKNOWN, 0 },
|
| -/* af */ {Cache_UNKNOWN, 0 },
|
| -/* b0 */ {Cache_TLBi, 0 },
|
| -/* b1 */ {Cache_UNKNOWN, 0 },
|
| -/* b2 */ {Cache_UNKNOWN, 0 },
|
| -/* b3 */ {Cache_TLBd, 0 },
|
| -/* b4 */ {Cache_UNKNOWN, 0 },
|
| -/* b5 */ {Cache_UNKNOWN, 0 },
|
| -/* b6 */ {Cache_UNKNOWN, 0 },
|
| -/* b7 */ {Cache_UNKNOWN, 0 },
|
| -/* b8 */ {Cache_UNKNOWN, 0 },
|
| -/* b9 */ {Cache_UNKNOWN, 0 },
|
| -/* ba */ {Cache_UNKNOWN, 0 },
|
| -/* bb */ {Cache_UNKNOWN, 0 },
|
| -/* bc */ {Cache_UNKNOWN, 0 },
|
| -/* bd */ {Cache_UNKNOWN, 0 },
|
| -/* be */ {Cache_UNKNOWN, 0 },
|
| -/* bf */ {Cache_UNKNOWN, 0 },
|
| -/* c0 */ {Cache_UNKNOWN, 0 },
|
| -/* c1 */ {Cache_UNKNOWN, 0 },
|
| -/* c2 */ {Cache_UNKNOWN, 0 },
|
| -/* c3 */ {Cache_UNKNOWN, 0 },
|
| -/* c4 */ {Cache_UNKNOWN, 0 },
|
| -/* c5 */ {Cache_UNKNOWN, 0 },
|
| -/* c6 */ {Cache_UNKNOWN, 0 },
|
| -/* c7 */ {Cache_UNKNOWN, 0 },
|
| -/* c8 */ {Cache_UNKNOWN, 0 },
|
| -/* c9 */ {Cache_UNKNOWN, 0 },
|
| -/* ca */ {Cache_UNKNOWN, 0 },
|
| -/* cb */ {Cache_UNKNOWN, 0 },
|
| -/* cc */ {Cache_UNKNOWN, 0 },
|
| -/* cd */ {Cache_UNKNOWN, 0 },
|
| -/* ce */ {Cache_UNKNOWN, 0 },
|
| -/* cf */ {Cache_UNKNOWN, 0 },
|
| -/* d0 */ {Cache_UNKNOWN, 0 },
|
| -/* d1 */ {Cache_UNKNOWN, 0 },
|
| -/* d2 */ {Cache_UNKNOWN, 0 },
|
| -/* d3 */ {Cache_UNKNOWN, 0 },
|
| -/* d4 */ {Cache_UNKNOWN, 0 },
|
| -/* d5 */ {Cache_UNKNOWN, 0 },
|
| -/* d6 */ {Cache_UNKNOWN, 0 },
|
| -/* d7 */ {Cache_UNKNOWN, 0 },
|
| -/* d8 */ {Cache_UNKNOWN, 0 },
|
| -/* d9 */ {Cache_UNKNOWN, 0 },
|
| -/* da */ {Cache_UNKNOWN, 0 },
|
| -/* db */ {Cache_UNKNOWN, 0 },
|
| -/* dc */ {Cache_UNKNOWN, 0 },
|
| -/* dd */ {Cache_UNKNOWN, 0 },
|
| -/* de */ {Cache_UNKNOWN, 0 },
|
| -/* df */ {Cache_UNKNOWN, 0 },
|
| -/* e0 */ {Cache_UNKNOWN, 0 },
|
| -/* e1 */ {Cache_UNKNOWN, 0 },
|
| -/* e2 */ {Cache_UNKNOWN, 0 },
|
| -/* e3 */ {Cache_UNKNOWN, 0 },
|
| -/* e4 */ {Cache_UNKNOWN, 0 },
|
| -/* e5 */ {Cache_UNKNOWN, 0 },
|
| -/* e6 */ {Cache_UNKNOWN, 0 },
|
| -/* e7 */ {Cache_UNKNOWN, 0 },
|
| -/* e8 */ {Cache_UNKNOWN, 0 },
|
| -/* e9 */ {Cache_UNKNOWN, 0 },
|
| -/* ea */ {Cache_UNKNOWN, 0 },
|
| -/* eb */ {Cache_UNKNOWN, 0 },
|
| -/* ec */ {Cache_UNKNOWN, 0 },
|
| -/* ed */ {Cache_UNKNOWN, 0 },
|
| -/* ee */ {Cache_UNKNOWN, 0 },
|
| -/* ef */ {Cache_UNKNOWN, 0 },
|
| -/* f0 */ {Cache_UNKNOWN, 0 },
|
| -/* f1 */ {Cache_UNKNOWN, 0 },
|
| -/* f2 */ {Cache_UNKNOWN, 0 },
|
| -/* f3 */ {Cache_UNKNOWN, 0 },
|
| -/* f4 */ {Cache_UNKNOWN, 0 },
|
| -/* f5 */ {Cache_UNKNOWN, 0 },
|
| -/* f6 */ {Cache_UNKNOWN, 0 },
|
| -/* f7 */ {Cache_UNKNOWN, 0 },
|
| -/* f8 */ {Cache_UNKNOWN, 0 },
|
| -/* f9 */ {Cache_UNKNOWN, 0 },
|
| -/* fa */ {Cache_UNKNOWN, 0 },
|
| -/* fb */ {Cache_UNKNOWN, 0 },
|
| -/* fc */ {Cache_UNKNOWN, 0 },
|
| -/* fd */ {Cache_UNKNOWN, 0 },
|
| -/* fe */ {Cache_UNKNOWN, 0 },
|
| -/* ff */ {Cache_UNKNOWN, 0 }
|
| -};
|
| -
|
| -
|
| -/*
|
| - * use the above table to determine the CacheEntryLineSize.
|
| - */
|
| -static void
|
| -getIntelCacheEntryLineSize(unsigned long val, int *level,
|
| - unsigned long *lineSize)
|
| -{
|
| - CacheType type;
|
| -
|
| - type = CacheMap[val].type;
|
| - /* only interested in data caches */
|
| - /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
|
| - * this data check has the side effect of rejecting that entry. If
|
| - * that wasn't the case, we could have to reject it explicitly */
|
| - if (CacheMap[val].lineSize == 0) {
|
| - return;
|
| - }
|
| - /* look at the caches, skip types we aren't interested in.
|
| - * if we already have a value for a lower level cache, skip the
|
| - * current entry */
|
| - if ((type == Cache_L1)|| (type == Cache_L1d)) {
|
| - *level = 1;
|
| - *lineSize = CacheMap[val].lineSize;
|
| - } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
|
| - *level = 2;
|
| - *lineSize = CacheMap[val].lineSize;
|
| - } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
|
| - *level = 3;
|
| - *lineSize = CacheMap[val].lineSize;
|
| - }
|
| - return;
|
| -}
|
| -
|
| -
|
| -static void
|
| -getIntelRegisterCacheLineSize(unsigned long val,
|
| - int *level, unsigned long *lineSize)
|
| -{
|
| - getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
|
| - getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
|
| - getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
|
| - getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
|
| -}
|
| -
|
| -/*
|
| - * returns '0' if no recognized cache is found, or if the cache
|
| - * information is supported by this processor
|
| - */
|
| -static unsigned long
|
| -getIntelCacheLineSize(int cpuidLevel)
|
| -{
|
| - int level = 4;
|
| - unsigned long lineSize = 0;
|
| - unsigned long eax, ebx, ecx, edx;
|
| - int repeat, count;
|
| -
|
| - if (cpuidLevel < 2) {
|
| - return 0;
|
| - }
|
| -
|
| - /* command '2' of the cpuid is intel's cache info call. Each byte of the
|
| - * 4 registers contain a potential descriptor for the cache. The CacheMap
|
| - * table maps the cache entry with the processor cache. Register 'al'
|
| - * contains a count value that cpuid '2' needs to be called in order to
|
| - * find all the cache descriptors. Only registers with the high bit set
|
| - * to 'zero' have valid descriptors. This code loops through all the
|
| - * required calls to cpuid '2' and passes any valid descriptors it finds
|
| - * to the getIntelRegisterCacheLineSize code, which breaks the registers
|
| - * down into their component descriptors. In the end the lineSize of the
|
| - * lowest level cache data cache is returned. */
|
| - freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
|
| - repeat = eax & 0xf;
|
| - for (count = 0; count < repeat; count++) {
|
| - if ((eax & 0x80000000) == 0) {
|
| - getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
|
| - }
|
| - if ((ebx & 0x80000000) == 0) {
|
| - getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
|
| - }
|
| - if ((ecx & 0x80000000) == 0) {
|
| - getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
|
| - }
|
| - if ((edx & 0x80000000) == 0) {
|
| - getIntelRegisterCacheLineSize(edx, &level, &lineSize);
|
| - }
|
| - if (count+1 != repeat) {
|
| - freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
|
| - }
|
| - }
|
| - return lineSize;
|
| -}
|
| -
|
| -/*
|
| - * returns '0' if the cache info is not supported by this processor.
|
| - * This is based on the AMD extended cache commands for cpuid.
|
| - * (see "AMD Processor Recognition Application Note" Publication 20734).
|
| - * Some other processors use the identical scheme.
|
| - * (see "Processor Recognition, Transmeta Corporation").
|
| - */
|
| -static unsigned long
|
| -getOtherCacheLineSize(unsigned long cpuidLevel)
|
| -{
|
| - unsigned long lineSize = 0;
|
| - unsigned long eax, ebx, ecx, edx;
|
| -
|
| - /* get the Extended CPUID level */
|
| - freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
|
| - cpuidLevel = eax;
|
| -
|
| - if (cpuidLevel >= 0x80000005) {
|
| - freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
|
| - lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
|
| - }
|
| - return lineSize;
|
| -}
|
| -
|
| -static const char * const manMap[] = {
|
| -#define INTEL 0
|
| - "GenuineIntel",
|
| -#define AMD 1
|
| - "AuthenticAMD",
|
| -#define CYRIX 2
|
| - "CyrixInstead",
|
| -#define CENTAUR 2
|
| - "CentaurHauls",
|
| -#define NEXGEN 3
|
| - "NexGenDriven",
|
| -#define TRANSMETA 4
|
| - "GenuineTMx86",
|
| -#define RISE 5
|
| - "RiseRiseRise",
|
| -#define UMC 6
|
| - "UMC UMC UMC ",
|
| -#define SIS 7
|
| - "Sis Sis Sis ",
|
| -#define NATIONAL 8
|
| - "Geode by NSC",
|
| -};
|
| -
|
| -static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);
|
| -
|
| -
|
| -#define MAN_UNKNOWN 9
|
| -
|
| -#if !defined(AMD_64)
|
| -#define SSE2_FLAG (1<<26)
|
| -unsigned long
|
| -s_mpi_is_sse2()
|
| -{
|
| - unsigned long eax, ebx, ecx, edx;
|
| - int manufacturer = MAN_UNKNOWN;
|
| - int i;
|
| - char string[13];
|
| -
|
| - if (is386() || is486()) {
|
| - return 0;
|
| - }
|
| - freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
|
| - /* string holds the CPU's manufacturer ID string - a twelve
|
| - * character ASCII string stored in ebx, edx, ecx, and
|
| - * the 32-bit extended feature flags are in edx, ecx.
|
| - */
|
| - *(int *)string = ebx;
|
| - *(int *)&string[4] = (int)edx;
|
| - *(int *)&string[8] = (int)ecx;
|
| - string[12] = 0;
|
| -
|
| - /* has no SSE2 extensions */
|
| - if (eax == 0) {
|
| - return 0;
|
| - }
|
| -
|
| - for (i=0; i < n_manufacturers; i++) {
|
| - if ( strcmp(manMap[i],string) == 0) {
|
| - manufacturer = i;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - freebl_cpuid(1,&eax,&ebx,&ecx,&edx);
|
| - return (edx & SSE2_FLAG) == SSE2_FLAG;
|
| -}
|
| -#endif
|
| -
|
| -unsigned long
|
| -s_mpi_getProcessorLineSize()
|
| -{
|
| - unsigned long eax, ebx, ecx, edx;
|
| - unsigned long cpuidLevel;
|
| - unsigned long cacheLineSize = 0;
|
| - int manufacturer = MAN_UNKNOWN;
|
| - int i;
|
| - char string[65];
|
| -
|
| -#if !defined(AMD_64)
|
| - if (is386()) {
|
| - return 0; /* 386 had no cache */
|
| - } if (is486()) {
|
| - return 32; /* really? need more info */
|
| - }
|
| -#endif
|
| -
|
| - /* Pentium, cpuid command is available */
|
| - freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
|
| - cpuidLevel = eax;
|
| - /* string holds the CPU's manufacturer ID string - a twelve
|
| - * character ASCII string stored in ebx, edx, ecx, and
|
| - * the 32-bit extended feature flags are in edx, ecx.
|
| - */
|
| - *(int *)string = ebx;
|
| - *(int *)&string[4] = (int)edx;
|
| - *(int *)&string[8] = (int)ecx;
|
| - string[12] = 0;
|
| -
|
| - manufacturer = MAN_UNKNOWN;
|
| - for (i=0; i < n_manufacturers; i++) {
|
| - if ( strcmp(manMap[i],string) == 0) {
|
| - manufacturer = i;
|
| - }
|
| - }
|
| -
|
| - if (manufacturer == INTEL) {
|
| - cacheLineSize = getIntelCacheLineSize(cpuidLevel);
|
| - } else {
|
| - cacheLineSize = getOtherCacheLineSize(cpuidLevel);
|
| - }
|
| - /* doesn't support cache info based on cpuid. This means
|
| - * an old pentium class processor, which have cache lines of
|
| - * 32. If we learn differently, we can use a switch based on
|
| - * the Manufacturer id */
|
| - if (cacheLineSize == 0) {
|
| - cacheLineSize = 32;
|
| - }
|
| - return cacheLineSize;
|
| -}
|
| -#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
|
| -#endif
|
| -
|
| -#if defined(__ppc64__)
|
| -/*
|
| - * Sigh, The PPC has some really nice features to help us determine cache
|
| - * size, since it had lots of direct control functions to do so. The POWER
|
| - * processor even has an instruction to do this, but it was dropped in
|
| - * PowerPC. Unfortunately most of them are not available in user mode.
|
| - *
|
| - * The dcbz function would be a great way to determine cache line size except
|
| - * 1) it only works on write-back memory (it throws an exception otherwise),
|
| - * and 2) because so many mac programs 'knew' the processor cache size was
|
| - * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
|
| - * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
|
| - * these programs happy. dcbzl work if 64 bit instructions are supported.
|
| - * If you know 64 bit instructions are supported, and that stack is
|
| - * write-back, you can use this code.
|
| - */
|
| -#include "memory.h"
|
| -
|
| -/* clear the cache line that contains 'array' */
|
| -static inline void dcbzl(char *array)
|
| -{
|
| - register char *a asm("r2") = array;
|
| - __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
|
| -}
|
| -
|
| -
|
| -#define PPC_DO_ALIGN(x,y) ((char *)\
|
| - ((((long long) (x))+((y)-1))&~((y)-1)))
|
| -
|
| -#define PPC_MAX_LINE_SIZE 256
|
| -unsigned long
|
| -s_mpi_getProcessorLineSize()
|
| -{
|
| - char testArray[2*PPC_MAX_LINE_SIZE+1];
|
| - char *test;
|
| - int i;
|
| -
|
| - /* align the array on a maximum line size boundary, so we
|
| - * know we are starting to clear from the first address */
|
| - test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
|
| - /* set all the values to 1's */
|
| - memset(test, 0xff, PPC_MAX_LINE_SIZE);
|
| - /* clear one cache block starting at 'test' */
|
| - dcbzl(test);
|
| -
|
| - /* find the size of the cleared area, that's our block size */
|
| - for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) {
|
| - if (test[i-1] == 0) {
|
| - return i;
|
| - }
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
|
| -#endif
|
| -
|
| -
|
| -/*
|
| - * put other processor and platform specific cache code here
|
| - * return the smallest cache line size in bytes on the processor
|
| - * (usually the L1 cache). If the OS has a call, this would be
|
| - * a greate place to put it.
|
| - *
|
| - * If there is no cache, return 0;
|
| - *
|
| - * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
|
| - * below aren't compiled.
|
| - *
|
| - */
|
| -
|
| -
|
| -/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
|
| - * OS */
|
| -#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
|
| -
|
| -unsigned long
|
| -s_mpi_getProcessorLineSize()
|
| -{
|
| - return MPI_CACHE_LINE_SIZE;
|
| -}
|
| -#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
|
| -#endif
|
| -
|
| -
|
| -/* If no way to get the processor cache line size has been defined, assume
|
| - * it's 32 bytes (most common value, does not significantly impact performance)
|
| - */
|
| -#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
|
| -unsigned long
|
| -s_mpi_getProcessorLineSize()
|
| -{
|
| - return 32;
|
| -}
|
| -#endif
|
| -
|
| -#ifdef TEST_IT
|
| -#include <stdio.h>
|
| -
|
| -main()
|
| -{
|
| - printf("line size = %d\n", s_mpi_getProcessorLineSize());
|
| -}
|
| -#endif
|
|
|