Index: net/third_party/nss/ssl/mpi/mpcpucache.c |
diff --git a/net/third_party/nss/ssl/mpi/mpcpucache.c b/net/third_party/nss/ssl/mpi/mpcpucache.c |
new file mode 100644 |
index 0000000000000000000000000000000000000000..6efa07222f7a5997a4905c43a216bd40c02c2eef |
--- /dev/null |
+++ b/net/third_party/nss/ssl/mpi/mpcpucache.c |
@@ -0,0 +1,838 @@ |
+/* ***** BEGIN LICENSE BLOCK ***** |
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
+ * |
+ * The contents of this file are subject to the Mozilla Public License Version |
+ * 1.1 (the "License"); you may not use this file except in compliance with |
+ * the License. You may obtain a copy of the License at |
+ * http://www.mozilla.org/MPL/ |
+ * |
+ * Software distributed under the License is distributed on an "AS IS" basis, |
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
+ * for the specific language governing rights and limitations under the |
+ * License. |
+ * |
+ * The Original Code is the Netscape security libraries. |
+ * |
+ * The Initial Developer of the Original Code is |
+ * Red Hat, Inc |
+ * Portions created by the Initial Developer are Copyright (C) 2005 |
+ * the Initial Developer. All Rights Reserved. |
+ * |
+ * Contributor(s): |
+ * Robert Relyea <rrelyea@redhat.com> |
+ * |
+ * Alternatively, the contents of this file may be used under the terms of |
+ * either the GNU General Public License Version 2 or later (the "GPL"), or |
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
+ * in which case the provisions of the GPL or the LGPL are applicable instead |
+ * of those above. If you wish to allow use of your version of this file only |
+ * under the terms of either the GPL or the LGPL, and not to allow others to |
+ * use your version of this file under the terms of the MPL, indicate your |
+ * decision by deleting the provisions above and replace them with the notice |
+ * and other provisions required by the GPL or the LGPL. If you do not delete |
+ * the provisions above, a recipient may use your version of this file under |
+ * the terms of any one of the MPL, the GPL or the LGPL. |
+ * |
+ * ***** END LICENSE BLOCK ***** */ |
+ |
+#include "mpi.h" |
+ |
+/* |
+ * This file implements a single function, s_mpi_getProcessorLineSize(), |
+ * which returns the size in bytes of a cache line if a cache exists, or |
+ * zero if there is no cache. If more than one cache exists, it should |
+ * return the smallest line size (which is usually that of the L1 cache). |
+ * |
+ * mp_modexp uses this information to make sure that private key information |
+ * isn't being leaked through the cache. |
+ * |
+ * Currently the file returns good data for most modern x86 processors, and |
+ * reasonable data on 64-bit ppc processors. All other processors are assumed |
+ * to have a cache line size of 32 bytes unless modified by target.mk. |
+ * |
+ */ |
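+ |
+/* |
+ * Illustrative caller sketch (not code from this file): the modular |
+ * exponentiation path can read its precomputed table in strides of |
+ * |
+ *     unsigned long stride = s_mpi_getProcessorLineSize(); |
+ * |
+ * touching every cache line of the table on each access, so the entry |
+ * actually selected cannot be inferred from cache timing. A return value |
+ * of 0 (no cache) means no such masking is needed. |
+ */ |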
+ |
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
+/* X86 processors have special instructions that tell us about the cache */ |
+#include "string.h" |
+ |
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
+#define AMD_64 1 |
+#endif |
+ |
+/* Generic CPUID function */ |
+#if defined(AMD_64) |
+ |
+#if defined(__GNUC__) |
+ |
+void freebl_cpuid(unsigned long op, unsigned long *eax, |
+ unsigned long *ebx, unsigned long *ecx, |
+ unsigned long *edx) |
+{ |
+ __asm__("cpuid\n\t" |
+ : "=a" (*eax), |
+ "=b" (*ebx), |
+ "=c" (*ecx), |
+ "=d" (*edx) |
+ : "0" (op)); |
+} |
+ |
+#elif defined(_MSC_VER) |
+ |
+#include <intrin.h> |
+ |
+void freebl_cpuid(unsigned long op, unsigned long *eax, |
+ unsigned long *ebx, unsigned long *ecx, |
+ unsigned long *edx) |
+{ |
+ int intrinsic_out[4]; |
+ |
+ __cpuid(intrinsic_out, op); |
+ *eax = intrinsic_out[0]; |
+ *ebx = intrinsic_out[1]; |
+ *ecx = intrinsic_out[2]; |
+ *edx = intrinsic_out[3]; |
+} |
+ |
+#endif |
+ |
+#else /* !defined(AMD_64) */ |
+ |
+/* x86 */ |
+ |
+#if defined(__GNUC__) |
+void freebl_cpuid(unsigned long op, unsigned long *eax, |
+ unsigned long *ebx, unsigned long *ecx, |
+ unsigned long *edx) |
+{ |
+/* Sigh, GCC isn't smart enough to save the ebx PIC register on its own |
+ * in this case, so do it by hand. */ |
+ __asm__("pushl %%ebx\n\t" |
+ "cpuid\n\t" |
+ "mov %%ebx,%1\n\t" |
+ "popl %%ebx\n\t" |
+ : "=a" (*eax), |
+ "=r" (*ebx), |
+ "=c" (*ecx), |
+ "=d" (*edx) |
+ : "0" (op)); |
+} |
+ |
+/* |
+ * try flipping a processor flag to determine CPU type |
+ */ |
+static unsigned long changeFlag(unsigned long flag) |
+{ |
+ unsigned long changedFlags, originalFlags; |
+ __asm__("pushfl\n\t" /* get the flags */ |
+ "popl %0\n\t" |
+ "movl %0,%1\n\t" /* save the original flags */ |
+ "xorl %2,%0\n\t" /* flip the bit */ |
+ "pushl %0\n\t" /* set the flags */ |
+ "popfl\n\t" |
+ "pushfl\n\t" /* get the flags again (for return) */ |
+ "popl %0\n\t" |
+ "pushl %1\n\t" /* restore the original flags */ |
+ "popfl\n\t" |
+ : "=r" (changedFlags), |
+ "=r" (originalFlags), |
+ "=r" (flag) |
+ : "2" (flag)); |
+ return changedFlags ^ originalFlags; |
+} |
+ |
+#elif defined(_MSC_VER) |
+ |
+/* |
+ * Windows (MSVC) versions of the above assembly routines |
+ */ |
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h |
+void freebl_cpuid(unsigned long op, unsigned long *Reax, |
+ unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) |
+{ |
+ unsigned long Leax, Lebx, Lecx, Ledx; |
+ __asm { |
+ pushad |
+ mov eax,op |
+ wcpuid |
+ mov Leax,eax |
+ mov Lebx,ebx |
+ mov Lecx,ecx |
+ mov Ledx,edx |
+ popad |
+ } |
+ *Reax = Leax; |
+ *Rebx = Lebx; |
+ *Recx = Lecx; |
+ *Redx = Ledx; |
+} |
+ |
+static unsigned long changeFlag(unsigned long flag) |
+{ |
+ unsigned long changedFlags, originalFlags; |
+ __asm { |
+ push eax |
+ push ebx |
+ pushfd /* get the flags */ |
+ pop eax |
+ push eax /* save the flags on the stack */ |
+ mov originalFlags,eax /* save the original flags */ |
+ mov ebx,flag |
+ xor eax,ebx /* flip the bit */ |
+ push eax /* set the flags */ |
+ popfd |
+ pushfd /* get the flags again (for return) */ |
+ pop eax |
+ popfd /* restore the original flags */ |
+ mov changedFlags,eax |
+ pop ebx |
+ pop eax |
+ } |
+ return changedFlags ^ originalFlags; |
+} |
+#endif |
+ |
+#endif |
+ |
+#if !defined(AMD_64) |
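+/* AC is EFLAGS bit 18 (alignment check); ID is EFLAGS bit 21 (cpuid support) */ |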
+#define AC_FLAG 0x40000 |
+#define ID_FLAG 0x200000 |
+ |
+/* 386 processors can't flip the AC_FLAG; see Intel Application Note AP-485 */ |
+static int is386() |
+{ |
+ return changeFlag(AC_FLAG) == 0; |
+} |
+ |
+/* 486 processors can't flip the ID_FLAG; see Intel Application Note AP-485 */ |
+static int is486() |
+{ |
+ return changeFlag(ID_FLAG) == 0; |
+} |
+#endif |
+ |
+ |
+/* |
+ * table for Intel Cache. |
+ * See Intel Application Note AP-485 for more information |
+ */ |
+ |
+typedef unsigned char CacheTypeEntry; |
+ |
+typedef enum { |
+ Cache_NONE = 0, |
+ Cache_UNKNOWN = 1, |
+ Cache_TLB = 2, |
+ Cache_TLBi = 3, |
+ Cache_TLBd = 4, |
+ Cache_Trace = 5, |
+ Cache_L1 = 6, |
+ Cache_L1i = 7, |
+ Cache_L1d = 8, |
+ Cache_L2 = 9 , |
+ Cache_L2i = 10 , |
+ Cache_L2d = 11 , |
+ Cache_L3 = 12 , |
+ Cache_L3i = 13, |
+ Cache_L3d = 14 |
+} CacheType; |
+ |
+struct _cache { |
+ CacheTypeEntry type; |
+ unsigned char lineSize; |
+}; |
+static const struct _cache CacheMap[256] = { |
+/* 00 */ {Cache_NONE, 0 }, |
+/* 01 */ {Cache_TLBi, 0 }, |
+/* 02 */ {Cache_TLBi, 0 }, |
+/* 03 */ {Cache_TLBd, 0 }, |
+/* 04 */ {Cache_TLBd, 0 }, |
+/* 05 */ {Cache_UNKNOWN, 0 }, |
+/* 06 */ {Cache_L1i, 32 }, |
+/* 07 */ {Cache_UNKNOWN, 0 }, |
+/* 08 */ {Cache_L1i, 32 }, |
+/* 09 */ {Cache_UNKNOWN, 0 }, |
+/* 0a */ {Cache_L1d, 32 }, |
+/* 0b */ {Cache_UNKNOWN, 0 }, |
+/* 0c */ {Cache_L1d, 32 }, |
+/* 0d */ {Cache_UNKNOWN, 0 }, |
+/* 0e */ {Cache_UNKNOWN, 0 }, |
+/* 0f */ {Cache_UNKNOWN, 0 }, |
+/* 10 */ {Cache_UNKNOWN, 0 }, |
+/* 11 */ {Cache_UNKNOWN, 0 }, |
+/* 12 */ {Cache_UNKNOWN, 0 }, |
+/* 13 */ {Cache_UNKNOWN, 0 }, |
+/* 14 */ {Cache_UNKNOWN, 0 }, |
+/* 15 */ {Cache_UNKNOWN, 0 }, |
+/* 16 */ {Cache_UNKNOWN, 0 }, |
+/* 17 */ {Cache_UNKNOWN, 0 }, |
+/* 18 */ {Cache_UNKNOWN, 0 }, |
+/* 19 */ {Cache_UNKNOWN, 0 }, |
+/* 1a */ {Cache_UNKNOWN, 0 }, |
+/* 1b */ {Cache_UNKNOWN, 0 }, |
+/* 1c */ {Cache_UNKNOWN, 0 }, |
+/* 1d */ {Cache_UNKNOWN, 0 }, |
+/* 1e */ {Cache_UNKNOWN, 0 }, |
+/* 1f */ {Cache_UNKNOWN, 0 }, |
+/* 20 */ {Cache_UNKNOWN, 0 }, |
+/* 21 */ {Cache_UNKNOWN, 0 }, |
+/* 22 */ {Cache_L3, 64 }, |
+/* 23 */ {Cache_L3, 64 }, |
+/* 24 */ {Cache_UNKNOWN, 0 }, |
+/* 25 */ {Cache_L3, 64 }, |
+/* 26 */ {Cache_UNKNOWN, 0 }, |
+/* 27 */ {Cache_UNKNOWN, 0 }, |
+/* 28 */ {Cache_UNKNOWN, 0 }, |
+/* 29 */ {Cache_L3, 64 }, |
+/* 2a */ {Cache_UNKNOWN, 0 }, |
+/* 2b */ {Cache_UNKNOWN, 0 }, |
+/* 2c */ {Cache_L1d, 64 }, |
+/* 2d */ {Cache_UNKNOWN, 0 }, |
+/* 2e */ {Cache_UNKNOWN, 0 }, |
+/* 2f */ {Cache_UNKNOWN, 0 }, |
+/* 30 */ {Cache_L1i, 64 }, |
+/* 31 */ {Cache_UNKNOWN, 0 }, |
+/* 32 */ {Cache_UNKNOWN, 0 }, |
+/* 33 */ {Cache_UNKNOWN, 0 }, |
+/* 34 */ {Cache_UNKNOWN, 0 }, |
+/* 35 */ {Cache_UNKNOWN, 0 }, |
+/* 36 */ {Cache_UNKNOWN, 0 }, |
+/* 37 */ {Cache_UNKNOWN, 0 }, |
+/* 38 */ {Cache_UNKNOWN, 0 }, |
+/* 39 */ {Cache_L2, 64 }, |
+/* 3a */ {Cache_UNKNOWN, 0 }, |
+/* 3b */ {Cache_L2, 64 }, |
+/* 3c */ {Cache_L2, 64 }, |
+/* 3d */ {Cache_UNKNOWN, 0 }, |
+/* 3e */ {Cache_UNKNOWN, 0 }, |
+/* 3f */ {Cache_UNKNOWN, 0 }, |
+/* 40 */ {Cache_L2, 0 }, |
+/* 41 */ {Cache_L2, 32 }, |
+/* 42 */ {Cache_L2, 32 }, |
+/* 43 */ {Cache_L2, 32 }, |
+/* 44 */ {Cache_L2, 32 }, |
+/* 45 */ {Cache_L2, 32 }, |
+/* 46 */ {Cache_UNKNOWN, 0 }, |
+/* 47 */ {Cache_UNKNOWN, 0 }, |
+/* 48 */ {Cache_UNKNOWN, 0 }, |
+/* 49 */ {Cache_UNKNOWN, 0 }, |
+/* 4a */ {Cache_UNKNOWN, 0 }, |
+/* 4b */ {Cache_UNKNOWN, 0 }, |
+/* 4c */ {Cache_UNKNOWN, 0 }, |
+/* 4d */ {Cache_UNKNOWN, 0 }, |
+/* 4e */ {Cache_UNKNOWN, 0 }, |
+/* 4f */ {Cache_UNKNOWN, 0 }, |
+/* 50 */ {Cache_TLBi, 0 }, |
+/* 51 */ {Cache_TLBi, 0 }, |
+/* 52 */ {Cache_TLBi, 0 }, |
+/* 53 */ {Cache_UNKNOWN, 0 }, |
+/* 54 */ {Cache_UNKNOWN, 0 }, |
+/* 55 */ {Cache_UNKNOWN, 0 }, |
+/* 56 */ {Cache_UNKNOWN, 0 }, |
+/* 57 */ {Cache_UNKNOWN, 0 }, |
+/* 58 */ {Cache_UNKNOWN, 0 }, |
+/* 59 */ {Cache_UNKNOWN, 0 }, |
+/* 5a */ {Cache_UNKNOWN, 0 }, |
+/* 5b */ {Cache_TLBd, 0 }, |
+/* 5c */ {Cache_TLBd, 0 }, |
+/* 5d */ {Cache_TLBd, 0 }, |
+/* 5e */ {Cache_UNKNOWN, 0 }, |
+/* 5f */ {Cache_UNKNOWN, 0 }, |
+/* 60 */ {Cache_UNKNOWN, 0 }, |
+/* 61 */ {Cache_UNKNOWN, 0 }, |
+/* 62 */ {Cache_UNKNOWN, 0 }, |
+/* 63 */ {Cache_UNKNOWN, 0 }, |
+/* 64 */ {Cache_UNKNOWN, 0 }, |
+/* 65 */ {Cache_UNKNOWN, 0 }, |
+/* 66 */ {Cache_L1d, 64 }, |
+/* 67 */ {Cache_L1d, 64 }, |
+/* 68 */ {Cache_L1d, 64 }, |
+/* 69 */ {Cache_UNKNOWN, 0 }, |
+/* 6a */ {Cache_UNKNOWN, 0 }, |
+/* 6b */ {Cache_UNKNOWN, 0 }, |
+/* 6c */ {Cache_UNKNOWN, 0 }, |
+/* 6d */ {Cache_UNKNOWN, 0 }, |
+/* 6e */ {Cache_UNKNOWN, 0 }, |
+/* 6f */ {Cache_UNKNOWN, 0 }, |
+/* 70 */ {Cache_Trace, 1 }, |
+/* 71 */ {Cache_Trace, 1 }, |
+/* 72 */ {Cache_Trace, 1 }, |
+/* 73 */ {Cache_UNKNOWN, 0 }, |
+/* 74 */ {Cache_UNKNOWN, 0 }, |
+/* 75 */ {Cache_UNKNOWN, 0 }, |
+/* 76 */ {Cache_UNKNOWN, 0 }, |
+/* 77 */ {Cache_UNKNOWN, 0 }, |
+/* 78 */ {Cache_UNKNOWN, 0 }, |
+/* 79 */ {Cache_L2, 64 }, |
+/* 7a */ {Cache_L2, 64 }, |
+/* 7b */ {Cache_L2, 64 }, |
+/* 7c */ {Cache_L2, 64 }, |
+/* 7d */ {Cache_UNKNOWN, 0 }, |
+/* 7e */ {Cache_UNKNOWN, 0 }, |
+/* 7f */ {Cache_UNKNOWN, 0 }, |
+/* 80 */ {Cache_UNKNOWN, 0 }, |
+/* 81 */ {Cache_UNKNOWN, 0 }, |
+/* 82 */ {Cache_L2, 32 }, |
+/* 83 */ {Cache_L2, 32 }, |
+/* 84 */ {Cache_L2, 32 }, |
+/* 85 */ {Cache_L2, 32 }, |
+/* 86 */ {Cache_L2, 64 }, |
+/* 87 */ {Cache_L2, 64 }, |
+/* 88 */ {Cache_UNKNOWN, 0 }, |
+/* 89 */ {Cache_UNKNOWN, 0 }, |
+/* 8a */ {Cache_UNKNOWN, 0 }, |
+/* 8b */ {Cache_UNKNOWN, 0 }, |
+/* 8c */ {Cache_UNKNOWN, 0 }, |
+/* 8d */ {Cache_UNKNOWN, 0 }, |
+/* 8e */ {Cache_UNKNOWN, 0 }, |
+/* 8f */ {Cache_UNKNOWN, 0 }, |
+/* 90 */ {Cache_UNKNOWN, 0 }, |
+/* 91 */ {Cache_UNKNOWN, 0 }, |
+/* 92 */ {Cache_UNKNOWN, 0 }, |
+/* 93 */ {Cache_UNKNOWN, 0 }, |
+/* 94 */ {Cache_UNKNOWN, 0 }, |
+/* 95 */ {Cache_UNKNOWN, 0 }, |
+/* 96 */ {Cache_UNKNOWN, 0 }, |
+/* 97 */ {Cache_UNKNOWN, 0 }, |
+/* 98 */ {Cache_UNKNOWN, 0 }, |
+/* 99 */ {Cache_UNKNOWN, 0 }, |
+/* 9a */ {Cache_UNKNOWN, 0 }, |
+/* 9b */ {Cache_UNKNOWN, 0 }, |
+/* 9c */ {Cache_UNKNOWN, 0 }, |
+/* 9d */ {Cache_UNKNOWN, 0 }, |
+/* 9e */ {Cache_UNKNOWN, 0 }, |
+/* 9f */ {Cache_UNKNOWN, 0 }, |
+/* a0 */ {Cache_UNKNOWN, 0 }, |
+/* a1 */ {Cache_UNKNOWN, 0 }, |
+/* a2 */ {Cache_UNKNOWN, 0 }, |
+/* a3 */ {Cache_UNKNOWN, 0 }, |
+/* a4 */ {Cache_UNKNOWN, 0 }, |
+/* a5 */ {Cache_UNKNOWN, 0 }, |
+/* a6 */ {Cache_UNKNOWN, 0 }, |
+/* a7 */ {Cache_UNKNOWN, 0 }, |
+/* a8 */ {Cache_UNKNOWN, 0 }, |
+/* a9 */ {Cache_UNKNOWN, 0 }, |
+/* aa */ {Cache_UNKNOWN, 0 }, |
+/* ab */ {Cache_UNKNOWN, 0 }, |
+/* ac */ {Cache_UNKNOWN, 0 }, |
+/* ad */ {Cache_UNKNOWN, 0 }, |
+/* ae */ {Cache_UNKNOWN, 0 }, |
+/* af */ {Cache_UNKNOWN, 0 }, |
+/* b0 */ {Cache_TLBi, 0 }, |
+/* b1 */ {Cache_UNKNOWN, 0 }, |
+/* b2 */ {Cache_UNKNOWN, 0 }, |
+/* b3 */ {Cache_TLBd, 0 }, |
+/* b4 */ {Cache_UNKNOWN, 0 }, |
+/* b5 */ {Cache_UNKNOWN, 0 }, |
+/* b6 */ {Cache_UNKNOWN, 0 }, |
+/* b7 */ {Cache_UNKNOWN, 0 }, |
+/* b8 */ {Cache_UNKNOWN, 0 }, |
+/* b9 */ {Cache_UNKNOWN, 0 }, |
+/* ba */ {Cache_UNKNOWN, 0 }, |
+/* bb */ {Cache_UNKNOWN, 0 }, |
+/* bc */ {Cache_UNKNOWN, 0 }, |
+/* bd */ {Cache_UNKNOWN, 0 }, |
+/* be */ {Cache_UNKNOWN, 0 }, |
+/* bf */ {Cache_UNKNOWN, 0 }, |
+/* c0 */ {Cache_UNKNOWN, 0 }, |
+/* c1 */ {Cache_UNKNOWN, 0 }, |
+/* c2 */ {Cache_UNKNOWN, 0 }, |
+/* c3 */ {Cache_UNKNOWN, 0 }, |
+/* c4 */ {Cache_UNKNOWN, 0 }, |
+/* c5 */ {Cache_UNKNOWN, 0 }, |
+/* c6 */ {Cache_UNKNOWN, 0 }, |
+/* c7 */ {Cache_UNKNOWN, 0 }, |
+/* c8 */ {Cache_UNKNOWN, 0 }, |
+/* c9 */ {Cache_UNKNOWN, 0 }, |
+/* ca */ {Cache_UNKNOWN, 0 }, |
+/* cb */ {Cache_UNKNOWN, 0 }, |
+/* cc */ {Cache_UNKNOWN, 0 }, |
+/* cd */ {Cache_UNKNOWN, 0 }, |
+/* ce */ {Cache_UNKNOWN, 0 }, |
+/* cf */ {Cache_UNKNOWN, 0 }, |
+/* d0 */ {Cache_UNKNOWN, 0 }, |
+/* d1 */ {Cache_UNKNOWN, 0 }, |
+/* d2 */ {Cache_UNKNOWN, 0 }, |
+/* d3 */ {Cache_UNKNOWN, 0 }, |
+/* d4 */ {Cache_UNKNOWN, 0 }, |
+/* d5 */ {Cache_UNKNOWN, 0 }, |
+/* d6 */ {Cache_UNKNOWN, 0 }, |
+/* d7 */ {Cache_UNKNOWN, 0 }, |
+/* d8 */ {Cache_UNKNOWN, 0 }, |
+/* d9 */ {Cache_UNKNOWN, 0 }, |
+/* da */ {Cache_UNKNOWN, 0 }, |
+/* db */ {Cache_UNKNOWN, 0 }, |
+/* dc */ {Cache_UNKNOWN, 0 }, |
+/* dd */ {Cache_UNKNOWN, 0 }, |
+/* de */ {Cache_UNKNOWN, 0 }, |
+/* df */ {Cache_UNKNOWN, 0 }, |
+/* e0 */ {Cache_UNKNOWN, 0 }, |
+/* e1 */ {Cache_UNKNOWN, 0 }, |
+/* e2 */ {Cache_UNKNOWN, 0 }, |
+/* e3 */ {Cache_UNKNOWN, 0 }, |
+/* e4 */ {Cache_UNKNOWN, 0 }, |
+/* e5 */ {Cache_UNKNOWN, 0 }, |
+/* e6 */ {Cache_UNKNOWN, 0 }, |
+/* e7 */ {Cache_UNKNOWN, 0 }, |
+/* e8 */ {Cache_UNKNOWN, 0 }, |
+/* e9 */ {Cache_UNKNOWN, 0 }, |
+/* ea */ {Cache_UNKNOWN, 0 }, |
+/* eb */ {Cache_UNKNOWN, 0 }, |
+/* ec */ {Cache_UNKNOWN, 0 }, |
+/* ed */ {Cache_UNKNOWN, 0 }, |
+/* ee */ {Cache_UNKNOWN, 0 }, |
+/* ef */ {Cache_UNKNOWN, 0 }, |
+/* f0 */ {Cache_UNKNOWN, 0 }, |
+/* f1 */ {Cache_UNKNOWN, 0 }, |
+/* f2 */ {Cache_UNKNOWN, 0 }, |
+/* f3 */ {Cache_UNKNOWN, 0 }, |
+/* f4 */ {Cache_UNKNOWN, 0 }, |
+/* f5 */ {Cache_UNKNOWN, 0 }, |
+/* f6 */ {Cache_UNKNOWN, 0 }, |
+/* f7 */ {Cache_UNKNOWN, 0 }, |
+/* f8 */ {Cache_UNKNOWN, 0 }, |
+/* f9 */ {Cache_UNKNOWN, 0 }, |
+/* fa */ {Cache_UNKNOWN, 0 }, |
+/* fb */ {Cache_UNKNOWN, 0 }, |
+/* fc */ {Cache_UNKNOWN, 0 }, |
+/* fd */ {Cache_UNKNOWN, 0 }, |
+/* fe */ {Cache_UNKNOWN, 0 }, |
+/* ff */ {Cache_UNKNOWN, 0 } |
+}; |
+ |
+ |
+/* |
+ * use the above table to determine the CacheEntryLineSize. |
+ */ |
+static void |
+getIntelCacheEntryLineSize(unsigned long val, int *level, |
+ unsigned long *lineSize) |
+{ |
+ CacheType type; |
+ |
+ type = CacheMap[val].type; |
+ /* only interested in data caches */ |
+    /* NOTE: val = 0x40 is a special value that means no L2 or L3 cache. |
+     * This line-size check has the side effect of rejecting that entry; |
+     * if it didn't, we would have to reject it explicitly. */ |
+ if (CacheMap[val].lineSize == 0) { |
+ return; |
+ } |
+ /* look at the caches, skip types we aren't interested in. |
+ * if we already have a value for a lower level cache, skip the |
+ * current entry */ |
+ if ((type == Cache_L1)|| (type == Cache_L1d)) { |
+ *level = 1; |
+ *lineSize = CacheMap[val].lineSize; |
+ } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { |
+ *level = 2; |
+ *lineSize = CacheMap[val].lineSize; |
+ } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { |
+ *level = 3; |
+ *lineSize = CacheMap[val].lineSize; |
+ } |
+ return; |
+} |
+ |
+ |
+static void |
+getIntelRegisterCacheLineSize(unsigned long val, |
+ int *level, unsigned long *lineSize) |
+{ |
+ getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); |
+ getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); |
+ getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); |
+ getIntelCacheEntryLineSize(val & 0xff, level, lineSize); |
+} |
+ |
+/* |
+ * returns '0' if no recognized cache is found, or if cache |
+ * information is not supported by this processor |
+ */ |
+static unsigned long |
+getIntelCacheLineSize(int cpuidLevel) |
+{ |
+ int level = 4; |
+ unsigned long lineSize = 0; |
+ unsigned long eax, ebx, ecx, edx; |
+ int repeat, count; |
+ |
+ if (cpuidLevel < 2) { |
+ return 0; |
+ } |
+ |
+    /* Command '2' of cpuid is Intel's cache info call. Each byte of the |
+     * 4 registers contains a potential descriptor for a cache. The CacheMap |
+     * table maps each descriptor to the corresponding processor cache. |
+     * Register 'al' contains the number of times cpuid '2' must be called |
+     * to collect all the cache descriptors. Only registers with the high |
+     * bit set to 'zero' contain valid descriptors. This code loops through |
+     * all the required calls to cpuid '2' and passes any valid descriptors |
+     * it finds to getIntelRegisterCacheLineSize, which breaks the registers |
+     * down into their component descriptors. In the end, the line size of |
+     * the lowest-level data cache is returned. */ |
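+    /* Worked example (descriptor values are illustrative, not from any |
+     * specific CPU): if cpuid '2' returned eax = 0x665b5001, al = 0x01 says |
+     * a single call is enough, and the remaining descriptor bytes are 0x66, |
+     * 0x5b and 0x50. CacheMap[0x66] is an L1 data cache with a 64-byte |
+     * line, so level becomes 1 and lineSize becomes 64, while 0x5b and 0x50 |
+     * are TLB descriptors with lineSize 0 and are skipped. */ |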
+ freebl_cpuid(2, &eax, &ebx, &ecx, &edx); |
+ repeat = eax & 0xf; |
+ for (count = 0; count < repeat; count++) { |
+ if ((eax & 0x80000000) == 0) { |
+ getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); |
+ } |
+ if ((ebx & 0x80000000) == 0) { |
+ getIntelRegisterCacheLineSize(ebx, &level, &lineSize); |
+ } |
+ if ((ecx & 0x80000000) == 0) { |
+ getIntelRegisterCacheLineSize(ecx, &level, &lineSize); |
+ } |
+ if ((edx & 0x80000000) == 0) { |
+ getIntelRegisterCacheLineSize(edx, &level, &lineSize); |
+ } |
+ if (count+1 != repeat) { |
+ freebl_cpuid(2, &eax, &ebx, &ecx, &edx); |
+ } |
+ } |
+ return lineSize; |
+} |
+ |
+/* |
+ * returns '0' if the cache info is not supported by this processor. |
+ * This is based on the AMD extended cache commands for cpuid. |
+ * (see "AMD Processor Recognition Application Note" Publication 20734). |
+ * Some other processors use the identical scheme. |
+ * (see "Processor Recognition, Transmeta Corporation"). |
+ */ |
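+/* Example decode (hypothetical register value, for illustration only): if |
+ * cpuid 0x80000005 returned ecx = 0x20080140, the low byte 0x40 would give |
+ * a 64-byte L1 data cache line; the upper fields (cache size, |
+ * associativity, lines per tag) are not used here. */ |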
+static unsigned long |
+getOtherCacheLineSize(unsigned long cpuidLevel) |
+{ |
+ unsigned long lineSize = 0; |
+ unsigned long eax, ebx, ecx, edx; |
+ |
+ /* get the Extended CPUID level */ |
+ freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); |
+ cpuidLevel = eax; |
+ |
+ if (cpuidLevel >= 0x80000005) { |
+ freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); |
+ lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ |
+ } |
+ return lineSize; |
+} |
+ |
+static const char * const manMap[] = { |
+#define INTEL 0 |
+ "GenuineIntel", |
+#define AMD 1 |
+ "AuthenticAMD", |
+#define CYRIX 2 |
+ "CyrixInstead", |
+#define CENTAUR 3 |
+    "CentaurHauls", |
+#define NEXGEN 4 |
+    "NexGenDriven", |
+#define TRANSMETA 5 |
+    "GenuineTMx86", |
+#define RISE 6 |
+    "RiseRiseRise", |
+#define UMC 7 |
+    "UMC UMC UMC ", |
+#define SIS 8 |
+    "Sis Sis Sis ", |
+#define NATIONAL 9 |
+    "Geode by NSC", |
+}; |
+ |
+static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]); |
+ |
+ |
+#define MAN_UNKNOWN 10 |
+ |
+#if !defined(AMD_64) |
+#define SSE2_FLAG (1<<26) |
+unsigned long |
+s_mpi_is_sse2() |
+{ |
+ unsigned long eax, ebx, ecx, edx; |
+ int manufacturer = MAN_UNKNOWN; |
+ int i; |
+ char string[13]; |
+ |
+ if (is386() || is486()) { |
+ return 0; |
+ } |
+ freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
+ *(int *)string = ebx; |
+ *(int *)&string[4] = edx; |
+ *(int *)&string[8] = ecx; |
+ string[12] = 0; |
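+    /* cpuid '0' returns the vendor string in ebx, edx, ecx order; e.g. an |
+     * Intel part yields "Genu" + "ineI" + "ntel" == "GenuineIntel" */ |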
+ |
+    /* cpuid level 1 (feature flags) isn't supported, so there is no SSE2 */ |
+ if (eax == 0) { |
+ return 0; |
+ } |
+ |
+ for (i=0; i < n_manufacturers; i++) { |
+ if ( strcmp(manMap[i],string) == 0) { |
+ manufacturer = i; |
+ break; |
+ } |
+ } |
+ |
+ freebl_cpuid(1,&eax,&ebx,&ecx,&edx); |
+ return (edx & SSE2_FLAG) == SSE2_FLAG; |
+} |
+#endif |
+ |
+unsigned long |
+s_mpi_getProcessorLineSize() |
+{ |
+ unsigned long eax, ebx, ecx, edx; |
+ unsigned long cpuidLevel; |
+ unsigned long cacheLineSize = 0; |
+ int manufacturer = MAN_UNKNOWN; |
+ int i; |
+ char string[65]; |
+ |
+#if !defined(AMD_64) |
+    if (is386()) { |
+        return 0; /* 386 had no cache */ |
+    } |
+    if (is486()) { |
+        return 32; /* really? need more info */ |
+    } |
+#endif |
+ |
+ /* Pentium, cpuid command is available */ |
+ freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
+ cpuidLevel = eax; |
+ *(int *)string = ebx; |
+ *(int *)&string[4] = edx; |
+ *(int *)&string[8] = ecx; |
+ string[12] = 0; |
+ |
+ manufacturer = MAN_UNKNOWN; |
+ for (i=0; i < n_manufacturers; i++) { |
+ if ( strcmp(manMap[i],string) == 0) { |
+ manufacturer = i; |
+ } |
+ } |
+ |
+ if (manufacturer == INTEL) { |
+ cacheLineSize = getIntelCacheLineSize(cpuidLevel); |
+ } else { |
+ cacheLineSize = getOtherCacheLineSize(cpuidLevel); |
+ } |
+    /* The processor doesn't support cache info via cpuid. This means an |
+     * old Pentium-class processor, which has 32-byte cache lines. If we |
+     * learn differently, we can use a switch based on the manufacturer |
+     * id. */ |
+ if (cacheLineSize == 0) { |
+ cacheLineSize = 32; |
+ } |
+ return cacheLineSize; |
+} |
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
+#endif |
+ |
+#if defined(__ppc64__) |
+/* |
+ * Sigh, the PPC has some really nice features to help us determine cache |
+ * line size, since it has lots of direct cache-control instructions. The |
+ * POWER processor even had an instruction to do exactly this, but it was |
+ * dropped in PowerPC. Unfortunately most of them are not available in |
+ * user mode. |
+ * |
+ * The dcbz instruction would be a great way to determine the cache line |
+ * size, except that 1) it only works on write-back memory (it throws an |
+ * exception otherwise), and 2) because so many Mac programs 'knew' the |
+ * processor cache line size was 32 bytes, they used this instruction as a |
+ * fast 'zero 32 bytes'. The newer G5 processor has 128-byte cache lines, |
+ * but dcbz only clears 32 bytes to keep those programs happy. dcbzl works |
+ * if 64-bit instructions are supported. If you know 64-bit instructions |
+ * are supported, and that the stack is write-back, you can use this code. |
+ */ |
+#include "memory.h" |
+ |
+/* clear the cache line that contains 'array' */ |
+static inline void dcbzl(char *array) |
+{ |
+ register char *a asm("r2") = array; |
+ __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) ); |
+} |
+ |
+ |
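+/* round 'x' up to the next 'y'-byte boundary (y must be a power of two), |
+ * e.g. PPC_DO_ALIGN(0x1007, 256) == (char *)0x1100 */ |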
+#define PPC_DO_ALIGN(x,y) ((char *)\ |
+ ((((long long) (x))+((y)-1))&~((y)-1))) |
+ |
+#define PPC_MAX_LINE_SIZE 256 |
+unsigned long |
+s_mpi_getProcessorLineSize() |
+{ |
+ char testArray[2*PPC_MAX_LINE_SIZE+1]; |
+ char *test; |
+ int i; |
+ |
+ /* align the array on a maximum line size boundary, so we |
+ * know we are starting to clear from the first address */ |
+ test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); |
+ /* set all the values to 1's */ |
+ memset(test, 0xff, PPC_MAX_LINE_SIZE); |
+ /* clear one cache block starting at 'test' */ |
+ dcbzl(test); |
+ |
+ /* find the size of the cleared area, that's our block size */ |
+ for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) { |
+ if (test[i-1] == 0) { |
+ return i; |
+ } |
+ } |
+ return 0; |
+} |
+ |
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
+#endif |
+ |
+ |
+/* |
+ * Put other processor- and platform-specific cache code here. It should |
+ * return the smallest cache line size in bytes on the processor |
+ * (usually that of the L1 cache). If the OS has a call for this, that |
+ * would be a great place to use it. |
+ * |
+ * If there is no cache, return 0. |
+ * |
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions |
+ * below aren't compiled. |
+ * |
+ */ |
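+ |
+/* |
+ * For example, a sketch of what an OS-based hook could look like on |
+ * Linux/glibc (not enabled here; _SC_LEVEL1_DCACHE_LINESIZE is a glibc |
+ * extension and may report 0 when the kernel doesn't know the line size): |
+ * |
+ *   #include <unistd.h> |
+ * |
+ *   unsigned long |
+ *   s_mpi_getProcessorLineSize() |
+ *   { |
+ *       long line = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); |
+ *       return (line > 0) ? (unsigned long)line : 32; |
+ *   } |
+ *   #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
+ */ |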
+ |
+ |
+/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or |
+ * OS */ |
+#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED) |
+ |
+unsigned long |
+s_mpi_getProcessorLineSize() |
+{ |
+ return MPI_CACHE_LINE_SIZE; |
+} |
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
+#endif |
+ |
+ |
+/* If no way to get the processor cache line size has been defined, assume |
+ * it's 32 bytes (most common value, does not significantly impact performance) |
+ */ |
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED |
+unsigned long |
+s_mpi_getProcessorLineSize() |
+{ |
+ return 32; |
+} |
+#endif |
+ |
+#ifdef TEST_IT |
+#include <stdio.h> |
+ |
+int main(void) |
+{ |
+    printf("line size = %lu\n", s_mpi_getProcessorLineSize()); |
+    return 0; |
+} |
+#endif |