Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(770)

Side by Side Diff: nss/lib/freebl/mpi/mpcpucache.c

Issue 2078763002: Delete bundled copy of NSS and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss@master
Patch Set: Delete bundled copy of NSS and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « nss/lib/freebl/mpi/mp_gf2m-priv.h ('k') | nss/lib/freebl/mpi/mpi.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "mpi.h"
6 #include "prtypes.h"
7
8 /*
9 * This file implements a single function: s_mpi_getProcessorLineSize();
10 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
11 * if a cache exists, or zero if there is no cache. If more than one
12 * cache line exists, it should return the smallest line size (which is
13 * usually the L1 cache).
14 *
15 * mp_modexp uses this information to make sure that private key information
16 * isn't being leaked through the cache.
17 *
18 * Currently the file returns good data for most modern x86 processors, and
19 * reasonable data on 64-bit ppc processors. All other processors are assumed
20 * to have a cache line size of 32 bytes unless modified by target.mk.
21 *
22 */
23
24 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
25 /* X86 processors have special instructions that tell us about the cache */
26 #include "string.h"
27
28 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
29 #define AMD_64 1
30 #endif
31
32 /* Generic CPUID function */
33 #if defined(AMD_64)
34
35 #if defined(__GNUC__)
36
/*
 * Execute the CPUID instruction (x86-64, GCC-style inline assembly).
 *
 *   op   - CPUID leaf, loaded into EAX before the instruction runs.
 *   eax, ebx, ecx, edx - receive the register contents left by CPUID.
 *
 * All four output pointers must be valid; none of them is checked.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* The asm is marked volatile because the result of CPUID is not a pure
     * function of 'op': callers in this file execute leaf 2 repeatedly and
     * expect every call to really run.  Without volatile the compiler is
     * allowed to merge or drop identical asm statements. */
    __asm__ __volatile__("cpuid\n\t"
                         : "=a" (*eax),
                           "=b" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
48
49 #elif defined(_MSC_VER)
50
51 #include <intrin.h>
52
/*
 * Execute CPUID through the MSVC __cpuid intrinsic (x86-64 build).
 *
 *   op   - CPUID leaf handed to the intrinsic.
 *   eax, ebx, ecx, edx - receive elements 0..3 of the intrinsic's output
 *                        array, in that order.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    int regs[4];    /* scratch array filled in by __cpuid */

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
65
66 #endif
67
68 #else /* !defined(AMD_64) */
69
70 /* x86 */
71
72 #if defined(__GNUC__)
/*
 * Execute the CPUID instruction (32-bit x86, GCC-style inline assembly).
 *
 *   op   - CPUID leaf, loaded into EAX before the instruction runs.
 *   eax, ebx, ecx, edx - receive the register contents left by CPUID.
 *
 * All four output pointers must be valid; none of them is checked.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* sigh GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. Use edi to store ebx and pass the
     * value returned in ebx from cpuid through edi.
     *
     * The asm is volatile because the result of CPUID is not a pure
     * function of 'op' (leaf 2 is executed repeatedly by callers in this
     * file), so identical invocations must not be merged or dropped. */
    __asm__ __volatile__("mov %%ebx,%%edi\n\t"
                         "cpuid\n\t"
                         "xchgl %%ebx,%%edi\n\t"
                         : "=a" (*eax),
                           "=D" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
89
/*
 * Try to toggle the EFLAGS bit(s) selected by 'flag' and report which of
 * them the processor actually let us change.  The original EFLAGS value is
 * restored before returning.
 *
 * Returns the XOR of the flags read back after the attempt and the flags
 * read before it: a bit is set in the result only if it really flipped.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long readBack, saved;

    __asm__("pushfl\n\t"        /* fetch EFLAGS into %0 */
            "popl %0\n\t"
            "movl %0,%1\n\t"    /* keep an untouched copy in %1 */
            "xorl %2,%0\n\t"    /* toggle the requested bit(s) */
            "pushl %0\n\t"      /* attempt to load the modified value */
            "popfl\n\t"
            "pushfl\n\t"        /* read EFLAGS back for the return value */
            "popl %0\n\t"
            "pushl %1\n\t"      /* put the original EFLAGS back */
            "popfl\n\t"
            : "=r" (readBack),
              "=r" (saved),
              "=r" (flag)
            : "2" (flag));

    return readBack ^ saved;
}
112
113 #elif defined(_MSC_VER)
114
115 /*
116 * windows versions of the above assembler
117 */
118 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
119 void freebl_cpuid(unsigned long op, unsigned long *Reax,
120 unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
121 {
122 unsigned long Leax, Lebx, Lecx, Ledx;
123 __asm {
124 pushad
125 mov eax,op
126 wcpuid
127 mov Leax,eax
128 mov Lebx,ebx
129 mov Lecx,ecx
130 mov Ledx,edx
131 popad
132 }
133 *Reax = Leax;
134 *Rebx = Lebx;
135 *Recx = Lecx;
136 *Redx = Ledx;
137 }
138
139 static unsigned long changeFlag(unsigned long flag)
140 {
141 unsigned long changedFlags, originalFlags;
142 __asm {
143 push eax
144 push ebx
145 pushfd /* get the flags */
146 pop eax
147 push eax /* save the flags on the stack */
148 mov originalFlags,eax /* save the original flags */
149 mov ebx,flag
150 xor eax,ebx /* flip the bit */
151 push eax /* set the flags */
152 popfd
153 pushfd /* get the flags again (for return) */
154 pop eax
155 popfd /* restore the original flags */
156 mov changedFlags,eax
157 pop ebx
158 pop eax
159 }
160 return changedFlags ^ originalFlags;
161 }
162 #endif
163
164 #endif
165
166 #if !defined(AMD_64)
167 #define AC_FLAG 0x40000
168 #define ID_FLAG 0x200000
169
170 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
171 static int is386()
172 {
173 return changeFlag(AC_FLAG) == 0;
174 }
175
176 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
177 static int is486()
178 {
179 return changeFlag(ID_FLAG) == 0;
180 }
181 #endif
182
183
/*
 * table for Intel Cache.
 * See Intel Application Note AP-485 for more information
 *
 * CacheMap is indexed by a one-byte cache descriptor and gives, for each
 * descriptor, the kind of structure it describes and its line size.
 */

/* storage type for a CacheType value inside the table (one byte per entry) */
typedef unsigned char CacheTypeEntry;

/* kind of structure a descriptor refers to */
typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;

/* one table row: descriptor kind plus line size (0 = nothing to report) */
struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;
};

static const struct _cache CacheMap[256] = {
    /* 00 */ {Cache_NONE, 0 },
    /* 01 */ {Cache_TLBi, 0 },
    /* 02 */ {Cache_TLBi, 0 },
    /* 03 */ {Cache_TLBd, 0 },
    /* 04 */ {Cache_TLBd, 0 },
    /* 05 */ {Cache_UNKNOWN, 0 },
    /* 06 */ {Cache_L1i, 32 },
    /* 07 */ {Cache_UNKNOWN, 0 },
    /* 08 */ {Cache_L1i, 32 },
    /* 09 */ {Cache_UNKNOWN, 0 },
    /* 0a */ {Cache_L1d, 32 },
    /* 0b */ {Cache_UNKNOWN, 0 },
    /* 0c */ {Cache_L1d, 32 },
    /* 0d */ {Cache_UNKNOWN, 0 },
    /* 0e */ {Cache_UNKNOWN, 0 },
    /* 0f */ {Cache_UNKNOWN, 0 },
    /* 10 */ {Cache_UNKNOWN, 0 },
    /* 11 */ {Cache_UNKNOWN, 0 },
    /* 12 */ {Cache_UNKNOWN, 0 },
    /* 13 */ {Cache_UNKNOWN, 0 },
    /* 14 */ {Cache_UNKNOWN, 0 },
    /* 15 */ {Cache_UNKNOWN, 0 },
    /* 16 */ {Cache_UNKNOWN, 0 },
    /* 17 */ {Cache_UNKNOWN, 0 },
    /* 18 */ {Cache_UNKNOWN, 0 },
    /* 19 */ {Cache_UNKNOWN, 0 },
    /* 1a */ {Cache_UNKNOWN, 0 },
    /* 1b */ {Cache_UNKNOWN, 0 },
    /* 1c */ {Cache_UNKNOWN, 0 },
    /* 1d */ {Cache_UNKNOWN, 0 },
    /* 1e */ {Cache_UNKNOWN, 0 },
    /* 1f */ {Cache_UNKNOWN, 0 },
    /* 20 */ {Cache_UNKNOWN, 0 },
    /* 21 */ {Cache_UNKNOWN, 0 },
    /* 22 */ {Cache_L3, 64 },
    /* 23 */ {Cache_L3, 64 },
    /* 24 */ {Cache_UNKNOWN, 0 },
    /* 25 */ {Cache_L3, 64 },
    /* 26 */ {Cache_UNKNOWN, 0 },
    /* 27 */ {Cache_UNKNOWN, 0 },
    /* 28 */ {Cache_UNKNOWN, 0 },
    /* 29 */ {Cache_L3, 64 },
    /* 2a */ {Cache_UNKNOWN, 0 },
    /* 2b */ {Cache_UNKNOWN, 0 },
    /* 2c */ {Cache_L1d, 64 },
    /* 2d */ {Cache_UNKNOWN, 0 },
    /* 2e */ {Cache_UNKNOWN, 0 },
    /* 2f */ {Cache_UNKNOWN, 0 },
    /* 30 */ {Cache_L1i, 64 },
    /* 31 */ {Cache_UNKNOWN, 0 },
    /* 32 */ {Cache_UNKNOWN, 0 },
    /* 33 */ {Cache_UNKNOWN, 0 },
    /* 34 */ {Cache_UNKNOWN, 0 },
    /* 35 */ {Cache_UNKNOWN, 0 },
    /* 36 */ {Cache_UNKNOWN, 0 },
    /* 37 */ {Cache_UNKNOWN, 0 },
    /* 38 */ {Cache_UNKNOWN, 0 },
    /* 39 */ {Cache_L2, 64 },
    /* 3a */ {Cache_UNKNOWN, 0 },
    /* 3b */ {Cache_L2, 64 },
    /* 3c */ {Cache_L2, 64 },
    /* 3d */ {Cache_UNKNOWN, 0 },
    /* 3e */ {Cache_UNKNOWN, 0 },
    /* 3f */ {Cache_UNKNOWN, 0 },
    /* 40 */ {Cache_L2, 0 },
    /* 41 */ {Cache_L2, 32 },
    /* 42 */ {Cache_L2, 32 },
    /* 43 */ {Cache_L2, 32 },
    /* 44 */ {Cache_L2, 32 },
    /* 45 */ {Cache_L2, 32 },
    /* 46 */ {Cache_UNKNOWN, 0 },
    /* 47 */ {Cache_UNKNOWN, 0 },
    /* 48 */ {Cache_UNKNOWN, 0 },
    /* 49 */ {Cache_UNKNOWN, 0 },
    /* 4a */ {Cache_UNKNOWN, 0 },
    /* 4b */ {Cache_UNKNOWN, 0 },
    /* 4c */ {Cache_UNKNOWN, 0 },
    /* 4d */ {Cache_UNKNOWN, 0 },
    /* 4e */ {Cache_UNKNOWN, 0 },
    /* 4f */ {Cache_UNKNOWN, 0 },
    /* 50 */ {Cache_TLBi, 0 },
    /* 51 */ {Cache_TLBi, 0 },
    /* 52 */ {Cache_TLBi, 0 },
    /* 53 */ {Cache_UNKNOWN, 0 },
    /* 54 */ {Cache_UNKNOWN, 0 },
    /* 55 */ {Cache_UNKNOWN, 0 },
    /* 56 */ {Cache_UNKNOWN, 0 },
    /* 57 */ {Cache_UNKNOWN, 0 },
    /* 58 */ {Cache_UNKNOWN, 0 },
    /* 59 */ {Cache_UNKNOWN, 0 },
    /* 5a */ {Cache_UNKNOWN, 0 },
    /* 5b */ {Cache_TLBd, 0 },
    /* 5c */ {Cache_TLBd, 0 },
    /* 5d */ {Cache_TLBd, 0 },
    /* 5e */ {Cache_UNKNOWN, 0 },
    /* 5f */ {Cache_UNKNOWN, 0 },
    /* 60 */ {Cache_UNKNOWN, 0 },
    /* 61 */ {Cache_UNKNOWN, 0 },
    /* 62 */ {Cache_UNKNOWN, 0 },
    /* 63 */ {Cache_UNKNOWN, 0 },
    /* 64 */ {Cache_UNKNOWN, 0 },
    /* 65 */ {Cache_UNKNOWN, 0 },
    /* 66 */ {Cache_L1d, 64 },
    /* 67 */ {Cache_L1d, 64 },
    /* 68 */ {Cache_L1d, 64 },
    /* 69 */ {Cache_UNKNOWN, 0 },
    /* 6a */ {Cache_UNKNOWN, 0 },
    /* 6b */ {Cache_UNKNOWN, 0 },
    /* 6c */ {Cache_UNKNOWN, 0 },
    /* 6d */ {Cache_UNKNOWN, 0 },
    /* 6e */ {Cache_UNKNOWN, 0 },
    /* 6f */ {Cache_UNKNOWN, 0 },
    /* 70 */ {Cache_Trace, 1 },
    /* 71 */ {Cache_Trace, 1 },
    /* 72 */ {Cache_Trace, 1 },
    /* 73 */ {Cache_UNKNOWN, 0 },
    /* 74 */ {Cache_UNKNOWN, 0 },
    /* 75 */ {Cache_UNKNOWN, 0 },
    /* 76 */ {Cache_UNKNOWN, 0 },
    /* 77 */ {Cache_UNKNOWN, 0 },
    /* 78 */ {Cache_UNKNOWN, 0 },
    /* 79 */ {Cache_L2, 64 },
    /* 7a */ {Cache_L2, 64 },
    /* 7b */ {Cache_L2, 64 },
    /* 7c */ {Cache_L2, 64 },
    /* 7d */ {Cache_UNKNOWN, 0 },
    /* 7e */ {Cache_UNKNOWN, 0 },
    /* 7f */ {Cache_UNKNOWN, 0 },
    /* 80 */ {Cache_UNKNOWN, 0 },
    /* 81 */ {Cache_UNKNOWN, 0 },
    /* 82 */ {Cache_L2, 32 },
    /* 83 */ {Cache_L2, 32 },
    /* 84 */ {Cache_L2, 32 },
    /* 85 */ {Cache_L2, 32 },
    /* 86 */ {Cache_L2, 64 },
    /* 87 */ {Cache_L2, 64 },
    /* 88 */ {Cache_UNKNOWN, 0 },
    /* 89 */ {Cache_UNKNOWN, 0 },
    /* 8a */ {Cache_UNKNOWN, 0 },
    /* 8b */ {Cache_UNKNOWN, 0 },
    /* 8c */ {Cache_UNKNOWN, 0 },
    /* 8d */ {Cache_UNKNOWN, 0 },
    /* 8e */ {Cache_UNKNOWN, 0 },
    /* 8f */ {Cache_UNKNOWN, 0 },
    /* 90 */ {Cache_UNKNOWN, 0 },
    /* 91 */ {Cache_UNKNOWN, 0 },
    /* 92 */ {Cache_UNKNOWN, 0 },
    /* 93 */ {Cache_UNKNOWN, 0 },
    /* 94 */ {Cache_UNKNOWN, 0 },
    /* 95 */ {Cache_UNKNOWN, 0 },
    /* 96 */ {Cache_UNKNOWN, 0 },
    /* 97 */ {Cache_UNKNOWN, 0 },
    /* 98 */ {Cache_UNKNOWN, 0 },
    /* 99 */ {Cache_UNKNOWN, 0 },
    /* 9a */ {Cache_UNKNOWN, 0 },
    /* 9b */ {Cache_UNKNOWN, 0 },
    /* 9c */ {Cache_UNKNOWN, 0 },
    /* 9d */ {Cache_UNKNOWN, 0 },
    /* 9e */ {Cache_UNKNOWN, 0 },
    /* 9f */ {Cache_UNKNOWN, 0 },
    /* a0 */ {Cache_UNKNOWN, 0 },
    /* a1 */ {Cache_UNKNOWN, 0 },
    /* a2 */ {Cache_UNKNOWN, 0 },
    /* a3 */ {Cache_UNKNOWN, 0 },
    /* a4 */ {Cache_UNKNOWN, 0 },
    /* a5 */ {Cache_UNKNOWN, 0 },
    /* a6 */ {Cache_UNKNOWN, 0 },
    /* a7 */ {Cache_UNKNOWN, 0 },
    /* a8 */ {Cache_UNKNOWN, 0 },
    /* a9 */ {Cache_UNKNOWN, 0 },
    /* aa */ {Cache_UNKNOWN, 0 },
    /* ab */ {Cache_UNKNOWN, 0 },
    /* ac */ {Cache_UNKNOWN, 0 },
    /* ad */ {Cache_UNKNOWN, 0 },
    /* ae */ {Cache_UNKNOWN, 0 },
    /* af */ {Cache_UNKNOWN, 0 },
    /* b0 */ {Cache_TLBi, 0 },
    /* b1 */ {Cache_UNKNOWN, 0 },
    /* b2 */ {Cache_UNKNOWN, 0 },
    /* b3 */ {Cache_TLBd, 0 },
    /* b4 */ {Cache_UNKNOWN, 0 },
    /* b5 */ {Cache_UNKNOWN, 0 },
    /* b6 */ {Cache_UNKNOWN, 0 },
    /* b7 */ {Cache_UNKNOWN, 0 },
    /* b8 */ {Cache_UNKNOWN, 0 },
    /* b9 */ {Cache_UNKNOWN, 0 },
    /* ba */ {Cache_UNKNOWN, 0 },
    /* bb */ {Cache_UNKNOWN, 0 },
    /* bc */ {Cache_UNKNOWN, 0 },
    /* bd */ {Cache_UNKNOWN, 0 },
    /* be */ {Cache_UNKNOWN, 0 },
    /* bf */ {Cache_UNKNOWN, 0 },
    /* c0 */ {Cache_UNKNOWN, 0 },
    /* c1 */ {Cache_UNKNOWN, 0 },
    /* c2 */ {Cache_UNKNOWN, 0 },
    /* c3 */ {Cache_UNKNOWN, 0 },
    /* c4 */ {Cache_UNKNOWN, 0 },
    /* c5 */ {Cache_UNKNOWN, 0 },
    /* c6 */ {Cache_UNKNOWN, 0 },
    /* c7 */ {Cache_UNKNOWN, 0 },
    /* c8 */ {Cache_UNKNOWN, 0 },
    /* c9 */ {Cache_UNKNOWN, 0 },
    /* ca */ {Cache_UNKNOWN, 0 },
    /* cb */ {Cache_UNKNOWN, 0 },
    /* cc */ {Cache_UNKNOWN, 0 },
    /* cd */ {Cache_UNKNOWN, 0 },
    /* ce */ {Cache_UNKNOWN, 0 },
    /* cf */ {Cache_UNKNOWN, 0 },
    /* d0 */ {Cache_UNKNOWN, 0 },
    /* d1 */ {Cache_UNKNOWN, 0 },
    /* d2 */ {Cache_UNKNOWN, 0 },
    /* d3 */ {Cache_UNKNOWN, 0 },
    /* d4 */ {Cache_UNKNOWN, 0 },
    /* d5 */ {Cache_UNKNOWN, 0 },
    /* d6 */ {Cache_UNKNOWN, 0 },
    /* d7 */ {Cache_UNKNOWN, 0 },
    /* d8 */ {Cache_UNKNOWN, 0 },
    /* d9 */ {Cache_UNKNOWN, 0 },
    /* da */ {Cache_UNKNOWN, 0 },
    /* db */ {Cache_UNKNOWN, 0 },
    /* dc */ {Cache_UNKNOWN, 0 },
    /* dd */ {Cache_UNKNOWN, 0 },
    /* de */ {Cache_UNKNOWN, 0 },
    /* df */ {Cache_UNKNOWN, 0 },
    /* e0 */ {Cache_UNKNOWN, 0 },
    /* e1 */ {Cache_UNKNOWN, 0 },
    /* e2 */ {Cache_UNKNOWN, 0 },
    /* e3 */ {Cache_UNKNOWN, 0 },
    /* e4 */ {Cache_UNKNOWN, 0 },
    /* e5 */ {Cache_UNKNOWN, 0 },
    /* e6 */ {Cache_UNKNOWN, 0 },
    /* e7 */ {Cache_UNKNOWN, 0 },
    /* e8 */ {Cache_UNKNOWN, 0 },
    /* e9 */ {Cache_UNKNOWN, 0 },
    /* ea */ {Cache_UNKNOWN, 0 },
    /* eb */ {Cache_UNKNOWN, 0 },
    /* ec */ {Cache_UNKNOWN, 0 },
    /* ed */ {Cache_UNKNOWN, 0 },
    /* ee */ {Cache_UNKNOWN, 0 },
    /* ef */ {Cache_UNKNOWN, 0 },
    /* f0 */ {Cache_UNKNOWN, 0 },
    /* f1 */ {Cache_UNKNOWN, 0 },
    /* f2 */ {Cache_UNKNOWN, 0 },
    /* f3 */ {Cache_UNKNOWN, 0 },
    /* f4 */ {Cache_UNKNOWN, 0 },
    /* f5 */ {Cache_UNKNOWN, 0 },
    /* f6 */ {Cache_UNKNOWN, 0 },
    /* f7 */ {Cache_UNKNOWN, 0 },
    /* f8 */ {Cache_UNKNOWN, 0 },
    /* f9 */ {Cache_UNKNOWN, 0 },
    /* fa */ {Cache_UNKNOWN, 0 },
    /* fb */ {Cache_UNKNOWN, 0 },
    /* fc */ {Cache_UNKNOWN, 0 },
    /* fd */ {Cache_UNKNOWN, 0 },
    /* fe */ {Cache_UNKNOWN, 0 },
    /* ff */ {Cache_UNKNOWN, 0 }
};
471
472
473 /*
474 * use the above table to determine the CacheEntryLineSize.
475 */
476 static void
477 getIntelCacheEntryLineSize(unsigned long val, int *level,
478 unsigned long *lineSize)
479 {
480 CacheType type;
481
482 type = CacheMap[val].type;
483 /* only interested in data caches */
484 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
485 * this data check has the side effect of rejecting that entry. If
486 * that wasn't the case, we could have to reject it explicitly */
487 if (CacheMap[val].lineSize == 0) {
488 return;
489 }
490 /* look at the caches, skip types we aren't interested in.
491 * if we already have a value for a lower level cache, skip the
492 * current entry */
493 if ((type == Cache_L1)|| (type == Cache_L1d)) {
494 *level = 1;
495 *lineSize = CacheMap[val].lineSize;
496 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
497 *level = 2;
498 *lineSize = CacheMap[val].lineSize;
499 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
500 *level = 3;
501 *lineSize = CacheMap[val].lineSize;
502 }
503 return;
504 }
505
506
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each one to getIntelCacheEntryLineSize(), most significant byte first.
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
516
/*
 * Determine the data cache line size from Intel's cpuid '2' descriptors.
 *
 *   cpuidLevel - highest standard cpuid command the processor supports.
 *
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;      /* higher than any real cache level: nothing found */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* 'al' is the whole low byte of eax, so mask all eight bits */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* strip 'al': it holds the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next set of descriptors unless this was the last pass */
        if (count+1 != repeat) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
564
/*
 * Cache line size via the extended cpuid commands.
 * This is based on the AMD extended cache commands for cpuid.
 * (see "AMD Processor Recognition Application Note" Publication 20734).
 * Some other processors use the identical scheme.
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The value passed in as cpuidLevel is not used: it is overwritten with
 * the extended cpuid level before anything reads it.
 *
 * returns '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* ask for the highest extended cpuid command */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff; /* line Size, L1 Data Cache */
}
588
/*
 * Known cpuid vendor strings.  Each #define names the index of the string
 * that follows it, so the macro values must stay in step with the order of
 * the initializers (CENTAUR used to repeat CYRIX's value, which left every
 * later macro one short of its real index).
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "Sis Sis Sis ",
#define NATIONAL  9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);


/* "no match" marker; must not collide with any valid manMap index */
#define MAN_UNKNOWN 10
616
617 #if !defined(AMD_64)
/* bit tested in edx after cpuid command 1 */
#define SSE2_FLAG (1<<26)

/*
 * Report whether the processor advertises SSE2.
 *
 * Returns 1 when SSE2_FLAG is set in edx after cpuid command 1, and 0 when
 * it is not, when the processor is detected as a 386/486, or when cpuid
 * command 0 reports a maximum command of 0.
 */
unsigned long
s_mpi_is_sse2(void)
{
    unsigned long eax, ebx, ecx, edx;

    /* is386()/is486() decide whether the cpuid instruction may be used */
    if (is386() || is486()) {
        return 0;
    }
    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);

    /* has no SSE2 extensions */
    if (eax == 0) {
        return 0;
    }

    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    return (edx & SSE2_FLAG) == SSE2_FLAG;
}
637 #endif
638
639 unsigned long
640 s_mpi_getProcessorLineSize()
641 {
642 unsigned long eax, ebx, ecx, edx;
643 PRUint32 cpuid[3];
644 unsigned long cpuidLevel;
645 unsigned long cacheLineSize = 0;
646 int manufacturer = MAN_UNKNOWN;
647 int i;
648 char string[13];
649
650 #if !defined(AMD_64)
651 if (is386()) {
652 return 0; /* 386 had no cache */
653 } if (is486()) {
654 return 32; /* really? need more info */
655 }
656 #endif
657
658 /* Pentium, cpuid command is available */
659 freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
660 cpuidLevel = eax;
661 /* string holds the CPU's manufacturer ID string - a twelve
662 * character ASCII string stored in ebx, edx, ecx, and
663 * the 32-bit extended feature flags are in edx, ecx.
664 */
665 cpuid[0] = ebx;
666 cpuid[1] = ecx;
667 cpuid[2] = edx;
668 memcpy(string, cpuid, sizeof(cpuid));
669 string[12] = 0;
670
671 manufacturer = MAN_UNKNOWN;
672 for (i=0; i < n_manufacturers; i++) {
673 if ( strcmp(manMap[i],string) == 0) {
674 manufacturer = i;
675 }
676 }
677
678 if (manufacturer == INTEL) {
679 cacheLineSize = getIntelCacheLineSize(cpuidLevel);
680 } else {
681 cacheLineSize = getOtherCacheLineSize(cpuidLevel);
682 }
683 /* doesn't support cache info based on cpuid. This means
684 * an old pentium class processor, which have cache lines of
685 * 32. If we learn differently, we can use a switch based on
686 * the Manufacturer id */
687 if (cacheLineSize == 0) {
688 cacheLineSize = 32;
689 }
690 return cacheLineSize;
691 }
692 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
693 #endif
694
695 #if defined(__ppc64__)
696 /*
697 * Sigh, The PPC has some really nice features to help us determine cache
698 * size, since it had lots of direct control functions to do so. The POWER
699 * processor even has an instruction to do this, but it was dropped in
700 * PowerPC. Unfortunately most of them are not available in user mode.
701 *
702 * The dcbz function would be a great way to determine cache line size except
703 * 1) it only works on write-back memory (it throws an exception otherwise),
704 * and 2) because so many mac programs 'knew' the processor cache size was
705 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
706 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
707 * these programs happy. dcbzl work if 64 bit instructions are supported.
708 * If you know 64 bit instructions are supported, and that stack is
709 * write-back, you can use this code.
710 */
711 #include "memory.h"
712
713 /* clear the cache line that contains 'array' */
714 static inline void dcbzl(char *array)
715 {
716 register char *a asm("r2") = array;
717 __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
718 }
719
720
/* round pointer 'x' up to the next multiple of 'y' (y must be a power of 2) */
#define PPC_DO_ALIGN(x,y) ((char *)\
                        ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest line size the probe below can detect */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: measure the cache block size by filling an aligned
 * buffer with 0xff, clearing one block with dcbzl() and seeing how far the
 * zeros reach.
 *
 * Returns the largest power of two i (PPC_MAX_LINE_SIZE down to 1) for
 * which byte i-1 of the buffer reads back as zero, or 0 if none does.
 */
unsigned long
s_mpi_getProcessorLineSize(void)
{
    /* twice the maximum line size so an aligned window always fits */
    char testArray[2*PPC_MAX_LINE_SIZE+1];
    char *test;
    int i;

    /* align the array on a maximum line size boundary, so we
     * know we are starting to clear from the first address */
    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
    /* set all the values to 1's */
    memset(test, 0xff, PPC_MAX_LINE_SIZE);
    /* clear one cache block starting at 'test' */
    dcbzl(test);

    /* find the size of the cleared area, that's our block size */
    for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) {
        if (test[i-1] == 0) {
            return i;
        }
    }
    return 0;
}
748
749 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
750 #endif
751
752
753 /*
754 * put other processor and platform specific cache code here
755 * return the smallest cache line size in bytes on the processor
756 * (usually the L1 cache). If the OS has a call, this would be
757 * a great place to put it.
758 *
759 * If there is no cache, return 0;
760 *
761 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
762 * below aren't compiled.
763 *
764 */
765
766
767 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
768 * OS */
769 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED )
770
771 unsigned long
772 s_mpi_getProcessorLineSize()
773 {
774 return MPI_CACHE_LINE_SIZE;
775 }
776 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
777 #endif
778
779
780 /* If no way to get the processor cache line size has been defined, assume
781 * it's 32 bytes (most common value, does not significantly impact performance)
782 */
783 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
/*
 * Generic fallback used when no processor specific implementation was
 * compiled: always reports a 32 byte cache line.
 */
unsigned long
s_mpi_getProcessorLineSize(void)
{
    return 32;
}
789 #endif
790
791 #ifdef TEST_IT
792 #include <stdio.h>
793
/*
 * Stand-alone test driver (built only with TEST_IT): print the detected
 * cache line size.
 */
int main(void)
{
    /* the function returns unsigned long, so the conversion must be %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
798 #endif
OLDNEW
« no previous file with comments | « nss/lib/freebl/mpi/mp_gf2m-priv.h ('k') | nss/lib/freebl/mpi/mpi.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698