Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Side by Side Diff: net/third_party/nss/ssl/mpi/mpcpucache.c

Issue 6804032: Add TLS-SRP (RFC 5054) support Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is the Netscape security libraries.
15 *
16 * The Initial Developer of the Original Code is
17 * Red Hat, Inc
18 * Portions created by the Initial Developer are Copyright (C) 2005
19 * the Initial Developer. All Rights Reserved.
20 *
21 * Contributor(s):
22 * Robert Relyea <rrelyea@redhat.com>
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38 #include "mpi.h"
39
40 /*
41 * This file implements a single function: s_mpi_getProcessorLineSize();
42 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
43 * if a cache exists, or zero if there is no cache. If more than one
44 * cache line exists, it should return the smallest line size (which is
45 * usually the L1 cache).
46 *
47 * mp_modexp uses this information to make sure that private key information
48 * isn't being leaked through the cache.
49 *
50 * Currently the file returns good data for most modern x86 processors, and
51 * reasonable data on 64-bit ppc processors. All other processors are assumed
52 * to have a cache line size of 32 bytes unless modified by target.mk.
53 *
54 */
55
56 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
57 /* X86 processors have special instructions that tell us about the cache */
58 #include "string.h"
59
60 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
61 #define AMD_64 1
62 #endif
63
64 /* Generic CPUID function */
65 #if defined(AMD_64)
66
67 #if defined(__GNUC__)
68
/*
 * Run the CPUID instruction for leaf 'op' and hand back the contents of
 * the four result registers through eax/ebx/ecx/edx.
 * (x86-64 build, GCC-style inline assembly.)
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* The "0" input constraint ties 'op' to output operand 0, i.e. the
     * accumulator register, which is where cpuid reads its leaf number. */
    __asm__("cpuid\n\t"
            : "=a" (*eax),
              "=b" (*ebx),
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));
}
80
81 #elif defined(_MSC_VER)
82
83 #include <intrin.h>
84
/*
 * Run CPUID for leaf 'op' through the MSVC __cpuid intrinsic and copy the
 * four values it produces out to eax/ebx/ecx/edx (in that order).
 * (x86-64 build, Microsoft compiler.)
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    int regs[4];

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
97
98 #endif
99
100 #else /* !defined(AMD_64) */
101
102 /* x86 */
103
104 #if defined(__GNUC__)
/*
 * Run the CPUID instruction for leaf 'op' and hand back the contents of
 * the four result registers through eax/ebx/ecx/edx.
 * (32-bit x86 build, GCC-style inline assembly.)
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* Sigh: GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so it is saved and restored by hand around cpuid and
     * the ebx result is copied out into a compiler-chosen register.
     * NOTE(review): "=r" does not obviously forbid the compiler from
     * choosing ebx itself for operand 1 in non-PIC builds -- confirm. */
    __asm__("pushl %%ebx\n\t"
            "cpuid\n\t"
            "mov %%ebx,%1\n\t"
            "popl %%ebx\n\t"
            : "=a" (*eax),
              "=r" (*ebx),
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));
}
121
/*
 * Try to flip bits in the processor flags register to determine CPU type.
 *
 * The flags are read, XORed with 'flag', written back and read again;
 * the original flags are then restored.  The return value is the XOR of
 * the re-read flags with the original ones, so it is zero when the
 * requested bit(s) could not be changed.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;

    __asm__("pushfl\n\t"            /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"        /* save the original flags */
            "xorl %2,%0\n\t"        /* flip the bit */
            "pushl %0\n\t"          /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"            /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"          /* restore the original flags */
            "popfl\n\t"
            : "=r" (changedFlags),
              "=r" (originalFlags),
              "=r" (flag)
            : "2" (flag));

    return changedFlags ^ originalFlags;
}
144
145 #elif defined(_MSC_VER)
146
147 /*
148 * windows versions of the above assembler
149 */
150 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
151 void freebl_cpuid(unsigned long op, unsigned long *Reax,
152 unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
153 {
154 unsigned long Leax, Lebx, Lecx, Ledx;
155 __asm {
156 pushad
157 mov eax,op
158 wcpuid
159 mov Leax,eax
160 mov Lebx,ebx
161 mov Lecx,ecx
162 mov Ledx,edx
163 popad
164 }
165 *Reax = Leax;
166 *Rebx = Lebx;
167 *Recx = Lecx;
168 *Redx = Ledx;
169 }
170
171 static unsigned long changeFlag(unsigned long flag)
172 {
173 unsigned long changedFlags, originalFlags;
174 __asm {
175 push eax
176 push ebx
177 pushfd /* get the flags */
178 pop eax
179 push eax /* save the flags on the stack */
180 mov originalFlags,eax /* save the original flags */
181 mov ebx,flag
182 xor eax,ebx /* flip the bit */
183 push eax /* set the flags */
184 popfd
185 pushfd /* get the flags again (for return) */
186 pop eax
187 popfd /* restore the original flags */
188 mov changedFlags,eax
189 pop ebx
190 pop eax
191 }
192 return changedFlags ^ originalFlags;
193 }
194 #endif
195
196 #endif
197
#if !defined(AMD_64)
/* Flag-register bits probed by is386()/is486(). */
#define AC_FLAG 0x40000
#define ID_FLAG 0x200000

/*
 * 386 processors can't flip the AC_FLAG (Intel AP Note AP-485).
 * Returns non-zero when the AC bit could not be toggled.
 */
static int is386()
{
    return !changeFlag(AC_FLAG);
}

/*
 * 486 processors can't flip the ID_FLAG (Intel AP Note AP-485).
 * Returns non-zero when the ID bit could not be toggled.
 */
static int is486()
{
    return !changeFlag(ID_FLAG);
}
#endif
214
215
/*
 * Table for Intel cache descriptors.
 * See Intel Application Note AP-485 for more information.
 *
 * CacheMap is indexed by the one-byte descriptor value; each entry gives
 * the kind of structure the descriptor names and its line size in bytes
 * (0 when the entry carries no line size).
 */

typedef unsigned char CacheTypeEntry;

typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;

struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;
};

/* Shorthand for a descriptor this table does not know about; it is
 * only meaningful inside the initializer below and is undefined after it. */
#define CM_UNK { Cache_UNKNOWN, 0 }

/* Four descriptors per row; the comment gives the first index of the row. */
static const struct _cache CacheMap[256] = {
    /* 00 */ { Cache_NONE, 0 },  { Cache_TLBi, 0 },  { Cache_TLBi, 0 },  { Cache_TLBd, 0 },
    /* 04 */ { Cache_TLBd, 0 },  CM_UNK,             { Cache_L1i, 32 },  CM_UNK,
    /* 08 */ { Cache_L1i, 32 },  CM_UNK,             { Cache_L1d, 32 },  CM_UNK,
    /* 0c */ { Cache_L1d, 32 },  CM_UNK,             CM_UNK,             CM_UNK,
    /* 10 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 14 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 18 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 1c */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 20 */ CM_UNK,             CM_UNK,             { Cache_L3, 64 },   { Cache_L3, 64 },
    /* 24 */ CM_UNK,             { Cache_L3, 64 },   CM_UNK,             CM_UNK,
    /* 28 */ CM_UNK,             { Cache_L3, 64 },   CM_UNK,             CM_UNK,
    /* 2c */ { Cache_L1d, 64 },  CM_UNK,             CM_UNK,             CM_UNK,
    /* 30 */ { Cache_L1i, 64 },  CM_UNK,             CM_UNK,             CM_UNK,
    /* 34 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 38 */ CM_UNK,             { Cache_L2, 64 },   CM_UNK,             { Cache_L2, 64 },
    /* 3c */ { Cache_L2, 64 },   CM_UNK,             CM_UNK,             CM_UNK,
    /* 40 */ { Cache_L2, 0 },    { Cache_L2, 32 },   { Cache_L2, 32 },   { Cache_L2, 32 },
    /* 44 */ { Cache_L2, 32 },   { Cache_L2, 32 },   CM_UNK,             CM_UNK,
    /* 48 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 4c */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 50 */ { Cache_TLBi, 0 },  { Cache_TLBi, 0 },  { Cache_TLBi, 0 },  CM_UNK,
    /* 54 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 58 */ CM_UNK,             CM_UNK,             CM_UNK,             { Cache_TLBd, 0 },
    /* 5c */ { Cache_TLBd, 0 },  { Cache_TLBd, 0 },  CM_UNK,             CM_UNK,
    /* 60 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 64 */ CM_UNK,             CM_UNK,             { Cache_L1d, 64 },  { Cache_L1d, 64 },
    /* 68 */ { Cache_L1d, 64 },  CM_UNK,             CM_UNK,             CM_UNK,
    /* 6c */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 70 */ { Cache_Trace, 1 }, { Cache_Trace, 1 }, { Cache_Trace, 1 }, CM_UNK,
    /* 74 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 78 */ CM_UNK,             { Cache_L2, 64 },   { Cache_L2, 64 },   { Cache_L2, 64 },
    /* 7c */ { Cache_L2, 64 },   CM_UNK,             CM_UNK,             CM_UNK,
    /* 80 */ CM_UNK,             CM_UNK,             { Cache_L2, 32 },   { Cache_L2, 32 },
    /* 84 */ { Cache_L2, 32 },   { Cache_L2, 32 },   { Cache_L2, 64 },   { Cache_L2, 64 },
    /* 88 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 8c */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 90 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 94 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 98 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* 9c */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* a0 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* a4 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* a8 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* ac */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* b0 */ { Cache_TLBi, 0 },  CM_UNK,             CM_UNK,             { Cache_TLBd, 0 },
    /* b4 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* b8 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* bc */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* c0 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* c4 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* c8 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* cc */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* d0 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* d4 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* d8 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* dc */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* e0 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* e4 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* e8 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* ec */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* f0 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* f4 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* f8 */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK,
    /* fc */ CM_UNK,             CM_UNK,             CM_UNK,             CM_UNK
};

#undef CM_UNK
503
504
505 /*
506 * use the above table to determine the CacheEntryLineSize.
507 */
508 static void
509 getIntelCacheEntryLineSize(unsigned long val, int *level,
510 unsigned long *lineSize)
511 {
512 CacheType type;
513
514 type = CacheMap[val].type;
515 /* only interested in data caches */
516 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
517 * this data check has the side effect of rejecting that entry. If
518 * that wasn't the case, we could have to reject it explicitly */
519 if (CacheMap[val].lineSize == 0) {
520 return;
521 }
522 /* look at the caches, skip types we aren't interested in.
523 * if we already have a value for a lower level cache, skip the
524 * current entry */
525 if ((type == Cache_L1)|| (type == Cache_L1d)) {
526 *level = 1;
527 *lineSize = CacheMap[val].lineSize;
528 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
529 *level = 2;
530 *lineSize = CacheMap[val].lineSize;
531 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
532 *level = 3;
533 *lineSize = CacheMap[val].lineSize;
534 }
535 return;
536 }
537
538
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each one to getIntelCacheEntryLineSize(), most significant byte first.
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
548
/*
 * Determine the data cache line size from Intel's cpuid '2' descriptors.
 *
 *   cpuidLevel - highest standard cpuid command the processor supports
 *
 * Returns '0' if no recognized cache is found, or if the cache
 * information is NOT supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;      /* higher than any real level: nothing found yet */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);

    /* The count occupies all of 'al' (the low 8 bits), not just the low
     * nibble. */
    repeat = eax & 0xff;

    for (count = 0; count < repeat; count++) {
        /* the first pass consumes the registers fetched above; every
         * later pass needs a fresh cpuid '2' call */
        if (count != 0) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
        if ((eax & 0x80000000) == 0) {
            /* the low byte of eax is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
    }
    return lineSize;
}
596
/*
 * Returns '0' if the cache info is not supported by this processor.
 * This is based on the AMD extended cache commands for cpuid
 * (see "AMD Processor Recognition Application Note" Publication 20734).
 * Some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The incoming cpuidLevel value is not consulted: it is overwritten with
 * the extended cpuid level reported by command 0x80000000.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* get the Extended CPUID level */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff;      /* line size, L1 Data Cache */
}
620
/*
 * CPUID vendor identification strings.  Each manufacturer macro is the
 * index of the string that follows it, so manMap[<MACRO>] is that
 * vendor's 12-character id.  MAN_UNKNOWN is one past the last valid index
 * and therefore never collides with a real table entry.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "SiS SiS SiS ",
#define NATIONAL  9
    "Geode by NSC",
};

static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);


#define MAN_UNKNOWN 10
648
#if !defined(AMD_64)
#define SSE2_FLAG (1<<26)
/*
 * Report whether the processor advertises SSE2.
 *
 * Returns 1 when bit 26 (SSE2_FLAG) of edx from cpuid command 1 is set,
 * and 0 otherwise -- including on 386/486 class processors and when cpuid
 * reports a maximum command level of 0.
 *
 * The vendor string is deliberately not examined: the answer depends only
 * on the feature bit.
 */
unsigned long
s_mpi_is_sse2()
{
    unsigned long eax, ebx, ecx, edx;

    if (is386() || is486()) {
        return 0;
    }

    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);

    /* has no SSE2 extensions */
    if (eax == 0) {
        return 0;
    }

    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    return (edx & SSE2_FLAG) == SSE2_FLAG;
}
#endif
684
685 unsigned long
686 s_mpi_getProcessorLineSize()
687 {
688 unsigned long eax, ebx, ecx, edx;
689 unsigned long cpuidLevel;
690 unsigned long cacheLineSize = 0;
691 int manufacturer = MAN_UNKNOWN;
692 int i;
693 char string[65];
694
695 #if !defined(AMD_64)
696 if (is386()) {
697 return 0; /* 386 had no cache */
698 } if (is486()) {
699 return 32; /* really? need more info */
700 }
701 #endif
702
703 /* Pentium, cpuid command is available */
704 freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
705 cpuidLevel = eax;
706 *(int *)string = ebx;
707 *(int *)&string[4] = edx;
708 *(int *)&string[8] = ecx;
709 string[12] = 0;
710
711 manufacturer = MAN_UNKNOWN;
712 for (i=0; i < n_manufacturers; i++) {
713 if ( strcmp(manMap[i],string) == 0) {
714 manufacturer = i;
715 }
716 }
717
718 if (manufacturer == INTEL) {
719 cacheLineSize = getIntelCacheLineSize(cpuidLevel);
720 } else {
721 cacheLineSize = getOtherCacheLineSize(cpuidLevel);
722 }
723 /* doesn't support cache info based on cpuid. This means
724 * an old pentium class processor, which have cache lines of
725 * 32. If we learn differently, we can use a switch based on
726 * the Manufacturer id */
727 if (cacheLineSize == 0) {
728 cacheLineSize = 32;
729 }
730 return cacheLineSize;
731 }
732 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
733 #endif
734
735 #if defined(__ppc64__)
736 /*
737 * Sigh, The PPC has some really nice features to help us determine cache
738 * size, since it had lots of direct control functions to do so. The POWER
739 * processor even has an instruction to do this, but it was dropped in
740 * PowerPC. Unfortunately most of them are not available in user mode.
741 *
742 * The dcbz function would be a great way to determine cache line size except
743 * 1) it only works on write-back memory (it throws an exception otherwise),
744 * and 2) because so many mac programs 'knew' the processor cache size was
745 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
746 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
747 * these programs happy. dcbzl works if 64 bit instructions are supported.
748 * If you know 64 bit instructions are supported, and that stack is
749 * write-back, you can use this code.
750 */
751 #include "memory.h"
752
753 /* clear the cache line that contains 'array' */
754 static inline void dcbzl(char *array)
755 {
756 register char *a asm("r2") = array;
757 __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
758 }
759
760
/* Round pointer x up to the next multiple of y (y must be a power of 2). */
#define PPC_DO_ALIGN(x,y) ((char *)\
                    ((((long long) (x))+((y)-1))&~((y)-1)))

/* Largest cache line size (in bytes) the probe below can detect. */
#define PPC_MAX_LINE_SIZE 256

/*
 * Return the cache line size in bytes (ppc64 version), measured by
 * filling a buffer with 0xff, clearing one cache block with dcbzl and
 * looking at how much of the buffer was zeroed.  Returns 0 if nothing
 * was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    char testArray[2*PPC_MAX_LINE_SIZE+1];
    char *probe;
    int size;

    /* align the probe area on a maximum line size boundary, so we
     * know we are starting to clear from the first address */
    probe = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);

    /* set all the values to 1's */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);

    /* clear one cache block starting at 'probe' */
    dcbzl(probe);

    /* find the size of the cleared area, that's our block size: try each
     * power of two from the maximum downwards and report the first one
     * whose last byte has been zeroed */
    size = PPC_MAX_LINE_SIZE;
    while (size != 0) {
        if (probe[size-1] == 0) {
            return size;
        }
        size = size/2;
    }
    return 0;
}
788
789 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
790 #endif
791
792
793 /*
794 * put other processor and platform specific cache code here
795 * return the smallest cache line size in bytes on the processor
796 * (usually the L1 cache). If the OS has a call, this would be
797 * a great place to put it.
798 *
799 * If there is no cache, return 0;
800 *
801 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
802 * below aren't compiled.
803 *
804 */
805
806
807 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
808 * OS */
809 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED )
810
811 unsigned long
812 s_mpi_getProcessorLineSize()
813 {
814 return MPI_CACHE_LINE_SIZE;
815 }
816 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
817 #endif
818
819
820 /* If no way to get the processor cache line size has been defined, assume
821 * it's 32 bytes (most common value, does not significantly impact performance)
822 */
823 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
/*
 * Fallback used when no other way of obtaining the cache line size has
 * been compiled in: report 32 bytes.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long assumedLineSize = 32;

    return assumedLineSize;
}
829 #endif
830
#ifdef TEST_IT
#include <stdio.h>

/*
 * Stand-alone test driver (built only when TEST_IT is defined): print the
 * line size reported by s_mpi_getProcessorLineSize().
 */
int main(void)
{
    /* the function returns unsigned long, so print it with %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
#endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698