OLD | NEW |
(Empty) | |
| 1 /* ***** BEGIN LICENSE BLOCK ***** |
| 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
| 3 * |
| 4 * The contents of this file are subject to the Mozilla Public License Version |
| 5 * 1.1 (the "License"); you may not use this file except in compliance with |
| 6 * the License. You may obtain a copy of the License at |
| 7 * http://www.mozilla.org/MPL/ |
| 8 * |
| 9 * Software distributed under the License is distributed on an "AS IS" basis, |
| 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
| 11 * for the specific language governing rights and limitations under the |
| 12 * License. |
| 13 * |
| 14 * The Original Code is the Netscape security libraries. |
| 15 * |
| 16 * The Initial Developer of the Original Code is |
| 17 * Red Hat, Inc |
| 18 * Portions created by the Initial Developer are Copyright (C) 2005 |
| 19 * the Initial Developer. All Rights Reserved. |
| 20 * |
| 21 * Contributor(s): |
| 22 * Robert Relyea <rrelyea@redhat.com> |
| 23 * |
| 24 * Alternatively, the contents of this file may be used under the terms of |
| 25 * either the GNU General Public License Version 2 or later (the "GPL"), or |
| 26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
| 27 * in which case the provisions of the GPL or the LGPL are applicable instead |
| 28 * of those above. If you wish to allow use of your version of this file only |
| 29 * under the terms of either the GPL or the LGPL, and not to allow others to |
| 30 * use your version of this file under the terms of the MPL, indicate your |
| 31 * decision by deleting the provisions above and replace them with the notice |
| 32 * and other provisions required by the GPL or the LGPL. If you do not delete |
| 33 * the provisions above, a recipient may use your version of this file under |
| 34 * the terms of any one of the MPL, the GPL or the LGPL. |
| 35 * |
| 36 * ***** END LICENSE BLOCK ***** */ |
| 37 |
| 38 #include "mpi.h" |
| 39 |
| 40 /* |
| 41 * This file implements a single function: s_mpi_getProcessorLineSize(); |
| 42 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line |
| 43 * if a cache exists, or zero if there is no cache. If more than one |
| 44 * cache line exists, it should return the smallest line size (which is |
| 45 * usually the L1 cache). |
| 46 * |
| 47 * mp_modexp uses this information to make sure that private key information |
| 48 * isn't being leaked through the cache. |
| 49 * |
| 50 * Currently the file returns good data for most modern x86 processors, and |
| 51 * reasonable data on 64-bit ppc processors. All other processors are assumed |
| 52 * to have a cache line size of 32 bytes unless modified by target.mk. |
| 53 * |
| 54 */ |
| 55 |
| 56 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) ||
defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
| 57 /* X86 processors have special instructions that tell us about the cache */ |
| 58 #include "string.h" |
| 59 |
| 60 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
| 61 #define AMD_64 1 |
| 62 #endif |
| 63 |
| 64 /* Generic CPUID function */ |
| 65 #if defined(AMD_64) |
| 66 |
| 67 #if defined(__GNUC__) |
| 68 |
/*
 * Run the CPUID instruction for leaf 'op' (x86-64, GCC flavour).
 * The four result registers are captured into locals and then stored
 * through the caller supplied pointers eax, ebx, ecx and edx.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    unsigned long regA, regB, regC, regD;

    /* 'op' is tied to operand 0, i.e. it is loaded into the a register */
    __asm__("cpuid\n\t"
            : "=a" (regA),
              "=b" (regB),
              "=c" (regC),
              "=d" (regD)
            : "0" (op));

    *eax = regA;
    *ebx = regB;
    *ecx = regC;
    *edx = regD;
}
| 80 |
| 81 #elif defined(_MSC_VER) |
| 82 |
| 83 #include <intrin.h> |
| 84 |
/*
 * Run the CPUID instruction for leaf 'op' (x86-64, MSVC flavour) using the
 * __cpuid intrinsic, and hand the four result words back through the
 * eax, ebx, ecx and edx pointers.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* the intrinsic fills the array in eax, ebx, ecx, edx order */
    int regs[4];

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
| 97 |
| 98 #endif |
| 99 |
| 100 #else /* !defined(AMD_64) */ |
| 101 |
| 102 /* x86 */ |
| 103 |
| 104 #if defined(__GNUC__) |
/*
 * Run the CPUID instruction for leaf 'op' (32-bit x86, GCC flavour) and
 * store the resulting register values through eax, ebx, ecx and edx.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* sigh GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. */
    __asm__("pushl %%ebx\n\t"       /* save the caller's ebx */
            "cpuid\n\t"
            "mov %%ebx,%1\n\t"      /* copy cpuid's ebx result to operand 1 */
            "popl %%ebx\n\t"        /* put the saved ebx back */
            : "=a" (*eax),
              "=r" (*ebx),          /* any general register, not ebx itself */
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));            /* op shares the register of operand 0 */
}
| 121 |
/*
 * try flipping a processor flag to determine CPU type
 *
 * 'flag' is the mask of flag-register bit(s) to toggle.  The flags register
 * is read, the masked bits are inverted and written back, the register is
 * read again, and finally the original value is restored.  The return
 * value is the XOR of the re-read and original values, so a result of 0
 * means the requested bit(s) could not be changed.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    /* operand %0 = changedFlags, %1 = originalFlags, %2 = flag (in and out) */
    __asm__("pushfl\n\t"            /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"        /* save the original flags */
            "xorl %2,%0\n\t"        /* flip the bit */
            "pushl %0\n\t"          /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"            /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"          /* restore the original flags */
            "popfl\n\t"
            : "=r" (changedFlags),
              "=r" (originalFlags),
              "=r" (flag)
            : "2" (flag));
    /* only the bits that actually toggled survive the XOR */
    return changedFlags ^ originalFlags;
}
| 144 |
| 145 #elif defined(_MSC_VER) |
| 146 |
/*
 * MSVC inline-assembler version of freebl_cpuid for 32-bit x86.
 *
 * wcpuid emits the two bytes 0fh 0a2h directly instead of using a cpuid
 * mnemonic.  freebl_cpuid loads 'op' into eax, executes those bytes, and
 * returns the resulting eax/ebx/ecx/edx values through Reax, Rebx, Recx
 * and Redx.
 */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
void freebl_cpuid(unsigned long op, unsigned long *Reax,
                  unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
{
    /* results are parked in locals so the registers can be restored by popad
     * before the output pointers are dereferenced */
    unsigned long Leax, Lebx, Lecx, Ledx;
    __asm {
        pushad
        mov eax,op
        wcpuid
        mov Leax,eax
        mov Lebx,ebx
        mov Lecx,ecx
        mov Ledx,edx
        popad
    }
    *Reax = Leax;
    *Rebx = Lebx;
    *Recx = Lecx;
    *Redx = Ledx;
}
| 170 |
/*
 * MSVC inline-assembler version of changeFlag for 32-bit x86.
 *
 * Inverts the flag-register bit(s) selected by 'flag', reads the flags
 * back, restores the original flags, and returns the XOR of the re-read
 * and original values: 0 means the bit(s) could not be changed.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm {
        push eax                    /* eax and ebx are used as scratch */
        push ebx
        pushfd                      /* get the flags */
        pop eax
        push eax                    /* save the flags on the stack */
        mov originalFlags,eax       /* save the original flags */
        mov ebx,flag
        xor eax,ebx                 /* flip the bit */
        push eax                    /* set the flags */
        popfd
        pushfd                      /* get the flags again (for return) */
        pop eax
        popfd                       /* restore the original flags */
        mov changedFlags,eax
        pop ebx
        pop eax
    }
    /* only the bits that actually toggled survive the XOR */
    return changedFlags ^ originalFlags;
}
| 194 #endif |
| 195 |
| 196 #endif |
| 197 |
| 198 #if !defined(AMD_64) |
| 199 #define AC_FLAG 0x40000 |
| 200 #define ID_FLAG 0x200000 |
| 201 |
| 202 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ |
| 203 static int is386() |
| 204 { |
| 205 return changeFlag(AC_FLAG) == 0; |
| 206 } |
| 207 |
| 208 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ |
| 209 static int is486() |
| 210 { |
| 211 return changeFlag(ID_FLAG) == 0; |
| 212 } |
| 213 #endif |
| 214 |
| 215 |
| 216 /* |
| 217 * table for Intel Cache. |
| 218 * See Intel Application Note AP-485 for more information |
| 219 */ |
| 220 |
/* storage type used for a CacheType value inside the (compact) table */
typedef unsigned char CacheTypeEntry;

/* kinds of structure a cache descriptor byte can describe */
typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;

/* one table row: what the descriptor describes and its line size */
struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;   /* 0 for entries that carry no line size */
};

/*
 * Descriptor table, indexed by the descriptor byte value (0x00 - 0xff).
 * Every one of the 256 slots is spelled out explicitly, including the
 * line size of each row, so the index in the leading comment always
 * matches the array position.
 */
static const struct _cache CacheMap[256] = {
/* 00 */ {Cache_NONE,    0   },
/* 01 */ {Cache_TLBi,    0   },
/* 02 */ {Cache_TLBi,    0   },
/* 03 */ {Cache_TLBd,    0   },
/* 04 */ {Cache_TLBd,    0   },
/* 05 */ {Cache_UNKNOWN, 0   },
/* 06 */ {Cache_L1i,     32  },
/* 07 */ {Cache_UNKNOWN, 0   },
/* 08 */ {Cache_L1i,     32  },
/* 09 */ {Cache_UNKNOWN, 0   },
/* 0a */ {Cache_L1d,     32  },
/* 0b */ {Cache_UNKNOWN, 0   },
/* 0c */ {Cache_L1d,     32  },
/* 0d */ {Cache_UNKNOWN, 0   },
/* 0e */ {Cache_UNKNOWN, 0   },
/* 0f */ {Cache_UNKNOWN, 0   },
/* 10 */ {Cache_UNKNOWN, 0   },
/* 11 */ {Cache_UNKNOWN, 0   },
/* 12 */ {Cache_UNKNOWN, 0   },
/* 13 */ {Cache_UNKNOWN, 0   },
/* 14 */ {Cache_UNKNOWN, 0   },
/* 15 */ {Cache_UNKNOWN, 0   },
/* 16 */ {Cache_UNKNOWN, 0   },
/* 17 */ {Cache_UNKNOWN, 0   },
/* 18 */ {Cache_UNKNOWN, 0   },
/* 19 */ {Cache_UNKNOWN, 0   },
/* 1a */ {Cache_UNKNOWN, 0   },
/* 1b */ {Cache_UNKNOWN, 0   },
/* 1c */ {Cache_UNKNOWN, 0   },
/* 1d */ {Cache_UNKNOWN, 0   },
/* 1e */ {Cache_UNKNOWN, 0   },
/* 1f */ {Cache_UNKNOWN, 0   },
/* 20 */ {Cache_UNKNOWN, 0   },
/* 21 */ {Cache_UNKNOWN, 0   },
/* 22 */ {Cache_L3,      64  },
/* 23 */ {Cache_L3,      64  },
/* 24 */ {Cache_UNKNOWN, 0   },
/* 25 */ {Cache_L3,      64  },
/* 26 */ {Cache_UNKNOWN, 0   },
/* 27 */ {Cache_UNKNOWN, 0   },
/* 28 */ {Cache_UNKNOWN, 0   },
/* 29 */ {Cache_L3,      64  },
/* 2a */ {Cache_UNKNOWN, 0   },
/* 2b */ {Cache_UNKNOWN, 0   },
/* 2c */ {Cache_L1d,     64  },
/* 2d */ {Cache_UNKNOWN, 0   },
/* 2e */ {Cache_UNKNOWN, 0   },
/* 2f */ {Cache_UNKNOWN, 0   },
/* 30 */ {Cache_L1i,     64  },
/* 31 */ {Cache_UNKNOWN, 0   },
/* 32 */ {Cache_UNKNOWN, 0   },
/* 33 */ {Cache_UNKNOWN, 0   },
/* 34 */ {Cache_UNKNOWN, 0   },
/* 35 */ {Cache_UNKNOWN, 0   },
/* 36 */ {Cache_UNKNOWN, 0   },
/* 37 */ {Cache_UNKNOWN, 0   },
/* 38 */ {Cache_UNKNOWN, 0   },
/* 39 */ {Cache_L2,      64  },
/* 3a */ {Cache_UNKNOWN, 0   },
/* 3b */ {Cache_L2,      64  },
/* 3c */ {Cache_L2,      64  },
/* 3d */ {Cache_UNKNOWN, 0   },
/* 3e */ {Cache_UNKNOWN, 0   },
/* 3f */ {Cache_UNKNOWN, 0   },
/* 40 */ {Cache_L2,      0   },
/* 41 */ {Cache_L2,      32  },
/* 42 */ {Cache_L2,      32  },
/* 43 */ {Cache_L2,      32  },
/* 44 */ {Cache_L2,      32  },
/* 45 */ {Cache_L2,      32  },
/* 46 */ {Cache_UNKNOWN, 0   },
/* 47 */ {Cache_UNKNOWN, 0   },
/* 48 */ {Cache_UNKNOWN, 0   },
/* 49 */ {Cache_UNKNOWN, 0   },
/* 4a */ {Cache_UNKNOWN, 0   },
/* 4b */ {Cache_UNKNOWN, 0   },
/* 4c */ {Cache_UNKNOWN, 0   },
/* 4d */ {Cache_UNKNOWN, 0   },
/* 4e */ {Cache_UNKNOWN, 0   },
/* 4f */ {Cache_UNKNOWN, 0   },
/* 50 */ {Cache_TLBi,    0   },
/* 51 */ {Cache_TLBi,    0   },
/* 52 */ {Cache_TLBi,    0   },
/* 53 */ {Cache_UNKNOWN, 0   },
/* 54 */ {Cache_UNKNOWN, 0   },
/* 55 */ {Cache_UNKNOWN, 0   },
/* 56 */ {Cache_UNKNOWN, 0   },
/* 57 */ {Cache_UNKNOWN, 0   },
/* 58 */ {Cache_UNKNOWN, 0   },
/* 59 */ {Cache_UNKNOWN, 0   },
/* 5a */ {Cache_UNKNOWN, 0   },
/* 5b */ {Cache_TLBd,    0   },
/* 5c */ {Cache_TLBd,    0   },
/* 5d */ {Cache_TLBd,    0   },
/* 5e */ {Cache_UNKNOWN, 0   },
/* 5f */ {Cache_UNKNOWN, 0   },
/* 60 */ {Cache_UNKNOWN, 0   },
/* 61 */ {Cache_UNKNOWN, 0   },
/* 62 */ {Cache_UNKNOWN, 0   },
/* 63 */ {Cache_UNKNOWN, 0   },
/* 64 */ {Cache_UNKNOWN, 0   },
/* 65 */ {Cache_UNKNOWN, 0   },
/* 66 */ {Cache_L1d,     64  },
/* 67 */ {Cache_L1d,     64  },
/* 68 */ {Cache_L1d,     64  },
/* 69 */ {Cache_UNKNOWN, 0   },
/* 6a */ {Cache_UNKNOWN, 0   },
/* 6b */ {Cache_UNKNOWN, 0   },
/* 6c */ {Cache_UNKNOWN, 0   },
/* 6d */ {Cache_UNKNOWN, 0   },
/* 6e */ {Cache_UNKNOWN, 0   },
/* 6f */ {Cache_UNKNOWN, 0   },
/* 70 */ {Cache_Trace,   1   },
/* 71 */ {Cache_Trace,   1   },
/* 72 */ {Cache_Trace,   1   },
/* 73 */ {Cache_UNKNOWN, 0   },
/* 74 */ {Cache_UNKNOWN, 0   },
/* 75 */ {Cache_UNKNOWN, 0   },
/* 76 */ {Cache_UNKNOWN, 0   },
/* 77 */ {Cache_UNKNOWN, 0   },
/* 78 */ {Cache_UNKNOWN, 0   },
/* 79 */ {Cache_L2,      64  },
/* 7a */ {Cache_L2,      64  },
/* 7b */ {Cache_L2,      64  },
/* 7c */ {Cache_L2,      64  },
/* 7d */ {Cache_UNKNOWN, 0   },
/* 7e */ {Cache_UNKNOWN, 0   },
/* 7f */ {Cache_UNKNOWN, 0   },
/* 80 */ {Cache_UNKNOWN, 0   },
/* 81 */ {Cache_UNKNOWN, 0   },
/* 82 */ {Cache_L2,      32  },
/* 83 */ {Cache_L2,      32  },
/* 84 */ {Cache_L2,      32  },
/* 85 */ {Cache_L2,      32  },
/* 86 */ {Cache_L2,      64  },
/* 87 */ {Cache_L2,      64  },
/* 88 */ {Cache_UNKNOWN, 0   },
/* 89 */ {Cache_UNKNOWN, 0   },
/* 8a */ {Cache_UNKNOWN, 0   },
/* 8b */ {Cache_UNKNOWN, 0   },
/* 8c */ {Cache_UNKNOWN, 0   },
/* 8d */ {Cache_UNKNOWN, 0   },
/* 8e */ {Cache_UNKNOWN, 0   },
/* 8f */ {Cache_UNKNOWN, 0   },
/* 90 */ {Cache_UNKNOWN, 0   },
/* 91 */ {Cache_UNKNOWN, 0   },
/* 92 */ {Cache_UNKNOWN, 0   },
/* 93 */ {Cache_UNKNOWN, 0   },
/* 94 */ {Cache_UNKNOWN, 0   },
/* 95 */ {Cache_UNKNOWN, 0   },
/* 96 */ {Cache_UNKNOWN, 0   },
/* 97 */ {Cache_UNKNOWN, 0   },
/* 98 */ {Cache_UNKNOWN, 0   },
/* 99 */ {Cache_UNKNOWN, 0   },
/* 9a */ {Cache_UNKNOWN, 0   },
/* 9b */ {Cache_UNKNOWN, 0   },
/* 9c */ {Cache_UNKNOWN, 0   },
/* 9d */ {Cache_UNKNOWN, 0   },
/* 9e */ {Cache_UNKNOWN, 0   },
/* 9f */ {Cache_UNKNOWN, 0   },
/* a0 */ {Cache_UNKNOWN, 0   },
/* a1 */ {Cache_UNKNOWN, 0   },
/* a2 */ {Cache_UNKNOWN, 0   },
/* a3 */ {Cache_UNKNOWN, 0   },
/* a4 */ {Cache_UNKNOWN, 0   },
/* a5 */ {Cache_UNKNOWN, 0   },
/* a6 */ {Cache_UNKNOWN, 0   },
/* a7 */ {Cache_UNKNOWN, 0   },
/* a8 */ {Cache_UNKNOWN, 0   },
/* a9 */ {Cache_UNKNOWN, 0   },
/* aa */ {Cache_UNKNOWN, 0   },
/* ab */ {Cache_UNKNOWN, 0   },
/* ac */ {Cache_UNKNOWN, 0   },
/* ad */ {Cache_UNKNOWN, 0   },
/* ae */ {Cache_UNKNOWN, 0   },
/* af */ {Cache_UNKNOWN, 0   },
/* b0 */ {Cache_TLBi,    0   },
/* b1 */ {Cache_UNKNOWN, 0   },
/* b2 */ {Cache_UNKNOWN, 0   },
/* b3 */ {Cache_TLBd,    0   },
/* b4 */ {Cache_UNKNOWN, 0   },
/* b5 */ {Cache_UNKNOWN, 0   },
/* b6 */ {Cache_UNKNOWN, 0   },
/* b7 */ {Cache_UNKNOWN, 0   },
/* b8 */ {Cache_UNKNOWN, 0   },
/* b9 */ {Cache_UNKNOWN, 0   },
/* ba */ {Cache_UNKNOWN, 0   },
/* bb */ {Cache_UNKNOWN, 0   },
/* bc */ {Cache_UNKNOWN, 0   },
/* bd */ {Cache_UNKNOWN, 0   },
/* be */ {Cache_UNKNOWN, 0   },
/* bf */ {Cache_UNKNOWN, 0   },
/* c0 */ {Cache_UNKNOWN, 0   },
/* c1 */ {Cache_UNKNOWN, 0   },
/* c2 */ {Cache_UNKNOWN, 0   },
/* c3 */ {Cache_UNKNOWN, 0   },
/* c4 */ {Cache_UNKNOWN, 0   },
/* c5 */ {Cache_UNKNOWN, 0   },
/* c6 */ {Cache_UNKNOWN, 0   },
/* c7 */ {Cache_UNKNOWN, 0   },
/* c8 */ {Cache_UNKNOWN, 0   },
/* c9 */ {Cache_UNKNOWN, 0   },
/* ca */ {Cache_UNKNOWN, 0   },
/* cb */ {Cache_UNKNOWN, 0   },
/* cc */ {Cache_UNKNOWN, 0   },
/* cd */ {Cache_UNKNOWN, 0   },
/* ce */ {Cache_UNKNOWN, 0   },
/* cf */ {Cache_UNKNOWN, 0   },
/* d0 */ {Cache_UNKNOWN, 0   },
/* d1 */ {Cache_UNKNOWN, 0   },
/* d2 */ {Cache_UNKNOWN, 0   },
/* d3 */ {Cache_UNKNOWN, 0   },
/* d4 */ {Cache_UNKNOWN, 0   },
/* d5 */ {Cache_UNKNOWN, 0   },
/* d6 */ {Cache_UNKNOWN, 0   },
/* d7 */ {Cache_UNKNOWN, 0   },
/* d8 */ {Cache_UNKNOWN, 0   },
/* d9 */ {Cache_UNKNOWN, 0   },
/* da */ {Cache_UNKNOWN, 0   },
/* db */ {Cache_UNKNOWN, 0   },
/* dc */ {Cache_UNKNOWN, 0   },
/* dd */ {Cache_UNKNOWN, 0   },
/* de */ {Cache_UNKNOWN, 0   },
/* df */ {Cache_UNKNOWN, 0   },
/* e0 */ {Cache_UNKNOWN, 0   },
/* e1 */ {Cache_UNKNOWN, 0   },
/* e2 */ {Cache_UNKNOWN, 0   },
/* e3 */ {Cache_UNKNOWN, 0   },
/* e4 */ {Cache_UNKNOWN, 0   },
/* e5 */ {Cache_UNKNOWN, 0   },
/* e6 */ {Cache_UNKNOWN, 0   },
/* e7 */ {Cache_UNKNOWN, 0   },
/* e8 */ {Cache_UNKNOWN, 0   },
/* e9 */ {Cache_UNKNOWN, 0   },
/* ea */ {Cache_UNKNOWN, 0   },
/* eb */ {Cache_UNKNOWN, 0   },
/* ec */ {Cache_UNKNOWN, 0   },
/* ed */ {Cache_UNKNOWN, 0   },
/* ee */ {Cache_UNKNOWN, 0   },
/* ef */ {Cache_UNKNOWN, 0   },
/* f0 */ {Cache_UNKNOWN, 0   },
/* f1 */ {Cache_UNKNOWN, 0   },
/* f2 */ {Cache_UNKNOWN, 0   },
/* f3 */ {Cache_UNKNOWN, 0   },
/* f4 */ {Cache_UNKNOWN, 0   },
/* f5 */ {Cache_UNKNOWN, 0   },
/* f6 */ {Cache_UNKNOWN, 0   },
/* f7 */ {Cache_UNKNOWN, 0   },
/* f8 */ {Cache_UNKNOWN, 0   },
/* f9 */ {Cache_UNKNOWN, 0   },
/* fa */ {Cache_UNKNOWN, 0   },
/* fb */ {Cache_UNKNOWN, 0   },
/* fc */ {Cache_UNKNOWN, 0   },
/* fd */ {Cache_UNKNOWN, 0   },
/* fe */ {Cache_UNKNOWN, 0   },
/* ff */ {Cache_UNKNOWN, 0   }
};
| 503 |
| 504 |
| 505 /* |
| 506 * use the above table to determine the CacheEntryLineSize. |
| 507 */ |
| 508 static void |
| 509 getIntelCacheEntryLineSize(unsigned long val, int *level, |
| 510 unsigned long *lineSize) |
| 511 { |
| 512 CacheType type; |
| 513 |
| 514 type = CacheMap[val].type; |
| 515 /* only interested in data caches */ |
| 516 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. |
| 517 * this data check has the side effect of rejecting that entry. If |
| 518 * that wasn't the case, we could have to reject it explicitly */ |
| 519 if (CacheMap[val].lineSize == 0) { |
| 520 return; |
| 521 } |
| 522 /* look at the caches, skip types we aren't interested in. |
| 523 * if we already have a value for a lower level cache, skip the |
| 524 * current entry */ |
| 525 if ((type == Cache_L1)|| (type == Cache_L1d)) { |
| 526 *level = 1; |
| 527 *lineSize = CacheMap[val].lineSize; |
| 528 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { |
| 529 *level = 2; |
| 530 *lineSize = CacheMap[val].lineSize; |
| 531 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { |
| 532 *level = 3; |
| 533 *lineSize = CacheMap[val].lineSize; |
| 534 } |
| 535 return; |
| 536 } |
| 537 |
| 538 |
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each of them, most significant byte first, to
 * getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
| 548 |
/*
 * Determine the line size of the lowest level data cache using Intel's
 * cpuid command '2'.
 *
 *   cpuidLevel - highest standard cpuid command the processor supports
 *
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;      /* higher than any real level: nothing found yet */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all of 'al', i.e. the low 8 bits of eax */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        /* the first pass uses the values fetched above; every further
         * pass needs a fresh cpuid '2' call */
        if (count != 0) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
        if ((eax & 0x80000000) == 0) {
            /* mask off 'al': it is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
    }
    return lineSize;
}
| 596 |
/*
 * Query the L1 data cache line size through the extended cpuid commands
 * introduced by AMD (see "AMD Processor Recognition Application Note",
 * Publication 20734); some other vendors use the identical scheme (see
 * "Processor Recognition, Transmeta Corporation").
 *
 * The incoming cpuidLevel value is not consulted: it is replaced by the
 * extended cpuid level reported by command 0x80000000.
 *
 * returns '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* get the Extended CPUID level */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    /* command 0x80000005 has to be available to learn anything */
    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    /* line Size, L1 Data Cache: low byte of ecx */
    return ecx & 0xff;
}
| 620 |
/*
 * Table of cpuid vendor identification strings.  Each #define gives the
 * index of the string that follows it, so the macro values must track the
 * array positions exactly.  MAN_UNKNOWN is one past the last valid index
 * and therefore can never be confused with a real table entry.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "SiS SiS SiS ",
#define NATIONAL  9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);


/* value used when the vendor string matches no manMap entry */
#define MAN_UNKNOWN 10
| 648 |
| 649 #if !defined(AMD_64) |
| 650 #define SSE2_FLAG (1<<26) |
| 651 unsigned long |
| 652 s_mpi_is_sse2() |
| 653 { |
| 654 unsigned long eax, ebx, ecx, edx; |
| 655 int manufacturer = MAN_UNKNOWN; |
| 656 int i; |
| 657 char string[13]; |
| 658 |
| 659 if (is386() || is486()) { |
| 660 return 0; |
| 661 } |
| 662 freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
| 663 *(int *)string = ebx; |
| 664 *(int *)&string[4] = edx; |
| 665 *(int *)&string[8] = ecx; |
| 666 string[12] = 0; |
| 667 |
| 668 /* has no SSE2 extensions */ |
| 669 if (eax == 0) { |
| 670 return 0; |
| 671 } |
| 672 |
| 673 for (i=0; i < n_manufacturers; i++) { |
| 674 if ( strcmp(manMap[i],string) == 0) { |
| 675 manufacturer = i; |
| 676 break; |
| 677 } |
| 678 } |
| 679 |
| 680 freebl_cpuid(1,&eax,&ebx,&ecx,&edx); |
| 681 return (edx & SSE2_FLAG) == SSE2_FLAG; |
| 682 } |
| 683 #endif |
| 684 |
| 685 unsigned long |
| 686 s_mpi_getProcessorLineSize() |
| 687 { |
| 688 unsigned long eax, ebx, ecx, edx; |
| 689 unsigned long cpuidLevel; |
| 690 unsigned long cacheLineSize = 0; |
| 691 int manufacturer = MAN_UNKNOWN; |
| 692 int i; |
| 693 char string[65]; |
| 694 |
| 695 #if !defined(AMD_64) |
| 696 if (is386()) { |
| 697 return 0; /* 386 had no cache */ |
| 698 } if (is486()) { |
| 699 return 32; /* really? need more info */ |
| 700 } |
| 701 #endif |
| 702 |
| 703 /* Pentium, cpuid command is available */ |
| 704 freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
| 705 cpuidLevel = eax; |
| 706 *(int *)string = ebx; |
| 707 *(int *)&string[4] = edx; |
| 708 *(int *)&string[8] = ecx; |
| 709 string[12] = 0; |
| 710 |
| 711 manufacturer = MAN_UNKNOWN; |
| 712 for (i=0; i < n_manufacturers; i++) { |
| 713 if ( strcmp(manMap[i],string) == 0) { |
| 714 manufacturer = i; |
| 715 } |
| 716 } |
| 717 |
| 718 if (manufacturer == INTEL) { |
| 719 cacheLineSize = getIntelCacheLineSize(cpuidLevel); |
| 720 } else { |
| 721 cacheLineSize = getOtherCacheLineSize(cpuidLevel); |
| 722 } |
| 723 /* doesn't support cache info based on cpuid. This means |
| 724 * an old pentium class processor, which have cache lines of |
| 725 * 32. If we learn differently, we can use a switch based on |
| 726 * the Manufacturer id */ |
| 727 if (cacheLineSize == 0) { |
| 728 cacheLineSize = 32; |
| 729 } |
| 730 return cacheLineSize; |
| 731 } |
| 732 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
| 733 #endif |
| 734 |
| 735 #if defined(__ppc64__) |
| 736 /* |
| 737 * Sigh, The PPC has some really nice features to help us determine cache |
| 738 * size, since it had lots of direct control functions to do so. The POWER |
| 739 * processor even has an instruction to do this, but it was dropped in |
| 740 * PowerPC. Unfortunately most of them are not available in user mode. |
| 741 * |
| 742 * The dcbz function would be a great way to determine cache line size except |
| 743 * 1) it only works on write-back memory (it throws an exception otherwise), |
| 744 * and 2) because so many mac programs 'knew' the processor cache size was |
| 745 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new |
| 746 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep |
| 747 * these programs happy. dcbzl works if 64 bit instructions are supported. |
| 748 * If you know 64 bit instructions are supported, and that stack is |
| 749 * write-back, you can use this code. |
| 750 */ |
| 751 #include "memory.h" |
| 752 |
/*
 * clear the cache line that contains 'array'
 *
 * Issues a single dcbzl instruction.  The address is placed in a local
 * register variable pinned to r2, which is passed to the asm statement as
 * both its input and its output operand.
 * NOTE(review): the second instruction operand is the literal register
 * name r0; presumably it is expected to contribute zero to the effective
 * address -- confirm against the PowerPC instruction set documentation.
 */
static inline void dcbzl(char *array)
{
    register char *a asm("r2") = array;
    __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
}
| 759 |
| 760 |
/* round address 'x' up to the next multiple of 'y' ('y' a power of two) */
#define PPC_DO_ALIGN(x,y) ((char *)\
                 ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest cache line size, in bytes, this probe can detect */
#define PPC_MAX_LINE_SIZE 256

/*
 * Measure the cache line size by filling an aligned buffer with 0xff,
 * clearing one cache block with dcbzl(), and finding how far the zeroes
 * reach.  Candidate sizes are tried from PPC_MAX_LINE_SIZE downwards in
 * halving steps; 0 is returned when no byte was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    /* twice the maximum line size (+1) leaves room to align inside */
    char scratch[2*PPC_MAX_LINE_SIZE+1];
    /* start on a maximum line size boundary, so the cleared block is
     * known to begin at the first address */
    char *aligned = PPC_DO_ALIGN(scratch, PPC_MAX_LINE_SIZE);
    int span = PPC_MAX_LINE_SIZE;

    /* set all the values to 1's */
    memset(aligned, 0xff, PPC_MAX_LINE_SIZE);
    /* clear one cache block starting at 'aligned' */
    dcbzl(aligned);

    /* the largest candidate whose last byte reads zero is the block size */
    while (span != 0) {
        if (aligned[span-1] == 0) {
            return span;
        }
        span = span/2;
    }
    return 0;
}
| 788 |
| 789 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
| 790 #endif |
| 791 |
| 792 |
| 793 /* |
| 794 * put other processor and platform specific cache code here |
| 795 * return the smallest cache line size in bytes on the processor |
| 796 * (usually the L1 cache). If the OS has a call, this would be |
| 797 * a great place to put it. |
| 798 * |
| 799 * If there is no cache, return 0; |
| 800 * |
| 801 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions |
| 802 * below aren't compiled. |
| 803 * |
| 804 */ |
| 805 |
| 806 |
| 807 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or |
| 808 * OS */ |
| 809 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
) |
| 810 |
| 811 unsigned long |
| 812 s_mpi_getProcessorLineSize() |
| 813 { |
| 814 return MPI_CACHE_LINE_SIZE; |
| 815 } |
| 816 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
| 817 #endif |
| 818 |
| 819 |
| 820 /* If no way to get the processor cache line size has been defined, assume |
| 821 * it's 32 bytes (most common value, does not significantly impact performance) |
| 822 */ |
| 823 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED |
/*
 * Fallback variant used when no other way of obtaining the cache line
 * size was compiled in: always reports 32 bytes.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long assumedLineSize = 32;

    return assumedLineSize;
}
| 829 #endif |
| 830 |
| 831 #ifdef TEST_IT |
| 832 #include <stdio.h> |
| 833 |
/*
 * Stand-alone test driver (built only when TEST_IT is defined): prints
 * the value reported by s_mpi_getProcessorLineSize().
 */
int
main(void)
{
    /* the function returns unsigned long, so the conversion must be %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
| 838 #endif |
OLD | NEW |