OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Support for VIA PadLock Advanced Cryptography Engine (ACE) |
| 3 * Written by Michal Ludvig <michal@logix.cz> |
| 4 * http://www.logix.cz/michal |
| 5 * |
| 6 * Big thanks to Andy Polyakov for a help with optimization, |
| 7 * assembler fixes, port to MS Windows and a lot of other |
| 8 * valuable work on this engine! |
| 9 */ |
| 10 |
| 11 /* ==================================================================== |
| 12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. |
| 13 * |
| 14 * Redistribution and use in source and binary forms, with or without |
| 15 * modification, are permitted provided that the following conditions |
| 16 * are met: |
| 17 * |
| 18 * 1. Redistributions of source code must retain the above copyright |
| 19 * notice, this list of conditions and the following disclaimer. |
| 20 * |
| 21 * 2. Redistributions in binary form must reproduce the above copyright |
| 22 * notice, this list of conditions and the following disclaimer in |
| 23 * the documentation and/or other materials provided with the |
| 24 * distribution. |
| 25 * |
| 26 * 3. All advertising materials mentioning features or use of this |
| 27 * software must display the following acknowledgment: |
| 28 * "This product includes software developed by the OpenSSL Project |
| 29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 30 * |
| 31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 32 * endorse or promote products derived from this software without |
| 33 * prior written permission. For written permission, please contact |
| 34 * licensing@OpenSSL.org. |
| 35 * |
| 36 * 5. Products derived from this software may not be called "OpenSSL" |
| 37 * nor may "OpenSSL" appear in their names without prior written |
| 38 * permission of the OpenSSL Project. |
| 39 * |
| 40 * 6. Redistributions of any form whatsoever must retain the following |
| 41 * acknowledgment: |
| 42 * "This product includes software developed by the OpenSSL Project |
| 43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 44 * |
| 45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 56 * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 57 * ==================================================================== |
| 58 * |
| 59 * This product includes cryptographic software written by Eric Young |
| 60 * (eay@cryptsoft.com). This product includes software written by Tim |
| 61 * Hudson (tjh@cryptsoft.com). |
| 62 * |
| 63 */ |
| 64 |
| 65 |
| 66 #include <stdio.h> |
| 67 #include <string.h> |
| 68 |
| 69 #include <openssl/opensslconf.h> |
| 70 #include <openssl/crypto.h> |
| 71 #include <openssl/dso.h> |
| 72 #include <openssl/engine.h> |
| 73 #include <openssl/evp.h> |
| 74 #ifndef OPENSSL_NO_AES |
| 75 #include <openssl/aes.h> |
| 76 #endif |
| 77 #include <openssl/rand.h> |
| 78 #include <openssl/err.h> |
| 79 |
| 80 #ifndef OPENSSL_NO_HW |
| 81 #ifndef OPENSSL_NO_HW_PADLOCK |
| 82 |
| 83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ |
| 84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L) |
| 85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE |
| 86 # define DYNAMIC_ENGINE |
| 87 # endif |
| 88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) |
| 89 # ifdef ENGINE_DYNAMIC_SUPPORT |
| 90 # define DYNAMIC_ENGINE |
| 91 # endif |
| 92 #else |
| 93 # error "Only OpenSSL >= 0.9.7 is supported" |
| 94 #endif |
| 95 |
| 96 /* VIA PadLock AES is available *ONLY* on some x86 CPUs. |
| 97 Not only that it doesn't exist elsewhere, but it |
| 98 even can't be compiled on other platforms! |
| 99 |
| 100 In addition, because of the heavy use of inline assembler, |
| 101 compiler choice is limited to GCC and Microsoft C. */ |
| 102 #undef COMPILE_HW_PADLOCK |
| 103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) |
| 104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ |
| 105 (defined(_MSC_VER) && defined(_M_IX86)) |
| 106 # define COMPILE_HW_PADLOCK |
| 107 static ENGINE *ENGINE_padlock (void); |
| 108 # endif |
| 109 #endif |
| 110 |
#ifdef OPENSSL_NO_DYNAMIC_ENGINE

/*
 * Register the PadLock engine with the global ENGINE list.
 * On platforms/compilers where the engine cannot be built
 * (COMPILE_HW_PADLOCK undefined) this is a no-op.
 */
void ENGINE_load_padlock(void)
{
#ifdef COMPILE_HW_PADLOCK
	ENGINE *engine = ENGINE_padlock();

	if (engine != NULL) {
		ENGINE_add(engine);
		/* Release the local reference after registration. */
		ENGINE_free(engine);
		ERR_clear_error();
	}
#endif
}

#endif
| 126 |
| 127 #ifdef COMPILE_HW_PADLOCK |
| 128 /* We do these includes here to avoid header problems on platforms that |
| 129 do not have the VIA padlock anyway... */ |
| 130 #include <stdlib.h> |
| 131 #ifdef _WIN32 |
| 132 # include <malloc.h> |
| 133 # ifndef alloca |
| 134 # define alloca _alloca |
| 135 # endif |
| 136 #elif defined(__GNUC__) |
| 137 # ifndef alloca |
| 138 # define alloca(s) __builtin_alloca(s) |
| 139 # endif |
| 140 #endif |
| 141 |
| 142 /* Function for ENGINE detection and control */ |
| 143 static int padlock_available(void); |
| 144 static int padlock_init(ENGINE *e); |
| 145 |
| 146 /* RNG Stuff */ |
| 147 static RAND_METHOD padlock_rand; |
| 148 |
| 149 /* Cipher Stuff */ |
| 150 #ifndef OPENSSL_NO_AES |
| 151 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nid
s, int nid); |
| 152 #endif |
| 153 |
| 154 /* Engine names */ |
| 155 static const char *padlock_id = "padlock"; |
| 156 static char padlock_name[100]; |
| 157 |
| 158 /* Available features */ |
| 159 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ |
| 160 static int padlock_use_rng = 0; /* Random Number Generator */ |
| 161 #ifndef OPENSSL_NO_AES |
| 162 static int padlock_aes_align_required = 1; |
| 163 #endif |
| 164 |
| 165 /* ===== Engine "management" functions ===== */ |
| 166 |
/* Prepare the ENGINE structure for registration.
   Probes the CPU, builds the engine name and installs the callbacks.
   Returns 1 on success, 0 if any ENGINE_set_* call fails. */
static int
padlock_bind_helper(ENGINE *e)
{
	/* Check available features; sets padlock_use_ace/padlock_use_rng. */
	padlock_available();

#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	padlock_use_rng=0;
#endif

	/* Generate a nice engine name with available features */
	BIO_snprintf(padlock_name, sizeof(padlock_name),
		"VIA PadLock (%s, %s)",
		 padlock_use_rng ? "RNG" : "no-RNG",
		 padlock_use_ace ? "ACE" : "no-ACE");

	/* Register everything or return with an error */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||

	    !ENGINE_set_init_function(e, padlock_init) ||
#ifndef OPENSSL_NO_AES
	    /* Only offer AES ciphers when the ACE unit was detected. */
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
	    /* Only offer the RNG when the xstore unit was detected. */
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
		return 0;
	}

	/* Everything looks good */
	return 1;
}
| 199 |
| 200 /* Constructor */ |
| 201 static ENGINE * |
| 202 ENGINE_padlock(void) |
| 203 { |
| 204 ENGINE *eng = ENGINE_new(); |
| 205 |
| 206 if (!eng) { |
| 207 return NULL; |
| 208 } |
| 209 |
| 210 if (!padlock_bind_helper(eng)) { |
| 211 ENGINE_free(eng); |
| 212 return NULL; |
| 213 } |
| 214 |
| 215 return eng; |
| 216 } |
| 217 |
| 218 /* Check availability of the engine */ |
| 219 static int |
| 220 padlock_init(ENGINE *e) |
| 221 { |
| 222 return (padlock_use_rng || padlock_use_ace); |
| 223 } |
| 224 |
| 225 /* This stuff is needed if this ENGINE is being compiled into a self-contained |
| 226 * shared-library. |
| 227 */ |
| 228 #ifdef DYNAMIC_ENGINE |
| 229 static int |
| 230 padlock_bind_fn(ENGINE *e, const char *id) |
| 231 { |
| 232 if (id && (strcmp(id, padlock_id) != 0)) { |
| 233 return 0; |
| 234 } |
| 235 |
| 236 if (!padlock_bind_helper(e)) { |
| 237 return 0; |
| 238 } |
| 239 |
| 240 return 1; |
| 241 } |
| 242 |
| 243 IMPLEMENT_DYNAMIC_CHECK_FN() |
| 244 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn) |
| 245 #endif /* DYNAMIC_ENGINE */ |
| 246 |
| 247 /* ===== Here comes the "real" engine ===== */ |
| 248 |
| 249 #ifndef OPENSSL_NO_AES |
| 250 /* Some AES-related constants */ |
| 251 #define AES_BLOCK_SIZE 16 |
| 252 #define AES_KEY_SIZE_128 16 |
| 253 #define AES_KEY_SIZE_192 24 |
| 254 #define AES_KEY_SIZE_256 32 |
| 255 |
/* Here we store the status information relevant to the
   current context: IV, hardware control word and expanded key.
   Callers obtain a 16-byte-aligned instance via ALIGNED_CIPHER_DATA(). */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure.
 * 	Don't blindly modify, reorder, etc!
 */
struct padlock_cipher_data
{
	unsigned char	iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];		/* keeps the control word 16 bytes
						   wide (assuming 32-bit int) */
		struct {
			int rounds:4;		/* AES round count (10/12/14) */
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;	/* 1 = key schedule supplied
						   by software */
			int interm:1;
			unsigned int encdec:1;	/* direction bit */
			int ksize:2;		/* key size selector */
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};
| 280 |
| 281 /* |
| 282 * Essentially this variable belongs in thread local storage. |
| 283 * Having this variable global on the other hand can only cause |
| 284 * few bogus key reloads [if any at all on single-CPU system], |
                 |  285   * so we accept the penalty...                                |
| 286 */ |
| 287 static volatile struct padlock_cipher_data *padlock_saved_context; |
| 288 #endif |
| 289 |
| 290 /* |
| 291 * ======================================================= |
| 292 * Inline assembler section(s). |
| 293 * ======================================================= |
| 294 * Order of arguments is chosen to facilitate Windows port |
| 295 * using __fastcall calling convention. If you wish to add |
| 296 * more routines, keep in mind that first __fastcall |
| 297 * argument is passed in %ecx and second - in %edx. |
| 298 * ======================================================= |
| 299 */ |
| 300 #if defined(__GNUC__) && __GNUC__>=2 |
| 301 /* |
| 302 * As for excessive "push %ebx"/"pop %ebx" found all over. |
| 303 * When generating position-independent code GCC won't let |
| 304 * us use "b" in assembler templates nor even respect "ebx" |
| 305 * in "clobber description." Therefore the trouble... |
| 306 */ |
| 307 |
/* Helper function - check if a CPUID instruction
   is available on this CPU. Returns 1 if so, 0 otherwise. */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available. */
	asm volatile (
		"pushf\n"			/* original EFLAGS -> eax */
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"	/* flip the ID bit */
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"	/* ecx = flipped ID bit */
		"pushl %%eax\n"			/* write modified EFLAGS back */
		"popf\n"
		"pushf\n"			/* re-read EFLAGS */
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"	/* eax = ID bit as it stuck */
		"xorl %%eax, %%ecx\n"		/* 0 iff the toggle took effect */
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	return (result == 0);
}
| 334 |
/* Load supported features of the CPU to see if
   the PadLock is available. Sets the padlock_use_ace and
   padlock_use_rng globals; returns non-zero if either unit exists.
   The explicit ebx save/restore around cpuid is needed for PIC
   builds (see comment above about GCC and %ebx). */
static int
padlock_available(void)
{
	char vendor_string[16];
	unsigned int eax, edx;

	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
		return 0;

	/* Are we running on the Centaur (VIA) CPU? */
	eax = 0x00000000;
	vendor_string[12] = 0;
	asm volatile (
		"pushl	%%ebx\n"
		"cpuid\n"
		"movl	%%ebx,(%%edi)\n"	/* vendor id: ebx,edx,ecx */
		"movl	%%edx,4(%%edi)\n"
		"movl	%%ecx,8(%%edi)\n"
		"popl	%%ebx"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
		return 0;

	/* Check for Centaur Extended Feature Flags presence */
	eax = 0xC0000000;
	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
		return 0;

	/* Read the Centaur Extended Feature Flags */
	eax = 0xC0000001;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");

	/* Fill up some flags: both bits of each pair must be set
	   (presumably "present" and "enabled" -- see VIA docs). */
	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));

	return padlock_use_ace + padlock_use_rng;
}
| 379 |
| 380 #ifndef OPENSSL_NO_AES |
/* Our own htonl()/ntohl(): byte-swap every 32-bit word of the
   expanded key schedule in place. */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
| 393 #endif |
| 394 |
/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick. */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}
| 403 |
| 404 #ifndef OPENSSL_NO_AES |
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 *
 * The asm below pops EFLAGS (forcing a key reload, see
 * padlock_reload_key) only when EFLAGS[30] is set AND the
 * saved context differs from cdata; it always records cdata
 * as the current context.
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"		/* key already loaded? */
"	jnc	1f\n"
"	cmpl	%2,%1\n"		/* same context as last time? */
"	je	1f\n"
"	popfl\n"			/* no: pop EFLAGS -> reload key */
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"		/* discard saved EFLAGS */
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
| 430 |
/* Template for padlock_xcrypt_* modes.
   Arguments: block count, cipher data, output and input buffers.
   Returns the value left in %eax by the instruction (used as the
   "next IV" pointer by the CBC/CFB callers). */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure: +16 = cword, +32 = ks (assuming the layout above).
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp)		\
{	void *iv;				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
		rep_xcrypt "\n"			\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes
   (the .byte sequences are the REP XCRYPT* encodings). */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
| 458 #endif |
| 459 |
/* The RNG call itself: execute XSTORE with the given %edx control
   value, depositing random bytes at addr. Returns the %eax value
   left by the instruction (status word -- see VIA docs). */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}
| 473 |
/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
/* Word-wise copy used to stage data into aligned buffers.
 * Copies n bytes rounded DOWN to a multiple of sizeof(long);
 * callers pass multiples of AES_BLOCK_SIZE. Returns dst.
 * Both pointers are assumed suitably aligned for long access. */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	/* Loop form guards against n < sizeof(long): the former
	 * do/while decremented the unsigned counter past zero and
	 * would have copied ~SIZE_MAX words, overrunning both
	 * buffers. */
	for (n /= sizeof(*d); n != 0; n--)
		*d++ = *s++;

	return dst;
}
| 496 |
| 497 #elif defined(_MSC_VER) |
| 498 /* |
| 499 * Unlike GCC these are real functions. In order to minimize impact |
| 500 * on performance we adhere to __fastcall calling convention in |
| 501 * order to get two first arguments passed through %ecx and %edx. |
| 502 * Which kind of suits very well, as instructions in question use |
| 503 * both %ecx and %edx as input:-) |
| 504 */ |
/* Emit the REP XCRYPT* encoding 0xf3 0x0f 0xa7 <code> by hand,
   since MASM has no mnemonic for these instructions. */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure: +16 = cword, +32 = ks (assuming the layout above).
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	      void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

/* Same four modes as the GCC section above. */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
| 531 |
/* XSTORE (0x0f 0xa7 0xc0): __fastcall puts outp in ecx and code in
   edx, which is exactly what the instruction consumes; the status
   left in eax becomes the return value. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
| 537 |
/* Force key reload: popping EFLAGS clears EFLAGS[30]
   (see the comment on the GCC variant above). */
static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd		}
| 541 |
/* MSVC twin of the GCC padlock_verify_context above: pop EFLAGS
   (forcing a key reload) only when EFLAGS[30] is set and the saved
   context differs from cdata (in ecx via __fastcall); always record
   cdata as the current context. */
static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
		}
}
| 556 |
/* MSVC twin of the GCC padlock_available above: toggle EFLAGS ID
   bit to prove CPUID exists, match the "CentaurHauls" vendor id,
   then read extended leaf 0xC0000001 and set the padlock_use_ace /
   padlock_use_rng globals when both bits of the respective pair
   (6+7 for ACE, 2+3 for RNG) are set. Returns the count of usable
   units in eax (0 on any failed check). */
static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
		}
}
| 605 |
/* Byte-swap the expanded key in place, 60 dwords at a time
   (60 presumably equals sizeof(AES_KEY.rd_key)/4 -- confirm against
   the AES_KEY definition). pushfd/popfd preserve the caller's
   Direction Flag around the cld. */
static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
		}
}
| 621 |
| 622 /* MS actually specifies status of Direction Flag and compiler even |
| 623 * manages to compile following as 'rep movsd' all by itself... |
| 624 */ |
| 625 #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) |
| 626 #endif |
| 627 |
| 628 /* ===== AES encryption/decryption ===== */ |
| 629 #ifndef OPENSSL_NO_AES |
| 630 |
| 631 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) |
| 632 #define NID_aes_128_cfb NID_aes_128_cfb128 |
| 633 #endif |
| 634 |
| 635 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) |
| 636 #define NID_aes_128_ofb NID_aes_128_ofb128 |
| 637 #endif |
| 638 |
| 639 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) |
| 640 #define NID_aes_192_cfb NID_aes_192_cfb128 |
| 641 #endif |
| 642 |
| 643 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) |
| 644 #define NID_aes_192_ofb NID_aes_192_ofb128 |
| 645 #endif |
| 646 |
| 647 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) |
| 648 #define NID_aes_256_cfb NID_aes_256_cfb128 |
| 649 #endif |
| 650 |
| 651 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) |
| 652 #define NID_aes_256_ofb NID_aes_256_ofb128 |
| 653 #endif |
| 654 |
/* List of supported ciphers: ECB/CBC/CFB/OFB for each AES key size.
   Returned by padlock_ciphers() when no specific nid is requested. */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_ofb,

	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));
| 674 |
| 675 /* Function prototypes ... */ |
| 676 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
| 677 const unsigned char *iv, int enc); |
| 678 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
| 679 const unsigned char *in, size_t nbytes); |
| 680 |
| 681 #define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \ |
| 682 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) ) |
| 683 #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\ |
| 684 NEAREST_ALIGNED(ctx->cipher_data)) |
| 685 |
| 686 #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE |
| 687 #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE |
| 688 #define EVP_CIPHER_block_size_OFB 1 |
| 689 #define EVP_CIPHER_block_size_CFB 1 |
| 690 |
/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-)
   The ctx_size field reserves sizeof(struct padlock_cipher_data)+16
   so ALIGNED_CIPHER_DATA() can round up to a 16-byte boundary. */
#define DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,		\
	AES_KEY_SIZE_##ksize,			\
	AES_BLOCK_SIZE,				\
	0 | EVP_CIPH_##umode##_MODE,		\
	padlock_aes_init_key,			\
	padlock_aes_cipher,			\
	NULL,					\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,			\
	EVP_CIPHER_get_asn1_iv,			\
	NULL,					\
	NULL					\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
| 724 |
| 725 static int |
| 726 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
) |
| 727 { |
| 728 /* No specific cipher => return a list of supported nids ... */ |
| 729 if (!cipher) { |
| 730 *nids = padlock_cipher_nids; |
| 731 return padlock_cipher_nids_num; |
| 732 } |
| 733 |
| 734 /* ... or the requested "cipher" otherwise */ |
| 735 switch (nid) { |
| 736 case NID_aes_128_ecb: |
| 737 *cipher = &padlock_aes_128_ecb; |
| 738 break; |
| 739 case NID_aes_128_cbc: |
| 740 *cipher = &padlock_aes_128_cbc; |
| 741 break; |
| 742 case NID_aes_128_cfb: |
| 743 *cipher = &padlock_aes_128_cfb; |
| 744 break; |
| 745 case NID_aes_128_ofb: |
| 746 *cipher = &padlock_aes_128_ofb; |
| 747 break; |
| 748 |
| 749 case NID_aes_192_ecb: |
| 750 *cipher = &padlock_aes_192_ecb; |
| 751 break; |
| 752 case NID_aes_192_cbc: |
| 753 *cipher = &padlock_aes_192_cbc; |
| 754 break; |
| 755 case NID_aes_192_cfb: |
| 756 *cipher = &padlock_aes_192_cfb; |
| 757 break; |
| 758 case NID_aes_192_ofb: |
| 759 *cipher = &padlock_aes_192_ofb; |
| 760 break; |
| 761 |
| 762 case NID_aes_256_ecb: |
| 763 *cipher = &padlock_aes_256_ecb; |
| 764 break; |
| 765 case NID_aes_256_cbc: |
| 766 *cipher = &padlock_aes_256_cbc; |
| 767 break; |
| 768 case NID_aes_256_cfb: |
| 769 *cipher = &padlock_aes_256_cfb; |
| 770 break; |
| 771 case NID_aes_256_ofb: |
| 772 *cipher = &padlock_aes_256_ofb; |
| 773 break; |
| 774 |
| 775 default: |
| 776 /* Sorry, we don't support this NID */ |
| 777 *cipher = NULL; |
| 778 return 0; |
| 779 } |
| 780 |
| 781 return 1; |
| 782 } |
| 783 |
/* Prepare the encryption key for PadLock usage.
   Fills the aligned padlock_cipher_data in ctx with the control word
   and the key schedule. Returns 1 on success, 0 on a NULL key or an
   unsupported key length. 'iv' is unused here (IVs are copied from
   ctx->iv at cipher time). */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;	/* bits */

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		/* OFB always runs the block cipher forward. */
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	/* 128/192/256 bits -> 10/12/14 rounds, ksize 0/1/2. */
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
	cdata->cword.b.ksize = (key_len - 128) / 64;

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR: unsupported key length */
			return 0;
	}

	/*
	 * This is done to cover for cases when user reuses the
	 * context for new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with old key...
	 */
	padlock_reload_key ();

	return 1;
}
| 848 |
/*
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 * nbytes must be a multiple of AES_BLOCK_SIZE here (caller checks).
 * Returns 1 on success, 0 for an unsupported mode.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	void  *iv;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* xcrypt_cbc returns a pointer to the next-chaining IV. */
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* As in CBC, the returned pointer supplies the next IV. */
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		/* For OFB the updated IV is left in cdata->iv itself. */
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Scrub the local IV copy before returning. */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}
| 896 |
| 897 #ifndef PADLOCK_CHUNK |
| 898 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */ |
| 899 #endif |
| 900 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) |
| 901 # error "insane PADLOCK_CHUNK..." |
| 902 #endif |
| 903 |
| 904 /* Re-align the arguments to 16-Bytes boundaries and run the |
| 905 encryption function itself. This function is not AES-specific. */ |
| 906 static int |
| 907 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, |
| 908 const unsigned char *in_arg, size_t nbytes) |
| 909 { |
| 910 struct padlock_cipher_data *cdata; |
| 911 const void *inp; |
| 912 unsigned char *out; |
| 913 void *iv; |
| 914 int inp_misaligned, out_misaligned, realign_in_loop; |
| 915 size_t chunk, allocated=0; |
| 916 |
| 917 /* ctx->num is maintained in byte-oriented modes, |
| 918 such as CFB and OFB... */ |
| 919 if ((chunk = ctx->num)) { /* borrow chunk variable */ |
| 920 unsigned char *ivp=ctx->iv; |
| 921 |
| 922 switch (EVP_CIPHER_CTX_mode(ctx)) { |
| 923 case EVP_CIPH_CFB_MODE: |
| 924 if (chunk >= AES_BLOCK_SIZE) |
| 925 return 0; /* bogus value */ |
| 926 |
| 927 if (ctx->encrypt) |
| 928 while (chunk<AES_BLOCK_SIZE && nbytes!=0) { |
| 929 ivp[chunk] = *(out_arg++) = *(in_arg++)
^ ivp[chunk]; |
| 930 chunk++, nbytes--; |
| 931 } |
| 932 else while (chunk<AES_BLOCK_SIZE && nbytes!=0) { |
| 933 unsigned char c = *(in_arg++); |
| 934 *(out_arg++) = c ^ ivp[chunk]; |
| 935 ivp[chunk++] = c, nbytes--; |
| 936 } |
| 937 |
| 938 ctx->num = chunk%AES_BLOCK_SIZE; |
| 939 break; |
| 940 case EVP_CIPH_OFB_MODE: |
| 941 if (chunk >= AES_BLOCK_SIZE) |
| 942 return 0; /* bogus value */ |
| 943 |
| 944 while (chunk<AES_BLOCK_SIZE && nbytes!=0) { |
| 945 *(out_arg++) = *(in_arg++) ^ ivp[chunk]; |
| 946 chunk++, nbytes--; |
| 947 } |
| 948 |
| 949 ctx->num = chunk%AES_BLOCK_SIZE; |
| 950 break; |
| 951 } |
| 952 } |
| 953 |
| 954 if (nbytes == 0) |
| 955 return 1; |
| 956 #if 0 |
| 957 if (nbytes % AES_BLOCK_SIZE) |
| 958 return 0; /* are we expected to do tail processing? */ |
| 959 #else |
| 960 /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC |
| 961 modes and arbitrary value in byte-oriented modes, such as |
| 962 CFB and OFB... */ |
| 963 #endif |
| 964 |
| 965 /* VIA promises CPUs that won't require alignment in the future. |
| 966 For now padlock_aes_align_required is initialized to 1 and |
| 967 the condition is never met... */ |
| 968 /* C7 core is capable to manage unaligned input in non-ECB[!] |
| 969 mode, but performance penalties appear to be approximately |
| 970 same as for software alignment below or ~3x. They promise to |
| 971 improve it in the future, but for now we can just as well |
| 972 pretend that it can only handle aligned input... */ |
| 973 if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0) |
| 974 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbyte
s); |
| 975 |
| 976 inp_misaligned = (((size_t)in_arg) & 0x0F); |
| 977 out_misaligned = (((size_t)out_arg) & 0x0F); |
| 978 |
| 979 /* Note that even if output is aligned and input not, |
| 980 * I still prefer to loop instead of copy the whole |
| 981 * input and then encrypt in one stroke. This is done |
| 982 * in order to improve L1 cache utilization... */ |
| 983 realign_in_loop = out_misaligned|inp_misaligned; |
| 984 |
| 985 if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0) |
| 986 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbyte
s); |
| 987 |
| 988 /* this takes one "if" out of the loops */ |
| 989 chunk = nbytes; |
| 990 chunk %= PADLOCK_CHUNK; |
| 991 if (chunk==0) chunk = PADLOCK_CHUNK; |
| 992 |
| 993 if (out_misaligned) { |
| 994 /* optmize for small input */ |
| 995 allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes); |
| 996 out = alloca(0x10 + allocated); |
| 997 out = NEAREST_ALIGNED(out); |
| 998 } |
| 999 else |
| 1000 out = out_arg; |
| 1001 |
| 1002 cdata = ALIGNED_CIPHER_DATA(ctx); |
| 1003 padlock_verify_context(cdata); |
| 1004 |
| 1005 switch (EVP_CIPHER_CTX_mode(ctx)) { |
| 1006 case EVP_CIPH_ECB_MODE: |
| 1007 do { |
| 1008 if (inp_misaligned) |
| 1009 inp = padlock_memcpy(out, in_arg, chunk); |
| 1010 else |
| 1011 inp = in_arg; |
| 1012 in_arg += chunk; |
| 1013 |
| 1014 padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp
); |
| 1015 |
| 1016 if (out_misaligned) |
| 1017 out_arg = padlock_memcpy(out_arg, out, chunk) +
chunk; |
| 1018 else |
| 1019 out = out_arg+=chunk; |
| 1020 |
| 1021 nbytes -= chunk; |
| 1022 chunk = PADLOCK_CHUNK; |
| 1023 } while (nbytes); |
| 1024 break; |
| 1025 |
| 1026 case EVP_CIPH_CBC_MODE: |
| 1027 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
| 1028 goto cbc_shortcut; |
| 1029 do { |
| 1030 if (iv != cdata->iv) |
| 1031 memcpy(cdata->iv, iv, AES_BLOCK_SIZE); |
| 1032 chunk = PADLOCK_CHUNK; |
| 1033 cbc_shortcut: /* optimize for small input */ |
| 1034 if (inp_misaligned) |
| 1035 inp = padlock_memcpy(out, in_arg, chunk); |
| 1036 else |
| 1037 inp = in_arg; |
| 1038 in_arg += chunk; |
| 1039 |
| 1040 iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out
, inp); |
| 1041 |
| 1042 if (out_misaligned) |
| 1043 out_arg = padlock_memcpy(out_arg, out, chunk) +
chunk; |
| 1044 else |
| 1045 out = out_arg+=chunk; |
| 1046 |
| 1047 } while (nbytes -= chunk); |
| 1048 memcpy(ctx->iv, iv, AES_BLOCK_SIZE); |
| 1049 break; |
| 1050 |
| 1051 case EVP_CIPH_CFB_MODE: |
| 1052 memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
| 1053 chunk &= ~(AES_BLOCK_SIZE-1); |
| 1054 if (chunk) goto cfb_shortcut; |
| 1055 else goto cfb_skiploop; |
| 1056 do { |
| 1057 if (iv != cdata->iv) |
| 1058 memcpy(cdata->iv, iv, AES_BLOCK_SIZE); |
| 1059 chunk = PADLOCK_CHUNK; |
| 1060 cfb_shortcut: /* optimize for small input */ |
| 1061 if (inp_misaligned) |
| 1062 inp = padlock_memcpy(out, in_arg, chunk); |
| 1063 else |
| 1064 inp = in_arg; |
| 1065 in_arg += chunk; |
| 1066 |
| 1067 iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out
, inp); |
| 1068 |
| 1069 if (out_misaligned) |
| 1070 out_arg = padlock_memcpy(out_arg, out, chunk) +
chunk; |
| 1071 else |
| 1072 out = out_arg+=chunk; |
| 1073 |
| 1074 nbytes -= chunk; |
| 1075 } while (nbytes >= AES_BLOCK_SIZE); |
| 1076 |
| 1077 cfb_skiploop: |
| 1078 if (nbytes) { |
| 1079 unsigned char *ivp = cdata->iv; |
| 1080 |
| 1081 if (iv != ivp) { |
| 1082 memcpy(ivp, iv, AES_BLOCK_SIZE); |
| 1083 iv = ivp; |
| 1084 } |
| 1085 ctx->num = nbytes; |
| 1086 if (cdata->cword.b.encdec) { |
| 1087 cdata->cword.b.encdec=0; |
| 1088 padlock_reload_key(); |
| 1089 padlock_xcrypt_ecb(1,cdata,ivp,ivp); |
| 1090 cdata->cword.b.encdec=1; |
| 1091 padlock_reload_key(); |
| 1092 while(nbytes) { |
| 1093 unsigned char c = *(in_arg++); |
| 1094 *(out_arg++) = c ^ *ivp; |
| 1095 *(ivp++) = c, nbytes--; |
| 1096 } |
| 1097 } |
| 1098 else { padlock_reload_key(); |
| 1099 padlock_xcrypt_ecb(1,cdata,ivp,ivp); |
| 1100 padlock_reload_key(); |
| 1101 while (nbytes) { |
| 1102 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp
; |
| 1103 ivp++, nbytes--; |
| 1104 } |
| 1105 } |
| 1106 } |
| 1107 |
| 1108 memcpy(ctx->iv, iv, AES_BLOCK_SIZE); |
| 1109 break; |
| 1110 |
| 1111 case EVP_CIPH_OFB_MODE: |
| 1112 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); |
| 1113 chunk &= ~(AES_BLOCK_SIZE-1); |
| 1114 if (chunk) do { |
| 1115 if (inp_misaligned) |
| 1116 inp = padlock_memcpy(out, in_arg, chunk); |
| 1117 else |
| 1118 inp = in_arg; |
| 1119 in_arg += chunk; |
| 1120 |
| 1121 padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp
); |
| 1122 |
| 1123 if (out_misaligned) |
| 1124 out_arg = padlock_memcpy(out_arg, out, chunk) +
chunk; |
| 1125 else |
| 1126 out = out_arg+=chunk; |
| 1127 |
| 1128 nbytes -= chunk; |
| 1129 chunk = PADLOCK_CHUNK; |
| 1130 } while (nbytes >= AES_BLOCK_SIZE); |
| 1131 |
| 1132 if (nbytes) { |
| 1133 unsigned char *ivp = cdata->iv; |
| 1134 |
| 1135 ctx->num = nbytes; |
| 1136 padlock_reload_key(); /* empirically found */ |
| 1137 padlock_xcrypt_ecb(1,cdata,ivp,ivp); |
| 1138 padlock_reload_key(); /* empirically found */ |
| 1139 while (nbytes) { |
| 1140 *(out_arg++) = *(in_arg++) ^ *ivp; |
| 1141 ivp++, nbytes--; |
| 1142 } |
| 1143 } |
| 1144 |
| 1145 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); |
| 1146 break; |
| 1147 |
| 1148 default: |
| 1149 return 0; |
| 1150 } |
| 1151 |
| 1152 /* Clean the realign buffer if it was used */ |
| 1153 if (out_misaligned) { |
| 1154 volatile unsigned long *p=(void *)out; |
| 1155 size_t n = allocated/sizeof(*p); |
| 1156 while (n--) *p++=0; |
| 1157 } |
| 1158 |
| 1159 memset(cdata->iv, 0, AES_BLOCK_SIZE); |
| 1160 |
| 1161 return 1; |
| 1162 } |
| 1163 |
| 1164 #endif /* OPENSSL_NO_AES */ |
| 1165 |
| 1166 /* ===== Random Number Generator ===== */ |
| 1167 /* |
| 1168 * This code is not engaged. The reason is that it does not comply |
| 1169 * with recommendations for VIA RNG usage for secure applications |
| 1170 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it |
| 1171 * provide meaningful error control... |
| 1172 */ |
| 1173 /* Wrapper that provides an interface between the API and |
| 1174 the raw PadLock RNG */ |
/* Wrapper between the RAND_METHOD API and the raw PadLock xstore RNG.
 * Fills 'output' with 'count' hardware random bytes.  Returns 1 on
 * success, 0 if the RNG is disabled or the hardware reports a
 * self-test (quality) failure.  As noted above, this code is kept
 * for reference and is not engaged. */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int status;
	unsigned int scratch;

	/* Bulk phase: pull 8 bytes per xstore directly into 'output'. */
	while (count >= 8) {
		status = padlock_xstore(output, 0);
		if (!(status & (1<<6)))
			return 0;		/* RNG disabled */
		/* bits 10..14 flag DC bias, Raw Bits and String Filter */
		if (status & (0x1F<<10))
			return 0;
		switch (status & 0x1F) {	/* bytes actually delivered */
		case 0:
			continue;		/* no data yet, retry */
		case 8:
			break;			/* full block delivered */
		default:
			return 0;		/* fatal failure */
		}
		output += 8;
		count -= 8;
	}

	/* Tail phase: fetch one byte at a time through a scratch word. */
	while (count > 0) {
		status = padlock_xstore(&scratch, 3);
		if (!(status & (1<<6)))
			return 0;		/* RNG disabled */
		/* bits 10..14 flag DC bias, Raw Bits and String Filter */
		if (status & (0x1F<<10))
			return 0;
		switch (status & 0x1F) {
		case 0:
			continue;		/* no data yet, retry */
		case 1:
			break;			/* one byte delivered */
		default:
			return 0;		/* fatal failure */
		}
		*output++ = (unsigned char)scratch;
		count--;
	}

	/* Scrub the last random byte off the stack; the volatile access
	   keeps the compiler from optimizing the store away. */
	*(volatile unsigned int *)&scratch = 0;

	return 1;
}
| 1204 |
| 1205 /* Dummy but necessary function */ |
/* RAND_METHOD status callback.  There is no separate "seeded" state
 * to track: padlock_rand_bytes() probes the RNG-enabled bit on every
 * call, so this entry point unconditionally reports success. */
static int
padlock_rand_status(void)
{
	return 1;
}
| 1211 |
| 1212 /* Prepare structure for registration */ |
/* RAND_METHOD dispatch table prepared for engine registration.
 * Seeding, cleanup and entropy mixing are left NULL: a hardware RNG
 * takes no external seed material.  Note that 'pseudorand'
 * deliberately reuses padlock_rand_bytes, so pseudo-random requests
 * also draw from the hardware generator. */
static RAND_METHOD padlock_rand = {
	NULL,			/* seed */
	padlock_rand_bytes,	/* bytes */
	NULL,			/* cleanup */
	NULL,			/* add */
	padlock_rand_bytes,	/* pseudorand */
	padlock_rand_status,	/* rand status */
};
| 1221 |
| 1222 #else /* !COMPILE_HW_PADLOCK */ |
| 1223 #ifndef OPENSSL_NO_DYNAMIC_ENGINE |
/* Stub for dynamic-engine builds on targets where PadLock support was
 * not compiled in (COMPILE_HW_PADLOCK undefined): bind_engine() refuses
 * to bind by returning 0, so loading this shared engine fails cleanly. */
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
IMPLEMENT_DYNAMIC_CHECK_FN()
| 1229 #endif |
| 1230 #endif /* COMPILE_HW_PADLOCK */ |
| 1231 |
| 1232 #endif /* !OPENSSL_NO_HW_PADLOCK */ |
| 1233 #endif /* !OPENSSL_NO_HW */ |
OLD | NEW |