| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Support for VIA PadLock Advanced Cryptography Engine (ACE) | |
| 3 * Written by Michal Ludvig <michal@logix.cz> | |
| 4 * http://www.logix.cz/michal | |
| 5 * | |
| 6 * Big thanks to Andy Polyakov for a help with optimization, | |
| 7 * assembler fixes, port to MS Windows and a lot of other | |
| 8 * valuable work on this engine! | |
| 9 */ | |
| 10 | |
| 11 /* ==================================================================== | |
| 12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. | |
| 13 * | |
| 14 * Redistribution and use in source and binary forms, with or without | |
| 15 * modification, are permitted provided that the following conditions | |
| 16 * are met: | |
| 17 * | |
| 18 * 1. Redistributions of source code must retain the above copyright | |
| 19 * notice, this list of conditions and the following disclaimer. | |
| 20 * | |
| 21 * 2. Redistributions in binary form must reproduce the above copyright | |
| 22 * notice, this list of conditions and the following disclaimer in | |
| 23 * the documentation and/or other materials provided with the | |
| 24 * distribution. | |
| 25 * | |
| 26 * 3. All advertising materials mentioning features or use of this | |
| 27 * software must display the following acknowledgment: | |
| 28 * "This product includes software developed by the OpenSSL Project | |
| 29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 30 * | |
| 31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 32 * endorse or promote products derived from this software without | |
| 33 * prior written permission. For written permission, please contact | |
| 34 * licensing@OpenSSL.org. | |
| 35 * | |
| 36 * 5. Products derived from this software may not be called "OpenSSL" | |
| 37 * nor may "OpenSSL" appear in their names without prior written | |
| 38 * permission of the OpenSSL Project. | |
| 39 * | |
| 40 * 6. Redistributions of any form whatsoever must retain the following | |
| 41 * acknowledgment: | |
| 42 * "This product includes software developed by the OpenSSL Project | |
| 43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 44 * | |
| 45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 56 * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 57 * ==================================================================== | |
| 58 * | |
| 59 * This product includes cryptographic software written by Eric Young | |
| 60 * (eay@cryptsoft.com). This product includes software written by Tim | |
| 61 * Hudson (tjh@cryptsoft.com). | |
| 62 * | |
| 63 */ | |
| 64 | |
| 65 | |
| 66 #include <stdio.h> | |
| 67 #include <string.h> | |
| 68 | |
| 69 #include <openssl/opensslconf.h> | |
| 70 #include <openssl/crypto.h> | |
| 71 #include <openssl/dso.h> | |
| 72 #include <openssl/engine.h> | |
| 73 #include <openssl/evp.h> | |
| 74 #ifndef OPENSSL_NO_AES | |
| 75 #include <openssl/aes.h> | |
| 76 #endif | |
| 77 #include <openssl/rand.h> | |
| 78 #include <openssl/err.h> | |
| 79 | |
| 80 #ifndef OPENSSL_NO_HW | |
| 81 #ifndef OPENSSL_NO_HW_PADLOCK | |
| 82 | |
| 83 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ | |
| 84 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L) | |
| 85 # ifndef OPENSSL_NO_DYNAMIC_ENGINE | |
| 86 # define DYNAMIC_ENGINE | |
| 87 # endif | |
| 88 #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) | |
| 89 # ifdef ENGINE_DYNAMIC_SUPPORT | |
| 90 # define DYNAMIC_ENGINE | |
| 91 # endif | |
| 92 #else | |
| 93 # error "Only OpenSSL >= 0.9.7 is supported" | |
| 94 #endif | |
| 95 | |
| 96 /* VIA PadLock AES is available *ONLY* on some x86 CPUs. | |
| 97 Not only that it doesn't exist elsewhere, but it | |
| 98 even can't be compiled on other platforms! | |
| 99 | |
| 100 In addition, because of the heavy use of inline assembler, | |
| 101 compiler choice is limited to GCC and Microsoft C. */ | |
| 102 #undef COMPILE_HW_PADLOCK | |
| 103 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) | |
| 104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ | |
| 105 (defined(_MSC_VER) && defined(_M_IX86)) | |
| 106 # define COMPILE_HW_PADLOCK | |
| 107 static ENGINE *ENGINE_padlock (void); | |
| 108 # endif | |
| 109 #endif | |
| 110 | |
/* Register the PadLock engine with the global ENGINE list.
   On CPUs/compilers where COMPILE_HW_PADLOCK is not defined this
   compiles to an empty function and simply returns. */
void ENGINE_load_padlock (void)
{
#ifdef COMPILE_HW_PADLOCK
	ENGINE *engine = ENGINE_padlock ();

	if (engine == NULL)
		return;
	/* ENGINE_add() takes its own reference, so release ours. */
	ENGINE_add (engine);
	ENGINE_free (engine);
	ERR_clear_error ();
#endif
}
| 122 | |
| 123 #ifdef COMPILE_HW_PADLOCK | |
| 124 /* We do these includes here to avoid header problems on platforms that | |
| 125 do not have the VIA padlock anyway... */ | |
| 126 #ifdef _MSC_VER | |
| 127 # include <malloc.h> | |
| 128 # define alloca _alloca | |
| 129 #elif defined(NETWARE_CLIB) && defined(__GNUC__) | |
| 130 void *alloca(size_t); | |
| 131 # define alloca(s) __builtin_alloca(s) | |
| 132 #else | |
| 133 # include <stdlib.h> | |
| 134 #endif | |
| 135 | |
| 136 /* Function for ENGINE detection and control */ | |
| 137 static int padlock_available(void); | |
| 138 static int padlock_init(ENGINE *e); | |
| 139 | |
| 140 /* RNG Stuff */ | |
| 141 static RAND_METHOD padlock_rand; | |
| 142 | |
| 143 /* Cipher Stuff */ | |
| 144 #ifndef OPENSSL_NO_AES | |
| 145 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nid
s, int nid); | |
| 146 #endif | |
| 147 | |
| 148 /* Engine names */ | |
| 149 static const char *padlock_id = "padlock"; | |
| 150 static char padlock_name[100]; | |
| 151 | |
| 152 /* Available features */ | |
| 153 static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ | |
| 154 static int padlock_use_rng = 0; /* Random Number Generator */ | |
| 155 #ifndef OPENSSL_NO_AES | |
| 156 static int padlock_aes_align_required = 1; | |
| 157 #endif | |
| 158 | |
| 159 /* ===== Engine "management" functions ===== */ | |
| 160 | |
/* Prepare the ENGINE structure for registration.
 * Probes the CPU (via padlock_available()) for the ACE and RNG units,
 * builds a descriptive engine name, and installs the id/name/init
 * callbacks plus — conditionally on detected hardware — the cipher
 * and RAND method hooks.
 * Returns 1 on success, 0 if any ENGINE_set_* call fails. */
static int
padlock_bind_helper(ENGINE *e)
{
	/* Check available features */
	padlock_available();

#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
	padlock_use_rng=0;
#endif

	/* Generate a nice engine name with available features */
	BIO_snprintf(padlock_name, sizeof(padlock_name),
		"VIA PadLock (%s, %s)",
		 padlock_use_rng ? "RNG" : "no-RNG",
		 padlock_use_ace ? "ACE" : "no-ACE");

	/* Register everything or return with an error.
	   Note: cipher/RAND hooks are only installed when the
	   corresponding hardware unit was actually detected. */
	if (!ENGINE_set_id(e, padlock_id) ||
	    !ENGINE_set_name(e, padlock_name) ||

	    !ENGINE_set_init_function(e, padlock_init) ||
#ifndef OPENSSL_NO_AES
	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
		return 0;
	}

	/* Everything looks good */
	return 1;
}
| 193 | |
| 194 /* Constructor */ | |
| 195 static ENGINE * | |
| 196 ENGINE_padlock(void) | |
| 197 { | |
| 198 ENGINE *eng = ENGINE_new(); | |
| 199 | |
| 200 if (!eng) { | |
| 201 return NULL; | |
| 202 } | |
| 203 | |
| 204 if (!padlock_bind_helper(eng)) { | |
| 205 ENGINE_free(eng); | |
| 206 return NULL; | |
| 207 } | |
| 208 | |
| 209 return eng; | |
| 210 } | |
| 211 | |
| 212 /* Check availability of the engine */ | |
| 213 static int | |
| 214 padlock_init(ENGINE *e) | |
| 215 { | |
| 216 return (padlock_use_rng || padlock_use_ace); | |
| 217 } | |
| 218 | |
| 219 /* This stuff is needed if this ENGINE is being compiled into a self-contained | |
| 220 * shared-library. | |
| 221 */ | |
| 222 #ifdef DYNAMIC_ENGINE | |
| 223 static int | |
| 224 padlock_bind_fn(ENGINE *e, const char *id) | |
| 225 { | |
| 226 if (id && (strcmp(id, padlock_id) != 0)) { | |
| 227 return 0; | |
| 228 } | |
| 229 | |
| 230 if (!padlock_bind_helper(e)) { | |
| 231 return 0; | |
| 232 } | |
| 233 | |
| 234 return 1; | |
| 235 } | |
| 236 | |
| 237 IMPLEMENT_DYNAMIC_CHECK_FN () | |
| 238 IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn) | |
| 239 #endif /* DYNAMIC_ENGINE */ | |
| 240 | |
| 241 /* ===== Here comes the "real" engine ===== */ | |
| 242 | |
| 243 #ifndef OPENSSL_NO_AES | |
| 244 /* Some AES-related constants */ | |
| 245 #define AES_BLOCK_SIZE 16 | |
| 246 #define AES_KEY_SIZE_128 16 | |
| 247 #define AES_KEY_SIZE_192 24 | |
| 248 #define AES_KEY_SIZE_256 32 | |
| 249 | |
/* Here we store the status information relevant to the
   current context. */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure.
 * 	Don't blindly modify, reorder, etc!
 *
 * 	The asm addresses cword at offset 16 (right after iv) and
 * 	ks at offset 32 — see the 'leal 16(%0)' / 'leal 32(%0)'
 * 	(and MSVC 'lea ...+16/+32') instructions below.
 */
struct padlock_cipher_data
{
	unsigned char iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];		/* pad forces 16-byte size */
		struct {
			int rounds:4;
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;
			int interm:1;
			unsigned int encdec:1;
			int ksize:2;
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};
| 274 | |
/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * few bogus key reloads [if any at all on single-CPU system],
 * so we accept the penalty...
 */
/* Tracks the cipher context last seen by padlock_verify_context();
   used to decide whether the CPU must reload the key. */
static volatile struct padlock_cipher_data *padlock_saved_context;
| 282 #endif | |
| 283 | |
| 284 /* | |
| 285 * ======================================================= | |
| 286 * Inline assembler section(s). | |
| 287 * ======================================================= | |
| 288 * Order of arguments is chosen to facilitate Windows port | |
| 289 * using __fastcall calling convention. If you wish to add | |
| 290 * more routines, keep in mind that first __fastcall | |
| 291 * argument is passed in %ecx and second - in %edx. | |
| 292 * ======================================================= | |
| 293 */ | |
| 294 #if defined(__GNUC__) && __GNUC__>=2 | |
| 295 /* | |
| 296 * As for excessive "push %ebx"/"pop %ebx" found all over. | |
| 297 * When generating position-independent code GCC won't let | |
| 298 * us use "b" in assembler templates nor even respect "ebx" | |
| 299 * in "clobber description." Therefore the trouble... | |
| 300 */ | |
| 301 | |
/* Helper function - check if a CPUID instruction
   is available on this CPU.
   Returns 1 when CPUID can be executed, 0 otherwise. */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available.
	   The sequence flips the bit, writes EFLAGS back, reads it
	   again and compares: result == 0 means the flip "stuck". */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"
		"xorl $0x200000, %%eax\n"
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"
		"pushl %%eax\n"
		"popf\n"
		"pushf\n"
		"popl %%eax\n"
		"andl $0x200000, %%eax\n"
		"xorl %%eax, %%ecx\n"
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	return (result == 0);
}
| 328 | |
/* Load supported features of the CPU to see if
   the PadLock is available.
   Sets the padlock_use_ace / padlock_use_rng globals as a side
   effect; returns non-zero when at least one unit is usable. */
static int
padlock_available(void)
{
	char vendor_string[16];
	unsigned int eax, edx;

	/* First check if the CPUID instruction is available at all... */
	if (! padlock_insn_cpuid_available())
		return 0;

	/* Are we running on the Centaur (VIA) CPU?
	   CPUID leaf 0 returns the vendor string in EBX:EDX:ECX.
	   EBX is saved/restored by hand because PIC builds reserve it. */
	eax = 0x00000000;
	vendor_string[12] = 0;
	asm volatile (
		"pushl	%%ebx\n"
		"cpuid\n"
		"movl	%%ebx,(%%edi)\n"
		"movl	%%edx,4(%%edi)\n"
		"movl	%%ecx,8(%%edi)\n"
		"popl	%%ebx"
		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
	if (strcmp(vendor_string, "CentaurHauls") != 0)
		return 0;

	/* Check for Centaur Extended Feature Flags presence */
	eax = 0xC0000000;
	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
		: "+a"(eax) : : "ecx", "edx");
	if (eax < 0xC0000001)
		return 0;

	/* Read the Centaur Extended Feature Flags */
	eax = 0xC0000001;
	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
		: "+a"(eax), "=d"(edx) : : "ecx");

	/* Fill up some flags: each unit needs its bit *pair*
	   set (bits 6-7 for ACE, bits 2-3 for RNG). */
	padlock_use_ace = ((edx&(0x3<<6)) == (0x3<<6));
	padlock_use_rng = ((edx&(0x3<<2)) == (0x3<<2));

	return padlock_use_ace + padlock_use_rng;
}
| 373 | |
| 374 #ifndef OPENSSL_NO_AES | |
/* Our own htonl()/ntohl(): byte-swap every 32-bit word of the
   expanded key schedule in place, one 'bswapl' per word. */
static inline void
padlock_bswapl(AES_KEY *ks)
{
	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
	unsigned int *key = ks->rd_key;

	while (i--) {
		asm volatile ("bswapl %0" : "+r"(*key));
		key++;
	}
}
| 387 #endif | |
| 388 | |
/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick (pushfl immediately followed by popfl). */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}
| 397 | |
| 398 #ifndef OPENSSL_NO_AES | |
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
/* If EFLAGS[30] is set (key already loaded) and the saved context
 * differs from cdata, restore EFLAGS via popfl to force a key
 * reload; finally record cdata as the current context. */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"
"	btl	$30,(%%esp)\n"
"	jnc	1f\n"
"	cmpl	%2,%1\n"
"	je	1f\n"
"	popfl\n"
"	subl	$4,%%esp\n"
"1:	addl	$4,%%esp\n"
"	movl	%2,%0"
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
| 424 | |
/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure: +16 = cword, +32 = ks.
 */
/* Each generated function runs 'rep xcrypt*' over cnt blocks and
 * returns the final IV pointer left in %eax by the instruction.
 * %ebx is saved/restored by hand for PIC builds. */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
		rep_xcrypt "\n"			\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}

/* Generate all functions with appropriate opcodes */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
| 452 #endif | |
| 453 | |
/* The RNG call itself: execute 'xstore' which writes random bytes
   to *addr under control of edx_in and reports status in %eax. */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}
| 467 | |
/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
/* Word-wise copy of n bytes from src to dst; returns dst.
 * Assumes word-aligned buffers; a trailing partial word is not
 * copied. Fixed: the previous do/while underflowed the counter
 * when n < sizeof(long) (including n == 0), copying one word it
 * should not have and then looping ~SIZE_MAX times. */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long		*d=dst;
	const long	*s=src;

	/* Copy whole machine words only; a zero count copies nothing. */
	for (n /= sizeof(*d); n != 0; n--)
		*d++ = *s++;

	return dst;
}
| 490 | |
| 491 #elif defined(_MSC_VER) | |
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
/* Emit the 'rep xcrypt*' opcode bytes (0xf3 0x0f 0xa7 <code>). */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code

/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure: +16 = cword, +32 = ks.
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	      void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}

/* Same opcode suffixes as the GCC variants above:
   0xc8=ecb, 0xd0=cbc, 0xe0=cfb, 0xe8=ofb. */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
| 525 | |
/* MSVC flavour of the RNG call: 'xstore' (opcode 0x0f 0xa7 0xc0)
   writes random bytes to outp; status comes back in eax. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
| 531 | |
/* Force key reload: loading EFLAGS from the stack clears EFLAGS[30]
   (see the GCC version's commentary above). */
static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd }
| 535 | |
/* MSVC flavour of the heuristic context check: if EFLAGS[30] is set
   and the saved context differs from cdata (in ecx, per __fastcall),
   pop EFLAGS back to force a key reload; then record cdata. */
static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
	pushfd
	bt	DWORD PTR[esp],30
	jnc	skip
	cmp	ecx,padlock_saved_context
	je	skip
	popfd
	sub	esp,4
skip:	add	esp,4
	mov	padlock_saved_context,ecx
	}
}
| 550 | |
/* MSVC flavour of the feature probe. Same sequence as the GCC
 * version: toggle EFLAGS[21] to prove CPUID exists, match the
 * 'CentaurHauls' vendor string (the three 4-char literals below are
 * its pieces in EBX/EDX/ECX order), check the extended leaf range,
 * then read leaf 0xC0000001 and require both bits of each pair
 * (6-7 for ACE, 2-3 for RNG). Sets the globals and returns the
 * count of usable units in eax. */
static int
padlock_available(void)
{	_asm	{
	pushfd
	pop	eax
	mov	ecx,eax
	xor	eax,1<<21
	push	eax
	popfd
	pushfd
	pop	eax
	xor	eax,ecx
	bt	eax,21
	jnc	noluck
	mov	eax,0
	cpuid
	xor	eax,eax
	cmp	ebx,'tneC'
	jne	noluck
	cmp	edx,'Hrua'
	jne	noluck
	cmp	ecx,'slua'
	jne	noluck
	mov	eax,0xC0000000
	cpuid
	mov	edx,eax
	xor	eax,eax
	cmp	edx,0xC0000001
	jb	noluck
	mov	eax,0xC0000001
	cpuid
	xor	eax,eax
	bt	edx,6
	jnc	skip_a
	bt	edx,7
	jnc	skip_a
	mov	padlock_use_ace,1
	inc	eax
skip_a:	bt	edx,2
	jnc	skip_r
	bt	edx,3
	jnc	skip_r
	mov	padlock_use_rng,1
	inc	eax
skip_r:
noluck:
	}
}
| 599 | |
/* MSVC flavour of the key-schedule byte swap: lodsd/bswap/stosd over
 * 60 dwords in place (presumably sizeof(AES_KEY.rd_key)/4 — matches
 * the GCC version's loop bound; verify against the AES_KEY layout).
 * EFLAGS is saved around 'cld' to preserve the Direction Flag. */
static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
	pushfd
	cld
	mov	esi,ecx
	mov	edi,ecx
	mov	ecx,60
up:	lodsd
	bswap	eax
	stosd
	loop	up
	popfd
	}
}
| 615 | |
| 616 /* MS actually specifies status of Direction Flag and compiler even | |
| 617 * manages to compile following as 'rep movsd' all by itself... | |
| 618 */ | |
| 619 #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) | |
| 620 #endif | |
| 621 | |
| 622 /* ===== AES encryption/decryption ===== */ | |
| 623 #ifndef OPENSSL_NO_AES | |
| 624 | |
| 625 #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) | |
| 626 #define NID_aes_128_cfb NID_aes_128_cfb128 | |
| 627 #endif | |
| 628 | |
| 629 #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) | |
| 630 #define NID_aes_128_ofb NID_aes_128_ofb128 | |
| 631 #endif | |
| 632 | |
| 633 #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) | |
| 634 #define NID_aes_192_cfb NID_aes_192_cfb128 | |
| 635 #endif | |
| 636 | |
| 637 #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) | |
| 638 #define NID_aes_192_ofb NID_aes_192_ofb128 | |
| 639 #endif | |
| 640 | |
| 641 #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) | |
| 642 #define NID_aes_256_cfb NID_aes_256_cfb128 | |
| 643 #endif | |
| 644 | |
| 645 #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) | |
| 646 #define NID_aes_256_ofb NID_aes_256_ofb128 | |
| 647 #endif | |
| 648 | |
/* List of supported ciphers.
   Keep in sync with the lookup in padlock_ciphers() below. */
static int padlock_cipher_nids[] = {
	NID_aes_128_ecb,
	NID_aes_128_cbc,
	NID_aes_128_cfb,
	NID_aes_128_ofb,

	NID_aes_192_ecb,
	NID_aes_192_cbc,
	NID_aes_192_cfb,
	NID_aes_192_ofb,

	NID_aes_256_ecb,
	NID_aes_256_cbc,
	NID_aes_256_cfb,
	NID_aes_256_ofb,
};
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
				      sizeof(padlock_cipher_nids[0]));
| 668 | |
/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
				const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
			      const unsigned char *in, size_t nbytes);

/* Round 'ptr' up to the next 16-byte boundary. PadLock needs the
   cipher data 16-byte aligned; DECLARE_AES_EVP over-allocates
   cipher_data by 16 bytes to leave room for this adjustment. */
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
	NEAREST_ALIGNED(ctx->cipher_data))

/* EVP block sizes per mode: ECB/CBC are block ciphers,
   CFB/OFB are byte-oriented (block size 1). */
#define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
#define EVP_CIPHER_block_size_OFB	1
#define EVP_CIPHER_block_size_CFB	1
| 684 | |
/* Declaring so many ciphers by hand would be a pain.
   Instead introduce a bit of preprocessor magic :-) */
/* Each expansion defines one static EVP_CIPHER named
 * padlock_aes_<ksize>_<lmode>. Note cipher_data is sized
 * sizeof(struct padlock_cipher_data) + 16 so ALIGNED_CIPHER_DATA()
 * can align it to a 16-byte boundary. */
#define DECLARE_AES_EVP(ksize,lmode,umode)	\
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
	NID_aes_##ksize##_##lmode,		\
	EVP_CIPHER_block_size_##umode,		\
	AES_KEY_SIZE_##ksize,			\
	AES_BLOCK_SIZE,				\
	0 | EVP_CIPH_##umode##_MODE,		\
	padlock_aes_init_key,			\
	padlock_aes_cipher,			\
	NULL,					\
	sizeof(struct padlock_cipher_data) + 16,	\
	EVP_CIPHER_set_asn1_iv,			\
	EVP_CIPHER_get_asn1_iv,			\
	NULL,					\
	NULL					\
}

DECLARE_AES_EVP(128,ecb,ECB);
DECLARE_AES_EVP(128,cbc,CBC);
DECLARE_AES_EVP(128,cfb,CFB);
DECLARE_AES_EVP(128,ofb,OFB);

DECLARE_AES_EVP(192,ecb,ECB);
DECLARE_AES_EVP(192,cbc,CBC);
DECLARE_AES_EVP(192,cfb,CFB);
DECLARE_AES_EVP(192,ofb,OFB);

DECLARE_AES_EVP(256,ecb,ECB);
DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
| 718 | |
| 719 static int | |
| 720 padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid
) | |
| 721 { | |
| 722 /* No specific cipher => return a list of supported nids ... */ | |
| 723 if (!cipher) { | |
| 724 *nids = padlock_cipher_nids; | |
| 725 return padlock_cipher_nids_num; | |
| 726 } | |
| 727 | |
| 728 /* ... or the requested "cipher" otherwise */ | |
| 729 switch (nid) { | |
| 730 case NID_aes_128_ecb: | |
| 731 *cipher = &padlock_aes_128_ecb; | |
| 732 break; | |
| 733 case NID_aes_128_cbc: | |
| 734 *cipher = &padlock_aes_128_cbc; | |
| 735 break; | |
| 736 case NID_aes_128_cfb: | |
| 737 *cipher = &padlock_aes_128_cfb; | |
| 738 break; | |
| 739 case NID_aes_128_ofb: | |
| 740 *cipher = &padlock_aes_128_ofb; | |
| 741 break; | |
| 742 | |
| 743 case NID_aes_192_ecb: | |
| 744 *cipher = &padlock_aes_192_ecb; | |
| 745 break; | |
| 746 case NID_aes_192_cbc: | |
| 747 *cipher = &padlock_aes_192_cbc; | |
| 748 break; | |
| 749 case NID_aes_192_cfb: | |
| 750 *cipher = &padlock_aes_192_cfb; | |
| 751 break; | |
| 752 case NID_aes_192_ofb: | |
| 753 *cipher = &padlock_aes_192_ofb; | |
| 754 break; | |
| 755 | |
| 756 case NID_aes_256_ecb: | |
| 757 *cipher = &padlock_aes_256_ecb; | |
| 758 break; | |
| 759 case NID_aes_256_cbc: | |
| 760 *cipher = &padlock_aes_256_cbc; | |
| 761 break; | |
| 762 case NID_aes_256_cfb: | |
| 763 *cipher = &padlock_aes_256_cfb; | |
| 764 break; | |
| 765 case NID_aes_256_ofb: | |
| 766 *cipher = &padlock_aes_256_ofb; | |
| 767 break; | |
| 768 | |
| 769 default: | |
| 770 /* Sorry, we don't support this NID */ | |
| 771 *cipher = NULL; | |
| 772 return 0; | |
| 773 } | |
| 774 | |
| 775 return 1; | |
| 776 } | |
| 777 | |
/* Prepare the encryption key for PadLock usage.
 * EVP init callback: fills the 16-byte-aligned padlock_cipher_data
 * with the hardware control word and the key material. AES-128 keys
 * are expanded by the hardware itself; AES-192/256 keys are expanded
 * in software (Stepping 8 errata, see below).
 * Returns 1 on success, 0 on NULL key or unsupported key length. */
static int
padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
		      const unsigned char *iv, int enc)
{
	struct padlock_cipher_data *cdata;
	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;  /* bits */

	if (key==NULL) return 0;	/* ERROR */

	cdata = ALIGNED_CIPHER_DATA(ctx);
	memset(cdata, 0, sizeof(struct padlock_cipher_data));

	/* Prepare Control word.
	   OFB always uses the encryption direction; otherwise the
	   hardware encdec bit is the inverse of ctx->encrypt. */
	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
		cdata->cword.b.encdec = 0;
	else
		cdata->cword.b.encdec = (ctx->encrypt == 0);
	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;  /* 10/12/14 */
	cdata->cword.b.ksize = (key_len - 128) / 64;        /* 0/1/2 */

	switch(key_len) {
		case 128:
			/* PadLock can generate an extended key for
			   AES128 in hardware */
			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
			cdata->cword.b.keygen = 0;
			break;

		case 192:
		case 256:
			/* Generate an extended AES key in software.
			   Needed for AES192/AES256 */
			/* Well, the above applies to Stepping 8 CPUs
			   and is listed as hardware errata. They most
			   likely will fix it at some point and then
			   a check for stepping would be due here. */
			/* CFB/OFB use the encryption schedule even when
			   decrypting (stream modes). */
			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
			    enc)
				AES_set_encrypt_key(key, key_len, &cdata->ks);
			else
				AES_set_decrypt_key(key, key_len, &cdata->ks);
#ifndef AES_ASM
			/* OpenSSL C functions use byte-swapped extended key. */
			padlock_bswapl(&cdata->ks);
#endif
			cdata->cword.b.keygen = 1;
			break;

		default:
			/* ERROR */
			return 0;
	}

	/*
	 * This is done to cover for cases when user reuses the
	 * context for new key. The catch is that if we don't do
	 * this, padlock_aes_cipher might proceed with old key...
	 */
	padlock_reload_key ();

	return 1;
}
| 842 | |
/*
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
/* Dispatch on the EVP mode and run the matching 'rep xcrypt*'
 * primitive over whole blocks. For IV-bearing modes the IV is
 * staged in cdata->iv; CBC/CFB take the updated IV from the
 * pointer the xcrypt instruction returns, OFB reads it back from
 * cdata->iv. Returns 1, or 0 on an unknown mode. */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	void  *iv;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Don't leave IV material lying around in the context. */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}
| 890 | |
| 891 #ifndef PADLOCK_CHUNK | |
| 892 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */ | |
| 893 #endif | |
| 894 #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) | |
| 895 # error "insane PADLOCK_CHUNK..." | |
| 896 #endif | |
| 897 | |
/* Re-align the arguments to 16-Bytes boundaries and run the
   encryption function itself. This function is not AES-specific.

   Handles all four block modes (ECB/CBC/CFB/OFB).  Input/output that is
   not 16-byte aligned is staged through an alloca()-ed bounce buffer in
   PADLOCK_CHUNK-sized pieces.  Returns 1 on success, 0 on a bogus
   ctx->num or an unknown mode. */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
		   const unsigned char *in_arg, size_t nbytes)
{
	struct padlock_cipher_data *cdata;
	const void *inp;
	unsigned char *out;
	void *iv;
	int inp_misaligned, out_misaligned, realign_in_loop;
	size_t chunk, allocated=0;

	/* ctx->num is maintained in byte-oriented modes,
	   such as CFB and OFB... */
	if ((chunk = ctx->num)) { /* borrow chunk variable */
		unsigned char *ivp=ctx->iv;

		/* Finish the partially consumed keystream block left over
		 * from the previous call, byte by byte, before touching the
		 * hardware again. */
		switch (EVP_CIPHER_CTX_mode(ctx)) {
		case EVP_CIPH_CFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			if (ctx->encrypt)
				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
					chunk++, nbytes--;
				}
			else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
					/* decrypt: feed the ciphertext byte back
					 * into the IV, not the plaintext */
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ ivp[chunk];
					ivp[chunk++] = c, nbytes--;
				}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		case EVP_CIPH_OFB_MODE:
			if (chunk >= AES_BLOCK_SIZE)
				return 0; /* bogus value */

			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
				chunk++, nbytes--;
			}

			ctx->num = chunk%AES_BLOCK_SIZE;
			break;
		}
	}

	if (nbytes == 0)
		return 1;
#if 0
	if (nbytes % AES_BLOCK_SIZE)
		return 0; /* are we expected to do tail processing? */
#else
	/* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
	   modes and arbitrary value in byte-oriented modes, such as
	   CFB and OFB... */
#endif

	/* VIA promises CPUs that won't require alignment in the future.
	   For now padlock_aes_align_required is initialized to 1 and
	   the condition is never met... */
	/* C7 core is capable to manage unaligned input in non-ECB[!]
	   mode, but performance penalties appear to be approximately
	   same as for software alignment below or ~3x. They promise to
	   improve it in the future, but for now we can just as well
	   pretend that it can only handle aligned input... */
	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	inp_misaligned = (((size_t)in_arg) & 0x0F);
	out_misaligned = (((size_t)out_arg) & 0x0F);

	/* Note that even if output is aligned and input not,
	 * I still prefer to loop instead of copy the whole
	 * input and then encrypt in one stroke. This is done
	 * in order to improve L1 cache utilization... */
	realign_in_loop = out_misaligned|inp_misaligned;

	/* Both buffers aligned and whole blocks: hand off in one stroke. */
	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

	/* this takes one "if" out of the loops */
	/* First iteration processes nbytes % PADLOCK_CHUNK (or a full
	 * PADLOCK_CHUNK when nbytes divides evenly); every later
	 * iteration is exactly PADLOCK_CHUNK. */
	chunk = nbytes;
	chunk %= PADLOCK_CHUNK;
	if (chunk==0) chunk = PADLOCK_CHUNK;

	if (out_misaligned) {
		/* optmize for small input */
		/* 0x10 slack lets NEAREST_ALIGNED round up to a 16-byte
		 * boundary inside the allocation. */
		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
		out = alloca(0x10 + allocated);
		out = NEAREST_ALIGNED(out);
	}
	else
		out = out_arg;

	cdata = ALIGNED_CIPHER_DATA(ctx);
	padlock_verify_context(cdata);

	switch (EVP_CIPHER_CTX_mode(ctx)) {
	case EVP_CIPH_ECB_MODE:
		do {
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out = out_arg+=chunk;

			nbytes -= chunk;
			chunk = PADLOCK_CHUNK;
		} while (nbytes);
		break;

	case EVP_CIPH_CBC_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* Deliberate jump into the loop body: the first iteration
		 * must skip the IV copy-back and keep the short first
		 * chunk computed above. */
		goto cbc_shortcut;
		do {
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cbc_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			/* returns a pointer to the chaining value for the
			 * next chunk */
			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out = out_arg+=chunk;

		} while (nbytes -= chunk);
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_CFB_MODE:
		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		/* whole blocks go through the hardware; the remainder is
		 * handled byte-wise after the loop */
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) goto cfb_shortcut;
		else goto cfb_skiploop;
		do {
			if (iv != cdata->iv)
				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
			chunk = PADLOCK_CHUNK;
		cfb_shortcut: /* optimize for small input */
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out = out_arg+=chunk;

			nbytes -= chunk;
		} while (nbytes >= AES_BLOCK_SIZE);

	cfb_skiploop:
		if (nbytes) {
			/* Partial trailing block: generate one keystream
			 * block by ECB-encrypting the IV, then XOR byte by
			 * byte, recording progress in ctx->num. */
			unsigned char *ivp = cdata->iv;

			if (iv != ivp) {
				memcpy(ivp, iv, AES_BLOCK_SIZE);
				iv = ivp;
			}
			ctx->num = nbytes;
			if (cdata->cword.b.encdec) {
				/* CFB keystream generation always uses the
				 * forward (encrypt) transform, so temporarily
				 * clear the decrypt flag around the ECB call;
				 * padlock_reload_key() forces the engine to
				 * notice the control-word change. */
				cdata->cword.b.encdec=0;
				padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				cdata->cword.b.encdec=1;
				padlock_reload_key();
				while(nbytes) {
					unsigned char c = *(in_arg++);
					*(out_arg++) = c ^ *ivp;
					*(ivp++) = c, nbytes--;
				}
			}
			else { padlock_reload_key();
				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
				padlock_reload_key();
				while (nbytes) {
					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
					ivp++, nbytes--;
				}
			}
		}

		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		break;

	case EVP_CIPH_OFB_MODE:
		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
		chunk &= ~(AES_BLOCK_SIZE-1);
		if (chunk) do {
			if (inp_misaligned)
				inp = padlock_memcpy(out, in_arg, chunk);
			else
				inp = in_arg;
			in_arg += chunk;

			/* OFB chains through cdata->iv in place; no return
			 * value to propagate */
			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);

			if (out_misaligned)
				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
			else
				out = out_arg+=chunk;

			nbytes -= chunk;
			chunk = PADLOCK_CHUNK;
		} while (nbytes >= AES_BLOCK_SIZE);

		if (nbytes) {
			/* Partial trailing block, same keystream trick as
			 * CFB above (OFB needs no encdec flip). */
			unsigned char *ivp = cdata->iv;

			ctx->num = nbytes;
			padlock_reload_key(); /* empirically found */
			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
			padlock_reload_key(); /* empirically found */
			while (nbytes) {
				*(out_arg++) = *(in_arg++) ^ *ivp;
				ivp++, nbytes--;
			}
		}

		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
		break;

	default:
		return 0;
	}

	/* Clean the realign buffer if it was used */
	/* volatile stores keep the scrub from being optimized away */
	if (out_misaligned) {
		volatile unsigned long *p=(void *)out;
		size_t n = allocated/sizeof(*p);
		while (n--) *p++=0;
	}

	/* don't leave key-stream material in the context */
	memset(cdata->iv, 0, AES_BLOCK_SIZE);

	return 1;
}
| 1157 | |
| 1158 #endif /* OPENSSL_NO_AES */ | |
| 1159 | |
| 1160 /* ===== Random Number Generator ===== */ | |
| 1161 /* | |
| 1162 * This code is not engaged. The reason is that it does not comply | |
| 1163 * with recommendations for VIA RNG usage for secure applications | |
| 1164 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it | |
| 1165 * provide meaningful error control... | |
| 1166 */ | |
| 1167 /* Wrapper that provides an interface between the API and | |
| 1168 the raw PadLock RNG */ | |
/*
 * Fill `output` with `count` bytes from the PadLock hardware RNG.
 * Returns 1 on success, 0 if the RNG is disabled, a quality self-test
 * bit is raised, or the unit reports an unexpected byte count.
 */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int status, buf;

	/* Fast path: pull eight bytes per XSTORE straight into the
	 * caller's buffer. */
	while (count >= 8) {
		status = padlock_xstore(output, 0);
		if (!(status & (1<<6)))
			return 0;	/* RNG disabled */
		/* bits 10..14 cover DC bias, Raw Bits and String Filter */
		if (status & (0x1F<<10))
			return 0;
		switch (status & 0x1F) {
		case 0:
			continue;	/* no data yet, retry */
		case 8:
			break;		/* full 8-byte chunk delivered */
		default:
			return 0;	/* fatal failure */
		}
		output += 8;
		count -= 8;
	}

	/* Tail: fetch one byte at a time through a scratch word. */
	while (count > 0) {
		status = padlock_xstore(&buf, 3);
		if (!(status & (1<<6)))
			return 0;	/* RNG disabled */
		/* bits 10..14 cover DC bias, Raw Bits and String Filter */
		if (status & (0x1F<<10))
			return 0;
		switch (status & 0x1F) {
		case 0:
			continue;	/* no data yet, retry */
		case 1:
			break;		/* exactly one byte delivered */
		default:
			return 0;	/* fatal failure */
		}
		*output++ = (unsigned char)buf;
		count--;
	}

	/* Scrub the scratch word; volatile store defeats dead-store
	 * elimination. */
	*(volatile unsigned int *)&buf = 0;

	return 1;
}
| 1198 | |
/* Dummy but necessary function: the RAND_METHOD table expects a status
 * callback, and the hardware source is always considered ready. */
static int
padlock_rand_status(void)
{
	static const int always_ready = 1;

	return always_ready;
}
| 1205 | |
/* Prepare structure for registration: RAND_METHOD slots the engine
 * exposes.  Seeding, cleanup and add are left NULL — the hardware
 * source takes no external entropy. */
static RAND_METHOD padlock_rand = {
	NULL, /* seed */
	padlock_rand_bytes, /* bytes */
	NULL, /* cleanup */
	NULL, /* add */
	padlock_rand_bytes, /* pseudorand: same hardware source as bytes */
	padlock_rand_status, /* rand status */
};
| 1215 | |
| 1216 #endif /* COMPILE_HW_PADLOCK */ | |
| 1217 | |
| 1218 #endif /* !OPENSSL_NO_HW_PADLOCK */ | |
| 1219 #endif /* !OPENSSL_NO_HW */ | |
| OLD | NEW |