| OLD | NEW |
| (Empty) |
| 1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
| 2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
| 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
| 4 | |
| 5 #ifdef FREEBL_NO_DEPEND | |
| 6 #include "stubs.h" | |
| 7 #endif | |
| 8 | |
| 9 #include "prinit.h" | |
| 10 #include "prenv.h" | |
| 11 #include "prerr.h" | |
| 12 #include "secerr.h" | |
| 13 | |
| 14 #include "prtypes.h" | |
| 15 #include "blapi.h" | |
| 16 #include "rijndael.h" | |
| 17 | |
| 18 #include "cts.h" | |
| 19 #include "ctr.h" | |
| 20 #include "gcm.h" | |
| 21 | |
| 22 #ifdef USE_HW_AES | |
| 23 #include "intel-aes.h" | |
| 24 #include "mpi.h" | |
| 25 | |
| 26 static int has_intel_aes = 0; | |
| 27 static PRBool use_hw_aes = PR_FALSE; | |
| 28 | |
| 29 #ifdef INTEL_GCM | |
| 30 #include "intel-gcm.h" | |
| 31 static int has_intel_avx = 0; | |
| 32 static int has_intel_clmul = 0; | |
| 33 static PRBool use_hw_gcm = PR_FALSE; | |
| 34 #if defined(_MSC_VER) && !defined(_M_IX86) | |
| 35 #include <intrin.h> /* for _xgetbv() */ | |
| 36 #endif | |
| 37 #endif | |
| 38 #endif /* USE_HW_AES */ | |
| 39 | |
| 40 /* | |
| 41 * There are currently five ways to build this code, varying in performance | |
| 42 * and code size. | |
| 43 * | |
| 44 * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab | |
| 45 * RIJNDAEL_GENERATE_TABLES Generate tables on first | |
| 46 * encryption/decryption, then store them; | |
| 47 * use the function gfm | |
| 48 * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do | |
| 49 * the generation | |
| 50 * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table | |
| 51 * values "on-the-fly", using gfm | |
| 52 * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros | |
| 53 * | |
| 54 * The default is RIJNDAEL_INCLUDE_TABLES. | |
| 55 */ | |
| 56 | |
| 57 /* | |
| 58 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], | |
| 59 * T**-1[0..4], IMXC[0..4] | |
| 60 * When building anything else, includes S, S**-1, Rcon | |
| 61 */ | |
| 62 #include "rijndael32.tab" | |
| 63 | |
| 64 #if defined(RIJNDAEL_INCLUDE_TABLES) | |
| 65 /* | |
| 66 * RIJNDAEL_INCLUDE_TABLES | |
| 67 */ | |
| 68 #define T0(i) _T0[i] | |
| 69 #define T1(i) _T1[i] | |
| 70 #define T2(i) _T2[i] | |
| 71 #define T3(i) _T3[i] | |
| 72 #define TInv0(i) _TInv0[i] | |
| 73 #define TInv1(i) _TInv1[i] | |
| 74 #define TInv2(i) _TInv2[i] | |
| 75 #define TInv3(i) _TInv3[i] | |
| 76 #define IMXC0(b) _IMXC0[b] | |
| 77 #define IMXC1(b) _IMXC1[b] | |
| 78 #define IMXC2(b) _IMXC2[b] | |
| 79 #define IMXC3(b) _IMXC3[b] | |
| 80 /* The S-box can be recovered from the T-tables */ | |
| 81 #ifdef IS_LITTLE_ENDIAN | |
| 82 #define SBOX(b) ((PRUint8)_T3[b]) | |
| 83 #else | |
| 84 #define SBOX(b) ((PRUint8)_T1[b]) | |
| 85 #endif | |
| 86 #define SINV(b) (_SInv[b]) | |
| 87 | |
| 88 #else /* not RIJNDAEL_INCLUDE_TABLES */ | |
| 89 | |
| 90 /* | |
| 91 * Code for generating T-table values. | |
| 92 */ | |
| 93 | |
/*
 * WORD4(b0, b1, b2, b3): pack four byte values into one 32-bit word.
 * On little-endian builds b0 lands in the least significant byte; otherwise
 * b0 lands in the most significant byte.
 *
 * Each operand is converted to PRUint32 before shifting. Without the cast a
 * PRUint8 operand is promoted to (signed) int, and shifting a value >= 0x80
 * left by 24 overflows int, which is undefined behavior and produces a
 * signed (negative) result.
 */
#ifdef IS_LITTLE_ENDIAN
#define WORD4(b0, b1, b2, b3)                                 \
    (((PRUint32)(b3) << 24) | ((PRUint32)(b2) << 16) |        \
     ((PRUint32)(b1) << 8) | (PRUint32)(b0))
#else
#define WORD4(b0, b1, b2, b3)                                 \
    (((PRUint32)(b0) << 24) | ((PRUint32)(b1) << 16) |        \
     ((PRUint32)(b2) << 8) | (PRUint32)(b3))
#endif
| 101 | |
| 102 /* | |
| 103 * Define the S and S**-1 tables (both have been stored) | |
| 104 */ | |
| 105 #define SBOX(b) (_S[b]) | |
| 106 #define SINV(b) (_SInv[b]) | |
| 107 | |
/*
 * The function xtime, used for Galois field multiplication.
 *
 * Shifts a left by one bit and, when bit 7 of a was set, XORs in 0x1b.
 * The result is NOT masked to 8 bits here: users either mask it with 0xff
 * or store it into an 8-bit variable.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects. It is fully parenthesized so that compound expressions such as
 * XTIME(x | y) expand correctly.
 */
#define XTIME(a) \
    (((a) & 0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
| 113 | |
| 114 /* Choose GFM method (macros or function) */ | |
| 115 #if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ | |
| 116 defined(RIJNDAEL_GENERATE_VALUES_MACRO) | |
| 117 | |
| 118 /* | |
| 119 * Galois field GF(2**8) multipliers, in macro form | |
| 120 */ | |
| 121 #define GFM01(a) \ | |
| 122 (a) /* a * 01 = a, the identity */ | |
| 123 #define GFM02(a) \ | |
| 124 (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ | |
| 125 #define GFM04(a) \ | |
| 126 (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ | |
| 127 #define GFM08(a) \ | |
| 128 (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ | |
| 129 #define GFM03(a) \ | |
| 130 (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ | |
| 131 #define GFM09(a) \ | |
| 132 (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ | |
| 133 #define GFM0B(a) \ | |
| 134 (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ | |
| 135 #define GFM0D(a) \ | |
| 136 (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ | |
| 137 #define GFM0E(a) \ | |
| 138 (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ | |
| 139 | |
| 140 #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ | |
| 141 | |
| 142 /* GF_MULTIPLY | |
| 143 * | |
| 144 * multiply two bytes represented in GF(2**8), mod (x**4 + 1) | |
| 145 */ | |
| 146 PRUint8 gfm(PRUint8 a, PRUint8 b) | |
| 147 { | |
| 148 PRUint8 res = 0; | |
| 149 while (b > 0) { | |
| 150 res = (b & 0x01) ? res ^ a : res; | |
| 151 a = XTIME(a); | |
| 152 b >>= 1; | |
| 153 } | |
| 154 return res; | |
| 155 } | |
| 156 | |
| 157 #define GFM01(a) \ | |
| 158 (a) /* a * 01 = a, the identity */ | |
| 159 #define GFM02(a) \ | |
| 160 (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ | |
| 161 #define GFM03(a) \ | |
| 162 (gfm(a, 0x03)) /* a * 03 */ | |
| 163 #define GFM09(a) \ | |
| 164 (gfm(a, 0x09)) /* a * 09 */ | |
| 165 #define GFM0B(a) \ | |
| 166 (gfm(a, 0x0B)) /* a * 0B */ | |
| 167 #define GFM0D(a) \ | |
| 168 (gfm(a, 0x0D)) /* a * 0D */ | |
| 169 #define GFM0E(a) \ | |
| 170 (gfm(a, 0x0E)) /* a * 0E */ | |
| 171 | |
| 172 #endif /* choosing GFM function */ | |
| 173 | |
| 174 /* | |
| 175 * The T-tables | |
| 176 */ | |
| 177 #define G_T0(i) \ | |
| 178 ( WORD4( GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)) ) ) | |
| 179 #define G_T1(i) \ | |
| 180 ( WORD4( GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)) ) ) | |
| 181 #define G_T2(i) \ | |
| 182 ( WORD4( GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)) ) ) | |
| 183 #define G_T3(i) \ | |
| 184 ( WORD4( GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)) ) ) | |
| 185 | |
| 186 /* | |
| 187 * The inverse T-tables | |
| 188 */ | |
| 189 #define G_TInv0(i) \ | |
| 190 ( WORD4( GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)) ) ) | |
| 191 #define G_TInv1(i) \ | |
| 192 ( WORD4( GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)) ) ) | |
| 193 #define G_TInv2(i) \ | |
| 194 ( WORD4( GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)) ) ) | |
| 195 #define G_TInv3(i) \ | |
| 196 ( WORD4( GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)) ) ) | |
| 197 | |
| 198 /* | |
| 199 * The inverse mix column tables | |
| 200 */ | |
| 201 #define G_IMXC0(i) \ | |
| 202 ( WORD4( GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i) ) ) | |
| 203 #define G_IMXC1(i) \ | |
| 204 ( WORD4( GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i) ) ) | |
| 205 #define G_IMXC2(i) \ | |
| 206 ( WORD4( GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i) ) ) | |
| 207 #define G_IMXC3(i) \ | |
| 208 ( WORD4( GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i) ) ) | |
| 209 | |
| 210 /* Now choose the T-table indexing method */ | |
| 211 #if defined(RIJNDAEL_GENERATE_VALUES) | |
| 212 /* generate values for the tables with a function*/ | |
| 213 static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i) | |
| 214 { | |
| 215 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; | |
| 216 si01 = SINV(i); | |
| 217 si02 = XTIME(si01); | |
| 218 si04 = XTIME(si02); | |
| 219 si08 = XTIME(si04); | |
| 220 si03 = si02 ^ si01; | |
| 221 si09 = si08 ^ si01; | |
| 222 si0B = si08 ^ si03; | |
| 223 si0D = si09 ^ si04; | |
| 224 si0E = si08 ^ si04 ^ si02; | |
| 225 switch (tx) { | |
| 226 case 0: | |
| 227 return WORD4(si0E, si09, si0D, si0B); | |
| 228 case 1: | |
| 229 return WORD4(si0B, si0E, si09, si0D); | |
| 230 case 2: | |
| 231 return WORD4(si0D, si0B, si0E, si09); | |
| 232 case 3: | |
| 233 return WORD4(si09, si0D, si0B, si0E); | |
| 234 } | |
| 235 return -1; | |
| 236 } | |
| 237 #define T0(i) G_T0(i) | |
| 238 #define T1(i) G_T1(i) | |
| 239 #define T2(i) G_T2(i) | |
| 240 #define T3(i) G_T3(i) | |
| 241 #define TInv0(i) gen_TInvXi(0, i) | |
| 242 #define TInv1(i) gen_TInvXi(1, i) | |
| 243 #define TInv2(i) gen_TInvXi(2, i) | |
| 244 #define TInv3(i) gen_TInvXi(3, i) | |
| 245 #define IMXC0(b) G_IMXC0(b) | |
| 246 #define IMXC1(b) G_IMXC1(b) | |
| 247 #define IMXC2(b) G_IMXC2(b) | |
| 248 #define IMXC3(b) G_IMXC3(b) | |
| 249 #elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) | |
| 250 /* generate values for the tables with macros */ | |
| 251 #define T0(i) G_T0(i) | |
| 252 #define T1(i) G_T1(i) | |
| 253 #define T2(i) G_T2(i) | |
| 254 #define T3(i) G_T3(i) | |
| 255 #define TInv0(i) G_TInv0(i) | |
| 256 #define TInv1(i) G_TInv1(i) | |
| 257 #define TInv2(i) G_TInv2(i) | |
| 258 #define TInv3(i) G_TInv3(i) | |
| 259 #define IMXC0(b) G_IMXC0(b) | |
| 260 #define IMXC1(b) G_IMXC1(b) | |
| 261 #define IMXC2(b) G_IMXC2(b) | |
| 262 #define IMXC3(b) G_IMXC3(b) | |
| 263 #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ | |
| 264 /* Generate T and T**-1 table values and store, then index */ | |
| 265 /* The inverse mix column tables are still generated */ | |
| 266 #define T0(i) rijndaelTables->T0[i] | |
| 267 #define T1(i) rijndaelTables->T1[i] | |
| 268 #define T2(i) rijndaelTables->T2[i] | |
| 269 #define T3(i) rijndaelTables->T3[i] | |
| 270 #define TInv0(i) rijndaelTables->TInv0[i] | |
| 271 #define TInv1(i) rijndaelTables->TInv1[i] | |
| 272 #define TInv2(i) rijndaelTables->TInv2[i] | |
| 273 #define TInv3(i) rijndaelTables->TInv3[i] | |
| 274 #define IMXC0(b) G_IMXC0(b) | |
| 275 #define IMXC1(b) G_IMXC1(b) | |
| 276 #define IMXC2(b) G_IMXC2(b) | |
| 277 #define IMXC3(b) G_IMXC3(b) | |
| 278 #endif /* choose T-table indexing method */ | |
| 279 | |
| 280 #endif /* not RIJNDAEL_INCLUDE_TABLES */ | |
| 281 | |
| 282 #if defined(RIJNDAEL_GENERATE_TABLES) || \ | |
| 283 defined(RIJNDAEL_GENERATE_TABLES_MACRO) | |
| 284 | |
| 285 /* Code to generate and store the tables */ | |
| 286 | |
| 287 struct rijndael_tables_str { | |
| 288 PRUint32 T0[256]; | |
| 289 PRUint32 T1[256]; | |
| 290 PRUint32 T2[256]; | |
| 291 PRUint32 T3[256]; | |
| 292 PRUint32 TInv0[256]; | |
| 293 PRUint32 TInv1[256]; | |
| 294 PRUint32 TInv2[256]; | |
| 295 PRUint32 TInv3[256]; | |
| 296 }; | |
| 297 | |
| 298 static struct rijndael_tables_str *rijndaelTables = NULL; | |
| 299 static PRCallOnceType coRTInit = { 0, 0, 0 }; | |
| 300 static PRStatus | |
| 301 init_rijndael_tables(void) | |
| 302 { | |
| 303 PRUint32 i; | |
| 304 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; | |
| 305 struct rijndael_tables_str *rts; | |
| 306 rts = (struct rijndael_tables_str *) | |
| 307 PORT_Alloc(sizeof(struct rijndael_tables_str)); | |
| 308 if (!rts) return PR_FAILURE; | |
| 309 for (i=0; i<256; i++) { | |
| 310 /* The forward values */ | |
| 311 si01 = SBOX(i); | |
| 312 si02 = XTIME(si01); | |
| 313 si03 = si02 ^ si01; | |
| 314 rts->T0[i] = WORD4(si02, si01, si01, si03); | |
| 315 rts->T1[i] = WORD4(si03, si02, si01, si01); | |
| 316 rts->T2[i] = WORD4(si01, si03, si02, si01); | |
| 317 rts->T3[i] = WORD4(si01, si01, si03, si02); | |
| 318 /* The inverse values */ | |
| 319 si01 = SINV(i); | |
| 320 si02 = XTIME(si01); | |
| 321 si04 = XTIME(si02); | |
| 322 si08 = XTIME(si04); | |
| 323 si03 = si02 ^ si01; | |
| 324 si09 = si08 ^ si01; | |
| 325 si0B = si08 ^ si03; | |
| 326 si0D = si09 ^ si04; | |
| 327 si0E = si08 ^ si04 ^ si02; | |
| 328 rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); | |
| 329 rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); | |
| 330 rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); | |
| 331 rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); | |
| 332 } | |
| 333 /* wait until all the values are in to set */ | |
| 334 rijndaelTables = rts; | |
| 335 return PR_SUCCESS; | |
| 336 } | |
| 337 | |
| 338 #endif /* code to generate tables */ | |
| 339 | |
| 340 /************************************************************************** | |
| 341 * | |
| 342 * Stuff related to the Rijndael key schedule | |
| 343 * | |
| 344 *************************************************************************/ | |
| 345 | |
/*
 * SUBBYTE(w): replace each of the four bytes of the 32-bit word w by its
 * SBOX value, leaving every byte in its original position.
 *
 * w is parenthesized so compound arguments expand correctly, and each SBOX
 * result is converted to PRUint32 before shifting: an 8-bit S-box value
 * would otherwise be promoted to int, and shifting a value >= 0x80 left by
 * 24 overflows int (undefined behavior).
 * The argument is evaluated four times; it must not have side effects.
 */
#define SUBBYTE(w)                                      \
    (((PRUint32)SBOX(((w) >> 24) & 0xff) << 24) |       \
     ((PRUint32)SBOX(((w) >> 16) & 0xff) << 16) |       \
     ((PRUint32)SBOX(((w) >> 8) & 0xff) << 8) |         \
     ((PRUint32)SBOX((w) & 0xff)))
| 351 | |
/*
 * ROTBYTE(b): rotate the 32-bit word b by one byte (8 bits). The rotation
 * is to the right on little-endian builds and to the left otherwise.
 * The argument is parenthesized so compound expressions expand correctly;
 * it is evaluated twice and must not have side effects.
 */
#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
    (((b) >> 8) | ((b) << 24))
#else
#define ROTBYTE(b) \
    (((b) << 8) | ((b) >> 24))
#endif
| 359 | |
| 360 /* rijndael_key_expansion7 | |
| 361 * | |
| 362 * Generate the expanded key from the key input by the user. | |
| 363 * XXX | |
| 364 * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte | |
| 365 * transformation is done periodically. The period is every 4 bytes, and | |
| 366 * since 7%4 != 0 this happens at different times for each key word (unlike | |
| 367 * Nk == 8 where it happens twice in every key word, in the same positions). | |
| 368 * For now, I'm implementing this case "dumbly", w/o any unrolling. | |
| 369 */ | |
| 370 static SECStatus | |
| 371 rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N
k) | |
| 372 { | |
| 373 unsigned int i; | |
| 374 PRUint32 *W; | |
| 375 PRUint32 *pW; | |
| 376 PRUint32 tmp; | |
| 377 W = cx->expandedKey; | |
| 378 /* 1. the first Nk words contain the cipher key */ | |
| 379 memcpy(W, key, Nk * 4); | |
| 380 i = Nk; | |
| 381 /* 2. loop until full expanded key is obtained */ | |
| 382 pW = W + i - 1; | |
| 383 for (; i < cx->Nb * (cx->Nr + 1); ++i) { | |
| 384 tmp = *pW++; | |
| 385 if (i % Nk == 0) | |
| 386 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; | |
| 387 else if (i % Nk == 4) | |
| 388 tmp = SUBBYTE(tmp); | |
| 389 *pW = W[i - Nk] ^ tmp; | |
| 390 } | |
| 391 return SECSuccess; | |
| 392 } | |
| 393 | |
| 394 /* rijndael_key_expansion | |
| 395 * | |
| 396 * Generate the expanded key from the key input by the user. | |
| 397 */ | |
| 398 static SECStatus | |
| 399 rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk
) | |
| 400 { | |
| 401 unsigned int i; | |
| 402 PRUint32 *W; | |
| 403 PRUint32 *pW; | |
| 404 PRUint32 tmp; | |
| 405 unsigned int round_key_words = cx->Nb * (cx->Nr + 1); | |
| 406 if (Nk == 7) | |
| 407 return rijndael_key_expansion7(cx, key, Nk); | |
| 408 W = cx->expandedKey; | |
| 409 /* The first Nk words contain the input cipher key */ | |
| 410 memcpy(W, key, Nk * 4); | |
| 411 i = Nk; | |
| 412 pW = W + i - 1; | |
| 413 /* Loop over all sets of Nk words, except the last */ | |
| 414 while (i < round_key_words - Nk) { | |
| 415 tmp = *pW++; | |
| 416 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; | |
| 417 *pW = W[i++ - Nk] ^ tmp; | |
| 418 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
| 419 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
| 420 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
| 421 if (Nk == 4) | |
| 422 continue; | |
| 423 switch (Nk) { | |
| 424 case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp; | |
| 425 case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
| 426 case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
| 427 case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
| 428 } | |
| 429 } | |
| 430 /* Generate the last word */ | |
| 431 tmp = *pW++; | |
| 432 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; | |
| 433 *pW = W[i++ - Nk] ^ tmp; | |
| 434 /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, | |
| 435 * since the above loop generated all but the last Nk key words, there | |
| 436 * is no more need for the SubByte transformation. | |
| 437 */ | |
| 438 if (Nk < 8) { | |
| 439 for (; i < round_key_words; ++i) { | |
| 440 tmp = *pW++; | |
| 441 *pW = W[i - Nk] ^ tmp; | |
| 442 } | |
| 443 } else { | |
| 444 /* except in the case when Nk == 8. Then one more SubByte may have | |
| 445 * to be performed, at i % Nk == 4. | |
| 446 */ | |
| 447 for (; i < round_key_words; ++i) { | |
| 448 tmp = *pW++; | |
| 449 if (i % Nk == 4) | |
| 450 tmp = SUBBYTE(tmp); | |
| 451 *pW = W[i - Nk] ^ tmp; | |
| 452 } | |
| 453 } | |
| 454 return SECSuccess; | |
| 455 } | |
| 456 | |
| 457 /* rijndael_invkey_expansion | |
| 458 * | |
| 459 * Generate the expanded key for the inverse cipher from the key input by | |
| 460 * the user. | |
| 461 */ | |
| 462 static SECStatus | |
| 463 rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int
Nk) | |
| 464 { | |
| 465 unsigned int r; | |
| 466 PRUint32 *roundkeyw; | |
| 467 PRUint8 *b; | |
| 468 int Nb = cx->Nb; | |
| 469 /* begins like usual key expansion ... */ | |
| 470 if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) | |
| 471 return SECFailure; | |
| 472 /* ... but has the additional step of InvMixColumn, | |
| 473 * excepting the first and last round keys. | |
| 474 */ | |
| 475 roundkeyw = cx->expandedKey + cx->Nb; | |
| 476 for (r=1; r<cx->Nr; ++r) { | |
| 477 /* each key word, roundkeyw, represents a column in the key | |
| 478 * matrix. Each column is multiplied by the InvMixColumn matrix. | |
| 479 * [ 0E 0B 0D 09 ] [ b0 ] | |
| 480 * [ 09 0E 0B 0D ] * [ b1 ] | |
| 481 * [ 0D 09 0E 0B ] [ b2 ] | |
| 482 * [ 0B 0D 09 0E ] [ b3 ] | |
| 483 */ | |
| 484 b = (PRUint8 *)roundkeyw; | |
| 485 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 486 b = (PRUint8 *)roundkeyw; | |
| 487 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 488 b = (PRUint8 *)roundkeyw; | |
| 489 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 490 b = (PRUint8 *)roundkeyw; | |
| 491 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 492 if (Nb <= 4) | |
| 493 continue; | |
| 494 switch (Nb) { | |
| 495 case 8: b = (PRUint8 *)roundkeyw; | |
| 496 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
| 497 IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 498 case 7: b = (PRUint8 *)roundkeyw; | |
| 499 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
| 500 IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 501 case 6: b = (PRUint8 *)roundkeyw; | |
| 502 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
| 503 IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 504 case 5: b = (PRUint8 *)roundkeyw; | |
| 505 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
| 506 IMXC2(b[2]) ^ IMXC3(b[3]); | |
| 507 } | |
| 508 } | |
| 509 return SECSuccess; | |
| 510 } | |
| 511 /************************************************************************** | |
| 512 * | |
| 513 * Stuff related to Rijndael encryption/decryption, optimized for | |
| 514 * a 128-bit blocksize. | |
| 515 * | |
| 516 *************************************************************************/ | |
| 517 | |
| 518 #ifdef IS_LITTLE_ENDIAN | |
| 519 #define BYTE0WORD(w) ((w) & 0x000000ff) | |
| 520 #define BYTE1WORD(w) ((w) & 0x0000ff00) | |
| 521 #define BYTE2WORD(w) ((w) & 0x00ff0000) | |
| 522 #define BYTE3WORD(w) ((w) & 0xff000000) | |
| 523 #else | |
| 524 #define BYTE0WORD(w) ((w) & 0xff000000) | |
| 525 #define BYTE1WORD(w) ((w) & 0x00ff0000) | |
| 526 #define BYTE2WORD(w) ((w) & 0x0000ff00) | |
| 527 #define BYTE3WORD(w) ((w) & 0x000000ff) | |
| 528 #endif | |
| 529 | |
| 530 typedef union { | |
| 531 PRUint32 w[4]; | |
| 532 PRUint8 b[16]; | |
| 533 } rijndael_state; | |
| 534 | |
| 535 #define COLUMN_0(state) state.w[0] | |
| 536 #define COLUMN_1(state) state.w[1] | |
| 537 #define COLUMN_2(state) state.w[2] | |
| 538 #define COLUMN_3(state) state.w[3] | |
| 539 | |
| 540 #define STATE_BYTE(i) state.b[i] | |
| 541 | |
| 542 static SECStatus | |
| 543 rijndael_encryptBlock128(AESContext *cx, | |
| 544 unsigned char *output, | |
| 545 const unsigned char *input) | |
| 546 { | |
| 547 unsigned int r; | |
| 548 PRUint32 *roundkeyw; | |
| 549 rijndael_state state; | |
| 550 PRUint32 C0, C1, C2, C3; | |
| 551 #if defined(NSS_X86_OR_X64) | |
| 552 #define pIn input | |
| 553 #define pOut output | |
| 554 #else | |
| 555 unsigned char *pIn, *pOut; | |
| 556 PRUint32 inBuf[4], outBuf[4]; | |
| 557 | |
| 558 if ((ptrdiff_t)input & 0x3) { | |
| 559 memcpy(inBuf, input, sizeof inBuf); | |
| 560 pIn = (unsigned char *)inBuf; | |
| 561 } else { | |
| 562 pIn = (unsigned char *)input; | |
| 563 } | |
| 564 if ((ptrdiff_t)output & 0x3) { | |
| 565 pOut = (unsigned char *)outBuf; | |
| 566 } else { | |
| 567 pOut = (unsigned char *)output; | |
| 568 } | |
| 569 #endif | |
| 570 roundkeyw = cx->expandedKey; | |
| 571 /* Step 1: Add Round Key 0 to initial state */ | |
| 572 COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw++; | |
| 573 COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++; | |
| 574 COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++; | |
| 575 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; | |
| 576 /* Step 2: Loop over rounds [1..NR-1] */ | |
| 577 for (r=1; r<cx->Nr; ++r) { | |
| 578 /* Do ShiftRow, ByteSub, and MixColumn all at once */ | |
| 579 C0 = T0(STATE_BYTE(0)) ^ | |
| 580 T1(STATE_BYTE(5)) ^ | |
| 581 T2(STATE_BYTE(10)) ^ | |
| 582 T3(STATE_BYTE(15)); | |
| 583 C1 = T0(STATE_BYTE(4)) ^ | |
| 584 T1(STATE_BYTE(9)) ^ | |
| 585 T2(STATE_BYTE(14)) ^ | |
| 586 T3(STATE_BYTE(3)); | |
| 587 C2 = T0(STATE_BYTE(8)) ^ | |
| 588 T1(STATE_BYTE(13)) ^ | |
| 589 T2(STATE_BYTE(2)) ^ | |
| 590 T3(STATE_BYTE(7)); | |
| 591 C3 = T0(STATE_BYTE(12)) ^ | |
| 592 T1(STATE_BYTE(1)) ^ | |
| 593 T2(STATE_BYTE(6)) ^ | |
| 594 T3(STATE_BYTE(11)); | |
| 595 /* Round key addition */ | |
| 596 COLUMN_0(state) = C0 ^ *roundkeyw++; | |
| 597 COLUMN_1(state) = C1 ^ *roundkeyw++; | |
| 598 COLUMN_2(state) = C2 ^ *roundkeyw++; | |
| 599 COLUMN_3(state) = C3 ^ *roundkeyw++; | |
| 600 } | |
| 601 /* Step 3: Do the last round */ | |
| 602 /* Final round does not employ MixColumn */ | |
| 603 C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | | |
| 604 (BYTE1WORD(T3(STATE_BYTE(5)))) | | |
| 605 (BYTE2WORD(T0(STATE_BYTE(10)))) | | |
| 606 (BYTE3WORD(T1(STATE_BYTE(15))))) ^ | |
| 607 *roundkeyw++; | |
| 608 C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | | |
| 609 (BYTE1WORD(T3(STATE_BYTE(9)))) | | |
| 610 (BYTE2WORD(T0(STATE_BYTE(14)))) | | |
| 611 (BYTE3WORD(T1(STATE_BYTE(3))))) ^ | |
| 612 *roundkeyw++; | |
| 613 C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | | |
| 614 (BYTE1WORD(T3(STATE_BYTE(13)))) | | |
| 615 (BYTE2WORD(T0(STATE_BYTE(2)))) | | |
| 616 (BYTE3WORD(T1(STATE_BYTE(7))))) ^ | |
| 617 *roundkeyw++; | |
| 618 C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | | |
| 619 (BYTE1WORD(T3(STATE_BYTE(1)))) | | |
| 620 (BYTE2WORD(T0(STATE_BYTE(6)))) | | |
| 621 (BYTE3WORD(T1(STATE_BYTE(11))))) ^ | |
| 622 *roundkeyw++; | |
| 623 *((PRUint32 *) pOut ) = C0; | |
| 624 *((PRUint32 *)(pOut + 4)) = C1; | |
| 625 *((PRUint32 *)(pOut + 8)) = C2; | |
| 626 *((PRUint32 *)(pOut + 12)) = C3; | |
| 627 #if defined(NSS_X86_OR_X64) | |
| 628 #undef pIn | |
| 629 #undef pOut | |
| 630 #else | |
| 631 if ((ptrdiff_t)output & 0x3) { | |
| 632 memcpy(output, outBuf, sizeof outBuf); | |
| 633 } | |
| 634 #endif | |
| 635 return SECSuccess; | |
| 636 } | |
| 637 | |
| 638 static SECStatus | |
| 639 rijndael_decryptBlock128(AESContext *cx, | |
| 640 unsigned char *output, | |
| 641 const unsigned char *input) | |
| 642 { | |
| 643 int r; | |
| 644 PRUint32 *roundkeyw; | |
| 645 rijndael_state state; | |
| 646 PRUint32 C0, C1, C2, C3; | |
| 647 #if defined(NSS_X86_OR_X64) | |
| 648 #define pIn input | |
| 649 #define pOut output | |
| 650 #else | |
| 651 unsigned char *pIn, *pOut; | |
| 652 PRUint32 inBuf[4], outBuf[4]; | |
| 653 | |
| 654 if ((ptrdiff_t)input & 0x3) { | |
| 655 memcpy(inBuf, input, sizeof inBuf); | |
| 656 pIn = (unsigned char *)inBuf; | |
| 657 } else { | |
| 658 pIn = (unsigned char *)input; | |
| 659 } | |
| 660 if ((ptrdiff_t)output & 0x3) { | |
| 661 pOut = (unsigned char *)outBuf; | |
| 662 } else { | |
| 663 pOut = (unsigned char *)output; | |
| 664 } | |
| 665 #endif | |
| 666 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; | |
| 667 /* reverse the final key addition */ | |
| 668 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; | |
| 669 COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; | |
| 670 COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; | |
| 671 COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw--; | |
| 672 /* Loop over rounds in reverse [NR..1] */ | |
| 673 for (r=cx->Nr; r>1; --r) { | |
| 674 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ | |
| 675 C0 = TInv0(STATE_BYTE(0)) ^ | |
| 676 TInv1(STATE_BYTE(13)) ^ | |
| 677 TInv2(STATE_BYTE(10)) ^ | |
| 678 TInv3(STATE_BYTE(7)); | |
| 679 C1 = TInv0(STATE_BYTE(4)) ^ | |
| 680 TInv1(STATE_BYTE(1)) ^ | |
| 681 TInv2(STATE_BYTE(14)) ^ | |
| 682 TInv3(STATE_BYTE(11)); | |
| 683 C2 = TInv0(STATE_BYTE(8)) ^ | |
| 684 TInv1(STATE_BYTE(5)) ^ | |
| 685 TInv2(STATE_BYTE(2)) ^ | |
| 686 TInv3(STATE_BYTE(15)); | |
| 687 C3 = TInv0(STATE_BYTE(12)) ^ | |
| 688 TInv1(STATE_BYTE(9)) ^ | |
| 689 TInv2(STATE_BYTE(6)) ^ | |
| 690 TInv3(STATE_BYTE(3)); | |
| 691 /* Invert the key addition step */ | |
| 692 COLUMN_3(state) = C3 ^ *roundkeyw--; | |
| 693 COLUMN_2(state) = C2 ^ *roundkeyw--; | |
| 694 COLUMN_1(state) = C1 ^ *roundkeyw--; | |
| 695 COLUMN_0(state) = C0 ^ *roundkeyw--; | |
| 696 } | |
| 697 /* inverse sub */ | |
| 698 pOut[ 0] = SINV(STATE_BYTE( 0)); | |
| 699 pOut[ 1] = SINV(STATE_BYTE(13)); | |
| 700 pOut[ 2] = SINV(STATE_BYTE(10)); | |
| 701 pOut[ 3] = SINV(STATE_BYTE( 7)); | |
| 702 pOut[ 4] = SINV(STATE_BYTE( 4)); | |
| 703 pOut[ 5] = SINV(STATE_BYTE( 1)); | |
| 704 pOut[ 6] = SINV(STATE_BYTE(14)); | |
| 705 pOut[ 7] = SINV(STATE_BYTE(11)); | |
| 706 pOut[ 8] = SINV(STATE_BYTE( 8)); | |
| 707 pOut[ 9] = SINV(STATE_BYTE( 5)); | |
| 708 pOut[10] = SINV(STATE_BYTE( 2)); | |
| 709 pOut[11] = SINV(STATE_BYTE(15)); | |
| 710 pOut[12] = SINV(STATE_BYTE(12)); | |
| 711 pOut[13] = SINV(STATE_BYTE( 9)); | |
| 712 pOut[14] = SINV(STATE_BYTE( 6)); | |
| 713 pOut[15] = SINV(STATE_BYTE( 3)); | |
| 714 /* final key addition */ | |
| 715 *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; | |
| 716 *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; | |
| 717 *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; | |
| 718 *((PRUint32 *) pOut ) ^= *roundkeyw--; | |
| 719 #if defined(NSS_X86_OR_X64) | |
| 720 #undef pIn | |
| 721 #undef pOut | |
| 722 #else | |
| 723 if ((ptrdiff_t)output & 0x3) { | |
| 724 memcpy(output, outBuf, sizeof outBuf); | |
| 725 } | |
| 726 #endif | |
| 727 return SECSuccess; | |
| 728 } | |
| 729 | |
| 730 /************************************************************************** | |
| 731 * | |
| 732 * Stuff related to general Rijndael encryption/decryption, for blocksizes | |
| 733 * greater than 128 bits. | |
| 734 * | |
| 735 * XXX This code is currently untested! So far, AES specs have only been | |
| 736 * released for 128 bit blocksizes. This will be tested, but for now | |
| 737 * only the code above has been tested using known values. | |
| 738 * | |
| 739 *************************************************************************/ | |
| 740 | |
/* COLUMN(array, j): the 32-bit word located j bytes into the byte array.
 * Both arguments are parenthesized so compound expressions expand
 * correctly. NOTE(review): the access goes through a casted pointer, so
 * the address must be suitably aligned for PRUint32.
 */
#define COLUMN(array, j) (*((PRUint32 *)((array) + (j))))
| 742 | |
| 743 SECStatus | |
| 744 rijndael_encryptBlock(AESContext *cx, | |
| 745 unsigned char *output, | |
| 746 const unsigned char *input) | |
| 747 { | |
| 748 return SECFailure; | |
| 749 #ifdef rijndael_large_blocks_fixed | |
| 750 unsigned int j, r, Nb; | |
| 751 unsigned int c2=0, c3=0; | |
| 752 PRUint32 *roundkeyw; | |
| 753 PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; | |
| 754 Nb = cx->Nb; | |
| 755 roundkeyw = cx->expandedKey; | |
| 756 /* Step 1: Add Round Key 0 to initial state */ | |
| 757 for (j=0; j<4*Nb; j+=4) { | |
| 758 COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; | |
| 759 } | |
| 760 /* Step 2: Loop over rounds [1..NR-1] */ | |
| 761 for (r=1; r<cx->Nr; ++r) { | |
| 762 for (j=0; j<Nb; ++j) { | |
| 763 COLUMN(output, j) = T0(STATE_BYTE(4* j )) ^ | |
| 764 T1(STATE_BYTE(4*((j+ 1)%Nb)+1)) ^ | |
| 765 T2(STATE_BYTE(4*((j+c2)%Nb)+2)) ^ | |
| 766 T3(STATE_BYTE(4*((j+c3)%Nb)+3)); | |
| 767 } | |
| 768 for (j=0; j<4*Nb; j+=4) { | |
| 769 COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++; | |
| 770 } | |
| 771 } | |
| 772 /* Step 3: Do the last round */ | |
| 773 /* Final round does not employ MixColumn */ | |
| 774 for (j=0; j<Nb; ++j) { | |
| 775 COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4* j )))) | | |
| 776 (BYTE1WORD(T3(STATE_BYTE(4*(j+ 1)%Nb)+1))) | | |
| 777 (BYTE2WORD(T0(STATE_BYTE(4*(j+c2)%Nb)+2))) | | |
| 778 (BYTE3WORD(T1(STATE_BYTE(4*(j+c3)%Nb)+3)))) ^ | |
| 779 *roundkeyw++; | |
| 780 } | |
| 781 return SECSuccess; | |
| 782 #endif | |
| 783 } | |
| 784 | |
| 785 SECStatus | |
| 786 rijndael_decryptBlock(AESContext *cx, | |
| 787 unsigned char *output, | |
| 788 const unsigned char *input) | |
| 789 { | |
| 790 return SECFailure; | |
| 791 #ifdef rijndael_large_blocks_fixed | |
| 792 int j, r, Nb; | |
| 793 int c2=0, c3=0; | |
| 794 PRUint32 *roundkeyw; | |
| 795 PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; | |
| 796 Nb = cx->Nb; | |
| 797 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; | |
| 798 /* reverse key addition */ | |
| 799 for (j=4*Nb; j>=0; j-=4) { | |
| 800 COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; | |
| 801 } | |
| 802 /* Loop over rounds in reverse [NR..1] */ | |
| 803 for (r=cx->Nr; r>1; --r) { | |
| 804 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ | |
| 805 for (j=0; j<Nb; ++j) { | |
| 806 COLUMN(output, 4*j) = TInv0(STATE_BYTE(4* j )) ^ | |
| 807 TInv1(STATE_BYTE(4*(j+Nb- 1)%Nb)+1) ^ | |
| 808 TInv2(STATE_BYTE(4*(j+Nb-c2)%Nb)+2) ^ | |
| 809 TInv3(STATE_BYTE(4*(j+Nb-c3)%Nb)+3); | |
| 810 } | |
| 811 /* Invert the key addition step */ | |
| 812 for (j=4*Nb; j>=0; j-=4) { | |
| 813 COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; | |
| 814 } | |
| 815 } | |
| 816 /* inverse sub */ | |
| 817 for (j=0; j<4*Nb; ++j) { | |
| 818 output[j] = SINV(clone[j]); | |
| 819 } | |
| 820 /* final key addition */ | |
| 821 for (j=4*Nb; j>=0; j-=4) { | |
| 822 COLUMN(output, j) ^= *roundkeyw--; | |
| 823 } | |
| 824 return SECSuccess; | |
| 825 #endif | |
| 826 } | |
| 827 | |
| 828 /************************************************************************** | |
| 829 * | |
| 830 * Rijndael modes of operation (ECB and CBC) | |
| 831 * | |
| 832 *************************************************************************/ | |
| 833 | |
| 834 static SECStatus | |
| 835 rijndael_encryptECB(AESContext *cx, unsigned char *output, | |
| 836 unsigned int *outputLen, unsigned int maxOutputLen, | |
| 837 const unsigned char *input, unsigned int inputLen, | |
| 838 unsigned int blocksize) | |
| 839 { | |
| 840 SECStatus rv; | |
| 841 AESBlockFunc *encryptor; | |
| 842 | |
| 843 encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
| 844 ? &rijndael_encryptBlock128 | |
| 845 : &rijndael_encryptBlock; | |
| 846 while (inputLen > 0) { | |
| 847 rv = (*encryptor)(cx, output, input); | |
| 848 if (rv != SECSuccess) | |
| 849 return rv; | |
| 850 output += blocksize; | |
| 851 input += blocksize; | |
| 852 inputLen -= blocksize; | |
| 853 } | |
| 854 return SECSuccess; | |
| 855 } | |
| 856 | |
| 857 static SECStatus | |
| 858 rijndael_encryptCBC(AESContext *cx, unsigned char *output, | |
| 859 unsigned int *outputLen, unsigned int maxOutputLen, | |
| 860 const unsigned char *input, unsigned int inputLen, | |
| 861 unsigned int blocksize) | |
| 862 { | |
| 863 unsigned int j; | |
| 864 SECStatus rv; | |
| 865 AESBlockFunc *encryptor; | |
| 866 unsigned char *lastblock; | |
| 867 unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; | |
| 868 | |
| 869 if (!inputLen) | |
| 870 return SECSuccess; | |
| 871 lastblock = cx->iv; | |
| 872 encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
| 873 ? &rijndael_encryptBlock128 | |
| 874 : &rijndael_encryptBlock; | |
| 875 while (inputLen > 0) { | |
| 876 /* XOR with the last block (IV if first block) */ | |
| 877 for (j=0; j<blocksize; ++j) | |
| 878 inblock[j] = input[j] ^ lastblock[j]; | |
| 879 /* encrypt */ | |
| 880 rv = (*encryptor)(cx, output, inblock); | |
| 881 if (rv != SECSuccess) | |
| 882 return rv; | |
| 883 /* move to the next block */ | |
| 884 lastblock = output; | |
| 885 output += blocksize; | |
| 886 input += blocksize; | |
| 887 inputLen -= blocksize; | |
| 888 } | |
| 889 memcpy(cx->iv, lastblock, blocksize); | |
| 890 return SECSuccess; | |
| 891 } | |
| 892 | |
| 893 static SECStatus | |
| 894 rijndael_decryptECB(AESContext *cx, unsigned char *output, | |
| 895 unsigned int *outputLen, unsigned int maxOutputLen, | |
| 896 const unsigned char *input, unsigned int inputLen, | |
| 897 unsigned int blocksize) | |
| 898 { | |
| 899 SECStatus rv; | |
| 900 AESBlockFunc *decryptor; | |
| 901 | |
| 902 decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
| 903 ? &rijndael_decryptBlock128 | |
| 904 : &rijndael_decryptBlock; | |
| 905 while (inputLen > 0) { | |
| 906 rv = (*decryptor)(cx, output, input); | |
| 907 if (rv != SECSuccess) | |
| 908 return rv; | |
| 909 output += blocksize; | |
| 910 input += blocksize; | |
| 911 inputLen -= blocksize; | |
| 912 } | |
| 913 return SECSuccess; | |
| 914 } | |
| 915 | |
| 916 static SECStatus | |
| 917 rijndael_decryptCBC(AESContext *cx, unsigned char *output, | |
| 918 unsigned int *outputLen, unsigned int maxOutputLen, | |
| 919 const unsigned char *input, unsigned int inputLen, | |
| 920 unsigned int blocksize) | |
| 921 { | |
| 922 SECStatus rv; | |
| 923 AESBlockFunc *decryptor; | |
| 924 const unsigned char *in; | |
| 925 unsigned char *out; | |
| 926 unsigned int j; | |
| 927 unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; | |
| 928 | |
| 929 | |
| 930 if (!inputLen) | |
| 931 return SECSuccess; | |
| 932 PORT_Assert(output - input >= 0 || input - output >= (int)inputLen ); | |
| 933 decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
| 934 ? &rijndael_decryptBlock128 | |
| 935 : &rijndael_decryptBlock; | |
| 936 in = input + (inputLen - blocksize); | |
| 937 memcpy(newIV, in, blocksize); | |
| 938 out = output + (inputLen - blocksize); | |
| 939 while (inputLen > blocksize) { | |
| 940 rv = (*decryptor)(cx, out, in); | |
| 941 if (rv != SECSuccess) | |
| 942 return rv; | |
| 943 for (j=0; j<blocksize; ++j) | |
| 944 out[j] ^= in[(int)(j - blocksize)]; | |
| 945 out -= blocksize; | |
| 946 in -= blocksize; | |
| 947 inputLen -= blocksize; | |
| 948 } | |
| 949 if (in == input) { | |
| 950 rv = (*decryptor)(cx, out, in); | |
| 951 if (rv != SECSuccess) | |
| 952 return rv; | |
| 953 for (j=0; j<blocksize; ++j) | |
| 954 out[j] ^= cx->iv[j]; | |
| 955 } | |
| 956 memcpy(cx->iv, newIV, blocksize); | |
| 957 return SECSuccess; | |
| 958 } | |
| 959 | |
| 960 /************************************************************************ | |
| 961 * | |
| 962 * BLAPI Interface functions | |
| 963 * | |
| 964 * The following functions implement the encryption routines defined in | |
| 965 * BLAPI for the AES cipher, Rijndael. | |
| 966 * | |
| 967 ***********************************************************************/ | |
| 968 | |
| 969 AESContext * AES_AllocateContext(void) | |
| 970 { | |
| 971 return PORT_ZNew(AESContext); | |
| 972 } | |
| 973 | |
| 974 | |
#ifdef INTEL_GCM
/*
 * Adapted from the example code in "How to detect New Instruction support in
 * the 4th generation Intel Core processor family" by Max Locktyukhin.
 *
 * XGETBV:
 *   Reads an extended control register (XCR) specified by ECX into EDX:EAX.
 *
 * Reads XCR0 (ECX = 0) and returns PR_TRUE only when both the XMM and the
 * YMM state bits are set in its low 32 bits.  The caller in this file only
 * invokes it after CPUID has reported bits 27 and 28 of ECX.
 */
static PRBool
check_xcr0_ymm(void)
{
    /* Bits 1 and 2 of XCR0: xmm and ymm state enabled. */
    const PRUint32 xmm_ymm_mask = 6;
    PRUint32 xcr0;
#if defined(_MSC_VER)
#if defined(_M_IX86)
    __asm {
        mov ecx, 0
        xgetbv
        mov xcr0, eax
    }
#else
    xcr0 = (PRUint32)_xgetbv(0);  /* Requires VS2010 SP1 or later. */
#endif
#else
    /* Only EAX is consumed; EDX is listed as clobbered. */
    __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
    /* Check if xmm and ymm state are enabled in XCR0. */
    return (xcr0 & xmm_ymm_mask) == xmm_ymm_mask;
}
#endif
| 1004 | |
| 1005 /* | |
| 1006 ** Initialize a new AES context suitable for AES encryption/decryption in | |
| 1007 ** the ECB or CBC mode. | |
| 1008 ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC | |
| 1009 */ | |
| 1010 static SECStatus | |
| 1011 aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, | |
| 1012 const unsigned char *iv, int mode, unsigned int encrypt, | |
| 1013 unsigned int blocksize) | |
| 1014 { | |
| 1015 unsigned int Nk; | |
| 1016 /* According to Rijndael AES Proposal, section 12.1, block and key | |
| 1017 * lengths between 128 and 256 bits are supported, as long as the | |
| 1018 * length in bytes is divisible by 4. | |
| 1019 */ | |
| 1020 if (key == NULL || | |
| 1021 keysize < RIJNDAEL_MIN_BLOCKSIZE || | |
| 1022 keysize > RIJNDAEL_MAX_BLOCKSIZE || | |
| 1023 keysize % 4 != 0 || | |
| 1024 blocksize < RIJNDAEL_MIN_BLOCKSIZE || | |
| 1025 blocksize > RIJNDAEL_MAX_BLOCKSIZE || | |
| 1026 blocksize % 4 != 0) { | |
| 1027 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
| 1028 return SECFailure; | |
| 1029 } | |
| 1030 if (mode != NSS_AES && mode != NSS_AES_CBC) { | |
| 1031 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
| 1032 return SECFailure; | |
| 1033 } | |
| 1034 if (mode == NSS_AES_CBC && iv == NULL) { | |
| 1035 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
| 1036 return SECFailure; | |
| 1037 } | |
| 1038 if (!cx) { | |
| 1039 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
| 1040 return SECFailure; | |
| 1041 } | |
| 1042 #ifdef USE_HW_AES | |
| 1043 if (has_intel_aes == 0) { | |
| 1044 unsigned long eax, ebx, ecx, edx; | |
| 1045 char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); | |
| 1046 | |
| 1047 if (disable_hw_aes == NULL) { | |
| 1048 freebl_cpuid(1, &eax, &ebx, &ecx, &edx); | |
| 1049 has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; | |
| 1050 #ifdef INTEL_GCM | |
| 1051 has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1; | |
| 1052 if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && | |
| 1053 check_xcr0_ymm()) { | |
| 1054 has_intel_avx = 1; | |
| 1055 } else { | |
| 1056 has_intel_avx = -1; | |
| 1057 } | |
| 1058 #endif | |
| 1059 } else { | |
| 1060 has_intel_aes = -1; | |
| 1061 #ifdef INTEL_GCM | |
| 1062 has_intel_avx = -1; | |
| 1063 has_intel_clmul = -1; | |
| 1064 #endif | |
| 1065 } | |
| 1066 } | |
| 1067 use_hw_aes = (PRBool) | |
| 1068 (has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); | |
| 1069 #ifdef INTEL_GCM | |
| 1070 use_hw_gcm = (PRBool) | |
| 1071 (use_hw_aes && has_intel_avx>0 && has_intel_clmul>0); | |
| 1072 #endif | |
| 1073 #endif /* USE_HW_AES */ | |
| 1074 /* Nb = (block size in bits) / 32 */ | |
| 1075 cx->Nb = blocksize / 4; | |
| 1076 /* Nk = (key size in bits) / 32 */ | |
| 1077 Nk = keysize / 4; | |
| 1078 /* Obtain number of rounds from "table" */ | |
| 1079 cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); | |
| 1080 /* copy in the iv, if neccessary */ | |
| 1081 if (mode == NSS_AES_CBC) { | |
| 1082 memcpy(cx->iv, iv, blocksize); | |
| 1083 #ifdef USE_HW_AES | |
| 1084 if (use_hw_aes) { | |
| 1085 cx->worker = (freeblCipherFunc) | |
| 1086 intel_aes_cbc_worker(encrypt, keysize); | |
| 1087 } else | |
| 1088 #endif | |
| 1089 { | |
| 1090 cx->worker = (freeblCipherFunc) (encrypt | |
| 1091 ? &rijndael_encryptCBC : &rijndael_decryptCBC); | |
| 1092 } | |
| 1093 } else { | |
| 1094 #ifdef USE_HW_AES | |
| 1095 if (use_hw_aes) { | |
| 1096 cx->worker = (freeblCipherFunc) | |
| 1097 intel_aes_ecb_worker(encrypt, keysize); | |
| 1098 } else | |
| 1099 #endif | |
| 1100 { | |
| 1101 cx->worker = (freeblCipherFunc) (encrypt | |
| 1102 ? &rijndael_encryptECB : &rijndael_decryptECB); | |
| 1103 } | |
| 1104 } | |
| 1105 PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); | |
| 1106 if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { | |
| 1107 PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); | |
| 1108 goto cleanup; | |
| 1109 } | |
| 1110 #ifdef USE_HW_AES | |
| 1111 if (use_hw_aes) { | |
| 1112 intel_aes_init(encrypt, keysize); | |
| 1113 } else | |
| 1114 #endif | |
| 1115 { | |
| 1116 | |
| 1117 #if defined(RIJNDAEL_GENERATE_TABLES) || \ | |
| 1118 defined(RIJNDAEL_GENERATE_TABLES_MACRO) | |
| 1119 if (rijndaelTables == NULL) { | |
| 1120 if (PR_CallOnce(&coRTInit, init_rijndael_tables) | |
| 1121 != PR_SUCCESS) { | |
| 1122 return SecFailure; | |
| 1123 } | |
| 1124 } | |
| 1125 #endif | |
| 1126 /* Generate expanded key */ | |
| 1127 if (encrypt) { | |
| 1128 if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) | |
| 1129 goto cleanup; | |
| 1130 } else { | |
| 1131 if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) | |
| 1132 goto cleanup; | |
| 1133 } | |
| 1134 } | |
| 1135 cx->worker_cx = cx; | |
| 1136 cx->destroy = NULL; | |
| 1137 cx->isBlock = PR_TRUE; | |
| 1138 return SECSuccess; | |
| 1139 cleanup: | |
| 1140 return SECFailure; | |
| 1141 } | |
| 1142 | |
| 1143 SECStatus | |
| 1144 AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, | |
| 1145 const unsigned char *iv, int mode, unsigned int encrypt, | |
| 1146 unsigned int blocksize) | |
| 1147 { | |
| 1148 int basemode = mode; | |
| 1149 PRBool baseencrypt = encrypt; | |
| 1150 SECStatus rv; | |
| 1151 | |
| 1152 switch (mode) { | |
| 1153 case NSS_AES_CTS: | |
| 1154 basemode = NSS_AES_CBC; | |
| 1155 break; | |
| 1156 case NSS_AES_GCM: | |
| 1157 case NSS_AES_CTR: | |
| 1158 basemode = NSS_AES; | |
| 1159 baseencrypt = PR_TRUE; | |
| 1160 break; | |
| 1161 } | |
| 1162 /* make sure enough is initializes so we can safely call Destroy */ | |
| 1163 cx->worker_cx = NULL; | |
| 1164 cx->destroy = NULL; | |
| 1165 rv = aes_InitContext(cx, key, keysize, iv, basemode, | |
| 1166 baseencrypt, blocksize); | |
| 1167 if (rv != SECSuccess) { | |
| 1168 AES_DestroyContext(cx, PR_FALSE); | |
| 1169 return rv; | |
| 1170 } | |
| 1171 | |
| 1172 /* finally, set up any mode specific contexts */ | |
| 1173 switch (mode) { | |
| 1174 case NSS_AES_CTS: | |
| 1175 cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); | |
| 1176 cx->worker = (freeblCipherFunc) | |
| 1177 (encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); | |
| 1178 cx->destroy = (freeblDestroyFunc) CTS_DestroyContext; | |
| 1179 cx->isBlock = PR_FALSE; | |
| 1180 break; | |
| 1181 case NSS_AES_GCM: | |
| 1182 #ifdef INTEL_GCM | |
| 1183 if(use_hw_gcm) { | |
| 1184 cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv,
blocksize); | |
| 1185 cx->worker = (freeblCipherFunc) | |
| 1186 (encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_D
ecryptUpdate); | |
| 1187 cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext; | |
| 1188 cx->isBlock = PR_FALSE; | |
| 1189 } else | |
| 1190 #endif | |
| 1191 { | |
| 1192 cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); | |
| 1193 cx->worker = (freeblCipherFunc) | |
| 1194 (encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); | |
| 1195 cx->destroy = (freeblDestroyFunc) GCM_DestroyContext; | |
| 1196 cx->isBlock = PR_FALSE; | |
| 1197 } | |
| 1198 break; | |
| 1199 case NSS_AES_CTR: | |
| 1200 cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); | |
| 1201 #if defined(USE_HW_AES) && defined(_MSC_VER) | |
| 1202 if (use_hw_aes) { | |
| 1203 cx->worker = (freeblCipherFunc) CTR_Update_HW_AES; | |
| 1204 } else | |
| 1205 #endif | |
| 1206 { | |
| 1207 cx->worker = (freeblCipherFunc) CTR_Update; | |
| 1208 } | |
| 1209 cx->destroy = (freeblDestroyFunc) CTR_DestroyContext; | |
| 1210 cx->isBlock = PR_FALSE; | |
| 1211 break; | |
| 1212 default: | |
| 1213 /* everything has already been set up by aes_InitContext, just | |
| 1214 * return */ | |
| 1215 return SECSuccess; | |
| 1216 } | |
| 1217 /* check to see if we succeeded in getting the worker context */ | |
| 1218 if (cx->worker_cx == NULL) { | |
| 1219 /* no, just destroy the existing context */ | |
| 1220 cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ | |
| 1221 /* below that this isn't necessary */ | |
| 1222 AES_DestroyContext(cx, PR_FALSE); | |
| 1223 return SECFailure; | |
| 1224 } | |
| 1225 return SECSuccess; | |
| 1226 } | |
| 1227 | |
| 1228 /* AES_CreateContext | |
| 1229 * | |
| 1230 * create a new context for Rijndael operations | |
| 1231 */ | |
| 1232 AESContext * | |
| 1233 AES_CreateContext(const unsigned char *key, const unsigned char *iv, | |
| 1234 int mode, int encrypt, | |
| 1235 unsigned int keysize, unsigned int blocksize) | |
| 1236 { | |
| 1237 AESContext *cx = AES_AllocateContext(); | |
| 1238 if (cx) { | |
| 1239 SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, | |
| 1240 blocksize); | |
| 1241 if (rv != SECSuccess) { | |
| 1242 AES_DestroyContext(cx, PR_TRUE); | |
| 1243 cx = NULL; | |
| 1244 } | |
| 1245 } | |
| 1246 return cx; | |
| 1247 } | |
| 1248 | |
| 1249 /* | |
| 1250 * AES_DestroyContext | |
| 1251 * | |
| 1252 * Zero an AES cipher context. If freeit is true, also free the pointer | |
| 1253 * to the context. | |
| 1254 */ | |
| 1255 void | |
| 1256 AES_DestroyContext(AESContext *cx, PRBool freeit) | |
| 1257 { | |
| 1258 if (cx->worker_cx && cx->destroy) { | |
| 1259 (*cx->destroy)(cx->worker_cx, PR_TRUE); | |
| 1260 cx->worker_cx = NULL; | |
| 1261 cx->destroy = NULL; | |
| 1262 } | |
| 1263 if (freeit) | |
| 1264 PORT_Free(cx); | |
| 1265 } | |
| 1266 | |
| 1267 /* | |
| 1268 * AES_Encrypt | |
| 1269 * | |
| 1270 * Encrypt an arbitrary-length buffer. The output buffer must already be | |
| 1271 * allocated to at least inputLen. | |
| 1272 */ | |
| 1273 SECStatus | |
| 1274 AES_Encrypt(AESContext *cx, unsigned char *output, | |
| 1275 unsigned int *outputLen, unsigned int maxOutputLen, | |
| 1276 const unsigned char *input, unsigned int inputLen) | |
| 1277 { | |
| 1278 int blocksize; | |
| 1279 /* Check args */ | |
| 1280 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { | |
| 1281 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
| 1282 return SECFailure; | |
| 1283 } | |
| 1284 blocksize = 4 * cx->Nb; | |
| 1285 if (cx->isBlock && (inputLen % blocksize != 0)) { | |
| 1286 PORT_SetError(SEC_ERROR_INPUT_LEN); | |
| 1287 return SECFailure; | |
| 1288 } | |
| 1289 if (maxOutputLen < inputLen) { | |
| 1290 PORT_SetError(SEC_ERROR_OUTPUT_LEN); | |
| 1291 return SECFailure; | |
| 1292 } | |
| 1293 *outputLen = inputLen; | |
| 1294 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, | |
| 1295 input, inputLen, blocksize); | |
| 1296 } | |
| 1297 | |
| 1298 /* | |
| 1299 * AES_Decrypt | |
| 1300 * | |
| 1301 * Decrypt and arbitrary-length buffer. The output buffer must already be | |
| 1302 * allocated to at least inputLen. | |
| 1303 */ | |
| 1304 SECStatus | |
| 1305 AES_Decrypt(AESContext *cx, unsigned char *output, | |
| 1306 unsigned int *outputLen, unsigned int maxOutputLen, | |
| 1307 const unsigned char *input, unsigned int inputLen) | |
| 1308 { | |
| 1309 int blocksize; | |
| 1310 /* Check args */ | |
| 1311 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { | |
| 1312 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
| 1313 return SECFailure; | |
| 1314 } | |
| 1315 blocksize = 4 * cx->Nb; | |
| 1316 if (cx->isBlock && (inputLen % blocksize != 0)) { | |
| 1317 PORT_SetError(SEC_ERROR_INPUT_LEN); | |
| 1318 return SECFailure; | |
| 1319 } | |
| 1320 if (maxOutputLen < inputLen) { | |
| 1321 PORT_SetError(SEC_ERROR_OUTPUT_LEN); | |
| 1322 return SECFailure; | |
| 1323 } | |
| 1324 *outputLen = inputLen; | |
| 1325 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, | |
| 1326 input, inputLen, blocksize); | |
| 1327 } | |
| OLD | NEW |