OLD | NEW |
(Empty) | |
| 1 ; LICENSE: |
| 2 ; This submission to NSS is to be made available under the terms of the |
| 3 ; Mozilla Public License, v. 2.0. You can obtain one at http: |
| 4 ; //mozilla.org/MPL/2.0/. |
| 5 ;############################################################################### |
| 6 ; Copyright(c) 2014, Intel Corp. |
| 7 ; Developers and authors: |
| 8 ; Shay Gueron and Vlad Krasnov |
| 9 ; Intel Corporation, Israel Development Centre, Haifa, Israel |
| 10 ; Please send feedback directly to crypto.feedback.alias@intel.com |
| 11 |
| 12 |
| 13 .MODEL FLAT, C |
| 14 .XMM |
| 15 |
| ; Read-only 16-byte-aligned constants used by the key-schedule routines |
| ; (aligned so they can be fetched with movdqa). |
| 16 .DATA |
| 17 ALIGN 16 |
| ; pshufb mask for the 128-bit key schedule: broadcasts a rotated copy of |
| ; the previous round key's last dword into all four lanes (RotWord prep). |
| 18 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh |
| ; pshufb mask used by the 192-bit key schedule. |
| 19 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h |
| ; pshufb mask used by the 256-bit key schedule. |
| 20 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh |
| ; Initial AES round constant (Rcon = 1); doubled each round with pslld. |
| 21 Lcon1 dd 1,1,1,1 |
| ; Rcon = 1bh, used for the final rounds of the 128-bit key schedule. |
| 22 Lcon2 dd 1bh,1bh,1bh,1bh |
| 23 |
| 24 .CODE |
| 25 |
| ; Register roles for the bulk cipher entry points (cdecl, IA-32): |
| ;   ctx      - pointer into the context; later biased by +44 to reach the |
| ;              expanded key schedule (offset presumably matches the C |
| ;              AESContext layout - confirm against the caller's struct) |
| ;   output   - destination buffer pointer |
| ;   input    - source buffer pointer |
| ;   inputLen - remaining byte count (edi is callee-saved, so the macros |
| ;              push/pop it around use) |
| 26 ctx textequ <ecx> |
| 27 output textequ <edx> |
| 28 input textequ <eax> |
| 29 inputLen textequ <edi> |
| 30 |
| 31 |
| 31 |
| ; aes_rnd i: apply one AES encryption round with round key i to the seven |
| ; blocks held in xmm0-xmm6. Loads the round key unaligned. Clobbers xmm7. |
| 32 aes_rnd MACRO i |
| 33 movdqu xmm7, [i*16 + ctx] |
| 34 aesenc xmm0, xmm7 |
| 35 aesenc xmm1, xmm7 |
| 36 aesenc xmm2, xmm7 |
| 37 aesenc xmm3, xmm7 |
| 38 aesenc xmm4, xmm7 |
| 39 aesenc xmm5, xmm7 |
| 40 aesenc xmm6, xmm7 |
| 41 ENDM |
| 42 |
| ; aes_last_rnd i: apply the final AES encryption round (no MixColumns) |
| ; with round key i to the seven blocks in xmm0-xmm6. Clobbers xmm7. |
| 43 aes_last_rnd MACRO i |
| 44 movdqu xmm7, [i*16 + ctx] |
| 45 aesenclast xmm0, xmm7 |
| 46 aesenclast xmm1, xmm7 |
| 47 aesenclast xmm2, xmm7 |
| 48 aesenclast xmm3, xmm7 |
| 49 aesenclast xmm4, xmm7 |
| 50 aesenclast xmm5, xmm7 |
| 51 aesenclast xmm6, xmm7 |
| 52 ENDM |
| 53 |
| ; aes_dec_rnd i: one AES decryption round (Equivalent Inverse Cipher) with |
| ; round key i applied to the seven blocks in xmm0-xmm6. Clobbers xmm7. |
| 54 aes_dec_rnd MACRO i |
| 55 movdqu xmm7, [i*16 + ctx] |
| 56 aesdec xmm0, xmm7 |
| 57 aesdec xmm1, xmm7 |
| 58 aesdec xmm2, xmm7 |
| 59 aesdec xmm3, xmm7 |
| 60 aesdec xmm4, xmm7 |
| 61 aesdec xmm5, xmm7 |
| 62 aesdec xmm6, xmm7 |
| 63 ENDM |
| 64 |
| ; aes_dec_last_rnd i: final AES decryption round (no InvMixColumns) with |
| ; round key i applied to the seven blocks in xmm0-xmm6. Clobbers xmm7. |
| 65 aes_dec_last_rnd MACRO i |
| 66 movdqu xmm7, [i*16 + ctx] |
| 67 aesdeclast xmm0, xmm7 |
| 68 aesdeclast xmm1, xmm7 |
| 69 aesdeclast xmm2, xmm7 |
| 70 aesdeclast xmm3, xmm7 |
| 71 aesdeclast xmm4, xmm7 |
| 72 aesdeclast xmm5, xmm7 |
| 73 aesdeclast xmm6, xmm7 |
| 74 ENDM |
| 75 |
| 76 |
| ; gen_aes_ecb_func enc, rnds: emits the body of an ECB encrypt (enc=1) or |
| ; decrypt (enc=0) entry point for a cipher with `rnds` rounds (10/12/14). |
| ; Stack args (after the push below): [esp+8]=ctx, [esp+12]=output, |
| ; [esp+24]=input, [esp+28]=inputLen - arg slots 2 and 3 are skipped, |
| ; presumably outputLen/maxOutputLen in the C prototype (confirm at caller). |
| ; Processes 7 blocks per iteration, then single blocks; trailing bytes |
| ; short of a full 16-byte block are ignored. Returns 0 in eax. |
| 77 gen_aes_ecb_func MACRO enc, rnds |
| 78 |
| 79 LOCAL loop7 |
| 80 LOCAL loop1 |
| 81 LOCAL bail |
| 82 |
| 83 push inputLen |
| 84 |
| 85 mov ctx, [esp + 2*4 + 0*4] |
| 86 mov output, [esp + 2*4 + 1*4] |
| 87 mov input, [esp + 2*4 + 4*4] |
| 88 mov inputLen, [esp + 2*4 + 5*4] |
| 89 |
| ; Skip the 44-byte context header so ctx points at round key 0. |
| 90 lea ctx, [44+ctx] |
| 91 |
| ; Main path: 7 blocks in parallel to hide AESENC/AESDEC latency. |
| 92 loop7: |
| 93 cmp inputLen, 7*16 |
| 94 jb loop1 |
| 95 |
| 96 movdqu xmm0, [0*16 + input] |
| 97 movdqu xmm1, [1*16 + input] |
| 98 movdqu xmm2, [2*16 + input] |
| 99 movdqu xmm3, [3*16 + input] |
| 100 movdqu xmm4, [4*16 + input] |
| 101 movdqu xmm5, [5*16 + input] |
| 102 movdqu xmm6, [6*16 + input] |
| 103 |
| ; Whitening: XOR all seven blocks with round key 0. |
| 104 movdqu xmm7, [0*16 + ctx] |
| 105 pxor xmm0, xmm7 |
| 106 pxor xmm1, xmm7 |
| 107 pxor xmm2, xmm7 |
| 108 pxor xmm3, xmm7 |
| 109 pxor xmm4, xmm7 |
| 110 pxor xmm5, xmm7 |
| 111 pxor xmm6, xmm7 |
| 112 |
| ; Assembly-time selection of encrypt vs decrypt round macros/opcodes. |
| 113 IF enc eq 1 |
| 114 rnd textequ <aes_rnd> |
| 115 lastrnd textequ <aes_last_rnd> |
| 116 aesinst textequ <aesenc> |
| 117 aeslastinst textequ <aesenclast> |
| 118 ELSE |
| 119 rnd textequ <aes_dec_rnd> |
| 120 lastrnd textequ <aes_dec_last_rnd> |
| 121 aesinst textequ <aesdec> |
| 122 aeslastinst textequ <aesdeclast> |
| 123 ENDIF |
| 124 |
| ; Fully unrolled rounds 1..rnds-1, then the final round. |
| 125 i = 1 |
| 126 WHILE i LT rnds |
| 127 rnd i |
| 128 i = i+1 |
| 129 ENDM |
| 130 lastrnd rnds |
| 131 |
| 132 movdqu [0*16 + output], xmm0 |
| 133 movdqu [1*16 + output], xmm1 |
| 134 movdqu [2*16 + output], xmm2 |
| 135 movdqu [3*16 + output], xmm3 |
| 136 movdqu [4*16 + output], xmm4 |
| 137 movdqu [5*16 + output], xmm5 |
| 138 movdqu [6*16 + output], xmm6 |
| 139 |
| 140 lea input, [7*16 + input] |
| 141 lea output, [7*16 + output] |
| 142 sub inputLen, 7*16 |
| 143 jmp loop7 |
| 144 |
| ; Tail path: one block at a time for the remaining < 7 blocks. |
| 145 loop1: |
| 146 cmp inputLen, 1*16 |
| 147 jb bail |
| 148 |
| 149 movdqu xmm0, [input] |
| 150 movdqu xmm7, [0*16 + ctx] |
| 151 pxor xmm0, xmm7 |
| 152 |
| 153 i = 1 |
| 154 WHILE i LT rnds |
| 155 movdqu xmm7, [i*16 + ctx] |
| 156 aesinst xmm0, xmm7 |
| 157 i = i+1 |
| 158 ENDM |
| 159 movdqu xmm7, [rnds*16 + ctx] |
| 160 aeslastinst xmm0, xmm7 |
| 161 |
| 162 movdqu [output], xmm0 |
| 163 |
| 164 lea input, [1*16 + input] |
| 165 lea output, [1*16 + output] |
| 166 sub inputLen, 1*16 |
| 167 jmp loop1 |
| 168 |
| ; Done: return 0 (success) and restore the saved edi. |
| 169 bail: |
| 170 xor eax, eax |
| 171 pop inputLen |
| 172 ret |
| 173 |
| 174 ENDM |
| 175 |
| 175 |
| ; ECB entry points: each expands gen_aes_ecb_func with the direction flag |
| ; (1 = encrypt, 0 = decrypt) and the round count for its key size |
| ; (10/12/14 rounds for AES-128/192/256). |
| 176 ALIGN 16 |
| 177 intel_aes_encrypt_ecb_128 PROC |
| 178 gen_aes_ecb_func 1, 10 |
| 179 intel_aes_encrypt_ecb_128 ENDP |
| 180 |
| 181 ALIGN 16 |
| 182 intel_aes_encrypt_ecb_192 PROC |
| 183 gen_aes_ecb_func 1, 12 |
| 184 intel_aes_encrypt_ecb_192 ENDP |
| 185 |
| 186 ALIGN 16 |
| 187 intel_aes_encrypt_ecb_256 PROC |
| 188 gen_aes_ecb_func 1, 14 |
| 189 intel_aes_encrypt_ecb_256 ENDP |
| 190 |
| 191 ALIGN 16 |
| 192 intel_aes_decrypt_ecb_128 PROC |
| 193 gen_aes_ecb_func 0, 10 |
| 194 intel_aes_decrypt_ecb_128 ENDP |
| 195 |
| 196 ALIGN 16 |
| 197 intel_aes_decrypt_ecb_192 PROC |
| 198 gen_aes_ecb_func 0, 12 |
| 199 intel_aes_decrypt_ecb_192 ENDP |
| 200 |
| 201 ALIGN 16 |
| 202 intel_aes_decrypt_ecb_256 PROC |
| 203 gen_aes_ecb_func 0, 14 |
| 204 intel_aes_decrypt_ecb_256 ENDP |
| 205 |
| 206 |
| ; Register roles for the key-expansion routines (args: KEY = raw key |
| ; pointer, KS = key-schedule output pointer; ITR doubles as a scratch |
| ; pointer for loading constants and as the loop counter). |
| 207 KEY textequ <ecx> |
| 208 KS textequ <edx> |
| 209 ITR textequ <eax> |
| 210 |
| ; intel_aes_encrypt_init_128(key, ks): expand a 128-bit AES key into the |
| ; 11 round keys at ks (11*16 bytes). Uses the PSHUFB + AESENCLAST key- |
| ; expansion trick: aesenclast on a lane-rotated copy of the previous round |
| ; key performs SubWord, and the Rcon in xmm0 is folded in by the same |
| ; instruction's final XOR. |
| 211 ALIGN 16 |
| 212 intel_aes_encrypt_init_128 PROC |
| 213 |
| 214 mov KEY, [esp + 1*4 + 0*4] |
| 215 mov KS, [esp + 1*4 + 1*4] |
| 216 |
| 217 |
| ; Round key 0 is the raw key itself. |
| 218 movdqu xmm1, [KEY] |
| 219 movdqu [KS], xmm1 |
| 220 movdqa xmm2, xmm1 |
| 221 |
| ; xmm0 = Rcon (starts at 1, doubled each round); xmm4 = shuffle mask. |
| 222 lea ITR, Lcon1 |
| 223 movdqa xmm0, [ITR] |
| 224 lea ITR, Lmask |
| 225 movdqa xmm4, [ITR] |
| 226 |
| 227 mov ITR, 8 |
| 228 |
| ; Rounds 1..8 (Rcon = 1,2,4,...,80h). Note: the SSE instructions between |
| ; `dec ITR` and `jne` do not modify EFLAGS, so the loop flag survives. |
| 229 Lenc_128_ks_loop: |
| 230 lea KS, [16 + KS] |
| 231 dec ITR |
| 232 |
| ; SubWord(RotWord(w3)) ^ Rcon via pshufb + aesenclast. |
| 233 pshufb xmm2, xmm4 |
| 234 aesenclast xmm2, xmm0 |
| 235 pslld xmm0, 1 |
| ; Prefix-XOR of the previous round key's dwords (w0, w0^w1, ...) built |
| ; with three pslldq/pxor steps, then XOR in the SubWord result. |
| 236 movdqa xmm3, xmm1 |
| 237 pslldq xmm3, 4 |
| 238 pxor xmm1, xmm3 |
| 239 pslldq xmm3, 4 |
| 240 pxor xmm1, xmm3 |
| 241 pslldq xmm3, 4 |
| 242 pxor xmm1, xmm3 |
| 243 pxor xmm1, xmm2 |
| 244 movdqu [KS], xmm1 |
| 245 movdqa xmm2, xmm1 |
| 246 |
| 247 jne Lenc_128_ks_loop |
| 248 |
| ; Rounds 9 and 10 use Rcon = 1bh then 36h (pslld doubles 1bh -> 36h). |
| 249 lea ITR, Lcon2 |
| 250 movdqa xmm0, [ITR] |
| 251 |
| 252 pshufb xmm2, xmm4 |
| 253 aesenclast xmm2, xmm0 |
| 254 pslld xmm0, 1 |
| 255 movdqa xmm3, xmm1 |
| 256 pslldq xmm3, 4 |
| 257 pxor xmm1, xmm3 |
| 258 pslldq xmm3, 4 |
| 259 pxor xmm1, xmm3 |
| 260 pslldq xmm3, 4 |
| 261 pxor xmm1, xmm3 |
| 262 pxor xmm1, xmm2 |
| 263 movdqu [16 + KS], xmm1 |
| 264 movdqa xmm2, xmm1 |
| 265 |
| 266 pshufb xmm2, xmm4 |
| 267 aesenclast xmm2, xmm0 |
| 268 movdqa xmm3, xmm1 |
| 269 pslldq xmm3, 4 |
| 270 pxor xmm1, xmm3 |
| 271 pslldq xmm3, 4 |
| 272 pxor xmm1, xmm3 |
| 273 pslldq xmm3, 4 |
| 274 pxor xmm1, xmm3 |
| 275 pxor xmm1, xmm2 |
| 276 movdqu [32 + KS], xmm1 |
| 277 movdqa xmm2, xmm1 |
| 278 |
| 279 ret |
| 280 intel_aes_encrypt_init_128 ENDP |
| 281 |
| 281 |
| 282 |
| ; intel_aes_decrypt_init_128(key, ks): build the decryption key schedule |
| ; for the Equivalent Inverse Cipher: expand the encryption schedule, then |
| ; reverse the order of the 11 round keys and apply aesimc (InvMixColumns) |
| ; to the 9 middle keys (first and last are swapped unchanged). |
| 283 ALIGN 16 |
| 284 intel_aes_decrypt_init_128 PROC |
| 285 |
| 286 mov KEY, [esp + 1*4 + 0*4] |
| 287 mov KS, [esp + 1*4 + 1*4] |
| 288 |
| ; Re-push the args for the nested cdecl call, then restore them. |
| 289 push KS |
| 290 push KEY |
| 291 |
| 292 call intel_aes_encrypt_init_128 |
| 293 |
| 294 pop KEY |
| 295 pop KS |
| 296 |
| ; Swap round keys 0 and 10 (no aesimc on the outer keys). |
| 297 movdqu xmm0, [0*16 + KS] |
| 298 movdqu xmm1, [10*16 + KS] |
| 299 movdqu [10*16 + KS], xmm0 |
| 300 movdqu [0*16 + KS], xmm1 |
| 301 |
| ; Swap+aesimc pairs (1,9)..(4,6), unrolled at assembly time. |
| 302 i = 1 |
| 303 WHILE i LT 5 |
| 304 movdqu xmm0, [i*16 + KS] |
| 305 movdqu xmm1, [(10-i)*16 + KS] |
| 306 |
| 307 aesimc xmm0, xmm0 |
| 308 aesimc xmm1, xmm1 |
| 309 |
| 310 movdqu [(10-i)*16 + KS], xmm0 |
| 311 movdqu [i*16 + KS], xmm1 |
| 312 |
| 313 i = i+1 |
| 314 ENDM |
| 315 |
| ; Middle key 5 stays in place but still needs InvMixColumns. |
| 316 movdqu xmm0, [5*16 + KS] |
| 317 aesimc xmm0, xmm0 |
| 318 movdqu [5*16 + KS], xmm0 |
| 319 ret |
| 320 intel_aes_decrypt_init_128 ENDP |
| 321 |
| 321 |
| 322 |
| ; intel_aes_encrypt_init_192(key, ks): expand a 192-bit key into the 13 |
| ; round keys at ks. State: xmm1 = key words w0..w3, xmm3 (low qword) = |
| ; w4..w5. Each loop iteration performs two 6-word expansion steps (48 |
| ; bytes of schedule) and repacks the 24-byte key stride into 16-byte |
| ; round keys with shufpd. |
| 323 ALIGN 16 |
| 324 intel_aes_encrypt_init_192 PROC |
| 325 |
| 326 mov KEY, [esp + 1*4 + 0*4] |
| 327 mov KS, [esp + 1*4 + 1*4] |
| 328 |
| ; Load the 24-byte key: 16 bytes into xmm1, last 8 into xmm3's low half. |
| 329 pxor xmm3, xmm3 |
| 330 movdqu xmm1, [KEY] |
| 331 pinsrd xmm3, DWORD PTR [16 + KEY], 0 |
| 332 pinsrd xmm3, DWORD PTR [20 + KEY], 1 |
| 333 |
| 334 movdqu [KS], xmm1 |
| 335 movdqa xmm5, xmm3 |
| 336 |
| ; xmm0 = Rcon (doubled after each expansion step); xmm4 = shuffle mask. |
| 337 lea ITR, Lcon1 |
| 338 movdqu xmm0, [ITR] |
| 339 lea ITR, Lmask192 |
| 340 movdqu xmm4, [ITR] |
| 341 |
| 342 mov ITR, 4 |
| 343 |
| 344 Lenc_192_ks_loop: |
| ; First expansion step: SubWord/RotWord of w5 (+Rcon) via aesenclast. |
| 345 movdqa xmm2, xmm3 |
| 346 pshufb xmm2, xmm4 |
| 347 aesenclast xmm2, xmm0 |
| 348 pslld xmm0, 1 |
| 349 |
| ; Prefix-XOR the previous six key words, fold in the SubWord result, |
| ; then derive the two trailing words from the new w3. |
| 350 movdqa xmm6, xmm1 |
| 351 movdqa xmm7, xmm3 |
| 352 pslldq xmm6, 4 |
| 353 pslldq xmm7, 4 |
| 354 pxor xmm1, xmm6 |
| 355 pxor xmm3, xmm7 |
| 356 pslldq xmm6, 4 |
| 357 pxor xmm1, xmm6 |
| 358 pslldq xmm6, 4 |
| 359 pxor xmm1, xmm6 |
| 360 pxor xmm1, xmm2 |
| 361 pshufd xmm2, xmm1, 0ffh |
| 362 pxor xmm3, xmm2 |
| 363 |
| ; Repack: round key = old tail (xmm5) | new words, via shufpd. |
| 364 movdqa xmm6, xmm1 |
| 365 shufpd xmm5, xmm1, 00h |
| 366 shufpd xmm6, xmm3, 01h |
| 367 |
| 368 movdqu [16 + KS], xmm5 |
| 369 movdqu [32 + KS], xmm6 |
| 370 |
| ; Second expansion step of the iteration (same pattern). |
| 371 movdqa xmm2, xmm3 |
| 372 pshufb xmm2, xmm4 |
| 373 aesenclast xmm2, xmm0 |
| 374 pslld xmm0, 1 |
| 375 |
| 376 movdqa xmm6, xmm1 |
| 377 movdqa xmm7, xmm3 |
| 378 pslldq xmm6, 4 |
| 379 pslldq xmm7, 4 |
| 380 pxor xmm1, xmm6 |
| 381 pxor xmm3, xmm7 |
| 382 pslldq xmm6, 4 |
| 383 pxor xmm1, xmm6 |
| 384 pslldq xmm6, 4 |
| 385 pxor xmm1, xmm6 |
| 386 pxor xmm1, xmm2 |
| 387 pshufd xmm2, xmm1, 0ffh |
| 388 pxor xmm3, xmm2 |
| 389 |
| 390 movdqu [48 + KS], xmm1 |
| 391 movdqa xmm5, xmm3 |
| 392 |
| 393 lea KS, [48 + KS] |
| 394 |
| 395 dec ITR |
| 396 jnz Lenc_192_ks_loop |
| 397 |
| ; Store the final (13th) round key left in xmm5. |
| 398 movdqu [16 + KS], xmm5 |
| 399 ret |
| 400 intel_aes_encrypt_init_192 ENDP |
| 401 |
| 401 |
| ; intel_aes_decrypt_init_192(key, ks): decryption schedule for AES-192. |
| ; Expands the encryption schedule, swaps round keys 0 and 12 unchanged, |
| ; then swap+aesimc pairs (1,11)..(5,7) and aesimc the middle key 6. |
| 402 ALIGN 16 |
| 403 intel_aes_decrypt_init_192 PROC |
| 404 mov KEY, [esp + 1*4 + 0*4] |
| 405 mov KS, [esp + 1*4 + 1*4] |
| 406 |
| ; Re-push args for the nested cdecl call, restore afterwards. |
| 407 push KS |
| 408 push KEY |
| 409 |
| 410 call intel_aes_encrypt_init_192 |
| 411 |
| 412 pop KEY |
| 413 pop KS |
| 414 |
| 415 movdqu xmm0, [0*16 + KS] |
| 416 movdqu xmm1, [12*16 + KS] |
| 417 movdqu [12*16 + KS], xmm0 |
| 418 movdqu [0*16 + KS], xmm1 |
| 419 |
| 420 i = 1 |
| 421 WHILE i LT 6 |
| 422 movdqu xmm0, [i*16 + KS] |
| 423 movdqu xmm1, [(12-i)*16 + KS] |
| 424 |
| 425 aesimc xmm0, xmm0 |
| 426 aesimc xmm1, xmm1 |
| 427 |
| 428 movdqu [(12-i)*16 + KS], xmm0 |
| 429 movdqu [i*16 + KS], xmm1 |
| 430 |
| 431 i = i+1 |
| 432 ENDM |
| 433 |
| 434 movdqu xmm0, [6*16 + KS] |
| 435 aesimc xmm0, xmm0 |
| 436 movdqu [6*16 + KS], xmm0 |
| 437 ret |
| 438 intel_aes_decrypt_init_192 ENDP |
| 439 |
| 439 |
| ; intel_aes_encrypt_init_256(key, ks): expand a 256-bit key into the 15 |
| ; round keys at ks. xmm1/xmm3 hold the two key halves; each loop |
| ; iteration derives one even round key (SubWord+RotWord+Rcon) and one odd |
| ; round key (SubWord only, via pshufd 0ffh and a zero Rcon in xmm6), and |
| ; a final even key is produced after the loop (2 + 6*2 + 1 = 15 keys). |
| 440 ALIGN 16 |
| 441 intel_aes_encrypt_init_256 PROC |
| 442 |
| 443 mov KEY, [esp + 1*4 + 0*4] |
| 444 mov KS, [esp + 1*4 + 1*4] |
| ; Round keys 0 and 1 are the raw 32-byte key. |
| 445 movdqu xmm1, [16*0 + KEY] |
| 446 movdqu xmm3, [16*1 + KEY] |
| 447 |
| 448 movdqu [16*0 + KS], xmm1 |
| 449 movdqu [16*1 + KS], xmm3 |
| 450 |
| 451 lea ITR, Lcon1 |
| 452 movdqu xmm0, [ITR] |
| 453 lea ITR, Lmask256 |
| 454 movdqu xmm5, [ITR] |
| 455 |
| ; xmm6 = 0: zero "Rcon" so aesenclast performs pure SubWord for the odd |
| ; (g-free) expansion steps. |
| 456 pxor xmm6, xmm6 |
| 457 |
| 458 mov ITR, 6 |
| 459 |
| 460 Lenc_256_ks_loop: |
| 461 |
| ; Even key: SubWord(RotWord(w7)) ^ Rcon folded into the prefix-XOR of |
| ; the previous even key's words. |
| 462 movdqa xmm2, xmm3 |
| 463 pshufb xmm2, xmm5 |
| 464 aesenclast xmm2, xmm0 |
| 465 pslld xmm0, 1 |
| 466 movdqa xmm4, xmm1 |
| 467 pslldq xmm4, 4 |
| 468 pxor xmm1, xmm4 |
| 469 pslldq xmm4, 4 |
| 470 pxor xmm1, xmm4 |
| 471 pslldq xmm4, 4 |
| 472 pxor xmm1, xmm4 |
| 473 pxor xmm1, xmm2 |
| 474 movdqu [16*2 + KS], xmm1 |
| 475 |
| ; Odd key: SubWord (no rotate, no Rcon) of the new key's last dword. |
| 476 pshufd xmm2, xmm1, 0ffh |
| 477 aesenclast xmm2, xmm6 |
| 478 movdqa xmm4, xmm3 |
| 479 pslldq xmm4, 4 |
| 480 pxor xmm3, xmm4 |
| 481 pslldq xmm4, 4 |
| 482 pxor xmm3, xmm4 |
| 483 pslldq xmm4, 4 |
| 484 pxor xmm3, xmm4 |
| 485 pxor xmm3, xmm2 |
| 486 movdqu [16*3 + KS], xmm3 |
| 487 |
| 488 lea KS, [32 + KS] |
| 489 dec ITR |
| 490 jnz Lenc_256_ks_loop |
| 491 |
| ; Final round key 14 (even step only). |
| 492 movdqa xmm2, xmm3 |
| 493 pshufb xmm2, xmm5 |
| 494 aesenclast xmm2, xmm0 |
| 495 movdqa xmm4, xmm1 |
| 496 pslldq xmm4, 4 |
| 497 pxor xmm1, xmm4 |
| 498 pslldq xmm4, 4 |
| 499 pxor xmm1, xmm4 |
| 500 pslldq xmm4, 4 |
| 501 pxor xmm1, xmm4 |
| 502 pxor xmm1, xmm2 |
| 503 movdqu [16*2 + KS], xmm1 |
| 504 |
| 505 ret |
| 506 intel_aes_encrypt_init_256 ENDP |
| 507 |
| 507 |
| ; intel_aes_decrypt_init_256(key, ks): decryption schedule for AES-256. |
| ; Expands the encryption schedule, swaps round keys 0 and 14 unchanged, |
| ; then swap+aesimc pairs (1,13)..(6,8) and aesimc the middle key 7. |
| 508 ALIGN 16 |
| 509 intel_aes_decrypt_init_256 PROC |
| 510 mov KEY, [esp + 1*4 + 0*4] |
| 511 mov KS, [esp + 1*4 + 1*4] |
| 512 |
| ; Re-push args for the nested cdecl call, restore afterwards. |
| 513 push KS |
| 514 push KEY |
| 515 |
| 516 call intel_aes_encrypt_init_256 |
| 517 |
| 518 pop KEY |
| 519 pop KS |
| 520 |
| 521 movdqu xmm0, [0*16 + KS] |
| 522 movdqu xmm1, [14*16 + KS] |
| 523 movdqu [14*16 + KS], xmm0 |
| 524 movdqu [0*16 + KS], xmm1 |
| 525 |
| 526 i = 1 |
| 527 WHILE i LT 7 |
| 528 movdqu xmm0, [i*16 + KS] |
| 529 movdqu xmm1, [(14-i)*16 + KS] |
| 530 |
| 531 aesimc xmm0, xmm0 |
| 532 aesimc xmm1, xmm1 |
| 533 |
| 534 movdqu [(14-i)*16 + KS], xmm0 |
| 535 movdqu [i*16 + KS], xmm1 |
| 536 |
| 537 i = i+1 |
| 538 ENDM |
| 539 |
| 540 movdqu xmm0, [7*16 + KS] |
| 541 aesimc xmm0, xmm0 |
| 542 movdqu [7*16 + KS], xmm0 |
| 543 ret |
| 544 intel_aes_decrypt_init_256 ENDP |
| 545 |
| 545 |
| 546 |
| 547 |
| ; gen_aes_cbc_enc_func rnds: emits a CBC encrypt entry point. CBC |
| ; encryption is inherently serial (each block chains into the next), so |
| ; only one block is processed at a time; round keys 1-4 are kept resident |
| ; in xmm3-xmm6 to cut per-block loads. The IV lives at ctx-32 (i.e. 12 |
| ; bytes into the 44-byte context header) and the final ciphertext block |
| ; is written back there as the next IV. |
| 548 gen_aes_cbc_enc_func MACRO rnds |
| 549 |
| 550 LOCAL loop1 |
| 551 LOCAL bail |
| 552 |
| 553 push inputLen |
| 554 |
| 555 mov ctx, [esp + 2*4 + 0*4] |
| 556 mov output, [esp + 2*4 + 1*4] |
| 557 mov input, [esp + 2*4 + 4*4] |
| 558 mov inputLen, [esp + 2*4 + 5*4] |
| 559 |
| 560 lea ctx, [44+ctx] |
| 561 |
| ; xmm0 = IV (chaining value), kept live across the whole loop. |
| 562 movdqu xmm0, [-32+ctx] |
| 563 |
| ; Cache round keys 0-4 in registers. |
| 564 movdqu xmm2, [0*16 + ctx] |
| 565 movdqu xmm3, [1*16 + ctx] |
| 566 movdqu xmm4, [2*16 + ctx] |
| 567 movdqu xmm5, [3*16 + ctx] |
| 568 movdqu xmm6, [4*16 + ctx] |
| 569 |
| 570 loop1: |
| 571 cmp inputLen, 1*16 |
| 572 jb bail |
| 573 |
| ; state = plaintext ^ round key 0 ^ IV. |
| 574 movdqu xmm1, [input] |
| 575 pxor xmm1, xmm2 |
| 576 pxor xmm0, xmm1 |
| 577 |
| 578 aesenc xmm0, xmm3 |
| 579 aesenc xmm0, xmm4 |
| 580 aesenc xmm0, xmm5 |
| 581 aesenc xmm0, xmm6 |
| 582 |
| ; Remaining rounds 5..rnds-1 load their keys from memory. |
| 583 i = 5 |
| 584 WHILE i LT rnds |
| 585 movdqu xmm7, [i*16 + ctx] |
| 586 aesenc xmm0, xmm7 |
| 587 i = i+1 |
| 588 ENDM |
| 589 movdqu xmm7, [rnds*16 + ctx] |
| 590 aesenclast xmm0, xmm7 |
| 591 |
| ; xmm0 is now the ciphertext block and also the next chaining value. |
| 592 movdqu [output], xmm0 |
| 593 |
| 594 lea input, [1*16 + input] |
| 595 lea output, [1*16 + output] |
| 596 sub inputLen, 1*16 |
| 597 jmp loop1 |
| 598 |
| ; Persist the final chaining value as the IV for the next call. |
| 599 bail: |
| 600 movdqu [-32+ctx], xmm0 |
| 601 |
| 602 xor eax, eax |
| 603 pop inputLen |
| 604 ret |
| 605 |
| 606 ENDM |
| 607 |
| 607 |
| ; gen_aes_cbc_dec_func rnds: emits a CBC decrypt entry point. Unlike CBC |
| ; encryption, decryption parallelizes: 7 ciphertext blocks are decrypted |
| ; at once, then each result is XORed with the preceding ciphertext block |
| ; (the IV at ctx-32 for the first). The last ciphertext block processed |
| ; is saved back to ctx-32 as the next IV. |
| 608 gen_aes_cbc_dec_func MACRO rnds |
| 609 |
| 610 LOCAL loop7 |
| 611 LOCAL loop1 |
| 612 LOCAL dec1 |
| 613 LOCAL bail |
| 614 |
| 615 push inputLen |
| 616 |
| 617 mov ctx, [esp + 2*4 + 0*4] |
| 618 mov output, [esp + 2*4 + 1*4] |
| 619 mov input, [esp + 2*4 + 4*4] |
| 620 mov inputLen, [esp + 2*4 + 5*4] |
| 621 |
| 622 lea ctx, [44+ctx] |
| 623 |
| 624 loop7: |
| 625 cmp inputLen, 7*16 |
| 626 jb dec1 |
| 627 |
| 628 movdqu xmm0, [0*16 + input] |
| 629 movdqu xmm1, [1*16 + input] |
| 630 movdqu xmm2, [2*16 + input] |
| 631 movdqu xmm3, [3*16 + input] |
| 632 movdqu xmm4, [4*16 + input] |
| 633 movdqu xmm5, [5*16 + input] |
| 634 movdqu xmm6, [6*16 + input] |
| 635 |
| ; Whitening with round key 0, then the unrolled decrypt rounds. |
| 636 movdqu xmm7, [0*16 + ctx] |
| 637 pxor xmm0, xmm7 |
| 638 pxor xmm1, xmm7 |
| 639 pxor xmm2, xmm7 |
| 640 pxor xmm3, xmm7 |
| 641 pxor xmm4, xmm7 |
| 642 pxor xmm5, xmm7 |
| 643 pxor xmm6, xmm7 |
| 644 |
| 645 i = 1 |
| 646 WHILE i LT rnds |
| 647 aes_dec_rnd i |
| 648 i = i+1 |
| 649 ENDM |
| 650 aes_dec_last_rnd rnds |
| 651 |
| ; CBC unchain: block k ^= ciphertext block k-1 (IV for block 0). The |
| ; ciphertext is re-read from input, which is still unmodified memory. |
| 652 movdqu xmm7, [-32 + ctx] |
| 653 pxor xmm0, xmm7 |
| 654 movdqu xmm7, [0*16 + input] |
| 655 pxor xmm1, xmm7 |
| 656 movdqu xmm7, [1*16 + input] |
| 657 pxor xmm2, xmm7 |
| 658 movdqu xmm7, [2*16 + input] |
| 659 pxor xmm3, xmm7 |
| 660 movdqu xmm7, [3*16 + input] |
| 661 pxor xmm4, xmm7 |
| 662 movdqu xmm7, [4*16 + input] |
| 663 pxor xmm5, xmm7 |
| 664 movdqu xmm7, [5*16 + input] |
| 665 pxor xmm6, xmm7 |
| ; xmm7 = ciphertext block 6 = the next IV. |
| 666 movdqu xmm7, [6*16 + input] |
| 667 |
| 668 movdqu [0*16 + output], xmm0 |
| 669 movdqu [1*16 + output], xmm1 |
| 670 movdqu [2*16 + output], xmm2 |
| 671 movdqu [3*16 + output], xmm3 |
| 672 movdqu [4*16 + output], xmm4 |
| 673 movdqu [5*16 + output], xmm5 |
| 674 movdqu [6*16 + output], xmm6 |
| 675 movdqu [-32 + ctx], xmm7 |
| 676 |
| 677 lea input, [7*16 + input] |
| 678 lea output, [7*16 + output] |
| 679 sub inputLen, 7*16 |
| 680 jmp loop7 |
| ; Tail path: single-block decryption. xmm3 carries the chaining value. |
| 681 dec1: |
| 682 |
| 683 movdqu xmm3, [-32 + ctx] |
| 684 |
| 685 loop1: |
| 686 cmp inputLen, 1*16 |
| 687 jb bail |
| 688 |
| ; xmm4 keeps the raw ciphertext block to become the next chain value. |
| 689 movdqu xmm0, [input] |
| 690 movdqa xmm4, xmm0 |
| 691 movdqu xmm7, [0*16 + ctx] |
| 692 pxor xmm0, xmm7 |
| 693 |
| 694 i = 1 |
| 695 WHILE i LT rnds |
| 696 movdqu xmm7, [i*16 + ctx] |
| 697 aesdec xmm0, xmm7 |
| 698 i = i+1 |
| 699 ENDM |
| 700 movdqu xmm7, [rnds*16 + ctx] |
| 701 aesdeclast xmm0, xmm7 |
| 702 pxor xmm3, xmm0 |
| 703 |
| 704 movdqu [output], xmm3 |
| 705 movdqa xmm3, xmm4 |
| 706 |
| 707 lea input, [1*16 + input] |
| 708 lea output, [1*16 + output] |
| 709 sub inputLen, 1*16 |
| 710 jmp loop1 |
| 711 |
| ; Persist the chaining value as the IV for the next call; return 0. |
| 712 bail: |
| 713 movdqu [-32 + ctx], xmm3 |
| 714 xor eax, eax |
| 715 pop inputLen |
| 716 ret |
| 717 ENDM |
| 718 |
| ; CBC entry points: expand the encrypt/decrypt CBC macro with the round |
| ; count for each key size (10/12/14 rounds for AES-128/192/256). |
| 719 ALIGN 16 |
| 720 intel_aes_encrypt_cbc_128 PROC |
| 721 gen_aes_cbc_enc_func 10 |
| 722 intel_aes_encrypt_cbc_128 ENDP |
| 723 |
| 724 ALIGN 16 |
| 725 intel_aes_encrypt_cbc_192 PROC |
| 726 gen_aes_cbc_enc_func 12 |
| 727 intel_aes_encrypt_cbc_192 ENDP |
| 728 |
| 729 ALIGN 16 |
| 730 intel_aes_encrypt_cbc_256 PROC |
| 731 gen_aes_cbc_enc_func 14 |
| 732 intel_aes_encrypt_cbc_256 ENDP |
| 733 |
| 734 ALIGN 16 |
| 735 intel_aes_decrypt_cbc_128 PROC |
| 736 gen_aes_cbc_dec_func 10 |
| 737 intel_aes_decrypt_cbc_128 ENDP |
| 738 |
| 739 ALIGN 16 |
| 740 intel_aes_decrypt_cbc_192 PROC |
| 741 gen_aes_cbc_dec_func 12 |
| 742 intel_aes_decrypt_cbc_192 ENDP |
| 743 |
| 744 ALIGN 16 |
| 745 intel_aes_decrypt_cbc_256 PROC |
| 746 gen_aes_cbc_dec_func 14 |
| 747 intel_aes_decrypt_cbc_256 ENDP |
| 748 |
| 748 |
| 749 |
| 750 |
| ; Extra register roles for CTR mode (both callee-saved, pushed on entry): |
| ;   ctrCtx - pointer to the CTR context (presumably { ..., AESContext* at |
| ;            +4, 16-byte counter block at +8 } - confirm against caller); |
| ;            later reused as the host-endian 32-bit counter value. |
| ;   CTR    - scratch for the byte-swapped counter dword. |
| 751 ctrCtx textequ <esi> |
| 752 CTR textequ <ebx> |
| 753 |
| ; gen_aes_ctr_func rnds: emits a CTR-mode entry point. Strategy: keep 7 |
| ; counter blocks, already XORed with round key 0, in an aligned stack |
| ; buffer; only the last dword (the big-endian 32-bit counter) differs |
| ; between them, so each increment rewrites just dword 3 of one slot as |
| ; bswap(counter) ^ last dword of round key 0. The counter increments are |
| ; interleaved with the first 7 AES rounds of the main loop to hide |
| ; latency. NOTE(review): only the low 32 bits of the counter block are |
| ; incremented - assumes the protocol never carries into the upper 96 bits. |
| 754 gen_aes_ctr_func MACRO rnds |
| 755 |
| 756 LOCAL loop7 |
| 757 LOCAL loop1 |
| 758 LOCAL enc1 |
| 759 LOCAL bail |
| 760 |
| 761 push inputLen |
| 762 push ctrCtx |
| 763 push CTR |
| 764 push ebp |
| 765 |
| 766 mov ctrCtx, [esp + 4*5 + 0*4] |
| 767 mov output, [esp + 4*5 + 1*4] |
| 768 mov input, [esp + 4*5 + 4*4] |
| 769 mov inputLen, [esp + 4*5 + 5*4] |
| 770 |
| ; Fetch the AESContext pointer from the CTR context, bias to round key 0. |
| 771 mov ctx, [4+ctrCtx] |
| 772 lea ctx, [44+ctx] |
| 773 |
| ; Carve a 16-byte-aligned 7*16-byte counter buffer; ebp saves esp. |
| 774 mov ebp, esp |
| 775 sub esp, 7*16 |
| 776 and esp, -16 |
| 777 |
| ; Load the counter block; ctrCtx now becomes the host-endian counter |
| ; value taken from the block's last (big-endian) dword. |
| 778 movdqu xmm0, [8+ctrCtx] |
| 779 mov ctrCtx, [ctrCtx + 8 + 3*4] |
| 780 bswap ctrCtx |
| 781 movdqu xmm1, [ctx + 0*16] |
| 782 |
| ; Pre-whiten with round key 0 and replicate into all 7 slots. |
| 783 pxor xmm0, xmm1 |
| 784 |
| 785 movdqa [esp + 0*16], xmm0 |
| 786 movdqa [esp + 1*16], xmm0 |
| 787 movdqa [esp + 2*16], xmm0 |
| 788 movdqa [esp + 3*16], xmm0 |
| 789 movdqa [esp + 4*16], xmm0 |
| 790 movdqa [esp + 5*16], xmm0 |
| 791 movdqa [esp + 6*16], xmm0 |
| 792 |
| ; Patch slots 1..6 with counters +1..+6: dword 3 = bswap(ctr) ^ rk0.w3. |
| 793 inc ctrCtx |
| 794 mov CTR, ctrCtx |
| 795 bswap CTR |
| 796 xor CTR, [ctx + 3*4] |
| 797 mov [esp + 1*16 + 3*4], CTR |
| 798 |
| 799 inc ctrCtx |
| 800 mov CTR, ctrCtx |
| 801 bswap CTR |
| 802 xor CTR, [ctx + 3*4] |
| 803 mov [esp + 2*16 + 3*4], CTR |
| 804 |
| 805 inc ctrCtx |
| 806 mov CTR, ctrCtx |
| 807 bswap CTR |
| 808 xor CTR, [ctx + 3*4] |
| 809 mov [esp + 3*16 + 3*4], CTR |
| 810 |
| 811 inc ctrCtx |
| 812 mov CTR, ctrCtx |
| 813 bswap CTR |
| 814 xor CTR, [ctx + 3*4] |
| 815 mov [esp + 4*16 + 3*4], CTR |
| 816 |
| 817 inc ctrCtx |
| 818 mov CTR, ctrCtx |
| 819 bswap CTR |
| 820 xor CTR, [ctx + 3*4] |
| 821 mov [esp + 5*16 + 3*4], CTR |
| 822 |
| 823 inc ctrCtx |
| 824 mov CTR, ctrCtx |
| 825 bswap CTR |
| 826 xor CTR, [ctx + 3*4] |
| 827 mov [esp + 6*16 + 3*4], CTR |
| 828 |
| 829 |
| 830 loop7: |
| 831 cmp inputLen, 7*16 |
| 832 jb loop1 |
| 833 |
| ; The 7 pre-whitened counter blocks become the AES state. |
| 834 movdqu xmm0, [0*16 + esp] |
| 835 movdqu xmm1, [1*16 + esp] |
| 836 movdqu xmm2, [2*16 + esp] |
| 837 movdqu xmm3, [3*16 + esp] |
| 838 movdqu xmm4, [4*16 + esp] |
| 839 movdqu xmm5, [5*16 + esp] |
| 840 movdqu xmm6, [6*16 + esp] |
| 841 |
| ; Rounds 1..7, each interleaved with refreshing one stack slot's counter |
| ; dword for the NEXT batch (counters +7..+13 relative to this one). |
| 842 i = 1 |
| 843 WHILE i LE 7 |
| 844 aes_rnd i |
| 845 |
| 846 inc ctrCtx |
| 847 mov CTR, ctrCtx |
| 848 bswap CTR |
| 849 xor CTR, [ctx + 3*4] |
| 850 mov [esp + (i-1)*16 + 3*4], CTR |
| 851 |
| 852 i = i+1 |
| 853 ENDM |
| ; Remaining rounds 8..rnds-1, then the last round. |
| 854 WHILE i LT rnds |
| 855 aes_rnd i |
| 856 i = i+1 |
| 857 ENDM |
| 858 aes_last_rnd rnds |
| 859 |
| ; Keystream XOR plaintext -> ciphertext. |
| 860 movdqu xmm7, [0*16 + input] |
| 861 pxor xmm0, xmm7 |
| 862 movdqu xmm7, [1*16 + input] |
| 863 pxor xmm1, xmm7 |
| 864 movdqu xmm7, [2*16 + input] |
| 865 pxor xmm2, xmm7 |
| 866 movdqu xmm7, [3*16 + input] |
| 867 pxor xmm3, xmm7 |
| 868 movdqu xmm7, [4*16 + input] |
| 869 pxor xmm4, xmm7 |
| 870 movdqu xmm7, [5*16 + input] |
| 871 pxor xmm5, xmm7 |
| 872 movdqu xmm7, [6*16 + input] |
| 873 pxor xmm6, xmm7 |
| 874 |
| 875 movdqu [0*16 + output], xmm0 |
| 876 movdqu [1*16 + output], xmm1 |
| 877 movdqu [2*16 + output], xmm2 |
| 878 movdqu [3*16 + output], xmm3 |
| 879 movdqu [4*16 + output], xmm4 |
| 880 movdqu [5*16 + output], xmm5 |
| 881 movdqu [6*16 + output], xmm6 |
| 882 |
| 883 lea input, [7*16 + input] |
| 884 lea output, [7*16 + output] |
| 885 sub inputLen, 7*16 |
| 886 jmp loop7 |
| 887 |
| 888 |
| ; Tail path: consume one prepared counter slot per block (at most 6 |
| ; iterations here since inputLen < 7*16, so the 7 slots suffice). The |
| ; `add esp, 16` advances past consumed slots; esp is restored from ebp. |
| 889 loop1: |
| 890 cmp inputLen, 1*16 |
| 891 jb bail |
| 892 |
| 893 movdqu xmm0, [esp] |
| 894 add esp, 16 |
| 895 |
| ; Slot is already XORed with round key 0; run rounds 1..rnds. |
| 896 i = 1 |
| 897 WHILE i LT rnds |
| 898 movdqu xmm7, [i*16 + ctx] |
| 899 aesenc xmm0, xmm7 |
| 900 i = i+1 |
| 901 ENDM |
| 902 movdqu xmm7, [rnds*16 + ctx] |
| 903 aesenclast xmm0, xmm7 |
| 904 |
| 905 movdqu xmm7, [input] |
| 906 pxor xmm0, xmm7 |
| 907 movdqu [output], xmm0 |
| 908 |
| 909 lea input, [1*16 + input] |
| 910 lea output, [1*16 + output] |
| 911 sub inputLen, 1*16 |
| 912 jmp loop1 |
| 913 |
| 914 bail: |
| 915 |
| ; Reload the CTR context pointer (original arg, via saved ebp) and store |
| ; the next unused counter block back, un-whitening it by XORing round |
| ; key 0 out again. |
| 916 mov ctrCtx, [ebp + 4*5 + 0*4] |
| 917 movdqu xmm0, [esp] |
| 918 movdqu xmm1, [ctx + 0*16] |
| 919 pxor xmm0, xmm1 |
| 920 movdqu [8+ctrCtx], xmm0 |
| 921 |
| 922 |
| ; Return 0; restore stack and callee-saved registers. |
| 923 xor eax, eax |
| 924 mov esp, ebp |
| 925 pop ebp |
| 926 pop CTR |
| 927 pop ctrCtx |
| 928 pop inputLen |
| 929 ret |
| 930 ENDM |
| 931 |
| 932 |
| ; CTR entry points: expand the CTR macro with the round count per key |
| ; size (10/12/14 for AES-128/192/256). CTR decryption is identical to |
| ; encryption, so only encrypt variants exist. |
| 933 ALIGN 16 |
| 934 intel_aes_encrypt_ctr_128 PROC |
| 935 gen_aes_ctr_func 10 |
| 936 intel_aes_encrypt_ctr_128 ENDP |
| 937 |
| 938 ALIGN 16 |
| 939 intel_aes_encrypt_ctr_192 PROC |
| 940 gen_aes_ctr_func 12 |
| 941 intel_aes_encrypt_ctr_192 ENDP |
| 942 |
| 943 ALIGN 16 |
| 944 intel_aes_encrypt_ctr_256 PROC |
| 945 gen_aes_ctr_func 14 |
| 946 intel_aes_encrypt_ctr_256 ENDP |
| 947 |
| 948 |
| 949 END |
OLD | NEW |