| OLD | NEW |
| 1 ; LICENSE: | 1 ; LICENSE: |
| 2 ; This submission to NSS is to be made available under the terms of the | 2 ; This submission to NSS is to be made available under the terms of the |
| 3 ; Mozilla Public License, v. 2.0. You can obtain one at http: | 3 ; Mozilla Public License, v. 2.0. You can obtain one at http: |
| 4 ; //mozilla.org/MPL/2.0/. | 4 ; //mozilla.org/MPL/2.0/. |
| 5 ;############################################################################### | 5 ;############################################################################### |
| 6 ; Copyright(c) 2014, Intel Corp. | 6 ; Copyright(c) 2014, Intel Corp. |
| 7 ; Developers and authors: | 7 ; Developers and authors: |
| 8 ; Shay Gueron and Vlad Krasnov | 8 ; Shay Gueron and Vlad Krasnov |
| 9 ; Intel Corporation, Israel Development Centre, Haifa, Israel | 9 ; Intel Corporation, Israel Development Centre, Haifa, Israel |
| 10 ; Please send feedback directly to crypto.feedback.alias@intel.com | 10 ; Please send feedback directly to crypto.feedback.alias@intel.com |
| 11 | 11 |
| 12 | 12 |
| 13 .MODEL FLAT, C | |
| 14 .XMM | |
| 15 | |
| 16 .DATA | 13 .DATA |
| 17 ALIGN 16 | 14 ALIGN 16 |
| 18 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh | 15 Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh |
| 19 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h | 16 Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h |
| 20 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh | 17 Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh |
| 21 Lcon1 dd 1,1,1,1 | 18 Lcon1 dd 1,1,1,1 |
| 22 Lcon2 dd 1bh,1bh,1bh,1bh | 19 Lcon2 dd 1bh,1bh,1bh,1bh |
| 23 | 20 |
| 24 .CODE | 21 .CODE |
| 25 | 22 |
| 26 ctx textequ <ecx> | 23 ctx textequ <rcx> |
| 27 output textequ <edx> | 24 output textequ <rdx> |
| 28 input textequ <eax> | 25 input textequ <r8> |
| 29 inputLen textequ <edi> | 26 inputLen textequ <r9d> |
| 30 | 27 |
| 31 | 28 |
| 32 aes_rnd MACRO i | 29 aes_rnd MACRO i |
| 33 movdqu xmm7, [i*16 + ctx] | 30 movdqu xmm8, [i*16 + ctx] |
| 34 aesenc xmm0, xmm7 | 31 aesenc xmm0, xmm8 |
| 35 aesenc xmm1, xmm7 | 32 aesenc xmm1, xmm8 |
| 36 aesenc xmm2, xmm7 | 33 aesenc xmm2, xmm8 |
| 37 aesenc xmm3, xmm7 | 34 aesenc xmm3, xmm8 |
| 38 aesenc xmm4, xmm7 | 35 aesenc xmm4, xmm8 |
| 39 aesenc xmm5, xmm7 | 36 aesenc xmm5, xmm8 |
| 40 aesenc xmm6, xmm7 | 37 aesenc xmm6, xmm8 |
| 38 aesenc xmm7, xmm8 |
| 41 ENDM | 39 ENDM |
| 42 | 40 |
| 43 aes_last_rnd MACRO i | 41 aes_last_rnd MACRO i |
| 44 movdqu xmm7, [i*16 + ctx] | 42 movdqu xmm8, [i*16 + ctx] |
| 45 aesenclast xmm0, xmm7 | 43 aesenclast xmm0, xmm8 |
| 46 aesenclast xmm1, xmm7 | 44 aesenclast xmm1, xmm8 |
| 47 aesenclast xmm2, xmm7 | 45 aesenclast xmm2, xmm8 |
| 48 aesenclast xmm3, xmm7 | 46 aesenclast xmm3, xmm8 |
| 49 aesenclast xmm4, xmm7 | 47 aesenclast xmm4, xmm8 |
| 50 aesenclast xmm5, xmm7 | 48 aesenclast xmm5, xmm8 |
| 51 aesenclast xmm6, xmm7 | 49 aesenclast xmm6, xmm8 |
| 50 aesenclast xmm7, xmm8 |
| 52 ENDM | 51 ENDM |
| 53 | 52 |
| 54 aes_dec_rnd MACRO i | 53 aes_dec_rnd MACRO i |
| 55 movdqu xmm7, [i*16 + ctx] | 54 movdqu xmm8, [i*16 + ctx] |
| 56 aesdec xmm0, xmm7 | 55 aesdec xmm0, xmm8 |
| 57 aesdec xmm1, xmm7 | 56 aesdec xmm1, xmm8 |
| 58 aesdec xmm2, xmm7 | 57 aesdec xmm2, xmm8 |
| 59 aesdec xmm3, xmm7 | 58 aesdec xmm3, xmm8 |
| 60 aesdec xmm4, xmm7 | 59 aesdec xmm4, xmm8 |
| 61 aesdec xmm5, xmm7 | 60 aesdec xmm5, xmm8 |
| 62 aesdec xmm6, xmm7 | 61 aesdec xmm6, xmm8 |
| 62 aesdec xmm7, xmm8 |
| 63 ENDM | 63 ENDM |
| 64 | 64 |
| 65 aes_dec_last_rnd MACRO i | 65 aes_dec_last_rnd MACRO i |
| 66 movdqu xmm7, [i*16 + ctx] | 66 movdqu xmm8, [i*16 + ctx] |
| 67 aesdeclast xmm0, xmm7 | 67 aesdeclast xmm0, xmm8 |
| 68 aesdeclast xmm1, xmm7 | 68 aesdeclast xmm1, xmm8 |
| 69 aesdeclast xmm2, xmm7 | 69 aesdeclast xmm2, xmm8 |
| 70 aesdeclast xmm3, xmm7 | 70 aesdeclast xmm3, xmm8 |
| 71 aesdeclast xmm4, xmm7 | 71 aesdeclast xmm4, xmm8 |
| 72 aesdeclast xmm5, xmm7 | 72 aesdeclast xmm5, xmm8 |
| 73 aesdeclast xmm6, xmm7 | 73 aesdeclast xmm6, xmm8 |
| 74 aesdeclast xmm7, xmm8 |
| 74 ENDM | 75 ENDM |
| 75 | 76 |
| 76 | 77 |
| 77 gen_aes_ecb_func MACRO enc, rnds | 78 gen_aes_ecb_func MACRO enc, rnds |
| 78 | 79 |
| 79 LOCAL loop7 | 80 LOCAL loop8 |
| 80 LOCAL loop1 | 81 LOCAL loop1 |
| 81 LOCAL bail | 82 LOCAL bail |
| 82 | 83 |
| 83 push inputLen | 84 xor inputLen, inputLen |
| 85 mov input, [rsp + 1*8 + 8*4] |
| 86 mov inputLen, [rsp + 1*8 + 8*5] |
| 84 | 87 |
| 85 mov ctx, [esp + 2*4 + 0*4] | 88 sub rsp, 3*16 |
| 86 mov output, [esp + 2*4 + 1*4] | |
| 87 mov input, [esp + 2*4 + 4*4] | |
| 88 mov inputLen, [esp + 2*4 + 5*4] | |
| 89 | 89 |
| 90 lea ctx, [44+ctx] | 90 movdqu [rsp + 0*16], xmm6 |
| 91 movdqu [rsp + 1*16], xmm7 |
| 92 movdqu [rsp + 2*16], xmm8 |
| 91 | 93 |
| 92 loop7: | 94 lea ctx, [48+ctx] |
| 93 cmp inputLen, 7*16 | 95 |
| 96 loop8: |
| 97 cmp inputLen, 8*16 |
| 94 jb loop1 | 98 jb loop1 |
| 95 | 99 |
| 96 movdqu xmm0, [0*16 + input] | 100 movdqu xmm0, [0*16 + input] |
| 97 movdqu xmm1, [1*16 + input] | 101 movdqu xmm1, [1*16 + input] |
| 98 movdqu xmm2, [2*16 + input] | 102 movdqu xmm2, [2*16 + input] |
| 99 movdqu xmm3, [3*16 + input] | 103 movdqu xmm3, [3*16 + input] |
| 100 movdqu xmm4, [4*16 + input] | 104 movdqu xmm4, [4*16 + input] |
| 101 movdqu xmm5, [5*16 + input] | 105 movdqu xmm5, [5*16 + input] |
| 102 movdqu xmm6, [6*16 + input] | 106 movdqu xmm6, [6*16 + input] |
| 107 movdqu xmm7, [7*16 + input] |
| 103 | 108 |
| 104 movdqu xmm7, [0*16 + ctx] | 109 movdqu xmm8, [0*16 + ctx] |
| 105 pxor xmm0, xmm7 | 110 pxor xmm0, xmm8 |
| 106 pxor xmm1, xmm7 | 111 pxor xmm1, xmm8 |
| 107 pxor xmm2, xmm7 | 112 pxor xmm2, xmm8 |
| 108 pxor xmm3, xmm7 | 113 pxor xmm3, xmm8 |
| 109 pxor xmm4, xmm7 | 114 pxor xmm4, xmm8 |
| 110 pxor xmm5, xmm7 | 115 pxor xmm5, xmm8 |
| 111 pxor xmm6, xmm7 | 116 pxor xmm6, xmm8 |
| 117 pxor xmm7, xmm8 |
| 112 | 118 |
| 113 IF enc eq 1 | 119 IF enc eq 1 |
| 114 rnd textequ <aes_rnd> | 120 rnd textequ <aes_rnd> |
| 115 lastrnd textequ <aes_last_rnd> | 121 lastrnd textequ <aes_last_rnd> |
| 116 aesinst textequ <aesenc> | 122 aesinst textequ <aesenc> |
| 117 aeslastinst textequ <aesenclast> | 123 aeslastinst textequ <aesenclast> |
| 118 ELSE | 124 ELSE |
| 119 rnd textequ <aes_dec_rnd> | 125 rnd textequ <aes_dec_rnd> |
| 120 lastrnd textequ <aes_dec_last_rnd> | 126 lastrnd textequ <aes_dec_last_rnd> |
| 121 aesinst textequ <aesdec> | 127 aesinst textequ <aesdec> |
| 122 aeslastinst textequ <aesdeclast> | 128 aeslastinst textequ <aesdeclast> |
| 123 ENDIF | 129 ENDIF |
| 124 | 130 |
| 125 i = 1 | 131 i = 1 |
| 126 WHILE i LT rnds | 132 WHILE i LT rnds |
| 127 rnd i | 133 rnd i |
| 128 i = i+1 | 134 i = i+1 |
| 129 ENDM | 135 ENDM |
| 130 lastrnd rnds | 136 lastrnd rnds |
| 131 | 137 |
| 132 movdqu [0*16 + output], xmm0 | 138 movdqu [0*16 + output], xmm0 |
| 133 movdqu [1*16 + output], xmm1 | 139 movdqu [1*16 + output], xmm1 |
| 134 movdqu [2*16 + output], xmm2 | 140 movdqu [2*16 + output], xmm2 |
| 135 movdqu [3*16 + output], xmm3 | 141 movdqu [3*16 + output], xmm3 |
| 136 movdqu [4*16 + output], xmm4 | 142 movdqu [4*16 + output], xmm4 |
| 137 movdqu [5*16 + output], xmm5 | 143 movdqu [5*16 + output], xmm5 |
| 138 movdqu [6*16 + output], xmm6 | 144 movdqu [6*16 + output], xmm6 |
| 145 movdqu [7*16 + output], xmm7 |
| 139 | 146 |
| 140 lea input, [7*16 + input] | 147 lea input, [8*16 + input] |
| 141 lea output, [7*16 + output] | 148 lea output, [8*16 + output] |
| 142 sub inputLen, 7*16 | 149 sub inputLen, 8*16 |
| 143 jmp loop7 | 150 jmp loop8 |
| 144 | 151 |
| 145 loop1: | 152 loop1: |
| 146 cmp inputLen, 1*16 | 153 cmp inputLen, 1*16 |
| 147 jb bail | 154 jb bail |
| 148 | 155 |
| 149 movdqu xmm0, [input] | 156 movdqu xmm0, [input] |
| 150 movdqu xmm7, [0*16 + ctx] | 157 movdqu xmm7, [0*16 + ctx] |
| 151 pxor xmm0, xmm7 | 158 pxor xmm0, xmm7 |
| 152 | 159 |
| 153 i = 1 | 160 i = 1 |
| 154 WHILE i LT rnds | 161 WHILE i LT rnds |
| 155 movdqu xmm7, [i*16 + ctx] | 162 movdqu xmm7, [i*16 + ctx] |
| 156 aesinst xmm0, xmm7 | 163 aesinst xmm0, xmm7 |
| 157 i = i+1 | 164 i = i+1 |
| 158 ENDM | 165 ENDM |
| 159 movdqu xmm7, [rnds*16 + ctx] | 166 movdqu xmm7, [rnds*16 + ctx] |
| 160 aeslastinst xmm0, xmm7 | 167 aeslastinst xmm0, xmm7 |
| 161 | 168 |
| 162 movdqu [output], xmm0 | 169 movdqu [output], xmm0 |
| 163 | 170 |
| 164 lea input, [1*16 + input] | 171 lea input, [1*16 + input] |
| 165 lea output, [1*16 + output] | 172 lea output, [1*16 + output] |
| 166 sub inputLen, 1*16 | 173 sub inputLen, 1*16 |
| 167 jmp loop1 | 174 jmp loop1 |
| 168 | 175 |
| 169 bail: | 176 bail: |
| 170 xor eax, eax | 177 xor rax, rax |
| 171 pop inputLen | 178 |
| 179 movdqu xmm6, [rsp + 0*16] |
| 180 movdqu xmm7, [rsp + 1*16] |
| 181 movdqu xmm8, [rsp + 2*16] |
| 182 add rsp, 3*16 |
| 172 ret | 183 ret |
| 173 | |
| 174 ENDM | 184 ENDM |
| 175 | 185 |
| 176 ALIGN 16 | |
| 177 intel_aes_encrypt_ecb_128 PROC | 186 intel_aes_encrypt_ecb_128 PROC |
| 178 gen_aes_ecb_func 1, 10 | 187 gen_aes_ecb_func 1, 10 |
| 179 intel_aes_encrypt_ecb_128 ENDP | 188 intel_aes_encrypt_ecb_128 ENDP |
| 180 | 189 |
| 181 ALIGN 16 | |
| 182 intel_aes_encrypt_ecb_192 PROC | 190 intel_aes_encrypt_ecb_192 PROC |
| 183 gen_aes_ecb_func 1, 12 | 191 gen_aes_ecb_func 1, 12 |
| 184 intel_aes_encrypt_ecb_192 ENDP | 192 intel_aes_encrypt_ecb_192 ENDP |
| 185 | 193 |
| 186 ALIGN 16 | |
| 187 intel_aes_encrypt_ecb_256 PROC | 194 intel_aes_encrypt_ecb_256 PROC |
| 188 gen_aes_ecb_func 1, 14 | 195 gen_aes_ecb_func 1, 14 |
| 189 intel_aes_encrypt_ecb_256 ENDP | 196 intel_aes_encrypt_ecb_256 ENDP |
| 190 | 197 |
| 191 ALIGN 16 | |
| 192 intel_aes_decrypt_ecb_128 PROC | 198 intel_aes_decrypt_ecb_128 PROC |
| 193 gen_aes_ecb_func 0, 10 | 199 gen_aes_ecb_func 0, 10 |
| 194 intel_aes_decrypt_ecb_128 ENDP | 200 intel_aes_decrypt_ecb_128 ENDP |
| 195 | 201 |
| 196 ALIGN 16 | |
| 197 intel_aes_decrypt_ecb_192 PROC | 202 intel_aes_decrypt_ecb_192 PROC |
| 198 gen_aes_ecb_func 0, 12 | 203 gen_aes_ecb_func 0, 12 |
| 199 intel_aes_decrypt_ecb_192 ENDP | 204 intel_aes_decrypt_ecb_192 ENDP |
| 200 | 205 |
| 201 ALIGN 16 | |
| 202 intel_aes_decrypt_ecb_256 PROC | 206 intel_aes_decrypt_ecb_256 PROC |
| 203 gen_aes_ecb_func 0, 14 | 207 gen_aes_ecb_func 0, 14 |
| 204 intel_aes_decrypt_ecb_256 ENDP | 208 intel_aes_decrypt_ecb_256 ENDP |
| 205 | 209 |
| 206 | 210 |
| 207 KEY textequ <ecx> | 211 KEY textequ <rcx> |
| 208 KS textequ <edx> | 212 KS textequ <rdx> |
| 209 ITR textequ <eax> | 213 ITR textequ <r8> |
| 210 | 214 |
| 211 ALIGN 16 | |
| 212 intel_aes_encrypt_init_128 PROC | 215 intel_aes_encrypt_init_128 PROC |
| 213 | 216 |
| 214 mov KEY, [esp + 1*4 + 0*4] | |
| 215 mov KS, [esp + 1*4 + 1*4] | |
| 216 | |
| 217 | |
| 218 movdqu xmm1, [KEY] | 217 movdqu xmm1, [KEY] |
| 219 movdqu [KS], xmm1 | 218 movdqu [KS], xmm1 |
| 220 movdqa xmm2, xmm1 | 219 movdqa xmm2, xmm1 |
| 221 | 220 |
| 222 lea ITR, Lcon1 | 221 lea ITR, Lcon1 |
| 223 movdqa xmm0, [ITR] | 222 movdqa xmm0, [ITR] |
| 224 lea ITR, Lmask | 223 lea ITR, Lmask |
| 225 movdqa xmm4, [ITR] | 224 movdqa xmm4, [ITR] |
| 226 | 225 |
| 227 mov ITR, 8 | 226 mov ITR, 8 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 273 pslldq xmm3, 4 | 272 pslldq xmm3, 4 |
| 274 pxor xmm1, xmm3 | 273 pxor xmm1, xmm3 |
| 275 pxor xmm1, xmm2 | 274 pxor xmm1, xmm2 |
| 276 movdqu [32 + KS], xmm1 | 275 movdqu [32 + KS], xmm1 |
| 277 movdqa xmm2, xmm1 | 276 movdqa xmm2, xmm1 |
| 278 | 277 |
| 279 ret | 278 ret |
| 280 intel_aes_encrypt_init_128 ENDP | 279 intel_aes_encrypt_init_128 ENDP |
| 281 | 280 |
| 282 | 281 |
| 283 ALIGN 16 | |
| 284 intel_aes_decrypt_init_128 PROC | 282 intel_aes_decrypt_init_128 PROC |
| 285 | 283 |
| 286 mov KEY, [esp + 1*4 + 0*4] | |
| 287 mov KS, [esp + 1*4 + 1*4] | |
| 288 | |
| 289 push KS | 284 push KS |
| 290 push KEY | 285 push KEY |
| 291 | 286 |
| 292 call intel_aes_encrypt_init_128 | 287 call intel_aes_encrypt_init_128 |
| 293 | 288 |
| 294 pop KEY | 289 pop KEY |
| 295 pop KS | 290 pop KS |
| 296 | 291 |
| 297 movdqu xmm0, [0*16 + KS] | 292 movdqu xmm0, [0*16 + KS] |
| 298 movdqu xmm1, [10*16 + KS] | 293 movdqu xmm1, [10*16 + KS] |
| (...skipping 14 matching lines...) Expand all Loading... |
| 313 i = i+1 | 308 i = i+1 |
| 314 ENDM | 309 ENDM |
| 315 | 310 |
| 316 movdqu xmm0, [5*16 + KS] | 311 movdqu xmm0, [5*16 + KS] |
| 317 aesimc xmm0, xmm0 | 312 aesimc xmm0, xmm0 |
| 318 movdqu [5*16 + KS], xmm0 | 313 movdqu [5*16 + KS], xmm0 |
| 319 ret | 314 ret |
| 320 intel_aes_decrypt_init_128 ENDP | 315 intel_aes_decrypt_init_128 ENDP |
| 321 | 316 |
| 322 | 317 |
| 323 ALIGN 16 | |
| 324 intel_aes_encrypt_init_192 PROC | 318 intel_aes_encrypt_init_192 PROC |
| 325 | 319 |
| 326 mov KEY, [esp + 1*4 + 0*4] | 320 sub rsp, 16*2 |
| 327 mov KS, [esp + 1*4 + 1*4] | 321 movdqu [16*0 + rsp], xmm6 |
| 322 movdqu [16*1 + rsp], xmm7 |
| 328 | 323 |
| 329 pxor xmm3, xmm3 | |
| 330 movdqu xmm1, [KEY] | 324 movdqu xmm1, [KEY] |
| 331 pinsrd xmm3, DWORD PTR [16 + KEY], 0 | 325 mov ITR, [16 + KEY] |
| 332 pinsrd xmm3, DWORD PTR [20 + KEY], 1 | 326 movd xmm3, ITR |
| 333 | 327 |
| 334 movdqu [KS], xmm1 | 328 movdqu [KS], xmm1 |
| 335 movdqa xmm5, xmm3 | 329 movdqa xmm5, xmm3 |
| 336 | 330 |
| 337 lea ITR, Lcon1 | 331 lea ITR, Lcon1 |
| 338 movdqu xmm0, [ITR] | 332 movdqu xmm0, [ITR] |
| 339 lea ITR, Lmask192 | 333 lea ITR, Lmask192 |
| 340 movdqu xmm4, [ITR] | 334 movdqu xmm4, [ITR] |
| 341 | 335 |
| 342 mov ITR, 4 | 336 mov ITR, 4 |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 389 | 383 |
| 390 movdqu [48 + KS], xmm1 | 384 movdqu [48 + KS], xmm1 |
| 391 movdqa xmm5, xmm3 | 385 movdqa xmm5, xmm3 |
| 392 | 386 |
| 393 lea KS, [48 + KS] | 387 lea KS, [48 + KS] |
| 394 | 388 |
| 395 dec ITR | 389 dec ITR |
| 396 jnz Lenc_192_ks_loop | 390 jnz Lenc_192_ks_loop |
| 397 | 391 |
| 398 movdqu [16 + KS], xmm5 | 392 movdqu [16 + KS], xmm5 |
| 399 ret | 393 |
| 394 movdqu xmm7, [16*1 + rsp] |
| 395 movdqu xmm6, [16*0 + rsp] |
| 396 add rsp, 16*2 |
| 397 ret |
| 400 intel_aes_encrypt_init_192 ENDP | 398 intel_aes_encrypt_init_192 ENDP |
| 401 | 399 |
| 402 ALIGN 16 | |
| 403 intel_aes_decrypt_init_192 PROC | 400 intel_aes_decrypt_init_192 PROC |
| 404 mov KEY, [esp + 1*4 + 0*4] | |
| 405 mov KS, [esp + 1*4 + 1*4] | |
| 406 | |
| 407 push KS | 401 push KS |
| 408 push KEY | 402 push KEY |
| 409 | 403 |
| 410 call intel_aes_encrypt_init_192 | 404 call intel_aes_encrypt_init_192 |
| 411 | 405 |
| 412 pop KEY | 406 pop KEY |
| 413 pop KS | 407 pop KS |
| 414 | 408 |
| 415 movdqu xmm0, [0*16 + KS] | 409 movdqu xmm0, [0*16 + KS] |
| 416 movdqu xmm1, [12*16 + KS] | 410 movdqu xmm1, [12*16 + KS] |
| (...skipping 13 matching lines...) Expand all Loading... |
| 430 | 424 |
| 431 i = i+1 | 425 i = i+1 |
| 432 ENDM | 426 ENDM |
| 433 | 427 |
| 434 movdqu xmm0, [6*16 + KS] | 428 movdqu xmm0, [6*16 + KS] |
| 435 aesimc xmm0, xmm0 | 429 aesimc xmm0, xmm0 |
| 436 movdqu [6*16 + KS], xmm0 | 430 movdqu [6*16 + KS], xmm0 |
| 437 ret | 431 ret |
| 438 intel_aes_decrypt_init_192 ENDP | 432 intel_aes_decrypt_init_192 ENDP |
| 439 | 433 |
| 440 ALIGN 16 | 434 |
| 441 intel_aes_encrypt_init_256 PROC | 435 intel_aes_encrypt_init_256 PROC |
| 436 sub rsp, 16*2 |
| 437 movdqu [16*0 + rsp], xmm6 |
| 438 movdqu [16*1 + rsp], xmm7 |
| 442 | 439 |
| 443 mov KEY, [esp + 1*4 + 0*4] | |
| 444 mov KS, [esp + 1*4 + 1*4] | |
| 445 movdqu xmm1, [16*0 + KEY] | 440 movdqu xmm1, [16*0 + KEY] |
| 446 movdqu xmm3, [16*1 + KEY] | 441 movdqu xmm3, [16*1 + KEY] |
| 447 | 442 |
| 448 movdqu [16*0 + KS], xmm1 | 443 movdqu [16*0 + KS], xmm1 |
| 449 movdqu [16*1 + KS], xmm3 | 444 movdqu [16*1 + KS], xmm3 |
| 450 | 445 |
| 451 lea ITR, Lcon1 | 446 lea ITR, Lcon1 |
| 452 movdqu xmm0, [ITR] | 447 movdqu xmm0, [ITR] |
| 453 lea ITR, Lmask256 | 448 lea ITR, Lmask256 |
| 454 movdqu xmm5, [ITR] | 449 movdqu xmm5, [ITR] |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 495 movdqa xmm4, xmm1 | 490 movdqa xmm4, xmm1 |
| 496 pslldq xmm4, 4 | 491 pslldq xmm4, 4 |
| 497 pxor xmm1, xmm4 | 492 pxor xmm1, xmm4 |
| 498 pslldq xmm4, 4 | 493 pslldq xmm4, 4 |
| 499 pxor xmm1, xmm4 | 494 pxor xmm1, xmm4 |
| 500 pslldq xmm4, 4 | 495 pslldq xmm4, 4 |
| 501 pxor xmm1, xmm4 | 496 pxor xmm1, xmm4 |
| 502 pxor xmm1, xmm2 | 497 pxor xmm1, xmm2 |
| 503 movdqu [16*2 + KS], xmm1 | 498 movdqu [16*2 + KS], xmm1 |
| 504 | 499 |
| 500 movdqu xmm7, [16*1 + rsp] |
| 501 movdqu xmm6, [16*0 + rsp] |
| 502 add rsp, 16*2 |
| 505 ret | 503 ret |
| 504 |
| 506 intel_aes_encrypt_init_256 ENDP | 505 intel_aes_encrypt_init_256 ENDP |
| 507 | 506 |
| 508 ALIGN 16 | 507 |
| 509 intel_aes_decrypt_init_256 PROC | 508 intel_aes_decrypt_init_256 PROC |
| 510 mov KEY, [esp + 1*4 + 0*4] | |
| 511 mov KS, [esp + 1*4 + 1*4] | |
| 512 | |
| 513 push KS | 509 push KS |
| 514 push KEY | 510 push KEY |
| 515 | 511 |
| 516 call intel_aes_encrypt_init_256 | 512 call intel_aes_encrypt_init_256 |
| 517 | 513 |
| 518 pop KEY | 514 pop KEY |
| 519 pop KS | 515 pop KS |
| 520 | 516 |
| 521 movdqu xmm0, [0*16 + KS] | 517 movdqu xmm0, [0*16 + KS] |
| 522 movdqu xmm1, [14*16 + KS] | 518 movdqu xmm1, [14*16 + KS] |
| (...skipping 20 matching lines...) Expand all Loading... |
| 543 ret | 539 ret |
| 544 intel_aes_decrypt_init_256 ENDP | 540 intel_aes_decrypt_init_256 ENDP |
| 545 | 541 |
| 546 | 542 |
| 547 | 543 |
| 548 gen_aes_cbc_enc_func MACRO rnds | 544 gen_aes_cbc_enc_func MACRO rnds |
| 549 | 545 |
| 550 LOCAL loop1 | 546 LOCAL loop1 |
| 551 LOCAL bail | 547 LOCAL bail |
| 552 | 548 |
| 553 push inputLen | 549 mov input, [rsp + 1*8 + 8*4] |
| 550 mov inputLen, [rsp + 1*8 + 8*5] |
| 554 | 551 |
| 555 mov ctx, [esp + 2*4 + 0*4] | 552 sub rsp, 3*16 |
| 556 mov output, [esp + 2*4 + 1*4] | |
| 557 mov input, [esp + 2*4 + 4*4] | |
| 558 mov inputLen, [esp + 2*4 + 5*4] | |
| 559 | 553 |
| 560 lea ctx, [44+ctx] | 554 movdqu [rsp + 0*16], xmm6 |
| 555 movdqu [rsp + 1*16], xmm7 |
| 556 movdqu [rsp + 2*16], xmm8 |
| 557 |
| 558 lea ctx, [48+ctx] |
| 561 | 559 |
| 562 movdqu xmm0, [-32+ctx] | 560 movdqu xmm0, [-32+ctx] |
| 563 | 561 |
| 564 movdqu xmm2, [0*16 + ctx] | 562 movdqu xmm2, [0*16 + ctx] |
| 565 movdqu xmm3, [1*16 + ctx] | 563 movdqu xmm3, [1*16 + ctx] |
| 566 movdqu xmm4, [2*16 + ctx] | 564 movdqu xmm4, [2*16 + ctx] |
| 567 movdqu xmm5, [3*16 + ctx] | 565 movdqu xmm5, [3*16 + ctx] |
| 568 movdqu xmm6, [4*16 + ctx] | 566 movdqu xmm6, [4*16 + ctx] |
| 567 movdqu xmm7, [5*16 + ctx] |
| 569 | 568 |
| 570 loop1: | 569 loop1: |
| 571 cmp inputLen, 1*16 | 570 cmp inputLen, 1*16 |
| 572 jb bail | 571 jb bail |
| 573 | 572 |
| 574 movdqu xmm1, [input] | 573 movdqu xmm1, [input] |
| 575 pxor xmm1, xmm2 | 574 pxor xmm1, xmm2 |
| 576 pxor xmm0, xmm1 | 575 pxor xmm0, xmm1 |
| 577 | 576 |
| 578 aesenc xmm0, xmm3 | 577 aesenc xmm0, xmm3 |
| 579 aesenc xmm0, xmm4 | 578 aesenc xmm0, xmm4 |
| 580 aesenc xmm0, xmm5 | 579 aesenc xmm0, xmm5 |
| 581 aesenc xmm0, xmm6 | 580 aesenc xmm0, xmm6 |
| 581 aesenc xmm0, xmm7 |
| 582 | 582 |
| 583 i = 5 | 583 i = 6 |
| 584 WHILE i LT rnds | 584 WHILE i LT rnds |
| 585 movdqu xmm7, [i*16 + ctx] | 585 movdqu xmm8, [i*16 + ctx] |
| 586 aesenc xmm0, xmm7 | 586 aesenc xmm0, xmm8 |
| 587 i = i+1 | 587 i = i+1 |
| 588 ENDM | 588 ENDM |
| 589 movdqu xmm7, [rnds*16 + ctx] | 589 movdqu xmm8, [rnds*16 + ctx] |
| 590 aesenclast xmm0, xmm7 | 590 aesenclast xmm0, xmm8 |
| 591 | 591 |
| 592 movdqu [output], xmm0 | 592 movdqu [output], xmm0 |
| 593 | 593 |
| 594 lea input, [1*16 + input] | 594 lea input, [1*16 + input] |
| 595 lea output, [1*16 + output] | 595 lea output, [1*16 + output] |
| 596 sub inputLen, 1*16 | 596 sub inputLen, 1*16 |
| 597 jmp loop1 | 597 jmp loop1 |
| 598 | 598 |
| 599 bail: | 599 bail: |
| 600 movdqu [-32+ctx], xmm0 | 600 movdqu [-32+ctx], xmm0 |
| 601 | 601 |
| 602 xor eax, eax | 602 xor rax, rax |
| 603 pop inputLen | 603 |
| 604 movdqu xmm6, [rsp + 0*16] |
| 605 movdqu xmm7, [rsp + 1*16] |
| 606 movdqu xmm8, [rsp + 2*16] |
| 607 add rsp, 3*16 |
| 604 ret | 608 ret |
| 605 | 609 |
| 606 ENDM | 610 ENDM |
| 607 | 611 |
| 608 gen_aes_cbc_dec_func MACRO rnds | 612 gen_aes_cbc_dec_func MACRO rnds |
| 609 | 613 |
| 610 LOCAL loop7 | 614 LOCAL loop8 |
| 611 LOCAL loop1 | 615 LOCAL loop1 |
| 612 LOCAL dec1 | 616 LOCAL dec1 |
| 613 LOCAL bail | 617 LOCAL bail |
| 614 | 618 |
| 615 push inputLen | 619 mov input, [rsp + 1*8 + 8*4] |
| 620 mov inputLen, [rsp + 1*8 + 8*5] |
| 616 | 621 |
| 617 mov ctx, [esp + 2*4 + 0*4] | 622 sub rsp, 3*16 |
| 618 mov output, [esp + 2*4 + 1*4] | |
| 619 mov input, [esp + 2*4 + 4*4] | |
| 620 mov inputLen, [esp + 2*4 + 5*4] | |
| 621 | 623 |
| 622 lea ctx, [44+ctx] | 624 movdqu [rsp + 0*16], xmm6 |
| 625 movdqu [rsp + 1*16], xmm7 |
| 626 movdqu [rsp + 2*16], xmm8 |
| 623 | 627 |
| 624 loop7: | 628 lea ctx, [48+ctx] |
| 625 cmp inputLen, 7*16 | 629 |
| 630 loop8: |
| 631 cmp inputLen, 8*16 |
| 626 jb dec1 | 632 jb dec1 |
| 627 | 633 |
| 628 movdqu xmm0, [0*16 + input] | 634 movdqu xmm0, [0*16 + input] |
| 629 movdqu xmm1, [1*16 + input] | 635 movdqu xmm1, [1*16 + input] |
| 630 movdqu xmm2, [2*16 + input] | 636 movdqu xmm2, [2*16 + input] |
| 631 movdqu xmm3, [3*16 + input] | 637 movdqu xmm3, [3*16 + input] |
| 632 movdqu xmm4, [4*16 + input] | 638 movdqu xmm4, [4*16 + input] |
| 633 movdqu xmm5, [5*16 + input] | 639 movdqu xmm5, [5*16 + input] |
| 634 movdqu xmm6, [6*16 + input] | 640 movdqu xmm6, [6*16 + input] |
| 641 movdqu xmm7, [7*16 + input] |
| 635 | 642 |
| 636 movdqu xmm7, [0*16 + ctx] | 643 movdqu xmm8, [0*16 + ctx] |
| 637 pxor xmm0, xmm7 | 644 pxor xmm0, xmm8 |
| 638 pxor xmm1, xmm7 | 645 pxor xmm1, xmm8 |
| 639 pxor xmm2, xmm7 | 646 pxor xmm2, xmm8 |
| 640 pxor xmm3, xmm7 | 647 pxor xmm3, xmm8 |
| 641 pxor xmm4, xmm7 | 648 pxor xmm4, xmm8 |
| 642 pxor xmm5, xmm7 | 649 pxor xmm5, xmm8 |
| 643 pxor xmm6, xmm7 | 650 pxor xmm6, xmm8 |
| 651 pxor xmm7, xmm8 |
| 644 | 652 |
| 645 i = 1 | 653 i = 1 |
| 646 WHILE i LT rnds | 654 WHILE i LT rnds |
| 647 aes_dec_rnd i | 655 aes_dec_rnd i |
| 648 i = i+1 | 656 i = i+1 |
| 649 ENDM | 657 ENDM |
| 650 aes_dec_last_rnd rnds | 658 aes_dec_last_rnd rnds |
| 651 | 659 |
| 652 movdqu xmm7, [-32 + ctx] | 660 movdqu xmm8, [-32 + ctx] |
| 653 pxor xmm0, xmm7 | 661 pxor xmm0, xmm8 |
| 654 movdqu xmm7, [0*16 + input] | 662 movdqu xmm8, [0*16 + input] |
| 655 pxor xmm1, xmm7 | 663 pxor xmm1, xmm8 |
| 656 movdqu xmm7, [1*16 + input] | 664 movdqu xmm8, [1*16 + input] |
| 657 pxor xmm2, xmm7 | 665 pxor xmm2, xmm8 |
| 658 movdqu xmm7, [2*16 + input] | 666 movdqu xmm8, [2*16 + input] |
| 659 pxor xmm3, xmm7 | 667 pxor xmm3, xmm8 |
| 660 movdqu xmm7, [3*16 + input] | 668 movdqu xmm8, [3*16 + input] |
| 661 pxor xmm4, xmm7 | 669 pxor xmm4, xmm8 |
| 662 movdqu xmm7, [4*16 + input] | 670 movdqu xmm8, [4*16 + input] |
| 663 pxor xmm5, xmm7 | 671 pxor xmm5, xmm8 |
| 664 movdqu xmm7, [5*16 + input] | 672 movdqu xmm8, [5*16 + input] |
| 665 pxor xmm6, xmm7 | 673 pxor xmm6, xmm8 |
| 666 movdqu xmm7, [6*16 + input] | 674 movdqu xmm8, [6*16 + input] |
| 675 pxor xmm7, xmm8 |
| 676 movdqu xmm8, [7*16 + input] |
| 667 | 677 |
| 668 movdqu [0*16 + output], xmm0 | 678 movdqu [0*16 + output], xmm0 |
| 669 movdqu [1*16 + output], xmm1 | 679 movdqu [1*16 + output], xmm1 |
| 670 movdqu [2*16 + output], xmm2 | 680 movdqu [2*16 + output], xmm2 |
| 671 movdqu [3*16 + output], xmm3 | 681 movdqu [3*16 + output], xmm3 |
| 672 movdqu [4*16 + output], xmm4 | 682 movdqu [4*16 + output], xmm4 |
| 673 movdqu [5*16 + output], xmm5 | 683 movdqu [5*16 + output], xmm5 |
| 674 movdqu [6*16 + output], xmm6 | 684 movdqu [6*16 + output], xmm6 |
| 675 movdqu [-32 + ctx], xmm7 | 685 movdqu [7*16 + output], xmm7 |
| 686 movdqu [-32 + ctx], xmm8 |
| 676 | 687 |
| 677 lea input, [7*16 + input] | 688 lea input, [8*16 + input] |
| 678 lea output, [7*16 + output] | 689 lea output, [8*16 + output] |
| 679 sub inputLen, 7*16 | 690 sub inputLen, 8*16 |
| 680 jmp loop7 | 691 jmp loop8 |
| 681 dec1: | 692 dec1: |
| 682 | 693 |
| 683 movdqu xmm3, [-32 + ctx] | 694 movdqu xmm3, [-32 + ctx] |
| 684 | 695 |
| 685 loop1: | 696 loop1: |
| 686 cmp inputLen, 1*16 | 697 cmp inputLen, 1*16 |
| 687 jb bail | 698 jb bail |
| 688 | 699 |
| 689 movdqu xmm0, [input] | 700 movdqu xmm0, [input] |
| 690 movdqa xmm4, xmm0 | 701 movdqa xmm4, xmm0 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 704 movdqu [output], xmm3 | 715 movdqu [output], xmm3 |
| 705 movdqa xmm3, xmm4 | 716 movdqa xmm3, xmm4 |
| 706 | 717 |
| 707 lea input, [1*16 + input] | 718 lea input, [1*16 + input] |
| 708 lea output, [1*16 + output] | 719 lea output, [1*16 + output] |
| 709 sub inputLen, 1*16 | 720 sub inputLen, 1*16 |
| 710 jmp loop1 | 721 jmp loop1 |
| 711 | 722 |
| 712 bail: | 723 bail: |
| 713 movdqu [-32 + ctx], xmm3 | 724 movdqu [-32 + ctx], xmm3 |
| 714 xor eax, eax | 725 xor rax, rax |
| 715 pop inputLen | 726 |
| 727 movdqu xmm6, [rsp + 0*16] |
| 728 movdqu xmm7, [rsp + 1*16] |
| 729 movdqu xmm8, [rsp + 2*16] |
| 730 add rsp, 3*16 |
| 716 ret | 731 ret |
| 717 ENDM | 732 ENDM |
| 718 | 733 |
| 719 ALIGN 16 | |
| 720 intel_aes_encrypt_cbc_128 PROC | 734 intel_aes_encrypt_cbc_128 PROC |
| 721 gen_aes_cbc_enc_func 10 | 735 gen_aes_cbc_enc_func 10 |
| 722 intel_aes_encrypt_cbc_128 ENDP | 736 intel_aes_encrypt_cbc_128 ENDP |
| 723 | 737 |
| 724 ALIGN 16 | |
| 725 intel_aes_encrypt_cbc_192 PROC | 738 intel_aes_encrypt_cbc_192 PROC |
| 726 gen_aes_cbc_enc_func 12 | 739 gen_aes_cbc_enc_func 12 |
| 727 intel_aes_encrypt_cbc_192 ENDP | 740 intel_aes_encrypt_cbc_192 ENDP |
| 728 | 741 |
| 729 ALIGN 16 | |
| 730 intel_aes_encrypt_cbc_256 PROC | 742 intel_aes_encrypt_cbc_256 PROC |
| 731 gen_aes_cbc_enc_func 14 | 743 gen_aes_cbc_enc_func 14 |
| 732 intel_aes_encrypt_cbc_256 ENDP | 744 intel_aes_encrypt_cbc_256 ENDP |
| 733 | 745 |
| 734 ALIGN 16 | |
| 735 intel_aes_decrypt_cbc_128 PROC | 746 intel_aes_decrypt_cbc_128 PROC |
| 736 gen_aes_cbc_dec_func 10 | 747 gen_aes_cbc_dec_func 10 |
| 737 intel_aes_decrypt_cbc_128 ENDP | 748 intel_aes_decrypt_cbc_128 ENDP |
| 738 | 749 |
| 739 ALIGN 16 | |
| 740 intel_aes_decrypt_cbc_192 PROC | 750 intel_aes_decrypt_cbc_192 PROC |
| 741 gen_aes_cbc_dec_func 12 | 751 gen_aes_cbc_dec_func 12 |
| 742 intel_aes_decrypt_cbc_192 ENDP | 752 intel_aes_decrypt_cbc_192 ENDP |
| 743 | 753 |
| 744 ALIGN 16 | |
| 745 intel_aes_decrypt_cbc_256 PROC | 754 intel_aes_decrypt_cbc_256 PROC |
| 746 gen_aes_cbc_dec_func 14 | 755 gen_aes_cbc_dec_func 14 |
| 747 intel_aes_decrypt_cbc_256 ENDP | 756 intel_aes_decrypt_cbc_256 ENDP |
| 748 | 757 |
| 749 | 758 |
| 750 | 759 |
| 751 ctrCtx textequ <esi> | 760 ctrCtx textequ <r10> |
| 752 CTR textequ <ebx> | 761 CTR textequ <r11d> |
| 762 CTRSave textequ <eax> |
| 753 | 763 |
| 754 gen_aes_ctr_func MACRO rnds | 764 gen_aes_ctr_func MACRO rnds |
| 755 | 765 |
| 756 LOCAL loop7 | 766 LOCAL loop8 |
| 757 LOCAL loop1 | 767 LOCAL loop1 |
| 758 LOCAL enc1 | 768 LOCAL enc1 |
| 759 LOCAL bail | 769 LOCAL bail |
| 760 | 770 |
| 761 push inputLen | 771 mov input, [rsp + 8*1 + 4*8] |
| 762 push ctrCtx | 772 mov inputLen, [rsp + 8*1 + 5*8] |
| 763 push CTR | |
| 764 push ebp | |
| 765 | 773 |
| 766 mov ctrCtx, [esp + 4*5 + 0*4] | 774 mov ctrCtx, ctx |
| 767 mov output, [esp + 4*5 + 1*4] | 775 mov ctx, [8+ctrCtx] |
| 768 mov input, [esp + 4*5 + 4*4] | 776 lea ctx, [48+ctx] |
| 769 mov inputLen, [esp + 4*5 + 5*4] | |
| 770 | 777 |
| 771 mov ctx, [4+ctrCtx] | 778 sub rsp, 3*16 |
| 772 lea ctx, [44+ctx] | 779 movdqu [rsp + 0*16], xmm6 |
| 780 movdqu [rsp + 1*16], xmm7 |
| 781 movdqu [rsp + 2*16], xmm8 |
| 773 | 782 |
| 774 mov ebp, esp | |
| 775 sub esp, 7*16 | |
| 776 and esp, -16 | |
| 777 | 783 |
| 778 movdqu xmm0, [8+ctrCtx] | 784 push rbp |
| 779 mov ctrCtx, [ctrCtx + 8 + 3*4] | 785 mov rbp, rsp |
| 780 bswap ctrCtx | 786 sub rsp, 8*16 |
| 787 and rsp, -16 |
| 788 |
| 789 |
| 790 movdqu xmm0, [16+ctrCtx] |
| 791 mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4] |
| 792 bswap CTRSave |
| 781 movdqu xmm1, [ctx + 0*16] | 793 movdqu xmm1, [ctx + 0*16] |
| 782 | 794 |
| 783 pxor xmm0, xmm1 | 795 pxor xmm0, xmm1 |
| 784 | 796 |
| 785 movdqa [esp + 0*16], xmm0 | 797 movdqa [rsp + 0*16], xmm0 |
| 786 movdqa [esp + 1*16], xmm0 | 798 movdqa [rsp + 1*16], xmm0 |
| 787 movdqa [esp + 2*16], xmm0 | 799 movdqa [rsp + 2*16], xmm0 |
| 788 movdqa [esp + 3*16], xmm0 | 800 movdqa [rsp + 3*16], xmm0 |
| 789 movdqa [esp + 4*16], xmm0 | 801 movdqa [rsp + 4*16], xmm0 |
| 790 movdqa [esp + 5*16], xmm0 | 802 movdqa [rsp + 5*16], xmm0 |
| 791 movdqa [esp + 6*16], xmm0 | 803 movdqa [rsp + 6*16], xmm0 |
| 804 movdqa [rsp + 7*16], xmm0 |
| 792 | 805 |
| 793 inc ctrCtx | 806 inc CTRSave |
| 794 mov CTR, ctrCtx | 807 mov CTR, CTRSave |
| 795 bswap CTR | 808 bswap CTR |
| 796 xor CTR, [ctx + 3*4] | 809 xor CTR, DWORD PTR [ctx + 3*4] |
| 797 mov [esp + 1*16 + 3*4], CTR | 810 mov DWORD PTR [rsp + 1*16 + 3*4], CTR |
| 798 | 811 |
| 799 inc ctrCtx | 812 inc CTRSave |
| 800 mov CTR, ctrCtx | 813 mov CTR, CTRSave |
| 801 bswap CTR | 814 bswap CTR |
| 802 xor CTR, [ctx + 3*4] | 815 xor CTR, DWORD PTR [ctx + 3*4] |
| 803 mov [esp + 2*16 + 3*4], CTR | 816 mov DWORD PTR [rsp + 2*16 + 3*4], CTR |
| 804 | 817 |
| 805 inc ctrCtx | 818 inc CTRSave |
| 806 mov CTR, ctrCtx | 819 mov CTR, CTRSave |
| 807 bswap CTR | 820 bswap CTR |
| 808 xor CTR, [ctx + 3*4] | 821 xor CTR, DWORD PTR [ctx + 3*4] |
| 809 mov [esp + 3*16 + 3*4], CTR | 822 mov DWORD PTR [rsp + 3*16 + 3*4], CTR |
| 810 | 823 |
| 811 inc ctrCtx | 824 inc CTRSave |
| 812 mov CTR, ctrCtx | 825 mov CTR, CTRSave |
| 813 bswap CTR | 826 bswap CTR |
| 814 xor CTR, [ctx + 3*4] | 827 xor CTR, DWORD PTR [ctx + 3*4] |
| 815 mov [esp + 4*16 + 3*4], CTR | 828 mov DWORD PTR [rsp + 4*16 + 3*4], CTR |
| 816 | 829 |
| 817 inc ctrCtx | 830 inc CTRSave |
| 818 mov CTR, ctrCtx | 831 mov CTR, CTRSave |
| 819 bswap CTR | 832 bswap CTR |
| 820 xor CTR, [ctx + 3*4] | 833 xor CTR, DWORD PTR [ctx + 3*4] |
| 821 mov [esp + 5*16 + 3*4], CTR | 834 mov DWORD PTR [rsp + 5*16 + 3*4], CTR |
| 822 | 835 |
| 823 inc ctrCtx | 836 inc CTRSave |
| 824 mov CTR, ctrCtx | 837 mov CTR, CTRSave |
| 825 bswap CTR | 838 bswap CTR |
| 826 xor CTR, [ctx + 3*4] | 839 xor CTR, DWORD PTR [ctx + 3*4] |
| 827 mov [esp + 6*16 + 3*4], CTR | 840 mov DWORD PTR [rsp + 6*16 + 3*4], CTR |
| 841 |
| 842 inc CTRSave |
| 843 mov CTR, CTRSave |
| 844 bswap CTR |
| 845 xor CTR, DWORD PTR [ctx + 3*4] |
| 846 mov DWORD PTR [rsp + 7*16 + 3*4], CTR |
| 828 | 847 |
| 829 | 848 |
| 830 loop7: | 849 loop8: |
| 831 cmp inputLen, 7*16 | 850 cmp inputLen, 8*16 |
| 832 jb loop1 | 851 jb loop1 |
| 833 | 852 |
| 834 movdqu xmm0, [0*16 + esp] | 853 movdqu xmm0, [0*16 + rsp] |
| 835 movdqu xmm1, [1*16 + esp] | 854 movdqu xmm1, [1*16 + rsp] |
| 836 movdqu xmm2, [2*16 + esp] | 855 movdqu xmm2, [2*16 + rsp] |
| 837 movdqu xmm3, [3*16 + esp] | 856 movdqu xmm3, [3*16 + rsp] |
| 838 movdqu xmm4, [4*16 + esp] | 857 movdqu xmm4, [4*16 + rsp] |
| 839 movdqu xmm5, [5*16 + esp] | 858 movdqu xmm5, [5*16 + rsp] |
| 840 movdqu xmm6, [6*16 + esp] | 859 movdqu xmm6, [6*16 + rsp] |
| 860 movdqu xmm7, [7*16 + rsp] |
| 841 | 861 |
| 842 i = 1 | 862 i = 1 |
| 843 WHILE i LE 7 | 863 WHILE i LE 8 |
| 844 aes_rnd i | 864 aes_rnd i |
| 845 | 865 |
| 846 inc ctrCtx | 866 inc CTRSave |
| 847 mov CTR, ctrCtx | 867 mov CTR, CTRSave |
| 848 bswap CTR | 868 bswap CTR |
| 849 xor CTR, [ctx + 3*4] | 869 xor CTR, DWORD PTR [ctx + 3*4] |
| 850 mov [esp + (i-1)*16 + 3*4], CTR | 870 mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR |
| 851 | 871 |
| 852 i = i+1 | 872 i = i+1 |
| 853 ENDM | 873 ENDM |
| 854 WHILE i LT rnds | 874 WHILE i LT rnds |
| 855 aes_rnd i | 875 aes_rnd i |
| 856 i = i+1 | 876 i = i+1 |
| 857 ENDM | 877 ENDM |
| 858 aes_last_rnd rnds | 878 aes_last_rnd rnds |
| 859 | 879 |
| 860 movdqu xmm7, [0*16 + input] | 880 movdqu xmm8, [0*16 + input] |
| 861 pxor xmm0, xmm7 | 881 pxor xmm0, xmm8 |
| 862 movdqu xmm7, [1*16 + input] | 882 movdqu xmm8, [1*16 + input] |
| 863 pxor xmm1, xmm7 | 883 pxor xmm1, xmm8 |
| 864 movdqu xmm7, [2*16 + input] | 884 movdqu xmm8, [2*16 + input] |
| 865 pxor xmm2, xmm7 | 885 pxor xmm2, xmm8 |
| 866 movdqu xmm7, [3*16 + input] | 886 movdqu xmm8, [3*16 + input] |
| 867 pxor xmm3, xmm7 | 887 pxor xmm3, xmm8 |
| 868 movdqu xmm7, [4*16 + input] | 888 movdqu xmm8, [4*16 + input] |
| 869 pxor xmm4, xmm7 | 889 pxor xmm4, xmm8 |
| 870 movdqu xmm7, [5*16 + input] | 890 movdqu xmm8, [5*16 + input] |
| 871 pxor xmm5, xmm7 | 891 pxor xmm5, xmm8 |
| 872 movdqu xmm7, [6*16 + input] | 892 movdqu xmm8, [6*16 + input] |
| 873 pxor xmm6, xmm7 | 893 pxor xmm6, xmm8 |
| 894 movdqu xmm8, [7*16 + input] |
| 895 pxor xmm7, xmm8 |
| 874 | 896 |
| 875 movdqu [0*16 + output], xmm0 | 897 movdqu [0*16 + output], xmm0 |
| 876 movdqu [1*16 + output], xmm1 | 898 movdqu [1*16 + output], xmm1 |
| 877 movdqu [2*16 + output], xmm2 | 899 movdqu [2*16 + output], xmm2 |
| 878 movdqu [3*16 + output], xmm3 | 900 movdqu [3*16 + output], xmm3 |
| 879 movdqu [4*16 + output], xmm4 | 901 movdqu [4*16 + output], xmm4 |
| 880 movdqu [5*16 + output], xmm5 | 902 movdqu [5*16 + output], xmm5 |
| 881 movdqu [6*16 + output], xmm6 | 903 movdqu [6*16 + output], xmm6 |
| 904 movdqu [7*16 + output], xmm7 |
| 882 | 905 |
| 883 lea input, [7*16 + input] | 906 lea input, [8*16 + input] |
| 884 lea output, [7*16 + output] | 907 lea output, [8*16 + output] |
| 885 sub inputLen, 7*16 | 908 sub inputLen, 8*16 |
| 886 jmp loop7 | 909 jmp loop8 |
| 887 | 910 |
| 888 | 911 |
| 889 loop1: | 912 loop1: |
| 890 cmp inputLen, 1*16 | 913 cmp inputLen, 1*16 |
| 891 jb bail | 914 jb bail |
| 892 | 915 |
| 893 movdqu xmm0, [esp] | 916 movdqu xmm0, [rsp] |
| 894 add esp, 16 | 917 add rsp, 16 |
| 895 | 918 |
| 896 i = 1 | 919 i = 1 |
| 897 WHILE i LT rnds | 920 WHILE i LT rnds |
| 898 movdqu xmm7, [i*16 + ctx] | 921 movdqu xmm7, [i*16 + ctx] |
| 899 aesenc xmm0, xmm7 | 922 aesenc xmm0, xmm7 |
| 900 i = i+1 | 923 i = i+1 |
| 901 ENDM | 924 ENDM |
| 902 movdqu xmm7, [rnds*16 + ctx] | 925 movdqu xmm7, [rnds*16 + ctx] |
| 903 aesenclast xmm0, xmm7 | 926 aesenclast xmm0, xmm7 |
| 904 | 927 |
| 905 movdqu xmm7, [input] | 928 movdqu xmm7, [input] |
| 906 pxor xmm0, xmm7 | 929 pxor xmm0, xmm7 |
| 907 movdqu [output], xmm0 | 930 movdqu [output], xmm0 |
| 908 | 931 |
| 909 lea input, [1*16 + input] | 932 lea input, [1*16 + input] |
| 910 lea output, [1*16 + output] | 933 lea output, [1*16 + output] |
| 911 sub inputLen, 1*16 | 934 sub inputLen, 1*16 |
| 912 jmp loop1 | 935 jmp loop1 |
| 913 | 936 |
| 914 bail: | 937 bail: |
| 915 | 938 |
| 916 mov ctrCtx, [ebp + 4*5 + 0*4] | 939 movdqu xmm0, [rsp] |
| 917 movdqu xmm0, [esp] | |
| 918 movdqu xmm1, [ctx + 0*16] | 940 movdqu xmm1, [ctx + 0*16] |
| 919 pxor xmm0, xmm1 | 941 pxor xmm0, xmm1 |
| 920 movdqu [8+ctrCtx], xmm0 | 942 movdqu [16+ctrCtx], xmm0 |
| 921 | 943 |
| 922 | 944 |
| 923 xor eax, eax | 945 xor rax, rax |
| 924 mov esp, ebp | 946 mov rsp, rbp |
| 925 pop ebp | 947 pop rbp |
| 926 pop CTR | 948 |
| 927 pop ctrCtx | 949 movdqu xmm6, [rsp + 0*16] |
| 928 pop inputLen | 950 movdqu xmm7, [rsp + 1*16] |
| 951 movdqu xmm8, [rsp + 2*16] |
| 952 add rsp, 3*16 |
| 953 |
| 929 ret | 954 ret |
| 930 ENDM | 955 ENDM |
| 931 | 956 |
| 932 | 957 |
| 933 ALIGN 16 | |
| 934 intel_aes_encrypt_ctr_128 PROC | 958 intel_aes_encrypt_ctr_128 PROC |
| 935 gen_aes_ctr_func 10 | 959 gen_aes_ctr_func 10 |
| 936 intel_aes_encrypt_ctr_128 ENDP | 960 intel_aes_encrypt_ctr_128 ENDP |
| 937 | 961 |
| 938 ALIGN 16 | |
| 939 intel_aes_encrypt_ctr_192 PROC | 962 intel_aes_encrypt_ctr_192 PROC |
| 940 gen_aes_ctr_func 12 | 963 gen_aes_ctr_func 12 |
| 941 intel_aes_encrypt_ctr_192 ENDP | 964 intel_aes_encrypt_ctr_192 ENDP |
| 942 | 965 |
| 943 ALIGN 16 | |
| 944 intel_aes_encrypt_ctr_256 PROC | 966 intel_aes_encrypt_ctr_256 PROC |
| 945 gen_aes_ctr_func 14 | 967 gen_aes_ctr_func 14 |
| 946 intel_aes_encrypt_ctr_256 ENDP | 968 intel_aes_encrypt_ctr_256 ENDP |
| 947 | 969 |
| 948 | 970 |
| 949 END | 971 END |
| OLD | NEW |