Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(286)

Side by Side Diff: third_party/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm

Issue 2869243005: Roll src/third_party/boringssl/src ddfcc6a60..1e5cb820d (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
; NASM source (perlasm output) for win-x86_64; Intel syntax, RIP-relative
; addressing via `default rel`. XMMWORD/YMMWORD/ZMMWORD are defined away so
; memory-size keywords in the generated code are accepted by NASM.
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
; NOTE(review): constants live in a writable .data section (perlasm output);
; a read-only section would be more conventional — confirm against generator.
5 section .data data align=8
6
7
8 ALIGN 16
; 128-bit little-endian integers 1..8 (low qword = n, high qword = 0),
; used as counter increments for the CTR blocks below.
9 one:
10 DQ 1,0
11 two:
12 DQ 2,0
13 three:
14 DQ 3,0
15 four:
16 DQ 4,0
17 five:
18 DQ 5,0
19 six:
20 DQ 6,0
21 seven:
22 DQ 7,0
23 eight:
24 DQ 8,0
25
; Sets the most-significant bit of the block (bit 31 of the last dword),
; applied with vpor to the initial counter — per AES-GCM-SIV counter format.
26 OR_MASK:
27 DD 0x00000000,0x00000000,0x00000000,0x80000000
; POLYVAL reduction constant (x^127 + x^126 + x^121 + 1 representation)
; used as the vpclmulqdq multiplicand in the reduction steps.
28 poly:
29 DQ 0x1,0xc200000000000000
; mask/con1/con2/con3: AES key-schedule helpers — byte-shuffle mask for the
; RotWord/SubWord step (via vpshufb + vaesenclast) and round constants.
30 mask:
31 DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
32 con1:
33 DD 1,1,1,1
34 con2:
35 DD 0x1b,0x1b,0x1b,0x1b
36 con3:
37 DB -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
; Clears the first dword of a block; used by the KDF on the nonce block.
38 and_mask:
39 DD 0,0xffffffff,0xffffffff,0xffffffff
40 section .text code align=64
41
42
;-----------------------------------------------------------------------
; GFMUL — internal helper, not an exported ABI function.
; In:   xmm0 = a, xmm1 = b (128-bit field elements)
; Out:  xmm0 = a*b reduced modulo the POLYVAL polynomial (`poly` constant).
; Preserves xmm1; clobbers xmm2-xmm5 and flags.
; Structure: four carry-less multiplies build the 256-bit product, then two
; identical fold steps against `poly` reduce it back to 128 bits.
;-----------------------------------------------------------------------
43 ALIGN 16
44 GFMUL:
45
; Schoolbook 128x128 -> 256-bit carry-less product:
; xmm2 = lo*lo, xmm5 = hi*hi, xmm3/xmm4 = cross terms.
46 vpclmulqdq xmm2,xmm0,xmm1,0x00
47 vpclmulqdq xmm5,xmm0,xmm1,0x11
48 vpclmulqdq xmm3,xmm0,xmm1,0x10
49 vpclmulqdq xmm4,xmm0,xmm1,0x01
50 vpxor xmm3,xmm3,xmm4
; Split the combined middle term and fold its halves into low (xmm2)
; and high (xmm5) halves of the product.
51 vpslldq xmm4,xmm3,8
52 vpsrldq xmm3,xmm3,8
53 vpxor xmm2,xmm2,xmm4
54 vpxor xmm5,xmm5,xmm3
55
; First reduction fold: multiply low half by poly, swap qwords, combine.
56 vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10
57 vpshufd xmm4,xmm2,78
58 vpxor xmm2,xmm3,xmm4
59
; Second reduction fold (same shape) completes the 256->128 reduction.
60 vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10
61 vpshufd xmm4,xmm2,78
62 vpxor xmm2,xmm3,xmm4
63
; Add the high half of the product to get the final field element.
64 vpxor xmm0,xmm2,xmm5
65 DB 0F3h,0C3h ;repret
66
67
;-----------------------------------------------------------------------
; void aesgcmsiv_htable_init(uint8_t htable[16*8], const uint8_t *H);
; Win64 args: rcx = htable (out), rdx = H; remapped to rdi/rsi so the
; SysV-style body below can be shared across platforms (perlasm pattern).
; Fills htable[i] = H^(i+1) for i = 0..7: xmm1 holds the constant H,
; xmm0 the running power, and each GFMUL call advances xmm0 *= H.
; Leaf-calls GFMUL; rdi/rsi saved per Win64 convention.
;-----------------------------------------------------------------------
68 global aesgcmsiv_htable_init
69
70 ALIGN 16
71 aesgcmsiv_htable_init:
72 mov QWORD[8+rsp],rdi ;WIN64 prologue
73 mov QWORD[16+rsp],rsi
74 mov rax,rsp
75 $L$SEH_begin_aesgcmsiv_htable_init:
76 mov rdi,rcx
77 mov rsi,rdx
78
79
80
; xmm0 = xmm1 = H; store H^1, then repeatedly multiply and store.
81 vmovdqa xmm0,XMMWORD[rsi]
82 vmovdqa xmm1,xmm0
83 vmovdqa XMMWORD[rdi],xmm0
84 call GFMUL
85 vmovdqa XMMWORD[16+rdi],xmm0
86 call GFMUL
87 vmovdqa XMMWORD[32+rdi],xmm0
88 call GFMUL
89 vmovdqa XMMWORD[48+rdi],xmm0
90 call GFMUL
91 vmovdqa XMMWORD[64+rdi],xmm0
92 call GFMUL
93 vmovdqa XMMWORD[80+rdi],xmm0
94 call GFMUL
95 vmovdqa XMMWORD[96+rdi],xmm0
96 call GFMUL
97 vmovdqa XMMWORD[112+rdi],xmm0
98 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
99 mov rsi,QWORD[16+rsp]
100 DB 0F3h,0C3h ;repret
101
102 $L$SEH_end_aesgcmsiv_htable_init:
;-----------------------------------------------------------------------
; void aesgcmsiv_htable6_init(uint8_t htable[16*6], const uint8_t *H);
; Same as aesgcmsiv_htable_init but builds only six powers:
; htable[i] = H^(i+1) for i = 0..5.
; Win64 args: rcx = htable (out), rdx = H.
;-----------------------------------------------------------------------
103 global aesgcmsiv_htable6_init
104
105 ALIGN 16
106 aesgcmsiv_htable6_init:
107 mov QWORD[8+rsp],rdi ;WIN64 prologue
108 mov QWORD[16+rsp],rsi
109 mov rax,rsp
110 $L$SEH_begin_aesgcmsiv_htable6_init:
111 mov rdi,rcx
112 mov rsi,rdx
113
114
115
; xmm1 = H (constant multiplicand), xmm0 = running power.
116 vmovdqa xmm0,XMMWORD[rsi]
117 vmovdqa xmm1,xmm0
118 vmovdqa XMMWORD[rdi],xmm0
119 call GFMUL
120 vmovdqa XMMWORD[16+rdi],xmm0
121 call GFMUL
122 vmovdqa XMMWORD[32+rdi],xmm0
123 call GFMUL
124 vmovdqa XMMWORD[48+rdi],xmm0
125 call GFMUL
126 vmovdqa XMMWORD[64+rdi],xmm0
127 call GFMUL
128 vmovdqa XMMWORD[80+rdi],xmm0
129 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
130 mov rsi,QWORD[16+rsp]
131 DB 0F3h,0C3h ;repret
132
133 $L$SEH_end_aesgcmsiv_htable6_init:
;-----------------------------------------------------------------------
; void aesgcmsiv_htable_polyval(const uint8_t htable[16*8], const uint8_t *in,
;                               size_t in_len, uint8_t T[16]);
; Win64 args: rcx=htable, rdx=in, r8=in_len (bytes), r9=T (accumulator,
; read and written in place); remapped to rdi/rsi/rdx/rcx below.
; Accumulates POLYVAL over `in` into T using the precomputed powers of H:
; first a (in_len mod 128)-byte prefix is multiplied against matching
; powers, then full 128-byte chunks are processed 8 blocks at a time with
; the reduction of the previous chunk interleaved (lazy reduction).
; Register roles in the main loop:
;   xmm1:xmm9 = unreduced 256-bit accumulator (low:high)
;   xmm3/xmm4/xmm5 = low/high/middle partial products of current chunk
;   xmm0 = current input block, xmm6/xmm7 = scratch
;-----------------------------------------------------------------------
134 global aesgcmsiv_htable_polyval
135
136 ALIGN 16
137 aesgcmsiv_htable_polyval:
138 mov QWORD[8+rsp],rdi ;WIN64 prologue
139 mov QWORD[16+rsp],rsi
140 mov rax,rsp
141 $L$SEH_begin_aesgcmsiv_htable_polyval:
142 mov rdi,rcx
143 mov rsi,rdx
144 mov rdx,r8
145 mov rcx,r9
146
147
148
; Early-out for zero-length input: T is left untouched.
149 test rdx,rdx
150 jnz NEAR $L$htable_polyval_start
151 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
152 mov rsi,QWORD[16+rsp]
153 DB 0F3h,0C3h ;repret
154
155 $L$htable_polyval_start:
156 vzeroall
157
158
159
; r11 = prefix length = in_len mod 128; rdx becomes the multiple-of-128 rest.
160 mov r11,rdx
161 and r11,127
162
163 jz NEAR $L$htable_polyval_no_prefix
164
165 vpxor xmm9,xmm9,xmm9
166 vmovdqa xmm1,XMMWORD[rcx]
167 sub rdx,r11
168
169 sub r11,16
170
; First prefix block is XORed with the incoming T, then multiplied by the
; power of H selected by its distance from the end (htable[r11/16]).
171
172 vmovdqu xmm0,XMMWORD[rsi]
173 vpxor xmm0,xmm0,xmm1
174
175 vpclmulqdq xmm5,xmm0,XMMWORD[r11*1+rdi],0x01
176 vpclmulqdq xmm3,xmm0,XMMWORD[r11*1+rdi],0x00
177 vpclmulqdq xmm4,xmm0,XMMWORD[r11*1+rdi],0x11
178 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
179 vpxor xmm5,xmm5,xmm6
180
181 lea rsi,[16+rsi]
182 test r11,r11
183 jnz NEAR $L$htable_polyval_prefix_loop
184 jmp NEAR $L$htable_polyval_prefix_complete
185
186
187 ALIGN 64
; Remaining prefix blocks: multiply-accumulate each block against the
; power of H indexed by r11 (decreasing toward htable[0] = H).
188 $L$htable_polyval_prefix_loop:
189 sub r11,16
190
191 vmovdqu xmm0,XMMWORD[rsi]
192
193 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x00
194 vpxor xmm3,xmm3,xmm6
195 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x11
196 vpxor xmm4,xmm4,xmm6
197 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x01
198 vpxor xmm5,xmm5,xmm6
199 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
200 vpxor xmm5,xmm5,xmm6
201
202 test r11,r11
203
204 lea rsi,[16+rsi]
205
206 jnz NEAR $L$htable_polyval_prefix_loop
207
; Fold the middle term into low/high to form the 256-bit accumulator
; xmm1 (low) : xmm9 (high) consumed by the main loop.
208 $L$htable_polyval_prefix_complete:
209 vpsrldq xmm6,xmm5,8
210 vpslldq xmm5,xmm5,8
211
212 vpxor xmm9,xmm4,xmm6
213 vpxor xmm1,xmm3,xmm5
214
215 jmp NEAR $L$htable_polyval_main_loop
216
; No prefix: seed the accumulator with T in the high half (xmm9) — it is
; added back after the final reduction, matching the prefix path's algebra.
217 $L$htable_polyval_no_prefix:
218
219
220
221
222 vpxor xmm1,xmm1,xmm1
223 vmovdqa xmm9,XMMWORD[rcx]
224
225 ALIGN 64
; Main loop: one 128-byte chunk per iteration. Blocks are consumed from
; offset 112 down to 0, paired with htable[0..7] (H^1..H^8). The two
; reduction folds of the previous accumulator (xmm1 via `poly`) are
; interleaved between multiplies to hide pclmul latency.
226 $L$htable_polyval_main_loop:
227 sub rdx,0x80
228 jb NEAR $L$htable_polyval_out
229
230 vmovdqu xmm0,XMMWORD[112+rsi]
231
232 vpclmulqdq xmm5,xmm0,XMMWORD[rdi],0x01
233 vpclmulqdq xmm3,xmm0,XMMWORD[rdi],0x00
234 vpclmulqdq xmm4,xmm0,XMMWORD[rdi],0x11
235 vpclmulqdq xmm6,xmm0,XMMWORD[rdi],0x10
236 vpxor xmm5,xmm5,xmm6
237
238
239 vmovdqu xmm0,XMMWORD[96+rsi]
240 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x01
241 vpxor xmm5,xmm5,xmm6
242 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x00
243 vpxor xmm3,xmm3,xmm6
244 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x11
245 vpxor xmm4,xmm4,xmm6
246 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x10
247 vpxor xmm5,xmm5,xmm6
248
249
250
251 vmovdqu xmm0,XMMWORD[80+rsi]
252
; First reduction fold of the previous accumulator (interleaved).
253 vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10
254 vpalignr xmm1,xmm1,xmm1,8
255
256 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x01
257 vpxor xmm5,xmm5,xmm6
258 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x00
259 vpxor xmm3,xmm3,xmm6
260 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x11
261 vpxor xmm4,xmm4,xmm6
262 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x10
263 vpxor xmm5,xmm5,xmm6
264
265
266 vpxor xmm1,xmm1,xmm7
267
268 vmovdqu xmm0,XMMWORD[64+rsi]
269
270 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x01
271 vpxor xmm5,xmm5,xmm6
272 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x00
273 vpxor xmm3,xmm3,xmm6
274 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x11
275 vpxor xmm4,xmm4,xmm6
276 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x10
277 vpxor xmm5,xmm5,xmm6
278
279
280 vmovdqu xmm0,XMMWORD[48+rsi]
281
; Second reduction fold of the previous accumulator (interleaved).
282 vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10
283 vpalignr xmm1,xmm1,xmm1,8
284
285 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x01
286 vpxor xmm5,xmm5,xmm6
287 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x00
288 vpxor xmm3,xmm3,xmm6
289 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x11
290 vpxor xmm4,xmm4,xmm6
291 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x10
292 vpxor xmm5,xmm5,xmm6
293
294
295 vpxor xmm1,xmm1,xmm7
296
297 vmovdqu xmm0,XMMWORD[32+rsi]
298
299 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x01
300 vpxor xmm5,xmm5,xmm6
301 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x00
302 vpxor xmm3,xmm3,xmm6
303 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x11
304 vpxor xmm4,xmm4,xmm6
305 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x10
306 vpxor xmm5,xmm5,xmm6
307
308
; Add the previous high half: xmm1 is now the fully reduced prior T.
309 vpxor xmm1,xmm1,xmm9
310
311 vmovdqu xmm0,XMMWORD[16+rsi]
312
313 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x01
314 vpxor xmm5,xmm5,xmm6
315 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x00
316 vpxor xmm3,xmm3,xmm6
317 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x11
318 vpxor xmm4,xmm4,xmm6
319 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x10
320 vpxor xmm5,xmm5,xmm6
321
322
; First block of the chunk carries the reduced accumulator (Horner step),
; multiplied by the highest power H^8.
323 vmovdqu xmm0,XMMWORD[rsi]
324 vpxor xmm0,xmm0,xmm1
325
326 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x01
327 vpxor xmm5,xmm5,xmm6
328 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x00
329 vpxor xmm3,xmm3,xmm6
330 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x11
331 vpxor xmm4,xmm4,xmm6
332 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x10
333 vpxor xmm5,xmm5,xmm6
334
335
; Recombine middle term into the new 256-bit accumulator xmm1:xmm9.
336 vpsrldq xmm6,xmm5,8
337 vpslldq xmm5,xmm5,8
338
339 vpxor xmm9,xmm4,xmm6
340 vpxor xmm1,xmm3,xmm5
341
342 lea rsi,[128+rsi]
343 jmp NEAR $L$htable_polyval_main_loop
344
345
346
; Final reduction: two folds by `poly`, add the high half, store T.
347 $L$htable_polyval_out:
348 vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10
349 vpalignr xmm1,xmm1,xmm1,8
350 vpxor xmm1,xmm1,xmm6
351
352 vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10
353 vpalignr xmm1,xmm1,xmm1,8
354 vpxor xmm1,xmm1,xmm6
355 vpxor xmm1,xmm1,xmm9
356
357 vmovdqu XMMWORD[rcx],xmm1
358 vzeroupper
359 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
360 mov rsi,QWORD[16+rsp]
361 DB 0F3h,0C3h ;repret
362
363 $L$SEH_end_aesgcmsiv_htable_polyval:
;-----------------------------------------------------------------------
; void aesgcmsiv_polyval_horner(uint8_t T[16], const uint8_t *H,
;                               const uint8_t *in, size_t in_blocks);
; Win64 args: rcx=T (in/out accumulator), rdx=H, r8=in, r9=block count
; (16-byte blocks); remapped to rdi/rsi/rdx/rcx below.
; Straightforward Horner evaluation: for each block,
;   T = (T xor in[i]) * H   via the GFMUL helper (xmm1 = H, xmm0 = T).
;-----------------------------------------------------------------------
364 global aesgcmsiv_polyval_horner
365
366 ALIGN 16
367 aesgcmsiv_polyval_horner:
368 mov QWORD[8+rsp],rdi ;WIN64 prologue
369 mov QWORD[16+rsp],rsi
370 mov rax,rsp
371 $L$SEH_begin_aesgcmsiv_polyval_horner:
372 mov rdi,rcx
373 mov rsi,rdx
374 mov rdx,r8
375 mov rcx,r9
376
377
378
; Zero blocks: nothing to do, T unchanged.
379 test rcx,rcx
380 jnz NEAR $L$polyval_horner_start
381 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
382 mov rsi,QWORD[16+rsp]
383 DB 0F3h,0C3h ;repret
384
385 $L$polyval_horner_start:
386
387
388
; r10 = byte offset into input, rcx = total byte length (blocks * 16).
389 xor r10,r10
390 shl rcx,4
391
392 vmovdqa xmm1,XMMWORD[rsi]
393 vmovdqa xmm0,XMMWORD[rdi]
394
395 $L$polyval_horner_loop:
396 vpxor xmm0,xmm0,XMMWORD[r10*1+rdx]
397 call GFMUL
398
399 add r10,16
400 cmp rcx,r10
401 jne NEAR $L$polyval_horner_loop
402
403
404 vmovdqa XMMWORD[rdi],xmm0
405 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
406 mov rsi,QWORD[16+rsp]
407 DB 0F3h,0C3h ;repret
408
409 $L$SEH_end_aesgcmsiv_polyval_horner:
;-----------------------------------------------------------------------
; void aes128gcmsiv_aes_ks(const uint8_t *key, uint8_t out[16*11]);
; Win64 args: rcx = 16-byte key, rdx = out (11 round keys).
; AES-128 key expansion using the vpshufb+vaesenclast trick for the
; RotWord/SubWord step: xmm15 = shuffle mask, xmm0 = round constant
; (doubled each round with vpslld; switched to `con2` for the last two
; rounds where rcon is 0x1b/0x36).
;-----------------------------------------------------------------------
410 global aes128gcmsiv_aes_ks
411
412 ALIGN 16
413 aes128gcmsiv_aes_ks:
414 mov QWORD[8+rsp],rdi ;WIN64 prologue
415 mov QWORD[16+rsp],rsi
416 mov rax,rsp
417 $L$SEH_begin_aes128gcmsiv_aes_ks:
418 mov rdi,rcx
419 mov rsi,rdx
420
421
422
; Round key 0 is the cipher key itself.
423 vmovdqa xmm1,XMMWORD[rdi]
424 vmovdqa XMMWORD[rsi],xmm1
425
426 vmovdqa xmm0,XMMWORD[con1]
427 vmovdqa xmm15,XMMWORD[mask]
428
429 mov rax,8
430
; Rounds 1..8: xmm2 = SubWord(RotWord(w3)) ^ rcon; the three
; vpslldq/vpxor pairs compute the running XOR of the previous words.
431 $L$ks128_loop:
432 add rsi,16
433 sub rax,1
434 vpshufb xmm2,xmm1,xmm15
435 vaesenclast xmm2,xmm2,xmm0
436 vpslld xmm0,xmm0,1
437 vpslldq xmm3,xmm1,4
438 vpxor xmm1,xmm1,xmm3
439 vpslldq xmm3,xmm3,4
440 vpxor xmm1,xmm1,xmm3
441 vpslldq xmm3,xmm3,4
442 vpxor xmm1,xmm1,xmm3
443 vpxor xmm1,xmm1,xmm2
444 vmovdqa XMMWORD[rsi],xmm1
445 jne NEAR $L$ks128_loop
446
; Round 9 with rcon = 0x1b (con2)...
447 vmovdqa xmm0,XMMWORD[con2]
448 vpshufb xmm2,xmm1,xmm15
449 vaesenclast xmm2,xmm2,xmm0
450 vpslld xmm0,xmm0,1
451 vpslldq xmm3,xmm1,4
452 vpxor xmm1,xmm1,xmm3
453 vpslldq xmm3,xmm3,4
454 vpxor xmm1,xmm1,xmm3
455 vpslldq xmm3,xmm3,4
456 vpxor xmm1,xmm1,xmm3
457 vpxor xmm1,xmm1,xmm2
458 vmovdqa XMMWORD[16+rsi],xmm1
459
; ...and round 10 with the doubled constant (0x36).
460 vpshufb xmm2,xmm1,xmm15
461 vaesenclast xmm2,xmm2,xmm0
462 vpslldq xmm3,xmm1,4
463 vpxor xmm1,xmm1,xmm3
464 vpslldq xmm3,xmm3,4
465 vpxor xmm1,xmm1,xmm3
466 vpslldq xmm3,xmm3,4
467 vpxor xmm1,xmm1,xmm3
468 vpxor xmm1,xmm1,xmm2
469 vmovdqa XMMWORD[32+rsi],xmm1
470 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
471 mov rsi,QWORD[16+rsp]
472 DB 0F3h,0C3h ;repret
473
474 $L$SEH_end_aes128gcmsiv_aes_ks:
;-----------------------------------------------------------------------
; void aes256gcmsiv_aes_ks(const uint8_t *key, uint8_t out[16*15]);
; Win64 args: rcx = 32-byte key, rdx = out (15 round keys).
; AES-256 key expansion: each loop iteration derives two round keys —
; one with the rcon step (vpshufb mask + vaesenclast, rcon in xmm0)
; and one with the rcon-less SubWord step (vpshufd 0xff + vaesenclast
; against the zero key xmm14). con3 implements the word-XOR chaining.
; NOTE(review): no SEH end label follows this function in the generated
; file — matches the generator's output, not an edit here.
;-----------------------------------------------------------------------
475 global aes256gcmsiv_aes_ks
476
477 ALIGN 16
478 aes256gcmsiv_aes_ks:
479 mov QWORD[8+rsp],rdi ;WIN64 prologue
480 mov QWORD[16+rsp],rsi
481 mov rax,rsp
482 $L$SEH_begin_aes256gcmsiv_aes_ks:
483 mov rdi,rcx
484 mov rsi,rdx
485
486
487
; Round keys 0 and 1 are the two halves of the cipher key.
488 vmovdqa xmm1,XMMWORD[rdi]
489 vmovdqa xmm3,XMMWORD[16+rdi]
490 vmovdqa XMMWORD[rsi],xmm1
491 vmovdqa XMMWORD[16+rsi],xmm3
492 vmovdqa xmm0,XMMWORD[con1]
493 vmovdqa xmm15,XMMWORD[mask]
494 vpxor xmm14,xmm14,xmm14
495 mov rax,6
496
; Six iterations produce round keys 2..13 (two per iteration).
497 $L$ks256_loop:
498 add rsi,32
499 sub rax,1
500 vpshufb xmm2,xmm3,xmm15
501 vaesenclast xmm2,xmm2,xmm0
502 vpslld xmm0,xmm0,1
503 vpsllq xmm4,xmm1,32
504 vpxor xmm1,xmm1,xmm4
505 vpshufb xmm4,xmm1,XMMWORD[con3]
506 vpxor xmm1,xmm1,xmm4
507 vpxor xmm1,xmm1,xmm2
508 vmovdqa XMMWORD[rsi],xmm1
; Second half: SubWord only (no RotWord, no rcon), per AES-256 schedule.
509 vpshufd xmm2,xmm1,0xff
510 vaesenclast xmm2,xmm2,xmm14
511 vpsllq xmm4,xmm3,32
512 vpxor xmm3,xmm3,xmm4
513 vpshufb xmm4,xmm3,XMMWORD[con3]
514 vpxor xmm3,xmm3,xmm4
515 vpxor xmm3,xmm3,xmm2
516 vmovdqa XMMWORD[16+rsi],xmm3
517 jne NEAR $L$ks256_loop
518
; Final (15th) round key.
519 vpshufb xmm2,xmm3,xmm15
520 vaesenclast xmm2,xmm2,xmm0
521 vpsllq xmm4,xmm1,32
522 vpxor xmm1,xmm1,xmm4
523 vpshufb xmm4,xmm1,XMMWORD[con3]
524 vpxor xmm1,xmm1,xmm4
525 vpxor xmm1,xmm1,xmm2
526 vmovdqa XMMWORD[32+rsi],xmm1
527 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
528 mov rsi,QWORD[16+rsp]
529 DB 0F3h,0C3h ;repret
530
;-----------------------------------------------------------------------
; void aes128gcmsiv_aes_ks_enc_x1(const uint8_t *in, uint8_t *out,
;                                 uint8_t key_schedule[16*11],
;                                 const uint8_t *key);
; Win64 args: rcx=in (one 16-byte block), rdx=out (ciphertext),
; r8=key_schedule (out), r9=key; remapped to rdi/rsi/rdx/rcx below.
; Fused AES-128 key expansion + single-block encryption: each newly
; derived round key (xmm1) is stored to the schedule and immediately
; applied to the state (xmm4) with vaesenc/vaesenclast, so the block is
; encrypted in one pass while the schedule is produced.
; Round constant in xmm0 (doubled per round; con2 for the last two).
;-----------------------------------------------------------------------
531 global aes128gcmsiv_aes_ks_enc_x1
532
533 ALIGN 16
534 aes128gcmsiv_aes_ks_enc_x1:
535 mov QWORD[8+rsp],rdi ;WIN64 prologue
536 mov QWORD[16+rsp],rsi
537 mov rax,rsp
538 $L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1:
539 mov rdi,rcx
540 mov rsi,rdx
541 mov rdx,r8
542 mov rcx,r9
543
544
545
; xmm1 = round key 0 (= cipher key); xmm4 = plaintext ^ round key 0.
546 vmovdqa xmm1,XMMWORD[rcx]
547 vmovdqa xmm4,XMMWORD[rdi]
548
549 vmovdqa XMMWORD[rdx],xmm1
550 vpxor xmm4,xmm4,xmm1
551
552 vmovdqa xmm0,XMMWORD[con1]
553 vmovdqa xmm15,XMMWORD[mask]
554
; Round 1 key derivation (pattern repeats for rounds 2..8 below).
555 vpshufb xmm2,xmm1,xmm15
556 vaesenclast xmm2,xmm2,xmm0
557 vpslld xmm0,xmm0,1
558 vpsllq xmm3,xmm1,32
559 vpxor xmm1,xmm1,xmm3
560 vpshufb xmm3,xmm1,XMMWORD[con3]
561 vpxor xmm1,xmm1,xmm3
562 vpxor xmm1,xmm1,xmm2
563
564 vaesenc xmm4,xmm4,xmm1
565 vmovdqa XMMWORD[16+rdx],xmm1
566
567 vpshufb xmm2,xmm1,xmm15
568 vaesenclast xmm2,xmm2,xmm0
569 vpslld xmm0,xmm0,1
570 vpsllq xmm3,xmm1,32
571 vpxor xmm1,xmm1,xmm3
572 vpshufb xmm3,xmm1,XMMWORD[con3]
573 vpxor xmm1,xmm1,xmm3
574 vpxor xmm1,xmm1,xmm2
575
576 vaesenc xmm4,xmm4,xmm1
577 vmovdqa XMMWORD[32+rdx],xmm1
578
579 vpshufb xmm2,xmm1,xmm15
580 vaesenclast xmm2,xmm2,xmm0
581 vpslld xmm0,xmm0,1
582 vpsllq xmm3,xmm1,32
583 vpxor xmm1,xmm1,xmm3
584 vpshufb xmm3,xmm1,XMMWORD[con3]
585 vpxor xmm1,xmm1,xmm3
586 vpxor xmm1,xmm1,xmm2
587
588 vaesenc xmm4,xmm4,xmm1
589 vmovdqa XMMWORD[48+rdx],xmm1
590
591 vpshufb xmm2,xmm1,xmm15
592 vaesenclast xmm2,xmm2,xmm0
593 vpslld xmm0,xmm0,1
594 vpsllq xmm3,xmm1,32
595 vpxor xmm1,xmm1,xmm3
596 vpshufb xmm3,xmm1,XMMWORD[con3]
597 vpxor xmm1,xmm1,xmm3
598 vpxor xmm1,xmm1,xmm2
599
600 vaesenc xmm4,xmm4,xmm1
601 vmovdqa XMMWORD[64+rdx],xmm1
602
603 vpshufb xmm2,xmm1,xmm15
604 vaesenclast xmm2,xmm2,xmm0
605 vpslld xmm0,xmm0,1
606 vpsllq xmm3,xmm1,32
607 vpxor xmm1,xmm1,xmm3
608 vpshufb xmm3,xmm1,XMMWORD[con3]
609 vpxor xmm1,xmm1,xmm3
610 vpxor xmm1,xmm1,xmm2
611
612 vaesenc xmm4,xmm4,xmm1
613 vmovdqa XMMWORD[80+rdx],xmm1
614
615 vpshufb xmm2,xmm1,xmm15
616 vaesenclast xmm2,xmm2,xmm0
617 vpslld xmm0,xmm0,1
618 vpsllq xmm3,xmm1,32
619 vpxor xmm1,xmm1,xmm3
620 vpshufb xmm3,xmm1,XMMWORD[con3]
621 vpxor xmm1,xmm1,xmm3
622 vpxor xmm1,xmm1,xmm2
623
624 vaesenc xmm4,xmm4,xmm1
625 vmovdqa XMMWORD[96+rdx],xmm1
626
627 vpshufb xmm2,xmm1,xmm15
628 vaesenclast xmm2,xmm2,xmm0
629 vpslld xmm0,xmm0,1
630 vpsllq xmm3,xmm1,32
631 vpxor xmm1,xmm1,xmm3
632 vpshufb xmm3,xmm1,XMMWORD[con3]
633 vpxor xmm1,xmm1,xmm3
634 vpxor xmm1,xmm1,xmm2
635
636 vaesenc xmm4,xmm4,xmm1
637 vmovdqa XMMWORD[112+rdx],xmm1
638
639 vpshufb xmm2,xmm1,xmm15
640 vaesenclast xmm2,xmm2,xmm0
641 vpslld xmm0,xmm0,1
642 vpsllq xmm3,xmm1,32
643 vpxor xmm1,xmm1,xmm3
644 vpshufb xmm3,xmm1,XMMWORD[con3]
645 vpxor xmm1,xmm1,xmm3
646 vpxor xmm1,xmm1,xmm2
647
648 vaesenc xmm4,xmm4,xmm1
649 vmovdqa XMMWORD[128+rdx],xmm1
650
651
; Rounds 9 and 10 use the con2 constant (rcon 0x1b, then 0x36).
652 vmovdqa xmm0,XMMWORD[con2]
653
654 vpshufb xmm2,xmm1,xmm15
655 vaesenclast xmm2,xmm2,xmm0
656 vpslld xmm0,xmm0,1
657 vpsllq xmm3,xmm1,32
658 vpxor xmm1,xmm1,xmm3
659 vpshufb xmm3,xmm1,XMMWORD[con3]
660 vpxor xmm1,xmm1,xmm3
661 vpxor xmm1,xmm1,xmm2
662
663 vaesenc xmm4,xmm4,xmm1
664 vmovdqa XMMWORD[144+rdx],xmm1
665
666 vpshufb xmm2,xmm1,xmm15
667 vaesenclast xmm2,xmm2,xmm0
668 vpsllq xmm3,xmm1,32
669 vpxor xmm1,xmm1,xmm3
670 vpshufb xmm3,xmm1,XMMWORD[con3]
671 vpxor xmm1,xmm1,xmm3
672 vpxor xmm1,xmm1,xmm2
673
; Final round uses vaesenclast with the last round key.
674 vaesenclast xmm4,xmm4,xmm1
675 vmovdqa XMMWORD[160+rdx],xmm1
676
677
678 vmovdqa XMMWORD[rsi],xmm4
679 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
680 mov rsi,QWORD[16+rsp]
681 DB 0F3h,0C3h ;repret
682
683 $L$SEH_end_aes128gcmsiv_aes_ks_enc_x1:
;-----------------------------------------------------------------------
; void aes128gcmsiv_kdf(const uint8_t *nonce_block, uint8_t out[64],
;                       const uint8_t *key_schedule);
; Win64 args: rcx=input block, rdx=out (4 ciphertext blocks),
; r8=expanded AES-128 key schedule (11 round keys).
; AES-GCM-SIV key-derivation helper: builds four consecutive counter
; blocks from the input (vpshufd 0x90 rearranges dwords and and_mask
; clears the counter dword, then `one` is added three times) and
; encrypts all four in parallel through the 10 AES-128 rounds.
;-----------------------------------------------------------------------
684 global aes128gcmsiv_kdf
685
686 ALIGN 16
687 aes128gcmsiv_kdf:
688 mov QWORD[8+rsp],rdi ;WIN64 prologue
689 mov QWORD[16+rsp],rsi
690 mov rax,rsp
691 $L$SEH_begin_aes128gcmsiv_kdf:
692 mov rdi,rcx
693 mov rsi,rdx
694 mov rdx,r8
695
696
697
698
699
700
701
; Build counter blocks 0..3 in xmm9..xmm12.
702 vmovdqa xmm1,XMMWORD[rdx]
703 vmovdqa xmm9,XMMWORD[rdi]
704 vmovdqa xmm12,XMMWORD[and_mask]
705 vmovdqa xmm13,XMMWORD[one]
706 vpshufd xmm9,xmm9,0x90
707 vpand xmm9,xmm9,xmm12
708 vpaddd xmm10,xmm9,xmm13
709 vpaddd xmm11,xmm10,xmm13
710 vpaddd xmm12,xmm11,xmm13
711
; AddRoundKey with round key 0, then rounds 1..9 (vaesenc) and the
; final round (vaesenclast), applied to all four blocks in parallel.
712 vpxor xmm9,xmm9,xmm1
713 vpxor xmm10,xmm10,xmm1
714 vpxor xmm11,xmm11,xmm1
715 vpxor xmm12,xmm12,xmm1
716
717 vmovdqa xmm1,XMMWORD[16+rdx]
718 vaesenc xmm9,xmm9,xmm1
719 vaesenc xmm10,xmm10,xmm1
720 vaesenc xmm11,xmm11,xmm1
721 vaesenc xmm12,xmm12,xmm1
722
723 vmovdqa xmm2,XMMWORD[32+rdx]
724 vaesenc xmm9,xmm9,xmm2
725 vaesenc xmm10,xmm10,xmm2
726 vaesenc xmm11,xmm11,xmm2
727 vaesenc xmm12,xmm12,xmm2
728
729 vmovdqa xmm1,XMMWORD[48+rdx]
730 vaesenc xmm9,xmm9,xmm1
731 vaesenc xmm10,xmm10,xmm1
732 vaesenc xmm11,xmm11,xmm1
733 vaesenc xmm12,xmm12,xmm1
734
735 vmovdqa xmm2,XMMWORD[64+rdx]
736 vaesenc xmm9,xmm9,xmm2
737 vaesenc xmm10,xmm10,xmm2
738 vaesenc xmm11,xmm11,xmm2
739 vaesenc xmm12,xmm12,xmm2
740
741 vmovdqa xmm1,XMMWORD[80+rdx]
742 vaesenc xmm9,xmm9,xmm1
743 vaesenc xmm10,xmm10,xmm1
744 vaesenc xmm11,xmm11,xmm1
745 vaesenc xmm12,xmm12,xmm1
746
747 vmovdqa xmm2,XMMWORD[96+rdx]
748 vaesenc xmm9,xmm9,xmm2
749 vaesenc xmm10,xmm10,xmm2
750 vaesenc xmm11,xmm11,xmm2
751 vaesenc xmm12,xmm12,xmm2
752
753 vmovdqa xmm1,XMMWORD[112+rdx]
754 vaesenc xmm9,xmm9,xmm1
755 vaesenc xmm10,xmm10,xmm1
756 vaesenc xmm11,xmm11,xmm1
757 vaesenc xmm12,xmm12,xmm1
758
759 vmovdqa xmm2,XMMWORD[128+rdx]
760 vaesenc xmm9,xmm9,xmm2
761 vaesenc xmm10,xmm10,xmm2
762 vaesenc xmm11,xmm11,xmm2
763 vaesenc xmm12,xmm12,xmm2
764
765 vmovdqa xmm1,XMMWORD[144+rdx]
766 vaesenc xmm9,xmm9,xmm1
767 vaesenc xmm10,xmm10,xmm1
768 vaesenc xmm11,xmm11,xmm1
769 vaesenc xmm12,xmm12,xmm1
770
771 vmovdqa xmm2,XMMWORD[160+rdx]
772 vaesenclast xmm9,xmm9,xmm2
773 vaesenclast xmm10,xmm10,xmm2
774 vaesenclast xmm11,xmm11,xmm2
775 vaesenclast xmm12,xmm12,xmm2
776
777
; Store the four keystream blocks contiguously.
778 vmovdqa XMMWORD[rsi],xmm9
779 vmovdqa XMMWORD[16+rsi],xmm10
780 vmovdqa XMMWORD[32+rsi],xmm11
781 vmovdqa XMMWORD[48+rsi],xmm12
782 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
783 mov rsi,QWORD[16+rsp]
784 DB 0F3h,0C3h ;repret
785
786 $L$SEH_end_aes128gcmsiv_kdf:
;-----------------------------------------------------------------------
; void aes128gcmsiv_enc_msg_x4(const uint8_t *in, uint8_t *out,
;                              const uint8_t *tag, const uint8_t *key_schedule,
;                              size_t in_len);
; Win64 args: rcx=in, rdx=out, r8=tag (initial counter block),
; r9=key schedule, [rsp+40]=in_len; remapped to rdi/rsi/rdx/rcx/r8.
; AES-128 CTR encryption, 4 blocks per main-loop iteration. The counter
; starts from the tag with its top bit forced on (OR_MASK), per
; AES-GCM-SIV. r8 = block count / 4, r10 = leftover blocks (count mod 4,
; via shl/shr by 62), handled one at a time in loop2.
; NOTE(review): in_len is treated as whole 16-byte blocks (shr r8,4);
; any trailing partial block is presumably handled by the caller.
;-----------------------------------------------------------------------
787 global aes128gcmsiv_enc_msg_x4
788
789 ALIGN 16
790 aes128gcmsiv_enc_msg_x4:
791 mov QWORD[8+rsp],rdi ;WIN64 prologue
792 mov QWORD[16+rsp],rsi
793 mov rax,rsp
794 $L$SEH_begin_aes128gcmsiv_enc_msg_x4:
795 mov rdi,rcx
796 mov rsi,rdx
797 mov rdx,r8
798 mov rcx,r9
799 mov r8,QWORD[40+rsp]
800
801
802
; Zero length: nothing to encrypt.
803 test r8,r8
804 jnz NEAR $L$128_enc_msg_x4_start
805 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
806 mov rsi,QWORD[16+rsp]
807 DB 0F3h,0C3h ;repret
808
809 $L$128_enc_msg_x4_start:
810 push r12
811
812 push r13
813
814
; r8 = number of 16-byte blocks; r10 = r8 mod 4 (remainder blocks).
815 shr r8,4
816 mov r10,r8
817 shl r10,62
818 shr r10,62
819
820
; Initial counter = tag with the top bit set (AES-GCM-SIV counter form).
821 vmovdqa xmm15,XMMWORD[rdx]
822 vpor xmm15,xmm15,XMMWORD[OR_MASK]
823
; xmm0..xmm3 = counters n, n+1, n+2, n+3; xmm4 = increment of 4.
824 vmovdqu xmm4,XMMWORD[four]
825 vmovdqa xmm0,xmm15
826 vpaddd xmm1,xmm15,XMMWORD[one]
827 vpaddd xmm2,xmm15,XMMWORD[two]
828 vpaddd xmm3,xmm15,XMMWORD[three]
829
; r8 = number of 4-block groups.
830 shr r8,2
831 je NEAR $L$128_enc_msg_x4_check_remainder
832
; Bias pointers so the loop can pre-increment.
833 sub rsi,64
834 sub rdi,64
835
836 $L$128_enc_msg_x4_loop1:
837 add rsi,64
838 add rdi,64
839
840 vmovdqa xmm5,xmm0
841 vmovdqa xmm6,xmm1
842 vmovdqa xmm7,xmm2
843 vmovdqa xmm8,xmm3
844
; AddRoundKey 0, then 9 vaesenc rounds + vaesenclast on all four blocks;
; counter increments (by 4) are interleaved between early rounds.
845 vpxor xmm5,xmm5,XMMWORD[rcx]
846 vpxor xmm6,xmm6,XMMWORD[rcx]
847 vpxor xmm7,xmm7,XMMWORD[rcx]
848 vpxor xmm8,xmm8,XMMWORD[rcx]
849
850 vmovdqu xmm12,XMMWORD[16+rcx]
851 vaesenc xmm5,xmm5,xmm12
852 vaesenc xmm6,xmm6,xmm12
853 vaesenc xmm7,xmm7,xmm12
854 vaesenc xmm8,xmm8,xmm12
855
856 vpaddd xmm0,xmm0,xmm4
857 vmovdqu xmm12,XMMWORD[32+rcx]
858 vaesenc xmm5,xmm5,xmm12
859 vaesenc xmm6,xmm6,xmm12
860 vaesenc xmm7,xmm7,xmm12
861 vaesenc xmm8,xmm8,xmm12
862
863 vpaddd xmm1,xmm1,xmm4
864 vmovdqu xmm12,XMMWORD[48+rcx]
865 vaesenc xmm5,xmm5,xmm12
866 vaesenc xmm6,xmm6,xmm12
867 vaesenc xmm7,xmm7,xmm12
868 vaesenc xmm8,xmm8,xmm12
869
870 vpaddd xmm2,xmm2,xmm4
871 vmovdqu xmm12,XMMWORD[64+rcx]
872 vaesenc xmm5,xmm5,xmm12
873 vaesenc xmm6,xmm6,xmm12
874 vaesenc xmm7,xmm7,xmm12
875 vaesenc xmm8,xmm8,xmm12
876
877 vpaddd xmm3,xmm3,xmm4
878
879 vmovdqu xmm12,XMMWORD[80+rcx]
880 vaesenc xmm5,xmm5,xmm12
881 vaesenc xmm6,xmm6,xmm12
882 vaesenc xmm7,xmm7,xmm12
883 vaesenc xmm8,xmm8,xmm12
884
885 vmovdqu xmm12,XMMWORD[96+rcx]
886 vaesenc xmm5,xmm5,xmm12
887 vaesenc xmm6,xmm6,xmm12
888 vaesenc xmm7,xmm7,xmm12
889 vaesenc xmm8,xmm8,xmm12
890
891 vmovdqu xmm12,XMMWORD[112+rcx]
892 vaesenc xmm5,xmm5,xmm12
893 vaesenc xmm6,xmm6,xmm12
894 vaesenc xmm7,xmm7,xmm12
895 vaesenc xmm8,xmm8,xmm12
896
897 vmovdqu xmm12,XMMWORD[128+rcx]
898 vaesenc xmm5,xmm5,xmm12
899 vaesenc xmm6,xmm6,xmm12
900 vaesenc xmm7,xmm7,xmm12
901 vaesenc xmm8,xmm8,xmm12
902
903 vmovdqu xmm12,XMMWORD[144+rcx]
904 vaesenc xmm5,xmm5,xmm12
905 vaesenc xmm6,xmm6,xmm12
906 vaesenc xmm7,xmm7,xmm12
907 vaesenc xmm8,xmm8,xmm12
908
909 vmovdqu xmm12,XMMWORD[160+rcx]
910 vaesenclast xmm5,xmm5,xmm12
911 vaesenclast xmm6,xmm6,xmm12
912 vaesenclast xmm7,xmm7,xmm12
913 vaesenclast xmm8,xmm8,xmm12
914
915
916
; XOR keystream with plaintext and store ciphertext.
917 vpxor xmm5,xmm5,XMMWORD[rdi]
918 vpxor xmm6,xmm6,XMMWORD[16+rdi]
919 vpxor xmm7,xmm7,XMMWORD[32+rdi]
920 vpxor xmm8,xmm8,XMMWORD[48+rdi]
921
922 sub r8,1
923
924 vmovdqu XMMWORD[rsi],xmm5
925 vmovdqu XMMWORD[16+rsi],xmm6
926 vmovdqu XMMWORD[32+rsi],xmm7
927 vmovdqu XMMWORD[48+rsi],xmm8
928
929 jne NEAR $L$128_enc_msg_x4_loop1
930
931 add rsi,64
932 add rdi,64
933
934 $L$128_enc_msg_x4_check_remainder:
935 cmp r10,0
936 je NEAR $L$128_enc_msg_x4_out
937
; Tail: encrypt the remaining 1..3 blocks one at a time.
938 $L$128_enc_msg_x4_loop2:
939
940
941 vmovdqa xmm5,xmm0
942 vpaddd xmm0,xmm0,XMMWORD[one]
943
944 vpxor xmm5,xmm5,XMMWORD[rcx]
945 vaesenc xmm5,xmm5,XMMWORD[16+rcx]
946 vaesenc xmm5,xmm5,XMMWORD[32+rcx]
947 vaesenc xmm5,xmm5,XMMWORD[48+rcx]
948 vaesenc xmm5,xmm5,XMMWORD[64+rcx]
949 vaesenc xmm5,xmm5,XMMWORD[80+rcx]
950 vaesenc xmm5,xmm5,XMMWORD[96+rcx]
951 vaesenc xmm5,xmm5,XMMWORD[112+rcx]
952 vaesenc xmm5,xmm5,XMMWORD[128+rcx]
953 vaesenc xmm5,xmm5,XMMWORD[144+rcx]
954 vaesenclast xmm5,xmm5,XMMWORD[160+rcx]
955
956
957 vpxor xmm5,xmm5,XMMWORD[rdi]
958 vmovdqu XMMWORD[rsi],xmm5
959
960 add rdi,16
961 add rsi,16
962
963 sub r10,1
964 jne NEAR $L$128_enc_msg_x4_loop2
965
966 $L$128_enc_msg_x4_out:
967 pop r13
968
969 pop r12
970
971 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
972 mov rsi,QWORD[16+rsp]
973 DB 0F3h,0C3h ;repret
974
975 $L$SEH_end_aes128gcmsiv_enc_msg_x4:
;-----------------------------------------------------------------------
; void aes128gcmsiv_enc_msg_x8(const uint8_t *in, uint8_t *out,
;                              const uint8_t *tag, const uint8_t *key_schedule,
;                              size_t in_len);
; Win64 args: rcx=in, rdx=out, r8=tag (initial counter block),
; r9=key schedule, [rsp+40]=in_len; remapped to rdi/rsi/rdx/rcx/r8.
; AES-128 CTR encryption, 8 blocks per main-loop iteration. Seven
; counters live in xmm registers (xmm0, xmm9..xmm14) and the eighth is
; spilled to an aligned stack slot (rbp frame, rsp aligned to 64).
; r8 = block count / 8, r10 = leftover blocks (count mod 8, via
; shl/shr by 61), handled one at a time in loop2.
;-----------------------------------------------------------------------
976 global aes128gcmsiv_enc_msg_x8
977
978 ALIGN 16
979 aes128gcmsiv_enc_msg_x8:
980 mov QWORD[8+rsp],rdi ;WIN64 prologue
981 mov QWORD[16+rsp],rsi
982 mov rax,rsp
983 $L$SEH_begin_aes128gcmsiv_enc_msg_x8:
984 mov rdi,rcx
985 mov rsi,rdx
986 mov rdx,r8
987 mov rcx,r9
988 mov r8,QWORD[40+rsp]
989
990
991
; Zero length: nothing to encrypt.
992 test r8,r8
993 jnz NEAR $L$128_enc_msg_x8_start
994 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
995 mov rsi,QWORD[16+rsp]
996 DB 0F3h,0C3h ;repret
997
998 $L$128_enc_msg_x8_start:
999 push r12
1000
1001 push r13
1002
1003 push rbp
1004
; rbp preserves the caller's rsp so the 64-byte realignment below
; can be undone exactly at function exit.
1005 mov rbp,rsp
1006
1007
1008
1009 sub rsp,128
1010 and rsp,-64
1011
; r8 = number of 16-byte blocks; r10 = r8 mod 8 (remainder blocks).
1012 shr r8,4
1013 mov r10,r8
1014 shl r10,61
1015 shr r10,61
1016
1017
; Initial counter = tag with the top bit set (AES-GCM-SIV counter form).
1018 vmovdqu xmm1,XMMWORD[rdx]
1019 vpor xmm1,xmm1,XMMWORD[OR_MASK]
1020
1021
; Counter n+7 is kept in the stack slot; n+1..n+6 in xmm9..xmm14, n in xmm0.
1022 vpaddd xmm0,xmm1,XMMWORD[seven]
1023 vmovdqu XMMWORD[rsp],xmm0
1024 vpaddd xmm9,xmm1,XMMWORD[one]
1025 vpaddd xmm10,xmm1,XMMWORD[two]
1026 vpaddd xmm11,xmm1,XMMWORD[three]
1027 vpaddd xmm12,xmm1,XMMWORD[four]
1028 vpaddd xmm13,xmm1,XMMWORD[five]
1029 vpaddd xmm14,xmm1,XMMWORD[six]
1030 vmovdqa xmm0,xmm1
1031
; r8 = number of 8-block groups.
1032 shr r8,3
1033 je NEAR $L$128_enc_msg_x8_check_remainder
1034
; Bias pointers so the loop can pre-increment.
1035 sub rsi,128
1036 sub rdi,128
1037
1038 $L$128_enc_msg_x8_loop1:
1039 add rsi,128
1040 add rdi,128
1041
1042 vmovdqa xmm1,xmm0
1043 vmovdqa xmm2,xmm9
1044 vmovdqa xmm3,xmm10
1045 vmovdqa xmm4,xmm11
1046 vmovdqa xmm5,xmm12
1047 vmovdqa xmm6,xmm13
1048 vmovdqa xmm7,xmm14
1049
1050 vmovdqu xmm8,XMMWORD[rsp]
1051
; AddRoundKey 0, then 9 vaesenc rounds + vaesenclast on all eight
; blocks; counter advances (by 8) are interleaved between rounds.
1052 vpxor xmm1,xmm1,XMMWORD[rcx]
1053 vpxor xmm2,xmm2,XMMWORD[rcx]
1054 vpxor xmm3,xmm3,XMMWORD[rcx]
1055 vpxor xmm4,xmm4,XMMWORD[rcx]
1056 vpxor xmm5,xmm5,XMMWORD[rcx]
1057 vpxor xmm6,xmm6,XMMWORD[rcx]
1058 vpxor xmm7,xmm7,XMMWORD[rcx]
1059 vpxor xmm8,xmm8,XMMWORD[rcx]
1060
1061 vmovdqu xmm15,XMMWORD[16+rcx]
1062 vaesenc xmm1,xmm1,xmm15
1063 vaesenc xmm2,xmm2,xmm15
1064 vaesenc xmm3,xmm3,xmm15
1065 vaesenc xmm4,xmm4,xmm15
1066 vaesenc xmm5,xmm5,xmm15
1067 vaesenc xmm6,xmm6,xmm15
1068 vaesenc xmm7,xmm7,xmm15
1069 vaesenc xmm8,xmm8,xmm15
1070
; Advance the spilled counter (n+7 -> n+15) in place; xmm14 is reused
; as scratch here and recomputed from it (minus one) below.
1071 vmovdqu xmm14,XMMWORD[rsp]
1072 vpaddd xmm14,xmm14,XMMWORD[eight]
1073 vmovdqu XMMWORD[rsp],xmm14
1074 vmovdqu xmm15,XMMWORD[32+rcx]
1075 vaesenc xmm1,xmm1,xmm15
1076 vaesenc xmm2,xmm2,xmm15
1077 vaesenc xmm3,xmm3,xmm15
1078 vaesenc xmm4,xmm4,xmm15
1079 vaesenc xmm5,xmm5,xmm15
1080 vaesenc xmm6,xmm6,xmm15
1081 vaesenc xmm7,xmm7,xmm15
1082 vaesenc xmm8,xmm8,xmm15
1083
1084 vpsubd xmm14,xmm14,XMMWORD[one]
1085 vmovdqu xmm15,XMMWORD[48+rcx]
1086 vaesenc xmm1,xmm1,xmm15
1087 vaesenc xmm2,xmm2,xmm15
1088 vaesenc xmm3,xmm3,xmm15
1089 vaesenc xmm4,xmm4,xmm15
1090 vaesenc xmm5,xmm5,xmm15
1091 vaesenc xmm6,xmm6,xmm15
1092 vaesenc xmm7,xmm7,xmm15
1093 vaesenc xmm8,xmm8,xmm15
1094
1095 vpaddd xmm0,xmm0,XMMWORD[eight]
1096 vmovdqu xmm15,XMMWORD[64+rcx]
1097 vaesenc xmm1,xmm1,xmm15
1098 vaesenc xmm2,xmm2,xmm15
1099 vaesenc xmm3,xmm3,xmm15
1100 vaesenc xmm4,xmm4,xmm15
1101 vaesenc xmm5,xmm5,xmm15
1102 vaesenc xmm6,xmm6,xmm15
1103 vaesenc xmm7,xmm7,xmm15
1104 vaesenc xmm8,xmm8,xmm15
1105
1106 vpaddd xmm9,xmm9,XMMWORD[eight]
1107 vmovdqu xmm15,XMMWORD[80+rcx]
1108 vaesenc xmm1,xmm1,xmm15
1109 vaesenc xmm2,xmm2,xmm15
1110 vaesenc xmm3,xmm3,xmm15
1111 vaesenc xmm4,xmm4,xmm15
1112 vaesenc xmm5,xmm5,xmm15
1113 vaesenc xmm6,xmm6,xmm15
1114 vaesenc xmm7,xmm7,xmm15
1115 vaesenc xmm8,xmm8,xmm15
1116
1117 vpaddd xmm10,xmm10,XMMWORD[eight]
1118 vmovdqu xmm15,XMMWORD[96+rcx]
1119 vaesenc xmm1,xmm1,xmm15
1120 vaesenc xmm2,xmm2,xmm15
1121 vaesenc xmm3,xmm3,xmm15
1122 vaesenc xmm4,xmm4,xmm15
1123 vaesenc xmm5,xmm5,xmm15
1124 vaesenc xmm6,xmm6,xmm15
1125 vaesenc xmm7,xmm7,xmm15
1126 vaesenc xmm8,xmm8,xmm15
1127
1128 vpaddd xmm11,xmm11,XMMWORD[eight]
1129 vmovdqu xmm15,XMMWORD[112+rcx]
1130 vaesenc xmm1,xmm1,xmm15
1131 vaesenc xmm2,xmm2,xmm15
1132 vaesenc xmm3,xmm3,xmm15
1133 vaesenc xmm4,xmm4,xmm15
1134 vaesenc xmm5,xmm5,xmm15
1135 vaesenc xmm6,xmm6,xmm15
1136 vaesenc xmm7,xmm7,xmm15
1137 vaesenc xmm8,xmm8,xmm15
1138
1139 vpaddd xmm12,xmm12,XMMWORD[eight]
1140 vmovdqu xmm15,XMMWORD[128+rcx]
1141 vaesenc xmm1,xmm1,xmm15
1142 vaesenc xmm2,xmm2,xmm15
1143 vaesenc xmm3,xmm3,xmm15
1144 vaesenc xmm4,xmm4,xmm15
1145 vaesenc xmm5,xmm5,xmm15
1146 vaesenc xmm6,xmm6,xmm15
1147 vaesenc xmm7,xmm7,xmm15
1148 vaesenc xmm8,xmm8,xmm15
1149
1150 vpaddd xmm13,xmm13,XMMWORD[eight]
1151 vmovdqu xmm15,XMMWORD[144+rcx]
1152 vaesenc xmm1,xmm1,xmm15
1153 vaesenc xmm2,xmm2,xmm15
1154 vaesenc xmm3,xmm3,xmm15
1155 vaesenc xmm4,xmm4,xmm15
1156 vaesenc xmm5,xmm5,xmm15
1157 vaesenc xmm6,xmm6,xmm15
1158 vaesenc xmm7,xmm7,xmm15
1159 vaesenc xmm8,xmm8,xmm15
1160
1161 vmovdqu xmm15,XMMWORD[160+rcx]
1162 vaesenclast xmm1,xmm1,xmm15
1163 vaesenclast xmm2,xmm2,xmm15
1164 vaesenclast xmm3,xmm3,xmm15
1165 vaesenclast xmm4,xmm4,xmm15
1166 vaesenclast xmm5,xmm5,xmm15
1167 vaesenclast xmm6,xmm6,xmm15
1168 vaesenclast xmm7,xmm7,xmm15
1169 vaesenclast xmm8,xmm8,xmm15
1170
1171
1172
; XOR keystream with plaintext and store ciphertext.
1173 vpxor xmm1,xmm1,XMMWORD[rdi]
1174 vpxor xmm2,xmm2,XMMWORD[16+rdi]
1175 vpxor xmm3,xmm3,XMMWORD[32+rdi]
1176 vpxor xmm4,xmm4,XMMWORD[48+rdi]
1177 vpxor xmm5,xmm5,XMMWORD[64+rdi]
1178 vpxor xmm6,xmm6,XMMWORD[80+rdi]
1179 vpxor xmm7,xmm7,XMMWORD[96+rdi]
1180 vpxor xmm8,xmm8,XMMWORD[112+rdi]
1181
1182 dec r8
1183
1184 vmovdqu XMMWORD[rsi],xmm1
1185 vmovdqu XMMWORD[16+rsi],xmm2
1186 vmovdqu XMMWORD[32+rsi],xmm3
1187 vmovdqu XMMWORD[48+rsi],xmm4
1188 vmovdqu XMMWORD[64+rsi],xmm5
1189 vmovdqu XMMWORD[80+rsi],xmm6
1190 vmovdqu XMMWORD[96+rsi],xmm7
1191 vmovdqu XMMWORD[112+rsi],xmm8
1192
1193 jne NEAR $L$128_enc_msg_x8_loop1
1194
1195 add rsi,128
1196 add rdi,128
1197
1198 $L$128_enc_msg_x8_check_remainder:
1199 cmp r10,0
1200 je NEAR $L$128_enc_msg_x8_out
1201
; Tail: encrypt the remaining 1..7 blocks one at a time.
1202 $L$128_enc_msg_x8_loop2:
1203
1204
1205 vmovdqa xmm1,xmm0
1206 vpaddd xmm0,xmm0,XMMWORD[one]
1207
1208 vpxor xmm1,xmm1,XMMWORD[rcx]
1209 vaesenc xmm1,xmm1,XMMWORD[16+rcx]
1210 vaesenc xmm1,xmm1,XMMWORD[32+rcx]
1211 vaesenc xmm1,xmm1,XMMWORD[48+rcx]
1212 vaesenc xmm1,xmm1,XMMWORD[64+rcx]
1213 vaesenc xmm1,xmm1,XMMWORD[80+rcx]
1214 vaesenc xmm1,xmm1,XMMWORD[96+rcx]
1215 vaesenc xmm1,xmm1,XMMWORD[112+rcx]
1216 vaesenc xmm1,xmm1,XMMWORD[128+rcx]
1217 vaesenc xmm1,xmm1,XMMWORD[144+rcx]
1218 vaesenclast xmm1,xmm1,XMMWORD[160+rcx]
1219
1220
1221 vpxor xmm1,xmm1,XMMWORD[rdi]
1222
1223 vmovdqu XMMWORD[rsi],xmm1
1224
1225 add rdi,16
1226 add rsi,16
1227
1228 dec r10
1229 jne NEAR $L$128_enc_msg_x8_loop2
1230
1231 $L$128_enc_msg_x8_out:
; Restore the pre-alignment stack pointer, then callee-saved registers.
1232 mov rsp,rbp
1233
1234 pop rbp
1235
1236 pop r13
1237
1238 pop r12
1239
1240 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1241 mov rsi,QWORD[16+rsp]
1242 DB 0F3h,0C3h ;repret
1243
1244 $L$SEH_end_aes128gcmsiv_enc_msg_x8:
1245 global aes128gcmsiv_dec
1246
1247 ALIGN 16
1248 aes128gcmsiv_dec:
1249 mov QWORD[8+rsp],rdi ;WIN64 prologue
1250 mov QWORD[16+rsp],rsi
1251 mov rax,rsp
1252 $L$SEH_begin_aes128gcmsiv_dec:
1253 mov rdi,rcx
1254 mov rsi,rdx
1255 mov rdx,r8
1256 mov rcx,r9
1257 mov r8,QWORD[40+rsp]
1258 mov r9,QWORD[48+rsp]
1259
1260
1261
1262 test r9,~15
1263 jnz NEAR $L$128_dec_start
1264 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1265 mov rsi,QWORD[16+rsp]
1266 DB 0F3h,0C3h ;repret
1267
1268 $L$128_dec_start:
1269 vzeroupper
1270 vmovdqa xmm0,XMMWORD[rdx]
1271 mov rax,rdx
1272
1273 lea rax,[32+rax]
1274 lea rcx,[32+rcx]
1275
1276
1277 vmovdqu xmm15,XMMWORD[r9*1+rdi]
1278 vpor xmm15,xmm15,XMMWORD[OR_MASK]
1279 and r9,~15
1280
1281
1282 cmp r9,96
1283 jb NEAR $L$128_dec_loop2
1284
1285
1286 sub r9,96
1287 vmovdqa xmm7,xmm15
1288 vpaddd xmm8,xmm7,XMMWORD[one]
1289 vpaddd xmm9,xmm7,XMMWORD[two]
1290 vpaddd xmm10,xmm9,XMMWORD[one]
1291 vpaddd xmm11,xmm9,XMMWORD[two]
1292 vpaddd xmm12,xmm11,XMMWORD[one]
1293 vpaddd xmm15,xmm11,XMMWORD[two]
1294
1295 vpxor xmm7,xmm7,XMMWORD[r8]
1296 vpxor xmm8,xmm8,XMMWORD[r8]
1297 vpxor xmm9,xmm9,XMMWORD[r8]
1298 vpxor xmm10,xmm10,XMMWORD[r8]
1299 vpxor xmm11,xmm11,XMMWORD[r8]
1300 vpxor xmm12,xmm12,XMMWORD[r8]
1301
1302 vmovdqu xmm4,XMMWORD[16+r8]
1303 vaesenc xmm7,xmm7,xmm4
1304 vaesenc xmm8,xmm8,xmm4
1305 vaesenc xmm9,xmm9,xmm4
1306 vaesenc xmm10,xmm10,xmm4
1307 vaesenc xmm11,xmm11,xmm4
1308 vaesenc xmm12,xmm12,xmm4
1309
1310 vmovdqu xmm4,XMMWORD[32+r8]
1311 vaesenc xmm7,xmm7,xmm4
1312 vaesenc xmm8,xmm8,xmm4
1313 vaesenc xmm9,xmm9,xmm4
1314 vaesenc xmm10,xmm10,xmm4
1315 vaesenc xmm11,xmm11,xmm4
1316 vaesenc xmm12,xmm12,xmm4
1317
1318 vmovdqu xmm4,XMMWORD[48+r8]
1319 vaesenc xmm7,xmm7,xmm4
1320 vaesenc xmm8,xmm8,xmm4
1321 vaesenc xmm9,xmm9,xmm4
1322 vaesenc xmm10,xmm10,xmm4
1323 vaesenc xmm11,xmm11,xmm4
1324 vaesenc xmm12,xmm12,xmm4
1325
1326 vmovdqu xmm4,XMMWORD[64+r8]
1327 vaesenc xmm7,xmm7,xmm4
1328 vaesenc xmm8,xmm8,xmm4
1329 vaesenc xmm9,xmm9,xmm4
1330 vaesenc xmm10,xmm10,xmm4
1331 vaesenc xmm11,xmm11,xmm4
1332 vaesenc xmm12,xmm12,xmm4
1333
1334 vmovdqu xmm4,XMMWORD[80+r8]
1335 vaesenc xmm7,xmm7,xmm4
1336 vaesenc xmm8,xmm8,xmm4
1337 vaesenc xmm9,xmm9,xmm4
1338 vaesenc xmm10,xmm10,xmm4
1339 vaesenc xmm11,xmm11,xmm4
1340 vaesenc xmm12,xmm12,xmm4
1341
1342 vmovdqu xmm4,XMMWORD[96+r8]
1343 vaesenc xmm7,xmm7,xmm4
1344 vaesenc xmm8,xmm8,xmm4
1345 vaesenc xmm9,xmm9,xmm4
1346 vaesenc xmm10,xmm10,xmm4
1347 vaesenc xmm11,xmm11,xmm4
1348 vaesenc xmm12,xmm12,xmm4
1349
1350 vmovdqu xmm4,XMMWORD[112+r8]
1351 vaesenc xmm7,xmm7,xmm4
1352 vaesenc xmm8,xmm8,xmm4
1353 vaesenc xmm9,xmm9,xmm4
1354 vaesenc xmm10,xmm10,xmm4
1355 vaesenc xmm11,xmm11,xmm4
1356 vaesenc xmm12,xmm12,xmm4
1357
1358 vmovdqu xmm4,XMMWORD[128+r8]
1359 vaesenc xmm7,xmm7,xmm4
1360 vaesenc xmm8,xmm8,xmm4
1361 vaesenc xmm9,xmm9,xmm4
1362 vaesenc xmm10,xmm10,xmm4
1363 vaesenc xmm11,xmm11,xmm4
1364 vaesenc xmm12,xmm12,xmm4
1365
1366 vmovdqu xmm4,XMMWORD[144+r8]
1367 vaesenc xmm7,xmm7,xmm4
1368 vaesenc xmm8,xmm8,xmm4
1369 vaesenc xmm9,xmm9,xmm4
1370 vaesenc xmm10,xmm10,xmm4
1371 vaesenc xmm11,xmm11,xmm4
1372 vaesenc xmm12,xmm12,xmm4
1373
1374 vmovdqu xmm4,XMMWORD[160+r8]
1375 vaesenclast xmm7,xmm7,xmm4
1376 vaesenclast xmm8,xmm8,xmm4
1377 vaesenclast xmm9,xmm9,xmm4
1378 vaesenclast xmm10,xmm10,xmm4
1379 vaesenclast xmm11,xmm11,xmm4
1380 vaesenclast xmm12,xmm12,xmm4
1381
1382
1383 vpxor xmm7,xmm7,XMMWORD[rdi]
1384 vpxor xmm8,xmm8,XMMWORD[16+rdi]
1385 vpxor xmm9,xmm9,XMMWORD[32+rdi]
1386 vpxor xmm10,xmm10,XMMWORD[48+rdi]
1387 vpxor xmm11,xmm11,XMMWORD[64+rdi]
1388 vpxor xmm12,xmm12,XMMWORD[80+rdi]
1389
1390 vmovdqu XMMWORD[rsi],xmm7
1391 vmovdqu XMMWORD[16+rsi],xmm8
1392 vmovdqu XMMWORD[32+rsi],xmm9
1393 vmovdqu XMMWORD[48+rsi],xmm10
1394 vmovdqu XMMWORD[64+rsi],xmm11
1395 vmovdqu XMMWORD[80+rsi],xmm12
1396
1397 add rdi,96
1398 add rsi,96
1399 jmp NEAR $L$128_dec_loop1
1400
1401
1402 ALIGN 64
1403 $L$128_dec_loop1:
1404 cmp r9,96
1405 jb NEAR $L$128_dec_finish_96
1406 sub r9,96
1407
1408 vmovdqa xmm6,xmm12
1409 vmovdqa XMMWORD[(16-32)+rax],xmm11
1410 vmovdqa XMMWORD[(32-32)+rax],xmm10
1411 vmovdqa XMMWORD[(48-32)+rax],xmm9
1412 vmovdqa XMMWORD[(64-32)+rax],xmm8
1413 vmovdqa XMMWORD[(80-32)+rax],xmm7
1414
1415 vmovdqa xmm7,xmm15
1416 vpaddd xmm8,xmm7,XMMWORD[one]
1417 vpaddd xmm9,xmm7,XMMWORD[two]
1418 vpaddd xmm10,xmm9,XMMWORD[one]
1419 vpaddd xmm11,xmm9,XMMWORD[two]
1420 vpaddd xmm12,xmm11,XMMWORD[one]
1421 vpaddd xmm15,xmm11,XMMWORD[two]
1422
1423 vmovdqa xmm4,XMMWORD[r8]
1424 vpxor xmm7,xmm7,xmm4
1425 vpxor xmm8,xmm8,xmm4
1426 vpxor xmm9,xmm9,xmm4
1427 vpxor xmm10,xmm10,xmm4
1428 vpxor xmm11,xmm11,xmm4
1429 vpxor xmm12,xmm12,xmm4
1430
1431 vmovdqu xmm4,XMMWORD[((0-32))+rcx]
1432 vpclmulqdq xmm2,xmm6,xmm4,0x11
1433 vpclmulqdq xmm3,xmm6,xmm4,0x00
1434 vpclmulqdq xmm1,xmm6,xmm4,0x01
1435 vpclmulqdq xmm4,xmm6,xmm4,0x10
1436 vpxor xmm1,xmm1,xmm4
1437
1438 vmovdqu xmm4,XMMWORD[16+r8]
1439 vaesenc xmm7,xmm7,xmm4
1440 vaesenc xmm8,xmm8,xmm4
1441 vaesenc xmm9,xmm9,xmm4
1442 vaesenc xmm10,xmm10,xmm4
1443 vaesenc xmm11,xmm11,xmm4
1444 vaesenc xmm12,xmm12,xmm4
1445
1446 vmovdqu xmm6,XMMWORD[((-16))+rax]
1447 vmovdqu xmm13,XMMWORD[((-16))+rcx]
1448
1449 vpclmulqdq xmm4,xmm6,xmm13,0x10
1450 vpxor xmm1,xmm1,xmm4
1451 vpclmulqdq xmm4,xmm6,xmm13,0x11
1452 vpxor xmm2,xmm2,xmm4
1453 vpclmulqdq xmm4,xmm6,xmm13,0x00
1454 vpxor xmm3,xmm3,xmm4
1455 vpclmulqdq xmm4,xmm6,xmm13,0x01
1456 vpxor xmm1,xmm1,xmm4
1457
1458
1459 vmovdqu xmm4,XMMWORD[32+r8]
1460 vaesenc xmm7,xmm7,xmm4
1461 vaesenc xmm8,xmm8,xmm4
1462 vaesenc xmm9,xmm9,xmm4
1463 vaesenc xmm10,xmm10,xmm4
1464 vaesenc xmm11,xmm11,xmm4
1465 vaesenc xmm12,xmm12,xmm4
1466
1467 vmovdqu xmm6,XMMWORD[rax]
1468 vmovdqu xmm13,XMMWORD[rcx]
1469
1470 vpclmulqdq xmm4,xmm6,xmm13,0x10
1471 vpxor xmm1,xmm1,xmm4
1472 vpclmulqdq xmm4,xmm6,xmm13,0x11
1473 vpxor xmm2,xmm2,xmm4
1474 vpclmulqdq xmm4,xmm6,xmm13,0x00
1475 vpxor xmm3,xmm3,xmm4
1476 vpclmulqdq xmm4,xmm6,xmm13,0x01
1477 vpxor xmm1,xmm1,xmm4
1478
1479
1480 vmovdqu xmm4,XMMWORD[48+r8]
1481 vaesenc xmm7,xmm7,xmm4
1482 vaesenc xmm8,xmm8,xmm4
1483 vaesenc xmm9,xmm9,xmm4
1484 vaesenc xmm10,xmm10,xmm4
1485 vaesenc xmm11,xmm11,xmm4
1486 vaesenc xmm12,xmm12,xmm4
1487
1488 vmovdqu xmm6,XMMWORD[16+rax]
1489 vmovdqu xmm13,XMMWORD[16+rcx]
1490
1491 vpclmulqdq xmm4,xmm6,xmm13,0x10
1492 vpxor xmm1,xmm1,xmm4
1493 vpclmulqdq xmm4,xmm6,xmm13,0x11
1494 vpxor xmm2,xmm2,xmm4
1495 vpclmulqdq xmm4,xmm6,xmm13,0x00
1496 vpxor xmm3,xmm3,xmm4
1497 vpclmulqdq xmm4,xmm6,xmm13,0x01
1498 vpxor xmm1,xmm1,xmm4
1499
1500
1501 vmovdqu xmm4,XMMWORD[64+r8]
1502 vaesenc xmm7,xmm7,xmm4
1503 vaesenc xmm8,xmm8,xmm4
1504 vaesenc xmm9,xmm9,xmm4
1505 vaesenc xmm10,xmm10,xmm4
1506 vaesenc xmm11,xmm11,xmm4
1507 vaesenc xmm12,xmm12,xmm4
1508
1509 vmovdqu xmm6,XMMWORD[32+rax]
1510 vmovdqu xmm13,XMMWORD[32+rcx]
1511
1512 vpclmulqdq xmm4,xmm6,xmm13,0x10
1513 vpxor xmm1,xmm1,xmm4
1514 vpclmulqdq xmm4,xmm6,xmm13,0x11
1515 vpxor xmm2,xmm2,xmm4
1516 vpclmulqdq xmm4,xmm6,xmm13,0x00
1517 vpxor xmm3,xmm3,xmm4
1518 vpclmulqdq xmm4,xmm6,xmm13,0x01
1519 vpxor xmm1,xmm1,xmm4
1520
1521
1522 vmovdqu xmm4,XMMWORD[80+r8]
1523 vaesenc xmm7,xmm7,xmm4
1524 vaesenc xmm8,xmm8,xmm4
1525 vaesenc xmm9,xmm9,xmm4
1526 vaesenc xmm10,xmm10,xmm4
1527 vaesenc xmm11,xmm11,xmm4
1528 vaesenc xmm12,xmm12,xmm4
1529
1530 vmovdqu xmm4,XMMWORD[96+r8]
1531 vaesenc xmm7,xmm7,xmm4
1532 vaesenc xmm8,xmm8,xmm4
1533 vaesenc xmm9,xmm9,xmm4
1534 vaesenc xmm10,xmm10,xmm4
1535 vaesenc xmm11,xmm11,xmm4
1536 vaesenc xmm12,xmm12,xmm4
1537
1538 vmovdqu xmm4,XMMWORD[112+r8]
1539 vaesenc xmm7,xmm7,xmm4
1540 vaesenc xmm8,xmm8,xmm4
1541 vaesenc xmm9,xmm9,xmm4
1542 vaesenc xmm10,xmm10,xmm4
1543 vaesenc xmm11,xmm11,xmm4
1544 vaesenc xmm12,xmm12,xmm4
1545
1546
1547 vmovdqa xmm6,XMMWORD[((80-32))+rax]
1548 vpxor xmm6,xmm6,xmm0
1549 vmovdqu xmm5,XMMWORD[((80-32))+rcx]
1550
1551 vpclmulqdq xmm4,xmm6,xmm5,0x01
1552 vpxor xmm1,xmm1,xmm4
1553 vpclmulqdq xmm4,xmm6,xmm5,0x11
1554 vpxor xmm2,xmm2,xmm4
1555 vpclmulqdq xmm4,xmm6,xmm5,0x00
1556 vpxor xmm3,xmm3,xmm4
1557 vpclmulqdq xmm4,xmm6,xmm5,0x10
1558 vpxor xmm1,xmm1,xmm4
1559
1560 vmovdqu xmm4,XMMWORD[128+r8]
1561 vaesenc xmm7,xmm7,xmm4
1562 vaesenc xmm8,xmm8,xmm4
1563 vaesenc xmm9,xmm9,xmm4
1564 vaesenc xmm10,xmm10,xmm4
1565 vaesenc xmm11,xmm11,xmm4
1566 vaesenc xmm12,xmm12,xmm4
1567
1568
1569 vpsrldq xmm4,xmm1,8
1570 vpxor xmm5,xmm2,xmm4
1571 vpslldq xmm4,xmm1,8
1572 vpxor xmm0,xmm3,xmm4
1573
1574 vmovdqa xmm3,XMMWORD[poly]
1575
1576 vmovdqu xmm4,XMMWORD[144+r8]
1577 vaesenc xmm7,xmm7,xmm4
1578 vaesenc xmm8,xmm8,xmm4
1579 vaesenc xmm9,xmm9,xmm4
1580 vaesenc xmm10,xmm10,xmm4
1581 vaesenc xmm11,xmm11,xmm4
1582 vaesenc xmm12,xmm12,xmm4
1583
1584 vmovdqu xmm6,XMMWORD[160+r8]
1585 vpalignr xmm2,xmm0,xmm0,8
1586 vpclmulqdq xmm0,xmm0,xmm3,0x10
1587 vpxor xmm0,xmm2,xmm0
1588
1589 vpxor xmm4,xmm6,XMMWORD[rdi]
1590 vaesenclast xmm7,xmm7,xmm4
1591 vpxor xmm4,xmm6,XMMWORD[16+rdi]
1592 vaesenclast xmm8,xmm8,xmm4
1593 vpxor xmm4,xmm6,XMMWORD[32+rdi]
1594 vaesenclast xmm9,xmm9,xmm4
1595 vpxor xmm4,xmm6,XMMWORD[48+rdi]
1596 vaesenclast xmm10,xmm10,xmm4
1597 vpxor xmm4,xmm6,XMMWORD[64+rdi]
1598 vaesenclast xmm11,xmm11,xmm4
1599 vpxor xmm4,xmm6,XMMWORD[80+rdi]
1600 vaesenclast xmm12,xmm12,xmm4
1601
1602 vpalignr xmm2,xmm0,xmm0,8
1603 vpclmulqdq xmm0,xmm0,xmm3,0x10
1604 vpxor xmm0,xmm2,xmm0
1605
1606 vmovdqu XMMWORD[rsi],xmm7
1607 vmovdqu XMMWORD[16+rsi],xmm8
1608 vmovdqu XMMWORD[32+rsi],xmm9
1609 vmovdqu XMMWORD[48+rsi],xmm10
1610 vmovdqu XMMWORD[64+rsi],xmm11
1611 vmovdqu XMMWORD[80+rsi],xmm12
1612
1613 vpxor xmm0,xmm0,xmm5
1614
1615 lea rdi,[96+rdi]
1616 lea rsi,[96+rsi]
1617 jmp NEAR $L$128_dec_loop1
1618
1619 $L$128_dec_finish_96:
1620 vmovdqa xmm6,xmm12
1621 vmovdqa XMMWORD[(16-32)+rax],xmm11
1622 vmovdqa XMMWORD[(32-32)+rax],xmm10
1623 vmovdqa XMMWORD[(48-32)+rax],xmm9
1624 vmovdqa XMMWORD[(64-32)+rax],xmm8
1625 vmovdqa XMMWORD[(80-32)+rax],xmm7
1626
1627 vmovdqu xmm4,XMMWORD[((0-32))+rcx]
1628 vpclmulqdq xmm1,xmm6,xmm4,0x10
1629 vpclmulqdq xmm2,xmm6,xmm4,0x11
1630 vpclmulqdq xmm3,xmm6,xmm4,0x00
1631 vpclmulqdq xmm4,xmm6,xmm4,0x01
1632 vpxor xmm1,xmm1,xmm4
1633
1634 vmovdqu xmm6,XMMWORD[((-16))+rax]
1635 vmovdqu xmm13,XMMWORD[((-16))+rcx]
1636
1637 vpclmulqdq xmm4,xmm6,xmm13,0x10
1638 vpxor xmm1,xmm1,xmm4
1639 vpclmulqdq xmm4,xmm6,xmm13,0x11
1640 vpxor xmm2,xmm2,xmm4
1641 vpclmulqdq xmm4,xmm6,xmm13,0x00
1642 vpxor xmm3,xmm3,xmm4
1643 vpclmulqdq xmm4,xmm6,xmm13,0x01
1644 vpxor xmm1,xmm1,xmm4
1645
1646 vmovdqu xmm6,XMMWORD[rax]
1647 vmovdqu xmm13,XMMWORD[rcx]
1648
1649 vpclmulqdq xmm4,xmm6,xmm13,0x10
1650 vpxor xmm1,xmm1,xmm4
1651 vpclmulqdq xmm4,xmm6,xmm13,0x11
1652 vpxor xmm2,xmm2,xmm4
1653 vpclmulqdq xmm4,xmm6,xmm13,0x00
1654 vpxor xmm3,xmm3,xmm4
1655 vpclmulqdq xmm4,xmm6,xmm13,0x01
1656 vpxor xmm1,xmm1,xmm4
1657
1658 vmovdqu xmm6,XMMWORD[16+rax]
1659 vmovdqu xmm13,XMMWORD[16+rcx]
1660
1661 vpclmulqdq xmm4,xmm6,xmm13,0x10
1662 vpxor xmm1,xmm1,xmm4
1663 vpclmulqdq xmm4,xmm6,xmm13,0x11
1664 vpxor xmm2,xmm2,xmm4
1665 vpclmulqdq xmm4,xmm6,xmm13,0x00
1666 vpxor xmm3,xmm3,xmm4
1667 vpclmulqdq xmm4,xmm6,xmm13,0x01
1668 vpxor xmm1,xmm1,xmm4
1669
1670 vmovdqu xmm6,XMMWORD[32+rax]
1671 vmovdqu xmm13,XMMWORD[32+rcx]
1672
1673 vpclmulqdq xmm4,xmm6,xmm13,0x10
1674 vpxor xmm1,xmm1,xmm4
1675 vpclmulqdq xmm4,xmm6,xmm13,0x11
1676 vpxor xmm2,xmm2,xmm4
1677 vpclmulqdq xmm4,xmm6,xmm13,0x00
1678 vpxor xmm3,xmm3,xmm4
1679 vpclmulqdq xmm4,xmm6,xmm13,0x01
1680 vpxor xmm1,xmm1,xmm4
1681
1682
1683 vmovdqu xmm6,XMMWORD[((80-32))+rax]
1684 vpxor xmm6,xmm6,xmm0
1685 vmovdqu xmm5,XMMWORD[((80-32))+rcx]
1686 vpclmulqdq xmm4,xmm6,xmm5,0x11
1687 vpxor xmm2,xmm2,xmm4
1688 vpclmulqdq xmm4,xmm6,xmm5,0x00
1689 vpxor xmm3,xmm3,xmm4
1690 vpclmulqdq xmm4,xmm6,xmm5,0x10
1691 vpxor xmm1,xmm1,xmm4
1692 vpclmulqdq xmm4,xmm6,xmm5,0x01
1693 vpxor xmm1,xmm1,xmm4
1694
1695 vpsrldq xmm4,xmm1,8
1696 vpxor xmm5,xmm2,xmm4
1697 vpslldq xmm4,xmm1,8
1698 vpxor xmm0,xmm3,xmm4
1699
1700 vmovdqa xmm3,XMMWORD[poly]
1701
1702 vpalignr xmm2,xmm0,xmm0,8
1703 vpclmulqdq xmm0,xmm0,xmm3,0x10
1704 vpxor xmm0,xmm2,xmm0
1705
1706 vpalignr xmm2,xmm0,xmm0,8
1707 vpclmulqdq xmm0,xmm0,xmm3,0x10
1708 vpxor xmm0,xmm2,xmm0
1709
1710 vpxor xmm0,xmm0,xmm5
1711
1712 $L$128_dec_loop2:
1713
1714
1715
1716 cmp r9,16
1717 jb NEAR $L$128_dec_out
1718 sub r9,16
1719
1720 vmovdqa xmm2,xmm15
1721 vpaddd xmm15,xmm15,XMMWORD[one]
1722
1723 vpxor xmm2,xmm2,XMMWORD[r8]
1724 vaesenc xmm2,xmm2,XMMWORD[16+r8]
1725 vaesenc xmm2,xmm2,XMMWORD[32+r8]
1726 vaesenc xmm2,xmm2,XMMWORD[48+r8]
1727 vaesenc xmm2,xmm2,XMMWORD[64+r8]
1728 vaesenc xmm2,xmm2,XMMWORD[80+r8]
1729 vaesenc xmm2,xmm2,XMMWORD[96+r8]
1730 vaesenc xmm2,xmm2,XMMWORD[112+r8]
1731 vaesenc xmm2,xmm2,XMMWORD[128+r8]
1732 vaesenc xmm2,xmm2,XMMWORD[144+r8]
1733 vaesenclast xmm2,xmm2,XMMWORD[160+r8]
1734 vpxor xmm2,xmm2,XMMWORD[rdi]
1735 vmovdqu XMMWORD[rsi],xmm2
1736 add rdi,16
1737 add rsi,16
1738
1739 vpxor xmm0,xmm0,xmm2
1740 vmovdqa xmm1,XMMWORD[((-32))+rcx]
1741 call GFMUL
1742
1743 jmp NEAR $L$128_dec_loop2
1744
1745 $L$128_dec_out:
1746 vmovdqu XMMWORD[rdx],xmm0
1747 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1748 mov rsi,QWORD[16+rsp]
1749 DB 0F3h,0C3h ;repret
1750
1751 $L$SEH_end_aes128gcmsiv_dec:
1752 global aes128gcmsiv_ecb_enc_block
;-----------------------------------------------------------------------
; aes128gcmsiv_ecb_enc_block(in, out, key_schedule)
; Encrypts a single 16-byte block with AES-128: one whitening XOR,
; nine vaesenc rounds, one vaesenclast (11 round keys at ks+0..ks+160).
; ABI:  Win64 entry (rcx=in, rdx=out, r8=ks); the prologue saves the
;       caller's rdi/rsi (callee-saved on Win64) and remaps the args
;       onto the SysV registers (rdi/rsi/rdx) used by the body.
; In:   rdi = in   (16-byte block; vmovdqa => assumed 16-byte aligned)
;       rsi = out  (16-byte destination; also assumed 16-byte aligned)
;       rdx = ks   (expanded AES-128 key schedule)
; Clobbers: rax, xmm1, flags.
;-----------------------------------------------------------------------
1753 
1754 ALIGN 16
1755 aes128gcmsiv_ecb_enc_block:
1756 mov QWORD[8+rsp],rdi ;WIN64 prologue
1757 mov QWORD[16+rsp],rsi
1758 mov rax,rsp
1759 $L$SEH_begin_aes128gcmsiv_ecb_enc_block:
1760 mov rdi,rcx
1761 mov rsi,rdx
1762 mov rdx,r8
1763 
1764 
1765 
1766 vmovdqa xmm1,XMMWORD[rdi] ; load plaintext block
1767 
1768 vpxor xmm1,xmm1,XMMWORD[rdx] ; round 0: AddRoundKey
1769 vaesenc xmm1,xmm1,XMMWORD[16+rdx] ; rounds 1..9
1770 vaesenc xmm1,xmm1,XMMWORD[32+rdx]
1771 vaesenc xmm1,xmm1,XMMWORD[48+rdx]
1772 vaesenc xmm1,xmm1,XMMWORD[64+rdx]
1773 vaesenc xmm1,xmm1,XMMWORD[80+rdx]
1774 vaesenc xmm1,xmm1,XMMWORD[96+rdx]
1775 vaesenc xmm1,xmm1,XMMWORD[112+rdx]
1776 vaesenc xmm1,xmm1,XMMWORD[128+rdx]
1777 vaesenc xmm1,xmm1,XMMWORD[144+rdx]
1778 vaesenclast xmm1,xmm1,XMMWORD[160+rdx] ; final round (10)
1779 
1780 vmovdqa XMMWORD[rsi],xmm1 ; store ciphertext block
1781 
1782 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1783 mov rsi,QWORD[16+rsp]
1784 DB 0F3h,0C3h ;repret
1785 
1786 $L$SEH_end_aes128gcmsiv_ecb_enc_block:
1787 global aes256gcmsiv_aes_ks_enc_x1
;-----------------------------------------------------------------------
; aes256gcmsiv_aes_ks_enc_x1(in, out, expanded_ks, raw_key)
; Expands a 256-bit AES key into its full 15-round-key schedule AND
; simultaneously encrypts one 16-byte block with the schedule as it is
; produced (the running ciphertext is kept in xmm8).
; ABI:  Win64 entry (rcx=in, rdx=out, r8=expanded key out, r9=raw key);
;       prologue remaps to SysV-style rdi/rsi/rdx/rcx used below.
; In:   rdi = in   (16-byte plaintext block, vmovdqa => aligned)
;       rsi = out  (16-byte ciphertext destination, aligned)
;       rdx = destination for the 15 round keys (15 x 16 = 240 bytes)
;       rcx = 32-byte raw AES-256 key (two 16-byte halves, aligned)
; Regs: xmm1/xmm3 = the two halves of the key state being expanded,
;       xmm0 = round constant (doubled via vpslld after each use),
;       xmm15 = byte-shuffle mask for RotWord, xmm14 = zero,
;       xmm2/xmm4 = scratch, xmm8 = running block cipher state.
; Each "vpshufb/vaesenclast xmm0" group derives an even round key
; (SubWord+RotWord+Rcon); each "vpshufd 0xff/vaesenclast xmm14" group
; derives an odd round key (SubWord only), per the AES-256 schedule.
;-----------------------------------------------------------------------
1788 
1789 ALIGN 16
1790 aes256gcmsiv_aes_ks_enc_x1:
1791 mov QWORD[8+rsp],rdi ;WIN64 prologue
1792 mov QWORD[16+rsp],rsi
1793 mov rax,rsp
1794 $L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1:
1795 mov rdi,rcx
1796 mov rsi,rdx
1797 mov rdx,r8
1798 mov rcx,r9
1799 
1800 
1801 
1802 vmovdqa xmm0,XMMWORD[con1] ; initial round constant
1803 vmovdqa xmm15,XMMWORD[mask] ; RotWord/SubWord shuffle mask
1804 vmovdqa xmm8,XMMWORD[rdi] ; load plaintext block
1805 vmovdqa xmm1,XMMWORD[rcx] ; key words 0..3
1806 vmovdqa xmm3,XMMWORD[16+rcx] ; key words 4..7
1807 vpxor xmm8,xmm8,xmm1 ; round 0: AddRoundKey
1808 vaesenc xmm8,xmm8,xmm3 ; round 1
1809 vmovdqu XMMWORD[rdx],xmm1 ; store round keys 0 and 1
1810 vmovdqu XMMWORD[16+rdx],xmm3
1811 vpxor xmm14,xmm14,xmm14 ; zero, used for Rcon-free vaesenclast
1812 
; round key 2 (even: SubWord+RotWord+Rcon on xmm3, folded into xmm1)
1813 vpshufb xmm2,xmm3,xmm15
1814 vaesenclast xmm2,xmm2,xmm0
1815 vpslld xmm0,xmm0,1 ; double the round constant
1816 vpslldq xmm4,xmm1,4 ; ripple-XOR the previous key words
1817 vpxor xmm1,xmm1,xmm4
1818 vpslldq xmm4,xmm4,4
1819 vpxor xmm1,xmm1,xmm4
1820 vpslldq xmm4,xmm4,4
1821 vpxor xmm1,xmm1,xmm4
1822 vpxor xmm1,xmm1,xmm2
1823 vaesenc xmm8,xmm8,xmm1 ; round 2 of the block encryption
1824 vmovdqu XMMWORD[32+rdx],xmm1
1825 
; round key 3 (odd: SubWord only, no Rcon — xmm14 is zero)
1826 vpshufd xmm2,xmm1,0xff
1827 vaesenclast xmm2,xmm2,xmm14
1828 vpslldq xmm4,xmm3,4
1829 vpxor xmm3,xmm3,xmm4
1830 vpslldq xmm4,xmm4,4
1831 vpxor xmm3,xmm3,xmm4
1832 vpslldq xmm4,xmm4,4
1833 vpxor xmm3,xmm3,xmm4
1834 vpxor xmm3,xmm3,xmm2
1835 vaesenc xmm8,xmm8,xmm3
1836 vmovdqu XMMWORD[48+rdx],xmm3
1837 
; round keys 4/5
1838 vpshufb xmm2,xmm3,xmm15
1839 vaesenclast xmm2,xmm2,xmm0
1840 vpslld xmm0,xmm0,1
1841 vpslldq xmm4,xmm1,4
1842 vpxor xmm1,xmm1,xmm4
1843 vpslldq xmm4,xmm4,4
1844 vpxor xmm1,xmm1,xmm4
1845 vpslldq xmm4,xmm4,4
1846 vpxor xmm1,xmm1,xmm4
1847 vpxor xmm1,xmm1,xmm2
1848 vaesenc xmm8,xmm8,xmm1
1849 vmovdqu XMMWORD[64+rdx],xmm1
1850 
1851 vpshufd xmm2,xmm1,0xff
1852 vaesenclast xmm2,xmm2,xmm14
1853 vpslldq xmm4,xmm3,4
1854 vpxor xmm3,xmm3,xmm4
1855 vpslldq xmm4,xmm4,4
1856 vpxor xmm3,xmm3,xmm4
1857 vpslldq xmm4,xmm4,4
1858 vpxor xmm3,xmm3,xmm4
1859 vpxor xmm3,xmm3,xmm2
1860 vaesenc xmm8,xmm8,xmm3
1861 vmovdqu XMMWORD[80+rdx],xmm3
1862 
; round keys 6/7
1863 vpshufb xmm2,xmm3,xmm15
1864 vaesenclast xmm2,xmm2,xmm0
1865 vpslld xmm0,xmm0,1
1866 vpslldq xmm4,xmm1,4
1867 vpxor xmm1,xmm1,xmm4
1868 vpslldq xmm4,xmm4,4
1869 vpxor xmm1,xmm1,xmm4
1870 vpslldq xmm4,xmm4,4
1871 vpxor xmm1,xmm1,xmm4
1872 vpxor xmm1,xmm1,xmm2
1873 vaesenc xmm8,xmm8,xmm1
1874 vmovdqu XMMWORD[96+rdx],xmm1
1875 
1876 vpshufd xmm2,xmm1,0xff
1877 vaesenclast xmm2,xmm2,xmm14
1878 vpslldq xmm4,xmm3,4
1879 vpxor xmm3,xmm3,xmm4
1880 vpslldq xmm4,xmm4,4
1881 vpxor xmm3,xmm3,xmm4
1882 vpslldq xmm4,xmm4,4
1883 vpxor xmm3,xmm3,xmm4
1884 vpxor xmm3,xmm3,xmm2
1885 vaesenc xmm8,xmm8,xmm3
1886 vmovdqu XMMWORD[112+rdx],xmm3
1887 
; round keys 8/9
1888 vpshufb xmm2,xmm3,xmm15
1889 vaesenclast xmm2,xmm2,xmm0
1890 vpslld xmm0,xmm0,1
1891 vpslldq xmm4,xmm1,4
1892 vpxor xmm1,xmm1,xmm4
1893 vpslldq xmm4,xmm4,4
1894 vpxor xmm1,xmm1,xmm4
1895 vpslldq xmm4,xmm4,4
1896 vpxor xmm1,xmm1,xmm4
1897 vpxor xmm1,xmm1,xmm2
1898 vaesenc xmm8,xmm8,xmm1
1899 vmovdqu XMMWORD[128+rdx],xmm1
1900 
1901 vpshufd xmm2,xmm1,0xff
1902 vaesenclast xmm2,xmm2,xmm14
1903 vpslldq xmm4,xmm3,4
1904 vpxor xmm3,xmm3,xmm4
1905 vpslldq xmm4,xmm4,4
1906 vpxor xmm3,xmm3,xmm4
1907 vpslldq xmm4,xmm4,4
1908 vpxor xmm3,xmm3,xmm4
1909 vpxor xmm3,xmm3,xmm2
1910 vaesenc xmm8,xmm8,xmm3
1911 vmovdqu XMMWORD[144+rdx],xmm3
1912 
; round keys 10/11
1913 vpshufb xmm2,xmm3,xmm15
1914 vaesenclast xmm2,xmm2,xmm0
1915 vpslld xmm0,xmm0,1
1916 vpslldq xmm4,xmm1,4
1917 vpxor xmm1,xmm1,xmm4
1918 vpslldq xmm4,xmm4,4
1919 vpxor xmm1,xmm1,xmm4
1920 vpslldq xmm4,xmm4,4
1921 vpxor xmm1,xmm1,xmm4
1922 vpxor xmm1,xmm1,xmm2
1923 vaesenc xmm8,xmm8,xmm1
1924 vmovdqu XMMWORD[160+rdx],xmm1
1925 
1926 vpshufd xmm2,xmm1,0xff
1927 vaesenclast xmm2,xmm2,xmm14
1928 vpslldq xmm4,xmm3,4
1929 vpxor xmm3,xmm3,xmm4
1930 vpslldq xmm4,xmm4,4
1931 vpxor xmm3,xmm3,xmm4
1932 vpslldq xmm4,xmm4,4
1933 vpxor xmm3,xmm3,xmm4
1934 vpxor xmm3,xmm3,xmm2
1935 vaesenc xmm8,xmm8,xmm3
1936 vmovdqu XMMWORD[176+rdx],xmm3
1937 
; round keys 12/13
1938 vpshufb xmm2,xmm3,xmm15
1939 vaesenclast xmm2,xmm2,xmm0
1940 vpslld xmm0,xmm0,1
1941 vpslldq xmm4,xmm1,4
1942 vpxor xmm1,xmm1,xmm4
1943 vpslldq xmm4,xmm4,4
1944 vpxor xmm1,xmm1,xmm4
1945 vpslldq xmm4,xmm4,4
1946 vpxor xmm1,xmm1,xmm4
1947 vpxor xmm1,xmm1,xmm2
1948 vaesenc xmm8,xmm8,xmm1
1949 vmovdqu XMMWORD[192+rdx],xmm1
1950 
1951 vpshufd xmm2,xmm1,0xff
1952 vaesenclast xmm2,xmm2,xmm14
1953 vpslldq xmm4,xmm3,4
1954 vpxor xmm3,xmm3,xmm4
1955 vpslldq xmm4,xmm4,4
1956 vpxor xmm3,xmm3,xmm4
1957 vpslldq xmm4,xmm4,4
1958 vpxor xmm3,xmm3,xmm4
1959 vpxor xmm3,xmm3,xmm2
1960 vaesenc xmm8,xmm8,xmm3
1961 vmovdqu XMMWORD[208+rdx],xmm3
1962 
; round key 14 (last): no further Rcon doubling needed
1963 vpshufb xmm2,xmm3,xmm15
1964 vaesenclast xmm2,xmm2,xmm0
1965 vpslldq xmm4,xmm1,4
1966 vpxor xmm1,xmm1,xmm4
1967 vpslldq xmm4,xmm4,4
1968 vpxor xmm1,xmm1,xmm4
1969 vpslldq xmm4,xmm4,4
1970 vpxor xmm1,xmm1,xmm4
1971 vpxor xmm1,xmm1,xmm2
1972 vaesenclast xmm8,xmm8,xmm1 ; final encryption round (14)
1973 vmovdqu XMMWORD[224+rdx],xmm1
1974 
1975 vmovdqa XMMWORD[rsi],xmm8 ; store ciphertext block
1976 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1977 mov rsi,QWORD[16+rsp]
1978 DB 0F3h,0C3h ;repret
1979 
1980 $L$SEH_end_aes256gcmsiv_aes_ks_enc_x1:
1981 global aes256gcmsiv_ecb_enc_block
;-----------------------------------------------------------------------
; aes256gcmsiv_ecb_enc_block(in, out, key_schedule)
; Encrypts a single 16-byte block with AES-256: one whitening XOR,
; thirteen vaesenc rounds, one vaesenclast (15 round keys, ks+0..ks+224).
; ABI:  Win64 entry (rcx=in, rdx=out, r8=ks); prologue saves rdi/rsi
;       and remaps args onto the SysV registers used by the body.
; In:   rdi = in   (16-byte block; vmovdqa => assumed 16-byte aligned)
;       rsi = out  (16-byte destination, aligned)
;       rdx = ks   (expanded AES-256 key schedule)
; Clobbers: rax, xmm1, flags.
;-----------------------------------------------------------------------
1982 
1983 ALIGN 16
1984 aes256gcmsiv_ecb_enc_block:
1985 mov QWORD[8+rsp],rdi ;WIN64 prologue
1986 mov QWORD[16+rsp],rsi
1987 mov rax,rsp
1988 $L$SEH_begin_aes256gcmsiv_ecb_enc_block:
1989 mov rdi,rcx
1990 mov rsi,rdx
1991 mov rdx,r8
1992 
1993 
1994 
1995 vmovdqa xmm1,XMMWORD[rdi] ; load plaintext block
1996 vpxor xmm1,xmm1,XMMWORD[rdx] ; round 0: AddRoundKey
1997 vaesenc xmm1,xmm1,XMMWORD[16+rdx] ; rounds 1..13
1998 vaesenc xmm1,xmm1,XMMWORD[32+rdx]
1999 vaesenc xmm1,xmm1,XMMWORD[48+rdx]
2000 vaesenc xmm1,xmm1,XMMWORD[64+rdx]
2001 vaesenc xmm1,xmm1,XMMWORD[80+rdx]
2002 vaesenc xmm1,xmm1,XMMWORD[96+rdx]
2003 vaesenc xmm1,xmm1,XMMWORD[112+rdx]
2004 vaesenc xmm1,xmm1,XMMWORD[128+rdx]
2005 vaesenc xmm1,xmm1,XMMWORD[144+rdx]
2006 vaesenc xmm1,xmm1,XMMWORD[160+rdx]
2007 vaesenc xmm1,xmm1,XMMWORD[176+rdx]
2008 vaesenc xmm1,xmm1,XMMWORD[192+rdx]
2009 vaesenc xmm1,xmm1,XMMWORD[208+rdx]
2010 vaesenclast xmm1,xmm1,XMMWORD[224+rdx] ; final round (14)
2011 vmovdqa XMMWORD[rsi],xmm1 ; store ciphertext block
2012 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2013 mov rsi,QWORD[16+rsp]
2014 DB 0F3h,0C3h ;repret
2015 
2016 $L$SEH_end_aes256gcmsiv_ecb_enc_block:
2017 global aes256gcmsiv_enc_msg_x4
;-----------------------------------------------------------------------
; aes256gcmsiv_enc_msg_x4(in, out, tag, key_schedule, msg_len)
; AES-256 counter-mode encryption, 4 blocks per iteration.
; ABI:  Win64 entry (rcx=in, rdx=tag block, r8=out... remapped below);
;       after the prologue: rdi = in, rsi = out, rdx = initial counter
;       block, rcx = AES-256 key schedule, r8 = length in bytes.
; The initial counter is the block at [rdx] with the top bit forced on
; via OR_MASK (the GCM-SIV tag-to-counter conversion); counters are
; incremented as 32-bit lanes via vpaddd with the one/two/three/four
; constants.
; Regs: xmm0..xmm3 = the next four counter blocks, xmm4 = +4 constant,
;       xmm5..xmm8 = in-flight keystream blocks, xmm12 = round key.
; Clobbers: rax, r8, r10, xmm0-xmm8, xmm12, xmm15, flags.
;-----------------------------------------------------------------------
2018 
2019 ALIGN 16
2020 aes256gcmsiv_enc_msg_x4:
2021 mov QWORD[8+rsp],rdi ;WIN64 prologue
2022 mov QWORD[16+rsp],rsi
2023 mov rax,rsp
2024 $L$SEH_begin_aes256gcmsiv_enc_msg_x4:
2025 mov rdi,rcx
2026 mov rsi,rdx
2027 mov rdx,r8
2028 mov rcx,r9
2029 mov r8,QWORD[40+rsp] ; 5th arg from the Win64 stack
2030 
2031 
2032 
2033 test r8,r8 ; zero-length message: nothing to do
2034 jnz NEAR $L$256_enc_msg_x4_start
2035 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2036 mov rsi,QWORD[16+rsp]
2037 DB 0F3h,0C3h ;repret
2038 
2039 $L$256_enc_msg_x4_start:
; r8 = number of 16-byte blocks, rounded up for a ragged tail
2040 mov r10,r8
2041 shr r8,4
2042 shl r10,60
2043 jz NEAR $L$256_enc_msg_x4_start2
2044 add r8,1
2045 
2046 $L$256_enc_msg_x4_start2:
2047 mov r10,r8
2048 shl r10,62 ; r10 = block count mod 4 (remainder blocks)
2049 shr r10,62
2050 
2051 
2052 vmovdqa xmm15,XMMWORD[rdx] ; initial counter block
2053 vpor xmm15,xmm15,XMMWORD[OR_MASK] ; set top bit per GCM-SIV
2054 
2055 vmovdqa xmm4,XMMWORD[four]
2056 vmovdqa xmm0,xmm15 ; counters n, n+1, n+2, n+3
2057 vpaddd xmm1,xmm15,XMMWORD[one]
2058 vpaddd xmm2,xmm15,XMMWORD[two]
2059 vpaddd xmm3,xmm15,XMMWORD[three]
2060 
2061 shr r8,2 ; r8 = number of 4-block iterations
2062 je NEAR $L$256_enc_msg_x4_check_remainder
2063 
2064 sub rsi,64 ; pre-bias pointers; loop adds 64 up front
2065 sub rdi,64
2066 
2067 $L$256_enc_msg_x4_loop1:
2068 add rsi,64
2069 add rdi,64
2070 
2071 vmovdqa xmm5,xmm0 ; snapshot the four counters
2072 vmovdqa xmm6,xmm1
2073 vmovdqa xmm7,xmm2
2074 vmovdqa xmm8,xmm3
2075 
2076 vpxor xmm5,xmm5,XMMWORD[rcx] ; round 0 on all four lanes
2077 vpxor xmm6,xmm6,XMMWORD[rcx]
2078 vpxor xmm7,xmm7,XMMWORD[rcx]
2079 vpxor xmm8,xmm8,XMMWORD[rcx]
2080 
2081 vmovdqu xmm12,XMMWORD[16+rcx] ; rounds 1..13 interleaved with
2082 vaesenc xmm5,xmm5,xmm12 ; advancing the counters by 4
2083 vaesenc xmm6,xmm6,xmm12
2084 vaesenc xmm7,xmm7,xmm12
2085 vaesenc xmm8,xmm8,xmm12
2086 
2087 vpaddd xmm0,xmm0,xmm4
2088 vmovdqu xmm12,XMMWORD[32+rcx]
2089 vaesenc xmm5,xmm5,xmm12
2090 vaesenc xmm6,xmm6,xmm12
2091 vaesenc xmm7,xmm7,xmm12
2092 vaesenc xmm8,xmm8,xmm12
2093 
2094 vpaddd xmm1,xmm1,xmm4
2095 vmovdqu xmm12,XMMWORD[48+rcx]
2096 vaesenc xmm5,xmm5,xmm12
2097 vaesenc xmm6,xmm6,xmm12
2098 vaesenc xmm7,xmm7,xmm12
2099 vaesenc xmm8,xmm8,xmm12
2100 
2101 vpaddd xmm2,xmm2,xmm4
2102 vmovdqu xmm12,XMMWORD[64+rcx]
2103 vaesenc xmm5,xmm5,xmm12
2104 vaesenc xmm6,xmm6,xmm12
2105 vaesenc xmm7,xmm7,xmm12
2106 vaesenc xmm8,xmm8,xmm12
2107 
2108 vpaddd xmm3,xmm3,xmm4
2109 
2110 vmovdqu xmm12,XMMWORD[80+rcx]
2111 vaesenc xmm5,xmm5,xmm12
2112 vaesenc xmm6,xmm6,xmm12
2113 vaesenc xmm7,xmm7,xmm12
2114 vaesenc xmm8,xmm8,xmm12
2115 
2116 vmovdqu xmm12,XMMWORD[96+rcx]
2117 vaesenc xmm5,xmm5,xmm12
2118 vaesenc xmm6,xmm6,xmm12
2119 vaesenc xmm7,xmm7,xmm12
2120 vaesenc xmm8,xmm8,xmm12
2121 
2122 vmovdqu xmm12,XMMWORD[112+rcx]
2123 vaesenc xmm5,xmm5,xmm12
2124 vaesenc xmm6,xmm6,xmm12
2125 vaesenc xmm7,xmm7,xmm12
2126 vaesenc xmm8,xmm8,xmm12
2127 
2128 vmovdqu xmm12,XMMWORD[128+rcx]
2129 vaesenc xmm5,xmm5,xmm12
2130 vaesenc xmm6,xmm6,xmm12
2131 vaesenc xmm7,xmm7,xmm12
2132 vaesenc xmm8,xmm8,xmm12
2133 
2134 vmovdqu xmm12,XMMWORD[144+rcx]
2135 vaesenc xmm5,xmm5,xmm12
2136 vaesenc xmm6,xmm6,xmm12
2137 vaesenc xmm7,xmm7,xmm12
2138 vaesenc xmm8,xmm8,xmm12
2139 
2140 vmovdqu xmm12,XMMWORD[160+rcx]
2141 vaesenc xmm5,xmm5,xmm12
2142 vaesenc xmm6,xmm6,xmm12
2143 vaesenc xmm7,xmm7,xmm12
2144 vaesenc xmm8,xmm8,xmm12
2145 
2146 vmovdqu xmm12,XMMWORD[176+rcx]
2147 vaesenc xmm5,xmm5,xmm12
2148 vaesenc xmm6,xmm6,xmm12
2149 vaesenc xmm7,xmm7,xmm12
2150 vaesenc xmm8,xmm8,xmm12
2151 
2152 vmovdqu xmm12,XMMWORD[192+rcx]
2153 vaesenc xmm5,xmm5,xmm12
2154 vaesenc xmm6,xmm6,xmm12
2155 vaesenc xmm7,xmm7,xmm12
2156 vaesenc xmm8,xmm8,xmm12
2157 
2158 vmovdqu xmm12,XMMWORD[208+rcx]
2159 vaesenc xmm5,xmm5,xmm12
2160 vaesenc xmm6,xmm6,xmm12
2161 vaesenc xmm7,xmm7,xmm12
2162 vaesenc xmm8,xmm8,xmm12
2163 
2164 vmovdqu xmm12,XMMWORD[224+rcx] ; final round (14)
2165 vaesenclast xmm5,xmm5,xmm12
2166 vaesenclast xmm6,xmm6,xmm12
2167 vaesenclast xmm7,xmm7,xmm12
2168 vaesenclast xmm8,xmm8,xmm12
2169 
2170 
2171 
2172 vpxor xmm5,xmm5,XMMWORD[rdi] ; XOR keystream with plaintext
2173 vpxor xmm6,xmm6,XMMWORD[16+rdi]
2174 vpxor xmm7,xmm7,XMMWORD[32+rdi]
2175 vpxor xmm8,xmm8,XMMWORD[48+rdi]
2176 
2177 sub r8,1
2178 
2179 vmovdqu XMMWORD[rsi],xmm5 ; store 64 bytes of ciphertext
2180 vmovdqu XMMWORD[16+rsi],xmm6
2181 vmovdqu XMMWORD[32+rsi],xmm7
2182 vmovdqu XMMWORD[48+rsi],xmm8
2183 
2184 jne NEAR $L$256_enc_msg_x4_loop1
2185 
2186 add rsi,64
2187 add rdi,64
2188 
2189 $L$256_enc_msg_x4_check_remainder:
2190 cmp r10,0
2191 je NEAR $L$256_enc_msg_x4_out
2192 
; tail loop: one block at a time for the remaining 1..3 blocks
2193 $L$256_enc_msg_x4_loop2:
2194 
2195 
2196 
2197 vmovdqa xmm5,xmm0
2198 vpaddd xmm0,xmm0,XMMWORD[one] ; bump counter by one
2199 vpxor xmm5,xmm5,XMMWORD[rcx]
2200 vaesenc xmm5,xmm5,XMMWORD[16+rcx]
2201 vaesenc xmm5,xmm5,XMMWORD[32+rcx]
2202 vaesenc xmm5,xmm5,XMMWORD[48+rcx]
2203 vaesenc xmm5,xmm5,XMMWORD[64+rcx]
2204 vaesenc xmm5,xmm5,XMMWORD[80+rcx]
2205 vaesenc xmm5,xmm5,XMMWORD[96+rcx]
2206 vaesenc xmm5,xmm5,XMMWORD[112+rcx]
2207 vaesenc xmm5,xmm5,XMMWORD[128+rcx]
2208 vaesenc xmm5,xmm5,XMMWORD[144+rcx]
2209 vaesenc xmm5,xmm5,XMMWORD[160+rcx]
2210 vaesenc xmm5,xmm5,XMMWORD[176+rcx]
2211 vaesenc xmm5,xmm5,XMMWORD[192+rcx]
2212 vaesenc xmm5,xmm5,XMMWORD[208+rcx]
2213 vaesenclast xmm5,xmm5,XMMWORD[224+rcx]
2214 
2215 
2216 vpxor xmm5,xmm5,XMMWORD[rdi]
2217 
2218 vmovdqu XMMWORD[rsi],xmm5
2219 
2220 add rdi,16
2221 add rsi,16
2222 
2223 sub r10,1
2224 jne NEAR $L$256_enc_msg_x4_loop2
2225 
2226 $L$256_enc_msg_x4_out:
2227 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2228 mov rsi,QWORD[16+rsp]
2229 DB 0F3h,0C3h ;repret
2230 
2231 $L$SEH_end_aes256gcmsiv_enc_msg_x4:
2232 global aes256gcmsiv_enc_msg_x8
;-----------------------------------------------------------------------
; aes256gcmsiv_enc_msg_x8 — AES-256 counter mode, 8 blocks / iteration.
; ABI:  Win64 entry (args remapped below); after the prologue:
;       rdi = in, rsi = out, rdx = initial counter block,
;       rcx = AES-256 key schedule, r8 = message length in bytes.
; The initial counter is [rdx] with its top bit forced on (OR_MASK).
; Only 16 xmm registers exist, so the 8th counter (base+7) lives in a
; 64-byte-aligned scratch slot at [r11] and is reloaded/advanced inside
; the loop via xmm14, which doubles as the 7th counter register.
; NOTE(review): r11 = (rsp-16) & -64 points BELOW rsp and is written
; without adjusting rsp; Win64 has no red zone — appears intentional in
; this generated code (leaf function), but worth confirming upstream.
; Regs: xmm0,xmm9..xmm14 = counters n..n+6 (xmm0 = n), [r11] = n+7,
;       xmm1..xmm8 = in-flight keystream blocks, xmm15 = round key.
; Clobbers: rax, r8, r10, r11, xmm0-xmm15, flags.
;-----------------------------------------------------------------------
2233 
2234 ALIGN 16
2235 aes256gcmsiv_enc_msg_x8:
2236 mov QWORD[8+rsp],rdi ;WIN64 prologue
2237 mov QWORD[16+rsp],rsi
2238 mov rax,rsp
2239 $L$SEH_begin_aes256gcmsiv_enc_msg_x8:
2240 mov rdi,rcx
2241 mov rsi,rdx
2242 mov rdx,r8
2243 mov rcx,r9
2244 mov r8,QWORD[40+rsp] ; 5th arg from the Win64 stack
2245 
2246 
2247 
2248 test r8,r8 ; zero-length message: nothing to do
2249 jnz NEAR $L$256_enc_msg_x8_start
2250 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2251 mov rsi,QWORD[16+rsp]
2252 DB 0F3h,0C3h ;repret
2253 
2254 $L$256_enc_msg_x8_start:
2255 
2256 mov r11,rsp ; 64-aligned scratch slot below rsp
2257 sub r11,16
2258 and r11,-64
2259 
; r8 = number of 16-byte blocks, rounded up for a ragged tail
2260 mov r10,r8
2261 shr r8,4
2262 shl r10,60
2263 jz NEAR $L$256_enc_msg_x8_start2
2264 add r8,1
2265 
2266 $L$256_enc_msg_x8_start2:
2267 mov r10,r8
2268 shl r10,61 ; r10 = block count mod 8 (remainder blocks)
2269 shr r10,61
2270 
2271 
2272 vmovdqa xmm1,XMMWORD[rdx] ; initial counter block
2273 vpor xmm1,xmm1,XMMWORD[OR_MASK] ; set top bit per GCM-SIV
2274 
2275 
2276 vpaddd xmm0,xmm1,XMMWORD[seven] ; 8th counter lives at [r11]
2277 vmovdqa XMMWORD[r11],xmm0
2278 vpaddd xmm9,xmm1,XMMWORD[one] ; counters n+1..n+6
2279 vpaddd xmm10,xmm1,XMMWORD[two]
2280 vpaddd xmm11,xmm1,XMMWORD[three]
2281 vpaddd xmm12,xmm1,XMMWORD[four]
2282 vpaddd xmm13,xmm1,XMMWORD[five]
2283 vpaddd xmm14,xmm1,XMMWORD[six]
2284 vmovdqa xmm0,xmm1 ; counter n
2285 
2286 shr r8,3 ; r8 = number of 8-block iterations
2287 jz NEAR $L$256_enc_msg_x8_check_remainder
2288 
2289 sub rsi,128 ; pre-bias pointers; loop adds 128 up front
2290 sub rdi,128
2291 
2292 $L$256_enc_msg_x8_loop1:
2293 add rsi,128
2294 add rdi,128
2295 
2296 vmovdqa xmm1,xmm0 ; snapshot the eight counters
2297 vmovdqa xmm2,xmm9
2298 vmovdqa xmm3,xmm10
2299 vmovdqa xmm4,xmm11
2300 vmovdqa xmm5,xmm12
2301 vmovdqa xmm6,xmm13
2302 vmovdqa xmm7,xmm14
2303 
2304 vmovdqa xmm8,XMMWORD[r11] ; 8th counter from scratch
2305 
2306 vpxor xmm1,xmm1,XMMWORD[rcx] ; round 0 on all eight lanes
2307 vpxor xmm2,xmm2,XMMWORD[rcx]
2308 vpxor xmm3,xmm3,XMMWORD[rcx]
2309 vpxor xmm4,xmm4,XMMWORD[rcx]
2310 vpxor xmm5,xmm5,XMMWORD[rcx]
2311 vpxor xmm6,xmm6,XMMWORD[rcx]
2312 vpxor xmm7,xmm7,XMMWORD[rcx]
2313 vpxor xmm8,xmm8,XMMWORD[rcx]
2314 
2315 vmovdqu xmm15,XMMWORD[16+rcx] ; rounds 1..13 interleaved with
2316 vaesenc xmm1,xmm1,xmm15 ; advancing all counters by 8
2317 vaesenc xmm2,xmm2,xmm15
2318 vaesenc xmm3,xmm3,xmm15
2319 vaesenc xmm4,xmm4,xmm15
2320 vaesenc xmm5,xmm5,xmm15
2321 vaesenc xmm6,xmm6,xmm15
2322 vaesenc xmm7,xmm7,xmm15
2323 vaesenc xmm8,xmm8,xmm15
2324 
2325 vmovdqa xmm14,XMMWORD[r11] ; advance the spilled counter,
2326 vpaddd xmm14,xmm14,XMMWORD[eight] ; then rebuild xmm14 = n+6
2327 vmovdqa XMMWORD[r11],xmm14 ; below via vpsubd one
2328 vmovdqu xmm15,XMMWORD[32+rcx]
2329 vaesenc xmm1,xmm1,xmm15
2330 vaesenc xmm2,xmm2,xmm15
2331 vaesenc xmm3,xmm3,xmm15
2332 vaesenc xmm4,xmm4,xmm15
2333 vaesenc xmm5,xmm5,xmm15
2334 vaesenc xmm6,xmm6,xmm15
2335 vaesenc xmm7,xmm7,xmm15
2336 vaesenc xmm8,xmm8,xmm15
2337 
2338 vpsubd xmm14,xmm14,XMMWORD[one] ; xmm14 = new n+6
2339 vmovdqu xmm15,XMMWORD[48+rcx]
2340 vaesenc xmm1,xmm1,xmm15
2341 vaesenc xmm2,xmm2,xmm15
2342 vaesenc xmm3,xmm3,xmm15
2343 vaesenc xmm4,xmm4,xmm15
2344 vaesenc xmm5,xmm5,xmm15
2345 vaesenc xmm6,xmm6,xmm15
2346 vaesenc xmm7,xmm7,xmm15
2347 vaesenc xmm8,xmm8,xmm15
2348 
2349 vpaddd xmm0,xmm0,XMMWORD[eight]
2350 vmovdqu xmm15,XMMWORD[64+rcx]
2351 vaesenc xmm1,xmm1,xmm15
2352 vaesenc xmm2,xmm2,xmm15
2353 vaesenc xmm3,xmm3,xmm15
2354 vaesenc xmm4,xmm4,xmm15
2355 vaesenc xmm5,xmm5,xmm15
2356 vaesenc xmm6,xmm6,xmm15
2357 vaesenc xmm7,xmm7,xmm15
2358 vaesenc xmm8,xmm8,xmm15
2359 
2360 vpaddd xmm9,xmm9,XMMWORD[eight]
2361 vmovdqu xmm15,XMMWORD[80+rcx]
2362 vaesenc xmm1,xmm1,xmm15
2363 vaesenc xmm2,xmm2,xmm15
2364 vaesenc xmm3,xmm3,xmm15
2365 vaesenc xmm4,xmm4,xmm15
2366 vaesenc xmm5,xmm5,xmm15
2367 vaesenc xmm6,xmm6,xmm15
2368 vaesenc xmm7,xmm7,xmm15
2369 vaesenc xmm8,xmm8,xmm15
2370 
2371 vpaddd xmm10,xmm10,XMMWORD[eight]
2372 vmovdqu xmm15,XMMWORD[96+rcx]
2373 vaesenc xmm1,xmm1,xmm15
2374 vaesenc xmm2,xmm2,xmm15
2375 vaesenc xmm3,xmm3,xmm15
2376 vaesenc xmm4,xmm4,xmm15
2377 vaesenc xmm5,xmm5,xmm15
2378 vaesenc xmm6,xmm6,xmm15
2379 vaesenc xmm7,xmm7,xmm15
2380 vaesenc xmm8,xmm8,xmm15
2381 
2382 vpaddd xmm11,xmm11,XMMWORD[eight]
2383 vmovdqu xmm15,XMMWORD[112+rcx]
2384 vaesenc xmm1,xmm1,xmm15
2385 vaesenc xmm2,xmm2,xmm15
2386 vaesenc xmm3,xmm3,xmm15
2387 vaesenc xmm4,xmm4,xmm15
2388 vaesenc xmm5,xmm5,xmm15
2389 vaesenc xmm6,xmm6,xmm15
2390 vaesenc xmm7,xmm7,xmm15
2391 vaesenc xmm8,xmm8,xmm15
2392 
2393 vpaddd xmm12,xmm12,XMMWORD[eight]
2394 vmovdqu xmm15,XMMWORD[128+rcx]
2395 vaesenc xmm1,xmm1,xmm15
2396 vaesenc xmm2,xmm2,xmm15
2397 vaesenc xmm3,xmm3,xmm15
2398 vaesenc xmm4,xmm4,xmm15
2399 vaesenc xmm5,xmm5,xmm15
2400 vaesenc xmm6,xmm6,xmm15
2401 vaesenc xmm7,xmm7,xmm15
2402 vaesenc xmm8,xmm8,xmm15
2403 
2404 vpaddd xmm13,xmm13,XMMWORD[eight]
2405 vmovdqu xmm15,XMMWORD[144+rcx]
2406 vaesenc xmm1,xmm1,xmm15
2407 vaesenc xmm2,xmm2,xmm15
2408 vaesenc xmm3,xmm3,xmm15
2409 vaesenc xmm4,xmm4,xmm15
2410 vaesenc xmm5,xmm5,xmm15
2411 vaesenc xmm6,xmm6,xmm15
2412 vaesenc xmm7,xmm7,xmm15
2413 vaesenc xmm8,xmm8,xmm15
2414 
2415 vmovdqu xmm15,XMMWORD[160+rcx]
2416 vaesenc xmm1,xmm1,xmm15
2417 vaesenc xmm2,xmm2,xmm15
2418 vaesenc xmm3,xmm3,xmm15
2419 vaesenc xmm4,xmm4,xmm15
2420 vaesenc xmm5,xmm5,xmm15
2421 vaesenc xmm6,xmm6,xmm15
2422 vaesenc xmm7,xmm7,xmm15
2423 vaesenc xmm8,xmm8,xmm15
2424 
2425 vmovdqu xmm15,XMMWORD[176+rcx]
2426 vaesenc xmm1,xmm1,xmm15
2427 vaesenc xmm2,xmm2,xmm15
2428 vaesenc xmm3,xmm3,xmm15
2429 vaesenc xmm4,xmm4,xmm15
2430 vaesenc xmm5,xmm5,xmm15
2431 vaesenc xmm6,xmm6,xmm15
2432 vaesenc xmm7,xmm7,xmm15
2433 vaesenc xmm8,xmm8,xmm15
2434 
2435 vmovdqu xmm15,XMMWORD[192+rcx]
2436 vaesenc xmm1,xmm1,xmm15
2437 vaesenc xmm2,xmm2,xmm15
2438 vaesenc xmm3,xmm3,xmm15
2439 vaesenc xmm4,xmm4,xmm15
2440 vaesenc xmm5,xmm5,xmm15
2441 vaesenc xmm6,xmm6,xmm15
2442 vaesenc xmm7,xmm7,xmm15
2443 vaesenc xmm8,xmm8,xmm15
2444 
2445 vmovdqu xmm15,XMMWORD[208+rcx]
2446 vaesenc xmm1,xmm1,xmm15
2447 vaesenc xmm2,xmm2,xmm15
2448 vaesenc xmm3,xmm3,xmm15
2449 vaesenc xmm4,xmm4,xmm15
2450 vaesenc xmm5,xmm5,xmm15
2451 vaesenc xmm6,xmm6,xmm15
2452 vaesenc xmm7,xmm7,xmm15
2453 vaesenc xmm8,xmm8,xmm15
2454 
2455 vmovdqu xmm15,XMMWORD[224+rcx] ; final round (14)
2456 vaesenclast xmm1,xmm1,xmm15
2457 vaesenclast xmm2,xmm2,xmm15
2458 vaesenclast xmm3,xmm3,xmm15
2459 vaesenclast xmm4,xmm4,xmm15
2460 vaesenclast xmm5,xmm5,xmm15
2461 vaesenclast xmm6,xmm6,xmm15
2462 vaesenclast xmm7,xmm7,xmm15
2463 vaesenclast xmm8,xmm8,xmm15
2464 
2465 
2466 
2467 vpxor xmm1,xmm1,XMMWORD[rdi] ; XOR keystream with plaintext
2468 vpxor xmm2,xmm2,XMMWORD[16+rdi]
2469 vpxor xmm3,xmm3,XMMWORD[32+rdi]
2470 vpxor xmm4,xmm4,XMMWORD[48+rdi]
2471 vpxor xmm5,xmm5,XMMWORD[64+rdi]
2472 vpxor xmm6,xmm6,XMMWORD[80+rdi]
2473 vpxor xmm7,xmm7,XMMWORD[96+rdi]
2474 vpxor xmm8,xmm8,XMMWORD[112+rdi]
2475 
2476 sub r8,1
2477 
2478 vmovdqu XMMWORD[rsi],xmm1 ; store 128 bytes of ciphertext
2479 vmovdqu XMMWORD[16+rsi],xmm2
2480 vmovdqu XMMWORD[32+rsi],xmm3
2481 vmovdqu XMMWORD[48+rsi],xmm4
2482 vmovdqu XMMWORD[64+rsi],xmm5
2483 vmovdqu XMMWORD[80+rsi],xmm6
2484 vmovdqu XMMWORD[96+rsi],xmm7
2485 vmovdqu XMMWORD[112+rsi],xmm8
2486 
2487 jne NEAR $L$256_enc_msg_x8_loop1
2488 
2489 add rsi,128
2490 add rdi,128
2491 
2492 $L$256_enc_msg_x8_check_remainder:
2493 cmp r10,0
2494 je NEAR $L$256_enc_msg_x8_out
2495 
; tail loop: one block at a time for the remaining 1..7 blocks
2496 $L$256_enc_msg_x8_loop2:
2497 
2498 
2499 vmovdqa xmm1,xmm0
2500 vpaddd xmm0,xmm0,XMMWORD[one] ; bump counter by one
2501 
2502 vpxor xmm1,xmm1,XMMWORD[rcx]
2503 vaesenc xmm1,xmm1,XMMWORD[16+rcx]
2504 vaesenc xmm1,xmm1,XMMWORD[32+rcx]
2505 vaesenc xmm1,xmm1,XMMWORD[48+rcx]
2506 vaesenc xmm1,xmm1,XMMWORD[64+rcx]
2507 vaesenc xmm1,xmm1,XMMWORD[80+rcx]
2508 vaesenc xmm1,xmm1,XMMWORD[96+rcx]
2509 vaesenc xmm1,xmm1,XMMWORD[112+rcx]
2510 vaesenc xmm1,xmm1,XMMWORD[128+rcx]
2511 vaesenc xmm1,xmm1,XMMWORD[144+rcx]
2512 vaesenc xmm1,xmm1,XMMWORD[160+rcx]
2513 vaesenc xmm1,xmm1,XMMWORD[176+rcx]
2514 vaesenc xmm1,xmm1,XMMWORD[192+rcx]
2515 vaesenc xmm1,xmm1,XMMWORD[208+rcx]
2516 vaesenclast xmm1,xmm1,XMMWORD[224+rcx]
2517 
2518 
2519 vpxor xmm1,xmm1,XMMWORD[rdi]
2520 
2521 vmovdqu XMMWORD[rsi],xmm1
2522 
2523 add rdi,16
2524 add rsi,16
2525 sub r10,1
2526 jnz NEAR $L$256_enc_msg_x8_loop2
2527 
2528 $L$256_enc_msg_x8_out:
2529 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2530 mov rsi,QWORD[16+rsp]
2531 DB 0F3h,0C3h ;repret
2532 
2533 
2534 $L$SEH_end_aes256gcmsiv_enc_msg_x8:
2535 global aes256gcmsiv_dec
2536
2537 ALIGN 16
2538 aes256gcmsiv_dec:
2539 mov QWORD[8+rsp],rdi ;WIN64 prologue
2540 mov QWORD[16+rsp],rsi
2541 mov rax,rsp
2542 $L$SEH_begin_aes256gcmsiv_dec:
2543 mov rdi,rcx
2544 mov rsi,rdx
2545 mov rdx,r8
2546 mov rcx,r9
2547 mov r8,QWORD[40+rsp]
2548 mov r9,QWORD[48+rsp]
2549
2550
2551
2552 test r9,~15
2553 jnz NEAR $L$256_dec_start
2554 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2555 mov rsi,QWORD[16+rsp]
2556 DB 0F3h,0C3h ;repret
2557
2558 $L$256_dec_start:
2559 vzeroupper
2560 vmovdqa xmm0,XMMWORD[rdx]
2561 mov rax,rdx
2562
2563 lea rax,[32+rax]
2564 lea rcx,[32+rcx]
2565
2566
2567 vmovdqu xmm15,XMMWORD[r9*1+rdi]
2568 vpor xmm15,xmm15,XMMWORD[OR_MASK]
2569 and r9,~15
2570
2571
2572 cmp r9,96
2573 jb NEAR $L$256_dec_loop2
2574
2575
2576 sub r9,96
2577 vmovdqa xmm7,xmm15
2578 vpaddd xmm8,xmm7,XMMWORD[one]
2579 vpaddd xmm9,xmm7,XMMWORD[two]
2580 vpaddd xmm10,xmm9,XMMWORD[one]
2581 vpaddd xmm11,xmm9,XMMWORD[two]
2582 vpaddd xmm12,xmm11,XMMWORD[one]
2583 vpaddd xmm15,xmm11,XMMWORD[two]
2584
2585 vpxor xmm7,xmm7,XMMWORD[r8]
2586 vpxor xmm8,xmm8,XMMWORD[r8]
2587 vpxor xmm9,xmm9,XMMWORD[r8]
2588 vpxor xmm10,xmm10,XMMWORD[r8]
2589 vpxor xmm11,xmm11,XMMWORD[r8]
2590 vpxor xmm12,xmm12,XMMWORD[r8]
2591
2592 vmovdqu xmm4,XMMWORD[16+r8]
2593 vaesenc xmm7,xmm7,xmm4
2594 vaesenc xmm8,xmm8,xmm4
2595 vaesenc xmm9,xmm9,xmm4
2596 vaesenc xmm10,xmm10,xmm4
2597 vaesenc xmm11,xmm11,xmm4
2598 vaesenc xmm12,xmm12,xmm4
2599
2600 vmovdqu xmm4,XMMWORD[32+r8]
2601 vaesenc xmm7,xmm7,xmm4
2602 vaesenc xmm8,xmm8,xmm4
2603 vaesenc xmm9,xmm9,xmm4
2604 vaesenc xmm10,xmm10,xmm4
2605 vaesenc xmm11,xmm11,xmm4
2606 vaesenc xmm12,xmm12,xmm4
2607
2608 vmovdqu xmm4,XMMWORD[48+r8]
2609 vaesenc xmm7,xmm7,xmm4
2610 vaesenc xmm8,xmm8,xmm4
2611 vaesenc xmm9,xmm9,xmm4
2612 vaesenc xmm10,xmm10,xmm4
2613 vaesenc xmm11,xmm11,xmm4
2614 vaesenc xmm12,xmm12,xmm4
2615
2616 vmovdqu xmm4,XMMWORD[64+r8]
2617 vaesenc xmm7,xmm7,xmm4
2618 vaesenc xmm8,xmm8,xmm4
2619 vaesenc xmm9,xmm9,xmm4
2620 vaesenc xmm10,xmm10,xmm4
2621 vaesenc xmm11,xmm11,xmm4
2622 vaesenc xmm12,xmm12,xmm4
2623
2624 vmovdqu xmm4,XMMWORD[80+r8]
2625 vaesenc xmm7,xmm7,xmm4
2626 vaesenc xmm8,xmm8,xmm4
2627 vaesenc xmm9,xmm9,xmm4
2628 vaesenc xmm10,xmm10,xmm4
2629 vaesenc xmm11,xmm11,xmm4
2630 vaesenc xmm12,xmm12,xmm4
2631
2632 vmovdqu xmm4,XMMWORD[96+r8]
2633 vaesenc xmm7,xmm7,xmm4
2634 vaesenc xmm8,xmm8,xmm4
2635 vaesenc xmm9,xmm9,xmm4
2636 vaesenc xmm10,xmm10,xmm4
2637 vaesenc xmm11,xmm11,xmm4
2638 vaesenc xmm12,xmm12,xmm4
2639
2640 vmovdqu xmm4,XMMWORD[112+r8]
2641 vaesenc xmm7,xmm7,xmm4
2642 vaesenc xmm8,xmm8,xmm4
2643 vaesenc xmm9,xmm9,xmm4
2644 vaesenc xmm10,xmm10,xmm4
2645 vaesenc xmm11,xmm11,xmm4
2646 vaesenc xmm12,xmm12,xmm4
2647
2648 vmovdqu xmm4,XMMWORD[128+r8]
2649 vaesenc xmm7,xmm7,xmm4
2650 vaesenc xmm8,xmm8,xmm4
2651 vaesenc xmm9,xmm9,xmm4
2652 vaesenc xmm10,xmm10,xmm4
2653 vaesenc xmm11,xmm11,xmm4
2654 vaesenc xmm12,xmm12,xmm4
2655
2656 vmovdqu xmm4,XMMWORD[144+r8]
2657 vaesenc xmm7,xmm7,xmm4
2658 vaesenc xmm8,xmm8,xmm4
2659 vaesenc xmm9,xmm9,xmm4
2660 vaesenc xmm10,xmm10,xmm4
2661 vaesenc xmm11,xmm11,xmm4
2662 vaesenc xmm12,xmm12,xmm4
2663
2664 vmovdqu xmm4,XMMWORD[160+r8]
2665 vaesenc xmm7,xmm7,xmm4
2666 vaesenc xmm8,xmm8,xmm4
2667 vaesenc xmm9,xmm9,xmm4
2668 vaesenc xmm10,xmm10,xmm4
2669 vaesenc xmm11,xmm11,xmm4
2670 vaesenc xmm12,xmm12,xmm4
2671
2672 vmovdqu xmm4,XMMWORD[176+r8]
2673 vaesenc xmm7,xmm7,xmm4
2674 vaesenc xmm8,xmm8,xmm4
2675 vaesenc xmm9,xmm9,xmm4
2676 vaesenc xmm10,xmm10,xmm4
2677 vaesenc xmm11,xmm11,xmm4
2678 vaesenc xmm12,xmm12,xmm4
2679
2680 vmovdqu xmm4,XMMWORD[192+r8]
2681 vaesenc xmm7,xmm7,xmm4
2682 vaesenc xmm8,xmm8,xmm4
2683 vaesenc xmm9,xmm9,xmm4
2684 vaesenc xmm10,xmm10,xmm4
2685 vaesenc xmm11,xmm11,xmm4
2686 vaesenc xmm12,xmm12,xmm4
2687
2688 vmovdqu xmm4,XMMWORD[208+r8]
2689 vaesenc xmm7,xmm7,xmm4
2690 vaesenc xmm8,xmm8,xmm4
2691 vaesenc xmm9,xmm9,xmm4
2692 vaesenc xmm10,xmm10,xmm4
2693 vaesenc xmm11,xmm11,xmm4
2694 vaesenc xmm12,xmm12,xmm4
2695
2696 vmovdqu xmm4,XMMWORD[224+r8]
2697 vaesenclast xmm7,xmm7,xmm4
2698 vaesenclast xmm8,xmm8,xmm4
2699 vaesenclast xmm9,xmm9,xmm4
2700 vaesenclast xmm10,xmm10,xmm4
2701 vaesenclast xmm11,xmm11,xmm4
2702 vaesenclast xmm12,xmm12,xmm4
2703
2704
2705 vpxor xmm7,xmm7,XMMWORD[rdi]
2706 vpxor xmm8,xmm8,XMMWORD[16+rdi]
2707 vpxor xmm9,xmm9,XMMWORD[32+rdi]
2708 vpxor xmm10,xmm10,XMMWORD[48+rdi]
2709 vpxor xmm11,xmm11,XMMWORD[64+rdi]
2710 vpxor xmm12,xmm12,XMMWORD[80+rdi]
2711
2712 vmovdqu XMMWORD[rsi],xmm7
2713 vmovdqu XMMWORD[16+rsi],xmm8
2714 vmovdqu XMMWORD[32+rsi],xmm9
2715 vmovdqu XMMWORD[48+rsi],xmm10
2716 vmovdqu XMMWORD[64+rsi],xmm11
2717 vmovdqu XMMWORD[80+rsi],xmm12
2718
2719 add rdi,96
2720 add rsi,96
2721 jmp NEAR $L$256_dec_loop1
2722
2723
2724 ALIGN 64
2725 $L$256_dec_loop1:
2726 cmp r9,96
2727 jb NEAR $L$256_dec_finish_96
2728 sub r9,96
2729
2730 vmovdqa xmm6,xmm12
2731 vmovdqa XMMWORD[(16-32)+rax],xmm11
2732 vmovdqa XMMWORD[(32-32)+rax],xmm10
2733 vmovdqa XMMWORD[(48-32)+rax],xmm9
2734 vmovdqa XMMWORD[(64-32)+rax],xmm8
2735 vmovdqa XMMWORD[(80-32)+rax],xmm7
2736
2737 vmovdqa xmm7,xmm15
2738 vpaddd xmm8,xmm7,XMMWORD[one]
2739 vpaddd xmm9,xmm7,XMMWORD[two]
2740 vpaddd xmm10,xmm9,XMMWORD[one]
2741 vpaddd xmm11,xmm9,XMMWORD[two]
2742 vpaddd xmm12,xmm11,XMMWORD[one]
2743 vpaddd xmm15,xmm11,XMMWORD[two]
2744
2745 vmovdqa xmm4,XMMWORD[r8]
2746 vpxor xmm7,xmm7,xmm4
2747 vpxor xmm8,xmm8,xmm4
2748 vpxor xmm9,xmm9,xmm4
2749 vpxor xmm10,xmm10,xmm4
2750 vpxor xmm11,xmm11,xmm4
2751 vpxor xmm12,xmm12,xmm4
2752
2753 vmovdqu xmm4,XMMWORD[((0-32))+rcx]
2754 vpclmulqdq xmm2,xmm6,xmm4,0x11
2755 vpclmulqdq xmm3,xmm6,xmm4,0x00
2756 vpclmulqdq xmm1,xmm6,xmm4,0x01
2757 vpclmulqdq xmm4,xmm6,xmm4,0x10
2758 vpxor xmm1,xmm1,xmm4
2759
2760 vmovdqu xmm4,XMMWORD[16+r8]
2761 vaesenc xmm7,xmm7,xmm4
2762 vaesenc xmm8,xmm8,xmm4
2763 vaesenc xmm9,xmm9,xmm4
2764 vaesenc xmm10,xmm10,xmm4
2765 vaesenc xmm11,xmm11,xmm4
2766 vaesenc xmm12,xmm12,xmm4
2767
2768 vmovdqu xmm6,XMMWORD[((-16))+rax]
2769 vmovdqu xmm13,XMMWORD[((-16))+rcx]
2770
2771 vpclmulqdq xmm4,xmm6,xmm13,0x10
2772 vpxor xmm1,xmm1,xmm4
2773 vpclmulqdq xmm4,xmm6,xmm13,0x11
2774 vpxor xmm2,xmm2,xmm4
2775 vpclmulqdq xmm4,xmm6,xmm13,0x00
2776 vpxor xmm3,xmm3,xmm4
2777 vpclmulqdq xmm4,xmm6,xmm13,0x01
2778 vpxor xmm1,xmm1,xmm4
2779
2780
2781 vmovdqu xmm4,XMMWORD[32+r8]
2782 vaesenc xmm7,xmm7,xmm4
2783 vaesenc xmm8,xmm8,xmm4
2784 vaesenc xmm9,xmm9,xmm4
2785 vaesenc xmm10,xmm10,xmm4
2786 vaesenc xmm11,xmm11,xmm4
2787 vaesenc xmm12,xmm12,xmm4
2788
2789 vmovdqu xmm6,XMMWORD[rax]
2790 vmovdqu xmm13,XMMWORD[rcx]
2791
2792 vpclmulqdq xmm4,xmm6,xmm13,0x10
2793 vpxor xmm1,xmm1,xmm4
2794 vpclmulqdq xmm4,xmm6,xmm13,0x11
2795 vpxor xmm2,xmm2,xmm4
2796 vpclmulqdq xmm4,xmm6,xmm13,0x00
2797 vpxor xmm3,xmm3,xmm4
2798 vpclmulqdq xmm4,xmm6,xmm13,0x01
2799 vpxor xmm1,xmm1,xmm4
2800
2801
2802 vmovdqu xmm4,XMMWORD[48+r8]
2803 vaesenc xmm7,xmm7,xmm4
2804 vaesenc xmm8,xmm8,xmm4
2805 vaesenc xmm9,xmm9,xmm4
2806 vaesenc xmm10,xmm10,xmm4
2807 vaesenc xmm11,xmm11,xmm4
2808 vaesenc xmm12,xmm12,xmm4
2809
2810 vmovdqu xmm6,XMMWORD[16+rax]
2811 vmovdqu xmm13,XMMWORD[16+rcx]
2812
2813 vpclmulqdq xmm4,xmm6,xmm13,0x10
2814 vpxor xmm1,xmm1,xmm4
2815 vpclmulqdq xmm4,xmm6,xmm13,0x11
2816 vpxor xmm2,xmm2,xmm4
2817 vpclmulqdq xmm4,xmm6,xmm13,0x00
2818 vpxor xmm3,xmm3,xmm4
2819 vpclmulqdq xmm4,xmm6,xmm13,0x01
2820 vpxor xmm1,xmm1,xmm4
2821
2822
2823 vmovdqu xmm4,XMMWORD[64+r8]
2824 vaesenc xmm7,xmm7,xmm4
2825 vaesenc xmm8,xmm8,xmm4
2826 vaesenc xmm9,xmm9,xmm4
2827 vaesenc xmm10,xmm10,xmm4
2828 vaesenc xmm11,xmm11,xmm4
2829 vaesenc xmm12,xmm12,xmm4
2830
2831 vmovdqu xmm6,XMMWORD[32+rax]
2832 vmovdqu xmm13,XMMWORD[32+rcx]
2833
2834 vpclmulqdq xmm4,xmm6,xmm13,0x10
2835 vpxor xmm1,xmm1,xmm4
2836 vpclmulqdq xmm4,xmm6,xmm13,0x11
2837 vpxor xmm2,xmm2,xmm4
2838 vpclmulqdq xmm4,xmm6,xmm13,0x00
2839 vpxor xmm3,xmm3,xmm4
2840 vpclmulqdq xmm4,xmm6,xmm13,0x01
2841 vpxor xmm1,xmm1,xmm4
2842
2843
2844 vmovdqu xmm4,XMMWORD[80+r8]
2845 vaesenc xmm7,xmm7,xmm4
2846 vaesenc xmm8,xmm8,xmm4
2847 vaesenc xmm9,xmm9,xmm4
2848 vaesenc xmm10,xmm10,xmm4
2849 vaesenc xmm11,xmm11,xmm4
2850 vaesenc xmm12,xmm12,xmm4
2851
2852 vmovdqu xmm4,XMMWORD[96+r8]
2853 vaesenc xmm7,xmm7,xmm4
2854 vaesenc xmm8,xmm8,xmm4
2855 vaesenc xmm9,xmm9,xmm4
2856 vaesenc xmm10,xmm10,xmm4
2857 vaesenc xmm11,xmm11,xmm4
2858 vaesenc xmm12,xmm12,xmm4
2859
2860 vmovdqu xmm4,XMMWORD[112+r8]
2861 vaesenc xmm7,xmm7,xmm4
2862 vaesenc xmm8,xmm8,xmm4
2863 vaesenc xmm9,xmm9,xmm4
2864 vaesenc xmm10,xmm10,xmm4
2865 vaesenc xmm11,xmm11,xmm4
2866 vaesenc xmm12,xmm12,xmm4
2867
2868
2869 vmovdqa xmm6,XMMWORD[((80-32))+rax]
2870 vpxor xmm6,xmm6,xmm0
2871 vmovdqu xmm5,XMMWORD[((80-32))+rcx]
2872
2873 vpclmulqdq xmm4,xmm6,xmm5,0x01
2874 vpxor xmm1,xmm1,xmm4
2875 vpclmulqdq xmm4,xmm6,xmm5,0x11
2876 vpxor xmm2,xmm2,xmm4
2877 vpclmulqdq xmm4,xmm6,xmm5,0x00
2878 vpxor xmm3,xmm3,xmm4
2879 vpclmulqdq xmm4,xmm6,xmm5,0x10
2880 vpxor xmm1,xmm1,xmm4
2881
2882 vmovdqu xmm4,XMMWORD[128+r8]
2883 vaesenc xmm7,xmm7,xmm4
2884 vaesenc xmm8,xmm8,xmm4
2885 vaesenc xmm9,xmm9,xmm4
2886 vaesenc xmm10,xmm10,xmm4
2887 vaesenc xmm11,xmm11,xmm4
2888 vaesenc xmm12,xmm12,xmm4
2889
2890
2891 vpsrldq xmm4,xmm1,8
2892 vpxor xmm5,xmm2,xmm4
2893 vpslldq xmm4,xmm1,8
2894 vpxor xmm0,xmm3,xmm4
2895
2896 vmovdqa xmm3,XMMWORD[poly]
2897
2898 vmovdqu xmm4,XMMWORD[144+r8]
2899 vaesenc xmm7,xmm7,xmm4
2900 vaesenc xmm8,xmm8,xmm4
2901 vaesenc xmm9,xmm9,xmm4
2902 vaesenc xmm10,xmm10,xmm4
2903 vaesenc xmm11,xmm11,xmm4
2904 vaesenc xmm12,xmm12,xmm4
2905
2906 vmovdqu xmm4,XMMWORD[160+r8]
2907 vaesenc xmm7,xmm7,xmm4
2908 vaesenc xmm8,xmm8,xmm4
2909 vaesenc xmm9,xmm9,xmm4
2910 vaesenc xmm10,xmm10,xmm4
2911 vaesenc xmm11,xmm11,xmm4
2912 vaesenc xmm12,xmm12,xmm4
2913
2914 vmovdqu xmm4,XMMWORD[176+r8]
2915 vaesenc xmm7,xmm7,xmm4
2916 vaesenc xmm8,xmm8,xmm4
2917 vaesenc xmm9,xmm9,xmm4
2918 vaesenc xmm10,xmm10,xmm4
2919 vaesenc xmm11,xmm11,xmm4
2920 vaesenc xmm12,xmm12,xmm4
2921
2922 vmovdqu xmm4,XMMWORD[192+r8]
2923 vaesenc xmm7,xmm7,xmm4
2924 vaesenc xmm8,xmm8,xmm4
2925 vaesenc xmm9,xmm9,xmm4
2926 vaesenc xmm10,xmm10,xmm4
2927 vaesenc xmm11,xmm11,xmm4
2928 vaesenc xmm12,xmm12,xmm4
2929
2930 vmovdqu xmm4,XMMWORD[208+r8]
2931 vaesenc xmm7,xmm7,xmm4
2932 vaesenc xmm8,xmm8,xmm4
2933 vaesenc xmm9,xmm9,xmm4
2934 vaesenc xmm10,xmm10,xmm4
2935 vaesenc xmm11,xmm11,xmm4
2936 vaesenc xmm12,xmm12,xmm4
2937
2938 vmovdqu xmm6,XMMWORD[224+r8]
2939 vpalignr xmm2,xmm0,xmm0,8
2940 vpclmulqdq xmm0,xmm0,xmm3,0x10
2941 vpxor xmm0,xmm2,xmm0
2942
2943 vpxor xmm4,xmm6,XMMWORD[rdi]
2944 vaesenclast xmm7,xmm7,xmm4
2945 vpxor xmm4,xmm6,XMMWORD[16+rdi]
2946 vaesenclast xmm8,xmm8,xmm4
2947 vpxor xmm4,xmm6,XMMWORD[32+rdi]
2948 vaesenclast xmm9,xmm9,xmm4
2949 vpxor xmm4,xmm6,XMMWORD[48+rdi]
2950 vaesenclast xmm10,xmm10,xmm4
2951 vpxor xmm4,xmm6,XMMWORD[64+rdi]
2952 vaesenclast xmm11,xmm11,xmm4
2953 vpxor xmm4,xmm6,XMMWORD[80+rdi]
2954 vaesenclast xmm12,xmm12,xmm4
2955
2956 vpalignr xmm2,xmm0,xmm0,8
2957 vpclmulqdq xmm0,xmm0,xmm3,0x10
2958 vpxor xmm0,xmm2,xmm0
2959
2960 vmovdqu XMMWORD[rsi],xmm7
2961 vmovdqu XMMWORD[16+rsi],xmm8
2962 vmovdqu XMMWORD[32+rsi],xmm9
2963 vmovdqu XMMWORD[48+rsi],xmm10
2964 vmovdqu XMMWORD[64+rsi],xmm11
2965 vmovdqu XMMWORD[80+rsi],xmm12
2966
2967 vpxor xmm0,xmm0,xmm5
2968
2969 lea rdi,[96+rdi]
2970 lea rsi,[96+rsi]
2971 jmp NEAR $L$256_dec_loop1
2972
2973 $L$256_dec_finish_96:
2974 vmovdqa xmm6,xmm12
2975 vmovdqa XMMWORD[(16-32)+rax],xmm11
2976 vmovdqa XMMWORD[(32-32)+rax],xmm10
2977 vmovdqa XMMWORD[(48-32)+rax],xmm9
2978 vmovdqa XMMWORD[(64-32)+rax],xmm8
2979 vmovdqa XMMWORD[(80-32)+rax],xmm7
2980
2981 vmovdqu xmm4,XMMWORD[((0-32))+rcx]
2982 vpclmulqdq xmm1,xmm6,xmm4,0x10
2983 vpclmulqdq xmm2,xmm6,xmm4,0x11
2984 vpclmulqdq xmm3,xmm6,xmm4,0x00
2985 vpclmulqdq xmm4,xmm6,xmm4,0x01
2986 vpxor xmm1,xmm1,xmm4
2987
2988 vmovdqu xmm6,XMMWORD[((-16))+rax]
2989 vmovdqu xmm13,XMMWORD[((-16))+rcx]
2990
2991 vpclmulqdq xmm4,xmm6,xmm13,0x10
2992 vpxor xmm1,xmm1,xmm4
2993 vpclmulqdq xmm4,xmm6,xmm13,0x11
2994 vpxor xmm2,xmm2,xmm4
2995 vpclmulqdq xmm4,xmm6,xmm13,0x00
2996 vpxor xmm3,xmm3,xmm4
2997 vpclmulqdq xmm4,xmm6,xmm13,0x01
2998 vpxor xmm1,xmm1,xmm4
2999
3000 vmovdqu xmm6,XMMWORD[rax]
3001 vmovdqu xmm13,XMMWORD[rcx]
3002
3003 vpclmulqdq xmm4,xmm6,xmm13,0x10
3004 vpxor xmm1,xmm1,xmm4
3005 vpclmulqdq xmm4,xmm6,xmm13,0x11
3006 vpxor xmm2,xmm2,xmm4
3007 vpclmulqdq xmm4,xmm6,xmm13,0x00
3008 vpxor xmm3,xmm3,xmm4
3009 vpclmulqdq xmm4,xmm6,xmm13,0x01
3010 vpxor xmm1,xmm1,xmm4
3011
3012 vmovdqu xmm6,XMMWORD[16+rax]
3013 vmovdqu xmm13,XMMWORD[16+rcx]
3014
3015 vpclmulqdq xmm4,xmm6,xmm13,0x10
3016 vpxor xmm1,xmm1,xmm4
3017 vpclmulqdq xmm4,xmm6,xmm13,0x11
3018 vpxor xmm2,xmm2,xmm4
3019 vpclmulqdq xmm4,xmm6,xmm13,0x00
3020 vpxor xmm3,xmm3,xmm4
3021 vpclmulqdq xmm4,xmm6,xmm13,0x01
3022 vpxor xmm1,xmm1,xmm4
3023
3024 vmovdqu xmm6,XMMWORD[32+rax]
3025 vmovdqu xmm13,XMMWORD[32+rcx]
3026
3027 vpclmulqdq xmm4,xmm6,xmm13,0x10
3028 vpxor xmm1,xmm1,xmm4
3029 vpclmulqdq xmm4,xmm6,xmm13,0x11
3030 vpxor xmm2,xmm2,xmm4
3031 vpclmulqdq xmm4,xmm6,xmm13,0x00
3032 vpxor xmm3,xmm3,xmm4
3033 vpclmulqdq xmm4,xmm6,xmm13,0x01
3034 vpxor xmm1,xmm1,xmm4
3035
3036
3037 vmovdqu xmm6,XMMWORD[((80-32))+rax]
3038 vpxor xmm6,xmm6,xmm0
3039 vmovdqu xmm5,XMMWORD[((80-32))+rcx]
3040 vpclmulqdq xmm4,xmm6,xmm5,0x11
3041 vpxor xmm2,xmm2,xmm4
3042 vpclmulqdq xmm4,xmm6,xmm5,0x00
3043 vpxor xmm3,xmm3,xmm4
3044 vpclmulqdq xmm4,xmm6,xmm5,0x10
3045 vpxor xmm1,xmm1,xmm4
3046 vpclmulqdq xmm4,xmm6,xmm5,0x01
3047 vpxor xmm1,xmm1,xmm4
3048
3049 vpsrldq xmm4,xmm1,8
3050 vpxor xmm5,xmm2,xmm4
3051 vpslldq xmm4,xmm1,8
3052 vpxor xmm0,xmm3,xmm4
3053
3054 vmovdqa xmm3,XMMWORD[poly]
3055
3056 vpalignr xmm2,xmm0,xmm0,8
3057 vpclmulqdq xmm0,xmm0,xmm3,0x10
3058 vpxor xmm0,xmm2,xmm0
3059
3060 vpalignr xmm2,xmm0,xmm0,8
3061 vpclmulqdq xmm0,xmm0,xmm3,0x10
3062 vpxor xmm0,xmm2,xmm0
3063
3064 vpxor xmm0,xmm0,xmm5
3065
3066 $L$256_dec_loop2:
3067
3068
3069
3070 cmp r9,16
3071 jb NEAR $L$256_dec_out
3072 sub r9,16
3073
3074 vmovdqa xmm2,xmm15
3075 vpaddd xmm15,xmm15,XMMWORD[one]
3076
3077 vpxor xmm2,xmm2,XMMWORD[r8]
3078 vaesenc xmm2,xmm2,XMMWORD[16+r8]
3079 vaesenc xmm2,xmm2,XMMWORD[32+r8]
3080 vaesenc xmm2,xmm2,XMMWORD[48+r8]
3081 vaesenc xmm2,xmm2,XMMWORD[64+r8]
3082 vaesenc xmm2,xmm2,XMMWORD[80+r8]
3083 vaesenc xmm2,xmm2,XMMWORD[96+r8]
3084 vaesenc xmm2,xmm2,XMMWORD[112+r8]
3085 vaesenc xmm2,xmm2,XMMWORD[128+r8]
3086 vaesenc xmm2,xmm2,XMMWORD[144+r8]
3087 vaesenc xmm2,xmm2,XMMWORD[160+r8]
3088 vaesenc xmm2,xmm2,XMMWORD[176+r8]
3089 vaesenc xmm2,xmm2,XMMWORD[192+r8]
3090 vaesenc xmm2,xmm2,XMMWORD[208+r8]
3091 vaesenclast xmm2,xmm2,XMMWORD[224+r8]
3092 vpxor xmm2,xmm2,XMMWORD[rdi]
3093 vmovdqu XMMWORD[rsi],xmm2
3094 add rdi,16
3095 add rsi,16
3096
3097 vpxor xmm0,xmm0,xmm2
3098 vmovdqa xmm1,XMMWORD[((-32))+rcx]
3099 call GFMUL
3100
3101 jmp NEAR $L$256_dec_loop2
3102
3103 $L$256_dec_out:
3104 vmovdqu XMMWORD[rdx],xmm0
3105 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3106 mov rsi,QWORD[16+rsp]
3107 DB 0F3h,0C3h ;repret
3108
3109 $L$SEH_end_aes256gcmsiv_dec:
3110 global aes256gcmsiv_kdf
3111
3112 ALIGN 16
3113 aes256gcmsiv_kdf:
3114 mov QWORD[8+rsp],rdi ;WIN64 prologue
3115 mov QWORD[16+rsp],rsi
3116 mov rax,rsp
3117 $L$SEH_begin_aes256gcmsiv_kdf:
3118 mov rdi,rcx
3119 mov rsi,rdx
3120 mov rdx,r8
3121
3122
3123
3124
3125
3126
3127
3128 vmovdqa xmm1,XMMWORD[rdx]
3129 vmovdqa xmm4,XMMWORD[rdi]
3130 vmovdqa xmm11,XMMWORD[and_mask]
3131 vmovdqa xmm8,XMMWORD[one]
3132 vpshufd xmm4,xmm4,0x90
3133 vpand xmm4,xmm4,xmm11
3134 vpaddd xmm6,xmm4,xmm8
3135 vpaddd xmm7,xmm6,xmm8
3136 vpaddd xmm11,xmm7,xmm8
3137 vpaddd xmm12,xmm11,xmm8
3138 vpaddd xmm13,xmm12,xmm8
3139
3140 vpxor xmm4,xmm4,xmm1
3141 vpxor xmm6,xmm6,xmm1
3142 vpxor xmm7,xmm7,xmm1
3143 vpxor xmm11,xmm11,xmm1
3144 vpxor xmm12,xmm12,xmm1
3145 vpxor xmm13,xmm13,xmm1
3146
3147 vmovdqa xmm1,XMMWORD[16+rdx]
3148 vaesenc xmm4,xmm4,xmm1
3149 vaesenc xmm6,xmm6,xmm1
3150 vaesenc xmm7,xmm7,xmm1
3151 vaesenc xmm11,xmm11,xmm1
3152 vaesenc xmm12,xmm12,xmm1
3153 vaesenc xmm13,xmm13,xmm1
3154
3155 vmovdqa xmm2,XMMWORD[32+rdx]
3156 vaesenc xmm4,xmm4,xmm2
3157 vaesenc xmm6,xmm6,xmm2
3158 vaesenc xmm7,xmm7,xmm2
3159 vaesenc xmm11,xmm11,xmm2
3160 vaesenc xmm12,xmm12,xmm2
3161 vaesenc xmm13,xmm13,xmm2
3162
3163 vmovdqa xmm1,XMMWORD[48+rdx]
3164 vaesenc xmm4,xmm4,xmm1
3165 vaesenc xmm6,xmm6,xmm1
3166 vaesenc xmm7,xmm7,xmm1
3167 vaesenc xmm11,xmm11,xmm1
3168 vaesenc xmm12,xmm12,xmm1
3169 vaesenc xmm13,xmm13,xmm1
3170
3171 vmovdqa xmm2,XMMWORD[64+rdx]
3172 vaesenc xmm4,xmm4,xmm2
3173 vaesenc xmm6,xmm6,xmm2
3174 vaesenc xmm7,xmm7,xmm2
3175 vaesenc xmm11,xmm11,xmm2
3176 vaesenc xmm12,xmm12,xmm2
3177 vaesenc xmm13,xmm13,xmm2
3178
3179 vmovdqa xmm1,XMMWORD[80+rdx]
3180 vaesenc xmm4,xmm4,xmm1
3181 vaesenc xmm6,xmm6,xmm1
3182 vaesenc xmm7,xmm7,xmm1
3183 vaesenc xmm11,xmm11,xmm1
3184 vaesenc xmm12,xmm12,xmm1
3185 vaesenc xmm13,xmm13,xmm1
3186
3187 vmovdqa xmm2,XMMWORD[96+rdx]
3188 vaesenc xmm4,xmm4,xmm2
3189 vaesenc xmm6,xmm6,xmm2
3190 vaesenc xmm7,xmm7,xmm2
3191 vaesenc xmm11,xmm11,xmm2
3192 vaesenc xmm12,xmm12,xmm2
3193 vaesenc xmm13,xmm13,xmm2
3194
3195 vmovdqa xmm1,XMMWORD[112+rdx]
3196 vaesenc xmm4,xmm4,xmm1
3197 vaesenc xmm6,xmm6,xmm1
3198 vaesenc xmm7,xmm7,xmm1
3199 vaesenc xmm11,xmm11,xmm1
3200 vaesenc xmm12,xmm12,xmm1
3201 vaesenc xmm13,xmm13,xmm1
3202
3203 vmovdqa xmm2,XMMWORD[128+rdx]
3204 vaesenc xmm4,xmm4,xmm2
3205 vaesenc xmm6,xmm6,xmm2
3206 vaesenc xmm7,xmm7,xmm2
3207 vaesenc xmm11,xmm11,xmm2
3208 vaesenc xmm12,xmm12,xmm2
3209 vaesenc xmm13,xmm13,xmm2
3210
3211 vmovdqa xmm1,XMMWORD[144+rdx]
3212 vaesenc xmm4,xmm4,xmm1
3213 vaesenc xmm6,xmm6,xmm1
3214 vaesenc xmm7,xmm7,xmm1
3215 vaesenc xmm11,xmm11,xmm1
3216 vaesenc xmm12,xmm12,xmm1
3217 vaesenc xmm13,xmm13,xmm1
3218
3219 vmovdqa xmm2,XMMWORD[160+rdx]
3220 vaesenc xmm4,xmm4,xmm2
3221 vaesenc xmm6,xmm6,xmm2
3222 vaesenc xmm7,xmm7,xmm2
3223 vaesenc xmm11,xmm11,xmm2
3224 vaesenc xmm12,xmm12,xmm2
3225 vaesenc xmm13,xmm13,xmm2
3226
3227 vmovdqa xmm1,XMMWORD[176+rdx]
3228 vaesenc xmm4,xmm4,xmm1
3229 vaesenc xmm6,xmm6,xmm1
3230 vaesenc xmm7,xmm7,xmm1
3231 vaesenc xmm11,xmm11,xmm1
3232 vaesenc xmm12,xmm12,xmm1
3233 vaesenc xmm13,xmm13,xmm1
3234
3235 vmovdqa xmm2,XMMWORD[192+rdx]
3236 vaesenc xmm4,xmm4,xmm2
3237 vaesenc xmm6,xmm6,xmm2
3238 vaesenc xmm7,xmm7,xmm2
3239 vaesenc xmm11,xmm11,xmm2
3240 vaesenc xmm12,xmm12,xmm2
3241 vaesenc xmm13,xmm13,xmm2
3242
3243 vmovdqa xmm1,XMMWORD[208+rdx]
3244 vaesenc xmm4,xmm4,xmm1
3245 vaesenc xmm6,xmm6,xmm1
3246 vaesenc xmm7,xmm7,xmm1
3247 vaesenc xmm11,xmm11,xmm1
3248 vaesenc xmm12,xmm12,xmm1
3249 vaesenc xmm13,xmm13,xmm1
3250
3251 vmovdqa xmm2,XMMWORD[224+rdx]
3252 vaesenclast xmm4,xmm4,xmm2
3253 vaesenclast xmm6,xmm6,xmm2
3254 vaesenclast xmm7,xmm7,xmm2
3255 vaesenclast xmm11,xmm11,xmm2
3256 vaesenclast xmm12,xmm12,xmm2
3257 vaesenclast xmm13,xmm13,xmm2
3258
3259
3260 vmovdqa XMMWORD[rsi],xmm4
3261 vmovdqa XMMWORD[16+rsi],xmm6
3262 vmovdqa XMMWORD[32+rsi],xmm7
3263 vmovdqa XMMWORD[48+rsi],xmm11
3264 vmovdqa XMMWORD[64+rsi],xmm12
3265 vmovdqa XMMWORD[80+rsi],xmm13
3266 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3267 mov rsi,QWORD[16+rsp]
3268 DB 0F3h,0C3h ;repret
3269
3270 $L$SEH_end_aes256gcmsiv_kdf:
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698