Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/cipher/chacha20_poly1305_x86_64.S

Issue 2669913002: Roll src/third_party/boringssl/src 358baeb9a..c26692cfd (Closed)
Patch Set: Created 3 years, 10 months ago
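
The new file below is BoringSSL's pre-generated x86-64 assembly for the ChaCha20-Poly1305 AEAD (chacha20_poly1305_open/seal, with SSE and AVX2 code paths), picked up by this roll.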
1 #if defined(__x86_64__)
2 .text
3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P
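# Read-only tables shared by the SSE and AVX2 paths:
#   .chacha20_consts   the "expand 32-byte k" ChaCha20 constant
#   .rol8 / .rol16     pshufb masks implementing 8- and 16-bit word rotates
#   .avx2_init / .sse_inc / .avx2_inc   block-counter seeds and increments
#   .clamp             the Poly1305 r-clamping mask (RFC 7539)
#   .and_masks         masks keeping the first 1..15 bytes of a block, used
#                      for partial trailing blocks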
5 .align 64
6 .chacha20_consts:
7 .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
8 .byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
9 .rol8:
10 .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
11 .byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
12 .rol16:
13 .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
14 .byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
15 .avx2_init:
16 .long 0,0,0,0
17 .sse_inc:
18 .long 1,0,0,0
19 .avx2_inc:
20 .long 2,0,0,0,2,0,0,0
21 .clamp:
22 .quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
23 .quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
24 .align 16
25 .and_masks:
26 .byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
27 .byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
28 .byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
29 .byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
30 .byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
31 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
32 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
33 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
34 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
35 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
36 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
37 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
38 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
39 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
40 .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
41
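# poly_hash_ad_internal: absorbs the additional authenticated data into the
# Poly1305 state. On entry %rcx points at the AAD and %r8 holds its length;
# the clamped key r sits at 0(%rbp) and 8(%rbp), and the accumulator lives
# in %r10:%r11:%r12 (two 64-bit limbs plus the bits above 2^128).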
42 .type poly_hash_ad_internal,@function
43 .align 64
44 poly_hash_ad_internal:
45 .cfi_startproc
46 xorq %r10,%r10
47 xorq %r11,%r11
48 xorq %r12,%r12
49 cmpq $13,%r8
50 jne hash_ad_loop
51 poly_fast_tls_ad:
52
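# Fast path for the 13-byte AAD used by TLS: load the 13 bytes as one
# zero-padded 16-byte block (%r10 = bytes 0-7, %r11 = bytes 8-12) with the
# block's 2^128 pad bit in %r12, then fall through into a single
# multiply-reduce step.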
53 movq (%rcx),%r10
54 movq 5(%rcx),%r11
55 shrq $24,%r11
56 movq $1,%r12
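# One Poly1305 step (this exact sequence is inlined throughout the file):
# multiply the accumulator by r with schoolbook 64x64-bit mulq, then reduce
# mod 2^130-5 by keeping the low 130 bits and adding the high part c back
# in as 5*c = 4*c + c (the andq $-4 / shrdq $2 / shrq $2 trio below).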
57 movq 0+0(%rbp),%rax
58 movq %rax,%r15
59 mulq %r10
60 movq %rax,%r13
61 movq %rdx,%r14
62 movq 0+0(%rbp),%rax
63 mulq %r11
64 imulq %r12,%r15
65 addq %rax,%r14
66 adcq %rdx,%r15
67 movq 8+0(%rbp),%rax
68 movq %rax,%r9
69 mulq %r10
70 addq %rax,%r14
71 adcq $0,%rdx
72 movq %rdx,%r10
73 movq 8+0(%rbp),%rax
74 mulq %r11
75 addq %rax,%r15
76 adcq $0,%rdx
77 imulq %r12,%r9
78 addq %r10,%r15
79 adcq %rdx,%r9
80 movq %r13,%r10
81 movq %r14,%r11
82 movq %r15,%r12
83 andq $3,%r12
84 movq %r15,%r13
85 andq $-4,%r13
86 movq %r9,%r14
87 shrdq $2,%r9,%r15
88 shrq $2,%r9
89 addq %r13,%r10
90 adcq %r14,%r11
91 adcq $0,%r12
92 addq %r15,%r10
93 adcq %r9,%r11
94 adcq $0,%r12
95
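# 0xf3,0xc3 is "repz ret", the two-byte return that avoids a branch
# prediction penalty on older AMD cores; it is emitted as raw bytes,
# presumably because not every assembler accepts the mnemonic.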
96 .byte 0xf3,0xc3
97 hash_ad_loop:
98
99 cmpq $16,%r8
100 jb hash_ad_tail
101 addq 0(%rcx),%r10
102 adcq 8+0(%rcx),%r11
103 adcq $1,%r12
104 movq 0+0(%rbp),%rax
105 movq %rax,%r15
106 mulq %r10
107 movq %rax,%r13
108 movq %rdx,%r14
109 movq 0+0(%rbp),%rax
110 mulq %r11
111 imulq %r12,%r15
112 addq %rax,%r14
113 adcq %rdx,%r15
114 movq 8+0(%rbp),%rax
115 movq %rax,%r9
116 mulq %r10
117 addq %rax,%r14
118 adcq $0,%rdx
119 movq %rdx,%r10
120 movq 8+0(%rbp),%rax
121 mulq %r11
122 addq %rax,%r15
123 adcq $0,%rdx
124 imulq %r12,%r9
125 addq %r10,%r15
126 adcq %rdx,%r9
127 movq %r13,%r10
128 movq %r14,%r11
129 movq %r15,%r12
130 andq $3,%r12
131 movq %r15,%r13
132 andq $-4,%r13
133 movq %r9,%r14
134 shrdq $2,%r9,%r15
135 shrq $2,%r9
136 addq %r13,%r10
137 adcq %r14,%r11
138 adcq $0,%r12
139 addq %r15,%r10
140 adcq %r9,%r11
141 adcq $0,%r12
142
143 leaq 16(%rcx),%rcx
144 subq $16,%r8
145 jmp hash_ad_loop
146 hash_ad_tail:
147 cmpq $0,%r8
148 je 1f
149
150 xorq %r13,%r13
151 xorq %r14,%r14
152 xorq %r15,%r15
153 addq %r8,%rcx
154 hash_ad_tail_loop:
155 shldq $8,%r13,%r14
156 shlq $8,%r13
157 movzbq -1(%rcx),%r15
158 xorq %r15,%r13
159 decq %rcx
160 decq %r8
161 jne hash_ad_tail_loop
162
163 addq %r13,%r10
164 adcq %r14,%r11
165 adcq $1,%r12
166 movq 0+0(%rbp),%rax
167 movq %rax,%r15
168 mulq %r10
169 movq %rax,%r13
170 movq %rdx,%r14
171 movq 0+0(%rbp),%rax
172 mulq %r11
173 imulq %r12,%r15
174 addq %rax,%r14
175 adcq %rdx,%r15
176 movq 8+0(%rbp),%rax
177 movq %rax,%r9
178 mulq %r10
179 addq %rax,%r14
180 adcq $0,%rdx
181 movq %rdx,%r10
182 movq 8+0(%rbp),%rax
183 mulq %r11
184 addq %rax,%r15
185 adcq $0,%rdx
186 imulq %r12,%r9
187 addq %r10,%r15
188 adcq %rdx,%r9
189 movq %r13,%r10
190 movq %r14,%r11
191 movq %r15,%r12
192 andq $3,%r12
193 movq %r15,%r13
194 andq $-4,%r13
195 movq %r9,%r14
196 shrdq $2,%r9,%r15
197 shrq $2,%r9
198 addq %r13,%r10
199 adcq %r14,%r11
200 adcq $0,%r12
201 addq %r15,%r10
202 adcq %r9,%r11
203 adcq $0,%r12
204
205
206 1:
207 .byte 0xf3,0xc3
208 .cfi_endproc
209 .size poly_hash_ad_internal, .-poly_hash_ad_internal
210
211 .globl chacha20_poly1305_open
212 .hidden chacha20_poly1305_open
213 .type chacha20_poly1305_open,@function
214 .align 64
215 chacha20_poly1305_open:
216 .cfi_startproc
217 pushq %rbp
218 .cfi_adjust_cfa_offset 8
219 pushq %rbx
220 .cfi_adjust_cfa_offset 8
221 pushq %r12
222 .cfi_adjust_cfa_offset 8
223 pushq %r13
224 .cfi_adjust_cfa_offset 8
225 pushq %r14
226 .cfi_adjust_cfa_offset 8
227 pushq %r15
228 .cfi_adjust_cfa_offset 8
229
230
231 pushq %r9
232 .cfi_adjust_cfa_offset 8
233 subq $288 + 32,%rsp
234 .cfi_adjust_cfa_offset 288 + 32
235 .cfi_offset rbp, -16
236 .cfi_offset rbx, -24
237 .cfi_offset r12, -32
238 .cfi_offset r13, -40
239 .cfi_offset r14, -48
240 .cfi_offset r15, -56
241 .cfi_offset %r9, -64
242 leaq 32(%rsp),%rbp
243 andq $-32,%rbp
244 movq %rdx,8+32(%rbp)
245 movq %r8,0+32(%rbp)
246 movq %rdx,%rbx
247
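# Feature dispatch: OPENSSL_ia32cap_P+8 is EBX from CPUID leaf 7, and
# $288 = 0x120 selects bit 5 (AVX2) and bit 8 (BMI2). The and/xor pair
# leaves zero only when both are set, taking the AVX2 implementation.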
248 movl OPENSSL_ia32cap_P+8(%rip),%eax
249 andl $288,%eax
250 xorl $288,%eax
251 jz chacha20_poly1305_open_avx2
252
253 1:
254 cmpq $128,%rbx
255 jbe open_sse_128
256
257 movdqa .chacha20_consts(%rip),%xmm0
258 movdqu 0(%r9),%xmm4
259 movdqu 16(%r9),%xmm8
260 movdqu 32(%r9),%xmm12
261 movdqa %xmm12,%xmm7
262
263 movdqa %xmm4,48(%rbp)
264 movdqa %xmm8,64(%rbp)
265 movdqa %xmm12,96(%rbp)
266 movq $10,%r10
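# 10 iterations, each one column round plus one diagonal round: the 20
# rounds of ChaCha20. Rotates by 16 and 8 use pshufb with the .rol16/.rol8
# masks; rotates by 12 and 7 use pslld/psrld pairs. The raw .byte sequences
# are palignr instructions that shift rows 1-3 by 4/8/12 bytes into
# diagonal form and back (102,15,58,15,228,4 is palignr $4,%xmm4,%xmm4).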
267 1:
268 paddd %xmm4,%xmm0
269 pxor %xmm0,%xmm12
270 pshufb .rol16(%rip),%xmm12
271 paddd %xmm12,%xmm8
272 pxor %xmm8,%xmm4
273 movdqa %xmm4,%xmm3
274 pslld $12,%xmm3
275 psrld $20,%xmm4
276 pxor %xmm3,%xmm4
277 paddd %xmm4,%xmm0
278 pxor %xmm0,%xmm12
279 pshufb .rol8(%rip),%xmm12
280 paddd %xmm12,%xmm8
281 pxor %xmm8,%xmm4
282 movdqa %xmm4,%xmm3
283 pslld $7,%xmm3
284 psrld $25,%xmm4
285 pxor %xmm3,%xmm4
286 .byte 102,15,58,15,228,4
287 .byte 102,69,15,58,15,192,8
288 .byte 102,69,15,58,15,228,12
289 paddd %xmm4,%xmm0
290 pxor %xmm0,%xmm12
291 pshufb .rol16(%rip),%xmm12
292 paddd %xmm12,%xmm8
293 pxor %xmm8,%xmm4
294 movdqa %xmm4,%xmm3
295 pslld $12,%xmm3
296 psrld $20,%xmm4
297 pxor %xmm3,%xmm4
298 paddd %xmm4,%xmm0
299 pxor %xmm0,%xmm12
300 pshufb .rol8(%rip),%xmm12
301 paddd %xmm12,%xmm8
302 pxor %xmm8,%xmm4
303 movdqa %xmm4,%xmm3
304 pslld $7,%xmm3
305 psrld $25,%xmm4
306 pxor %xmm3,%xmm4
307 .byte 102,15,58,15,228,12
308 .byte 102,69,15,58,15,192,8
309 .byte 102,69,15,58,15,228,4
310
311 decq %r10
312 jne 1b
313
314 paddd .chacha20_consts(%rip),%xmm0
315 paddd 48(%rbp),%xmm4
316
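# The first keystream block (counter 0), clamped, becomes the one-time
# Poly1305 key: r is stored at 0(%rbp), s at 16(%rbp).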
317 pand .clamp(%rip),%xmm0
318 movdqa %xmm0,0(%rbp)
319 movdqa %xmm4,16(%rbp)
320
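# Hash the AAD before processing any ciphertext. The movq %r8,%r8 below is
# a no-op, apparently an artifact of the code generator.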
321 movq %r8,%r8
322 call poly_hash_ad_internal
323 open_sse_main_loop:
324 cmpq $256,%rbx
325 jb 2f
326
327 movdqa .chacha20_consts(%rip),%xmm0
328 movdqa 48(%rbp),%xmm4
329 movdqa 64(%rbp),%xmm8
330 movdqa %xmm0,%xmm1
331 movdqa %xmm4,%xmm5
332 movdqa %xmm8,%xmm9
333 movdqa %xmm0,%xmm2
334 movdqa %xmm4,%xmm6
335 movdqa %xmm8,%xmm10
336 movdqa %xmm0,%xmm3
337 movdqa %xmm4,%xmm7
338 movdqa %xmm8,%xmm11
339 movdqa 96(%rbp),%xmm15
340 paddd .sse_inc(%rip),%xmm15
341 movdqa %xmm15,%xmm14
342 paddd .sse_inc(%rip),%xmm14
343 movdqa %xmm14,%xmm13
344 paddd .sse_inc(%rip),%xmm13
345 movdqa %xmm13,%xmm12
346 paddd .sse_inc(%rip),%xmm12
347 movdqa %xmm12,96(%rbp)
348 movdqa %xmm13,112(%rbp)
349 movdqa %xmm14,128(%rbp)
350 movdqa %xmm15,144(%rbp)
351
352
353
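# Main open (decrypt) loop, 256 bytes per pass: four ChaCha20 states are
# run in parallel while Poly1305 absorbs the incoming ciphertext through
# %r8 (a cursor into %rsi), hiding the scalar multiply latency under the
# vector work. Each of the ten double-round iterations hashes one 16-byte
# block, and six hash-only steps after the rounds (the cmpq $-6 test)
# cover the rest of the 256 bytes.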
354 movq $4,%rcx
355 movq %rsi,%r8
356 1:
357 movdqa %xmm8,80(%rbp)
358 movdqa .rol16(%rip),%xmm8
359 paddd %xmm7,%xmm3
360 paddd %xmm6,%xmm2
361 paddd %xmm5,%xmm1
362 paddd %xmm4,%xmm0
363 pxor %xmm3,%xmm15
364 pxor %xmm2,%xmm14
365 pxor %xmm1,%xmm13
366 pxor %xmm0,%xmm12
367 .byte 102,69,15,56,0,248
368 .byte 102,69,15,56,0,240
369 .byte 102,69,15,56,0,232
370 .byte 102,69,15,56,0,224
371 movdqa 80(%rbp),%xmm8
372 paddd %xmm15,%xmm11
373 paddd %xmm14,%xmm10
374 paddd %xmm13,%xmm9
375 paddd %xmm12,%xmm8
376 pxor %xmm11,%xmm7
377 addq 0(%r8),%r10
378 adcq 8+0(%r8),%r11
379 adcq $1,%r12
380
381 leaq 16(%r8),%r8
382 pxor %xmm10,%xmm6
383 pxor %xmm9,%xmm5
384 pxor %xmm8,%xmm4
385 movdqa %xmm8,80(%rbp)
386 movdqa %xmm7,%xmm8
387 psrld $20,%xmm8
388 pslld $32-20,%xmm7
389 pxor %xmm8,%xmm7
390 movdqa %xmm6,%xmm8
391 psrld $20,%xmm8
392 pslld $32-20,%xmm6
393 pxor %xmm8,%xmm6
394 movdqa %xmm5,%xmm8
395 psrld $20,%xmm8
396 pslld $32-20,%xmm5
397 pxor %xmm8,%xmm5
398 movdqa %xmm4,%xmm8
399 psrld $20,%xmm8
400 pslld $32-20,%xmm4
401 pxor %xmm8,%xmm4
402 movq 0+0(%rbp),%rax
403 movq %rax,%r15
404 mulq %r10
405 movq %rax,%r13
406 movq %rdx,%r14
407 movq 0+0(%rbp),%rax
408 mulq %r11
409 imulq %r12,%r15
410 addq %rax,%r14
411 adcq %rdx,%r15
412 movdqa .rol8(%rip),%xmm8
413 paddd %xmm7,%xmm3
414 paddd %xmm6,%xmm2
415 paddd %xmm5,%xmm1
416 paddd %xmm4,%xmm0
417 pxor %xmm3,%xmm15
418 pxor %xmm2,%xmm14
419 pxor %xmm1,%xmm13
420 pxor %xmm0,%xmm12
421 .byte 102,69,15,56,0,248
422 .byte 102,69,15,56,0,240
423 .byte 102,69,15,56,0,232
424 .byte 102,69,15,56,0,224
425 movdqa 80(%rbp),%xmm8
426 paddd %xmm15,%xmm11
427 paddd %xmm14,%xmm10
428 paddd %xmm13,%xmm9
429 paddd %xmm12,%xmm8
430 pxor %xmm11,%xmm7
431 pxor %xmm10,%xmm6
432 movq 8+0(%rbp),%rax
433 movq %rax,%r9
434 mulq %r10
435 addq %rax,%r14
436 adcq $0,%rdx
437 movq %rdx,%r10
438 movq 8+0(%rbp),%rax
439 mulq %r11
440 addq %rax,%r15
441 adcq $0,%rdx
442 pxor %xmm9,%xmm5
443 pxor %xmm8,%xmm4
444 movdqa %xmm8,80(%rbp)
445 movdqa %xmm7,%xmm8
446 psrld $25,%xmm8
447 pslld $32-25,%xmm7
448 pxor %xmm8,%xmm7
449 movdqa %xmm6,%xmm8
450 psrld $25,%xmm8
451 pslld $32-25,%xmm6
452 pxor %xmm8,%xmm6
453 movdqa %xmm5,%xmm8
454 psrld $25,%xmm8
455 pslld $32-25,%xmm5
456 pxor %xmm8,%xmm5
457 movdqa %xmm4,%xmm8
458 psrld $25,%xmm8
459 pslld $32-25,%xmm4
460 pxor %xmm8,%xmm4
461 movdqa 80(%rbp),%xmm8
462 imulq %r12,%r9
463 addq %r10,%r15
464 adcq %rdx,%r9
465 .byte 102,15,58,15,255,4
466 .byte 102,69,15,58,15,219,8
467 .byte 102,69,15,58,15,255,12
468 .byte 102,15,58,15,246,4
469 .byte 102,69,15,58,15,210,8
470 .byte 102,69,15,58,15,246,12
471 .byte 102,15,58,15,237,4
472 .byte 102,69,15,58,15,201,8
473 .byte 102,69,15,58,15,237,12
474 .byte 102,15,58,15,228,4
475 .byte 102,69,15,58,15,192,8
476 .byte 102,69,15,58,15,228,12
477 movdqa %xmm8,80(%rbp)
478 movdqa .rol16(%rip),%xmm8
479 paddd %xmm7,%xmm3
480 paddd %xmm6,%xmm2
481 paddd %xmm5,%xmm1
482 paddd %xmm4,%xmm0
483 pxor %xmm3,%xmm15
484 pxor %xmm2,%xmm14
485 movq %r13,%r10
486 movq %r14,%r11
487 movq %r15,%r12
488 andq $3,%r12
489 movq %r15,%r13
490 andq $-4,%r13
491 movq %r9,%r14
492 shrdq $2,%r9,%r15
493 shrq $2,%r9
494 addq %r13,%r10
495 adcq %r14,%r11
496 adcq $0,%r12
497 addq %r15,%r10
498 adcq %r9,%r11
499 adcq $0,%r12
500 pxor %xmm1,%xmm13
501 pxor %xmm0,%xmm12
502 .byte 102,69,15,56,0,248
503 .byte 102,69,15,56,0,240
504 .byte 102,69,15,56,0,232
505 .byte 102,69,15,56,0,224
506 movdqa 80(%rbp),%xmm8
507 paddd %xmm15,%xmm11
508 paddd %xmm14,%xmm10
509 paddd %xmm13,%xmm9
510 paddd %xmm12,%xmm8
511 pxor %xmm11,%xmm7
512 pxor %xmm10,%xmm6
513 pxor %xmm9,%xmm5
514 pxor %xmm8,%xmm4
515 movdqa %xmm8,80(%rbp)
516 movdqa %xmm7,%xmm8
517 psrld $20,%xmm8
518 pslld $32-20,%xmm7
519 pxor %xmm8,%xmm7
520 movdqa %xmm6,%xmm8
521 psrld $20,%xmm8
522 pslld $32-20,%xmm6
523 pxor %xmm8,%xmm6
524 movdqa %xmm5,%xmm8
525 psrld $20,%xmm8
526 pslld $32-20,%xmm5
527 pxor %xmm8,%xmm5
528 movdqa %xmm4,%xmm8
529 psrld $20,%xmm8
530 pslld $32-20,%xmm4
531 pxor %xmm8,%xmm4
532 movdqa .rol8(%rip),%xmm8
533 paddd %xmm7,%xmm3
534 paddd %xmm6,%xmm2
535 paddd %xmm5,%xmm1
536 paddd %xmm4,%xmm0
537 pxor %xmm3,%xmm15
538 pxor %xmm2,%xmm14
539 pxor %xmm1,%xmm13
540 pxor %xmm0,%xmm12
541 .byte 102,69,15,56,0,248
542 .byte 102,69,15,56,0,240
543 .byte 102,69,15,56,0,232
544 .byte 102,69,15,56,0,224
545 movdqa 80(%rbp),%xmm8
546 paddd %xmm15,%xmm11
547 paddd %xmm14,%xmm10
548 paddd %xmm13,%xmm9
549 paddd %xmm12,%xmm8
550 pxor %xmm11,%xmm7
551 pxor %xmm10,%xmm6
552 pxor %xmm9,%xmm5
553 pxor %xmm8,%xmm4
554 movdqa %xmm8,80(%rbp)
555 movdqa %xmm7,%xmm8
556 psrld $25,%xmm8
557 pslld $32-25,%xmm7
558 pxor %xmm8,%xmm7
559 movdqa %xmm6,%xmm8
560 psrld $25,%xmm8
561 pslld $32-25,%xmm6
562 pxor %xmm8,%xmm6
563 movdqa %xmm5,%xmm8
564 psrld $25,%xmm8
565 pslld $32-25,%xmm5
566 pxor %xmm8,%xmm5
567 movdqa %xmm4,%xmm8
568 psrld $25,%xmm8
569 pslld $32-25,%xmm4
570 pxor %xmm8,%xmm4
571 movdqa 80(%rbp),%xmm8
572 .byte 102,15,58,15,255,12
573 .byte 102,69,15,58,15,219,8
574 .byte 102,69,15,58,15,255,4
575 .byte 102,15,58,15,246,12
576 .byte 102,69,15,58,15,210,8
577 .byte 102,69,15,58,15,246,4
578 .byte 102,15,58,15,237,12
579 .byte 102,69,15,58,15,201,8
580 .byte 102,69,15,58,15,237,4
581 .byte 102,15,58,15,228,12
582 .byte 102,69,15,58,15,192,8
583 .byte 102,69,15,58,15,228,4
584
585 decq %rcx
586 jge 1b
587 addq 0(%r8),%r10
588 adcq 8+0(%r8),%r11
589 adcq $1,%r12
590 movq 0+0(%rbp),%rax
591 movq %rax,%r15
592 mulq %r10
593 movq %rax,%r13
594 movq %rdx,%r14
595 movq 0+0(%rbp),%rax
596 mulq %r11
597 imulq %r12,%r15
598 addq %rax,%r14
599 adcq %rdx,%r15
600 movq 8+0(%rbp),%rax
601 movq %rax,%r9
602 mulq %r10
603 addq %rax,%r14
604 adcq $0,%rdx
605 movq %rdx,%r10
606 movq 8+0(%rbp),%rax
607 mulq %r11
608 addq %rax,%r15
609 adcq $0,%rdx
610 imulq %r12,%r9
611 addq %r10,%r15
612 adcq %rdx,%r9
613 movq %r13,%r10
614 movq %r14,%r11
615 movq %r15,%r12
616 andq $3,%r12
617 movq %r15,%r13
618 andq $-4,%r13
619 movq %r9,%r14
620 shrdq $2,%r9,%r15
621 shrq $2,%r9
622 addq %r13,%r10
623 adcq %r14,%r11
624 adcq $0,%r12
625 addq %r15,%r10
626 adcq %r9,%r11
627 adcq $0,%r12
628
629 leaq 16(%r8),%r8
630 cmpq $-6,%rcx
631 jg 1b
632 paddd .chacha20_consts(%rip),%xmm3
633 paddd 48(%rbp),%xmm7
634 paddd 64(%rbp),%xmm11
635 paddd 144(%rbp),%xmm15
636 paddd .chacha20_consts(%rip),%xmm2
637 paddd 48(%rbp),%xmm6
638 paddd 64(%rbp),%xmm10
639 paddd 128(%rbp),%xmm14
640 paddd .chacha20_consts(%rip),%xmm1
641 paddd 48(%rbp),%xmm5
642 paddd 64(%rbp),%xmm9
643 paddd 112(%rbp),%xmm13
644 paddd .chacha20_consts(%rip),%xmm0
645 paddd 48(%rbp),%xmm4
646 paddd 64(%rbp),%xmm8
647 paddd 96(%rbp),%xmm12
648 movdqa %xmm12,80(%rbp)
649 movdqu 0 + 0(%rsi),%xmm12
650 pxor %xmm3,%xmm12
651 movdqu %xmm12,0 + 0(%rdi)
652 movdqu 16 + 0(%rsi),%xmm12
653 pxor %xmm7,%xmm12
654 movdqu %xmm12,16 + 0(%rdi)
655 movdqu 32 + 0(%rsi),%xmm12
656 pxor %xmm11,%xmm12
657 movdqu %xmm12,32 + 0(%rdi)
658 movdqu 48 + 0(%rsi),%xmm12
659 pxor %xmm15,%xmm12
660 movdqu %xmm12,48 + 0(%rdi)
661 movdqu 0 + 64(%rsi),%xmm3
662 movdqu 16 + 64(%rsi),%xmm7
663 movdqu 32 + 64(%rsi),%xmm11
664 movdqu 48 + 64(%rsi),%xmm15
665 pxor %xmm3,%xmm2
666 pxor %xmm7,%xmm6
667 pxor %xmm11,%xmm10
668 pxor %xmm14,%xmm15
669 movdqu %xmm2,0 + 64(%rdi)
670 movdqu %xmm6,16 + 64(%rdi)
671 movdqu %xmm10,32 + 64(%rdi)
672 movdqu %xmm15,48 + 64(%rdi)
673 movdqu 0 + 128(%rsi),%xmm3
674 movdqu 16 + 128(%rsi),%xmm7
675 movdqu 32 + 128(%rsi),%xmm11
676 movdqu 48 + 128(%rsi),%xmm15
677 pxor %xmm3,%xmm1
678 pxor %xmm7,%xmm5
679 pxor %xmm11,%xmm9
680 pxor %xmm13,%xmm15
681 movdqu %xmm1,0 + 128(%rdi)
682 movdqu %xmm5,16 + 128(%rdi)
683 movdqu %xmm9,32 + 128(%rdi)
684 movdqu %xmm15,48 + 128(%rdi)
685 movdqu 0 + 192(%rsi),%xmm3
686 movdqu 16 + 192(%rsi),%xmm7
687 movdqu 32 + 192(%rsi),%xmm11
688 movdqu 48 + 192(%rsi),%xmm15
689 pxor %xmm3,%xmm0
690 pxor %xmm7,%xmm4
691 pxor %xmm11,%xmm8
692 pxor 80(%rbp),%xmm15
693 movdqu %xmm0,0 + 192(%rdi)
694 movdqu %xmm4,16 + 192(%rdi)
695 movdqu %xmm8,32 + 192(%rdi)
696 movdqu %xmm15,48 + 192(%rdi)
697
698 leaq 256(%rsi),%rsi
699 leaq 256(%rdi),%rdi
700 subq $256,%rbx
701 jmp open_sse_main_loop
702 2:
703
704 testq %rbx,%rbx
705 jz open_sse_finalize
706 cmpq $64,%rbx
707 ja 3f
708 movdqa .chacha20_consts(%rip),%xmm0
709 movdqa 48(%rbp),%xmm4
710 movdqa 64(%rbp),%xmm8
711 movdqa 96(%rbp),%xmm12
712 paddd .sse_inc(%rip),%xmm12
713 movdqa %xmm12,96(%rbp)
714
715 xorq %r8,%r8
716 movq %rbx,%rcx
717 cmpq $16,%rcx
718 jb 2f
719 1:
720 addq 0(%rsi,%r8), %r10
721 adcq 8+0(%rsi,%r8), %r11
722 adcq $1,%r12
723 movq 0+0(%rbp),%rax
724 movq %rax,%r15
725 mulq %r10
726 movq %rax,%r13
727 movq %rdx,%r14
728 movq 0+0(%rbp),%rax
729 mulq %r11
730 imulq %r12,%r15
731 addq %rax,%r14
732 adcq %rdx,%r15
733 movq 8+0(%rbp),%rax
734 movq %rax,%r9
735 mulq %r10
736 addq %rax,%r14
737 adcq $0,%rdx
738 movq %rdx,%r10
739 movq 8+0(%rbp),%rax
740 mulq %r11
741 addq %rax,%r15
742 adcq $0,%rdx
743 imulq %r12,%r9
744 addq %r10,%r15
745 adcq %rdx,%r9
746 movq %r13,%r10
747 movq %r14,%r11
748 movq %r15,%r12
749 andq $3,%r12
750 movq %r15,%r13
751 andq $-4,%r13
752 movq %r9,%r14
753 shrdq $2,%r9,%r15
754 shrq $2,%r9
755 addq %r13,%r10
756 adcq %r14,%r11
757 adcq $0,%r12
758 addq %r15,%r10
759 adcq %r9,%r11
760 adcq $0,%r12
761
762 subq $16,%rcx
763 2:
764 addq $16,%r8
765 paddd %xmm4,%xmm0
766 pxor %xmm0,%xmm12
767 pshufb .rol16(%rip),%xmm12
768 paddd %xmm12,%xmm8
769 pxor %xmm8,%xmm4
770 movdqa %xmm4,%xmm3
771 pslld $12,%xmm3
772 psrld $20,%xmm4
773 pxor %xmm3,%xmm4
774 paddd %xmm4,%xmm0
775 pxor %xmm0,%xmm12
776 pshufb .rol8(%rip),%xmm12
777 paddd %xmm12,%xmm8
778 pxor %xmm8,%xmm4
779 movdqa %xmm4,%xmm3
780 pslld $7,%xmm3
781 psrld $25,%xmm4
782 pxor %xmm3,%xmm4
783 .byte 102,15,58,15,228,4
784 .byte 102,69,15,58,15,192,8
785 .byte 102,69,15,58,15,228,12
786 paddd %xmm4,%xmm0
787 pxor %xmm0,%xmm12
788 pshufb .rol16(%rip),%xmm12
789 paddd %xmm12,%xmm8
790 pxor %xmm8,%xmm4
791 movdqa %xmm4,%xmm3
792 pslld $12,%xmm3
793 psrld $20,%xmm4
794 pxor %xmm3,%xmm4
795 paddd %xmm4,%xmm0
796 pxor %xmm0,%xmm12
797 pshufb .rol8(%rip),%xmm12
798 paddd %xmm12,%xmm8
799 pxor %xmm8,%xmm4
800 movdqa %xmm4,%xmm3
801 pslld $7,%xmm3
802 psrld $25,%xmm4
803 pxor %xmm3,%xmm4
804 .byte 102,15,58,15,228,12
805 .byte 102,69,15,58,15,192,8
806 .byte 102,69,15,58,15,228,4
807
808 cmpq $16,%rcx
809 jae 1b
810 cmpq $160,%r8
811 jne 2b
812 paddd .chacha20_consts(%rip),%xmm0
813 paddd 48(%rbp),%xmm4
814 paddd 64(%rbp),%xmm8
815 paddd 96(%rbp),%xmm12
816
817 jmp open_sse_tail_64_dec_loop
818 3:
819 cmpq $128,%rbx
820 ja 3f
821 movdqa .chacha20_consts(%rip),%xmm0
822 movdqa 48(%rbp),%xmm4
823 movdqa 64(%rbp),%xmm8
824 movdqa %xmm0,%xmm1
825 movdqa %xmm4,%xmm5
826 movdqa %xmm8,%xmm9
827 movdqa 96(%rbp),%xmm13
828 paddd .sse_inc(%rip),%xmm13
829 movdqa %xmm13,%xmm12
830 paddd .sse_inc(%rip),%xmm12
831 movdqa %xmm12,96(%rbp)
832 movdqa %xmm13,112(%rbp)
833
834 movq %rbx,%rcx
835 andq $-16,%rcx
836 xorq %r8,%r8
837 1:
838 addq 0(%rsi,%r8), %r10
839 adcq 8+0(%rsi,%r8), %r11
840 adcq $1,%r12
841 movq 0+0(%rbp),%rax
842 movq %rax,%r15
843 mulq %r10
844 movq %rax,%r13
845 movq %rdx,%r14
846 movq 0+0(%rbp),%rax
847 mulq %r11
848 imulq %r12,%r15
849 addq %rax,%r14
850 adcq %rdx,%r15
851 movq 8+0(%rbp),%rax
852 movq %rax,%r9
853 mulq %r10
854 addq %rax,%r14
855 adcq $0,%rdx
856 movq %rdx,%r10
857 movq 8+0(%rbp),%rax
858 mulq %r11
859 addq %rax,%r15
860 adcq $0,%rdx
861 imulq %r12,%r9
862 addq %r10,%r15
863 adcq %rdx,%r9
864 movq %r13,%r10
865 movq %r14,%r11
866 movq %r15,%r12
867 andq $3,%r12
868 movq %r15,%r13
869 andq $-4,%r13
870 movq %r9,%r14
871 shrdq $2,%r9,%r15
872 shrq $2,%r9
873 addq %r13,%r10
874 adcq %r14,%r11
875 adcq $0,%r12
876 addq %r15,%r10
877 adcq %r9,%r11
878 adcq $0,%r12
879
880 2:
881 addq $16,%r8
882 paddd %xmm4,%xmm0
883 pxor %xmm0,%xmm12
884 pshufb .rol16(%rip),%xmm12
885 paddd %xmm12,%xmm8
886 pxor %xmm8,%xmm4
887 movdqa %xmm4,%xmm3
888 pslld $12,%xmm3
889 psrld $20,%xmm4
890 pxor %xmm3,%xmm4
891 paddd %xmm4,%xmm0
892 pxor %xmm0,%xmm12
893 pshufb .rol8(%rip),%xmm12
894 paddd %xmm12,%xmm8
895 pxor %xmm8,%xmm4
896 movdqa %xmm4,%xmm3
897 pslld $7,%xmm3
898 psrld $25,%xmm4
899 pxor %xmm3,%xmm4
900 .byte 102,15,58,15,228,4
901 .byte 102,69,15,58,15,192,8
902 .byte 102,69,15,58,15,228,12
903 paddd %xmm5,%xmm1
904 pxor %xmm1,%xmm13
905 pshufb .rol16(%rip),%xmm13
906 paddd %xmm13,%xmm9
907 pxor %xmm9,%xmm5
908 movdqa %xmm5,%xmm3
909 pslld $12,%xmm3
910 psrld $20,%xmm5
911 pxor %xmm3,%xmm5
912 paddd %xmm5,%xmm1
913 pxor %xmm1,%xmm13
914 pshufb .rol8(%rip),%xmm13
915 paddd %xmm13,%xmm9
916 pxor %xmm9,%xmm5
917 movdqa %xmm5,%xmm3
918 pslld $7,%xmm3
919 psrld $25,%xmm5
920 pxor %xmm3,%xmm5
921 .byte 102,15,58,15,237,4
922 .byte 102,69,15,58,15,201,8
923 .byte 102,69,15,58,15,237,12
924 paddd %xmm4,%xmm0
925 pxor %xmm0,%xmm12
926 pshufb .rol16(%rip),%xmm12
927 paddd %xmm12,%xmm8
928 pxor %xmm8,%xmm4
929 movdqa %xmm4,%xmm3
930 pslld $12,%xmm3
931 psrld $20,%xmm4
932 pxor %xmm3,%xmm4
933 paddd %xmm4,%xmm0
934 pxor %xmm0,%xmm12
935 pshufb .rol8(%rip),%xmm12
936 paddd %xmm12,%xmm8
937 pxor %xmm8,%xmm4
938 movdqa %xmm4,%xmm3
939 pslld $7,%xmm3
940 psrld $25,%xmm4
941 pxor %xmm3,%xmm4
942 .byte 102,15,58,15,228,12
943 .byte 102,69,15,58,15,192,8
944 .byte 102,69,15,58,15,228,4
945 paddd %xmm5,%xmm1
946 pxor %xmm1,%xmm13
947 pshufb .rol16(%rip),%xmm13
948 paddd %xmm13,%xmm9
949 pxor %xmm9,%xmm5
950 movdqa %xmm5,%xmm3
951 pslld $12,%xmm3
952 psrld $20,%xmm5
953 pxor %xmm3,%xmm5
954 paddd %xmm5,%xmm1
955 pxor %xmm1,%xmm13
956 pshufb .rol8(%rip),%xmm13
957 paddd %xmm13,%xmm9
958 pxor %xmm9,%xmm5
959 movdqa %xmm5,%xmm3
960 pslld $7,%xmm3
961 psrld $25,%xmm5
962 pxor %xmm3,%xmm5
963 .byte 102,15,58,15,237,12
964 .byte 102,69,15,58,15,201,8
965 .byte 102,69,15,58,15,237,4
966
967 cmpq %rcx,%r8
968 jb 1b
969 cmpq $160,%r8
970 jne 2b
971 paddd .chacha20_consts(%rip),%xmm1
972 paddd 48(%rbp),%xmm5
973 paddd 64(%rbp),%xmm9
974 paddd 112(%rbp),%xmm13
975 paddd .chacha20_consts(%rip),%xmm0
976 paddd 48(%rbp),%xmm4
977 paddd 64(%rbp),%xmm8
978 paddd 96(%rbp),%xmm12
979 movdqu 0 + 0(%rsi),%xmm3
980 movdqu 16 + 0(%rsi),%xmm7
981 movdqu 32 + 0(%rsi),%xmm11
982 movdqu 48 + 0(%rsi),%xmm15
983 pxor %xmm3,%xmm1
984 pxor %xmm7,%xmm5
985 pxor %xmm11,%xmm9
986 pxor %xmm13,%xmm15
987 movdqu %xmm1,0 + 0(%rdi)
988 movdqu %xmm5,16 + 0(%rdi)
989 movdqu %xmm9,32 + 0(%rdi)
990 movdqu %xmm15,48 + 0(%rdi)
991
992 subq $64,%rbx
993 leaq 64(%rsi),%rsi
994 leaq 64(%rdi),%rdi
995 jmp open_sse_tail_64_dec_loop
996 3:
997 cmpq $192,%rbx
998 ja 3f
999 movdqa .chacha20_consts(%rip),%xmm0
1000 movdqa 48(%rbp),%xmm4
1001 movdqa 64(%rbp),%xmm8
1002 movdqa %xmm0,%xmm1
1003 movdqa %xmm4,%xmm5
1004 movdqa %xmm8,%xmm9
1005 movdqa %xmm0,%xmm2
1006 movdqa %xmm4,%xmm6
1007 movdqa %xmm8,%xmm10
1008 movdqa 96(%rbp),%xmm14
1009 paddd .sse_inc(%rip),%xmm14
1010 movdqa %xmm14,%xmm13
1011 paddd .sse_inc(%rip),%xmm13
1012 movdqa %xmm13,%xmm12
1013 paddd .sse_inc(%rip),%xmm12
1014 movdqa %xmm12,96(%rbp)
1015 movdqa %xmm13,112(%rbp)
1016 movdqa %xmm14,128(%rbp)
1017
1018 movq %rbx,%rcx
1019 movq $160,%r8
1020 cmpq $160,%rcx
1021 cmovgq %r8,%rcx
1022 andq $-16,%rcx
1023 xorq %r8,%r8
1024 1:
1025 addq 0(%rsi,%r8), %r10
1026 adcq 8+0(%rsi,%r8), %r11
1027 adcq $1,%r12
1028 movq 0+0(%rbp),%rax
1029 movq %rax,%r15
1030 mulq %r10
1031 movq %rax,%r13
1032 movq %rdx,%r14
1033 movq 0+0(%rbp),%rax
1034 mulq %r11
1035 imulq %r12,%r15
1036 addq %rax,%r14
1037 adcq %rdx,%r15
1038 movq 8+0(%rbp),%rax
1039 movq %rax,%r9
1040 mulq %r10
1041 addq %rax,%r14
1042 adcq $0,%rdx
1043 movq %rdx,%r10
1044 movq 8+0(%rbp),%rax
1045 mulq %r11
1046 addq %rax,%r15
1047 adcq $0,%rdx
1048 imulq %r12,%r9
1049 addq %r10,%r15
1050 adcq %rdx,%r9
1051 movq %r13,%r10
1052 movq %r14,%r11
1053 movq %r15,%r12
1054 andq $3,%r12
1055 movq %r15,%r13
1056 andq $-4,%r13
1057 movq %r9,%r14
1058 shrdq $2,%r9,%r15
1059 shrq $2,%r9
1060 addq %r13,%r10
1061 adcq %r14,%r11
1062 adcq $0,%r12
1063 addq %r15,%r10
1064 adcq %r9,%r11
1065 adcq $0,%r12
1066
1067 2:
1068 addq $16,%r8
1069 paddd %xmm4,%xmm0
1070 pxor %xmm0,%xmm12
1071 pshufb .rol16(%rip),%xmm12
1072 paddd %xmm12,%xmm8
1073 pxor %xmm8,%xmm4
1074 movdqa %xmm4,%xmm3
1075 pslld $12,%xmm3
1076 psrld $20,%xmm4
1077 pxor %xmm3,%xmm4
1078 paddd %xmm4,%xmm0
1079 pxor %xmm0,%xmm12
1080 pshufb .rol8(%rip),%xmm12
1081 paddd %xmm12,%xmm8
1082 pxor %xmm8,%xmm4
1083 movdqa %xmm4,%xmm3
1084 pslld $7,%xmm3
1085 psrld $25,%xmm4
1086 pxor %xmm3,%xmm4
1087 .byte 102,15,58,15,228,4
1088 .byte 102,69,15,58,15,192,8
1089 .byte 102,69,15,58,15,228,12
1090 paddd %xmm5,%xmm1
1091 pxor %xmm1,%xmm13
1092 pshufb .rol16(%rip),%xmm13
1093 paddd %xmm13,%xmm9
1094 pxor %xmm9,%xmm5
1095 movdqa %xmm5,%xmm3
1096 pslld $12,%xmm3
1097 psrld $20,%xmm5
1098 pxor %xmm3,%xmm5
1099 paddd %xmm5,%xmm1
1100 pxor %xmm1,%xmm13
1101 pshufb .rol8(%rip),%xmm13
1102 paddd %xmm13,%xmm9
1103 pxor %xmm9,%xmm5
1104 movdqa %xmm5,%xmm3
1105 pslld $7,%xmm3
1106 psrld $25,%xmm5
1107 pxor %xmm3,%xmm5
1108 .byte 102,15,58,15,237,4
1109 .byte 102,69,15,58,15,201,8
1110 .byte 102,69,15,58,15,237,12
1111 paddd %xmm6,%xmm2
1112 pxor %xmm2,%xmm14
1113 pshufb .rol16(%rip),%xmm14
1114 paddd %xmm14,%xmm10
1115 pxor %xmm10,%xmm6
1116 movdqa %xmm6,%xmm3
1117 pslld $12,%xmm3
1118 psrld $20,%xmm6
1119 pxor %xmm3,%xmm6
1120 paddd %xmm6,%xmm2
1121 pxor %xmm2,%xmm14
1122 pshufb .rol8(%rip),%xmm14
1123 paddd %xmm14,%xmm10
1124 pxor %xmm10,%xmm6
1125 movdqa %xmm6,%xmm3
1126 pslld $7,%xmm3
1127 psrld $25,%xmm6
1128 pxor %xmm3,%xmm6
1129 .byte 102,15,58,15,246,4
1130 .byte 102,69,15,58,15,210,8
1131 .byte 102,69,15,58,15,246,12
1132 paddd %xmm4,%xmm0
1133 pxor %xmm0,%xmm12
1134 pshufb .rol16(%rip),%xmm12
1135 paddd %xmm12,%xmm8
1136 pxor %xmm8,%xmm4
1137 movdqa %xmm4,%xmm3
1138 pslld $12,%xmm3
1139 psrld $20,%xmm4
1140 pxor %xmm3,%xmm4
1141 paddd %xmm4,%xmm0
1142 pxor %xmm0,%xmm12
1143 pshufb .rol8(%rip),%xmm12
1144 paddd %xmm12,%xmm8
1145 pxor %xmm8,%xmm4
1146 movdqa %xmm4,%xmm3
1147 pslld $7,%xmm3
1148 psrld $25,%xmm4
1149 pxor %xmm3,%xmm4
1150 .byte 102,15,58,15,228,12
1151 .byte 102,69,15,58,15,192,8
1152 .byte 102,69,15,58,15,228,4
1153 paddd %xmm5,%xmm1
1154 pxor %xmm1,%xmm13
1155 pshufb .rol16(%rip),%xmm13
1156 paddd %xmm13,%xmm9
1157 pxor %xmm9,%xmm5
1158 movdqa %xmm5,%xmm3
1159 pslld $12,%xmm3
1160 psrld $20,%xmm5
1161 pxor %xmm3,%xmm5
1162 paddd %xmm5,%xmm1
1163 pxor %xmm1,%xmm13
1164 pshufb .rol8(%rip),%xmm13
1165 paddd %xmm13,%xmm9
1166 pxor %xmm9,%xmm5
1167 movdqa %xmm5,%xmm3
1168 pslld $7,%xmm3
1169 psrld $25,%xmm5
1170 pxor %xmm3,%xmm5
1171 .byte 102,15,58,15,237,12
1172 .byte 102,69,15,58,15,201,8
1173 .byte 102,69,15,58,15,237,4
1174 paddd %xmm6,%xmm2
1175 pxor %xmm2,%xmm14
1176 pshufb .rol16(%rip),%xmm14
1177 paddd %xmm14,%xmm10
1178 pxor %xmm10,%xmm6
1179 movdqa %xmm6,%xmm3
1180 pslld $12,%xmm3
1181 psrld $20,%xmm6
1182 pxor %xmm3,%xmm6
1183 paddd %xmm6,%xmm2
1184 pxor %xmm2,%xmm14
1185 pshufb .rol8(%rip),%xmm14
1186 paddd %xmm14,%xmm10
1187 pxor %xmm10,%xmm6
1188 movdqa %xmm6,%xmm3
1189 pslld $7,%xmm3
1190 psrld $25,%xmm6
1191 pxor %xmm3,%xmm6
1192 .byte 102,15,58,15,246,12
1193 .byte 102,69,15,58,15,210,8
1194 .byte 102,69,15,58,15,246,4
1195
1196 cmpq %rcx,%r8
1197 jb 1b
1198 cmpq $160,%r8
1199 jne 2b
1200 cmpq $176,%rbx
1201 jb 1f
1202 addq 160(%rsi),%r10
1203 adcq 8+160(%rsi),%r11
1204 adcq $1,%r12
1205 movq 0+0(%rbp),%rax
1206 movq %rax,%r15
1207 mulq %r10
1208 movq %rax,%r13
1209 movq %rdx,%r14
1210 movq 0+0(%rbp),%rax
1211 mulq %r11
1212 imulq %r12,%r15
1213 addq %rax,%r14
1214 adcq %rdx,%r15
1215 movq 8+0(%rbp),%rax
1216 movq %rax,%r9
1217 mulq %r10
1218 addq %rax,%r14
1219 adcq $0,%rdx
1220 movq %rdx,%r10
1221 movq 8+0(%rbp),%rax
1222 mulq %r11
1223 addq %rax,%r15
1224 adcq $0,%rdx
1225 imulq %r12,%r9
1226 addq %r10,%r15
1227 adcq %rdx,%r9
1228 movq %r13,%r10
1229 movq %r14,%r11
1230 movq %r15,%r12
1231 andq $3,%r12
1232 movq %r15,%r13
1233 andq $-4,%r13
1234 movq %r9,%r14
1235 shrdq $2,%r9,%r15
1236 shrq $2,%r9
1237 addq %r13,%r10
1238 adcq %r14,%r11
1239 adcq $0,%r12
1240 addq %r15,%r10
1241 adcq %r9,%r11
1242 adcq $0,%r12
1243
1244 cmpq $192,%rbx
1245 jb 1f
1246 addq 176(%rsi),%r10
1247 adcq 8+176(%rsi),%r11
1248 adcq $1,%r12
1249 movq 0+0(%rbp),%rax
1250 movq %rax,%r15
1251 mulq %r10
1252 movq %rax,%r13
1253 movq %rdx,%r14
1254 movq 0+0(%rbp),%rax
1255 mulq %r11
1256 imulq %r12,%r15
1257 addq %rax,%r14
1258 adcq %rdx,%r15
1259 movq 8+0(%rbp),%rax
1260 movq %rax,%r9
1261 mulq %r10
1262 addq %rax,%r14
1263 adcq $0,%rdx
1264 movq %rdx,%r10
1265 movq 8+0(%rbp),%rax
1266 mulq %r11
1267 addq %rax,%r15
1268 adcq $0,%rdx
1269 imulq %r12,%r9
1270 addq %r10,%r15
1271 adcq %rdx,%r9
1272 movq %r13,%r10
1273 movq %r14,%r11
1274 movq %r15,%r12
1275 andq $3,%r12
1276 movq %r15,%r13
1277 andq $-4,%r13
1278 movq %r9,%r14
1279 shrdq $2,%r9,%r15
1280 shrq $2,%r9
1281 addq %r13,%r10
1282 adcq %r14,%r11
1283 adcq $0,%r12
1284 addq %r15,%r10
1285 adcq %r9,%r11
1286 adcq $0,%r12
1287
1288 1:
1289 paddd .chacha20_consts(%rip),%xmm2
1290 paddd 48(%rbp),%xmm6
1291 paddd 64(%rbp),%xmm10
1292 paddd 128(%rbp),%xmm14
1293 paddd .chacha20_consts(%rip),%xmm1
1294 paddd 48(%rbp),%xmm5
1295 paddd 64(%rbp),%xmm9
1296 paddd 112(%rbp),%xmm13
1297 paddd .chacha20_consts(%rip),%xmm0
1298 paddd 48(%rbp),%xmm4
1299 paddd 64(%rbp),%xmm8
1300 paddd 96(%rbp),%xmm12
1301 movdqu 0 + 0(%rsi),%xmm3
1302 movdqu 16 + 0(%rsi),%xmm7
1303 movdqu 32 + 0(%rsi),%xmm11
1304 movdqu 48 + 0(%rsi),%xmm15
1305 pxor %xmm3,%xmm2
1306 pxor %xmm7,%xmm6
1307 pxor %xmm11,%xmm10
1308 pxor %xmm14,%xmm15
1309 movdqu %xmm2,0 + 0(%rdi)
1310 movdqu %xmm6,16 + 0(%rdi)
1311 movdqu %xmm10,32 + 0(%rdi)
1312 movdqu %xmm15,48 + 0(%rdi)
1313 movdqu 0 + 64(%rsi),%xmm3
1314 movdqu 16 + 64(%rsi),%xmm7
1315 movdqu 32 + 64(%rsi),%xmm11
1316 movdqu 48 + 64(%rsi),%xmm15
1317 pxor %xmm3,%xmm1
1318 pxor %xmm7,%xmm5
1319 pxor %xmm11,%xmm9
1320 pxor %xmm13,%xmm15
1321 movdqu %xmm1,0 + 64(%rdi)
1322 movdqu %xmm5,16 + 64(%rdi)
1323 movdqu %xmm9,32 + 64(%rdi)
1324 movdqu %xmm15,48 + 64(%rdi)
1325
1326 subq $128,%rbx
1327 leaq 128(%rsi),%rsi
1328 leaq 128(%rdi),%rdi
1329 jmp open_sse_tail_64_dec_loop
1330 3:
1331
1332 movdqa .chacha20_consts(%rip),%xmm0
1333 movdqa 48(%rbp),%xmm4
1334 movdqa 64(%rbp),%xmm8
1335 movdqa %xmm0,%xmm1
1336 movdqa %xmm4,%xmm5
1337 movdqa %xmm8,%xmm9
1338 movdqa %xmm0,%xmm2
1339 movdqa %xmm4,%xmm6
1340 movdqa %xmm8,%xmm10
1341 movdqa %xmm0,%xmm3
1342 movdqa %xmm4,%xmm7
1343 movdqa %xmm8,%xmm11
1344 movdqa 96(%rbp),%xmm15
1345 paddd .sse_inc(%rip),%xmm15
1346 movdqa %xmm15,%xmm14
1347 paddd .sse_inc(%rip),%xmm14
1348 movdqa %xmm14,%xmm13
1349 paddd .sse_inc(%rip),%xmm13
1350 movdqa %xmm13,%xmm12
1351 paddd .sse_inc(%rip),%xmm12
1352 movdqa %xmm12,96(%rbp)
1353 movdqa %xmm13,112(%rbp)
1354 movdqa %xmm14,128(%rbp)
1355 movdqa %xmm15,144(%rbp)
1356
1357 xorq %r8,%r8
1358 1:
1359 addq 0(%rsi,%r8), %r10
1360 adcq 8+0(%rsi,%r8), %r11
1361 adcq $1,%r12
1362 movdqa %xmm11,80(%rbp)
1363 paddd %xmm4,%xmm0
1364 pxor %xmm0,%xmm12
1365 pshufb .rol16(%rip),%xmm12
1366 paddd %xmm12,%xmm8
1367 pxor %xmm8,%xmm4
1368 movdqa %xmm4,%xmm11
1369 pslld $12,%xmm11
1370 psrld $20,%xmm4
1371 pxor %xmm11,%xmm4
1372 paddd %xmm4,%xmm0
1373 pxor %xmm0,%xmm12
1374 pshufb .rol8(%rip),%xmm12
1375 paddd %xmm12,%xmm8
1376 pxor %xmm8,%xmm4
1377 movdqa %xmm4,%xmm11
1378 pslld $7,%xmm11
1379 psrld $25,%xmm4
1380 pxor %xmm11,%xmm4
1381 .byte 102,15,58,15,228,4
1382 .byte 102,69,15,58,15,192,8
1383 .byte 102,69,15,58,15,228,12
1384 paddd %xmm5,%xmm1
1385 pxor %xmm1,%xmm13
1386 pshufb .rol16(%rip),%xmm13
1387 paddd %xmm13,%xmm9
1388 pxor %xmm9,%xmm5
1389 movdqa %xmm5,%xmm11
1390 pslld $12,%xmm11
1391 psrld $20,%xmm5
1392 pxor %xmm11,%xmm5
1393 paddd %xmm5,%xmm1
1394 pxor %xmm1,%xmm13
1395 pshufb .rol8(%rip),%xmm13
1396 paddd %xmm13,%xmm9
1397 pxor %xmm9,%xmm5
1398 movdqa %xmm5,%xmm11
1399 pslld $7,%xmm11
1400 psrld $25,%xmm5
1401 pxor %xmm11,%xmm5
1402 .byte 102,15,58,15,237,4
1403 .byte 102,69,15,58,15,201,8
1404 .byte 102,69,15,58,15,237,12
1405 paddd %xmm6,%xmm2
1406 pxor %xmm2,%xmm14
1407 pshufb .rol16(%rip),%xmm14
1408 paddd %xmm14,%xmm10
1409 pxor %xmm10,%xmm6
1410 movdqa %xmm6,%xmm11
1411 pslld $12,%xmm11
1412 psrld $20,%xmm6
1413 pxor %xmm11,%xmm6
1414 paddd %xmm6,%xmm2
1415 pxor %xmm2,%xmm14
1416 pshufb .rol8(%rip),%xmm14
1417 paddd %xmm14,%xmm10
1418 pxor %xmm10,%xmm6
1419 movdqa %xmm6,%xmm11
1420 pslld $7,%xmm11
1421 psrld $25,%xmm6
1422 pxor %xmm11,%xmm6
1423 .byte 102,15,58,15,246,4
1424 .byte 102,69,15,58,15,210,8
1425 .byte 102,69,15,58,15,246,12
1426 movdqa 80(%rbp),%xmm11
1427 movq 0+0(%rbp),%rax
1428 movq %rax,%r15
1429 mulq %r10
1430 movq %rax,%r13
1431 movq %rdx,%r14
1432 movq 0+0(%rbp),%rax
1433 mulq %r11
1434 imulq %r12,%r15
1435 addq %rax,%r14
1436 adcq %rdx,%r15
1437 movdqa %xmm9,80(%rbp)
1438 paddd %xmm7,%xmm3
1439 pxor %xmm3,%xmm15
1440 pshufb .rol16(%rip),%xmm15
1441 paddd %xmm15,%xmm11
1442 pxor %xmm11,%xmm7
1443 movdqa %xmm7,%xmm9
1444 pslld $12,%xmm9
1445 psrld $20,%xmm7
1446 pxor %xmm9,%xmm7
1447 paddd %xmm7,%xmm3
1448 pxor %xmm3,%xmm15
1449 pshufb .rol8(%rip),%xmm15
1450 paddd %xmm15,%xmm11
1451 pxor %xmm11,%xmm7
1452 movdqa %xmm7,%xmm9
1453 pslld $7,%xmm9
1454 psrld $25,%xmm7
1455 pxor %xmm9,%xmm7
1456 .byte 102,15,58,15,255,4
1457 .byte 102,69,15,58,15,219,8
1458 .byte 102,69,15,58,15,255,12
1459 movdqa 80(%rbp),%xmm9
1460 movq 8+0(%rbp),%rax
1461 movq %rax,%r9
1462 mulq %r10
1463 addq %rax,%r14
1464 adcq $0,%rdx
1465 movq %rdx,%r10
1466 movq 8+0(%rbp),%rax
1467 mulq %r11
1468 addq %rax,%r15
1469 adcq $0,%rdx
1470 movdqa %xmm11,80(%rbp)
1471 paddd %xmm4,%xmm0
1472 pxor %xmm0,%xmm12
1473 pshufb .rol16(%rip),%xmm12
1474 paddd %xmm12,%xmm8
1475 pxor %xmm8,%xmm4
1476 movdqa %xmm4,%xmm11
1477 pslld $12,%xmm11
1478 psrld $20,%xmm4
1479 pxor %xmm11,%xmm4
1480 paddd %xmm4,%xmm0
1481 pxor %xmm0,%xmm12
1482 pshufb .rol8(%rip),%xmm12
1483 paddd %xmm12,%xmm8
1484 pxor %xmm8,%xmm4
1485 movdqa %xmm4,%xmm11
1486 pslld $7,%xmm11
1487 psrld $25,%xmm4
1488 pxor %xmm11,%xmm4
1489 .byte 102,15,58,15,228,12
1490 .byte 102,69,15,58,15,192,8
1491 .byte 102,69,15,58,15,228,4
1492 paddd %xmm5,%xmm1
1493 pxor %xmm1,%xmm13
1494 pshufb .rol16(%rip),%xmm13
1495 paddd %xmm13,%xmm9
1496 pxor %xmm9,%xmm5
1497 movdqa %xmm5,%xmm11
1498 pslld $12,%xmm11
1499 psrld $20,%xmm5
1500 pxor %xmm11,%xmm5
1501 paddd %xmm5,%xmm1
1502 pxor %xmm1,%xmm13
1503 pshufb .rol8(%rip),%xmm13
1504 paddd %xmm13,%xmm9
1505 pxor %xmm9,%xmm5
1506 movdqa %xmm5,%xmm11
1507 pslld $7,%xmm11
1508 psrld $25,%xmm5
1509 pxor %xmm11,%xmm5
1510 .byte 102,15,58,15,237,12
1511 .byte 102,69,15,58,15,201,8
1512 .byte 102,69,15,58,15,237,4
1513 imulq %r12,%r9
1514 addq %r10,%r15
1515 adcq %rdx,%r9
1516 paddd %xmm6,%xmm2
1517 pxor %xmm2,%xmm14
1518 pshufb .rol16(%rip),%xmm14
1519 paddd %xmm14,%xmm10
1520 pxor %xmm10,%xmm6
1521 movdqa %xmm6,%xmm11
1522 pslld $12,%xmm11
1523 psrld $20,%xmm6
1524 pxor %xmm11,%xmm6
1525 paddd %xmm6,%xmm2
1526 pxor %xmm2,%xmm14
1527 pshufb .rol8(%rip),%xmm14
1528 paddd %xmm14,%xmm10
1529 pxor %xmm10,%xmm6
1530 movdqa %xmm6,%xmm11
1531 pslld $7,%xmm11
1532 psrld $25,%xmm6
1533 pxor %xmm11,%xmm6
1534 .byte 102,15,58,15,246,12
1535 .byte 102,69,15,58,15,210,8
1536 .byte 102,69,15,58,15,246,4
1537 movdqa 80(%rbp),%xmm11
1538 movq %r13,%r10
1539 movq %r14,%r11
1540 movq %r15,%r12
1541 andq $3,%r12
1542 movq %r15,%r13
1543 andq $-4,%r13
1544 movq %r9,%r14
1545 shrdq $2,%r9,%r15
1546 shrq $2,%r9
1547 addq %r13,%r10
1548 adcq %r14,%r11
1549 adcq $0,%r12
1550 addq %r15,%r10
1551 adcq %r9,%r11
1552 adcq $0,%r12
1553 movdqa %xmm9,80(%rbp)
1554 paddd %xmm7,%xmm3
1555 pxor %xmm3,%xmm15
1556 pshufb .rol16(%rip),%xmm15
1557 paddd %xmm15,%xmm11
1558 pxor %xmm11,%xmm7
1559 movdqa %xmm7,%xmm9
1560 pslld $12,%xmm9
1561 psrld $20,%xmm7
1562 pxor %xmm9,%xmm7
1563 paddd %xmm7,%xmm3
1564 pxor %xmm3,%xmm15
1565 pshufb .rol8(%rip),%xmm15
1566 paddd %xmm15,%xmm11
1567 pxor %xmm11,%xmm7
1568 movdqa %xmm7,%xmm9
1569 pslld $7,%xmm9
1570 psrld $25,%xmm7
1571 pxor %xmm9,%xmm7
1572 .byte 102,15,58,15,255,12
1573 .byte 102,69,15,58,15,219,8
1574 .byte 102,69,15,58,15,255,4
1575 movdqa 80(%rbp),%xmm9
1576
1577 addq $16,%r8
1578 cmpq $160,%r8
1579 jb 1b
1580 movq %rbx,%rcx
1581 andq $-16,%rcx
1582 1:
1583 addq 0(%rsi,%r8), %r10
1584 adcq 8+0(%rsi,%r8), %r11
1585 adcq $1,%r12
1586 movq 0+0(%rbp),%rax
1587 movq %rax,%r15
1588 mulq %r10
1589 movq %rax,%r13
1590 movq %rdx,%r14
1591 movq 0+0(%rbp),%rax
1592 mulq %r11
1593 imulq %r12,%r15
1594 addq %rax,%r14
1595 adcq %rdx,%r15
1596 movq 8+0(%rbp),%rax
1597 movq %rax,%r9
1598 mulq %r10
1599 addq %rax,%r14
1600 adcq $0,%rdx
1601 movq %rdx,%r10
1602 movq 8+0(%rbp),%rax
1603 mulq %r11
1604 addq %rax,%r15
1605 adcq $0,%rdx
1606 imulq %r12,%r9
1607 addq %r10,%r15
1608 adcq %rdx,%r9
1609 movq %r13,%r10
1610 movq %r14,%r11
1611 movq %r15,%r12
1612 andq $3,%r12
1613 movq %r15,%r13
1614 andq $-4,%r13
1615 movq %r9,%r14
1616 shrdq $2,%r9,%r15
1617 shrq $2,%r9
1618 addq %r13,%r10
1619 adcq %r14,%r11
1620 adcq $0,%r12
1621 addq %r15,%r10
1622 adcq %r9,%r11
1623 adcq $0,%r12
1624
1625 addq $16,%r8
1626 cmpq %rcx,%r8
1627 jb 1b
1628 paddd .chacha20_consts(%rip),%xmm3
1629 paddd 48(%rbp),%xmm7
1630 paddd 64(%rbp),%xmm11
1631 paddd 144(%rbp),%xmm15
1632 paddd .chacha20_consts(%rip),%xmm2
1633 paddd 48(%rbp),%xmm6
1634 paddd 64(%rbp),%xmm10
1635 paddd 128(%rbp),%xmm14
1636 paddd .chacha20_consts(%rip),%xmm1
1637 paddd 48(%rbp),%xmm5
1638 paddd 64(%rbp),%xmm9
1639 paddd 112(%rbp),%xmm13
1640 paddd .chacha20_consts(%rip),%xmm0
1641 paddd 48(%rbp),%xmm4
1642 paddd 64(%rbp),%xmm8
1643 paddd 96(%rbp),%xmm12
1644 movdqa %xmm12,80(%rbp)
1645 movdqu 0 + 0(%rsi),%xmm12
1646 pxor %xmm3,%xmm12
1647 movdqu %xmm12,0 + 0(%rdi)
1648 movdqu 16 + 0(%rsi),%xmm12
1649 pxor %xmm7,%xmm12
1650 movdqu %xmm12,16 + 0(%rdi)
1651 movdqu 32 + 0(%rsi),%xmm12
1652 pxor %xmm11,%xmm12
1653 movdqu %xmm12,32 + 0(%rdi)
1654 movdqu 48 + 0(%rsi),%xmm12
1655 pxor %xmm15,%xmm12
1656 movdqu %xmm12,48 + 0(%rdi)
1657 movdqu 0 + 64(%rsi),%xmm3
1658 movdqu 16 + 64(%rsi),%xmm7
1659 movdqu 32 + 64(%rsi),%xmm11
1660 movdqu 48 + 64(%rsi),%xmm15
1661 pxor %xmm3,%xmm2
1662 pxor %xmm7,%xmm6
1663 pxor %xmm11,%xmm10
1664 pxor %xmm14,%xmm15
1665 movdqu %xmm2,0 + 64(%rdi)
1666 movdqu %xmm6,16 + 64(%rdi)
1667 movdqu %xmm10,32 + 64(%rdi)
1668 movdqu %xmm15,48 + 64(%rdi)
1669 movdqu 0 + 128(%rsi),%xmm3
1670 movdqu 16 + 128(%rsi),%xmm7
1671 movdqu 32 + 128(%rsi),%xmm11
1672 movdqu 48 + 128(%rsi),%xmm15
1673 pxor %xmm3,%xmm1
1674 pxor %xmm7,%xmm5
1675 pxor %xmm11,%xmm9
1676 pxor %xmm13,%xmm15
1677 movdqu %xmm1,0 + 128(%rdi)
1678 movdqu %xmm5,16 + 128(%rdi)
1679 movdqu %xmm9,32 + 128(%rdi)
1680 movdqu %xmm15,48 + 128(%rdi)
1681
1682 movdqa 80(%rbp),%xmm12
1683 subq $192,%rbx
1684 leaq 192(%rsi),%rsi
1685 leaq 192(%rdi),%rdi
1686
1687
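# XOR out any remaining whole 16-byte blocks of keystream, rotating the
# spare keystream words (%xmm4, %xmm8, %xmm12) down into %xmm0.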
1688 open_sse_tail_64_dec_loop:
1689 cmpq $16,%rbx
1690 jb 1f
1691 subq $16,%rbx
1692 movdqu (%rsi),%xmm3
1693 pxor %xmm3,%xmm0
1694 movdqu %xmm0,(%rdi)
1695 leaq 16(%rsi),%rsi
1696 leaq 16(%rdi),%rdi
1697 movdqa %xmm4,%xmm0
1698 movdqa %xmm8,%xmm4
1699 movdqa %xmm12,%xmm8
1700 jmp open_sse_tail_64_dec_loop
1701 1:
1702 movdqa %xmm0,%xmm1
1703
1704
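# Sub-16-byte tail: gather the remaining input one byte at a time, walking
# backwards with pslldq/pinsrb so nothing past the buffer is read. The
# .byte sequence below is movq %xmm3,%r13, which together with pextrq feeds
# the final partial Poly1305 block; the bytes are then XORed with keystream
# and written out one at a time.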
1705 open_sse_tail_16:
1706 testq %rbx,%rbx
1707 jz open_sse_finalize
1708
1709
1710
1711 pxor %xmm3,%xmm3
1712 leaq -1(%rsi,%rbx), %rsi
1713 movq %rbx,%r8
1714 2:
1715 pslldq $1,%xmm3
1716 pinsrb $0,(%rsi),%xmm3
1717 subq $1,%rsi
1718 subq $1,%r8
1719 jnz 2b
1720
1721 3:
1722 .byte 102,73,15,126,221
1723 pextrq $1,%xmm3,%r14
1724
1725 pxor %xmm1,%xmm3
1726
1727
1728 2:
1729 pextrb $0,%xmm3,(%rdi)
1730 psrldq $1,%xmm3
1731 addq $1,%rdi
1732 subq $1,%rbx
1733 jne 2b
1734
1735 addq %r13,%r10
1736 adcq %r14,%r11
1737 adcq $1,%r12
1738 movq 0+0(%rbp),%rax
1739 movq %rax,%r15
1740 mulq %r10
1741 movq %rax,%r13
1742 movq %rdx,%r14
1743 movq 0+0(%rbp),%rax
1744 mulq %r11
1745 imulq %r12,%r15
1746 addq %rax,%r14
1747 adcq %rdx,%r15
1748 movq 8+0(%rbp),%rax
1749 movq %rax,%r9
1750 mulq %r10
1751 addq %rax,%r14
1752 adcq $0,%rdx
1753 movq %rdx,%r10
1754 movq 8+0(%rbp),%rax
1755 mulq %r11
1756 addq %rax,%r15
1757 adcq $0,%rdx
1758 imulq %r12,%r9
1759 addq %r10,%r15
1760 adcq %rdx,%r9
1761 movq %r13,%r10
1762 movq %r14,%r11
1763 movq %r15,%r12
1764 andq $3,%r12
1765 movq %r15,%r13
1766 andq $-4,%r13
1767 movq %r9,%r14
1768 shrdq $2,%r9,%r15
1769 shrq $2,%r9
1770 addq %r13,%r10
1771 adcq %r14,%r11
1772 adcq $0,%r12
1773 addq %r15,%r10
1774 adcq %r9,%r11
1775 adcq $0,%r12
1776
1777
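# Finalize: absorb the length block (AAD and ciphertext lengths, saved at
# 32(%rbp) in the prologue), then reduce fully: the subq $-5 / sbbq $-1 /
# sbbq $3 trio computes h - (2^130-5) and the cmovc's keep h when that
# borrows. Adding s from 16(%rbp) gives the tag, written through the
# pointer that was pushed as %r9 on entry.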
1778 open_sse_finalize:
1779 addq 32(%rbp),%r10
1780 adcq 8+32(%rbp),%r11
1781 adcq $1,%r12
1782 movq 0+0(%rbp),%rax
1783 movq %rax,%r15
1784 mulq %r10
1785 movq %rax,%r13
1786 movq %rdx,%r14
1787 movq 0+0(%rbp),%rax
1788 mulq %r11
1789 imulq %r12,%r15
1790 addq %rax,%r14
1791 adcq %rdx,%r15
1792 movq 8+0(%rbp),%rax
1793 movq %rax,%r9
1794 mulq %r10
1795 addq %rax,%r14
1796 adcq $0,%rdx
1797 movq %rdx,%r10
1798 movq 8+0(%rbp),%rax
1799 mulq %r11
1800 addq %rax,%r15
1801 adcq $0,%rdx
1802 imulq %r12,%r9
1803 addq %r10,%r15
1804 adcq %rdx,%r9
1805 movq %r13,%r10
1806 movq %r14,%r11
1807 movq %r15,%r12
1808 andq $3,%r12
1809 movq %r15,%r13
1810 andq $-4,%r13
1811 movq %r9,%r14
1812 shrdq $2,%r9,%r15
1813 shrq $2,%r9
1814 addq %r13,%r10
1815 adcq %r14,%r11
1816 adcq $0,%r12
1817 addq %r15,%r10
1818 adcq %r9,%r11
1819 adcq $0,%r12
1820
1821
1822 movq %r10,%r13
1823 movq %r11,%r14
1824 movq %r12,%r15
1825 subq $-5,%r10
1826 sbbq $-1,%r11
1827 sbbq $3,%r12
1828 cmovcq %r13,%r10
1829 cmovcq %r14,%r11
1830 cmovcq %r15,%r12
1831
1832 addq 0+16(%rbp),%r10
1833 adcq 8+16(%rbp),%r11
1834
1835 addq $288 + 32,%rsp
1836 .cfi_adjust_cfa_offset -(288 + 32)
1837 popq %r9
1838 .cfi_adjust_cfa_offset -8
1839 movq %r10,(%r9)
1840 movq %r11,8(%r9)
1841
1842 popq %r15
1843 .cfi_adjust_cfa_offset -8
1844 popq %r14
1845 .cfi_adjust_cfa_offset -8
1846 popq %r13
1847 .cfi_adjust_cfa_offset -8
1848 popq %r12
1849 .cfi_adjust_cfa_offset -8
1850 popq %rbx
1851 .cfi_adjust_cfa_offset -8
1852 popq %rbp
1853 .cfi_adjust_cfa_offset -8
1854 .byte 0xf3,0xc3
1855 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32
1856
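# open_sse_128: inputs of at most 128 bytes. Three ChaCha20 blocks are kept
# entirely in registers; the counter-0 block supplies the Poly1305 key and
# the other two provide keystream.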
1857 open_sse_128:
1858 movdqu .chacha20_consts(%rip),%xmm0
1859 movdqa %xmm0,%xmm1
1860 movdqa %xmm0,%xmm2
1861 movdqu 0(%r9),%xmm4
1862 movdqa %xmm4,%xmm5
1863 movdqa %xmm4,%xmm6
1864 movdqu 16(%r9),%xmm8
1865 movdqa %xmm8,%xmm9
1866 movdqa %xmm8,%xmm10
1867 movdqu 32(%r9),%xmm12
1868 movdqa %xmm12,%xmm13
1869 paddd .sse_inc(%rip),%xmm13
1870 movdqa %xmm13,%xmm14
1871 paddd .sse_inc(%rip),%xmm14
1872 movdqa %xmm4,%xmm7
1873 movdqa %xmm8,%xmm11
1874 movdqa %xmm13,%xmm15
1875 movq $10,%r10
1876 1:
1877 paddd %xmm4,%xmm0
1878 pxor %xmm0,%xmm12
1879 pshufb .rol16(%rip),%xmm12
1880 paddd %xmm12,%xmm8
1881 pxor %xmm8,%xmm4
1882 movdqa %xmm4,%xmm3
1883 pslld $12,%xmm3
1884 psrld $20,%xmm4
1885 pxor %xmm3,%xmm4
1886 paddd %xmm4,%xmm0
1887 pxor %xmm0,%xmm12
1888 pshufb .rol8(%rip),%xmm12
1889 paddd %xmm12,%xmm8
1890 pxor %xmm8,%xmm4
1891 movdqa %xmm4,%xmm3
1892 pslld $7,%xmm3
1893 psrld $25,%xmm4
1894 pxor %xmm3,%xmm4
1895 .byte 102,15,58,15,228,4
1896 .byte 102,69,15,58,15,192,8
1897 .byte 102,69,15,58,15,228,12
1898 paddd %xmm5,%xmm1
1899 pxor %xmm1,%xmm13
1900 pshufb .rol16(%rip),%xmm13
1901 paddd %xmm13,%xmm9
1902 pxor %xmm9,%xmm5
1903 movdqa %xmm5,%xmm3
1904 pslld $12,%xmm3
1905 psrld $20,%xmm5
1906 pxor %xmm3,%xmm5
1907 paddd %xmm5,%xmm1
1908 pxor %xmm1,%xmm13
1909 pshufb .rol8(%rip),%xmm13
1910 paddd %xmm13,%xmm9
1911 pxor %xmm9,%xmm5
1912 movdqa %xmm5,%xmm3
1913 pslld $7,%xmm3
1914 psrld $25,%xmm5
1915 pxor %xmm3,%xmm5
1916 .byte 102,15,58,15,237,4
1917 .byte 102,69,15,58,15,201,8
1918 .byte 102,69,15,58,15,237,12
1919 paddd %xmm6,%xmm2
1920 pxor %xmm2,%xmm14
1921 pshufb .rol16(%rip),%xmm14
1922 paddd %xmm14,%xmm10
1923 pxor %xmm10,%xmm6
1924 movdqa %xmm6,%xmm3
1925 pslld $12,%xmm3
1926 psrld $20,%xmm6
1927 pxor %xmm3,%xmm6
1928 paddd %xmm6,%xmm2
1929 pxor %xmm2,%xmm14
1930 pshufb .rol8(%rip),%xmm14
1931 paddd %xmm14,%xmm10
1932 pxor %xmm10,%xmm6
1933 movdqa %xmm6,%xmm3
1934 pslld $7,%xmm3
1935 psrld $25,%xmm6
1936 pxor %xmm3,%xmm6
1937 .byte 102,15,58,15,246,4
1938 .byte 102,69,15,58,15,210,8
1939 .byte 102,69,15,58,15,246,12
1940 paddd %xmm4,%xmm0
1941 pxor %xmm0,%xmm12
1942 pshufb .rol16(%rip),%xmm12
1943 paddd %xmm12,%xmm8
1944 pxor %xmm8,%xmm4
1945 movdqa %xmm4,%xmm3
1946 pslld $12,%xmm3
1947 psrld $20,%xmm4
1948 pxor %xmm3,%xmm4
1949 paddd %xmm4,%xmm0
1950 pxor %xmm0,%xmm12
1951 pshufb .rol8(%rip),%xmm12
1952 paddd %xmm12,%xmm8
1953 pxor %xmm8,%xmm4
1954 movdqa %xmm4,%xmm3
1955 pslld $7,%xmm3
1956 psrld $25,%xmm4
1957 pxor %xmm3,%xmm4
1958 .byte 102,15,58,15,228,12
1959 .byte 102,69,15,58,15,192,8
1960 .byte 102,69,15,58,15,228,4
1961 paddd %xmm5,%xmm1
1962 pxor %xmm1,%xmm13
1963 pshufb .rol16(%rip),%xmm13
1964 paddd %xmm13,%xmm9
1965 pxor %xmm9,%xmm5
1966 movdqa %xmm5,%xmm3
1967 pslld $12,%xmm3
1968 psrld $20,%xmm5
1969 pxor %xmm3,%xmm5
1970 paddd %xmm5,%xmm1
1971 pxor %xmm1,%xmm13
1972 pshufb .rol8(%rip),%xmm13
1973 paddd %xmm13,%xmm9
1974 pxor %xmm9,%xmm5
1975 movdqa %xmm5,%xmm3
1976 pslld $7,%xmm3
1977 psrld $25,%xmm5
1978 pxor %xmm3,%xmm5
1979 .byte 102,15,58,15,237,12
1980 .byte 102,69,15,58,15,201,8
1981 .byte 102,69,15,58,15,237,4
1982 paddd %xmm6,%xmm2
1983 pxor %xmm2,%xmm14
1984 pshufb .rol16(%rip),%xmm14
1985 paddd %xmm14,%xmm10
1986 pxor %xmm10,%xmm6
1987 movdqa %xmm6,%xmm3
1988 pslld $12,%xmm3
1989 psrld $20,%xmm6
1990 pxor %xmm3,%xmm6
1991 paddd %xmm6,%xmm2
1992 pxor %xmm2,%xmm14
1993 pshufb .rol8(%rip),%xmm14
1994 paddd %xmm14,%xmm10
1995 pxor %xmm10,%xmm6
1996 movdqa %xmm6,%xmm3
1997 pslld $7,%xmm3
1998 psrld $25,%xmm6
1999 pxor %xmm3,%xmm6
2000 .byte 102,15,58,15,246,12
2001 .byte 102,69,15,58,15,210,8
2002 .byte 102,69,15,58,15,246,4
2003
2004 decq %r10
2005 jnz 1b
2006 paddd .chacha20_consts(%rip),%xmm0
2007 paddd .chacha20_consts(%rip),%xmm1
2008 paddd .chacha20_consts(%rip),%xmm2
2009 paddd %xmm7,%xmm4
2010 paddd %xmm7,%xmm5
2011 paddd %xmm7,%xmm6
2012 paddd %xmm11,%xmm9
2013 paddd %xmm11,%xmm10
2014 paddd %xmm15,%xmm13
2015 paddd .sse_inc(%rip),%xmm15
2016 paddd %xmm15,%xmm14
2017
2018 pand .clamp(%rip),%xmm0
2019 movdqa %xmm0,0(%rbp)
2020 movdqa %xmm4,16(%rbp)
2021
2022 movq %r8,%r8
2023 call poly_hash_ad_internal
2024 1:
2025 cmpq $16,%rbx
2026 jb open_sse_tail_16
2027 subq $16,%rbx
2028 addq 0(%rsi),%r10
2029 adcq 8+0(%rsi),%r11
2030 adcq $1,%r12
2031
2032
2033 movdqu 0(%rsi),%xmm3
2034 pxor %xmm3,%xmm1
2035 movdqu %xmm1,0(%rdi)
2036 leaq 16(%rsi),%rsi
2037 leaq 16(%rdi),%rdi
2038 movq 0+0(%rbp),%rax
2039 movq %rax,%r15
2040 mulq %r10
2041 movq %rax,%r13
2042 movq %rdx,%r14
2043 movq 0+0(%rbp),%rax
2044 mulq %r11
2045 imulq %r12,%r15
2046 addq %rax,%r14
2047 adcq %rdx,%r15
2048 movq 8+0(%rbp),%rax
2049 movq %rax,%r9
2050 mulq %r10
2051 addq %rax,%r14
2052 adcq $0,%rdx
2053 movq %rdx,%r10
2054 movq 8+0(%rbp),%rax
2055 mulq %r11
2056 addq %rax,%r15
2057 adcq $0,%rdx
2058 imulq %r12,%r9
2059 addq %r10,%r15
2060 adcq %rdx,%r9
2061 movq %r13,%r10
2062 movq %r14,%r11
2063 movq %r15,%r12
2064 andq $3,%r12
2065 movq %r15,%r13
2066 andq $-4,%r13
2067 movq %r9,%r14
2068 shrdq $2,%r9,%r15
2069 shrq $2,%r9
2070 addq %r13,%r10
2071 adcq %r14,%r11
2072 adcq $0,%r12
2073 addq %r15,%r10
2074 adcq %r9,%r11
2075 adcq $0,%r12
2076
2077
2078 movdqa %xmm5,%xmm1
2079 movdqa %xmm9,%xmm5
2080 movdqa %xmm13,%xmm9
2081 movdqa %xmm2,%xmm13
2082 movdqa %xmm6,%xmm2
2083 movdqa %xmm10,%xmm6
2084 movdqa %xmm14,%xmm10
2085 jmp 1b
2086 jmp open_sse_tail_16
2087 .size chacha20_poly1305_open, .-chacha20_poly1305_open
2088 .cfi_endproc
2089
2090
2091
2092
2093 .globl chacha20_poly1305_seal
2094 .hidden chacha20_poly1305_seal
2095 .type chacha20_poly1305_seal,@function
2096 .align 64
2097 chacha20_poly1305_seal:
2098 .cfi_startproc
2099 pushq %rbp
2100 .cfi_adjust_cfa_offset 8
2101 pushq %rbx
2102 .cfi_adjust_cfa_offset 8
2103 pushq %r12
2104 .cfi_adjust_cfa_offset 8
2105 pushq %r13
2106 .cfi_adjust_cfa_offset 8
2107 pushq %r14
2108 .cfi_adjust_cfa_offset 8
2109 pushq %r15
2110 .cfi_adjust_cfa_offset 8
2111
2112
2113 pushq %r9
2114 .cfi_adjust_cfa_offset 8
2115 subq $288 + 32,%rsp
2116 .cfi_adjust_cfa_offset 288 + 32
2117 .cfi_offset rbp, -16
2118 .cfi_offset rbx, -24
2119 .cfi_offset r12, -32
2120 .cfi_offset r13, -40
2121 .cfi_offset r14, -48
2122 .cfi_offset r15, -56
2123 .cfi_offset %r9, -64
2124 leaq 32(%rsp),%rbp
2125 andq $-32,%rbp
2126 movq %rdx,8+32(%rbp)
2127 movq %r8,0+32(%rbp)
2128 movq %rdx,%rbx
2129
2130 movl OPENSSL_ia32cap_P+8(%rip),%eax
2131 andl $288,%eax
2132 xorl $288,%eax
2133 jz chacha20_poly1305_seal_avx2
2134
2135 cmpq $128,%rbx
2136 jbe seal_sse_128
2137
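# seal (encrypt) path: mirrors open, but Poly1305 must run over ciphertext,
# so hashing reads what was just written to %rdi instead of the input. The
# first pass computes four blocks up front: the counter-0 block becomes the
# Poly1305 key and counters 1-3 encrypt the first 192 bytes.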
2138 movdqa .chacha20_consts(%rip),%xmm0
2139 movdqu 0(%r9),%xmm4
2140 movdqu 16(%r9),%xmm8
2141 movdqu 32(%r9),%xmm12
2142 movdqa %xmm0,%xmm1
2143 movdqa %xmm0,%xmm2
2144 movdqa %xmm0,%xmm3
2145 movdqa %xmm4,%xmm5
2146 movdqa %xmm4,%xmm6
2147 movdqa %xmm4,%xmm7
2148 movdqa %xmm8,%xmm9
2149 movdqa %xmm8,%xmm10
2150 movdqa %xmm8,%xmm11
2151 movdqa %xmm12,%xmm15
2152 paddd .sse_inc(%rip),%xmm12
2153 movdqa %xmm12,%xmm14
2154 paddd .sse_inc(%rip),%xmm12
2155 movdqa %xmm12,%xmm13
2156 paddd .sse_inc(%rip),%xmm12
2157
2158 movdqa %xmm4,48(%rbp)
2159 movdqa %xmm8,64(%rbp)
2160 movdqa %xmm12,96(%rbp)
2161 movdqa %xmm13,112(%rbp)
2162 movdqa %xmm14,128(%rbp)
2163 movdqa %xmm15,144(%rbp)
2164 movq $10,%r10
2165 1:
2166 movdqa %xmm8,80(%rbp)
2167 movdqa .rol16(%rip),%xmm8
2168 paddd %xmm7,%xmm3
2169 paddd %xmm6,%xmm2
2170 paddd %xmm5,%xmm1
2171 paddd %xmm4,%xmm0
2172 pxor %xmm3,%xmm15
2173 pxor %xmm2,%xmm14
2174 pxor %xmm1,%xmm13
2175 pxor %xmm0,%xmm12
2176 .byte 102,69,15,56,0,248
2177 .byte 102,69,15,56,0,240
2178 .byte 102,69,15,56,0,232
2179 .byte 102,69,15,56,0,224
2180 movdqa 80(%rbp),%xmm8
2181 paddd %xmm15,%xmm11
2182 paddd %xmm14,%xmm10
2183 paddd %xmm13,%xmm9
2184 paddd %xmm12,%xmm8
2185 pxor %xmm11,%xmm7
2186 pxor %xmm10,%xmm6
2187 pxor %xmm9,%xmm5
2188 pxor %xmm8,%xmm4
2189 movdqa %xmm8,80(%rbp)
2190 movdqa %xmm7,%xmm8
2191 psrld $20,%xmm8
2192 pslld $32-20,%xmm7
2193 pxor %xmm8,%xmm7
2194 movdqa %xmm6,%xmm8
2195 psrld $20,%xmm8
2196 pslld $32-20,%xmm6
2197 pxor %xmm8,%xmm6
2198 movdqa %xmm5,%xmm8
2199 psrld $20,%xmm8
2200 pslld $32-20,%xmm5
2201 pxor %xmm8,%xmm5
2202 movdqa %xmm4,%xmm8
2203 psrld $20,%xmm8
2204 pslld $32-20,%xmm4
2205 pxor %xmm8,%xmm4
2206 movdqa .rol8(%rip),%xmm8
2207 paddd %xmm7,%xmm3
2208 paddd %xmm6,%xmm2
2209 paddd %xmm5,%xmm1
2210 paddd %xmm4,%xmm0
2211 pxor %xmm3,%xmm15
2212 pxor %xmm2,%xmm14
2213 pxor %xmm1,%xmm13
2214 pxor %xmm0,%xmm12
2215 .byte 102,69,15,56,0,248
2216 .byte 102,69,15,56,0,240
2217 .byte 102,69,15,56,0,232
2218 .byte 102,69,15,56,0,224
2219 movdqa 80(%rbp),%xmm8
2220 paddd %xmm15,%xmm11
2221 paddd %xmm14,%xmm10
2222 paddd %xmm13,%xmm9
2223 paddd %xmm12,%xmm8
2224 pxor %xmm11,%xmm7
2225 pxor %xmm10,%xmm6
2226 pxor %xmm9,%xmm5
2227 pxor %xmm8,%xmm4
2228 movdqa %xmm8,80(%rbp)
2229 movdqa %xmm7,%xmm8
2230 psrld $25,%xmm8
2231 pslld $32-25,%xmm7
2232 pxor %xmm8,%xmm7
2233 movdqa %xmm6,%xmm8
2234 psrld $25,%xmm8
2235 pslld $32-25,%xmm6
2236 pxor %xmm8,%xmm6
2237 movdqa %xmm5,%xmm8
2238 psrld $25,%xmm8
2239 pslld $32-25,%xmm5
2240 pxor %xmm8,%xmm5
2241 movdqa %xmm4,%xmm8
2242 psrld $25,%xmm8
2243 pslld $32-25,%xmm4
2244 pxor %xmm8,%xmm4
2245 movdqa 80(%rbp),%xmm8
2246 .byte 102,15,58,15,255,4
2247 .byte 102,69,15,58,15,219,8
2248 .byte 102,69,15,58,15,255,12
2249 .byte 102,15,58,15,246,4
2250 .byte 102,69,15,58,15,210,8
2251 .byte 102,69,15,58,15,246,12
2252 .byte 102,15,58,15,237,4
2253 .byte 102,69,15,58,15,201,8
2254 .byte 102,69,15,58,15,237,12
2255 .byte 102,15,58,15,228,4
2256 .byte 102,69,15,58,15,192,8
2257 .byte 102,69,15,58,15,228,12
2258 movdqa %xmm8,80(%rbp)
2259 movdqa .rol16(%rip),%xmm8
2260 paddd %xmm7,%xmm3
2261 paddd %xmm6,%xmm2
2262 paddd %xmm5,%xmm1
2263 paddd %xmm4,%xmm0
2264 pxor %xmm3,%xmm15
2265 pxor %xmm2,%xmm14
2266 pxor %xmm1,%xmm13
2267 pxor %xmm0,%xmm12
2268 .byte 102,69,15,56,0,248
2269 .byte 102,69,15,56,0,240
2270 .byte 102,69,15,56,0,232
2271 .byte 102,69,15,56,0,224
2272 movdqa 80(%rbp),%xmm8
2273 paddd %xmm15,%xmm11
2274 paddd %xmm14,%xmm10
2275 paddd %xmm13,%xmm9
2276 paddd %xmm12,%xmm8
2277 pxor %xmm11,%xmm7
2278 pxor %xmm10,%xmm6
2279 pxor %xmm9,%xmm5
2280 pxor %xmm8,%xmm4
2281 movdqa %xmm8,80(%rbp)
2282 movdqa %xmm7,%xmm8
2283 psrld $20,%xmm8
2284 pslld $32-20,%xmm7
2285 pxor %xmm8,%xmm7
2286 movdqa %xmm6,%xmm8
2287 psrld $20,%xmm8
2288 pslld $32-20,%xmm6
2289 pxor %xmm8,%xmm6
2290 movdqa %xmm5,%xmm8
2291 psrld $20,%xmm8
2292 pslld $32-20,%xmm5
2293 pxor %xmm8,%xmm5
2294 movdqa %xmm4,%xmm8
2295 psrld $20,%xmm8
2296 pslld $32-20,%xmm4
2297 pxor %xmm8,%xmm4
2298 movdqa .rol8(%rip),%xmm8
2299 paddd %xmm7,%xmm3
2300 paddd %xmm6,%xmm2
2301 paddd %xmm5,%xmm1
2302 paddd %xmm4,%xmm0
2303 pxor %xmm3,%xmm15
2304 pxor %xmm2,%xmm14
2305 pxor %xmm1,%xmm13
2306 pxor %xmm0,%xmm12
2307 .byte 102,69,15,56,0,248
2308 .byte 102,69,15,56,0,240
2309 .byte 102,69,15,56,0,232
2310 .byte 102,69,15,56,0,224
2311 movdqa 80(%rbp),%xmm8
2312 paddd %xmm15,%xmm11
2313 paddd %xmm14,%xmm10
2314 paddd %xmm13,%xmm9
2315 paddd %xmm12,%xmm8
2316 pxor %xmm11,%xmm7
2317 pxor %xmm10,%xmm6
2318 pxor %xmm9,%xmm5
2319 pxor %xmm8,%xmm4
2320 movdqa %xmm8,80(%rbp)
2321 movdqa %xmm7,%xmm8
2322 psrld $25,%xmm8
2323 pslld $32-25,%xmm7
2324 pxor %xmm8,%xmm7
2325 movdqa %xmm6,%xmm8
2326 psrld $25,%xmm8
2327 pslld $32-25,%xmm6
2328 pxor %xmm8,%xmm6
2329 movdqa %xmm5,%xmm8
2330 psrld $25,%xmm8
2331 pslld $32-25,%xmm5
2332 pxor %xmm8,%xmm5
2333 movdqa %xmm4,%xmm8
2334 psrld $25,%xmm8
2335 pslld $32-25,%xmm4
2336 pxor %xmm8,%xmm4
2337 movdqa 80(%rbp),%xmm8
2338 .byte 102,15,58,15,255,12
2339 .byte 102,69,15,58,15,219,8
2340 .byte 102,69,15,58,15,255,4
2341 .byte 102,15,58,15,246,12
2342 .byte 102,69,15,58,15,210,8
2343 .byte 102,69,15,58,15,246,4
2344 .byte 102,15,58,15,237,12
2345 .byte 102,69,15,58,15,201,8
2346 .byte 102,69,15,58,15,237,4
2347 .byte 102,15,58,15,228,12
2348 .byte 102,69,15,58,15,192,8
2349 .byte 102,69,15,58,15,228,4
2350
2351 decq %r10
2352 jnz 1b
2353 paddd .chacha20_consts(%rip),%xmm3
2354 paddd 48(%rbp),%xmm7
2355 paddd 64(%rbp),%xmm11
2356 paddd 144(%rbp),%xmm15
2357 paddd .chacha20_consts(%rip),%xmm2
2358 paddd 48(%rbp),%xmm6
2359 paddd 64(%rbp),%xmm10
2360 paddd 128(%rbp),%xmm14
2361 paddd .chacha20_consts(%rip),%xmm1
2362 paddd 48(%rbp),%xmm5
2363 paddd 64(%rbp),%xmm9
2364 paddd 112(%rbp),%xmm13
2365 paddd .chacha20_consts(%rip),%xmm0
2366 paddd 48(%rbp),%xmm4
2367 paddd 64(%rbp),%xmm8
2368 paddd 96(%rbp),%xmm12
2369
2370
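# As in the open path, the block computed with the initial counter
# (%xmm3/%xmm7, whose counter came from 144(%rbp)) is clamped into the
# Poly1305 key.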
2371 pand .clamp(%rip),%xmm3
2372 movdqa %xmm3,0(%rbp)
2373 movdqa %xmm7,16(%rbp)
2374
2375 movq %r8,%r8
2376 call poly_hash_ad_internal
2377 movdqu 0 + 0(%rsi),%xmm3
2378 movdqu 16 + 0(%rsi),%xmm7
2379 movdqu 32 + 0(%rsi),%xmm11
2380 movdqu 48 + 0(%rsi),%xmm15
2381 pxor %xmm3,%xmm2
2382 pxor %xmm7,%xmm6
2383 pxor %xmm11,%xmm10
2384 pxor %xmm14,%xmm15
2385 movdqu %xmm2,0 + 0(%rdi)
2386 movdqu %xmm6,16 + 0(%rdi)
2387 movdqu %xmm10,32 + 0(%rdi)
2388 movdqu %xmm15,48 + 0(%rdi)
2389 movdqu 0 + 64(%rsi),%xmm3
2390 movdqu 16 + 64(%rsi),%xmm7
2391 movdqu 32 + 64(%rsi),%xmm11
2392 movdqu 48 + 64(%rsi),%xmm15
2393 pxor %xmm3,%xmm1
2394 pxor %xmm7,%xmm5
2395 pxor %xmm11,%xmm9
2396 pxor %xmm13,%xmm15
2397 movdqu %xmm1,0 + 64(%rdi)
2398 movdqu %xmm5,16 + 64(%rdi)
2399 movdqu %xmm9,32 + 64(%rdi)
2400 movdqu %xmm15,48 + 64(%rdi)
2401
2402 cmpq $192,%rbx
2403 ja 1f
2404 movq $128,%rcx
2405 subq $128,%rbx
2406 leaq 128(%rsi),%rsi
2407 jmp seal_sse_128_seal_hash
2408 1:
2409 movdqu 0 + 128(%rsi),%xmm3
2410 movdqu 16 + 128(%rsi),%xmm7
2411 movdqu 32 + 128(%rsi),%xmm11
2412 movdqu 48 + 128(%rsi),%xmm15
2413 pxor %xmm3,%xmm0
2414 pxor %xmm7,%xmm4
2415 pxor %xmm11,%xmm8
2416 pxor %xmm12,%xmm15
2417 movdqu %xmm0,0 + 128(%rdi)
2418 movdqu %xmm4,16 + 128(%rdi)
2419 movdqu %xmm8,32 + 128(%rdi)
2420 movdqu %xmm15,48 + 128(%rdi)
2421
2422 movq $192,%rcx
2423 subq $192,%rbx
2424 leaq 192(%rsi),%rsi
2425 movq $2,%rcx
2426 movq $8,%r8
2427 cmpq $64,%rbx
2428 jbe seal_sse_tail_64
2429 cmpq $128,%rbx
2430 jbe seal_sse_tail_128
2431 cmpq $192,%rbx
2432 jbe seal_sse_tail_192
2433
2434 1:
2435 movdqa .chacha20_consts(%rip),%xmm0
2436 movdqa 48(%rbp),%xmm4
2437 movdqa 64(%rbp),%xmm8
2438 movdqa %xmm0,%xmm1
2439 movdqa %xmm4,%xmm5
2440 movdqa %xmm8,%xmm9
2441 movdqa %xmm0,%xmm2
2442 movdqa %xmm4,%xmm6
2443 movdqa %xmm8,%xmm10
2444 movdqa %xmm0,%xmm3
2445 movdqa %xmm4,%xmm7
2446 movdqa %xmm8,%xmm11
2447 movdqa 96(%rbp),%xmm15
2448 paddd .sse_inc(%rip),%xmm15
2449 movdqa %xmm15,%xmm14
2450 paddd .sse_inc(%rip),%xmm14
2451 movdqa %xmm14,%xmm13
2452 paddd .sse_inc(%rip),%xmm13
2453 movdqa %xmm13,%xmm12
2454 paddd .sse_inc(%rip),%xmm12
2455 movdqa %xmm12,96(%rbp)
2456 movdqa %xmm13,112(%rbp)
2457 movdqa %xmm14,128(%rbp)
2458 movdqa %xmm15,144(%rbp)
2459
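# Round loop: one ChaCha double round per pass, interleaved with one Poly1305 block over 16 bytes of previously written ciphertext (%rdi).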
2460 2:
2461 movdqa %xmm8,80(%rbp)
2462 movdqa .rol16(%rip),%xmm8
2463 paddd %xmm7,%xmm3
2464 paddd %xmm6,%xmm2
2465 paddd %xmm5,%xmm1
2466 paddd %xmm4,%xmm0
2467 pxor %xmm3,%xmm15
2468 pxor %xmm2,%xmm14
2469 pxor %xmm1,%xmm13
2470 pxor %xmm0,%xmm12
2471 .byte 102,69,15,56,0,248
2472 .byte 102,69,15,56,0,240
2473 .byte 102,69,15,56,0,232
2474 .byte 102,69,15,56,0,224
2475 movdqa 80(%rbp),%xmm8
2476 paddd %xmm15,%xmm11
2477 paddd %xmm14,%xmm10
2478 paddd %xmm13,%xmm9
2479 paddd %xmm12,%xmm8
2480 pxor %xmm11,%xmm7
2481 addq 0(%rdi),%r10
2482 adcq 8+0(%rdi),%r11
2483 adcq $1,%r12
2484 pxor %xmm10,%xmm6
2485 pxor %xmm9,%xmm5
2486 pxor %xmm8,%xmm4
2487 movdqa %xmm8,80(%rbp)
2488 movdqa %xmm7,%xmm8
2489 psrld $20,%xmm8
2490 pslld $32-20,%xmm7
2491 pxor %xmm8,%xmm7
2492 movdqa %xmm6,%xmm8
2493 psrld $20,%xmm8
2494 pslld $32-20,%xmm6
2495 pxor %xmm8,%xmm6
2496 movdqa %xmm5,%xmm8
2497 psrld $20,%xmm8
2498 pslld $32-20,%xmm5
2499 pxor %xmm8,%xmm5
2500 movdqa %xmm4,%xmm8
2501 psrld $20,%xmm8
2502 pslld $32-20,%xmm4
2503 pxor %xmm8,%xmm4
2504 movq 0+0(%rbp),%rax
2505 movq %rax,%r15
2506 mulq %r10
2507 movq %rax,%r13
2508 movq %rdx,%r14
2509 movq 0+0(%rbp),%rax
2510 mulq %r11
2511 imulq %r12,%r15
2512 addq %rax,%r14
2513 adcq %rdx,%r15
2514 movdqa .rol8(%rip),%xmm8
2515 paddd %xmm7,%xmm3
2516 paddd %xmm6,%xmm2
2517 paddd %xmm5,%xmm1
2518 paddd %xmm4,%xmm0
2519 pxor %xmm3,%xmm15
2520 pxor %xmm2,%xmm14
2521 pxor %xmm1,%xmm13
2522 pxor %xmm0,%xmm12
2523 .byte 102,69,15,56,0,248
2524 .byte 102,69,15,56,0,240
2525 .byte 102,69,15,56,0,232
2526 .byte 102,69,15,56,0,224
2527 movdqa 80(%rbp),%xmm8
2528 paddd %xmm15,%xmm11
2529 paddd %xmm14,%xmm10
2530 paddd %xmm13,%xmm9
2531 paddd %xmm12,%xmm8
2532 pxor %xmm11,%xmm7
2533 pxor %xmm10,%xmm6
2534 movq 8+0(%rbp),%rax
2535 movq %rax,%r9
2536 mulq %r10
2537 addq %rax,%r14
2538 adcq $0,%rdx
2539 movq %rdx,%r10
2540 movq 8+0(%rbp),%rax
2541 mulq %r11
2542 addq %rax,%r15
2543 adcq $0,%rdx
2544 pxor %xmm9,%xmm5
2545 pxor %xmm8,%xmm4
2546 movdqa %xmm8,80(%rbp)
2547 movdqa %xmm7,%xmm8
2548 psrld $25,%xmm8
2549 pslld $32-25,%xmm7
2550 pxor %xmm8,%xmm7
2551 movdqa %xmm6,%xmm8
2552 psrld $25,%xmm8
2553 pslld $32-25,%xmm6
2554 pxor %xmm8,%xmm6
2555 movdqa %xmm5,%xmm8
2556 psrld $25,%xmm8
2557 pslld $32-25,%xmm5
2558 pxor %xmm8,%xmm5
2559 movdqa %xmm4,%xmm8
2560 psrld $25,%xmm8
2561 pslld $32-25,%xmm4
2562 pxor %xmm8,%xmm4
2563 movdqa 80(%rbp),%xmm8
2564 imulq %r12,%r9
2565 addq %r10,%r15
2566 adcq %rdx,%r9
2567 .byte 102,15,58,15,255,4
2568 .byte 102,69,15,58,15,219,8
2569 .byte 102,69,15,58,15,255,12
2570 .byte 102,15,58,15,246,4
2571 .byte 102,69,15,58,15,210,8
2572 .byte 102,69,15,58,15,246,12
2573 .byte 102,15,58,15,237,4
2574 .byte 102,69,15,58,15,201,8
2575 .byte 102,69,15,58,15,237,12
2576 .byte 102,15,58,15,228,4
2577 .byte 102,69,15,58,15,192,8
2578 .byte 102,69,15,58,15,228,12
2579 movdqa %xmm8,80(%rbp)
2580 movdqa .rol16(%rip),%xmm8
2581 paddd %xmm7,%xmm3
2582 paddd %xmm6,%xmm2
2583 paddd %xmm5,%xmm1
2584 paddd %xmm4,%xmm0
2585 pxor %xmm3,%xmm15
2586 pxor %xmm2,%xmm14
2587 movq %r13,%r10
2588 movq %r14,%r11
2589 movq %r15,%r12
2590 andq $3,%r12
2591 movq %r15,%r13
2592 andq $-4,%r13
2593 movq %r9,%r14
2594 shrdq $2,%r9,%r15
2595 shrq $2,%r9
2596 addq %r13,%r10
2597 adcq %r14,%r11
2598 adcq $0,%r12
2599 addq %r15,%r10
2600 adcq %r9,%r11
2601 adcq $0,%r12
2602 pxor %xmm1,%xmm13
2603 pxor %xmm0,%xmm12
2604 .byte 102,69,15,56,0,248
2605 .byte 102,69,15,56,0,240
2606 .byte 102,69,15,56,0,232
2607 .byte 102,69,15,56,0,224
2608 movdqa 80(%rbp),%xmm8
2609 paddd %xmm15,%xmm11
2610 paddd %xmm14,%xmm10
2611 paddd %xmm13,%xmm9
2612 paddd %xmm12,%xmm8
2613 pxor %xmm11,%xmm7
2614 pxor %xmm10,%xmm6
2615 pxor %xmm9,%xmm5
2616 pxor %xmm8,%xmm4
2617 movdqa %xmm8,80(%rbp)
2618 movdqa %xmm7,%xmm8
2619 psrld $20,%xmm8
2620 pslld $32-20,%xmm7
2621 pxor %xmm8,%xmm7
2622 movdqa %xmm6,%xmm8
2623 psrld $20,%xmm8
2624 pslld $32-20,%xmm6
2625 pxor %xmm8,%xmm6
2626 movdqa %xmm5,%xmm8
2627 psrld $20,%xmm8
2628 pslld $32-20,%xmm5
2629 pxor %xmm8,%xmm5
2630 movdqa %xmm4,%xmm8
2631 psrld $20,%xmm8
2632 pslld $32-20,%xmm4
2633 pxor %xmm8,%xmm4
2634 movdqa .rol8(%rip),%xmm8
2635 paddd %xmm7,%xmm3
2636 paddd %xmm6,%xmm2
2637 paddd %xmm5,%xmm1
2638 paddd %xmm4,%xmm0
2639 pxor %xmm3,%xmm15
2640 pxor %xmm2,%xmm14
2641 pxor %xmm1,%xmm13
2642 pxor %xmm0,%xmm12
2643 .byte 102,69,15,56,0,248
2644 .byte 102,69,15,56,0,240
2645 .byte 102,69,15,56,0,232
2646 .byte 102,69,15,56,0,224
2647 movdqa 80(%rbp),%xmm8
2648 paddd %xmm15,%xmm11
2649 paddd %xmm14,%xmm10
2650 paddd %xmm13,%xmm9
2651 paddd %xmm12,%xmm8
2652 pxor %xmm11,%xmm7
2653 pxor %xmm10,%xmm6
2654 pxor %xmm9,%xmm5
2655 pxor %xmm8,%xmm4
2656 movdqa %xmm8,80(%rbp)
2657 movdqa %xmm7,%xmm8
2658 psrld $25,%xmm8
2659 pslld $32-25,%xmm7
2660 pxor %xmm8,%xmm7
2661 movdqa %xmm6,%xmm8
2662 psrld $25,%xmm8
2663 pslld $32-25,%xmm6
2664 pxor %xmm8,%xmm6
2665 movdqa %xmm5,%xmm8
2666 psrld $25,%xmm8
2667 pslld $32-25,%xmm5
2668 pxor %xmm8,%xmm5
2669 movdqa %xmm4,%xmm8
2670 psrld $25,%xmm8
2671 pslld $32-25,%xmm4
2672 pxor %xmm8,%xmm4
2673 movdqa 80(%rbp),%xmm8
2674 .byte 102,15,58,15,255,12
2675 .byte 102,69,15,58,15,219,8
2676 .byte 102,69,15,58,15,255,4
2677 .byte 102,15,58,15,246,12
2678 .byte 102,69,15,58,15,210,8
2679 .byte 102,69,15,58,15,246,4
2680 .byte 102,15,58,15,237,12
2681 .byte 102,69,15,58,15,201,8
2682 .byte 102,69,15,58,15,237,4
2683 .byte 102,15,58,15,228,12
2684 .byte 102,69,15,58,15,192,8
2685 .byte 102,69,15,58,15,228,4
2686
2687 leaq 16(%rdi),%rdi
2688 decq %r8
2689 jge 2b
2690 addq 0(%rdi),%r10
2691 adcq 8+0(%rdi),%r11
2692 adcq $1,%r12
2693 movq 0+0(%rbp),%rax
2694 movq %rax,%r15
2695 mulq %r10
2696 movq %rax,%r13
2697 movq %rdx,%r14
2698 movq 0+0(%rbp),%rax
2699 mulq %r11
2700 imulq %r12,%r15
2701 addq %rax,%r14
2702 adcq %rdx,%r15
2703 movq 8+0(%rbp),%rax
2704 movq %rax,%r9
2705 mulq %r10
2706 addq %rax,%r14
2707 adcq $0,%rdx
2708 movq %rdx,%r10
2709 movq 8+0(%rbp),%rax
2710 mulq %r11
2711 addq %rax,%r15
2712 adcq $0,%rdx
2713 imulq %r12,%r9
2714 addq %r10,%r15
2715 adcq %rdx,%r9
2716 movq %r13,%r10
2717 movq %r14,%r11
2718 movq %r15,%r12
2719 andq $3,%r12
2720 movq %r15,%r13
2721 andq $-4,%r13
2722 movq %r9,%r14
2723 shrdq $2,%r9,%r15
2724 shrq $2,%r9
2725 addq %r13,%r10
2726 adcq %r14,%r11
2727 adcq $0,%r12
2728 addq %r15,%r10
2729 adcq %r9,%r11
2730 adcq $0,%r12
2731
2732 leaq 16(%rdi),%rdi
2733 decq %rcx
2734 jg 2b
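# Rounds finished: add the initial state back to all four blocks; xmm14 is parked at 80(%rbp) to free a scratch register for the first 64-byte XOR.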
2735 paddd .chacha20_consts(%rip),%xmm3
2736 paddd 48(%rbp),%xmm7
2737 paddd 64(%rbp),%xmm11
2738 paddd 144(%rbp),%xmm15
2739 paddd .chacha20_consts(%rip),%xmm2
2740 paddd 48(%rbp),%xmm6
2741 paddd 64(%rbp),%xmm10
2742 paddd 128(%rbp),%xmm14
2743 paddd .chacha20_consts(%rip),%xmm1
2744 paddd 48(%rbp),%xmm5
2745 paddd 64(%rbp),%xmm9
2746 paddd 112(%rbp),%xmm13
2747 paddd .chacha20_consts(%rip),%xmm0
2748 paddd 48(%rbp),%xmm4
2749 paddd 64(%rbp),%xmm8
2750 paddd 96(%rbp),%xmm12
2751
2752 movdqa %xmm14,80(%rbp)
2754 movdqu 0 + 0(%rsi),%xmm14
2755 pxor %xmm3,%xmm14
2756 movdqu %xmm14,0 + 0(%rdi)
2757 movdqu 16 + 0(%rsi),%xmm14
2758 pxor %xmm7,%xmm14
2759 movdqu %xmm14,16 + 0(%rdi)
2760 movdqu 32 + 0(%rsi),%xmm14
2761 pxor %xmm11,%xmm14
2762 movdqu %xmm14,32 + 0(%rdi)
2763 movdqu 48 + 0(%rsi),%xmm14
2764 pxor %xmm15,%xmm14
2765 movdqu %xmm14,48 + 0(%rdi)
2766
2767 movdqa 80(%rbp),%xmm14
2768 movdqu 0 + 64(%rsi),%xmm3
2769 movdqu 16 + 64(%rsi),%xmm7
2770 movdqu 32 + 64(%rsi),%xmm11
2771 movdqu 48 + 64(%rsi),%xmm15
2772 pxor %xmm3,%xmm2
2773 pxor %xmm7,%xmm6
2774 pxor %xmm11,%xmm10
2775 pxor %xmm14,%xmm15
2776 movdqu %xmm2,0 + 64(%rdi)
2777 movdqu %xmm6,16 + 64(%rdi)
2778 movdqu %xmm10,32 + 64(%rdi)
2779 movdqu %xmm15,48 + 64(%rdi)
2780 movdqu 0 + 128(%rsi),%xmm3
2781 movdqu 16 + 128(%rsi),%xmm7
2782 movdqu 32 + 128(%rsi),%xmm11
2783 movdqu 48 + 128(%rsi),%xmm15
2784 pxor %xmm3,%xmm1
2785 pxor %xmm7,%xmm5
2786 pxor %xmm11,%xmm9
2787 pxor %xmm13,%xmm15
2788 movdqu %xmm1,0 + 128(%rdi)
2789 movdqu %xmm5,16 + 128(%rdi)
2790 movdqu %xmm9,32 + 128(%rdi)
2791 movdqu %xmm15,48 + 128(%rdi)
2792
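# At most 256 bytes left: hash the 192 bytes written and take the tail path; otherwise finish this 256-byte chunk and loop again.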
2793 cmpq $256,%rbx
2794 ja 3f
2795
2796 movq $192,%rcx
2797 subq $192,%rbx
2798 leaq 192(%rsi),%rsi
2799 jmp seal_sse_128_seal_hash
2800 3:
2801 movdqu 0 + 192(%rsi),%xmm3
2802 movdqu 16 + 192(%rsi),%xmm7
2803 movdqu 32 + 192(%rsi),%xmm11
2804 movdqu 48 + 192(%rsi),%xmm15
2805 pxor %xmm3,%xmm0
2806 pxor %xmm7,%xmm4
2807 pxor %xmm11,%xmm8
2808 pxor %xmm12,%xmm15
2809 movdqu %xmm0,0 + 192(%rdi)
2810 movdqu %xmm4,16 + 192(%rdi)
2811 movdqu %xmm8,32 + 192(%rdi)
2812 movdqu %xmm15,48 + 192(%rdi)
2813
2814 leaq 256(%rsi),%rsi
2815 subq $256,%rbx
2816 movq $6,%rcx
2817 movq $4,%r8
2818 cmpq $192,%rbx
2819 jg 1b
2820 movq %rbx,%rcx
2821 testq %rbx,%rbx
2822 je seal_sse_128_seal_hash
2823 movq $6,%rcx
2824 cmpq $64,%rbx
2825 jg 3f
2826
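# Tail: at most 64 bytes remain; one more ChaCha block, hashing already-written ciphertext along the way.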
2827 seal_sse_tail_64:
2828 movdqa .chacha20_consts(%rip),%xmm0
2829 movdqa 48(%rbp),%xmm4
2830 movdqa 64(%rbp),%xmm8
2831 movdqa 96(%rbp),%xmm12
2832 paddd .sse_inc(%rip),%xmm12
2833 movdqa %xmm12,96(%rbp)
2834
2835 1:
2836 addq 0(%rdi),%r10
2837 adcq 8+0(%rdi),%r11
2838 adcq $1,%r12
2839 movq 0+0(%rbp),%rax
2840 movq %rax,%r15
2841 mulq %r10
2842 movq %rax,%r13
2843 movq %rdx,%r14
2844 movq 0+0(%rbp),%rax
2845 mulq %r11
2846 imulq %r12,%r15
2847 addq %rax,%r14
2848 adcq %rdx,%r15
2849 movq 8+0(%rbp),%rax
2850 movq %rax,%r9
2851 mulq %r10
2852 addq %rax,%r14
2853 adcq $0,%rdx
2854 movq %rdx,%r10
2855 movq 8+0(%rbp),%rax
2856 mulq %r11
2857 addq %rax,%r15
2858 adcq $0,%rdx
2859 imulq %r12,%r9
2860 addq %r10,%r15
2861 adcq %rdx,%r9
2862 movq %r13,%r10
2863 movq %r14,%r11
2864 movq %r15,%r12
2865 andq $3,%r12
2866 movq %r15,%r13
2867 andq $-4,%r13
2868 movq %r9,%r14
2869 shrdq $2,%r9,%r15
2870 shrq $2,%r9
2871 addq %r13,%r10
2872 adcq %r14,%r11
2873 adcq $0,%r12
2874 addq %r15,%r10
2875 adcq %r9,%r11
2876 adcq $0,%r12
2877
2878 leaq 16(%rdi),%rdi
2879 2:
2880 paddd %xmm4,%xmm0
2881 pxor %xmm0,%xmm12
2882 pshufb .rol16(%rip),%xmm12
2883 paddd %xmm12,%xmm8
2884 pxor %xmm8,%xmm4
2885 movdqa %xmm4,%xmm3
2886 pslld $12,%xmm3
2887 psrld $20,%xmm4
2888 pxor %xmm3,%xmm4
2889 paddd %xmm4,%xmm0
2890 pxor %xmm0,%xmm12
2891 pshufb .rol8(%rip),%xmm12
2892 paddd %xmm12,%xmm8
2893 pxor %xmm8,%xmm4
2894 movdqa %xmm4,%xmm3
2895 pslld $7,%xmm3
2896 psrld $25,%xmm4
2897 pxor %xmm3,%xmm4
2898 .byte 102,15,58,15,228,4
2899 .byte 102,69,15,58,15,192,8
2900 .byte 102,69,15,58,15,228,12
2901 paddd %xmm4,%xmm0
2902 pxor %xmm0,%xmm12
2903 pshufb .rol16(%rip),%xmm12
2904 paddd %xmm12,%xmm8
2905 pxor %xmm8,%xmm4
2906 movdqa %xmm4,%xmm3
2907 pslld $12,%xmm3
2908 psrld $20,%xmm4
2909 pxor %xmm3,%xmm4
2910 paddd %xmm4,%xmm0
2911 pxor %xmm0,%xmm12
2912 pshufb .rol8(%rip),%xmm12
2913 paddd %xmm12,%xmm8
2914 pxor %xmm8,%xmm4
2915 movdqa %xmm4,%xmm3
2916 pslld $7,%xmm3
2917 psrld $25,%xmm4
2918 pxor %xmm3,%xmm4
2919 .byte 102,15,58,15,228,12
2920 .byte 102,69,15,58,15,192,8
2921 .byte 102,69,15,58,15,228,4
2922 addq 0(%rdi),%r10
2923 adcq 8+0(%rdi),%r11
2924 adcq $1,%r12
2925 movq 0+0(%rbp),%rax
2926 movq %rax,%r15
2927 mulq %r10
2928 movq %rax,%r13
2929 movq %rdx,%r14
2930 movq 0+0(%rbp),%rax
2931 mulq %r11
2932 imulq %r12,%r15
2933 addq %rax,%r14
2934 adcq %rdx,%r15
2935 movq 8+0(%rbp),%rax
2936 movq %rax,%r9
2937 mulq %r10
2938 addq %rax,%r14
2939 adcq $0,%rdx
2940 movq %rdx,%r10
2941 movq 8+0(%rbp),%rax
2942 mulq %r11
2943 addq %rax,%r15
2944 adcq $0,%rdx
2945 imulq %r12,%r9
2946 addq %r10,%r15
2947 adcq %rdx,%r9
2948 movq %r13,%r10
2949 movq %r14,%r11
2950 movq %r15,%r12
2951 andq $3,%r12
2952 movq %r15,%r13
2953 andq $-4,%r13
2954 movq %r9,%r14
2955 shrdq $2,%r9,%r15
2956 shrq $2,%r9
2957 addq %r13,%r10
2958 adcq %r14,%r11
2959 adcq $0,%r12
2960 addq %r15,%r10
2961 adcq %r9,%r11
2962 adcq $0,%r12
2963
2964 leaq 16(%rdi),%rdi
2965 decq %rcx
2966 jg 1b
2967 decq %r8
2968 jge 2b
2969 paddd .chacha20_consts(%rip),%xmm0
2970 paddd 48(%rbp),%xmm4
2971 paddd 64(%rbp),%xmm8
2972 paddd 96(%rbp),%xmm12
2973
2974 jmp seal_sse_128_seal
2975 3:
2976 cmpq $128,%rbx
2977 jg 3f
2978
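# Tail: 65-128 bytes remain; two ChaCha blocks with hashing interleaved.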
2979 seal_sse_tail_128:
2980 movdqa .chacha20_consts(%rip),%xmm0
2981 movdqa 48(%rbp),%xmm4
2982 movdqa 64(%rbp),%xmm8
2983 movdqa %xmm0,%xmm1
2984 movdqa %xmm4,%xmm5
2985 movdqa %xmm8,%xmm9
2986 movdqa 96(%rbp),%xmm13
2987 paddd .sse_inc(%rip),%xmm13
2988 movdqa %xmm13,%xmm12
2989 paddd .sse_inc(%rip),%xmm12
2990 movdqa %xmm12,96(%rbp)
2991 movdqa %xmm13,112(%rbp)
2992
2993 1:
2994 addq 0(%rdi),%r10
2995 adcq 8+0(%rdi),%r11
2996 adcq $1,%r12
2997 movq 0+0(%rbp),%rax
2998 movq %rax,%r15
2999 mulq %r10
3000 movq %rax,%r13
3001 movq %rdx,%r14
3002 movq 0+0(%rbp),%rax
3003 mulq %r11
3004 imulq %r12,%r15
3005 addq %rax,%r14
3006 adcq %rdx,%r15
3007 movq 8+0(%rbp),%rax
3008 movq %rax,%r9
3009 mulq %r10
3010 addq %rax,%r14
3011 adcq $0,%rdx
3012 movq %rdx,%r10
3013 movq 8+0(%rbp),%rax
3014 mulq %r11
3015 addq %rax,%r15
3016 adcq $0,%rdx
3017 imulq %r12,%r9
3018 addq %r10,%r15
3019 adcq %rdx,%r9
3020 movq %r13,%r10
3021 movq %r14,%r11
3022 movq %r15,%r12
3023 andq $3,%r12
3024 movq %r15,%r13
3025 andq $-4,%r13
3026 movq %r9,%r14
3027 shrdq $2,%r9,%r15
3028 shrq $2,%r9
3029 addq %r13,%r10
3030 adcq %r14,%r11
3031 adcq $0,%r12
3032 addq %r15,%r10
3033 adcq %r9,%r11
3034 adcq $0,%r12
3035
3036 leaq 16(%rdi),%rdi
3037 2:
3038 paddd %xmm4,%xmm0
3039 pxor %xmm0,%xmm12
3040 pshufb .rol16(%rip),%xmm12
3041 paddd %xmm12,%xmm8
3042 pxor %xmm8,%xmm4
3043 movdqa %xmm4,%xmm3
3044 pslld $12,%xmm3
3045 psrld $20,%xmm4
3046 pxor %xmm3,%xmm4
3047 paddd %xmm4,%xmm0
3048 pxor %xmm0,%xmm12
3049 pshufb .rol8(%rip),%xmm12
3050 paddd %xmm12,%xmm8
3051 pxor %xmm8,%xmm4
3052 movdqa %xmm4,%xmm3
3053 pslld $7,%xmm3
3054 psrld $25,%xmm4
3055 pxor %xmm3,%xmm4
3056 .byte 102,15,58,15,228,4
3057 .byte 102,69,15,58,15,192,8
3058 .byte 102,69,15,58,15,228,12
3059 paddd %xmm5,%xmm1
3060 pxor %xmm1,%xmm13
3061 pshufb .rol16(%rip),%xmm13
3062 paddd %xmm13,%xmm9
3063 pxor %xmm9,%xmm5
3064 movdqa %xmm5,%xmm3
3065 pslld $12,%xmm3
3066 psrld $20,%xmm5
3067 pxor %xmm3,%xmm5
3068 paddd %xmm5,%xmm1
3069 pxor %xmm1,%xmm13
3070 pshufb .rol8(%rip),%xmm13
3071 paddd %xmm13,%xmm9
3072 pxor %xmm9,%xmm5
3073 movdqa %xmm5,%xmm3
3074 pslld $7,%xmm3
3075 psrld $25,%xmm5
3076 pxor %xmm3,%xmm5
3077 .byte 102,15,58,15,237,4
3078 .byte 102,69,15,58,15,201,8
3079 .byte 102,69,15,58,15,237,12
3080 addq 0(%rdi),%r10
3081 adcq 8+0(%rdi),%r11
3082 adcq $1,%r12
3083 movq 0+0(%rbp),%rax
3084 movq %rax,%r15
3085 mulq %r10
3086 movq %rax,%r13
3087 movq %rdx,%r14
3088 movq 0+0(%rbp),%rax
3089 mulq %r11
3090 imulq %r12,%r15
3091 addq %rax,%r14
3092 adcq %rdx,%r15
3093 movq 8+0(%rbp),%rax
3094 movq %rax,%r9
3095 mulq %r10
3096 addq %rax,%r14
3097 adcq $0,%rdx
3098 movq %rdx,%r10
3099 movq 8+0(%rbp),%rax
3100 mulq %r11
3101 addq %rax,%r15
3102 adcq $0,%rdx
3103 imulq %r12,%r9
3104 addq %r10,%r15
3105 adcq %rdx,%r9
3106 movq %r13,%r10
3107 movq %r14,%r11
3108 movq %r15,%r12
3109 andq $3,%r12
3110 movq %r15,%r13
3111 andq $-4,%r13
3112 movq %r9,%r14
3113 shrdq $2,%r9,%r15
3114 shrq $2,%r9
3115 addq %r13,%r10
3116 adcq %r14,%r11
3117 adcq $0,%r12
3118 addq %r15,%r10
3119 adcq %r9,%r11
3120 adcq $0,%r12
3121 paddd %xmm4,%xmm0
3122 pxor %xmm0,%xmm12
3123 pshufb .rol16(%rip),%xmm12
3124 paddd %xmm12,%xmm8
3125 pxor %xmm8,%xmm4
3126 movdqa %xmm4,%xmm3
3127 pslld $12,%xmm3
3128 psrld $20,%xmm4
3129 pxor %xmm3,%xmm4
3130 paddd %xmm4,%xmm0
3131 pxor %xmm0,%xmm12
3132 pshufb .rol8(%rip),%xmm12
3133 paddd %xmm12,%xmm8
3134 pxor %xmm8,%xmm4
3135 movdqa %xmm4,%xmm3
3136 pslld $7,%xmm3
3137 psrld $25,%xmm4
3138 pxor %xmm3,%xmm4
3139 .byte 102,15,58,15,228,12
3140 .byte 102,69,15,58,15,192,8
3141 .byte 102,69,15,58,15,228,4
3142 paddd %xmm5,%xmm1
3143 pxor %xmm1,%xmm13
3144 pshufb .rol16(%rip),%xmm13
3145 paddd %xmm13,%xmm9
3146 pxor %xmm9,%xmm5
3147 movdqa %xmm5,%xmm3
3148 pslld $12,%xmm3
3149 psrld $20,%xmm5
3150 pxor %xmm3,%xmm5
3151 paddd %xmm5,%xmm1
3152 pxor %xmm1,%xmm13
3153 pshufb .rol8(%rip),%xmm13
3154 paddd %xmm13,%xmm9
3155 pxor %xmm9,%xmm5
3156 movdqa %xmm5,%xmm3
3157 pslld $7,%xmm3
3158 psrld $25,%xmm5
3159 pxor %xmm3,%xmm5
3160 .byte 102,15,58,15,237,12
3161 .byte 102,69,15,58,15,201,8
3162 .byte 102,69,15,58,15,237,4
3163
3164 leaq 16(%rdi),%rdi
3165 decq %rcx
3166 jg 1b
3167 decq %r8
3168 jge 2b
3169 paddd .chacha20_consts(%rip),%xmm1
3170 paddd 48(%rbp),%xmm5
3171 paddd 64(%rbp),%xmm9
3172 paddd 112(%rbp),%xmm13
3173 paddd .chacha20_consts(%rip),%xmm0
3174 paddd 48(%rbp),%xmm4
3175 paddd 64(%rbp),%xmm8
3176 paddd 96(%rbp),%xmm12
3177 movdqu 0 + 0(%rsi),%xmm3
3178 movdqu 16 + 0(%rsi),%xmm7
3179 movdqu 32 + 0(%rsi),%xmm11
3180 movdqu 48 + 0(%rsi),%xmm15
3181 pxor %xmm3,%xmm1
3182 pxor %xmm7,%xmm5
3183 pxor %xmm11,%xmm9
3184 pxor %xmm13,%xmm15
3185 movdqu %xmm1,0 + 0(%rdi)
3186 movdqu %xmm5,16 + 0(%rdi)
3187 movdqu %xmm9,32 + 0(%rdi)
3188 movdqu %xmm15,48 + 0(%rdi)
3189
3190 movq $64,%rcx
3191 subq $64,%rbx
3192 leaq 64(%rsi),%rsi
3193 jmp seal_sse_128_seal_hash
3194 3:
3195
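# Tail: 129-192 bytes remain; three ChaCha blocks with hashing interleaved.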
3196 seal_sse_tail_192:
3197 movdqa .chacha20_consts(%rip),%xmm0
3198 movdqa 48(%rbp),%xmm4
3199 movdqa 64(%rbp),%xmm8
3200 movdqa %xmm0,%xmm1
3201 movdqa %xmm4,%xmm5
3202 movdqa %xmm8,%xmm9
3203 movdqa %xmm0,%xmm2
3204 movdqa %xmm4,%xmm6
3205 movdqa %xmm8,%xmm10
3206 movdqa 96(%rbp),%xmm14
3207 paddd .sse_inc(%rip),%xmm14
3208 movdqa %xmm14,%xmm13
3209 paddd .sse_inc(%rip),%xmm13
3210 movdqa %xmm13,%xmm12
3211 paddd .sse_inc(%rip),%xmm12
3212 movdqa %xmm12,96(%rbp)
3213 movdqa %xmm13,112(%rbp)
3214 movdqa %xmm14,128(%rbp)
3215
3216 1:
3217 addq 0(%rdi),%r10
3218 adcq 8+0(%rdi),%r11
3219 adcq $1,%r12
3220 movq 0+0(%rbp),%rax
3221 movq %rax,%r15
3222 mulq %r10
3223 movq %rax,%r13
3224 movq %rdx,%r14
3225 movq 0+0(%rbp),%rax
3226 mulq %r11
3227 imulq %r12,%r15
3228 addq %rax,%r14
3229 adcq %rdx,%r15
3230 movq 8+0(%rbp),%rax
3231 movq %rax,%r9
3232 mulq %r10
3233 addq %rax,%r14
3234 adcq $0,%rdx
3235 movq %rdx,%r10
3236 movq 8+0(%rbp),%rax
3237 mulq %r11
3238 addq %rax,%r15
3239 adcq $0,%rdx
3240 imulq %r12,%r9
3241 addq %r10,%r15
3242 adcq %rdx,%r9
3243 movq %r13,%r10
3244 movq %r14,%r11
3245 movq %r15,%r12
3246 andq $3,%r12
3247 movq %r15,%r13
3248 andq $-4,%r13
3249 movq %r9,%r14
3250 shrdq $2,%r9,%r15
3251 shrq $2,%r9
3252 addq %r13,%r10
3253 adcq %r14,%r11
3254 adcq $0,%r12
3255 addq %r15,%r10
3256 adcq %r9,%r11
3257 adcq $0,%r12
3258
3259 leaq 16(%rdi),%rdi
3260 2:
3261 paddd %xmm4,%xmm0
3262 pxor %xmm0,%xmm12
3263 pshufb .rol16(%rip),%xmm12
3264 paddd %xmm12,%xmm8
3265 pxor %xmm8,%xmm4
3266 movdqa %xmm4,%xmm3
3267 pslld $12,%xmm3
3268 psrld $20,%xmm4
3269 pxor %xmm3,%xmm4
3270 paddd %xmm4,%xmm0
3271 pxor %xmm0,%xmm12
3272 pshufb .rol8(%rip),%xmm12
3273 paddd %xmm12,%xmm8
3274 pxor %xmm8,%xmm4
3275 movdqa %xmm4,%xmm3
3276 pslld $7,%xmm3
3277 psrld $25,%xmm4
3278 pxor %xmm3,%xmm4
3279 .byte 102,15,58,15,228,4
3280 .byte 102,69,15,58,15,192,8
3281 .byte 102,69,15,58,15,228,12
3282 paddd %xmm5,%xmm1
3283 pxor %xmm1,%xmm13
3284 pshufb .rol16(%rip),%xmm13
3285 paddd %xmm13,%xmm9
3286 pxor %xmm9,%xmm5
3287 movdqa %xmm5,%xmm3
3288 pslld $12,%xmm3
3289 psrld $20,%xmm5
3290 pxor %xmm3,%xmm5
3291 paddd %xmm5,%xmm1
3292 pxor %xmm1,%xmm13
3293 pshufb .rol8(%rip),%xmm13
3294 paddd %xmm13,%xmm9
3295 pxor %xmm9,%xmm5
3296 movdqa %xmm5,%xmm3
3297 pslld $7,%xmm3
3298 psrld $25,%xmm5
3299 pxor %xmm3,%xmm5
3300 .byte 102,15,58,15,237,4
3301 .byte 102,69,15,58,15,201,8
3302 .byte 102,69,15,58,15,237,12
3303 paddd %xmm6,%xmm2
3304 pxor %xmm2,%xmm14
3305 pshufb .rol16(%rip),%xmm14
3306 paddd %xmm14,%xmm10
3307 pxor %xmm10,%xmm6
3308 movdqa %xmm6,%xmm3
3309 pslld $12,%xmm3
3310 psrld $20,%xmm6
3311 pxor %xmm3,%xmm6
3312 paddd %xmm6,%xmm2
3313 pxor %xmm2,%xmm14
3314 pshufb .rol8(%rip),%xmm14
3315 paddd %xmm14,%xmm10
3316 pxor %xmm10,%xmm6
3317 movdqa %xmm6,%xmm3
3318 pslld $7,%xmm3
3319 psrld $25,%xmm6
3320 pxor %xmm3,%xmm6
3321 .byte 102,15,58,15,246,4
3322 .byte 102,69,15,58,15,210,8
3323 .byte 102,69,15,58,15,246,12
3324 addq 0(%rdi),%r10
3325 adcq 8+0(%rdi),%r11
3326 adcq $1,%r12
3327 movq 0+0(%rbp),%rax
3328 movq %rax,%r15
3329 mulq %r10
3330 movq %rax,%r13
3331 movq %rdx,%r14
3332 movq 0+0(%rbp),%rax
3333 mulq %r11
3334 imulq %r12,%r15
3335 addq %rax,%r14
3336 adcq %rdx,%r15
3337 movq 8+0(%rbp),%rax
3338 movq %rax,%r9
3339 mulq %r10
3340 addq %rax,%r14
3341 adcq $0,%rdx
3342 movq %rdx,%r10
3343 movq 8+0(%rbp),%rax
3344 mulq %r11
3345 addq %rax,%r15
3346 adcq $0,%rdx
3347 imulq %r12,%r9
3348 addq %r10,%r15
3349 adcq %rdx,%r9
3350 movq %r13,%r10
3351 movq %r14,%r11
3352 movq %r15,%r12
3353 andq $3,%r12
3354 movq %r15,%r13
3355 andq $-4,%r13
3356 movq %r9,%r14
3357 shrdq $2,%r9,%r15
3358 shrq $2,%r9
3359 addq %r13,%r10
3360 adcq %r14,%r11
3361 adcq $0,%r12
3362 addq %r15,%r10
3363 adcq %r9,%r11
3364 adcq $0,%r12
3365 paddd %xmm4,%xmm0
3366 pxor %xmm0,%xmm12
3367 pshufb .rol16(%rip),%xmm12
3368 paddd %xmm12,%xmm8
3369 pxor %xmm8,%xmm4
3370 movdqa %xmm4,%xmm3
3371 pslld $12,%xmm3
3372 psrld $20,%xmm4
3373 pxor %xmm3,%xmm4
3374 paddd %xmm4,%xmm0
3375 pxor %xmm0,%xmm12
3376 pshufb .rol8(%rip),%xmm12
3377 paddd %xmm12,%xmm8
3378 pxor %xmm8,%xmm4
3379 movdqa %xmm4,%xmm3
3380 pslld $7,%xmm3
3381 psrld $25,%xmm4
3382 pxor %xmm3,%xmm4
3383 .byte 102,15,58,15,228,12
3384 .byte 102,69,15,58,15,192,8
3385 .byte 102,69,15,58,15,228,4
3386 paddd %xmm5,%xmm1
3387 pxor %xmm1,%xmm13
3388 pshufb .rol16(%rip),%xmm13
3389 paddd %xmm13,%xmm9
3390 pxor %xmm9,%xmm5
3391 movdqa %xmm5,%xmm3
3392 pslld $12,%xmm3
3393 psrld $20,%xmm5
3394 pxor %xmm3,%xmm5
3395 paddd %xmm5,%xmm1
3396 pxor %xmm1,%xmm13
3397 pshufb .rol8(%rip),%xmm13
3398 paddd %xmm13,%xmm9
3399 pxor %xmm9,%xmm5
3400 movdqa %xmm5,%xmm3
3401 pslld $7,%xmm3
3402 psrld $25,%xmm5
3403 pxor %xmm3,%xmm5
3404 .byte 102,15,58,15,237,12
3405 .byte 102,69,15,58,15,201,8
3406 .byte 102,69,15,58,15,237,4
3407 paddd %xmm6,%xmm2
3408 pxor %xmm2,%xmm14
3409 pshufb .rol16(%rip),%xmm14
3410 paddd %xmm14,%xmm10
3411 pxor %xmm10,%xmm6
3412 movdqa %xmm6,%xmm3
3413 pslld $12,%xmm3
3414 psrld $20,%xmm6
3415 pxor %xmm3,%xmm6
3416 paddd %xmm6,%xmm2
3417 pxor %xmm2,%xmm14
3418 pshufb .rol8(%rip),%xmm14
3419 paddd %xmm14,%xmm10
3420 pxor %xmm10,%xmm6
3421 movdqa %xmm6,%xmm3
3422 pslld $7,%xmm3
3423 psrld $25,%xmm6
3424 pxor %xmm3,%xmm6
3425 .byte 102,15,58,15,246,12
3426 .byte 102,69,15,58,15,210,8
3427 .byte 102,69,15,58,15,246,4
3428
3429 leaq 16(%rdi),%rdi
3430 decq %rcx
3431 jg 1b
3432 decq %r8
3433 jge 2b
3434 paddd .chacha20_consts(%rip),%xmm2
3435 paddd 48(%rbp),%xmm6
3436 paddd 64(%rbp),%xmm10
3437 paddd 128(%rbp),%xmm14
3438 paddd .chacha20_consts(%rip),%xmm1
3439 paddd 48(%rbp),%xmm5
3440 paddd 64(%rbp),%xmm9
3441 paddd 112(%rbp),%xmm13
3442 paddd .chacha20_consts(%rip),%xmm0
3443 paddd 48(%rbp),%xmm4
3444 paddd 64(%rbp),%xmm8
3445 paddd 96(%rbp),%xmm12
3446 movdqu 0 + 0(%rsi),%xmm3
3447 movdqu 16 + 0(%rsi),%xmm7
3448 movdqu 32 + 0(%rsi),%xmm11
3449 movdqu 48 + 0(%rsi),%xmm15
3450 pxor %xmm3,%xmm2
3451 pxor %xmm7,%xmm6
3452 pxor %xmm11,%xmm10
3453 pxor %xmm14,%xmm15
3454 movdqu %xmm2,0 + 0(%rdi)
3455 movdqu %xmm6,16 + 0(%rdi)
3456 movdqu %xmm10,32 + 0(%rdi)
3457 movdqu %xmm15,48 + 0(%rdi)
3458 movdqu 0 + 64(%rsi),%xmm3
3459 movdqu 16 + 64(%rsi),%xmm7
3460 movdqu 32 + 64(%rsi),%xmm11
3461 movdqu 48 + 64(%rsi),%xmm15
3462 pxor %xmm3,%xmm1
3463 pxor %xmm7,%xmm5
3464 pxor %xmm11,%xmm9
3465 pxor %xmm13,%xmm15
3466 movdqu %xmm1,0 + 64(%rdi)
3467 movdqu %xmm5,16 + 64(%rdi)
3468 movdqu %xmm9,32 + 64(%rdi)
3469 movdqu %xmm15,48 + 64(%rdi)
3470
3471 movq $128,%rcx
3472 subq $128,%rbx
3473 leaq 128(%rsi),%rsi
3474
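# Catch-up hashing: absorb %rcx bytes of already-written ciphertext into Poly1305, 16 bytes per pass.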
3475 seal_sse_128_seal_hash:
3476 cmpq $16,%rcx
3477 jb seal_sse_128_seal
3478 addq 0(%rdi),%r10
3479 adcq 8+0(%rdi),%r11
3480 adcq $1,%r12
3481 movq 0+0(%rbp),%rax
3482 movq %rax,%r15
3483 mulq %r10
3484 movq %rax,%r13
3485 movq %rdx,%r14
3486 movq 0+0(%rbp),%rax
3487 mulq %r11
3488 imulq %r12,%r15
3489 addq %rax,%r14
3490 adcq %rdx,%r15
3491 movq 8+0(%rbp),%rax
3492 movq %rax,%r9
3493 mulq %r10
3494 addq %rax,%r14
3495 adcq $0,%rdx
3496 movq %rdx,%r10
3497 movq 8+0(%rbp),%rax
3498 mulq %r11
3499 addq %rax,%r15
3500 adcq $0,%rdx
3501 imulq %r12,%r9
3502 addq %r10,%r15
3503 adcq %rdx,%r9
3504 movq %r13,%r10
3505 movq %r14,%r11
3506 movq %r15,%r12
3507 andq $3,%r12
3508 movq %r15,%r13
3509 andq $-4,%r13
3510 movq %r9,%r14
3511 shrdq $2,%r9,%r15
3512 shrq $2,%r9
3513 addq %r13,%r10
3514 adcq %r14,%r11
3515 adcq $0,%r12
3516 addq %r15,%r10
3517 adcq %r9,%r11
3518 adcq $0,%r12
3519
3520 subq $16,%rcx
3521 leaq 16(%rdi),%rdi
3522 jmp seal_sse_128_seal_hash
3523
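# Encrypt remaining full 16-byte chunks with xmm0 and hash them immediately, rotating the next spare keystream block into xmm0 each pass.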
3524 seal_sse_128_seal:
3525 cmpq $16,%rbx
3526 jb seal_sse_tail_16
3527 subq $16,%rbx
3528
3529 movdqu 0(%rsi),%xmm3
3530 pxor %xmm3,%xmm0
3531 movdqu %xmm0,0(%rdi)
3532
3533 addq 0(%rdi),%r10
3534 adcq 8(%rdi),%r11
3535 adcq $1,%r12
3536 leaq 16(%rsi),%rsi
3537 leaq 16(%rdi),%rdi
3538 movq 0+0(%rbp),%rax
3539 movq %rax,%r15
3540 mulq %r10
3541 movq %rax,%r13
3542 movq %rdx,%r14
3543 movq 0+0(%rbp),%rax
3544 mulq %r11
3545 imulq %r12,%r15
3546 addq %rax,%r14
3547 adcq %rdx,%r15
3548 movq 8+0(%rbp),%rax
3549 movq %rax,%r9
3550 mulq %r10
3551 addq %rax,%r14
3552 adcq $0,%rdx
3553 movq %rdx,%r10
3554 movq 8+0(%rbp),%rax
3555 mulq %r11
3556 addq %rax,%r15
3557 adcq $0,%rdx
3558 imulq %r12,%r9
3559 addq %r10,%r15
3560 adcq %rdx,%r9
3561 movq %r13,%r10
3562 movq %r14,%r11
3563 movq %r15,%r12
3564 andq $3,%r12
3565 movq %r15,%r13
3566 andq $-4,%r13
3567 movq %r9,%r14
3568 shrdq $2,%r9,%r15
3569 shrq $2,%r9
3570 addq %r13,%r10
3571 adcq %r14,%r11
3572 adcq $0,%r12
3573 addq %r15,%r10
3574 adcq %r9,%r11
3575 adcq $0,%r12
3576
3577
3578 movdqa %xmm4,%xmm0
3579 movdqa %xmm8,%xmm4
3580 movdqa %xmm12,%xmm8
3581 movdqa %xmm1,%xmm12
3582 movdqa %xmm5,%xmm1
3583 movdqa %xmm9,%xmm5
3584 movdqa %xmm13,%xmm9
3585 jmp seal_sse_128_seal
3586
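# Final 1-15 bytes: gather them into xmm15 (high to low), encrypt with the spare keystream, emit byte-by-byte, then mask to the exact length and absorb as the last block.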
3587 seal_sse_tail_16:
3588 testq %rbx,%rbx
3589 jz seal_sse_finalize
3590
3591 movq %rbx,%r8
3592 shlq $4,%r8
3593 leaq .and_masks(%rip),%r13
3594 movq %rbx,%rcx
3595 leaq -1(%rsi,%rbx), %rsi
3596 pxor %xmm15,%xmm15
3597 1:
3598 pslldq $1,%xmm15
3599 pinsrb $0,(%rsi),%xmm15
3600 leaq -1(%rsi),%rsi
3601 decq %rcx
3602 jne 1b
3603
3604
3605 pxor %xmm0,%xmm15
3606
3607
3608 movq %rbx,%rcx
3609 movdqu %xmm15,%xmm0
3610 2:
3611 pextrb $0,%xmm0,(%rdi)
3612 psrldq $1,%xmm0
3613 addq $1,%rdi
3614 subq $1,%rcx
3615 jnz 2b
3616
3617 pand -16(%r13,%r8), %xmm15
3618 .byte 102,77,15,126,253
3619 pextrq $1,%xmm15,%r14
3620 addq %r13,%r10
3621 adcq %r14,%r11
3622 adcq $1,%r12
3623 movq 0+0(%rbp),%rax
3624 movq %rax,%r15
3625 mulq %r10
3626 movq %rax,%r13
3627 movq %rdx,%r14
3628 movq 0+0(%rbp),%rax
3629 mulq %r11
3630 imulq %r12,%r15
3631 addq %rax,%r14
3632 adcq %rdx,%r15
3633 movq 8+0(%rbp),%rax
3634 movq %rax,%r9
3635 mulq %r10
3636 addq %rax,%r14
3637 adcq $0,%rdx
3638 movq %rdx,%r10
3639 movq 8+0(%rbp),%rax
3640 mulq %r11
3641 addq %rax,%r15
3642 adcq $0,%rdx
3643 imulq %r12,%r9
3644 addq %r10,%r15
3645 adcq %rdx,%r9
3646 movq %r13,%r10
3647 movq %r14,%r11
3648 movq %r15,%r12
3649 andq $3,%r12
3650 movq %r15,%r13
3651 andq $-4,%r13
3652 movq %r9,%r14
3653 shrdq $2,%r9,%r15
3654 shrq $2,%r9
3655 addq %r13,%r10
3656 adcq %r14,%r11
3657 adcq $0,%r12
3658 addq %r15,%r10
3659 adcq %r9,%r11
3660 adcq $0,%r12
3661
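# Finalize the tag: absorb the length block saved at 32(%rbp), do the final reduction, add s from 16(%rbp), and write the 16-byte tag.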
3662 seal_sse_finalize:
3663 addq 32(%rbp),%r10
3664 adcq 8+32(%rbp),%r11
3665 adcq $1,%r12
3666 movq 0+0(%rbp),%rax
3667 movq %rax,%r15
3668 mulq %r10
3669 movq %rax,%r13
3670 movq %rdx,%r14
3671 movq 0+0(%rbp),%rax
3672 mulq %r11
3673 imulq %r12,%r15
3674 addq %rax,%r14
3675 adcq %rdx,%r15
3676 movq 8+0(%rbp),%rax
3677 movq %rax,%r9
3678 mulq %r10
3679 addq %rax,%r14
3680 adcq $0,%rdx
3681 movq %rdx,%r10
3682 movq 8+0(%rbp),%rax
3683 mulq %r11
3684 addq %rax,%r15
3685 adcq $0,%rdx
3686 imulq %r12,%r9
3687 addq %r10,%r15
3688 adcq %rdx,%r9
3689 movq %r13,%r10
3690 movq %r14,%r11
3691 movq %r15,%r12
3692 andq $3,%r12
3693 movq %r15,%r13
3694 andq $-4,%r13
3695 movq %r9,%r14
3696 shrdq $2,%r9,%r15
3697 shrq $2,%r9
3698 addq %r13,%r10
3699 adcq %r14,%r11
3700 adcq $0,%r12
3701 addq %r15,%r10
3702 adcq %r9,%r11
3703 adcq $0,%r12
3704
3705
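# Final reduction: compute acc + 5 - 2^130 via the subtractions below; if that borrows, the cmovs keep the original accumulator.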
3706 movq %r10,%r13
3707 movq %r11,%r14
3708 movq %r12,%r15
3709 subq $-5,%r10
3710 sbbq $-1,%r11
3711 sbbq $3,%r12
3712 cmovcq %r13,%r10
3713 cmovcq %r14,%r11
3714 cmovcq %r15,%r12
3715
3716 addq 0+16(%rbp),%r10
3717 adcq 8+16(%rbp),%r11
3718
3719 addq $288 + 32,%rsp
3720 .cfi_adjust_cfa_offset -(288 + 32)
3721 popq %r9
3722 .cfi_adjust_cfa_offset -8
3723 movq %r10,0(%r9)
3724 movq %r11,8(%r9)
3725
3726 popq %r15
3727 .cfi_adjust_cfa_offset -8
3728 popq %r14
3729 .cfi_adjust_cfa_offset -8
3730 popq %r13
3731 .cfi_adjust_cfa_offset -8
3732 popq %r12
3733 .cfi_adjust_cfa_offset -8
3734 popq %rbx
3735 .cfi_adjust_cfa_offset -8
3736 popq %rbp
3737 .cfi_adjust_cfa_offset -8
3738 .byte 0xf3,0xc3
3739 .cfi_adjust_cfa_offset (8 * 6) + 288 + 32
3740
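# Short-input seal (whole message fits in 128 bytes): three SSE block states; the counter-0 block supplies the Poly1305 key.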
3741 seal_sse_128:
3742 movdqu .chacha20_consts(%rip),%xmm0
3743 movdqa %xmm0,%xmm1
3744 movdqa %xmm0,%xmm2
3745 movdqu 0(%r9),%xmm4
3746 movdqa %xmm4,%xmm5
3747 movdqa %xmm4,%xmm6
3748 movdqu 16(%r9),%xmm8
3749 movdqa %xmm8,%xmm9
3750 movdqa %xmm8,%xmm10
3751 movdqu 32(%r9),%xmm14
3752 movdqa %xmm14,%xmm12
3753 paddd .sse_inc(%rip),%xmm12
3754 movdqa %xmm12,%xmm13
3755 paddd .sse_inc(%rip),%xmm13
3756 movdqa %xmm4,%xmm7
3757 movdqa %xmm8,%xmm11
3758 movdqa %xmm12,%xmm15
3759 movq $10,%r10
3760 1:
3761 paddd %xmm4,%xmm0
3762 pxor %xmm0,%xmm12
3763 pshufb .rol16(%rip),%xmm12
3764 paddd %xmm12,%xmm8
3765 pxor %xmm8,%xmm4
3766 movdqa %xmm4,%xmm3
3767 pslld $12,%xmm3
3768 psrld $20,%xmm4
3769 pxor %xmm3,%xmm4
3770 paddd %xmm4,%xmm0
3771 pxor %xmm0,%xmm12
3772 pshufb .rol8(%rip),%xmm12
3773 paddd %xmm12,%xmm8
3774 pxor %xmm8,%xmm4
3775 movdqa %xmm4,%xmm3
3776 pslld $7,%xmm3
3777 psrld $25,%xmm4
3778 pxor %xmm3,%xmm4
3779 .byte 102,15,58,15,228,4
3780 .byte 102,69,15,58,15,192,8
3781 .byte 102,69,15,58,15,228,12
3782 paddd %xmm5,%xmm1
3783 pxor %xmm1,%xmm13
3784 pshufb .rol16(%rip),%xmm13
3785 paddd %xmm13,%xmm9
3786 pxor %xmm9,%xmm5
3787 movdqa %xmm5,%xmm3
3788 pslld $12,%xmm3
3789 psrld $20,%xmm5
3790 pxor %xmm3,%xmm5
3791 paddd %xmm5,%xmm1
3792 pxor %xmm1,%xmm13
3793 pshufb .rol8(%rip),%xmm13
3794 paddd %xmm13,%xmm9
3795 pxor %xmm9,%xmm5
3796 movdqa %xmm5,%xmm3
3797 pslld $7,%xmm3
3798 psrld $25,%xmm5
3799 pxor %xmm3,%xmm5
3800 .byte 102,15,58,15,237,4
3801 .byte 102,69,15,58,15,201,8
3802 .byte 102,69,15,58,15,237,12
3803 paddd %xmm6,%xmm2
3804 pxor %xmm2,%xmm14
3805 pshufb .rol16(%rip),%xmm14
3806 paddd %xmm14,%xmm10
3807 pxor %xmm10,%xmm6
3808 movdqa %xmm6,%xmm3
3809 pslld $12,%xmm3
3810 psrld $20,%xmm6
3811 pxor %xmm3,%xmm6
3812 paddd %xmm6,%xmm2
3813 pxor %xmm2,%xmm14
3814 pshufb .rol8(%rip),%xmm14
3815 paddd %xmm14,%xmm10
3816 pxor %xmm10,%xmm6
3817 movdqa %xmm6,%xmm3
3818 pslld $7,%xmm3
3819 psrld $25,%xmm6
3820 pxor %xmm3,%xmm6
3821 .byte 102,15,58,15,246,4
3822 .byte 102,69,15,58,15,210,8
3823 .byte 102,69,15,58,15,246,12
3824 paddd %xmm4,%xmm0
3825 pxor %xmm0,%xmm12
3826 pshufb .rol16(%rip),%xmm12
3827 paddd %xmm12,%xmm8
3828 pxor %xmm8,%xmm4
3829 movdqa %xmm4,%xmm3
3830 pslld $12,%xmm3
3831 psrld $20,%xmm4
3832 pxor %xmm3,%xmm4
3833 paddd %xmm4,%xmm0
3834 pxor %xmm0,%xmm12
3835 pshufb .rol8(%rip),%xmm12
3836 paddd %xmm12,%xmm8
3837 pxor %xmm8,%xmm4
3838 movdqa %xmm4,%xmm3
3839 pslld $7,%xmm3
3840 psrld $25,%xmm4
3841 pxor %xmm3,%xmm4
3842 .byte 102,15,58,15,228,12
3843 .byte 102,69,15,58,15,192,8
3844 .byte 102,69,15,58,15,228,4
3845 paddd %xmm5,%xmm1
3846 pxor %xmm1,%xmm13
3847 pshufb .rol16(%rip),%xmm13
3848 paddd %xmm13,%xmm9
3849 pxor %xmm9,%xmm5
3850 movdqa %xmm5,%xmm3
3851 pslld $12,%xmm3
3852 psrld $20,%xmm5
3853 pxor %xmm3,%xmm5
3854 paddd %xmm5,%xmm1
3855 pxor %xmm1,%xmm13
3856 pshufb .rol8(%rip),%xmm13
3857 paddd %xmm13,%xmm9
3858 pxor %xmm9,%xmm5
3859 movdqa %xmm5,%xmm3
3860 pslld $7,%xmm3
3861 psrld $25,%xmm5
3862 pxor %xmm3,%xmm5
3863 .byte 102,15,58,15,237,12
3864 .byte 102,69,15,58,15,201,8
3865 .byte 102,69,15,58,15,237,4
3866 paddd %xmm6,%xmm2
3867 pxor %xmm2,%xmm14
3868 pshufb .rol16(%rip),%xmm14
3869 paddd %xmm14,%xmm10
3870 pxor %xmm10,%xmm6
3871 movdqa %xmm6,%xmm3
3872 pslld $12,%xmm3
3873 psrld $20,%xmm6
3874 pxor %xmm3,%xmm6
3875 paddd %xmm6,%xmm2
3876 pxor %xmm2,%xmm14
3877 pshufb .rol8(%rip),%xmm14
3878 paddd %xmm14,%xmm10
3879 pxor %xmm10,%xmm6
3880 movdqa %xmm6,%xmm3
3881 pslld $7,%xmm3
3882 psrld $25,%xmm6
3883 pxor %xmm3,%xmm6
3884 .byte 102,15,58,15,246,12
3885 .byte 102,69,15,58,15,210,8
3886 .byte 102,69,15,58,15,246,4
3887
3888 decq %r10
3889 jnz 1b
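# Add the initial state back; the counter-0 block (xmm2/xmm6) is finalized only as far as the Poly1305 key derivation needs.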
3890 paddd .chacha20_consts(%rip),%xmm0
3891 paddd .chacha20_consts(%rip),%xmm1
3892 paddd .chacha20_consts(%rip),%xmm2
3893 paddd %xmm7,%xmm4
3894 paddd %xmm7,%xmm5
3895 paddd %xmm7,%xmm6
3896 paddd %xmm11,%xmm8
3897 paddd %xmm11,%xmm9
3898 paddd %xmm15,%xmm12
3899 paddd .sse_inc(%rip),%xmm15
3900 paddd %xmm15,%xmm13
3901
3902 pand .clamp(%rip),%xmm2
3903 movdqa %xmm2,0(%rbp)
3904 movdqa %xmm6,16(%rbp)
3905
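# %r8 still holds the AD length for poly_hash_ad_internal (the self-move below is a no-op).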
3906 movq %r8,%r8
3907 call poly_hash_ad_internal
3908 jmp seal_sse_128_seal
3909 .size chacha20_poly1305_seal, .-chacha20_poly1305_seal
3910
3911
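# AVX2 open (decrypt) path: short inputs branch to dedicated 192/320-byte routines; otherwise a 512-byte-per-iteration main loop.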
3912 .type chacha20_poly1305_open_avx2,@function
3913 .align 64
3914 chacha20_poly1305_open_avx2:
3915 vzeroupper
3916 vmovdqa .chacha20_consts(%rip),%ymm0
3917 vbroadcasti128 0(%r9),%ymm4
3918 vbroadcasti128 16(%r9),%ymm8
3919 vbroadcasti128 32(%r9),%ymm12
3920 vpaddd .avx2_init(%rip),%ymm12,%ymm12
3921 cmpq $192,%rbx
3922 jbe open_avx2_192
3923 cmpq $320,%rbx
3924 jbe open_avx2_320
3925
3926 vmovdqa %ymm4,64(%rbp)
3927 vmovdqa %ymm8,96(%rbp)
3928 vmovdqa %ymm12,160(%rbp)
3929 movq $10,%r10
3930 1:
3931 vpaddd %ymm4,%ymm0,%ymm0
3932 vpxor %ymm0,%ymm12,%ymm12
3933 vpshufb .rol16(%rip),%ymm12,%ymm12
3934 vpaddd %ymm12,%ymm8,%ymm8
3935 vpxor %ymm8,%ymm4,%ymm4
3936 vpsrld $20,%ymm4,%ymm3
3937 vpslld $12,%ymm4,%ymm4
3938 vpxor %ymm3,%ymm4,%ymm4
3939 vpaddd %ymm4,%ymm0,%ymm0
3940 vpxor %ymm0,%ymm12,%ymm12
3941 vpshufb .rol8(%rip),%ymm12,%ymm12
3942 vpaddd %ymm12,%ymm8,%ymm8
3943 vpxor %ymm8,%ymm4,%ymm4
3944 vpslld $7,%ymm4,%ymm3
3945 vpsrld $25,%ymm4,%ymm4
3946 vpxor %ymm3,%ymm4,%ymm4
3947 vpalignr $12,%ymm12,%ymm12,%ymm12
3948 vpalignr $8,%ymm8,%ymm8,%ymm8
3949 vpalignr $4,%ymm4,%ymm4,%ymm4
3950 vpaddd %ymm4,%ymm0,%ymm0
3951 vpxor %ymm0,%ymm12,%ymm12
3952 vpshufb .rol16(%rip),%ymm12,%ymm12
3953 vpaddd %ymm12,%ymm8,%ymm8
3954 vpxor %ymm8,%ymm4,%ymm4
3955 vpsrld $20,%ymm4,%ymm3
3956 vpslld $12,%ymm4,%ymm4
3957 vpxor %ymm3,%ymm4,%ymm4
3958 vpaddd %ymm4,%ymm0,%ymm0
3959 vpxor %ymm0,%ymm12,%ymm12
3960 vpshufb .rol8(%rip),%ymm12,%ymm12
3961 vpaddd %ymm12,%ymm8,%ymm8
3962 vpxor %ymm8,%ymm4,%ymm4
3963 vpslld $7,%ymm4,%ymm3
3964 vpsrld $25,%ymm4,%ymm4
3965 vpxor %ymm3,%ymm4,%ymm4
3966 vpalignr $4,%ymm12,%ymm12,%ymm12
3967 vpalignr $8,%ymm8,%ymm8,%ymm8
3968 vpalignr $12,%ymm4,%ymm4,%ymm4
3969
3970 decq %r10
3971 jne 1b
3972 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
3973 vpaddd 64(%rbp),%ymm4,%ymm4
3974 vpaddd 96(%rbp),%ymm8,%ymm8
3975 vpaddd 160(%rbp),%ymm12,%ymm12
3976
3977 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
3978
3979 vpand .clamp(%rip),%ymm3,%ymm3
3980 vmovdqa %ymm3,0(%rbp)
3981
3982 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
3983 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
3984
3985 movq %r8,%r8
3986 call poly_hash_ad_internal
3987 xorq %rcx,%rcx
3988
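# Open: absorb the first 64 bytes of ciphertext into Poly1305 before decrypting them.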
3989 1:
3990 addq 0(%rsi,%rcx), %r10
3991 adcq 8+0(%rsi,%rcx), %r11
3992 adcq $1,%r12
3993 movq 0+0(%rbp),%rax
3994 movq %rax,%r15
3995 mulq %r10
3996 movq %rax,%r13
3997 movq %rdx,%r14
3998 movq 0+0(%rbp),%rax
3999 mulq %r11
4000 imulq %r12,%r15
4001 addq %rax,%r14
4002 adcq %rdx,%r15
4003 movq 8+0(%rbp),%rax
4004 movq %rax,%r9
4005 mulq %r10
4006 addq %rax,%r14
4007 adcq $0,%rdx
4008 movq %rdx,%r10
4009 movq 8+0(%rbp),%rax
4010 mulq %r11
4011 addq %rax,%r15
4012 adcq $0,%rdx
4013 imulq %r12,%r9
4014 addq %r10,%r15
4015 adcq %rdx,%r9
4016 movq %r13,%r10
4017 movq %r14,%r11
4018 movq %r15,%r12
4019 andq $3,%r12
4020 movq %r15,%r13
4021 andq $-4,%r13
4022 movq %r9,%r14
4023 shrdq $2,%r9,%r15
4024 shrq $2,%r9
4025 addq %r13,%r10
4026 adcq %r14,%r11
4027 adcq $0,%r12
4028 addq %r15,%r10
4029 adcq %r9,%r11
4030 adcq $0,%r12
4031
4032 addq $16,%rcx
4033 cmpq $64,%rcx
4034 jne 1b
4035
4036 vpxor 0(%rsi),%ymm0,%ymm0
4037 vpxor 32(%rsi),%ymm4,%ymm4
4038 vmovdqu %ymm0,0(%rdi)
4039 vmovdqu %ymm4,32(%rdi)
4040 leaq 64(%rsi),%rsi
4041 leaq 64(%rdi),%rdi
4042 subq $64,%rbx
4043 1:
4044
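# Main open loop: while at least 512 bytes remain, run four two-block ymm states with Poly1305 interleaved via BMI2 mulx.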
4045 cmpq $512,%rbx
4046 jb 3f
4047 vmovdqa .chacha20_consts(%rip),%ymm0
4048 vmovdqa 64(%rbp),%ymm4
4049 vmovdqa 96(%rbp),%ymm8
4050 vmovdqa %ymm0,%ymm1
4051 vmovdqa %ymm4,%ymm5
4052 vmovdqa %ymm8,%ymm9
4053 vmovdqa %ymm0,%ymm2
4054 vmovdqa %ymm4,%ymm6
4055 vmovdqa %ymm8,%ymm10
4056 vmovdqa %ymm0,%ymm3
4057 vmovdqa %ymm4,%ymm7
4058 vmovdqa %ymm8,%ymm11
4059 vmovdqa .avx2_inc(%rip),%ymm12
4060 vpaddd 160(%rbp),%ymm12,%ymm15
4061 vpaddd %ymm15,%ymm12,%ymm14
4062 vpaddd %ymm14,%ymm12,%ymm13
4063 vpaddd %ymm13,%ymm12,%ymm12
4064 vmovdqa %ymm15,256(%rbp)
4065 vmovdqa %ymm14,224(%rbp)
4066 vmovdqa %ymm13,192(%rbp)
4067 vmovdqa %ymm12,160(%rbp)
4068
4069 xorq %rcx,%rcx
4070 2:
4071 addq 0*8(%rsi,%rcx), %r10
4072 adcq 8+0*8(%rsi,%rcx), %r11
4073 adcq $1,%r12
4074 vmovdqa %ymm8,128(%rbp)
4075 vmovdqa .rol16(%rip),%ymm8
4076 vpaddd %ymm7,%ymm3,%ymm3
4077 vpaddd %ymm6,%ymm2,%ymm2
4078 vpaddd %ymm5,%ymm1,%ymm1
4079 vpaddd %ymm4,%ymm0,%ymm0
4080 vpxor %ymm3,%ymm15,%ymm15
4081 vpxor %ymm2,%ymm14,%ymm14
4082 vpxor %ymm1,%ymm13,%ymm13
4083 vpxor %ymm0,%ymm12,%ymm12
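# Poly1305 block via mulx: the r limb sits in %rdx, partial products accumulate in r13:r14:r15(:r9).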
4084 movq 0+0(%rbp),%rdx
4085 movq %rdx,%r15
4086 mulxq %r10,%r13,%r14
4087 mulxq %r11,%rax,%rdx
4088 imulq %r12,%r15
4089 addq %rax,%r14
4090 adcq %rdx,%r15
4091 vpshufb %ymm8,%ymm15,%ymm15
4092 vpshufb %ymm8,%ymm14,%ymm14
4093 vpshufb %ymm8,%ymm13,%ymm13
4094 vpshufb %ymm8,%ymm12,%ymm12
4095 vmovdqa 128(%rbp),%ymm8
4096 vpaddd %ymm15,%ymm11,%ymm11
4097 vpaddd %ymm14,%ymm10,%ymm10
4098 vpaddd %ymm13,%ymm9,%ymm9
4099 vpaddd %ymm12,%ymm8,%ymm8
4100 movq 8+0(%rbp),%rdx
4101 mulxq %r10,%r10,%rax
4102 addq %r10,%r14
4103 mulxq %r11,%r11,%r9
4104 adcq %r11,%r15
4105 adcq $0,%r9
4106 imulq %r12,%rdx
4107 vpxor %ymm11,%ymm7,%ymm7
4108 vpxor %ymm10,%ymm6,%ymm6
4109 vpxor %ymm9,%ymm5,%ymm5
4110 vpxor %ymm8,%ymm4,%ymm4
4111 vmovdqa %ymm8,128(%rbp)
4112 vpsrld $20,%ymm7,%ymm8
4113 vpslld $32-20,%ymm7,%ymm7
4114 vpxor %ymm8,%ymm7,%ymm7
4115 vpsrld $20,%ymm6,%ymm8
4116 vpslld $32-20,%ymm6,%ymm6
4117 vpxor %ymm8,%ymm6,%ymm6
4118 vpsrld $20,%ymm5,%ymm8
4119 addq %rax,%r15
4120 adcq %rdx,%r9
4121 vpslld $32-20,%ymm5,%ymm5
4122 vpxor %ymm8,%ymm5,%ymm5
4123 vpsrld $20,%ymm4,%ymm8
4124 vpslld $32-20,%ymm4,%ymm4
4125 vpxor %ymm8,%ymm4,%ymm4
4126 vmovdqa .rol8(%rip),%ymm8
4127 vpaddd %ymm7,%ymm3,%ymm3
4128 vpaddd %ymm6,%ymm2,%ymm2
4129 vpaddd %ymm5,%ymm1,%ymm1
4130 vpaddd %ymm4,%ymm0,%ymm0
4131 movq %r13,%r10
4132 movq %r14,%r11
4133 movq %r15,%r12
4134 andq $3,%r12
4135 movq %r15,%r13
4136 andq $-4,%r13
4137 movq %r9,%r14
4138 shrdq $2,%r9,%r15
4139 shrq $2,%r9
4140 addq %r13,%r10
4141 adcq %r14,%r11
4142 adcq $0,%r12
4143 addq %r15,%r10
4144 adcq %r9,%r11
4145 adcq $0,%r12
4146 vpxor %ymm3,%ymm15,%ymm15
4147 vpxor %ymm2,%ymm14,%ymm14
4148 vpxor %ymm1,%ymm13,%ymm13
4149 vpxor %ymm0,%ymm12,%ymm12
4150 vpshufb %ymm8,%ymm15,%ymm15
4151 vpshufb %ymm8,%ymm14,%ymm14
4152 vpshufb %ymm8,%ymm13,%ymm13
4153 vpshufb %ymm8,%ymm12,%ymm12
4154 vmovdqa 128(%rbp),%ymm8
4155 addq 2*8(%rsi,%rcx), %r10
4156 adcq 8+2*8(%rsi,%rcx), %r11
4157 adcq $1,%r12
4158 vpaddd %ymm15,%ymm11,%ymm11
4159 vpaddd %ymm14,%ymm10,%ymm10
4160 vpaddd %ymm13,%ymm9,%ymm9
4161 vpaddd %ymm12,%ymm8,%ymm8
4162 vpxor %ymm11,%ymm7,%ymm7
4163 vpxor %ymm10,%ymm6,%ymm6
4164 vpxor %ymm9,%ymm5,%ymm5
4165 vpxor %ymm8,%ymm4,%ymm4
4166 movq 0+0(%rbp),%rdx
4167 movq %rdx,%r15
4168 mulxq %r10,%r13,%r14
4169 mulxq %r11,%rax,%rdx
4170 imulq %r12,%r15
4171 addq %rax,%r14
4172 adcq %rdx,%r15
4173 vmovdqa %ymm8,128(%rbp)
4174 vpsrld $25,%ymm7,%ymm8
4175 vpslld $32-25,%ymm7,%ymm7
4176 vpxor %ymm8,%ymm7,%ymm7
4177 vpsrld $25,%ymm6,%ymm8
4178 vpslld $32-25,%ymm6,%ymm6
4179 vpxor %ymm8,%ymm6,%ymm6
4180 vpsrld $25,%ymm5,%ymm8
4181 vpslld $32-25,%ymm5,%ymm5
4182 vpxor %ymm8,%ymm5,%ymm5
4183 vpsrld $25,%ymm4,%ymm8
4184 vpslld $32-25,%ymm4,%ymm4
4185 vpxor %ymm8,%ymm4,%ymm4
4186 vmovdqa 128(%rbp),%ymm8
4187 vpalignr $4,%ymm7,%ymm7,%ymm7
4188 vpalignr $8,%ymm11,%ymm11,%ymm11
4189 vpalignr $12,%ymm15,%ymm15,%ymm15
4190 vpalignr $4,%ymm6,%ymm6,%ymm6
4191 movq 8+0(%rbp),%rdx
4192 mulxq %r10,%r10,%rax
4193 addq %r10,%r14
4194 mulxq %r11,%r11,%r9
4195 adcq %r11,%r15
4196 adcq $0,%r9
4197 imulq %r12,%rdx
4198 vpalignr $8,%ymm10,%ymm10,%ymm10
4199 vpalignr $12,%ymm14,%ymm14,%ymm14
4200 vpalignr $4,%ymm5,%ymm5,%ymm5
4201 vpalignr $8,%ymm9,%ymm9,%ymm9
4202 vpalignr $12,%ymm13,%ymm13,%ymm13
4203 vpalignr $4,%ymm4,%ymm4,%ymm4
4204 vpalignr $8,%ymm8,%ymm8,%ymm8
4205 vpalignr $12,%ymm12,%ymm12,%ymm12
4206 vmovdqa %ymm8,128(%rbp)
4207 vmovdqa .rol16(%rip),%ymm8
4208 vpaddd %ymm7,%ymm3,%ymm3
4209 vpaddd %ymm6,%ymm2,%ymm2
4210 vpaddd %ymm5,%ymm1,%ymm1
4211 vpaddd %ymm4,%ymm0,%ymm0
4212 vpxor %ymm3,%ymm15,%ymm15
4213 vpxor %ymm2,%ymm14,%ymm14
4214 vpxor %ymm1,%ymm13,%ymm13
4215 vpxor %ymm0,%ymm12,%ymm12
4216 addq %rax,%r15
4217 adcq %rdx,%r9
4218 vpshufb %ymm8,%ymm15,%ymm15
4219 vpshufb %ymm8,%ymm14,%ymm14
4220 vpshufb %ymm8,%ymm13,%ymm13
4221 vpshufb %ymm8,%ymm12,%ymm12
4222 vmovdqa 128(%rbp),%ymm8
4223 vpaddd %ymm15,%ymm11,%ymm11
4224 vpaddd %ymm14,%ymm10,%ymm10
4225 vpaddd %ymm13,%ymm9,%ymm9
4226 vpaddd %ymm12,%ymm8,%ymm8
4227 movq %r13,%r10
4228 movq %r14,%r11
4229 movq %r15,%r12
4230 andq $3,%r12
4231 movq %r15,%r13
4232 andq $-4,%r13
4233 movq %r9,%r14
4234 shrdq $2,%r9,%r15
4235 shrq $2,%r9
4236 addq %r13,%r10
4237 adcq %r14,%r11
4238 adcq $0,%r12
4239 addq %r15,%r10
4240 adcq %r9,%r11
4241 adcq $0,%r12
4242 vpxor %ymm11,%ymm7,%ymm7
4243 vpxor %ymm10,%ymm6,%ymm6
4244 vpxor %ymm9,%ymm5,%ymm5
4245 vpxor %ymm8,%ymm4,%ymm4
4246 vmovdqa %ymm8,128(%rbp)
4247 vpsrld $20,%ymm7,%ymm8
4248 vpslld $32-20,%ymm7,%ymm7
4249 vpxor %ymm8,%ymm7,%ymm7
4250 addq 4*8(%rsi,%rcx), %r10
4251 adcq 8+4*8(%rsi,%rcx), %r11
4252 adcq $1,%r12
4253
4254 leaq 48(%rcx),%rcx
4255 vpsrld $20,%ymm6,%ymm8
4256 vpslld $32-20,%ymm6,%ymm6
4257 vpxor %ymm8,%ymm6,%ymm6
4258 vpsrld $20,%ymm5,%ymm8
4259 vpslld $32-20,%ymm5,%ymm5
4260 vpxor %ymm8,%ymm5,%ymm5
4261 vpsrld $20,%ymm4,%ymm8
4262 vpslld $32-20,%ymm4,%ymm4
4263 vpxor %ymm8,%ymm4,%ymm4
4264 vmovdqa .rol8(%rip),%ymm8
4265 vpaddd %ymm7,%ymm3,%ymm3
4266 vpaddd %ymm6,%ymm2,%ymm2
4267 vpaddd %ymm5,%ymm1,%ymm1
4268 vpaddd %ymm4,%ymm0,%ymm0
4269 vpxor %ymm3,%ymm15,%ymm15
4270 vpxor %ymm2,%ymm14,%ymm14
4271 vpxor %ymm1,%ymm13,%ymm13
4272 vpxor %ymm0,%ymm12,%ymm12
4273 movq 0+0(%rbp),%rdx
4274 movq %rdx,%r15
4275 mulxq %r10,%r13,%r14
4276 mulxq %r11,%rax,%rdx
4277 imulq %r12,%r15
4278 addq %rax,%r14
4279 adcq %rdx,%r15
4280 vpshufb %ymm8,%ymm15,%ymm15
4281 vpshufb %ymm8,%ymm14,%ymm14
4282 vpshufb %ymm8,%ymm13,%ymm13
4283 vpshufb %ymm8,%ymm12,%ymm12
4284 vmovdqa 128(%rbp),%ymm8
4285 vpaddd %ymm15,%ymm11,%ymm11
4286 vpaddd %ymm14,%ymm10,%ymm10
4287 vpaddd %ymm13,%ymm9,%ymm9
4288 movq 8+0(%rbp),%rdx
4289 mulxq %r10,%r10,%rax
4290 addq %r10,%r14
4291 mulxq %r11,%r11,%r9
4292 adcq %r11,%r15
4293 adcq $0,%r9
4294 imulq %r12,%rdx
4295 vpaddd %ymm12,%ymm8,%ymm8
4296 vpxor %ymm11,%ymm7,%ymm7
4297 vpxor %ymm10,%ymm6,%ymm6
4298 vpxor %ymm9,%ymm5,%ymm5
4299 vpxor %ymm8,%ymm4,%ymm4
4300 vmovdqa %ymm8,128(%rbp)
4301 vpsrld $25,%ymm7,%ymm8
4302 vpslld $32-25,%ymm7,%ymm7
4303 addq %rax,%r15
4304 adcq %rdx,%r9
4305 vpxor %ymm8,%ymm7,%ymm7
4306 vpsrld $25,%ymm6,%ymm8
4307 vpslld $32-25,%ymm6,%ymm6
4308 vpxor %ymm8,%ymm6,%ymm6
4309 vpsrld $25,%ymm5,%ymm8
4310 vpslld $32-25,%ymm5,%ymm5
4311 vpxor %ymm8,%ymm5,%ymm5
4312 vpsrld $25,%ymm4,%ymm8
4313 vpslld $32-25,%ymm4,%ymm4
4314 vpxor %ymm8,%ymm4,%ymm4
4315 vmovdqa 128(%rbp),%ymm8
4316 vpalignr $12,%ymm7,%ymm7,%ymm7
4317 vpalignr $8,%ymm11,%ymm11,%ymm11
4318 vpalignr $4,%ymm15,%ymm15,%ymm15
4319 vpalignr $12,%ymm6,%ymm6,%ymm6
4320 vpalignr $8,%ymm10,%ymm10,%ymm10
4321 vpalignr $4,%ymm14,%ymm14,%ymm14
4322 vpalignr $12,%ymm5,%ymm5,%ymm5
4323 movq %r13,%r10
4324 movq %r14,%r11
4325 movq %r15,%r12
4326 andq $3,%r12
4327 movq %r15,%r13
4328 andq $-4,%r13
4329 movq %r9,%r14
4330 shrdq $2,%r9,%r15
4331 shrq $2,%r9
4332 addq %r13,%r10
4333 adcq %r14,%r11
4334 adcq $0,%r12
4335 addq %r15,%r10
4336 adcq %r9,%r11
4337 adcq $0,%r12
4338 vpalignr $8,%ymm9,%ymm9,%ymm9
4339 vpalignr $4,%ymm13,%ymm13,%ymm13
4340 vpalignr $12,%ymm4,%ymm4,%ymm4
4341 vpalignr $8,%ymm8,%ymm8,%ymm8
4342 vpalignr $4,%ymm12,%ymm12,%ymm12
4343
4344 cmpq $60*8,%rcx
4345 jne 2b
4346 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
4347 vpaddd 64(%rbp),%ymm7,%ymm7
4348 vpaddd 96(%rbp),%ymm11,%ymm11
4349 vpaddd 256(%rbp),%ymm15,%ymm15
4350 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
4351 vpaddd 64(%rbp),%ymm6,%ymm6
4352 vpaddd 96(%rbp),%ymm10,%ymm10
4353 vpaddd 224(%rbp),%ymm14,%ymm14
4354 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
4355 vpaddd 64(%rbp),%ymm5,%ymm5
4356 vpaddd 96(%rbp),%ymm9,%ymm9
4357 vpaddd 192(%rbp),%ymm13,%ymm13
4358 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
4359 vpaddd 64(%rbp),%ymm4,%ymm4
4360 vpaddd 96(%rbp),%ymm8,%ymm8
4361 vpaddd 160(%rbp),%ymm12,%ymm12
4362
4363 vmovdqa %ymm0,128(%rbp)
4364 addq 60*8(%rsi),%r10
4365 adcq 8+60*8(%rsi),%r11
4366 adcq $1,%r12
4367 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
4368 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
4369 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
4370 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
4371 vpxor 0+0(%rsi),%ymm0,%ymm0
4372 vpxor 32+0(%rsi),%ymm3,%ymm3
4373 vpxor 64+0(%rsi),%ymm7,%ymm7
4374 vpxor 96+0(%rsi),%ymm11,%ymm11
4375 vmovdqu %ymm0,0+0(%rdi)
4376 vmovdqu %ymm3,32+0(%rdi)
4377 vmovdqu %ymm7,64+0(%rdi)
4378 vmovdqu %ymm11,96+0(%rdi)
4379
4380 vmovdqa 128(%rbp),%ymm0
4381 movq 0+0(%rbp),%rax
4382 movq %rax,%r15
4383 mulq %r10
4384 movq %rax,%r13
4385 movq %rdx,%r14
4386 movq 0+0(%rbp),%rax
4387 mulq %r11
4388 imulq %r12,%r15
4389 addq %rax,%r14
4390 adcq %rdx,%r15
4391 movq 8+0(%rbp),%rax
4392 movq %rax,%r9
4393 mulq %r10
4394 addq %rax,%r14
4395 adcq $0,%rdx
4396 movq %rdx,%r10
4397 movq 8+0(%rbp),%rax
4398 mulq %r11
4399 addq %rax,%r15
4400 adcq $0,%rdx
4401 imulq %r12,%r9
4402 addq %r10,%r15
4403 adcq %rdx,%r9
4404 movq %r13,%r10
4405 movq %r14,%r11
4406 movq %r15,%r12
4407 andq $3,%r12
4408 movq %r15,%r13
4409 andq $-4,%r13
4410 movq %r9,%r14
4411 shrdq $2,%r9,%r15
4412 shrq $2,%r9
4413 addq %r13,%r10
4414 adcq %r14,%r11
4415 adcq $0,%r12
4416 addq %r15,%r10
4417 adcq %r9,%r11
4418 adcq $0,%r12
4419 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
4420 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
4421 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
4422 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
4423 vpxor 0+128(%rsi),%ymm3,%ymm3
4424 vpxor 32+128(%rsi),%ymm2,%ymm2
4425 vpxor 64+128(%rsi),%ymm6,%ymm6
4426 vpxor 96+128(%rsi),%ymm10,%ymm10
4427 vmovdqu %ymm3,0+128(%rdi)
4428 vmovdqu %ymm2,32+128(%rdi)
4429 vmovdqu %ymm6,64+128(%rdi)
4430 vmovdqu %ymm10,96+128(%rdi)
4431 addq 60*8+16(%rsi),%r10
4432 adcq 8+60*8+16(%rsi),%r11
4433 adcq $1,%r12
4434 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
4435 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
4436 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
4437 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
4438 vpxor 0+256(%rsi),%ymm3,%ymm3
4439 vpxor 32+256(%rsi),%ymm1,%ymm1
4440 vpxor 64+256(%rsi),%ymm5,%ymm5
4441 vpxor 96+256(%rsi),%ymm9,%ymm9
4442 vmovdqu %ymm3,0+256(%rdi)
4443 vmovdqu %ymm1,32+256(%rdi)
4444 vmovdqu %ymm5,64+256(%rdi)
4445 vmovdqu %ymm9,96+256(%rdi)
4446 movq 0+0(%rbp),%rax
4447 movq %rax,%r15
4448 mulq %r10
4449 movq %rax,%r13
4450 movq %rdx,%r14
4451 movq 0+0(%rbp),%rax
4452 mulq %r11
4453 imulq %r12,%r15
4454 addq %rax,%r14
4455 adcq %rdx,%r15
4456 movq 8+0(%rbp),%rax
4457 movq %rax,%r9
4458 mulq %r10
4459 addq %rax,%r14
4460 adcq $0,%rdx
4461 movq %rdx,%r10
4462 movq 8+0(%rbp),%rax
4463 mulq %r11
4464 addq %rax,%r15
4465 adcq $0,%rdx
4466 imulq %r12,%r9
4467 addq %r10,%r15
4468 adcq %rdx,%r9
4469 movq %r13,%r10
4470 movq %r14,%r11
4471 movq %r15,%r12
4472 andq $3,%r12
4473 movq %r15,%r13
4474 andq $-4,%r13
4475 movq %r9,%r14
4476 shrdq $2,%r9,%r15
4477 shrq $2,%r9
4478 addq %r13,%r10
4479 adcq %r14,%r11
4480 adcq $0,%r12
4481 addq %r15,%r10
4482 adcq %r9,%r11
4483 adcq $0,%r12
4484 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
4485 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
4486 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
4487 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
4488 vpxor 0+384(%rsi),%ymm3,%ymm3
4489 vpxor 32+384(%rsi),%ymm0,%ymm0
4490 vpxor 64+384(%rsi),%ymm4,%ymm4
4491 vpxor 96+384(%rsi),%ymm8,%ymm8
4492 vmovdqu %ymm3,0+384(%rdi)
4493 vmovdqu %ymm0,32+384(%rdi)
4494 vmovdqu %ymm4,64+384(%rdi)
4495 vmovdqu %ymm8,96+384(%rdi)
4496
4497 leaq 512(%rsi),%rsi
4498 leaq 512(%rdi),%rdi
4499 subq $512,%rbx
4500 jmp 1b
4501 3:
4502 testq %rbx,%rbx
4503 vzeroupper
4504 je open_sse_finalize
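# Tail dispatch by remaining length: <=128, <=256, and <=384 bytes each get a dedicated AVX2 path below.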
4505 3:
4506 cmpq $128,%rbx
4507 ja 3f
4508 vmovdqa .chacha20_consts(%rip),%ymm0
4509 vmovdqa 64(%rbp),%ymm4
4510 vmovdqa 96(%rbp),%ymm8
4511 vmovdqa .avx2_inc(%rip),%ymm12
4512 vpaddd 160(%rbp),%ymm12,%ymm12
4513 vmovdqa %ymm12,160(%rbp)
4514
4515 xorq %r8,%r8
4516 movq %rbx,%rcx
4517 andq $-16,%rcx
4518 testq %rcx,%rcx
4519 je 2f
4520 1:
4521 addq 0*8(%rsi,%r8), %r10
4522 adcq 8+0*8(%rsi,%r8), %r11
4523 adcq $1,%r12
4524 movq 0+0(%rbp),%rax
4525 movq %rax,%r15
4526 mulq %r10
4527 movq %rax,%r13
4528 movq %rdx,%r14
4529 movq 0+0(%rbp),%rax
4530 mulq %r11
4531 imulq %r12,%r15
4532 addq %rax,%r14
4533 adcq %rdx,%r15
4534 movq 8+0(%rbp),%rax
4535 movq %rax,%r9
4536 mulq %r10
4537 addq %rax,%r14
4538 adcq $0,%rdx
4539 movq %rdx,%r10
4540 movq 8+0(%rbp),%rax
4541 mulq %r11
4542 addq %rax,%r15
4543 adcq $0,%rdx
4544 imulq %r12,%r9
4545 addq %r10,%r15
4546 adcq %rdx,%r9
4547 movq %r13,%r10
4548 movq %r14,%r11
4549 movq %r15,%r12
4550 andq $3,%r12
4551 movq %r15,%r13
4552 andq $-4,%r13
4553 movq %r9,%r14
4554 shrdq $2,%r9,%r15
4555 shrq $2,%r9
4556 addq %r13,%r10
4557 adcq %r14,%r11
4558 adcq $0,%r12
4559 addq %r15,%r10
4560 adcq %r9,%r11
4561 adcq $0,%r12
4562
4563 2:
4564 addq $16,%r8
4565 vpaddd %ymm4,%ymm0,%ymm0
4566 vpxor %ymm0,%ymm12,%ymm12
4567 vpshufb .rol16(%rip),%ymm12,%ymm12
4568 vpaddd %ymm12,%ymm8,%ymm8
4569 vpxor %ymm8,%ymm4,%ymm4
4570 vpsrld $20,%ymm4,%ymm3
4571 vpslld $12,%ymm4,%ymm4
4572 vpxor %ymm3,%ymm4,%ymm4
4573 vpaddd %ymm4,%ymm0,%ymm0
4574 vpxor %ymm0,%ymm12,%ymm12
4575 vpshufb .rol8(%rip),%ymm12,%ymm12
4576 vpaddd %ymm12,%ymm8,%ymm8
4577 vpxor %ymm8,%ymm4,%ymm4
4578 vpslld $7,%ymm4,%ymm3
4579 vpsrld $25,%ymm4,%ymm4
4580 vpxor %ymm3,%ymm4,%ymm4
4581 vpalignr $12,%ymm12,%ymm12,%ymm12
4582 vpalignr $8,%ymm8,%ymm8,%ymm8
4583 vpalignr $4,%ymm4,%ymm4,%ymm4
4584 vpaddd %ymm4,%ymm0,%ymm0
4585 vpxor %ymm0,%ymm12,%ymm12
4586 vpshufb .rol16(%rip),%ymm12,%ymm12
4587 vpaddd %ymm12,%ymm8,%ymm8
4588 vpxor %ymm8,%ymm4,%ymm4
4589 vpsrld $20,%ymm4,%ymm3
4590 vpslld $12,%ymm4,%ymm4
4591 vpxor %ymm3,%ymm4,%ymm4
4592 vpaddd %ymm4,%ymm0,%ymm0
4593 vpxor %ymm0,%ymm12,%ymm12
4594 vpshufb .rol8(%rip),%ymm12,%ymm12
4595 vpaddd %ymm12,%ymm8,%ymm8
4596 vpxor %ymm8,%ymm4,%ymm4
4597 vpslld $7,%ymm4,%ymm3
4598 vpsrld $25,%ymm4,%ymm4
4599 vpxor %ymm3,%ymm4,%ymm4
4600 vpalignr $4,%ymm12,%ymm12,%ymm12
4601 vpalignr $8,%ymm8,%ymm8,%ymm8
4602 vpalignr $12,%ymm4,%ymm4,%ymm4
4603
4604 cmpq %rcx,%r8
4605 jb 1b
4606 cmpq $160,%r8
4607 jne 2b
4608 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
4609 vpaddd 64(%rbp),%ymm4,%ymm4
4610 vpaddd 96(%rbp),%ymm8,%ymm8
4611 vpaddd 160(%rbp),%ymm12,%ymm12
4612 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
4613 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
4614 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
4615 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
4616 vmovdqa %ymm3,%ymm8
4617
4618 jmp open_avx2_tail_loop
4619 3:
4620 cmpq $256,%rbx
4621 ja 3f
4622 vmovdqa .chacha20_consts(%rip),%ymm0
4623 vmovdqa 64(%rbp),%ymm4
4624 vmovdqa 96(%rbp),%ymm8
4625 vmovdqa %ymm0,%ymm1
4626 vmovdqa %ymm4,%ymm5
4627 vmovdqa %ymm8,%ymm9
4628 vmovdqa .avx2_inc(%rip),%ymm12
4629 vpaddd 160(%rbp),%ymm12,%ymm13
4630 vpaddd %ymm13,%ymm12,%ymm12
4631 vmovdqa %ymm12,160(%rbp)
4632 vmovdqa %ymm13,192(%rbp)
4633
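# <=256-byte tail: interleave hashing with rounds only for bytes that actually remain (total length saved at 128(%rbp), hash passes capped at 10).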
4634 movq %rbx,128(%rbp)
4635 movq %rbx,%rcx
4636 subq $128,%rcx
4637 shrq $4,%rcx
4638 movq $10,%r8
4639 cmpq $10,%rcx
4640 cmovgq %r8,%rcx
4641 movq %rsi,%rbx
4642 xorq %r8,%r8
4643 1:
4644 addq 0(%rbx),%r10
4645 adcq 8+0(%rbx),%r11
4646 adcq $1,%r12
4647 movq 0+0(%rbp),%rdx
4648 movq %rdx,%r15
4649 mulxq %r10,%r13,%r14
4650 mulxq %r11,%rax,%rdx
4651 imulq %r12,%r15
4652 addq %rax,%r14
4653 adcq %rdx,%r15
4654 movq 8+0(%rbp),%rdx
4655 mulxq %r10,%r10,%rax
4656 addq %r10,%r14
4657 mulxq %r11,%r11,%r9
4658 adcq %r11,%r15
4659 adcq $0,%r9
4660 imulq %r12,%rdx
4661 addq %rax,%r15
4662 adcq %rdx,%r9
4663 movq %r13,%r10
4664 movq %r14,%r11
4665 movq %r15,%r12
4666 andq $3,%r12
4667 movq %r15,%r13
4668 andq $-4,%r13
4669 movq %r9,%r14
4670 shrdq $2,%r9,%r15
4671 shrq $2,%r9
4672 addq %r13,%r10
4673 adcq %r14,%r11
4674 adcq $0,%r12
4675 addq %r15,%r10
4676 adcq %r9,%r11
4677 adcq $0,%r12
4678
4679 leaq 16(%rbx),%rbx
4680 2:
4681 vpaddd %ymm4,%ymm0,%ymm0
4682 vpxor %ymm0,%ymm12,%ymm12
4683 vpshufb .rol16(%rip),%ymm12,%ymm12
4684 vpaddd %ymm12,%ymm8,%ymm8
4685 vpxor %ymm8,%ymm4,%ymm4
4686 vpsrld $20,%ymm4,%ymm3
4687 vpslld $12,%ymm4,%ymm4
4688 vpxor %ymm3,%ymm4,%ymm4
4689 vpaddd %ymm4,%ymm0,%ymm0
4690 vpxor %ymm0,%ymm12,%ymm12
4691 vpshufb .rol8(%rip),%ymm12,%ymm12
4692 vpaddd %ymm12,%ymm8,%ymm8
4693 vpxor %ymm8,%ymm4,%ymm4
4694 vpslld $7,%ymm4,%ymm3
4695 vpsrld $25,%ymm4,%ymm4
4696 vpxor %ymm3,%ymm4,%ymm4
4697 vpalignr $12,%ymm12,%ymm12,%ymm12
4698 vpalignr $8,%ymm8,%ymm8,%ymm8
4699 vpalignr $4,%ymm4,%ymm4,%ymm4
4700 vpaddd %ymm5,%ymm1,%ymm1
4701 vpxor %ymm1,%ymm13,%ymm13
4702 vpshufb .rol16(%rip),%ymm13,%ymm13
4703 vpaddd %ymm13,%ymm9,%ymm9
4704 vpxor %ymm9,%ymm5,%ymm5
4705 vpsrld $20,%ymm5,%ymm3
4706 vpslld $12,%ymm5,%ymm5
4707 vpxor %ymm3,%ymm5,%ymm5
4708 vpaddd %ymm5,%ymm1,%ymm1
4709 vpxor %ymm1,%ymm13,%ymm13
4710 vpshufb .rol8(%rip),%ymm13,%ymm13
4711 vpaddd %ymm13,%ymm9,%ymm9
4712 vpxor %ymm9,%ymm5,%ymm5
4713 vpslld $7,%ymm5,%ymm3
4714 vpsrld $25,%ymm5,%ymm5
4715 vpxor %ymm3,%ymm5,%ymm5
4716 vpalignr $12,%ymm13,%ymm13,%ymm13
4717 vpalignr $8,%ymm9,%ymm9,%ymm9
4718 vpalignr $4,%ymm5,%ymm5,%ymm5
4719
4720 incq %r8
4721 vpaddd %ymm4,%ymm0,%ymm0
4722 vpxor %ymm0,%ymm12,%ymm12
4723 vpshufb .rol16(%rip),%ymm12,%ymm12
4724 vpaddd %ymm12,%ymm8,%ymm8
4725 vpxor %ymm8,%ymm4,%ymm4
4726 vpsrld $20,%ymm4,%ymm3
4727 vpslld $12,%ymm4,%ymm4
4728 vpxor %ymm3,%ymm4,%ymm4
4729 vpaddd %ymm4,%ymm0,%ymm0
4730 vpxor %ymm0,%ymm12,%ymm12
4731 vpshufb .rol8(%rip),%ymm12,%ymm12
4732 vpaddd %ymm12,%ymm8,%ymm8
4733 vpxor %ymm8,%ymm4,%ymm4
4734 vpslld $7,%ymm4,%ymm3
4735 vpsrld $25,%ymm4,%ymm4
4736 vpxor %ymm3,%ymm4,%ymm4
4737 vpalignr $4,%ymm12,%ymm12,%ymm12
4738 vpalignr $8,%ymm8,%ymm8,%ymm8
4739 vpalignr $12,%ymm4,%ymm4,%ymm4
4740 vpaddd %ymm5,%ymm1,%ymm1
4741 vpxor %ymm1,%ymm13,%ymm13
4742 vpshufb .rol16(%rip),%ymm13,%ymm13
4743 vpaddd %ymm13,%ymm9,%ymm9
4744 vpxor %ymm9,%ymm5,%ymm5
4745 vpsrld $20,%ymm5,%ymm3
4746 vpslld $12,%ymm5,%ymm5
4747 vpxor %ymm3,%ymm5,%ymm5
4748 vpaddd %ymm5,%ymm1,%ymm1
4749 vpxor %ymm1,%ymm13,%ymm13
4750 vpshufb .rol8(%rip),%ymm13,%ymm13
4751 vpaddd %ymm13,%ymm9,%ymm9
4752 vpxor %ymm9,%ymm5,%ymm5
4753 vpslld $7,%ymm5,%ymm3
4754 vpsrld $25,%ymm5,%ymm5
4755 vpxor %ymm3,%ymm5,%ymm5
4756 vpalignr $4,%ymm13,%ymm13,%ymm13
4757 vpalignr $8,%ymm9,%ymm9,%ymm9
4758 vpalignr $12,%ymm5,%ymm5,%ymm5
4759 vpaddd %ymm6,%ymm2,%ymm2
4760 vpxor %ymm2,%ymm14,%ymm14
4761 vpshufb .rol16(%rip),%ymm14,%ymm14
4762 vpaddd %ymm14,%ymm10,%ymm10
4763 vpxor %ymm10,%ymm6,%ymm6
4764 vpsrld $20,%ymm6,%ymm3
4765 vpslld $12,%ymm6,%ymm6
4766 vpxor %ymm3,%ymm6,%ymm6
4767 vpaddd %ymm6,%ymm2,%ymm2
4768 vpxor %ymm2,%ymm14,%ymm14
4769 vpshufb .rol8(%rip),%ymm14,%ymm14
4770 vpaddd %ymm14,%ymm10,%ymm10
4771 vpxor %ymm10,%ymm6,%ymm6
4772 vpslld $7,%ymm6,%ymm3
4773 vpsrld $25,%ymm6,%ymm6
4774 vpxor %ymm3,%ymm6,%ymm6
4775 vpalignr $4,%ymm14,%ymm14,%ymm14
4776 vpalignr $8,%ymm10,%ymm10,%ymm10
4777 vpalignr $12,%ymm6,%ymm6,%ymm6
4778
4779 cmpq %rcx,%r8
4780 jb 1b
4781 cmpq $10,%r8
4782 jne 2b
4783 movq %rbx,%r8
4784 subq %rsi,%rbx
4785 movq %rbx,%rcx
4786 movq 128(%rbp),%rbx
4787 1:
4788 addq $16,%rcx
4789 cmpq %rbx,%rcx
4790 jg 1f
4791 addq 0(%r8),%r10
4792 adcq 8+0(%r8),%r11
4793 adcq $1,%r12
4794 movq 0+0(%rbp),%rdx
4795 movq %rdx,%r15
4796 mulxq %r10,%r13,%r14
4797 mulxq %r11,%rax,%rdx
4798 imulq %r12,%r15
4799 addq %rax,%r14
4800 adcq %rdx,%r15
4801 movq 8+0(%rbp),%rdx
4802 mulxq %r10,%r10,%rax
4803 addq %r10,%r14
4804 mulxq %r11,%r11,%r9
4805 adcq %r11,%r15
4806 adcq $0,%r9
4807 imulq %r12,%rdx
4808 addq %rax,%r15
4809 adcq %rdx,%r9
4810 movq %r13,%r10
4811 movq %r14,%r11
4812 movq %r15,%r12
4813 andq $3,%r12
4814 movq %r15,%r13
4815 andq $-4,%r13
4816 movq %r9,%r14
4817 shrdq $2,%r9,%r15
4818 shrq $2,%r9
4819 addq %r13,%r10
4820 adcq %r14,%r11
4821 adcq $0,%r12
4822 addq %r15,%r10
4823 adcq %r9,%r11
4824 adcq $0,%r12
4825
4826 leaq 16(%r8),%r8
4827 jmp 1b
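# Hashing caught up: add the initial state back into both block pairs, xor the first 128 bytes,
# and leave the remaining keystream queued in %ymm0/%ymm4/%ymm8/%ymm12 for the drain loop.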
4828 1:
4829 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
4830 vpaddd 64(%rbp),%ymm5,%ymm5
4831 vpaddd 96(%rbp),%ymm9,%ymm9
4832 vpaddd 192(%rbp),%ymm13,%ymm13
4833 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
4834 vpaddd 64(%rbp),%ymm4,%ymm4
4835 vpaddd 96(%rbp),%ymm8,%ymm8
4836 vpaddd 160(%rbp),%ymm12,%ymm12
4837 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
4838 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
4839 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
4840 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
4841 vpxor 0+0(%rsi),%ymm3,%ymm3
4842 vpxor 32+0(%rsi),%ymm1,%ymm1
4843 vpxor 64+0(%rsi),%ymm5,%ymm5
4844 vpxor 96+0(%rsi),%ymm9,%ymm9
4845 vmovdqu %ymm3,0+0(%rdi)
4846 vmovdqu %ymm1,32+0(%rdi)
4847 vmovdqu %ymm5,64+0(%rdi)
4848 vmovdqu %ymm9,96+0(%rdi)
4849 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
4850 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
4851 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
4852 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
4853 vmovdqa %ymm3,%ymm8
4854
4855 leaq 128(%rsi),%rsi
4856 leaq 128(%rdi),%rdi
4857 subq $128,%rbx
4858 jmp open_avx2_tail_loop
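# Tail of 257..384 bytes: three interleaved two-block states give 384 bytes of keystream.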
4859 3:
4860 cmpq $384,%rbx
4861 ja 3f
4862 vmovdqa .chacha20_consts(%rip),%ymm0
4863 vmovdqa 64(%rbp),%ymm4
4864 vmovdqa 96(%rbp),%ymm8
4865 vmovdqa %ymm0,%ymm1
4866 vmovdqa %ymm4,%ymm5
4867 vmovdqa %ymm8,%ymm9
4868 vmovdqa %ymm0,%ymm2
4869 vmovdqa %ymm4,%ymm6
4870 vmovdqa %ymm8,%ymm10
4871 vmovdqa .avx2_inc(%rip),%ymm12
4872 vpaddd 160(%rbp),%ymm12,%ymm14
4873 vpaddd %ymm14,%ymm12,%ymm13
4874 vpaddd %ymm13,%ymm12,%ymm12
4875 vmovdqa %ymm12,160(%rbp)
4876 vmovdqa %ymm13,192(%rbp)
4877 vmovdqa %ymm14,224(%rbp)
4878
4879 movq %rbx,128(%rbp)
4880 movq %rbx,%rcx
4881 subq $256,%rcx
4882 shrq $4,%rcx
4883 addq $6,%rcx
4884 movq $10,%r8
4885 cmpq $10,%rcx
4886 cmovgq %r8,%rcx
4887 movq %rsi,%rbx
4888 xorq %r8,%r8
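# Iterations below %rcx enter at label 1 and absorb two ciphertext blocks per double round;
# the remaining iterations (up to 10) enter at label 2 and absorb one.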
4889 1:
4890 addq 0(%rbx),%r10
4891 adcq 8+0(%rbx),%r11
4892 adcq $1,%r12
4893 movq 0+0(%rbp),%rdx
4894 movq %rdx,%r15
4895 mulxq %r10,%r13,%r14
4896 mulxq %r11,%rax,%rdx
4897 imulq %r12,%r15
4898 addq %rax,%r14
4899 adcq %rdx,%r15
4900 movq 8+0(%rbp),%rdx
4901 mulxq %r10,%r10,%rax
4902 addq %r10,%r14
4903 mulxq %r11,%r11,%r9
4904 adcq %r11,%r15
4905 adcq $0,%r9
4906 imulq %r12,%rdx
4907 addq %rax,%r15
4908 adcq %rdx,%r9
4909 movq %r13,%r10
4910 movq %r14,%r11
4911 movq %r15,%r12
4912 andq $3,%r12
4913 movq %r15,%r13
4914 andq $-4,%r13
4915 movq %r9,%r14
4916 shrdq $2,%r9,%r15
4917 shrq $2,%r9
4918 addq %r13,%r10
4919 adcq %r14,%r11
4920 adcq $0,%r12
4921 addq %r15,%r10
4922 adcq %r9,%r11
4923 adcq $0,%r12
4924
4925 leaq 16(%rbx),%rbx
4926 2:
4927 vpaddd %ymm6,%ymm2,%ymm2
4928 vpxor %ymm2,%ymm14,%ymm14
4929 vpshufb .rol16(%rip),%ymm14,%ymm14
4930 vpaddd %ymm14,%ymm10,%ymm10
4931 vpxor %ymm10,%ymm6,%ymm6
4932 vpsrld $20,%ymm6,%ymm3
4933 vpslld $12,%ymm6,%ymm6
4934 vpxor %ymm3,%ymm6,%ymm6
4935 vpaddd %ymm6,%ymm2,%ymm2
4936 vpxor %ymm2,%ymm14,%ymm14
4937 vpshufb .rol8(%rip),%ymm14,%ymm14
4938 vpaddd %ymm14,%ymm10,%ymm10
4939 vpxor %ymm10,%ymm6,%ymm6
4940 vpslld $7,%ymm6,%ymm3
4941 vpsrld $25,%ymm6,%ymm6
4942 vpxor %ymm3,%ymm6,%ymm6
4943 vpalignr $12,%ymm14,%ymm14,%ymm14
4944 vpalignr $8,%ymm10,%ymm10,%ymm10
4945 vpalignr $4,%ymm6,%ymm6,%ymm6
4946 vpaddd %ymm5,%ymm1,%ymm1
4947 vpxor %ymm1,%ymm13,%ymm13
4948 vpshufb .rol16(%rip),%ymm13,%ymm13
4949 vpaddd %ymm13,%ymm9,%ymm9
4950 vpxor %ymm9,%ymm5,%ymm5
4951 vpsrld $20,%ymm5,%ymm3
4952 vpslld $12,%ymm5,%ymm5
4953 vpxor %ymm3,%ymm5,%ymm5
4954 vpaddd %ymm5,%ymm1,%ymm1
4955 vpxor %ymm1,%ymm13,%ymm13
4956 vpshufb .rol8(%rip),%ymm13,%ymm13
4957 vpaddd %ymm13,%ymm9,%ymm9
4958 vpxor %ymm9,%ymm5,%ymm5
4959 vpslld $7,%ymm5,%ymm3
4960 vpsrld $25,%ymm5,%ymm5
4961 vpxor %ymm3,%ymm5,%ymm5
4962 vpalignr $12,%ymm13,%ymm13,%ymm13
4963 vpalignr $8,%ymm9,%ymm9,%ymm9
4964 vpalignr $4,%ymm5,%ymm5,%ymm5
4965 vpaddd %ymm4,%ymm0,%ymm0
4966 vpxor %ymm0,%ymm12,%ymm12
4967 vpshufb .rol16(%rip),%ymm12,%ymm12
4968 vpaddd %ymm12,%ymm8,%ymm8
4969 vpxor %ymm8,%ymm4,%ymm4
4970 vpsrld $20,%ymm4,%ymm3
4971 vpslld $12,%ymm4,%ymm4
4972 vpxor %ymm3,%ymm4,%ymm4
4973 vpaddd %ymm4,%ymm0,%ymm0
4974 vpxor %ymm0,%ymm12,%ymm12
4975 vpshufb .rol8(%rip),%ymm12,%ymm12
4976 vpaddd %ymm12,%ymm8,%ymm8
4977 vpxor %ymm8,%ymm4,%ymm4
4978 vpslld $7,%ymm4,%ymm3
4979 vpsrld $25,%ymm4,%ymm4
4980 vpxor %ymm3,%ymm4,%ymm4
4981 vpalignr $12,%ymm12,%ymm12,%ymm12
4982 vpalignr $8,%ymm8,%ymm8,%ymm8
4983 vpalignr $4,%ymm4,%ymm4,%ymm4
4984 addq 0(%rbx),%r10
4985 adcq 8+0(%rbx),%r11
4986 adcq $1,%r12
4987 movq 0+0(%rbp),%rax
4988 movq %rax,%r15
4989 mulq %r10
4990 movq %rax,%r13
4991 movq %rdx,%r14
4992 movq 0+0(%rbp),%rax
4993 mulq %r11
4994 imulq %r12,%r15
4995 addq %rax,%r14
4996 adcq %rdx,%r15
4997 movq 8+0(%rbp),%rax
4998 movq %rax,%r9
4999 mulq %r10
5000 addq %rax,%r14
5001 adcq $0,%rdx
5002 movq %rdx,%r10
5003 movq 8+0(%rbp),%rax
5004 mulq %r11
5005 addq %rax,%r15
5006 adcq $0,%rdx
5007 imulq %r12,%r9
5008 addq %r10,%r15
5009 adcq %rdx,%r9
5010 movq %r13,%r10
5011 movq %r14,%r11
5012 movq %r15,%r12
5013 andq $3,%r12
5014 movq %r15,%r13
5015 andq $-4,%r13
5016 movq %r9,%r14
5017 shrdq $2,%r9,%r15
5018 shrq $2,%r9
5019 addq %r13,%r10
5020 adcq %r14,%r11
5021 adcq $0,%r12
5022 addq %r15,%r10
5023 adcq %r9,%r11
5024 adcq $0,%r12
5025
5026 leaq 16(%rbx),%rbx
5027 incq %r8
5028 vpaddd %ymm6,%ymm2,%ymm2
5029 vpxor %ymm2,%ymm14,%ymm14
5030 vpshufb .rol16(%rip),%ymm14,%ymm14
5031 vpaddd %ymm14,%ymm10,%ymm10
5032 vpxor %ymm10,%ymm6,%ymm6
5033 vpsrld $20,%ymm6,%ymm3
5034 vpslld $12,%ymm6,%ymm6
5035 vpxor %ymm3,%ymm6,%ymm6
5036 vpaddd %ymm6,%ymm2,%ymm2
5037 vpxor %ymm2,%ymm14,%ymm14
5038 vpshufb .rol8(%rip),%ymm14,%ymm14
5039 vpaddd %ymm14,%ymm10,%ymm10
5040 vpxor %ymm10,%ymm6,%ymm6
5041 vpslld $7,%ymm6,%ymm3
5042 vpsrld $25,%ymm6,%ymm6
5043 vpxor %ymm3,%ymm6,%ymm6
5044 vpalignr $4,%ymm14,%ymm14,%ymm14
5045 vpalignr $8,%ymm10,%ymm10,%ymm10
5046 vpalignr $12,%ymm6,%ymm6,%ymm6
5047 vpaddd %ymm5,%ymm1,%ymm1
5048 vpxor %ymm1,%ymm13,%ymm13
5049 vpshufb .rol16(%rip),%ymm13,%ymm13
5050 vpaddd %ymm13,%ymm9,%ymm9
5051 vpxor %ymm9,%ymm5,%ymm5
5052 vpsrld $20,%ymm5,%ymm3
5053 vpslld $12,%ymm5,%ymm5
5054 vpxor %ymm3,%ymm5,%ymm5
5055 vpaddd %ymm5,%ymm1,%ymm1
5056 vpxor %ymm1,%ymm13,%ymm13
5057 vpshufb .rol8(%rip),%ymm13,%ymm13
5058 vpaddd %ymm13,%ymm9,%ymm9
5059 vpxor %ymm9,%ymm5,%ymm5
5060 vpslld $7,%ymm5,%ymm3
5061 vpsrld $25,%ymm5,%ymm5
5062 vpxor %ymm3,%ymm5,%ymm5
5063 vpalignr $4,%ymm13,%ymm13,%ymm13
5064 vpalignr $8,%ymm9,%ymm9,%ymm9
5065 vpalignr $12,%ymm5,%ymm5,%ymm5
5066 vpaddd %ymm4,%ymm0,%ymm0
5067 vpxor %ymm0,%ymm12,%ymm12
5068 vpshufb .rol16(%rip),%ymm12,%ymm12
5069 vpaddd %ymm12,%ymm8,%ymm8
5070 vpxor %ymm8,%ymm4,%ymm4
5071 vpsrld $20,%ymm4,%ymm3
5072 vpslld $12,%ymm4,%ymm4
5073 vpxor %ymm3,%ymm4,%ymm4
5074 vpaddd %ymm4,%ymm0,%ymm0
5075 vpxor %ymm0,%ymm12,%ymm12
5076 vpshufb .rol8(%rip),%ymm12,%ymm12
5077 vpaddd %ymm12,%ymm8,%ymm8
5078 vpxor %ymm8,%ymm4,%ymm4
5079 vpslld $7,%ymm4,%ymm3
5080 vpsrld $25,%ymm4,%ymm4
5081 vpxor %ymm3,%ymm4,%ymm4
5082 vpalignr $4,%ymm12,%ymm12,%ymm12
5083 vpalignr $8,%ymm8,%ymm8,%ymm8
5084 vpalignr $12,%ymm4,%ymm4,%ymm4
5085
5086 cmpq %rcx,%r8
5087 jb 1b
5088 cmpq $10,%r8
5089 jne 2b
5090 movq %rbx,%r8
5091 subq %rsi,%rbx
5092 movq %rbx,%rcx
5093 movq 128(%rbp),%rbx
5094 1:
5095 addq $16,%rcx
5096 cmpq %rbx,%rcx
5097 jg 1f
5098 addq 0(%r8),%r10
5099 adcq 8+0(%r8),%r11
5100 adcq $1,%r12
5101 movq 0+0(%rbp),%rdx
5102 movq %rdx,%r15
5103 mulxq %r10,%r13,%r14
5104 mulxq %r11,%rax,%rdx
5105 imulq %r12,%r15
5106 addq %rax,%r14
5107 adcq %rdx,%r15
5108 movq 8+0(%rbp),%rdx
5109 mulxq %r10,%r10,%rax
5110 addq %r10,%r14
5111 mulxq %r11,%r11,%r9
5112 adcq %r11,%r15
5113 adcq $0,%r9
5114 imulq %r12,%rdx
5115 addq %rax,%r15
5116 adcq %rdx,%r9
5117 movq %r13,%r10
5118 movq %r14,%r11
5119 movq %r15,%r12
5120 andq $3,%r12
5121 movq %r15,%r13
5122 andq $-4,%r13
5123 movq %r9,%r14
5124 shrdq $2,%r9,%r15
5125 shrq $2,%r9
5126 addq %r13,%r10
5127 adcq %r14,%r11
5128 adcq $0,%r12
5129 addq %r15,%r10
5130 adcq %r9,%r11
5131 adcq $0,%r12
5132
5133 leaq 16(%r8),%r8
5134 jmp 1b
5135 1:
5136 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
5137 vpaddd 64(%rbp),%ymm6,%ymm6
5138 vpaddd 96(%rbp),%ymm10,%ymm10
5139 vpaddd 224(%rbp),%ymm14,%ymm14
5140 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
5141 vpaddd 64(%rbp),%ymm5,%ymm5
5142 vpaddd 96(%rbp),%ymm9,%ymm9
5143 vpaddd 192(%rbp),%ymm13,%ymm13
5144 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
5145 vpaddd 64(%rbp),%ymm4,%ymm4
5146 vpaddd 96(%rbp),%ymm8,%ymm8
5147 vpaddd 160(%rbp),%ymm12,%ymm12
5148 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
5149 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
5150 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
5151 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
5152 vpxor 0+0(%rsi),%ymm3,%ymm3
5153 vpxor 32+0(%rsi),%ymm2,%ymm2
5154 vpxor 64+0(%rsi),%ymm6,%ymm6
5155 vpxor 96+0(%rsi),%ymm10,%ymm10
5156 vmovdqu %ymm3,0+0(%rdi)
5157 vmovdqu %ymm2,32+0(%rdi)
5158 vmovdqu %ymm6,64+0(%rdi)
5159 vmovdqu %ymm10,96+0(%rdi)
5160 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
5161 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
5162 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
5163 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
5164 vpxor 0+128(%rsi),%ymm3,%ymm3
5165 vpxor 32+128(%rsi),%ymm1,%ymm1
5166 vpxor 64+128(%rsi),%ymm5,%ymm5
5167 vpxor 96+128(%rsi),%ymm9,%ymm9
5168 vmovdqu %ymm3,0+128(%rdi)
5169 vmovdqu %ymm1,32+128(%rdi)
5170 vmovdqu %ymm5,64+128(%rdi)
5171 vmovdqu %ymm9,96+128(%rdi)
5172 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
5173 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
5174 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
5175 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
5176 vmovdqa %ymm3,%ymm8
5177
5178 leaq 256(%rsi),%rsi
5179 leaq 256(%rdi),%rdi
5180 subq $256,%rbx
5181 jmp open_avx2_tail_loop
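# Tail of 385..512 bytes: four interleaved two-block states (512 bytes of keystream).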
5182 3:
5183 vmovdqa .chacha20_consts(%rip),%ymm0
5184 vmovdqa 64(%rbp),%ymm4
5185 vmovdqa 96(%rbp),%ymm8
5186 vmovdqa %ymm0,%ymm1
5187 vmovdqa %ymm4,%ymm5
5188 vmovdqa %ymm8,%ymm9
5189 vmovdqa %ymm0,%ymm2
5190 vmovdqa %ymm4,%ymm6
5191 vmovdqa %ymm8,%ymm10
5192 vmovdqa %ymm0,%ymm3
5193 vmovdqa %ymm4,%ymm7
5194 vmovdqa %ymm8,%ymm11
5195 vmovdqa .avx2_inc(%rip),%ymm12
5196 vpaddd 160(%rbp),%ymm12,%ymm15
5197 vpaddd %ymm15,%ymm12,%ymm14
5198 vpaddd %ymm14,%ymm12,%ymm13
5199 vpaddd %ymm13,%ymm12,%ymm12
5200 vmovdqa %ymm15,256(%rbp)
5201 vmovdqa %ymm14,224(%rbp)
5202 vmovdqa %ymm13,192(%rbp)
5203 vmovdqa %ymm12,160(%rbp)
5204
5205 xorq %rcx,%rcx
5206 movq %rsi,%r8
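# The first four double rounds take the label-1 path and absorb an extra ciphertext block each;
# every double round at label 2 absorbs two more, overlapping the scalar Poly1305 multiplies
# with the vector work.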
5207 1:
5208 addq 0(%r8),%r10
5209 adcq 8+0(%r8),%r11
5210 adcq $1,%r12
5211 movq 0+0(%rbp),%rax
5212 movq %rax,%r15
5213 mulq %r10
5214 movq %rax,%r13
5215 movq %rdx,%r14
5216 movq 0+0(%rbp),%rax
5217 mulq %r11
5218 imulq %r12,%r15
5219 addq %rax,%r14
5220 adcq %rdx,%r15
5221 movq 8+0(%rbp),%rax
5222 movq %rax,%r9
5223 mulq %r10
5224 addq %rax,%r14
5225 adcq $0,%rdx
5226 movq %rdx,%r10
5227 movq 8+0(%rbp),%rax
5228 mulq %r11
5229 addq %rax,%r15
5230 adcq $0,%rdx
5231 imulq %r12,%r9
5232 addq %r10,%r15
5233 adcq %rdx,%r9
5234 movq %r13,%r10
5235 movq %r14,%r11
5236 movq %r15,%r12
5237 andq $3,%r12
5238 movq %r15,%r13
5239 andq $-4,%r13
5240 movq %r9,%r14
5241 shrdq $2,%r9,%r15
5242 shrq $2,%r9
5243 addq %r13,%r10
5244 adcq %r14,%r11
5245 adcq $0,%r12
5246 addq %r15,%r10
5247 adcq %r9,%r11
5248 adcq $0,%r12
5249
5250 leaq 16(%r8),%r8
5251 2:
5252 vmovdqa %ymm8,128(%rbp)
5253 vmovdqa .rol16(%rip),%ymm8
5254 vpaddd %ymm7,%ymm3,%ymm3
5255 vpaddd %ymm6,%ymm2,%ymm2
5256 vpaddd %ymm5,%ymm1,%ymm1
5257 vpaddd %ymm4,%ymm0,%ymm0
5258 vpxor %ymm3,%ymm15,%ymm15
5259 vpxor %ymm2,%ymm14,%ymm14
5260 vpxor %ymm1,%ymm13,%ymm13
5261 vpxor %ymm0,%ymm12,%ymm12
5262 vpshufb %ymm8,%ymm15,%ymm15
5263 vpshufb %ymm8,%ymm14,%ymm14
5264 vpshufb %ymm8,%ymm13,%ymm13
5265 vpshufb %ymm8,%ymm12,%ymm12
5266 vmovdqa 128(%rbp),%ymm8
5267 vpaddd %ymm15,%ymm11,%ymm11
5268 vpaddd %ymm14,%ymm10,%ymm10
5269 vpaddd %ymm13,%ymm9,%ymm9
5270 vpaddd %ymm12,%ymm8,%ymm8
5271 vpxor %ymm11,%ymm7,%ymm7
5272 vpxor %ymm10,%ymm6,%ymm6
5273 vpxor %ymm9,%ymm5,%ymm5
5274 vpxor %ymm8,%ymm4,%ymm4
5275 vmovdqa %ymm8,128(%rbp)
5276 vpsrld $20,%ymm7,%ymm8
5277 vpslld $32-20,%ymm7,%ymm7
5278 vpxor %ymm8,%ymm7,%ymm7
5279 vpsrld $20,%ymm6,%ymm8
5280 vpslld $32-20,%ymm6,%ymm6
5281 vpxor %ymm8,%ymm6,%ymm6
5282 vpsrld $20,%ymm5,%ymm8
5283 vpslld $32-20,%ymm5,%ymm5
5284 vpxor %ymm8,%ymm5,%ymm5
5285 vpsrld $20,%ymm4,%ymm8
5286 vpslld $32-20,%ymm4,%ymm4
5287 vpxor %ymm8,%ymm4,%ymm4
5288 vmovdqa .rol8(%rip),%ymm8
5289 addq 0(%r8),%r10
5290 adcq 8+0(%r8),%r11
5291 adcq $1,%r12
5292 movq 0+0(%rbp),%rdx
5293 movq %rdx,%r15
5294 mulxq %r10,%r13,%r14
5295 mulxq %r11,%rax,%rdx
5296 imulq %r12,%r15
5297 addq %rax,%r14
5298 adcq %rdx,%r15
5299 movq 8+0(%rbp),%rdx
5300 mulxq %r10,%r10,%rax
5301 addq %r10,%r14
5302 mulxq %r11,%r11,%r9
5303 adcq %r11,%r15
5304 adcq $0,%r9
5305 imulq %r12,%rdx
5306 addq %rax,%r15
5307 adcq %rdx,%r9
5308 movq %r13,%r10
5309 movq %r14,%r11
5310 movq %r15,%r12
5311 andq $3,%r12
5312 movq %r15,%r13
5313 andq $-4,%r13
5314 movq %r9,%r14
5315 shrdq $2,%r9,%r15
5316 shrq $2,%r9
5317 addq %r13,%r10
5318 adcq %r14,%r11
5319 adcq $0,%r12
5320 addq %r15,%r10
5321 adcq %r9,%r11
5322 adcq $0,%r12
5323 vpaddd %ymm7,%ymm3,%ymm3
5324 vpaddd %ymm6,%ymm2,%ymm2
5325 vpaddd %ymm5,%ymm1,%ymm1
5326 vpaddd %ymm4,%ymm0,%ymm0
5327 vpxor %ymm3,%ymm15,%ymm15
5328 vpxor %ymm2,%ymm14,%ymm14
5329 vpxor %ymm1,%ymm13,%ymm13
5330 vpxor %ymm0,%ymm12,%ymm12
5331 vpshufb %ymm8,%ymm15,%ymm15
5332 vpshufb %ymm8,%ymm14,%ymm14
5333 vpshufb %ymm8,%ymm13,%ymm13
5334 vpshufb %ymm8,%ymm12,%ymm12
5335 vmovdqa 128(%rbp),%ymm8
5336 vpaddd %ymm15,%ymm11,%ymm11
5337 vpaddd %ymm14,%ymm10,%ymm10
5338 vpaddd %ymm13,%ymm9,%ymm9
5339 vpaddd %ymm12,%ymm8,%ymm8
5340 vpxor %ymm11,%ymm7,%ymm7
5341 vpxor %ymm10,%ymm6,%ymm6
5342 vpxor %ymm9,%ymm5,%ymm5
5343 vpxor %ymm8,%ymm4,%ymm4
5344 vmovdqa %ymm8,128(%rbp)
5345 vpsrld $25,%ymm7,%ymm8
5346 vpslld $32-25,%ymm7,%ymm7
5347 vpxor %ymm8,%ymm7,%ymm7
5348 vpsrld $25,%ymm6,%ymm8
5349 vpslld $32-25,%ymm6,%ymm6
5350 vpxor %ymm8,%ymm6,%ymm6
5351 vpsrld $25,%ymm5,%ymm8
5352 vpslld $32-25,%ymm5,%ymm5
5353 vpxor %ymm8,%ymm5,%ymm5
5354 vpsrld $25,%ymm4,%ymm8
5355 vpslld $32-25,%ymm4,%ymm4
5356 vpxor %ymm8,%ymm4,%ymm4
5357 vmovdqa 128(%rbp),%ymm8
5358 vpalignr $4,%ymm7,%ymm7,%ymm7
5359 vpalignr $8,%ymm11,%ymm11,%ymm11
5360 vpalignr $12,%ymm15,%ymm15,%ymm15
5361 vpalignr $4,%ymm6,%ymm6,%ymm6
5362 vpalignr $8,%ymm10,%ymm10,%ymm10
5363 vpalignr $12,%ymm14,%ymm14,%ymm14
5364 vpalignr $4,%ymm5,%ymm5,%ymm5
5365 vpalignr $8,%ymm9,%ymm9,%ymm9
5366 vpalignr $12,%ymm13,%ymm13,%ymm13
5367 vpalignr $4,%ymm4,%ymm4,%ymm4
5368 vpalignr $8,%ymm8,%ymm8,%ymm8
5369 vpalignr $12,%ymm12,%ymm12,%ymm12
5370 vmovdqa %ymm8,128(%rbp)
5371 addq 16(%r8),%r10
5372 adcq 8+16(%r8),%r11
5373 adcq $1,%r12
5374 movq 0+0(%rbp),%rdx
5375 movq %rdx,%r15
5376 mulxq %r10,%r13,%r14
5377 mulxq %r11,%rax,%rdx
5378 imulq %r12,%r15
5379 addq %rax,%r14
5380 adcq %rdx,%r15
5381 movq 8+0(%rbp),%rdx
5382 mulxq %r10,%r10,%rax
5383 addq %r10,%r14
5384 mulxq %r11,%r11,%r9
5385 adcq %r11,%r15
5386 adcq $0,%r9
5387 imulq %r12,%rdx
5388 addq %rax,%r15
5389 adcq %rdx,%r9
5390 movq %r13,%r10
5391 movq %r14,%r11
5392 movq %r15,%r12
5393 andq $3,%r12
5394 movq %r15,%r13
5395 andq $-4,%r13
5396 movq %r9,%r14
5397 shrdq $2,%r9,%r15
5398 shrq $2,%r9
5399 addq %r13,%r10
5400 adcq %r14,%r11
5401 adcq $0,%r12
5402 addq %r15,%r10
5403 adcq %r9,%r11
5404 adcq $0,%r12
5405
5406 leaq 32(%r8),%r8
5407 vmovdqa .rol16(%rip),%ymm8
5408 vpaddd %ymm7,%ymm3,%ymm3
5409 vpaddd %ymm6,%ymm2,%ymm2
5410 vpaddd %ymm5,%ymm1,%ymm1
5411 vpaddd %ymm4,%ymm0,%ymm0
5412 vpxor %ymm3,%ymm15,%ymm15
5413 vpxor %ymm2,%ymm14,%ymm14
5414 vpxor %ymm1,%ymm13,%ymm13
5415 vpxor %ymm0,%ymm12,%ymm12
5416 vpshufb %ymm8,%ymm15,%ymm15
5417 vpshufb %ymm8,%ymm14,%ymm14
5418 vpshufb %ymm8,%ymm13,%ymm13
5419 vpshufb %ymm8,%ymm12,%ymm12
5420 vmovdqa 128(%rbp),%ymm8
5421 vpaddd %ymm15,%ymm11,%ymm11
5422 vpaddd %ymm14,%ymm10,%ymm10
5423 vpaddd %ymm13,%ymm9,%ymm9
5424 vpaddd %ymm12,%ymm8,%ymm8
5425 vpxor %ymm11,%ymm7,%ymm7
5426 vpxor %ymm10,%ymm6,%ymm6
5427 vpxor %ymm9,%ymm5,%ymm5
5428 vpxor %ymm8,%ymm4,%ymm4
5429 vmovdqa %ymm8,128(%rbp)
5430 vpsrld $20,%ymm7,%ymm8
5431 vpslld $32-20,%ymm7,%ymm7
5432 vpxor %ymm8,%ymm7,%ymm7
5433 vpsrld $20,%ymm6,%ymm8
5434 vpslld $32-20,%ymm6,%ymm6
5435 vpxor %ymm8,%ymm6,%ymm6
5436 vpsrld $20,%ymm5,%ymm8
5437 vpslld $32-20,%ymm5,%ymm5
5438 vpxor %ymm8,%ymm5,%ymm5
5439 vpsrld $20,%ymm4,%ymm8
5440 vpslld $32-20,%ymm4,%ymm4
5441 vpxor %ymm8,%ymm4,%ymm4
5442 vmovdqa .rol8(%rip),%ymm8
5443 vpaddd %ymm7,%ymm3,%ymm3
5444 vpaddd %ymm6,%ymm2,%ymm2
5445 vpaddd %ymm5,%ymm1,%ymm1
5446 vpaddd %ymm4,%ymm0,%ymm0
5447 vpxor %ymm3,%ymm15,%ymm15
5448 vpxor %ymm2,%ymm14,%ymm14
5449 vpxor %ymm1,%ymm13,%ymm13
5450 vpxor %ymm0,%ymm12,%ymm12
5451 vpshufb %ymm8,%ymm15,%ymm15
5452 vpshufb %ymm8,%ymm14,%ymm14
5453 vpshufb %ymm8,%ymm13,%ymm13
5454 vpshufb %ymm8,%ymm12,%ymm12
5455 vmovdqa 128(%rbp),%ymm8
5456 vpaddd %ymm15,%ymm11,%ymm11
5457 vpaddd %ymm14,%ymm10,%ymm10
5458 vpaddd %ymm13,%ymm9,%ymm9
5459 vpaddd %ymm12,%ymm8,%ymm8
5460 vpxor %ymm11,%ymm7,%ymm7
5461 vpxor %ymm10,%ymm6,%ymm6
5462 vpxor %ymm9,%ymm5,%ymm5
5463 vpxor %ymm8,%ymm4,%ymm4
5464 vmovdqa %ymm8,128(%rbp)
5465 vpsrld $25,%ymm7,%ymm8
5466 vpslld $32-25,%ymm7,%ymm7
5467 vpxor %ymm8,%ymm7,%ymm7
5468 vpsrld $25,%ymm6,%ymm8
5469 vpslld $32-25,%ymm6,%ymm6
5470 vpxor %ymm8,%ymm6,%ymm6
5471 vpsrld $25,%ymm5,%ymm8
5472 vpslld $32-25,%ymm5,%ymm5
5473 vpxor %ymm8,%ymm5,%ymm5
5474 vpsrld $25,%ymm4,%ymm8
5475 vpslld $32-25,%ymm4,%ymm4
5476 vpxor %ymm8,%ymm4,%ymm4
5477 vmovdqa 128(%rbp),%ymm8
5478 vpalignr $12,%ymm7,%ymm7,%ymm7
5479 vpalignr $8,%ymm11,%ymm11,%ymm11
5480 vpalignr $4,%ymm15,%ymm15,%ymm15
5481 vpalignr $12,%ymm6,%ymm6,%ymm6
5482 vpalignr $8,%ymm10,%ymm10,%ymm10
5483 vpalignr $4,%ymm14,%ymm14,%ymm14
5484 vpalignr $12,%ymm5,%ymm5,%ymm5
5485 vpalignr $8,%ymm9,%ymm9,%ymm9
5486 vpalignr $4,%ymm13,%ymm13,%ymm13
5487 vpalignr $12,%ymm4,%ymm4,%ymm4
5488 vpalignr $8,%ymm8,%ymm8,%ymm8
5489 vpalignr $4,%ymm12,%ymm12,%ymm12
5490
5491 incq %rcx
5492 cmpq $4,%rcx
5493 jl 1b
5494 cmpq $10,%rcx
5495 jne 2b
5496 movq %rbx,%rcx
5497 subq $384,%rcx
5498 andq $-16,%rcx
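# Absorb the 16-byte-aligned ciphertext beyond the 384 bytes already hashed inside the round loop.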
5499 1:
5500 testq %rcx,%rcx
5501 je 1f
5502 addq 0(%r8),%r10
5503 adcq 8+0(%r8),%r11
5504 adcq $1,%r12
5505 movq 0+0(%rbp),%rdx
5506 movq %rdx,%r15
5507 mulxq %r10,%r13,%r14
5508 mulxq %r11,%rax,%rdx
5509 imulq %r12,%r15
5510 addq %rax,%r14
5511 adcq %rdx,%r15
5512 movq 8+0(%rbp),%rdx
5513 mulxq %r10,%r10,%rax
5514 addq %r10,%r14
5515 mulxq %r11,%r11,%r9
5516 adcq %r11,%r15
5517 adcq $0,%r9
5518 imulq %r12,%rdx
5519 addq %rax,%r15
5520 adcq %rdx,%r9
5521 movq %r13,%r10
5522 movq %r14,%r11
5523 movq %r15,%r12
5524 andq $3,%r12
5525 movq %r15,%r13
5526 andq $-4,%r13
5527 movq %r9,%r14
5528 shrdq $2,%r9,%r15
5529 shrq $2,%r9
5530 addq %r13,%r10
5531 adcq %r14,%r11
5532 adcq $0,%r12
5533 addq %r15,%r10
5534 adcq %r9,%r11
5535 adcq $0,%r12
5536
5537 leaq 16(%r8),%r8
5538 subq $16,%rcx
5539 jmp 1b
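# Finalize all four states and xor the first 384 bytes; %ymm0 is spilled to 128(%rbp)
# while the output blocks are deinterleaved.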
5540 1:
5541 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
5542 vpaddd 64(%rbp),%ymm7,%ymm7
5543 vpaddd 96(%rbp),%ymm11,%ymm11
5544 vpaddd 256(%rbp),%ymm15,%ymm15
5545 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
5546 vpaddd 64(%rbp),%ymm6,%ymm6
5547 vpaddd 96(%rbp),%ymm10,%ymm10
5548 vpaddd 224(%rbp),%ymm14,%ymm14
5549 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
5550 vpaddd 64(%rbp),%ymm5,%ymm5
5551 vpaddd 96(%rbp),%ymm9,%ymm9
5552 vpaddd 192(%rbp),%ymm13,%ymm13
5553 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
5554 vpaddd 64(%rbp),%ymm4,%ymm4
5555 vpaddd 96(%rbp),%ymm8,%ymm8
5556 vpaddd 160(%rbp),%ymm12,%ymm12
5557
5558 vmovdqa %ymm0,128(%rbp)
5559 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
5560 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
5561 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
5562 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
5563 vpxor 0+0(%rsi),%ymm0,%ymm0
5564 vpxor 32+0(%rsi),%ymm3,%ymm3
5565 vpxor 64+0(%rsi),%ymm7,%ymm7
5566 vpxor 96+0(%rsi),%ymm11,%ymm11
5567 vmovdqu %ymm0,0+0(%rdi)
5568 vmovdqu %ymm3,32+0(%rdi)
5569 vmovdqu %ymm7,64+0(%rdi)
5570 vmovdqu %ymm11,96+0(%rdi)
5571
5572 vmovdqa 128(%rbp),%ymm0
5573 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
5574 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
5575 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
5576 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
5577 vpxor 0+128(%rsi),%ymm3,%ymm3
5578 vpxor 32+128(%rsi),%ymm2,%ymm2
5579 vpxor 64+128(%rsi),%ymm6,%ymm6
5580 vpxor 96+128(%rsi),%ymm10,%ymm10
5581 vmovdqu %ymm3,0+128(%rdi)
5582 vmovdqu %ymm2,32+128(%rdi)
5583 vmovdqu %ymm6,64+128(%rdi)
5584 vmovdqu %ymm10,96+128(%rdi)
5585 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
5586 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
5587 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
5588 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
5589 vpxor 0+256(%rsi),%ymm3,%ymm3
5590 vpxor 32+256(%rsi),%ymm1,%ymm1
5591 vpxor 64+256(%rsi),%ymm5,%ymm5
5592 vpxor 96+256(%rsi),%ymm9,%ymm9
5593 vmovdqu %ymm3,0+256(%rdi)
5594 vmovdqu %ymm1,32+256(%rdi)
5595 vmovdqu %ymm5,64+256(%rdi)
5596 vmovdqu %ymm9,96+256(%rdi)
5597 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
5598 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
5599 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
5600 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
5601 vmovdqa %ymm3,%ymm8
5602
5603 leaq 384(%rsi),%rsi
5604 leaq 384(%rdi),%rdi
5605 subq $384,%rbx
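# Up to 128 bytes of keystream remain queued in %ymm0/%ymm4/%ymm8/%ymm12; drain 32 bytes per iteration.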
5606 open_avx2_tail_loop:
5607 cmpq $32,%rbx
5608 jb open_avx2_tail
5609 subq $32,%rbx
5610 vpxor (%rsi),%ymm0,%ymm0
5611 vmovdqu %ymm0,(%rdi)
5612 leaq 32(%rsi),%rsi
5613 leaq 32(%rdi),%rdi
5614 vmovdqa %ymm4,%ymm0
5615 vmovdqa %ymm8,%ymm4
5616 vmovdqa %ymm12,%ymm8
5617 jmp open_avx2_tail_loop
5618 open_avx2_tail:
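# Consume one final 16-byte lane if present, move the high lane down, then finish
# the last 0..15 bytes in the SSE tail with the remaining keystream left in %xmm1.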
5619 cmpq $16,%rbx
5620 vmovdqa %xmm0,%xmm1
5621 jb 1f
5622 subq $16,%rbx
5623
5624 vpxor (%rsi),%xmm0,%xmm1
5625 vmovdqu %xmm1,(%rdi)
5626 leaq 16(%rsi),%rsi
5627 leaq 16(%rdi),%rdi
5628 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0
5629 vmovdqa %xmm0,%xmm1
5630 1:
5631 vzeroupper
5632 jmp open_sse_tail_16
5633
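# Open path for inputs of at most 192 bytes: two interleaved states yield 256 bytes of
# keystream, the first 32 of which become the Poly1305 key.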
5634 open_avx2_192:
5635 vmovdqa %ymm0,%ymm1
5636 vmovdqa %ymm0,%ymm2
5637 vmovdqa %ymm4,%ymm5
5638 vmovdqa %ymm4,%ymm6
5639 vmovdqa %ymm8,%ymm9
5640 vmovdqa %ymm8,%ymm10
5641 vpaddd .avx2_inc(%rip),%ymm12,%ymm13
5642 vmovdqa %ymm12,%ymm11
5643 vmovdqa %ymm13,%ymm15
5644 movq $10,%r10
5645 1:
5646 vpaddd %ymm4,%ymm0,%ymm0
5647 vpxor %ymm0,%ymm12,%ymm12
5648 vpshufb .rol16(%rip),%ymm12,%ymm12
5649 vpaddd %ymm12,%ymm8,%ymm8
5650 vpxor %ymm8,%ymm4,%ymm4
5651 vpsrld $20,%ymm4,%ymm3
5652 vpslld $12,%ymm4,%ymm4
5653 vpxor %ymm3,%ymm4,%ymm4
5654 vpaddd %ymm4,%ymm0,%ymm0
5655 vpxor %ymm0,%ymm12,%ymm12
5656 vpshufb .rol8(%rip),%ymm12,%ymm12
5657 vpaddd %ymm12,%ymm8,%ymm8
5658 vpxor %ymm8,%ymm4,%ymm4
5659 vpslld $7,%ymm4,%ymm3
5660 vpsrld $25,%ymm4,%ymm4
5661 vpxor %ymm3,%ymm4,%ymm4
5662 vpalignr $12,%ymm12,%ymm12,%ymm12
5663 vpalignr $8,%ymm8,%ymm8,%ymm8
5664 vpalignr $4,%ymm4,%ymm4,%ymm4
5665 vpaddd %ymm5,%ymm1,%ymm1
5666 vpxor %ymm1,%ymm13,%ymm13
5667 vpshufb .rol16(%rip),%ymm13,%ymm13
5668 vpaddd %ymm13,%ymm9,%ymm9
5669 vpxor %ymm9,%ymm5,%ymm5
5670 vpsrld $20,%ymm5,%ymm3
5671 vpslld $12,%ymm5,%ymm5
5672 vpxor %ymm3,%ymm5,%ymm5
5673 vpaddd %ymm5,%ymm1,%ymm1
5674 vpxor %ymm1,%ymm13,%ymm13
5675 vpshufb .rol8(%rip),%ymm13,%ymm13
5676 vpaddd %ymm13,%ymm9,%ymm9
5677 vpxor %ymm9,%ymm5,%ymm5
5678 vpslld $7,%ymm5,%ymm3
5679 vpsrld $25,%ymm5,%ymm5
5680 vpxor %ymm3,%ymm5,%ymm5
5681 vpalignr $12,%ymm13,%ymm13,%ymm13
5682 vpalignr $8,%ymm9,%ymm9,%ymm9
5683 vpalignr $4,%ymm5,%ymm5,%ymm5
5684 vpaddd %ymm4,%ymm0,%ymm0
5685 vpxor %ymm0,%ymm12,%ymm12
5686 vpshufb .rol16(%rip),%ymm12,%ymm12
5687 vpaddd %ymm12,%ymm8,%ymm8
5688 vpxor %ymm8,%ymm4,%ymm4
5689 vpsrld $20,%ymm4,%ymm3
5690 vpslld $12,%ymm4,%ymm4
5691 vpxor %ymm3,%ymm4,%ymm4
5692 vpaddd %ymm4,%ymm0,%ymm0
5693 vpxor %ymm0,%ymm12,%ymm12
5694 vpshufb .rol8(%rip),%ymm12,%ymm12
5695 vpaddd %ymm12,%ymm8,%ymm8
5696 vpxor %ymm8,%ymm4,%ymm4
5697 vpslld $7,%ymm4,%ymm3
5698 vpsrld $25,%ymm4,%ymm4
5699 vpxor %ymm3,%ymm4,%ymm4
5700 vpalignr $4,%ymm12,%ymm12,%ymm12
5701 vpalignr $8,%ymm8,%ymm8,%ymm8
5702 vpalignr $12,%ymm4,%ymm4,%ymm4
5703 vpaddd %ymm5,%ymm1,%ymm1
5704 vpxor %ymm1,%ymm13,%ymm13
5705 vpshufb .rol16(%rip),%ymm13,%ymm13
5706 vpaddd %ymm13,%ymm9,%ymm9
5707 vpxor %ymm9,%ymm5,%ymm5
5708 vpsrld $20,%ymm5,%ymm3
5709 vpslld $12,%ymm5,%ymm5
5710 vpxor %ymm3,%ymm5,%ymm5
5711 vpaddd %ymm5,%ymm1,%ymm1
5712 vpxor %ymm1,%ymm13,%ymm13
5713 vpshufb .rol8(%rip),%ymm13,%ymm13
5714 vpaddd %ymm13,%ymm9,%ymm9
5715 vpxor %ymm9,%ymm5,%ymm5
5716 vpslld $7,%ymm5,%ymm3
5717 vpsrld $25,%ymm5,%ymm5
5718 vpxor %ymm3,%ymm5,%ymm5
5719 vpalignr $4,%ymm13,%ymm13,%ymm13
5720 vpalignr $8,%ymm9,%ymm9,%ymm9
5721 vpalignr $12,%ymm5,%ymm5,%ymm5
5722
5723 decq %r10
5724 jne 1b
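# Add the initial values (kept in registers on this path) back in; the clamp below masks
# the Poly1305 r half per RFC 7539 and passes s through.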
5725 vpaddd %ymm2,%ymm0,%ymm0
5726 vpaddd %ymm2,%ymm1,%ymm1
5727 vpaddd %ymm6,%ymm4,%ymm4
5728 vpaddd %ymm6,%ymm5,%ymm5
5729 vpaddd %ymm10,%ymm8,%ymm8
5730 vpaddd %ymm10,%ymm9,%ymm9
5731 vpaddd %ymm11,%ymm12,%ymm12
5732 vpaddd %ymm15,%ymm13,%ymm13
5733 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
5734
5735 vpand .clamp(%rip),%ymm3,%ymm3
5736 vmovdqa %ymm3,0(%rbp)
5737
5738 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
5739 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
5740 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
5741 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
5742 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
5743 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
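# Shared short-open path: absorb the AD, then hash two ciphertext blocks and decrypt
# 32 bytes per iteration, rotating queued keystream through %ymm0.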
5744 open_avx2_short:
5745 movq %r8,%r8 # apparent no-op left by the code generator; %r8 already holds the AD length
5746 call poly_hash_ad_internal
5747 open_avx2_hash_and_xor_loop:
5748 cmpq $32,%rbx
5749 jb open_avx2_short_tail_32
5750 subq $32,%rbx
5751 addq 0(%rsi),%r10
5752 adcq 8+0(%rsi),%r11
5753 adcq $1,%r12
5754 movq 0+0(%rbp),%rax
5755 movq %rax,%r15
5756 mulq %r10
5757 movq %rax,%r13
5758 movq %rdx,%r14
5759 movq 0+0(%rbp),%rax
5760 mulq %r11
5761 imulq %r12,%r15
5762 addq %rax,%r14
5763 adcq %rdx,%r15
5764 movq 8+0(%rbp),%rax
5765 movq %rax,%r9
5766 mulq %r10
5767 addq %rax,%r14
5768 adcq $0,%rdx
5769 movq %rdx,%r10
5770 movq 8+0(%rbp),%rax
5771 mulq %r11
5772 addq %rax,%r15
5773 adcq $0,%rdx
5774 imulq %r12,%r9
5775 addq %r10,%r15
5776 adcq %rdx,%r9
5777 movq %r13,%r10
5778 movq %r14,%r11
5779 movq %r15,%r12
5780 andq $3,%r12
5781 movq %r15,%r13
5782 andq $-4,%r13
5783 movq %r9,%r14
5784 shrdq $2,%r9,%r15
5785 shrq $2,%r9
5786 addq %r13,%r10
5787 adcq %r14,%r11
5788 adcq $0,%r12
5789 addq %r15,%r10
5790 adcq %r9,%r11
5791 adcq $0,%r12
5792 addq 16(%rsi),%r10
5793 adcq 8+16(%rsi),%r11
5794 adcq $1,%r12
5795 movq 0+0(%rbp),%rax
5796 movq %rax,%r15
5797 mulq %r10
5798 movq %rax,%r13
5799 movq %rdx,%r14
5800 movq 0+0(%rbp),%rax
5801 mulq %r11
5802 imulq %r12,%r15
5803 addq %rax,%r14
5804 adcq %rdx,%r15
5805 movq 8+0(%rbp),%rax
5806 movq %rax,%r9
5807 mulq %r10
5808 addq %rax,%r14
5809 adcq $0,%rdx
5810 movq %rdx,%r10
5811 movq 8+0(%rbp),%rax
5812 mulq %r11
5813 addq %rax,%r15
5814 adcq $0,%rdx
5815 imulq %r12,%r9
5816 addq %r10,%r15
5817 adcq %rdx,%r9
5818 movq %r13,%r10
5819 movq %r14,%r11
5820 movq %r15,%r12
5821 andq $3,%r12
5822 movq %r15,%r13
5823 andq $-4,%r13
5824 movq %r9,%r14
5825 shrdq $2,%r9,%r15
5826 shrq $2,%r9
5827 addq %r13,%r10
5828 adcq %r14,%r11
5829 adcq $0,%r12
5830 addq %r15,%r10
5831 adcq %r9,%r11
5832 adcq $0,%r12
5833
5834
5835 vpxor (%rsi),%ymm0,%ymm0
5836 vmovdqu %ymm0,(%rdi)
5837 leaq 32(%rsi),%rsi
5838 leaq 32(%rdi),%rdi
5839
5840 vmovdqa %ymm4,%ymm0
5841 vmovdqa %ymm8,%ymm4
5842 vmovdqa %ymm12,%ymm8
5843 vmovdqa %ymm1,%ymm12
5844 vmovdqa %ymm5,%ymm1
5845 vmovdqa %ymm9,%ymm5
5846 vmovdqa %ymm13,%ymm9
5847 vmovdqa %ymm2,%ymm13
5848 vmovdqa %ymm6,%ymm2
5849 jmp open_avx2_hash_and_xor_loop
5850 open_avx2_short_tail_32:
5851 cmpq $16,%rbx
5852 vmovdqa %xmm0,%xmm1
5853 jb 1f
5854 subq $16,%rbx
5855 addq 0(%rsi),%r10
5856 adcq 8+0(%rsi),%r11
5857 adcq $1,%r12
5858 movq 0+0(%rbp),%rax
5859 movq %rax,%r15
5860 mulq %r10
5861 movq %rax,%r13
5862 movq %rdx,%r14
5863 movq 0+0(%rbp),%rax
5864 mulq %r11
5865 imulq %r12,%r15
5866 addq %rax,%r14
5867 adcq %rdx,%r15
5868 movq 8+0(%rbp),%rax
5869 movq %rax,%r9
5870 mulq %r10
5871 addq %rax,%r14
5872 adcq $0,%rdx
5873 movq %rdx,%r10
5874 movq 8+0(%rbp),%rax
5875 mulq %r11
5876 addq %rax,%r15
5877 adcq $0,%rdx
5878 imulq %r12,%r9
5879 addq %r10,%r15
5880 adcq %rdx,%r9
5881 movq %r13,%r10
5882 movq %r14,%r11
5883 movq %r15,%r12
5884 andq $3,%r12
5885 movq %r15,%r13
5886 andq $-4,%r13
5887 movq %r9,%r14
5888 shrdq $2,%r9,%r15
5889 shrq $2,%r9
5890 addq %r13,%r10
5891 adcq %r14,%r11
5892 adcq $0,%r12
5893 addq %r15,%r10
5894 adcq %r9,%r11
5895 adcq $0,%r12
5896
5897 vpxor (%rsi),%xmm0,%xmm3
5898 vmovdqu %xmm3,(%rdi)
5899 leaq 16(%rsi),%rsi
5900 leaq 16(%rdi),%rdi
5901 vextracti128 $1,%ymm0,%xmm1
5902 1:
5903 vzeroupper
5904 jmp open_sse_tail_16
5905
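# Open path for 193..320 bytes: three interleaved states, again clamping the first
# 32 keystream bytes as the Poly1305 key.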
5906 open_avx2_320:
5907 vmovdqa %ymm0,%ymm1
5908 vmovdqa %ymm0,%ymm2
5909 vmovdqa %ymm4,%ymm5
5910 vmovdqa %ymm4,%ymm6
5911 vmovdqa %ymm8,%ymm9
5912 vmovdqa %ymm8,%ymm10
5913 vpaddd .avx2_inc(%rip),%ymm12,%ymm13
5914 vpaddd .avx2_inc(%rip),%ymm13,%ymm14
5915 vmovdqa %ymm4,%ymm7
5916 vmovdqa %ymm8,%ymm11
5917 vmovdqa %ymm12,160(%rbp)
5918 vmovdqa %ymm13,192(%rbp)
5919 vmovdqa %ymm14,224(%rbp)
5920 movq $10,%r10
5921 1:
5922 vpaddd %ymm4,%ymm0,%ymm0
5923 vpxor %ymm0,%ymm12,%ymm12
5924 vpshufb .rol16(%rip),%ymm12,%ymm12
5925 vpaddd %ymm12,%ymm8,%ymm8
5926 vpxor %ymm8,%ymm4,%ymm4
5927 vpsrld $20,%ymm4,%ymm3
5928 vpslld $12,%ymm4,%ymm4
5929 vpxor %ymm3,%ymm4,%ymm4
5930 vpaddd %ymm4,%ymm0,%ymm0
5931 vpxor %ymm0,%ymm12,%ymm12
5932 vpshufb .rol8(%rip),%ymm12,%ymm12
5933 vpaddd %ymm12,%ymm8,%ymm8
5934 vpxor %ymm8,%ymm4,%ymm4
5935 vpslld $7,%ymm4,%ymm3
5936 vpsrld $25,%ymm4,%ymm4
5937 vpxor %ymm3,%ymm4,%ymm4
5938 vpalignr $12,%ymm12,%ymm12,%ymm12
5939 vpalignr $8,%ymm8,%ymm8,%ymm8
5940 vpalignr $4,%ymm4,%ymm4,%ymm4
5941 vpaddd %ymm5,%ymm1,%ymm1
5942 vpxor %ymm1,%ymm13,%ymm13
5943 vpshufb .rol16(%rip),%ymm13,%ymm13
5944 vpaddd %ymm13,%ymm9,%ymm9
5945 vpxor %ymm9,%ymm5,%ymm5
5946 vpsrld $20,%ymm5,%ymm3
5947 vpslld $12,%ymm5,%ymm5
5948 vpxor %ymm3,%ymm5,%ymm5
5949 vpaddd %ymm5,%ymm1,%ymm1
5950 vpxor %ymm1,%ymm13,%ymm13
5951 vpshufb .rol8(%rip),%ymm13,%ymm13
5952 vpaddd %ymm13,%ymm9,%ymm9
5953 vpxor %ymm9,%ymm5,%ymm5
5954 vpslld $7,%ymm5,%ymm3
5955 vpsrld $25,%ymm5,%ymm5
5956 vpxor %ymm3,%ymm5,%ymm5
5957 vpalignr $12,%ymm13,%ymm13,%ymm13
5958 vpalignr $8,%ymm9,%ymm9,%ymm9
5959 vpalignr $4,%ymm5,%ymm5,%ymm5
5960 vpaddd %ymm6,%ymm2,%ymm2
5961 vpxor %ymm2,%ymm14,%ymm14
5962 vpshufb .rol16(%rip),%ymm14,%ymm14
5963 vpaddd %ymm14,%ymm10,%ymm10
5964 vpxor %ymm10,%ymm6,%ymm6
5965 vpsrld $20,%ymm6,%ymm3
5966 vpslld $12,%ymm6,%ymm6
5967 vpxor %ymm3,%ymm6,%ymm6
5968 vpaddd %ymm6,%ymm2,%ymm2
5969 vpxor %ymm2,%ymm14,%ymm14
5970 vpshufb .rol8(%rip),%ymm14,%ymm14
5971 vpaddd %ymm14,%ymm10,%ymm10
5972 vpxor %ymm10,%ymm6,%ymm6
5973 vpslld $7,%ymm6,%ymm3
5974 vpsrld $25,%ymm6,%ymm6
5975 vpxor %ymm3,%ymm6,%ymm6
5976 vpalignr $12,%ymm14,%ymm14,%ymm14
5977 vpalignr $8,%ymm10,%ymm10,%ymm10
5978 vpalignr $4,%ymm6,%ymm6,%ymm6
5979 vpaddd %ymm4,%ymm0,%ymm0
5980 vpxor %ymm0,%ymm12,%ymm12
5981 vpshufb .rol16(%rip),%ymm12,%ymm12
5982 vpaddd %ymm12,%ymm8,%ymm8
5983 vpxor %ymm8,%ymm4,%ymm4
5984 vpsrld $20,%ymm4,%ymm3
5985 vpslld $12,%ymm4,%ymm4
5986 vpxor %ymm3,%ymm4,%ymm4
5987 vpaddd %ymm4,%ymm0,%ymm0
5988 vpxor %ymm0,%ymm12,%ymm12
5989 vpshufb .rol8(%rip),%ymm12,%ymm12
5990 vpaddd %ymm12,%ymm8,%ymm8
5991 vpxor %ymm8,%ymm4,%ymm4
5992 vpslld $7,%ymm4,%ymm3
5993 vpsrld $25,%ymm4,%ymm4
5994 vpxor %ymm3,%ymm4,%ymm4
5995 vpalignr $4,%ymm12,%ymm12,%ymm12
5996 vpalignr $8,%ymm8,%ymm8,%ymm8
5997 vpalignr $12,%ymm4,%ymm4,%ymm4
5998 vpaddd %ymm5,%ymm1,%ymm1
5999 vpxor %ymm1,%ymm13,%ymm13
6000 vpshufb .rol16(%rip),%ymm13,%ymm13
6001 vpaddd %ymm13,%ymm9,%ymm9
6002 vpxor %ymm9,%ymm5,%ymm5
6003 vpsrld $20,%ymm5,%ymm3
6004 vpslld $12,%ymm5,%ymm5
6005 vpxor %ymm3,%ymm5,%ymm5
6006 vpaddd %ymm5,%ymm1,%ymm1
6007 vpxor %ymm1,%ymm13,%ymm13
6008 vpshufb .rol8(%rip),%ymm13,%ymm13
6009 vpaddd %ymm13,%ymm9,%ymm9
6010 vpxor %ymm9,%ymm5,%ymm5
6011 vpslld $7,%ymm5,%ymm3
6012 vpsrld $25,%ymm5,%ymm5
6013 vpxor %ymm3,%ymm5,%ymm5
6014 vpalignr $4,%ymm13,%ymm13,%ymm13
6015 vpalignr $8,%ymm9,%ymm9,%ymm9
6016 vpalignr $12,%ymm5,%ymm5,%ymm5
6017 vpaddd %ymm6,%ymm2,%ymm2
6018 vpxor %ymm2,%ymm14,%ymm14
6019 vpshufb .rol16(%rip),%ymm14,%ymm14
6020 vpaddd %ymm14,%ymm10,%ymm10
6021 vpxor %ymm10,%ymm6,%ymm6
6022 vpsrld $20,%ymm6,%ymm3
6023 vpslld $12,%ymm6,%ymm6
6024 vpxor %ymm3,%ymm6,%ymm6
6025 vpaddd %ymm6,%ymm2,%ymm2
6026 vpxor %ymm2,%ymm14,%ymm14
6027 vpshufb .rol8(%rip),%ymm14,%ymm14
6028 vpaddd %ymm14,%ymm10,%ymm10
6029 vpxor %ymm10,%ymm6,%ymm6
6030 vpslld $7,%ymm6,%ymm3
6031 vpsrld $25,%ymm6,%ymm6
6032 vpxor %ymm3,%ymm6,%ymm6
6033 vpalignr $4,%ymm14,%ymm14,%ymm14
6034 vpalignr $8,%ymm10,%ymm10,%ymm10
6035 vpalignr $12,%ymm6,%ymm6,%ymm6
6036
6037 decq %r10
6038 jne 1b
6039 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
6040 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
6041 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
6042 vpaddd %ymm7,%ymm4,%ymm4
6043 vpaddd %ymm7,%ymm5,%ymm5
6044 vpaddd %ymm7,%ymm6,%ymm6
6045 vpaddd %ymm11,%ymm8,%ymm8
6046 vpaddd %ymm11,%ymm9,%ymm9
6047 vpaddd %ymm11,%ymm10,%ymm10
6048 vpaddd 160(%rbp),%ymm12,%ymm12
6049 vpaddd 192(%rbp),%ymm13,%ymm13
6050 vpaddd 224(%rbp),%ymm14,%ymm14
6051 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
6052
6053 vpand .clamp(%rip),%ymm3,%ymm3
6054 vmovdqa %ymm3,0(%rbp)
6055
6056 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
6057 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
6058 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
6059 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
6060 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
6061 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
6062 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
6063 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
6064 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
6065 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
6066 jmp open_avx2_short
6067 .size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
6068
6069
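# seal: broadcast the key and counter/nonce block at (%r9) into both ymm lanes,
# then dispatch on plaintext length.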
6070 .type chacha20_poly1305_seal_avx2,@function
6071 .align 64
6072 chacha20_poly1305_seal_avx2:
6073 vzeroupper
6074 vmovdqa .chacha20_consts(%rip),%ymm0
6075 vbroadcasti128 0(%r9),%ymm4
6076 vbroadcasti128 16(%r9),%ymm8
6077 vbroadcasti128 32(%r9),%ymm12
6078 vpaddd .avx2_init(%rip),%ymm12,%ymm12
6079 cmpq $192,%rbx
6080 jbe seal_avx2_192
6081 cmpq $320,%rbx
6082 jbe seal_avx2_320
6083 vmovdqa %ymm0,%ymm1
6084 vmovdqa %ymm0,%ymm2
6085 vmovdqa %ymm0,%ymm3
6086 vmovdqa %ymm4,%ymm5
6087 vmovdqa %ymm4,%ymm6
6088 vmovdqa %ymm4,%ymm7
6089 vmovdqa %ymm4,64(%rbp)
6090 vmovdqa %ymm8,%ymm9
6091 vmovdqa %ymm8,%ymm10
6092 vmovdqa %ymm8,%ymm11
6093 vmovdqa %ymm8,96(%rbp)
6094 vmovdqa %ymm12,%ymm15
6095 vpaddd .avx2_inc(%rip),%ymm15,%ymm14
6096 vpaddd .avx2_inc(%rip),%ymm14,%ymm13
6097 vpaddd .avx2_inc(%rip),%ymm13,%ymm12
6098 vmovdqa %ymm12,160(%rbp)
6099 vmovdqa %ymm13,192(%rbp)
6100 vmovdqa %ymm14,224(%rbp)
6101 vmovdqa %ymm15,256(%rbp)
6102 movq $10,%r10
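# First pass: 10 double rounds over four states with no Poly1305 work interleaved,
# since no ciphertext exists yet.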
6103 1:
6104 vmovdqa %ymm8,128(%rbp)
6105 vmovdqa .rol16(%rip),%ymm8
6106 vpaddd %ymm7,%ymm3,%ymm3
6107 vpaddd %ymm6,%ymm2,%ymm2
6108 vpaddd %ymm5,%ymm1,%ymm1
6109 vpaddd %ymm4,%ymm0,%ymm0
6110 vpxor %ymm3,%ymm15,%ymm15
6111 vpxor %ymm2,%ymm14,%ymm14
6112 vpxor %ymm1,%ymm13,%ymm13
6113 vpxor %ymm0,%ymm12,%ymm12
6114 vpshufb %ymm8,%ymm15,%ymm15
6115 vpshufb %ymm8,%ymm14,%ymm14
6116 vpshufb %ymm8,%ymm13,%ymm13
6117 vpshufb %ymm8,%ymm12,%ymm12
6118 vmovdqa 128(%rbp),%ymm8
6119 vpaddd %ymm15,%ymm11,%ymm11
6120 vpaddd %ymm14,%ymm10,%ymm10
6121 vpaddd %ymm13,%ymm9,%ymm9
6122 vpaddd %ymm12,%ymm8,%ymm8
6123 vpxor %ymm11,%ymm7,%ymm7
6124 vpxor %ymm10,%ymm6,%ymm6
6125 vpxor %ymm9,%ymm5,%ymm5
6126 vpxor %ymm8,%ymm4,%ymm4
6127 vmovdqa %ymm8,128(%rbp)
6128 vpsrld $20,%ymm7,%ymm8
6129 vpslld $32-20,%ymm7,%ymm7
6130 vpxor %ymm8,%ymm7,%ymm7
6131 vpsrld $20,%ymm6,%ymm8
6132 vpslld $32-20,%ymm6,%ymm6
6133 vpxor %ymm8,%ymm6,%ymm6
6134 vpsrld $20,%ymm5,%ymm8
6135 vpslld $32-20,%ymm5,%ymm5
6136 vpxor %ymm8,%ymm5,%ymm5
6137 vpsrld $20,%ymm4,%ymm8
6138 vpslld $32-20,%ymm4,%ymm4
6139 vpxor %ymm8,%ymm4,%ymm4
6140 vmovdqa .rol8(%rip),%ymm8
6141 vpaddd %ymm7,%ymm3,%ymm3
6142 vpaddd %ymm6,%ymm2,%ymm2
6143 vpaddd %ymm5,%ymm1,%ymm1
6144 vpaddd %ymm4,%ymm0,%ymm0
6145 vpxor %ymm3,%ymm15,%ymm15
6146 vpxor %ymm2,%ymm14,%ymm14
6147 vpxor %ymm1,%ymm13,%ymm13
6148 vpxor %ymm0,%ymm12,%ymm12
6149 vpshufb %ymm8,%ymm15,%ymm15
6150 vpshufb %ymm8,%ymm14,%ymm14
6151 vpshufb %ymm8,%ymm13,%ymm13
6152 vpshufb %ymm8,%ymm12,%ymm12
6153 vmovdqa 128(%rbp),%ymm8
6154 vpaddd %ymm15,%ymm11,%ymm11
6155 vpaddd %ymm14,%ymm10,%ymm10
6156 vpaddd %ymm13,%ymm9,%ymm9
6157 vpaddd %ymm12,%ymm8,%ymm8
6158 vpxor %ymm11,%ymm7,%ymm7
6159 vpxor %ymm10,%ymm6,%ymm6
6160 vpxor %ymm9,%ymm5,%ymm5
6161 vpxor %ymm8,%ymm4,%ymm4
6162 vmovdqa %ymm8,128(%rbp)
6163 vpsrld $25,%ymm7,%ymm8
6164 vpslld $32-25,%ymm7,%ymm7
6165 vpxor %ymm8,%ymm7,%ymm7
6166 vpsrld $25,%ymm6,%ymm8
6167 vpslld $32-25,%ymm6,%ymm6
6168 vpxor %ymm8,%ymm6,%ymm6
6169 vpsrld $25,%ymm5,%ymm8
6170 vpslld $32-25,%ymm5,%ymm5
6171 vpxor %ymm8,%ymm5,%ymm5
6172 vpsrld $25,%ymm4,%ymm8
6173 vpslld $32-25,%ymm4,%ymm4
6174 vpxor %ymm8,%ymm4,%ymm4
6175 vmovdqa 128(%rbp),%ymm8
6176 vpalignr $4,%ymm7,%ymm7,%ymm7
6177 vpalignr $8,%ymm11,%ymm11,%ymm11
6178 vpalignr $12,%ymm15,%ymm15,%ymm15
6179 vpalignr $4,%ymm6,%ymm6,%ymm6
6180 vpalignr $8,%ymm10,%ymm10,%ymm10
6181 vpalignr $12,%ymm14,%ymm14,%ymm14
6182 vpalignr $4,%ymm5,%ymm5,%ymm5
6183 vpalignr $8,%ymm9,%ymm9,%ymm9
6184 vpalignr $12,%ymm13,%ymm13,%ymm13
6185 vpalignr $4,%ymm4,%ymm4,%ymm4
6186 vpalignr $8,%ymm8,%ymm8,%ymm8
6187 vpalignr $12,%ymm12,%ymm12,%ymm12
6188 vmovdqa %ymm8,128(%rbp)
6189 vmovdqa .rol16(%rip),%ymm8
6190 vpaddd %ymm7,%ymm3,%ymm3
6191 vpaddd %ymm6,%ymm2,%ymm2
6192 vpaddd %ymm5,%ymm1,%ymm1
6193 vpaddd %ymm4,%ymm0,%ymm0
6194 vpxor %ymm3,%ymm15,%ymm15
6195 vpxor %ymm2,%ymm14,%ymm14
6196 vpxor %ymm1,%ymm13,%ymm13
6197 vpxor %ymm0,%ymm12,%ymm12
6198 vpshufb %ymm8,%ymm15,%ymm15
6199 vpshufb %ymm8,%ymm14,%ymm14
6200 vpshufb %ymm8,%ymm13,%ymm13
6201 vpshufb %ymm8,%ymm12,%ymm12
6202 vmovdqa 128(%rbp),%ymm8
6203 vpaddd %ymm15,%ymm11,%ymm11
6204 vpaddd %ymm14,%ymm10,%ymm10
6205 vpaddd %ymm13,%ymm9,%ymm9
6206 vpaddd %ymm12,%ymm8,%ymm8
6207 vpxor %ymm11,%ymm7,%ymm7
6208 vpxor %ymm10,%ymm6,%ymm6
6209 vpxor %ymm9,%ymm5,%ymm5
6210 vpxor %ymm8,%ymm4,%ymm4
6211 vmovdqa %ymm8,128(%rbp)
6212 vpsrld $20,%ymm7,%ymm8
6213 vpslld $32-20,%ymm7,%ymm7
6214 vpxor %ymm8,%ymm7,%ymm7
6215 vpsrld $20,%ymm6,%ymm8
6216 vpslld $32-20,%ymm6,%ymm6
6217 vpxor %ymm8,%ymm6,%ymm6
6218 vpsrld $20,%ymm5,%ymm8
6219 vpslld $32-20,%ymm5,%ymm5
6220 vpxor %ymm8,%ymm5,%ymm5
6221 vpsrld $20,%ymm4,%ymm8
6222 vpslld $32-20,%ymm4,%ymm4
6223 vpxor %ymm8,%ymm4,%ymm4
6224 vmovdqa .rol8(%rip),%ymm8
6225 vpaddd %ymm7,%ymm3,%ymm3
6226 vpaddd %ymm6,%ymm2,%ymm2
6227 vpaddd %ymm5,%ymm1,%ymm1
6228 vpaddd %ymm4,%ymm0,%ymm0
6229 vpxor %ymm3,%ymm15,%ymm15
6230 vpxor %ymm2,%ymm14,%ymm14
6231 vpxor %ymm1,%ymm13,%ymm13
6232 vpxor %ymm0,%ymm12,%ymm12
6233 vpshufb %ymm8,%ymm15,%ymm15
6234 vpshufb %ymm8,%ymm14,%ymm14
6235 vpshufb %ymm8,%ymm13,%ymm13
6236 vpshufb %ymm8,%ymm12,%ymm12
6237 vmovdqa 128(%rbp),%ymm8
6238 vpaddd %ymm15,%ymm11,%ymm11
6239 vpaddd %ymm14,%ymm10,%ymm10
6240 vpaddd %ymm13,%ymm9,%ymm9
6241 vpaddd %ymm12,%ymm8,%ymm8
6242 vpxor %ymm11,%ymm7,%ymm7
6243 vpxor %ymm10,%ymm6,%ymm6
6244 vpxor %ymm9,%ymm5,%ymm5
6245 vpxor %ymm8,%ymm4,%ymm4
6246 vmovdqa %ymm8,128(%rbp)
6247 vpsrld $25,%ymm7,%ymm8
6248 vpslld $32-25,%ymm7,%ymm7
6249 vpxor %ymm8,%ymm7,%ymm7
6250 vpsrld $25,%ymm6,%ymm8
6251 vpslld $32-25,%ymm6,%ymm6
6252 vpxor %ymm8,%ymm6,%ymm6
6253 vpsrld $25,%ymm5,%ymm8
6254 vpslld $32-25,%ymm5,%ymm5
6255 vpxor %ymm8,%ymm5,%ymm5
6256 vpsrld $25,%ymm4,%ymm8
6257 vpslld $32-25,%ymm4,%ymm4
6258 vpxor %ymm8,%ymm4,%ymm4
6259 vmovdqa 128(%rbp),%ymm8
6260 vpalignr $12,%ymm7,%ymm7,%ymm7
6261 vpalignr $8,%ymm11,%ymm11,%ymm11
6262 vpalignr $4,%ymm15,%ymm15,%ymm15
6263 vpalignr $12,%ymm6,%ymm6,%ymm6
6264 vpalignr $8,%ymm10,%ymm10,%ymm10
6265 vpalignr $4,%ymm14,%ymm14,%ymm14
6266 vpalignr $12,%ymm5,%ymm5,%ymm5
6267 vpalignr $8,%ymm9,%ymm9,%ymm9
6268 vpalignr $4,%ymm13,%ymm13,%ymm13
6269 vpalignr $12,%ymm4,%ymm4,%ymm4
6270 vpalignr $8,%ymm8,%ymm8,%ymm8
6271 vpalignr $4,%ymm12,%ymm12,%ymm12
6272
6273 decq %r10
6274 jnz 1b
6275 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
6276 vpaddd 64(%rbp),%ymm7,%ymm7
6277 vpaddd 96(%rbp),%ymm11,%ymm11
6278 vpaddd 256(%rbp),%ymm15,%ymm15
6279 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
6280 vpaddd 64(%rbp),%ymm6,%ymm6
6281 vpaddd 96(%rbp),%ymm10,%ymm10
6282 vpaddd 224(%rbp),%ymm14,%ymm14
6283 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
6284 vpaddd 64(%rbp),%ymm5,%ymm5
6285 vpaddd 96(%rbp),%ymm9,%ymm9
6286 vpaddd 192(%rbp),%ymm13,%ymm13
6287 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
6288 vpaddd 64(%rbp),%ymm4,%ymm4
6289 vpaddd 96(%rbp),%ymm8,%ymm8
6290 vpaddd 160(%rbp),%ymm12,%ymm12
6291
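# Deinterleave the first state, clamp 32 keystream bytes as the Poly1305 key,
# and absorb the AD before any ciphertext is written.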
6292 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
6293 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15
6294 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3
6295 vpand .clamp(%rip),%ymm15,%ymm15
6296 vmovdqa %ymm15,0(%rbp)
6297 movq %r8,%r8 # apparent no-op left by the code generator; %r8 already holds the AD length
6298 call poly_hash_ad_internal
6299
6300 vpxor 0(%rsi),%ymm3,%ymm3
6301 vpxor 32(%rsi),%ymm11,%ymm11
6302 vmovdqu %ymm3,0(%rdi)
6303 vmovdqu %ymm11,32(%rdi)
6304 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15
6305 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
6306 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
6307 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
6308 vpxor 0+64(%rsi),%ymm15,%ymm15
6309 vpxor 32+64(%rsi),%ymm2,%ymm2
6310 vpxor 64+64(%rsi),%ymm6,%ymm6
6311 vpxor 96+64(%rsi),%ymm10,%ymm10
6312 vmovdqu %ymm15,0+64(%rdi)
6313 vmovdqu %ymm2,32+64(%rdi)
6314 vmovdqu %ymm6,64+64(%rdi)
6315 vmovdqu %ymm10,96+64(%rdi)
6316 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15
6317 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
6318 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
6319 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
6320 vpxor 0+192(%rsi),%ymm15,%ymm15
6321 vpxor 32+192(%rsi),%ymm1,%ymm1
6322 vpxor 64+192(%rsi),%ymm5,%ymm5
6323 vpxor 96+192(%rsi),%ymm9,%ymm9
6324 vmovdqu %ymm15,0+192(%rdi)
6325 vmovdqu %ymm1,32+192(%rdi)
6326 vmovdqu %ymm5,64+192(%rdi)
6327 vmovdqu %ymm9,96+192(%rdi)
6328 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15
6329 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
6330 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
6331 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
6332 vmovdqa %ymm15,%ymm8
6333
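# 320 bytes of ciphertext are written but not yet hashed; %rcx tracks the Poly1305
# backlog consumed later by seal_avx2_hash.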
6334 leaq 320(%rsi),%rsi
6335 subq $320,%rbx
6336 movq $320,%rcx
6337 cmpq $128,%rbx
6338 jbe seal_avx2_hash
6339 vpxor 0(%rsi),%ymm0,%ymm0
6340 vpxor 32(%rsi),%ymm4,%ymm4
6341 vpxor 64(%rsi),%ymm8,%ymm8
6342 vpxor 96(%rsi),%ymm12,%ymm12
6343 vmovdqu %ymm0,320(%rdi)
6344 vmovdqu %ymm4,352(%rdi)
6345 vmovdqu %ymm8,384(%rdi)
6346 vmovdqu %ymm12,416(%rdi)
6347 leaq 128(%rsi),%rsi
6348 subq $128,%rbx
6349 movq $8,%rcx
6350 movq $2,%r8
6351 cmpq $128,%rbx
6352 jbe seal_avx2_tail_128
6353 cmpq $256,%rbx
6354 jbe seal_avx2_tail_256
6355 cmpq $384,%rbx
6356 jbe seal_avx2_tail_384
6357 cmpq $512,%rbx
6358 jbe seal_avx2_tail_512
6359 vmovdqa .chacha20_consts(%rip),%ymm0
6360 vmovdqa 64(%rbp),%ymm4
6361 vmovdqa 96(%rbp),%ymm8
6362 vmovdqa %ymm0,%ymm1
6363 vmovdqa %ymm4,%ymm5
6364 vmovdqa %ymm8,%ymm9
6365 vmovdqa %ymm0,%ymm2
6366 vmovdqa %ymm4,%ymm6
6367 vmovdqa %ymm8,%ymm10
6368 vmovdqa %ymm0,%ymm3
6369 vmovdqa %ymm4,%ymm7
6370 vmovdqa %ymm8,%ymm11
6371 vmovdqa .avx2_inc(%rip),%ymm12
6372 vpaddd 160(%rbp),%ymm12,%ymm15
6373 vpaddd %ymm15,%ymm12,%ymm14
6374 vpaddd %ymm14,%ymm12,%ymm13
6375 vpaddd %ymm13,%ymm12,%ymm12
6376 vmovdqa %ymm15,256(%rbp)
6377 vmovdqa %ymm14,224(%rbp)
6378 vmovdqa %ymm13,192(%rbp)
6379 vmovdqa %ymm12,160(%rbp)
6380 vmovdqa %ymm8,128(%rbp)
6381 vmovdqa .rol16(%rip),%ymm8
6382 vpaddd %ymm7,%ymm3,%ymm3
6383 vpaddd %ymm6,%ymm2,%ymm2
6384 vpaddd %ymm5,%ymm1,%ymm1
6385 vpaddd %ymm4,%ymm0,%ymm0
6386 vpxor %ymm3,%ymm15,%ymm15
6387 vpxor %ymm2,%ymm14,%ymm14
6388 vpxor %ymm1,%ymm13,%ymm13
6389 vpxor %ymm0,%ymm12,%ymm12
6390 vpshufb %ymm8,%ymm15,%ymm15
6391 vpshufb %ymm8,%ymm14,%ymm14
6392 vpshufb %ymm8,%ymm13,%ymm13
6393 vpshufb %ymm8,%ymm12,%ymm12
6394 vmovdqa 128(%rbp),%ymm8
6395 vpaddd %ymm15,%ymm11,%ymm11
6396 vpaddd %ymm14,%ymm10,%ymm10
6397 vpaddd %ymm13,%ymm9,%ymm9
6398 vpaddd %ymm12,%ymm8,%ymm8
6399 vpxor %ymm11,%ymm7,%ymm7
6400 vpxor %ymm10,%ymm6,%ymm6
6401 vpxor %ymm9,%ymm5,%ymm5
6402 vpxor %ymm8,%ymm4,%ymm4
6403 vmovdqa %ymm8,128(%rbp)
6404 vpsrld $20,%ymm7,%ymm8
6405 vpslld $32-20,%ymm7,%ymm7
6406 vpxor %ymm8,%ymm7,%ymm7
6407 vpsrld $20,%ymm6,%ymm8
6408 vpslld $32-20,%ymm6,%ymm6
6409 vpxor %ymm8,%ymm6,%ymm6
6410 vpsrld $20,%ymm5,%ymm8
6411 vpslld $32-20,%ymm5,%ymm5
6412 vpxor %ymm8,%ymm5,%ymm5
6413 vpsrld $20,%ymm4,%ymm8
6414 vpslld $32-20,%ymm4,%ymm4
6415 vpxor %ymm8,%ymm4,%ymm4
6416 vmovdqa .rol8(%rip),%ymm8
6417 vpaddd %ymm7,%ymm3,%ymm3
6418 vpaddd %ymm6,%ymm2,%ymm2
6419 vpaddd %ymm5,%ymm1,%ymm1
6420 vpaddd %ymm4,%ymm0,%ymm0
6421 vpxor %ymm3,%ymm15,%ymm15
6422 vpxor %ymm2,%ymm14,%ymm14
6423 vpxor %ymm1,%ymm13,%ymm13
6424 vpxor %ymm0,%ymm12,%ymm12
6425 vpshufb %ymm8,%ymm15,%ymm15
6426 vpshufb %ymm8,%ymm14,%ymm14
6427 vpshufb %ymm8,%ymm13,%ymm13
6428 vpshufb %ymm8,%ymm12,%ymm12
6429 vmovdqa 128(%rbp),%ymm8
6430 vpaddd %ymm15,%ymm11,%ymm11
6431 vpaddd %ymm14,%ymm10,%ymm10
6432 vpaddd %ymm13,%ymm9,%ymm9
6433 vpaddd %ymm12,%ymm8,%ymm8
6434 vpxor %ymm11,%ymm7,%ymm7
6435 vpxor %ymm10,%ymm6,%ymm6
6436 vpxor %ymm9,%ymm5,%ymm5
6437 vpxor %ymm8,%ymm4,%ymm4
6438 vmovdqa %ymm8,128(%rbp)
6439 vpsrld $25,%ymm7,%ymm8
6440 vpslld $32-25,%ymm7,%ymm7
6441 vpxor %ymm8,%ymm7,%ymm7
6442 vpsrld $25,%ymm6,%ymm8
6443 vpslld $32-25,%ymm6,%ymm6
6444 vpxor %ymm8,%ymm6,%ymm6
6445 vpsrld $25,%ymm5,%ymm8
6446 vpslld $32-25,%ymm5,%ymm5
6447 vpxor %ymm8,%ymm5,%ymm5
6448 vpsrld $25,%ymm4,%ymm8
6449 vpslld $32-25,%ymm4,%ymm4
6450 vpxor %ymm8,%ymm4,%ymm4
6451 vmovdqa 128(%rbp),%ymm8
6452 vpalignr $4,%ymm7,%ymm7,%ymm7
6453 vpalignr $8,%ymm11,%ymm11,%ymm11
6454 vpalignr $12,%ymm15,%ymm15,%ymm15
6455 vpalignr $4,%ymm6,%ymm6,%ymm6
6456 vpalignr $8,%ymm10,%ymm10,%ymm10
6457 vpalignr $12,%ymm14,%ymm14,%ymm14
6458 vpalignr $4,%ymm5,%ymm5,%ymm5
6459 vpalignr $8,%ymm9,%ymm9,%ymm9
6460 vpalignr $12,%ymm13,%ymm13,%ymm13
6461 vpalignr $4,%ymm4,%ymm4,%ymm4
6462 vpalignr $8,%ymm8,%ymm8,%ymm8
6463 vpalignr $12,%ymm12,%ymm12,%ymm12
6464 vmovdqa %ymm8,128(%rbp)
6465 vmovdqa .rol16(%rip),%ymm8
6466 vpaddd %ymm7,%ymm3,%ymm3
6467 vpaddd %ymm6,%ymm2,%ymm2
6468 vpaddd %ymm5,%ymm1,%ymm1
6469 vpaddd %ymm4,%ymm0,%ymm0
6470 vpxor %ymm3,%ymm15,%ymm15
6471 vpxor %ymm2,%ymm14,%ymm14
6472 vpxor %ymm1,%ymm13,%ymm13
6473 vpxor %ymm0,%ymm12,%ymm12
6474 vpshufb %ymm8,%ymm15,%ymm15
6475 vpshufb %ymm8,%ymm14,%ymm14
6476 vpshufb %ymm8,%ymm13,%ymm13
6477 vpshufb %ymm8,%ymm12,%ymm12
6478 vmovdqa 128(%rbp),%ymm8
6479 vpaddd %ymm15,%ymm11,%ymm11
6480 vpaddd %ymm14,%ymm10,%ymm10
6481 vpaddd %ymm13,%ymm9,%ymm9
6482 vpaddd %ymm12,%ymm8,%ymm8
6483 vpxor %ymm11,%ymm7,%ymm7
6484 vpxor %ymm10,%ymm6,%ymm6
6485 vpxor %ymm9,%ymm5,%ymm5
6486 vpxor %ymm8,%ymm4,%ymm4
6487 vmovdqa %ymm8,128(%rbp)
6488 vpsrld $20,%ymm7,%ymm8
6489 vpslld $32-20,%ymm7,%ymm7
6490 vpxor %ymm8,%ymm7,%ymm7
6491 vpsrld $20,%ymm6,%ymm8
6492 vpslld $32-20,%ymm6,%ymm6
6493 vpxor %ymm8,%ymm6,%ymm6
6494 vpsrld $20,%ymm5,%ymm8
6495 vpslld $32-20,%ymm5,%ymm5
6496 vpxor %ymm8,%ymm5,%ymm5
6497 vpsrld $20,%ymm4,%ymm8
6498 vpslld $32-20,%ymm4,%ymm4
6499 vpxor %ymm8,%ymm4,%ymm4
6500 vmovdqa .rol8(%rip),%ymm8
6501 vpaddd %ymm7,%ymm3,%ymm3
6502 vpaddd %ymm6,%ymm2,%ymm2
6503 vpaddd %ymm5,%ymm1,%ymm1
6504 vpaddd %ymm4,%ymm0,%ymm0
6505 vpxor %ymm3,%ymm15,%ymm15
6506 vpxor %ymm2,%ymm14,%ymm14
6507 vpxor %ymm1,%ymm13,%ymm13
6508 vpxor %ymm0,%ymm12,%ymm12
6509 vpshufb %ymm8,%ymm15,%ymm15
6510 vpshufb %ymm8,%ymm14,%ymm14
6511 vpshufb %ymm8,%ymm13,%ymm13
6512 vpshufb %ymm8,%ymm12,%ymm12
6513 vmovdqa 128(%rbp),%ymm8
6514 vpaddd %ymm15,%ymm11,%ymm11
6515 vpaddd %ymm14,%ymm10,%ymm10
6516 vpaddd %ymm13,%ymm9,%ymm9
6517 vpaddd %ymm12,%ymm8,%ymm8
6518 vpxor %ymm11,%ymm7,%ymm7
6519 vpxor %ymm10,%ymm6,%ymm6
6520 vpxor %ymm9,%ymm5,%ymm5
6521 vpxor %ymm8,%ymm4,%ymm4
6522 vmovdqa %ymm8,128(%rbp)
6523 vpsrld $25,%ymm7,%ymm8
6524 vpslld $32-25,%ymm7,%ymm7
6525 vpxor %ymm8,%ymm7,%ymm7
6526 vpsrld $25,%ymm6,%ymm8
6527 vpslld $32-25,%ymm6,%ymm6
6528 vpxor %ymm8,%ymm6,%ymm6
6529 vpsrld $25,%ymm5,%ymm8
6530 vpslld $32-25,%ymm5,%ymm5
6531 vpxor %ymm8,%ymm5,%ymm5
6532 vpsrld $25,%ymm4,%ymm8
6533 vpslld $32-25,%ymm4,%ymm4
6534 vpxor %ymm8,%ymm4,%ymm4
6535 vmovdqa 128(%rbp),%ymm8
6536 vpalignr $12,%ymm7,%ymm7,%ymm7
6537 vpalignr $8,%ymm11,%ymm11,%ymm11
6538 vpalignr $4,%ymm15,%ymm15,%ymm15
6539 vpalignr $12,%ymm6,%ymm6,%ymm6
6540 vpalignr $8,%ymm10,%ymm10,%ymm10
6541 vpalignr $4,%ymm14,%ymm14,%ymm14
6542 vpalignr $12,%ymm5,%ymm5,%ymm5
6543 vpalignr $8,%ymm9,%ymm9,%ymm9
6544 vpalignr $4,%ymm13,%ymm13,%ymm13
6545 vpalignr $12,%ymm4,%ymm4,%ymm4
6546 vpalignr $8,%ymm8,%ymm8,%ymm8
6547 vpalignr $4,%ymm12,%ymm12,%ymm12
6548 vmovdqa %ymm8,128(%rbp)
6549 vmovdqa .rol16(%rip),%ymm8
6550 vpaddd %ymm7,%ymm3,%ymm3
6551 vpaddd %ymm6,%ymm2,%ymm2
6552 vpaddd %ymm5,%ymm1,%ymm1
6553 vpaddd %ymm4,%ymm0,%ymm0
6554 vpxor %ymm3,%ymm15,%ymm15
6555 vpxor %ymm2,%ymm14,%ymm14
6556 vpxor %ymm1,%ymm13,%ymm13
6557 vpxor %ymm0,%ymm12,%ymm12
6558 vpshufb %ymm8,%ymm15,%ymm15
6559 vpshufb %ymm8,%ymm14,%ymm14
6560 vpshufb %ymm8,%ymm13,%ymm13
6561 vpshufb %ymm8,%ymm12,%ymm12
6562 vmovdqa 128(%rbp),%ymm8
6563 vpaddd %ymm15,%ymm11,%ymm11
6564 vpaddd %ymm14,%ymm10,%ymm10
6565 vpaddd %ymm13,%ymm9,%ymm9
6566 vpaddd %ymm12,%ymm8,%ymm8
6567 vpxor %ymm11,%ymm7,%ymm7
6568 vpxor %ymm10,%ymm6,%ymm6
6569 vpxor %ymm9,%ymm5,%ymm5
6570 vpxor %ymm8,%ymm4,%ymm4
6571 vmovdqa %ymm8,128(%rbp)
6572 vpsrld $20,%ymm7,%ymm8
6573 vpslld $32-20,%ymm7,%ymm7
6574 vpxor %ymm8,%ymm7,%ymm7
6575 vpsrld $20,%ymm6,%ymm8
6576 vpslld $32-20,%ymm6,%ymm6
6577 vpxor %ymm8,%ymm6,%ymm6
6578 vpsrld $20,%ymm5,%ymm8
6579 vpslld $32-20,%ymm5,%ymm5
6580 vpxor %ymm8,%ymm5,%ymm5
6581 vpsrld $20,%ymm4,%ymm8
6582 vpslld $32-20,%ymm4,%ymm4
6583 vpxor %ymm8,%ymm4,%ymm4
6584 vmovdqa .rol8(%rip),%ymm8
6585 vpaddd %ymm7,%ymm3,%ymm3
6586 vpaddd %ymm6,%ymm2,%ymm2
6587 vpaddd %ymm5,%ymm1,%ymm1
6588 vpaddd %ymm4,%ymm0,%ymm0
6589
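# Enter the main loop mid-iteration: one double round already ran above, so nine remain;
# %rdi is biased back 16 bytes so the in-loop Poly1305 loads hit ciphertext written
# on the previous pass.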
6590 subq $16,%rdi
6591 movq $9,%rcx
6592 jmp 4f
6593 1:
6594 vmovdqa .chacha20_consts(%rip),%ymm0
6595 vmovdqa 64(%rbp),%ymm4
6596 vmovdqa 96(%rbp),%ymm8
6597 vmovdqa %ymm0,%ymm1
6598 vmovdqa %ymm4,%ymm5
6599 vmovdqa %ymm8,%ymm9
6600 vmovdqa %ymm0,%ymm2
6601 vmovdqa %ymm4,%ymm6
6602 vmovdqa %ymm8,%ymm10
6603 vmovdqa %ymm0,%ymm3
6604 vmovdqa %ymm4,%ymm7
6605 vmovdqa %ymm8,%ymm11
6606 vmovdqa .avx2_inc(%rip),%ymm12
6607 vpaddd 160(%rbp),%ymm12,%ymm15
6608 vpaddd %ymm15,%ymm12,%ymm14
6609 vpaddd %ymm14,%ymm12,%ymm13
6610 vpaddd %ymm13,%ymm12,%ymm12
6611 vmovdqa %ymm15,256(%rbp)
6612 vmovdqa %ymm14,224(%rbp)
6613 vmovdqa %ymm13,192(%rbp)
6614 vmovdqa %ymm12,160(%rbp)
6615
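# Steady state: 10 double rounds per 512-byte chunk, absorbing three pending
# ciphertext blocks per double round.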
6616 movq $10,%rcx
6617 2:
6618 addq 0(%rdi),%r10
6619 adcq 8+0(%rdi),%r11
6620 adcq $1,%r12
6621 vmovdqa %ymm8,128(%rbp)
6622 vmovdqa .rol16(%rip),%ymm8
6623 vpaddd %ymm7,%ymm3,%ymm3
6624 vpaddd %ymm6,%ymm2,%ymm2
6625 vpaddd %ymm5,%ymm1,%ymm1
6626 vpaddd %ymm4,%ymm0,%ymm0
6627 vpxor %ymm3,%ymm15,%ymm15
6628 vpxor %ymm2,%ymm14,%ymm14
6629 vpxor %ymm1,%ymm13,%ymm13
6630 vpxor %ymm0,%ymm12,%ymm12
6631 movq 0+0(%rbp),%rdx
6632 movq %rdx,%r15
6633 mulxq %r10,%r13,%r14
6634 mulxq %r11,%rax,%rdx
6635 imulq %r12,%r15
6636 addq %rax,%r14
6637 adcq %rdx,%r15
6638 vpshufb %ymm8,%ymm15,%ymm15
6639 vpshufb %ymm8,%ymm14,%ymm14
6640 vpshufb %ymm8,%ymm13,%ymm13
6641 vpshufb %ymm8,%ymm12,%ymm12
6642 vmovdqa 128(%rbp),%ymm8
6643 vpaddd %ymm15,%ymm11,%ymm11
6644 vpaddd %ymm14,%ymm10,%ymm10
6645 vpaddd %ymm13,%ymm9,%ymm9
6646 vpaddd %ymm12,%ymm8,%ymm8
6647 movq 8+0(%rbp),%rdx
6648 mulxq %r10,%r10,%rax
6649 addq %r10,%r14
6650 mulxq %r11,%r11,%r9
6651 adcq %r11,%r15
6652 adcq $0,%r9
6653 imulq %r12,%rdx
6654 vpxor %ymm11,%ymm7,%ymm7
6655 vpxor %ymm10,%ymm6,%ymm6
6656 vpxor %ymm9,%ymm5,%ymm5
6657 vpxor %ymm8,%ymm4,%ymm4
6658 vmovdqa %ymm8,128(%rbp)
6659 vpsrld $20,%ymm7,%ymm8
6660 vpslld $32-20,%ymm7,%ymm7
6661 vpxor %ymm8,%ymm7,%ymm7
6662 vpsrld $20,%ymm6,%ymm8
6663 vpslld $32-20,%ymm6,%ymm6
6664 vpxor %ymm8,%ymm6,%ymm6
6665 vpsrld $20,%ymm5,%ymm8
6666 addq %rax,%r15
6667 adcq %rdx,%r9
6668 vpslld $32-20,%ymm5,%ymm5
6669 vpxor %ymm8,%ymm5,%ymm5
6670 vpsrld $20,%ymm4,%ymm8
6671 vpslld $32-20,%ymm4,%ymm4
6672 vpxor %ymm8,%ymm4,%ymm4
6673 vmovdqa .rol8(%rip),%ymm8
6674 vpaddd %ymm7,%ymm3,%ymm3
6675 vpaddd %ymm6,%ymm2,%ymm2
6676 vpaddd %ymm5,%ymm1,%ymm1
6677 vpaddd %ymm4,%ymm0,%ymm0
6678 movq %r13,%r10
6679 movq %r14,%r11
6680 movq %r15,%r12
6681 andq $3,%r12
6682 movq %r15,%r13
6683 andq $-4,%r13
6684 movq %r9,%r14
6685 shrdq $2,%r9,%r15
6686 shrq $2,%r9
6687 addq %r13,%r10
6688 adcq %r14,%r11
6689 adcq $0,%r12
6690 addq %r15,%r10
6691 adcq %r9,%r11
6692 adcq $0,%r12
6693
6694 4:
6695 vpxor %ymm3,%ymm15,%ymm15
6696 vpxor %ymm2,%ymm14,%ymm14
6697 vpxor %ymm1,%ymm13,%ymm13
6698 vpxor %ymm0,%ymm12,%ymm12
6699 vpshufb %ymm8,%ymm15,%ymm15
6700 vpshufb %ymm8,%ymm14,%ymm14
6701 vpshufb %ymm8,%ymm13,%ymm13
6702 vpshufb %ymm8,%ymm12,%ymm12
6703 vmovdqa 128(%rbp),%ymm8
6704 addq 16(%rdi),%r10
6705 adcq 8+16(%rdi),%r11
6706 adcq $1,%r12
6707 vpaddd %ymm15,%ymm11,%ymm11
6708 vpaddd %ymm14,%ymm10,%ymm10
6709 vpaddd %ymm13,%ymm9,%ymm9
6710 vpaddd %ymm12,%ymm8,%ymm8
6711 vpxor %ymm11,%ymm7,%ymm7
6712 vpxor %ymm10,%ymm6,%ymm6
6713 vpxor %ymm9,%ymm5,%ymm5
6714 vpxor %ymm8,%ymm4,%ymm4
6715 movq 0+0(%rbp),%rdx
6716 movq %rdx,%r15
6717 mulxq %r10,%r13,%r14
6718 mulxq %r11,%rax,%rdx
6719 imulq %r12,%r15
6720 addq %rax,%r14
6721 adcq %rdx,%r15
6722 vmovdqa %ymm8,128(%rbp)
6723 vpsrld $25,%ymm7,%ymm8
6724 vpslld $32-25,%ymm7,%ymm7
6725 vpxor %ymm8,%ymm7,%ymm7
6726 vpsrld $25,%ymm6,%ymm8
6727 vpslld $32-25,%ymm6,%ymm6
6728 vpxor %ymm8,%ymm6,%ymm6
6729 vpsrld $25,%ymm5,%ymm8
6730 vpslld $32-25,%ymm5,%ymm5
6731 vpxor %ymm8,%ymm5,%ymm5
6732 vpsrld $25,%ymm4,%ymm8
6733 vpslld $32-25,%ymm4,%ymm4
6734 vpxor %ymm8,%ymm4,%ymm4
6735 vmovdqa 128(%rbp),%ymm8
6736 vpalignr $4,%ymm7,%ymm7,%ymm7
6737 vpalignr $8,%ymm11,%ymm11,%ymm11
6738 vpalignr $12,%ymm15,%ymm15,%ymm15
6739 vpalignr $4,%ymm6,%ymm6,%ymm6
6740 movq 8+0(%rbp),%rdx
6741 mulxq %r10,%r10,%rax
6742 addq %r10,%r14
6743 mulxq %r11,%r11,%r9
6744 adcq %r11,%r15
6745 adcq $0,%r9
6746 imulq %r12,%rdx
6747 vpalignr $8,%ymm10,%ymm10,%ymm10
6748 vpalignr $12,%ymm14,%ymm14,%ymm14
6749 vpalignr $4,%ymm5,%ymm5,%ymm5
6750 vpalignr $8,%ymm9,%ymm9,%ymm9
6751 vpalignr $12,%ymm13,%ymm13,%ymm13
6752 vpalignr $4,%ymm4,%ymm4,%ymm4
6753 vpalignr $8,%ymm8,%ymm8,%ymm8
6754 vpalignr $12,%ymm12,%ymm12,%ymm12
6755 vmovdqa %ymm8,128(%rbp)
6756 vmovdqa .rol16(%rip),%ymm8
6757 vpaddd %ymm7,%ymm3,%ymm3
6758 vpaddd %ymm6,%ymm2,%ymm2
6759 vpaddd %ymm5,%ymm1,%ymm1
6760 vpaddd %ymm4,%ymm0,%ymm0
6761 vpxor %ymm3,%ymm15,%ymm15
6762 vpxor %ymm2,%ymm14,%ymm14
6763 vpxor %ymm1,%ymm13,%ymm13
6764 vpxor %ymm0,%ymm12,%ymm12
6765 addq %rax,%r15
6766 adcq %rdx,%r9
6767 vpshufb %ymm8,%ymm15,%ymm15
6768 vpshufb %ymm8,%ymm14,%ymm14
6769 vpshufb %ymm8,%ymm13,%ymm13
6770 vpshufb %ymm8,%ymm12,%ymm12
6771 vmovdqa 128(%rbp),%ymm8
6772 vpaddd %ymm15,%ymm11,%ymm11
6773 vpaddd %ymm14,%ymm10,%ymm10
6774 vpaddd %ymm13,%ymm9,%ymm9
6775 vpaddd %ymm12,%ymm8,%ymm8
6776 movq %r13,%r10
6777 movq %r14,%r11
6778 movq %r15,%r12
6779 andq $3,%r12
6780 movq %r15,%r13
6781 andq $-4,%r13
6782 movq %r9,%r14
6783 shrdq $2,%r9,%r15
6784 shrq $2,%r9
6785 addq %r13,%r10
6786 adcq %r14,%r11
6787 adcq $0,%r12
6788 addq %r15,%r10
6789 adcq %r9,%r11
6790 adcq $0,%r12
6791 vpxor %ymm11,%ymm7,%ymm7
6792 vpxor %ymm10,%ymm6,%ymm6
6793 vpxor %ymm9,%ymm5,%ymm5
6794 vpxor %ymm8,%ymm4,%ymm4
6795 vmovdqa %ymm8,128(%rbp)
6796 vpsrld $20,%ymm7,%ymm8
6797 vpslld $32-20,%ymm7,%ymm7
6798 vpxor %ymm8,%ymm7,%ymm7
6799 addq 32(%rdi),%r10
6800 adcq 8+32(%rdi),%r11
6801 adcq $1,%r12
6802
6803 leaq 48(%rdi),%rdi
6804 vpsrld $20,%ymm6,%ymm8
6805 vpslld $32-20,%ymm6,%ymm6
6806 vpxor %ymm8,%ymm6,%ymm6
6807 vpsrld $20,%ymm5,%ymm8
6808 vpslld $32-20,%ymm5,%ymm5
6809 vpxor %ymm8,%ymm5,%ymm5
6810 vpsrld $20,%ymm4,%ymm8
6811 vpslld $32-20,%ymm4,%ymm4
6812 vpxor %ymm8,%ymm4,%ymm4
6813 vmovdqa .rol8(%rip),%ymm8
6814 vpaddd %ymm7,%ymm3,%ymm3
6815 vpaddd %ymm6,%ymm2,%ymm2
6816 vpaddd %ymm5,%ymm1,%ymm1
6817 vpaddd %ymm4,%ymm0,%ymm0
6818 vpxor %ymm3,%ymm15,%ymm15
6819 vpxor %ymm2,%ymm14,%ymm14
6820 vpxor %ymm1,%ymm13,%ymm13
6821 vpxor %ymm0,%ymm12,%ymm12
6822 movq 0+0(%rbp),%rdx
6823 movq %rdx,%r15
6824 mulxq %r10,%r13,%r14
6825 mulxq %r11,%rax,%rdx
6826 imulq %r12,%r15
6827 addq %rax,%r14
6828 adcq %rdx,%r15
6829 vpshufb %ymm8,%ymm15,%ymm15
6830 vpshufb %ymm8,%ymm14,%ymm14
6831 vpshufb %ymm8,%ymm13,%ymm13
6832 vpshufb %ymm8,%ymm12,%ymm12
6833 vmovdqa 128(%rbp),%ymm8
6834 vpaddd %ymm15,%ymm11,%ymm11
6835 vpaddd %ymm14,%ymm10,%ymm10
6836 vpaddd %ymm13,%ymm9,%ymm9
6837 movq 8+0(%rbp),%rdx
6838 mulxq %r10,%r10,%rax
6839 addq %r10,%r14
6840 mulxq %r11,%r11,%r9
6841 adcq %r11,%r15
6842 adcq $0,%r9
6843 imulq %r12,%rdx
6844 vpaddd %ymm12,%ymm8,%ymm8
6845 vpxor %ymm11,%ymm7,%ymm7
6846 vpxor %ymm10,%ymm6,%ymm6
6847 vpxor %ymm9,%ymm5,%ymm5
6848 vpxor %ymm8,%ymm4,%ymm4
6849 vmovdqa %ymm8,128(%rbp)
6850 vpsrld $25,%ymm7,%ymm8
6851 vpslld $32-25,%ymm7,%ymm7
6852 addq %rax,%r15
6853 adcq %rdx,%r9
6854 vpxor %ymm8,%ymm7,%ymm7
6855 vpsrld $25,%ymm6,%ymm8
6856 vpslld $32-25,%ymm6,%ymm6
6857 vpxor %ymm8,%ymm6,%ymm6
6858 vpsrld $25,%ymm5,%ymm8
6859 vpslld $32-25,%ymm5,%ymm5
6860 vpxor %ymm8,%ymm5,%ymm5
6861 vpsrld $25,%ymm4,%ymm8
6862 vpslld $32-25,%ymm4,%ymm4
6863 vpxor %ymm8,%ymm4,%ymm4
6864 vmovdqa 128(%rbp),%ymm8
6865 vpalignr $12,%ymm7,%ymm7,%ymm7
6866 vpalignr $8,%ymm11,%ymm11,%ymm11
6867 vpalignr $4,%ymm15,%ymm15,%ymm15
6868 vpalignr $12,%ymm6,%ymm6,%ymm6
6869 vpalignr $8,%ymm10,%ymm10,%ymm10
6870 vpalignr $4,%ymm14,%ymm14,%ymm14
6871 vpalignr $12,%ymm5,%ymm5,%ymm5
6872 movq %r13,%r10
6873 movq %r14,%r11
6874 movq %r15,%r12
6875 andq $3,%r12
6876 movq %r15,%r13
6877 andq $-4,%r13
6878 movq %r9,%r14
6879 shrdq $2,%r9,%r15
6880 shrq $2,%r9
6881 addq %r13,%r10
6882 adcq %r14,%r11
6883 adcq $0,%r12
6884 addq %r15,%r10
6885 adcq %r9,%r11
6886 adcq $0,%r12
6887 vpalignr $8,%ymm9,%ymm9,%ymm9
6888 vpalignr $4,%ymm13,%ymm13,%ymm13
6889 vpalignr $12,%ymm4,%ymm4,%ymm4
6890 vpalignr $8,%ymm8,%ymm8,%ymm8
6891 vpalignr $4,%ymm12,%ymm12,%ymm12
6892
6893 decq %rcx
6894 jne 2b
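// Rounds for this four-state (8 x 64-byte) batch are complete: add the
// saved input state (constants, key at 64(%rbp), counters at
// 160..256(%rbp)) back into the working registers.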
6895 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
6896 vpaddd 64(%rbp),%ymm7,%ymm7
6897 vpaddd 96(%rbp),%ymm11,%ymm11
6898 vpaddd 256(%rbp),%ymm15,%ymm15
6899 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
6900 vpaddd 64(%rbp),%ymm6,%ymm6
6901 vpaddd 96(%rbp),%ymm10,%ymm10
6902 vpaddd 224(%rbp),%ymm14,%ymm14
6903 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
6904 vpaddd 64(%rbp),%ymm5,%ymm5
6905 vpaddd 96(%rbp),%ymm9,%ymm9
6906 vpaddd 192(%rbp),%ymm13,%ymm13
6907 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
6908 vpaddd 64(%rbp),%ymm4,%ymm4
6909 vpaddd 96(%rbp),%ymm8,%ymm8
6910 vpaddd 160(%rbp),%ymm12,%ymm12
6911
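// Spill %ymm0, then encrypt and write bytes 0..127 of this 512-byte
// chunk; the interleaved Poly1305 adds below hash ciphertext written on
// the previous pass (negative offsets from the advanced %rdi).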
6912 leaq 32(%rdi),%rdi
6913 vmovdqa %ymm0,128(%rbp)
6914 addq -32(%rdi),%r10
6915 adcq 8+-32(%rdi),%r11
6916 adcq $1,%r12
6917 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
6918 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
6919 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
6920 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
6921 vpxor 0+0(%rsi),%ymm0,%ymm0
6922 vpxor 32+0(%rsi),%ymm3,%ymm3
6923 vpxor 64+0(%rsi),%ymm7,%ymm7
6924 vpxor 96+0(%rsi),%ymm11,%ymm11
6925 vmovdqu %ymm0,0+0(%rdi)
6926 vmovdqu %ymm3,32+0(%rdi)
6927 vmovdqu %ymm7,64+0(%rdi)
6928 vmovdqu %ymm11,96+0(%rdi)
6929
6930 vmovdqa 128(%rbp),%ymm0
6931 movq 0+0(%rbp),%rax
6932 movq %rax,%r15
6933 mulq %r10
6934 movq %rax,%r13
6935 movq %rdx,%r14
6936 movq 0+0(%rbp),%rax
6937 mulq %r11
6938 imulq %r12,%r15
6939 addq %rax,%r14
6940 adcq %rdx,%r15
6941 movq 8+0(%rbp),%rax
6942 movq %rax,%r9
6943 mulq %r10
6944 addq %rax,%r14
6945 adcq $0,%rdx
6946 movq %rdx,%r10
6947 movq 8+0(%rbp),%rax
6948 mulq %r11
6949 addq %rax,%r15
6950 adcq $0,%rdx
6951 imulq %r12,%r9
6952 addq %r10,%r15
6953 adcq %rdx,%r9
6954 movq %r13,%r10
6955 movq %r14,%r11
6956 movq %r15,%r12
6957 andq $3,%r12
6958 movq %r15,%r13
6959 andq $-4,%r13
6960 movq %r9,%r14
6961 shrdq $2,%r9,%r15
6962 shrq $2,%r9
6963 addq %r13,%r10
6964 adcq %r14,%r11
6965 adcq $0,%r12
6966 addq %r15,%r10
6967 adcq %r9,%r11
6968 adcq $0,%r12
6969 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
6970 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
6971 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
6972 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
6973 vpxor 0+128(%rsi),%ymm3,%ymm3
6974 vpxor 32+128(%rsi),%ymm2,%ymm2
6975 vpxor 64+128(%rsi),%ymm6,%ymm6
6976 vpxor 96+128(%rsi),%ymm10,%ymm10
6977 vmovdqu %ymm3,0+128(%rdi)
6978 vmovdqu %ymm2,32+128(%rdi)
6979 vmovdqu %ymm6,64+128(%rdi)
6980 vmovdqu %ymm10,96+128(%rdi)
6981 addq -16(%rdi),%r10
6982 adcq 8+-16(%rdi),%r11
6983 adcq $1,%r12
6984 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
6985 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
6986 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
6987 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
6988 vpxor 0+256(%rsi),%ymm3,%ymm3
6989 vpxor 32+256(%rsi),%ymm1,%ymm1
6990 vpxor 64+256(%rsi),%ymm5,%ymm5
6991 vpxor 96+256(%rsi),%ymm9,%ymm9
6992 vmovdqu %ymm3,0+256(%rdi)
6993 vmovdqu %ymm1,32+256(%rdi)
6994 vmovdqu %ymm5,64+256(%rdi)
6995 vmovdqu %ymm9,96+256(%rdi)
6996 movq 0+0(%rbp),%rax
6997 movq %rax,%r15
6998 mulq %r10
6999 movq %rax,%r13
7000 movq %rdx,%r14
7001 movq 0+0(%rbp),%rax
7002 mulq %r11
7003 imulq %r12,%r15
7004 addq %rax,%r14
7005 adcq %rdx,%r15
7006 movq 8+0(%rbp),%rax
7007 movq %rax,%r9
7008 mulq %r10
7009 addq %rax,%r14
7010 adcq $0,%rdx
7011 movq %rdx,%r10
7012 movq 8+0(%rbp),%rax
7013 mulq %r11
7014 addq %rax,%r15
7015 adcq $0,%rdx
7016 imulq %r12,%r9
7017 addq %r10,%r15
7018 adcq %rdx,%r9
7019 movq %r13,%r10
7020 movq %r14,%r11
7021 movq %r15,%r12
7022 andq $3,%r12
7023 movq %r15,%r13
7024 andq $-4,%r13
7025 movq %r9,%r14
7026 shrdq $2,%r9,%r15
7027 shrq $2,%r9
7028 addq %r13,%r10
7029 adcq %r14,%r11
7030 adcq $0,%r12
7031 addq %r15,%r10
7032 adcq %r9,%r11
7033 adcq $0,%r12
7034 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
7035 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
7036 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
7037 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
7038 vpxor 0+384(%rsi),%ymm3,%ymm3
7039 vpxor 32+384(%rsi),%ymm0,%ymm0
7040 vpxor 64+384(%rsi),%ymm4,%ymm4
7041 vpxor 96+384(%rsi),%ymm8,%ymm8
7042 vmovdqu %ymm3,0+384(%rdi)
7043 vmovdqu %ymm0,32+384(%rdi)
7044 vmovdqu %ymm4,64+384(%rdi)
7045 vmovdqu %ymm8,96+384(%rdi)
7046
7047 leaq 512(%rsi),%rsi
7048 subq $512,%rbx
7049 cmpq $512,%rbx
7050 jg 1b
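// Fewer than 512 bytes remain. Hash the last 32 bytes of the previous
// chunk's ciphertext, then pick a tail routine by remaining length.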
7051 addq 0(%rdi),%r10
7052 adcq 8+0(%rdi),%r11
7053 adcq $1,%r12
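// Poly1305 block: multiply the accumulator (%r10:%r11:%r12) by the
// clamped key r held at 0(%rbp)/8(%rbp) and reduce mod 2^130 - 5; this
// same mulq sequence recurs throughout the file.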
7054 movq 0+0(%rbp),%rax
7055 movq %rax,%r15
7056 mulq %r10
7057 movq %rax,%r13
7058 movq %rdx,%r14
7059 movq 0+0(%rbp),%rax
7060 mulq %r11
7061 imulq %r12,%r15
7062 addq %rax,%r14
7063 adcq %rdx,%r15
7064 movq 8+0(%rbp),%rax
7065 movq %rax,%r9
7066 mulq %r10
7067 addq %rax,%r14
7068 adcq $0,%rdx
7069 movq %rdx,%r10
7070 movq 8+0(%rbp),%rax
7071 mulq %r11
7072 addq %rax,%r15
7073 adcq $0,%rdx
7074 imulq %r12,%r9
7075 addq %r10,%r15
7076 adcq %rdx,%r9
7077 movq %r13,%r10
7078 movq %r14,%r11
7079 movq %r15,%r12
7080 andq $3,%r12
7081 movq %r15,%r13
7082 andq $-4,%r13
7083 movq %r9,%r14
7084 shrdq $2,%r9,%r15
7085 shrq $2,%r9
7086 addq %r13,%r10
7087 adcq %r14,%r11
7088 adcq $0,%r12
7089 addq %r15,%r10
7090 adcq %r9,%r11
7091 adcq $0,%r12
7092 addq 16(%rdi),%r10
7093 adcq 8+16(%rdi),%r11
7094 adcq $1,%r12
7095 movq 0+0(%rbp),%rax
7096 movq %rax,%r15
7097 mulq %r10
7098 movq %rax,%r13
7099 movq %rdx,%r14
7100 movq 0+0(%rbp),%rax
7101 mulq %r11
7102 imulq %r12,%r15
7103 addq %rax,%r14
7104 adcq %rdx,%r15
7105 movq 8+0(%rbp),%rax
7106 movq %rax,%r9
7107 mulq %r10
7108 addq %rax,%r14
7109 adcq $0,%rdx
7110 movq %rdx,%r10
7111 movq 8+0(%rbp),%rax
7112 mulq %r11
7113 addq %rax,%r15
7114 adcq $0,%rdx
7115 imulq %r12,%r9
7116 addq %r10,%r15
7117 adcq %rdx,%r9
7118 movq %r13,%r10
7119 movq %r14,%r11
7120 movq %r15,%r12
7121 andq $3,%r12
7122 movq %r15,%r13
7123 andq $-4,%r13
7124 movq %r9,%r14
7125 shrdq $2,%r9,%r15
7126 shrq $2,%r9
7127 addq %r13,%r10
7128 adcq %r14,%r11
7129 adcq $0,%r12
7130 addq %r15,%r10
7131 adcq %r9,%r11
7132 adcq $0,%r12
7133
7134 leaq 32(%rdi),%rdi
7135 movq $10,%rcx
7136 xorq %r8,%r8
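// %rcx = 10 double rounds for every tail; %r8 stays zero on the seal
// path, so the tails' trailing "decq %r8; jge 2b" hash-only pass is
// never taken here.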
7137 cmpq $128,%rbx
7138 ja 3f
7139
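// seal_avx2_tail_128: 1..128 bytes of plaintext remain. Generate one
// two-block ChaCha20 state (counter kept at 160(%rbp)) while hashing
// outstanding ciphertext, then finish in seal_avx2_short_loop.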
7140 seal_avx2_tail_128:
7141 vmovdqa .chacha20_consts(%rip),%ymm0
7142 vmovdqa 64(%rbp),%ymm4
7143 vmovdqa 96(%rbp),%ymm8
7144 vmovdqa .avx2_inc(%rip),%ymm12
7145 vpaddd 160(%rbp),%ymm12,%ymm12
7146 vmovdqa %ymm12,160(%rbp)
7147
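// 1: absorb one 16-byte ciphertext block into Poly1305, then fall
// through to 2:, one ChaCha20 double round with two more Poly1305
// blocks interleaved; %rcx counts the round iterations.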
7148 1:
7149 addq 0(%rdi),%r10
7150 adcq 8+0(%rdi),%r11
7151 adcq $1,%r12
7152 movq 0+0(%rbp),%rax
7153 movq %rax,%r15
7154 mulq %r10
7155 movq %rax,%r13
7156 movq %rdx,%r14
7157 movq 0+0(%rbp),%rax
7158 mulq %r11
7159 imulq %r12,%r15
7160 addq %rax,%r14
7161 adcq %rdx,%r15
7162 movq 8+0(%rbp),%rax
7163 movq %rax,%r9
7164 mulq %r10
7165 addq %rax,%r14
7166 adcq $0,%rdx
7167 movq %rdx,%r10
7168 movq 8+0(%rbp),%rax
7169 mulq %r11
7170 addq %rax,%r15
7171 adcq $0,%rdx
7172 imulq %r12,%r9
7173 addq %r10,%r15
7174 adcq %rdx,%r9
7175 movq %r13,%r10
7176 movq %r14,%r11
7177 movq %r15,%r12
7178 andq $3,%r12
7179 movq %r15,%r13
7180 andq $-4,%r13
7181 movq %r9,%r14
7182 shrdq $2,%r9,%r15
7183 shrq $2,%r9
7184 addq %r13,%r10
7185 adcq %r14,%r11
7186 adcq $0,%r12
7187 addq %r15,%r10
7188 adcq %r9,%r11
7189 adcq $0,%r12
7190
7191 leaq 16(%rdi),%rdi
7192 2:
7193 vpaddd %ymm4,%ymm0,%ymm0
7194 vpxor %ymm0,%ymm12,%ymm12
7195 vpshufb .rol16(%rip),%ymm12,%ymm12
7196 vpaddd %ymm12,%ymm8,%ymm8
7197 vpxor %ymm8,%ymm4,%ymm4
7198 vpsrld $20,%ymm4,%ymm3
7199 vpslld $12,%ymm4,%ymm4
7200 vpxor %ymm3,%ymm4,%ymm4
7201 vpaddd %ymm4,%ymm0,%ymm0
7202 vpxor %ymm0,%ymm12,%ymm12
7203 vpshufb .rol8(%rip),%ymm12,%ymm12
7204 vpaddd %ymm12,%ymm8,%ymm8
7205 vpxor %ymm8,%ymm4,%ymm4
7206 vpslld $7,%ymm4,%ymm3
7207 vpsrld $25,%ymm4,%ymm4
7208 vpxor %ymm3,%ymm4,%ymm4
7209 vpalignr $12,%ymm12,%ymm12,%ymm12
7210 vpalignr $8,%ymm8,%ymm8,%ymm8
7211 vpalignr $4,%ymm4,%ymm4,%ymm4
7212 addq 0(%rdi),%r10
7213 adcq 8+0(%rdi),%r11
7214 adcq $1,%r12
7215 movq 0+0(%rbp),%rax
7216 movq %rax,%r15
7217 mulq %r10
7218 movq %rax,%r13
7219 movq %rdx,%r14
7220 movq 0+0(%rbp),%rax
7221 mulq %r11
7222 imulq %r12,%r15
7223 addq %rax,%r14
7224 adcq %rdx,%r15
7225 movq 8+0(%rbp),%rax
7226 movq %rax,%r9
7227 mulq %r10
7228 addq %rax,%r14
7229 adcq $0,%rdx
7230 movq %rdx,%r10
7231 movq 8+0(%rbp),%rax
7232 mulq %r11
7233 addq %rax,%r15
7234 adcq $0,%rdx
7235 imulq %r12,%r9
7236 addq %r10,%r15
7237 adcq %rdx,%r9
7238 movq %r13,%r10
7239 movq %r14,%r11
7240 movq %r15,%r12
7241 andq $3,%r12
7242 movq %r15,%r13
7243 andq $-4,%r13
7244 movq %r9,%r14
7245 shrdq $2,%r9,%r15
7246 shrq $2,%r9
7247 addq %r13,%r10
7248 adcq %r14,%r11
7249 adcq $0,%r12
7250 addq %r15,%r10
7251 adcq %r9,%r11
7252 adcq $0,%r12
7253 vpaddd %ymm4,%ymm0,%ymm0
7254 vpxor %ymm0,%ymm12,%ymm12
7255 vpshufb .rol16(%rip),%ymm12,%ymm12
7256 vpaddd %ymm12,%ymm8,%ymm8
7257 vpxor %ymm8,%ymm4,%ymm4
7258 vpsrld $20,%ymm4,%ymm3
7259 vpslld $12,%ymm4,%ymm4
7260 vpxor %ymm3,%ymm4,%ymm4
7261 vpaddd %ymm4,%ymm0,%ymm0
7262 vpxor %ymm0,%ymm12,%ymm12
7263 vpshufb .rol8(%rip),%ymm12,%ymm12
7264 vpaddd %ymm12,%ymm8,%ymm8
7265 vpxor %ymm8,%ymm4,%ymm4
7266 vpslld $7,%ymm4,%ymm3
7267 vpsrld $25,%ymm4,%ymm4
7268 vpxor %ymm3,%ymm4,%ymm4
7269 vpalignr $4,%ymm12,%ymm12,%ymm12
7270 vpalignr $8,%ymm8,%ymm8,%ymm8
7271 vpalignr $12,%ymm4,%ymm4,%ymm4
7272 addq 16(%rdi),%r10
7273 adcq 8+16(%rdi),%r11
7274 adcq $1,%r12
7275 movq 0+0(%rbp),%rax
7276 movq %rax,%r15
7277 mulq %r10
7278 movq %rax,%r13
7279 movq %rdx,%r14
7280 movq 0+0(%rbp),%rax
7281 mulq %r11
7282 imulq %r12,%r15
7283 addq %rax,%r14
7284 adcq %rdx,%r15
7285 movq 8+0(%rbp),%rax
7286 movq %rax,%r9
7287 mulq %r10
7288 addq %rax,%r14
7289 adcq $0,%rdx
7290 movq %rdx,%r10
7291 movq 8+0(%rbp),%rax
7292 mulq %r11
7293 addq %rax,%r15
7294 adcq $0,%rdx
7295 imulq %r12,%r9
7296 addq %r10,%r15
7297 adcq %rdx,%r9
7298 movq %r13,%r10
7299 movq %r14,%r11
7300 movq %r15,%r12
7301 andq $3,%r12
7302 movq %r15,%r13
7303 andq $-4,%r13
7304 movq %r9,%r14
7305 shrdq $2,%r9,%r15
7306 shrq $2,%r9
7307 addq %r13,%r10
7308 adcq %r14,%r11
7309 adcq $0,%r12
7310 addq %r15,%r10
7311 adcq %r9,%r11
7312 adcq $0,%r12
7313
7314 leaq 32(%rdi),%rdi
7315 decq %rcx
7316 jg 1b
7317 decq %r8
7318 jge 2b
7319 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
7320 vpaddd 64(%rbp),%ymm4,%ymm4
7321 vpaddd 96(%rbp),%ymm8,%ymm8
7322 vpaddd 160(%rbp),%ymm12,%ymm12
7323 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
7324 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
7325 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
7326 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
7327 vmovdqa %ymm3,%ymm8
7328
7329 jmp seal_avx2_short_loop
7330 3:
7331 cmpq $256,%rbx
7332 ja 3f
7333
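// seal_avx2_tail_256: 129..256 bytes remain. Two interleaved states
// (counters at 160/192(%rbp)); the first 128 bytes are encrypted here
// and the final state's keystream stays in registers for the short path.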
7334 seal_avx2_tail_256:
7335 vmovdqa .chacha20_consts(%rip),%ymm0
7336 vmovdqa 64(%rbp),%ymm4
7337 vmovdqa 96(%rbp),%ymm8
7338 vmovdqa %ymm0,%ymm1
7339 vmovdqa %ymm4,%ymm5
7340 vmovdqa %ymm8,%ymm9
7341 vmovdqa .avx2_inc(%rip),%ymm12
7342 vpaddd 160(%rbp),%ymm12,%ymm13
7343 vpaddd %ymm13,%ymm12,%ymm12
7344 vmovdqa %ymm12,160(%rbp)
7345 vmovdqa %ymm13,192(%rbp)
7346
7347 1:
7348 addq 0(%rdi),%r10
7349 adcq 8+0(%rdi),%r11
7350 adcq $1,%r12
7351 movq 0+0(%rbp),%rax
7352 movq %rax,%r15
7353 mulq %r10
7354 movq %rax,%r13
7355 movq %rdx,%r14
7356 movq 0+0(%rbp),%rax
7357 mulq %r11
7358 imulq %r12,%r15
7359 addq %rax,%r14
7360 adcq %rdx,%r15
7361 movq 8+0(%rbp),%rax
7362 movq %rax,%r9
7363 mulq %r10
7364 addq %rax,%r14
7365 adcq $0,%rdx
7366 movq %rdx,%r10
7367 movq 8+0(%rbp),%rax
7368 mulq %r11
7369 addq %rax,%r15
7370 adcq $0,%rdx
7371 imulq %r12,%r9
7372 addq %r10,%r15
7373 adcq %rdx,%r9
7374 movq %r13,%r10
7375 movq %r14,%r11
7376 movq %r15,%r12
7377 andq $3,%r12
7378 movq %r15,%r13
7379 andq $-4,%r13
7380 movq %r9,%r14
7381 shrdq $2,%r9,%r15
7382 shrq $2,%r9
7383 addq %r13,%r10
7384 adcq %r14,%r11
7385 adcq $0,%r12
7386 addq %r15,%r10
7387 adcq %r9,%r11
7388 adcq $0,%r12
7389
7390 leaq 16(%rdi),%rdi
7391 2:
7392 vpaddd %ymm4,%ymm0,%ymm0
7393 vpxor %ymm0,%ymm12,%ymm12
7394 vpshufb .rol16(%rip),%ymm12,%ymm12
7395 vpaddd %ymm12,%ymm8,%ymm8
7396 vpxor %ymm8,%ymm4,%ymm4
7397 vpsrld $20,%ymm4,%ymm3
7398 vpslld $12,%ymm4,%ymm4
7399 vpxor %ymm3,%ymm4,%ymm4
7400 vpaddd %ymm4,%ymm0,%ymm0
7401 vpxor %ymm0,%ymm12,%ymm12
7402 vpshufb .rol8(%rip),%ymm12,%ymm12
7403 vpaddd %ymm12,%ymm8,%ymm8
7404 vpxor %ymm8,%ymm4,%ymm4
7405 vpslld $7,%ymm4,%ymm3
7406 vpsrld $25,%ymm4,%ymm4
7407 vpxor %ymm3,%ymm4,%ymm4
7408 vpalignr $12,%ymm12,%ymm12,%ymm12
7409 vpalignr $8,%ymm8,%ymm8,%ymm8
7410 vpalignr $4,%ymm4,%ymm4,%ymm4
7411 vpaddd %ymm5,%ymm1,%ymm1
7412 vpxor %ymm1,%ymm13,%ymm13
7413 vpshufb .rol16(%rip),%ymm13,%ymm13
7414 vpaddd %ymm13,%ymm9,%ymm9
7415 vpxor %ymm9,%ymm5,%ymm5
7416 vpsrld $20,%ymm5,%ymm3
7417 vpslld $12,%ymm5,%ymm5
7418 vpxor %ymm3,%ymm5,%ymm5
7419 vpaddd %ymm5,%ymm1,%ymm1
7420 vpxor %ymm1,%ymm13,%ymm13
7421 vpshufb .rol8(%rip),%ymm13,%ymm13
7422 vpaddd %ymm13,%ymm9,%ymm9
7423 vpxor %ymm9,%ymm5,%ymm5
7424 vpslld $7,%ymm5,%ymm3
7425 vpsrld $25,%ymm5,%ymm5
7426 vpxor %ymm3,%ymm5,%ymm5
7427 vpalignr $12,%ymm13,%ymm13,%ymm13
7428 vpalignr $8,%ymm9,%ymm9,%ymm9
7429 vpalignr $4,%ymm5,%ymm5,%ymm5
7430 addq 0(%rdi),%r10
7431 adcq 8+0(%rdi),%r11
7432 adcq $1,%r12
7433 movq 0+0(%rbp),%rax
7434 movq %rax,%r15
7435 mulq %r10
7436 movq %rax,%r13
7437 movq %rdx,%r14
7438 movq 0+0(%rbp),%rax
7439 mulq %r11
7440 imulq %r12,%r15
7441 addq %rax,%r14
7442 adcq %rdx,%r15
7443 movq 8+0(%rbp),%rax
7444 movq %rax,%r9
7445 mulq %r10
7446 addq %rax,%r14
7447 adcq $0,%rdx
7448 movq %rdx,%r10
7449 movq 8+0(%rbp),%rax
7450 mulq %r11
7451 addq %rax,%r15
7452 adcq $0,%rdx
7453 imulq %r12,%r9
7454 addq %r10,%r15
7455 adcq %rdx,%r9
7456 movq %r13,%r10
7457 movq %r14,%r11
7458 movq %r15,%r12
7459 andq $3,%r12
7460 movq %r15,%r13
7461 andq $-4,%r13
7462 movq %r9,%r14
7463 shrdq $2,%r9,%r15
7464 shrq $2,%r9
7465 addq %r13,%r10
7466 adcq %r14,%r11
7467 adcq $0,%r12
7468 addq %r15,%r10
7469 adcq %r9,%r11
7470 adcq $0,%r12
7471 vpaddd %ymm4,%ymm0,%ymm0
7472 vpxor %ymm0,%ymm12,%ymm12
7473 vpshufb .rol16(%rip),%ymm12,%ymm12
7474 vpaddd %ymm12,%ymm8,%ymm8
7475 vpxor %ymm8,%ymm4,%ymm4
7476 vpsrld $20,%ymm4,%ymm3
7477 vpslld $12,%ymm4,%ymm4
7478 vpxor %ymm3,%ymm4,%ymm4
7479 vpaddd %ymm4,%ymm0,%ymm0
7480 vpxor %ymm0,%ymm12,%ymm12
7481 vpshufb .rol8(%rip),%ymm12,%ymm12
7482 vpaddd %ymm12,%ymm8,%ymm8
7483 vpxor %ymm8,%ymm4,%ymm4
7484 vpslld $7,%ymm4,%ymm3
7485 vpsrld $25,%ymm4,%ymm4
7486 vpxor %ymm3,%ymm4,%ymm4
7487 vpalignr $4,%ymm12,%ymm12,%ymm12
7488 vpalignr $8,%ymm8,%ymm8,%ymm8
7489 vpalignr $12,%ymm4,%ymm4,%ymm4
7490 vpaddd %ymm5,%ymm1,%ymm1
7491 vpxor %ymm1,%ymm13,%ymm13
7492 vpshufb .rol16(%rip),%ymm13,%ymm13
7493 vpaddd %ymm13,%ymm9,%ymm9
7494 vpxor %ymm9,%ymm5,%ymm5
7495 vpsrld $20,%ymm5,%ymm3
7496 vpslld $12,%ymm5,%ymm5
7497 vpxor %ymm3,%ymm5,%ymm5
7498 vpaddd %ymm5,%ymm1,%ymm1
7499 vpxor %ymm1,%ymm13,%ymm13
7500 vpshufb .rol8(%rip),%ymm13,%ymm13
7501 vpaddd %ymm13,%ymm9,%ymm9
7502 vpxor %ymm9,%ymm5,%ymm5
7503 vpslld $7,%ymm5,%ymm3
7504 vpsrld $25,%ymm5,%ymm5
7505 vpxor %ymm3,%ymm5,%ymm5
7506 vpalignr $4,%ymm13,%ymm13,%ymm13
7507 vpalignr $8,%ymm9,%ymm9,%ymm9
7508 vpalignr $12,%ymm5,%ymm5,%ymm5
7509 addq 16(%rdi),%r10
7510 adcq 8+16(%rdi),%r11
7511 adcq $1,%r12
7512 movq 0+0(%rbp),%rax
7513 movq %rax,%r15
7514 mulq %r10
7515 movq %rax,%r13
7516 movq %rdx,%r14
7517 movq 0+0(%rbp),%rax
7518 mulq %r11
7519 imulq %r12,%r15
7520 addq %rax,%r14
7521 adcq %rdx,%r15
7522 movq 8+0(%rbp),%rax
7523 movq %rax,%r9
7524 mulq %r10
7525 addq %rax,%r14
7526 adcq $0,%rdx
7527 movq %rdx,%r10
7528 movq 8+0(%rbp),%rax
7529 mulq %r11
7530 addq %rax,%r15
7531 adcq $0,%rdx
7532 imulq %r12,%r9
7533 addq %r10,%r15
7534 adcq %rdx,%r9
7535 movq %r13,%r10
7536 movq %r14,%r11
7537 movq %r15,%r12
7538 andq $3,%r12
7539 movq %r15,%r13
7540 andq $-4,%r13
7541 movq %r9,%r14
7542 shrdq $2,%r9,%r15
7543 shrq $2,%r9
7544 addq %r13,%r10
7545 adcq %r14,%r11
7546 adcq $0,%r12
7547 addq %r15,%r10
7548 adcq %r9,%r11
7549 adcq $0,%r12
7550
7551 leaq 32(%rdi),%rdi
7552 decq %rcx
7553 jg 1b
7554 decq %r8
7555 jge 2b
7556 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
7557 vpaddd 64(%rbp),%ymm5,%ymm5
7558 vpaddd 96(%rbp),%ymm9,%ymm9
7559 vpaddd 192(%rbp),%ymm13,%ymm13
7560 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
7561 vpaddd 64(%rbp),%ymm4,%ymm4
7562 vpaddd 96(%rbp),%ymm8,%ymm8
7563 vpaddd 160(%rbp),%ymm12,%ymm12
7564 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
7565 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
7566 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
7567 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
7568 vpxor 0+0(%rsi),%ymm3,%ymm3
7569 vpxor 32+0(%rsi),%ymm1,%ymm1
7570 vpxor 64+0(%rsi),%ymm5,%ymm5
7571 vpxor 96+0(%rsi),%ymm9,%ymm9
7572 vmovdqu %ymm3,0+0(%rdi)
7573 vmovdqu %ymm1,32+0(%rdi)
7574 vmovdqu %ymm5,64+0(%rdi)
7575 vmovdqu %ymm9,96+0(%rdi)
7576 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
7577 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
7578 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
7579 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
7580 vmovdqa %ymm3,%ymm8
7581
7582 movq $128,%rcx
7583 leaq 128(%rsi),%rsi
7584 subq $128,%rbx
7585 jmp seal_avx2_hash
7586 3:
7587 cmpq $384,%rbx
7588 ja seal_avx2_tail_512
7589
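// seal_avx2_tail_384: 257..384 bytes remain. Three interleaved states;
// 256 bytes are encrypted here, then seal_avx2_hash catches up on the
// ciphertext before the short loop consumes the remaining keystream.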
7590 seal_avx2_tail_384:
7591 vmovdqa .chacha20_consts(%rip),%ymm0
7592 vmovdqa 64(%rbp),%ymm4
7593 vmovdqa 96(%rbp),%ymm8
7594 vmovdqa %ymm0,%ymm1
7595 vmovdqa %ymm4,%ymm5
7596 vmovdqa %ymm8,%ymm9
7597 vmovdqa %ymm0,%ymm2
7598 vmovdqa %ymm4,%ymm6
7599 vmovdqa %ymm8,%ymm10
7600 vmovdqa .avx2_inc(%rip),%ymm12
7601 vpaddd 160(%rbp),%ymm12,%ymm14
7602 vpaddd %ymm14,%ymm12,%ymm13
7603 vpaddd %ymm13,%ymm12,%ymm12
7604 vmovdqa %ymm12,160(%rbp)
7605 vmovdqa %ymm13,192(%rbp)
7606 vmovdqa %ymm14,224(%rbp)
7607
7608 1:
7609 addq 0(%rdi),%r10
7610 adcq 8+0(%rdi),%r11
7611 adcq $1,%r12
7612 movq 0+0(%rbp),%rax
7613 movq %rax,%r15
7614 mulq %r10
7615 movq %rax,%r13
7616 movq %rdx,%r14
7617 movq 0+0(%rbp),%rax
7618 mulq %r11
7619 imulq %r12,%r15
7620 addq %rax,%r14
7621 adcq %rdx,%r15
7622 movq 8+0(%rbp),%rax
7623 movq %rax,%r9
7624 mulq %r10
7625 addq %rax,%r14
7626 adcq $0,%rdx
7627 movq %rdx,%r10
7628 movq 8+0(%rbp),%rax
7629 mulq %r11
7630 addq %rax,%r15
7631 adcq $0,%rdx
7632 imulq %r12,%r9
7633 addq %r10,%r15
7634 adcq %rdx,%r9
7635 movq %r13,%r10
7636 movq %r14,%r11
7637 movq %r15,%r12
7638 andq $3,%r12
7639 movq %r15,%r13
7640 andq $-4,%r13
7641 movq %r9,%r14
7642 shrdq $2,%r9,%r15
7643 shrq $2,%r9
7644 addq %r13,%r10
7645 adcq %r14,%r11
7646 adcq $0,%r12
7647 addq %r15,%r10
7648 adcq %r9,%r11
7649 adcq $0,%r12
7650
7651 leaq 16(%rdi),%rdi
7652 2:
7653 vpaddd %ymm4,%ymm0,%ymm0
7654 vpxor %ymm0,%ymm12,%ymm12
7655 vpshufb .rol16(%rip),%ymm12,%ymm12
7656 vpaddd %ymm12,%ymm8,%ymm8
7657 vpxor %ymm8,%ymm4,%ymm4
7658 vpsrld $20,%ymm4,%ymm3
7659 vpslld $12,%ymm4,%ymm4
7660 vpxor %ymm3,%ymm4,%ymm4
7661 vpaddd %ymm4,%ymm0,%ymm0
7662 vpxor %ymm0,%ymm12,%ymm12
7663 vpshufb .rol8(%rip),%ymm12,%ymm12
7664 vpaddd %ymm12,%ymm8,%ymm8
7665 vpxor %ymm8,%ymm4,%ymm4
7666 vpslld $7,%ymm4,%ymm3
7667 vpsrld $25,%ymm4,%ymm4
7668 vpxor %ymm3,%ymm4,%ymm4
7669 vpalignr $12,%ymm12,%ymm12,%ymm12
7670 vpalignr $8,%ymm8,%ymm8,%ymm8
7671 vpalignr $4,%ymm4,%ymm4,%ymm4
7672 vpaddd %ymm5,%ymm1,%ymm1
7673 vpxor %ymm1,%ymm13,%ymm13
7674 vpshufb .rol16(%rip),%ymm13,%ymm13
7675 vpaddd %ymm13,%ymm9,%ymm9
7676 vpxor %ymm9,%ymm5,%ymm5
7677 vpsrld $20,%ymm5,%ymm3
7678 vpslld $12,%ymm5,%ymm5
7679 vpxor %ymm3,%ymm5,%ymm5
7680 vpaddd %ymm5,%ymm1,%ymm1
7681 vpxor %ymm1,%ymm13,%ymm13
7682 vpshufb .rol8(%rip),%ymm13,%ymm13
7683 vpaddd %ymm13,%ymm9,%ymm9
7684 vpxor %ymm9,%ymm5,%ymm5
7685 vpslld $7,%ymm5,%ymm3
7686 vpsrld $25,%ymm5,%ymm5
7687 vpxor %ymm3,%ymm5,%ymm5
7688 vpalignr $12,%ymm13,%ymm13,%ymm13
7689 vpalignr $8,%ymm9,%ymm9,%ymm9
7690 vpalignr $4,%ymm5,%ymm5,%ymm5
7691 addq 0(%rdi),%r10
7692 adcq 8+0(%rdi),%r11
7693 adcq $1,%r12
7694 movq 0+0(%rbp),%rax
7695 movq %rax,%r15
7696 mulq %r10
7697 movq %rax,%r13
7698 movq %rdx,%r14
7699 movq 0+0(%rbp),%rax
7700 mulq %r11
7701 imulq %r12,%r15
7702 addq %rax,%r14
7703 adcq %rdx,%r15
7704 movq 8+0(%rbp),%rax
7705 movq %rax,%r9
7706 mulq %r10
7707 addq %rax,%r14
7708 adcq $0,%rdx
7709 movq %rdx,%r10
7710 movq 8+0(%rbp),%rax
7711 mulq %r11
7712 addq %rax,%r15
7713 adcq $0,%rdx
7714 imulq %r12,%r9
7715 addq %r10,%r15
7716 adcq %rdx,%r9
7717 movq %r13,%r10
7718 movq %r14,%r11
7719 movq %r15,%r12
7720 andq $3,%r12
7721 movq %r15,%r13
7722 andq $-4,%r13
7723 movq %r9,%r14
7724 shrdq $2,%r9,%r15
7725 shrq $2,%r9
7726 addq %r13,%r10
7727 adcq %r14,%r11
7728 adcq $0,%r12
7729 addq %r15,%r10
7730 adcq %r9,%r11
7731 adcq $0,%r12
7732 vpaddd %ymm6,%ymm2,%ymm2
7733 vpxor %ymm2,%ymm14,%ymm14
7734 vpshufb .rol16(%rip),%ymm14,%ymm14
7735 vpaddd %ymm14,%ymm10,%ymm10
7736 vpxor %ymm10,%ymm6,%ymm6
7737 vpsrld $20,%ymm6,%ymm3
7738 vpslld $12,%ymm6,%ymm6
7739 vpxor %ymm3,%ymm6,%ymm6
7740 vpaddd %ymm6,%ymm2,%ymm2
7741 vpxor %ymm2,%ymm14,%ymm14
7742 vpshufb .rol8(%rip),%ymm14,%ymm14
7743 vpaddd %ymm14,%ymm10,%ymm10
7744 vpxor %ymm10,%ymm6,%ymm6
7745 vpslld $7,%ymm6,%ymm3
7746 vpsrld $25,%ymm6,%ymm6
7747 vpxor %ymm3,%ymm6,%ymm6
7748 vpalignr $12,%ymm14,%ymm14,%ymm14
7749 vpalignr $8,%ymm10,%ymm10,%ymm10
7750 vpalignr $4,%ymm6,%ymm6,%ymm6
7751 vpaddd %ymm4,%ymm0,%ymm0
7752 vpxor %ymm0,%ymm12,%ymm12
7753 vpshufb .rol16(%rip),%ymm12,%ymm12
7754 vpaddd %ymm12,%ymm8,%ymm8
7755 vpxor %ymm8,%ymm4,%ymm4
7756 vpsrld $20,%ymm4,%ymm3
7757 vpslld $12,%ymm4,%ymm4
7758 vpxor %ymm3,%ymm4,%ymm4
7759 vpaddd %ymm4,%ymm0,%ymm0
7760 vpxor %ymm0,%ymm12,%ymm12
7761 vpshufb .rol8(%rip),%ymm12,%ymm12
7762 vpaddd %ymm12,%ymm8,%ymm8
7763 vpxor %ymm8,%ymm4,%ymm4
7764 vpslld $7,%ymm4,%ymm3
7765 vpsrld $25,%ymm4,%ymm4
7766 vpxor %ymm3,%ymm4,%ymm4
7767 vpalignr $4,%ymm12,%ymm12,%ymm12
7768 vpalignr $8,%ymm8,%ymm8,%ymm8
7769 vpalignr $12,%ymm4,%ymm4,%ymm4
7770 addq 16(%rdi),%r10
7771 adcq 8+16(%rdi),%r11
7772 adcq $1,%r12
7773 movq 0+0(%rbp),%rax
7774 movq %rax,%r15
7775 mulq %r10
7776 movq %rax,%r13
7777 movq %rdx,%r14
7778 movq 0+0(%rbp),%rax
7779 mulq %r11
7780 imulq %r12,%r15
7781 addq %rax,%r14
7782 adcq %rdx,%r15
7783 movq 8+0(%rbp),%rax
7784 movq %rax,%r9
7785 mulq %r10
7786 addq %rax,%r14
7787 adcq $0,%rdx
7788 movq %rdx,%r10
7789 movq 8+0(%rbp),%rax
7790 mulq %r11
7791 addq %rax,%r15
7792 adcq $0,%rdx
7793 imulq %r12,%r9
7794 addq %r10,%r15
7795 adcq %rdx,%r9
7796 movq %r13,%r10
7797 movq %r14,%r11
7798 movq %r15,%r12
7799 andq $3,%r12
7800 movq %r15,%r13
7801 andq $-4,%r13
7802 movq %r9,%r14
7803 shrdq $2,%r9,%r15
7804 shrq $2,%r9
7805 addq %r13,%r10
7806 adcq %r14,%r11
7807 adcq $0,%r12
7808 addq %r15,%r10
7809 adcq %r9,%r11
7810 adcq $0,%r12
7811 vpaddd %ymm5,%ymm1,%ymm1
7812 vpxor %ymm1,%ymm13,%ymm13
7813 vpshufb .rol16(%rip),%ymm13,%ymm13
7814 vpaddd %ymm13,%ymm9,%ymm9
7815 vpxor %ymm9,%ymm5,%ymm5
7816 vpsrld $20,%ymm5,%ymm3
7817 vpslld $12,%ymm5,%ymm5
7818 vpxor %ymm3,%ymm5,%ymm5
7819 vpaddd %ymm5,%ymm1,%ymm1
7820 vpxor %ymm1,%ymm13,%ymm13
7821 vpshufb .rol8(%rip),%ymm13,%ymm13
7822 vpaddd %ymm13,%ymm9,%ymm9
7823 vpxor %ymm9,%ymm5,%ymm5
7824 vpslld $7,%ymm5,%ymm3
7825 vpsrld $25,%ymm5,%ymm5
7826 vpxor %ymm3,%ymm5,%ymm5
7827 vpalignr $4,%ymm13,%ymm13,%ymm13
7828 vpalignr $8,%ymm9,%ymm9,%ymm9
7829 vpalignr $12,%ymm5,%ymm5,%ymm5
7830 vpaddd %ymm6,%ymm2,%ymm2
7831 vpxor %ymm2,%ymm14,%ymm14
7832 vpshufb .rol16(%rip),%ymm14,%ymm14
7833 vpaddd %ymm14,%ymm10,%ymm10
7834 vpxor %ymm10,%ymm6,%ymm6
7835 vpsrld $20,%ymm6,%ymm3
7836 vpslld $12,%ymm6,%ymm6
7837 vpxor %ymm3,%ymm6,%ymm6
7838 vpaddd %ymm6,%ymm2,%ymm2
7839 vpxor %ymm2,%ymm14,%ymm14
7840 vpshufb .rol8(%rip),%ymm14,%ymm14
7841 vpaddd %ymm14,%ymm10,%ymm10
7842 vpxor %ymm10,%ymm6,%ymm6
7843 vpslld $7,%ymm6,%ymm3
7844 vpsrld $25,%ymm6,%ymm6
7845 vpxor %ymm3,%ymm6,%ymm6
7846 vpalignr $4,%ymm14,%ymm14,%ymm14
7847 vpalignr $8,%ymm10,%ymm10,%ymm10
7848 vpalignr $12,%ymm6,%ymm6,%ymm6
7849
7850 leaq 32(%rdi),%rdi
7851 decq %rcx
7852 jg 1b
7853 decq %r8
7854 jge 2b
7855 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
7856 vpaddd 64(%rbp),%ymm6,%ymm6
7857 vpaddd 96(%rbp),%ymm10,%ymm10
7858 vpaddd 224(%rbp),%ymm14,%ymm14
7859 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
7860 vpaddd 64(%rbp),%ymm5,%ymm5
7861 vpaddd 96(%rbp),%ymm9,%ymm9
7862 vpaddd 192(%rbp),%ymm13,%ymm13
7863 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
7864 vpaddd 64(%rbp),%ymm4,%ymm4
7865 vpaddd 96(%rbp),%ymm8,%ymm8
7866 vpaddd 160(%rbp),%ymm12,%ymm12
7867 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
7868 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
7869 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
7870 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
7871 vpxor 0+0(%rsi),%ymm3,%ymm3
7872 vpxor 32+0(%rsi),%ymm2,%ymm2
7873 vpxor 64+0(%rsi),%ymm6,%ymm6
7874 vpxor 96+0(%rsi),%ymm10,%ymm10
7875 vmovdqu %ymm3,0+0(%rdi)
7876 vmovdqu %ymm2,32+0(%rdi)
7877 vmovdqu %ymm6,64+0(%rdi)
7878 vmovdqu %ymm10,96+0(%rdi)
7879 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
7880 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
7881 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
7882 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
7883 vpxor 0+128(%rsi),%ymm3,%ymm3
7884 vpxor 32+128(%rsi),%ymm1,%ymm1
7885 vpxor 64+128(%rsi),%ymm5,%ymm5
7886 vpxor 96+128(%rsi),%ymm9,%ymm9
7887 vmovdqu %ymm3,0+128(%rdi)
7888 vmovdqu %ymm1,32+128(%rdi)
7889 vmovdqu %ymm5,64+128(%rdi)
7890 vmovdqu %ymm9,96+128(%rdi)
7891 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
7892 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
7893 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
7894 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
7895 vmovdqa %ymm3,%ymm8
7896
7897 movq $256,%rcx
7898 leaq 256(%rsi),%rsi
7899 subq $256,%rbx
7900 jmp seal_avx2_hash
7901
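// seal_avx2_tail_512: 385..512 bytes remain. Four interleaved states,
// with the Poly1305 multiply in its mulx (BMI2) form. 384 bytes are
// encrypted here; the last 128 bytes of keystream stay in registers.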
7902 seal_avx2_tail_512:
7903 vmovdqa .chacha20_consts(%rip),%ymm0
7904 vmovdqa 64(%rbp),%ymm4
7905 vmovdqa 96(%rbp),%ymm8
7906 vmovdqa %ymm0,%ymm1
7907 vmovdqa %ymm4,%ymm5
7908 vmovdqa %ymm8,%ymm9
7909 vmovdqa %ymm0,%ymm2
7910 vmovdqa %ymm4,%ymm6
7911 vmovdqa %ymm8,%ymm10
7912 vmovdqa %ymm0,%ymm3
7913 vmovdqa %ymm4,%ymm7
7914 vmovdqa %ymm8,%ymm11
7915 vmovdqa .avx2_inc(%rip),%ymm12
7916 vpaddd 160(%rbp),%ymm12,%ymm15
7917 vpaddd %ymm15,%ymm12,%ymm14
7918 vpaddd %ymm14,%ymm12,%ymm13
7919 vpaddd %ymm13,%ymm12,%ymm12
7920 vmovdqa %ymm15,256(%rbp)
7921 vmovdqa %ymm14,224(%rbp)
7922 vmovdqa %ymm13,192(%rbp)
7923 vmovdqa %ymm12,160(%rbp)
7924
7925 1:
7926 addq 0(%rdi),%r10
7927 adcq 8+0(%rdi),%r11
7928 adcq $1,%r12
7929 movq 0+0(%rbp),%rdx
7930 movq %rdx,%r15
7931 mulxq %r10,%r13,%r14
7932 mulxq %r11,%rax,%rdx
7933 imulq %r12,%r15
7934 addq %rax,%r14
7935 adcq %rdx,%r15
7936 movq 8+0(%rbp),%rdx
7937 mulxq %r10,%r10,%rax
7938 addq %r10,%r14
7939 mulxq %r11,%r11,%r9
7940 adcq %r11,%r15
7941 adcq $0,%r9
7942 imulq %r12,%rdx
7943 addq %rax,%r15
7944 adcq %rdx,%r9
7945 movq %r13,%r10
7946 movq %r14,%r11
7947 movq %r15,%r12
7948 andq $3,%r12
7949 movq %r15,%r13
7950 andq $-4,%r13
7951 movq %r9,%r14
7952 shrdq $2,%r9,%r15
7953 shrq $2,%r9
7954 addq %r13,%r10
7955 adcq %r14,%r11
7956 adcq $0,%r12
7957 addq %r15,%r10
7958 adcq %r9,%r11
7959 adcq $0,%r12
7960
7961 leaq 16(%rdi),%rdi
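// 2: four-state double round. %ymm8 is spilled to 128(%rbp) whenever
// its register is needed as scratch for the .rol16/.rol8 shuffle masks
// and the rotate temporaries.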
7962 2:
7963 vmovdqa %ymm8,128(%rbp)
7964 vmovdqa .rol16(%rip),%ymm8
7965 vpaddd %ymm7,%ymm3,%ymm3
7966 vpaddd %ymm6,%ymm2,%ymm2
7967 vpaddd %ymm5,%ymm1,%ymm1
7968 vpaddd %ymm4,%ymm0,%ymm0
7969 vpxor %ymm3,%ymm15,%ymm15
7970 vpxor %ymm2,%ymm14,%ymm14
7971 vpxor %ymm1,%ymm13,%ymm13
7972 vpxor %ymm0,%ymm12,%ymm12
7973 vpshufb %ymm8,%ymm15,%ymm15
7974 vpshufb %ymm8,%ymm14,%ymm14
7975 vpshufb %ymm8,%ymm13,%ymm13
7976 vpshufb %ymm8,%ymm12,%ymm12
7977 vmovdqa 128(%rbp),%ymm8
7978 vpaddd %ymm15,%ymm11,%ymm11
7979 vpaddd %ymm14,%ymm10,%ymm10
7980 vpaddd %ymm13,%ymm9,%ymm9
7981 vpaddd %ymm12,%ymm8,%ymm8
7982 vpxor %ymm11,%ymm7,%ymm7
7983 addq 0(%rdi),%r10
7984 adcq 8+0(%rdi),%r11
7985 adcq $1,%r12
7986 vpxor %ymm10,%ymm6,%ymm6
7987 vpxor %ymm9,%ymm5,%ymm5
7988 vpxor %ymm8,%ymm4,%ymm4
7989 vmovdqa %ymm8,128(%rbp)
7990 vpsrld $20,%ymm7,%ymm8
7991 vpslld $32-20,%ymm7,%ymm7
7992 vpxor %ymm8,%ymm7,%ymm7
7993 vpsrld $20,%ymm6,%ymm8
7994 vpslld $32-20,%ymm6,%ymm6
7995 vpxor %ymm8,%ymm6,%ymm6
7996 vpsrld $20,%ymm5,%ymm8
7997 vpslld $32-20,%ymm5,%ymm5
7998 vpxor %ymm8,%ymm5,%ymm5
7999 vpsrld $20,%ymm4,%ymm8
8000 vpslld $32-20,%ymm4,%ymm4
8001 vpxor %ymm8,%ymm4,%ymm4
8002 vmovdqa .rol8(%rip),%ymm8
8003 vpaddd %ymm7,%ymm3,%ymm3
8004 vpaddd %ymm6,%ymm2,%ymm2
8005 vpaddd %ymm5,%ymm1,%ymm1
8006 movq 0+0(%rbp),%rdx
8007 movq %rdx,%r15
8008 mulxq %r10,%r13,%r14
8009 mulxq %r11,%rax,%rdx
8010 imulq %r12,%r15
8011 addq %rax,%r14
8012 adcq %rdx,%r15
8013 vpaddd %ymm4,%ymm0,%ymm0
8014 vpxor %ymm3,%ymm15,%ymm15
8015 vpxor %ymm2,%ymm14,%ymm14
8016 vpxor %ymm1,%ymm13,%ymm13
8017 vpxor %ymm0,%ymm12,%ymm12
8018 vpshufb %ymm8,%ymm15,%ymm15
8019 vpshufb %ymm8,%ymm14,%ymm14
8020 vpshufb %ymm8,%ymm13,%ymm13
8021 vpshufb %ymm8,%ymm12,%ymm12
8022 vmovdqa 128(%rbp),%ymm8
8023 vpaddd %ymm15,%ymm11,%ymm11
8024 vpaddd %ymm14,%ymm10,%ymm10
8025 vpaddd %ymm13,%ymm9,%ymm9
8026 vpaddd %ymm12,%ymm8,%ymm8
8027 vpxor %ymm11,%ymm7,%ymm7
8028 vpxor %ymm10,%ymm6,%ymm6
8029 vpxor %ymm9,%ymm5,%ymm5
8030 vpxor %ymm8,%ymm4,%ymm4
8031 vmovdqa %ymm8,128(%rbp)
8032 vpsrld $25,%ymm7,%ymm8
8033 movq 8+0(%rbp),%rdx
8034 mulxq %r10,%r10,%rax
8035 addq %r10,%r14
8036 mulxq %r11,%r11,%r9
8037 adcq %r11,%r15
8038 adcq $0,%r9
8039 imulq %r12,%rdx
8040 vpslld $32-25,%ymm7,%ymm7
8041 vpxor %ymm8,%ymm7,%ymm7
8042 vpsrld $25,%ymm6,%ymm8
8043 vpslld $32-25,%ymm6,%ymm6
8044 vpxor %ymm8,%ymm6,%ymm6
8045 vpsrld $25,%ymm5,%ymm8
8046 vpslld $32-25,%ymm5,%ymm5
8047 vpxor %ymm8,%ymm5,%ymm5
8048 vpsrld $25,%ymm4,%ymm8
8049 vpslld $32-25,%ymm4,%ymm4
8050 vpxor %ymm8,%ymm4,%ymm4
8051 vmovdqa 128(%rbp),%ymm8
8052 vpalignr $4,%ymm7,%ymm7,%ymm7
8053 vpalignr $8,%ymm11,%ymm11,%ymm11
8054 vpalignr $12,%ymm15,%ymm15,%ymm15
8055 vpalignr $4,%ymm6,%ymm6,%ymm6
8056 vpalignr $8,%ymm10,%ymm10,%ymm10
8057 vpalignr $12,%ymm14,%ymm14,%ymm14
8058 vpalignr $4,%ymm5,%ymm5,%ymm5
8059 vpalignr $8,%ymm9,%ymm9,%ymm9
8060 addq %rax,%r15
8061 adcq %rdx,%r9
8062 vpalignr $12,%ymm13,%ymm13,%ymm13
8063 vpalignr $4,%ymm4,%ymm4,%ymm4
8064 vpalignr $8,%ymm8,%ymm8,%ymm8
8065 vpalignr $12,%ymm12,%ymm12,%ymm12
8066 vmovdqa %ymm8,128(%rbp)
8067 vmovdqa .rol16(%rip),%ymm8
8068 vpaddd %ymm7,%ymm3,%ymm3
8069 vpaddd %ymm6,%ymm2,%ymm2
8070 vpaddd %ymm5,%ymm1,%ymm1
8071 vpaddd %ymm4,%ymm0,%ymm0
8072 vpxor %ymm3,%ymm15,%ymm15
8073 vpxor %ymm2,%ymm14,%ymm14
8074 vpxor %ymm1,%ymm13,%ymm13
8075 vpxor %ymm0,%ymm12,%ymm12
8076 vpshufb %ymm8,%ymm15,%ymm15
8077 vpshufb %ymm8,%ymm14,%ymm14
8078 vpshufb %ymm8,%ymm13,%ymm13
8079 vpshufb %ymm8,%ymm12,%ymm12
8080 vmovdqa 128(%rbp),%ymm8
8081 vpaddd %ymm15,%ymm11,%ymm11
8082 movq %r13,%r10
8083 movq %r14,%r11
8084 movq %r15,%r12
8085 andq $3,%r12
8086 movq %r15,%r13
8087 andq $-4,%r13
8088 movq %r9,%r14
8089 shrdq $2,%r9,%r15
8090 shrq $2,%r9
8091 addq %r13,%r10
8092 adcq %r14,%r11
8093 adcq $0,%r12
8094 addq %r15,%r10
8095 adcq %r9,%r11
8096 adcq $0,%r12
8097 vpaddd %ymm14,%ymm10,%ymm10
8098 vpaddd %ymm13,%ymm9,%ymm9
8099 vpaddd %ymm12,%ymm8,%ymm8
8100 vpxor %ymm11,%ymm7,%ymm7
8101 vpxor %ymm10,%ymm6,%ymm6
8102 vpxor %ymm9,%ymm5,%ymm5
8103 vpxor %ymm8,%ymm4,%ymm4
8104 vmovdqa %ymm8,128(%rbp)
8105 vpsrld $20,%ymm7,%ymm8
8106 vpslld $32-20,%ymm7,%ymm7
8107 vpxor %ymm8,%ymm7,%ymm7
8108 vpsrld $20,%ymm6,%ymm8
8109 vpslld $32-20,%ymm6,%ymm6
8110 vpxor %ymm8,%ymm6,%ymm6
8111 vpsrld $20,%ymm5,%ymm8
8112 vpslld $32-20,%ymm5,%ymm5
8113 vpxor %ymm8,%ymm5,%ymm5
8114 vpsrld $20,%ymm4,%ymm8
8115 vpslld $32-20,%ymm4,%ymm4
8116 vpxor %ymm8,%ymm4,%ymm4
8117 addq 16(%rdi),%r10
8118 adcq 8+16(%rdi),%r11
8119 adcq $1,%r12
8120 vmovdqa .rol8(%rip),%ymm8
8121 vpaddd %ymm7,%ymm3,%ymm3
8122 vpaddd %ymm6,%ymm2,%ymm2
8123 vpaddd %ymm5,%ymm1,%ymm1
8124 vpaddd %ymm4,%ymm0,%ymm0
8125 vpxor %ymm3,%ymm15,%ymm15
8126 vpxor %ymm2,%ymm14,%ymm14
8127 vpxor %ymm1,%ymm13,%ymm13
8128 vpxor %ymm0,%ymm12,%ymm12
8129 vpshufb %ymm8,%ymm15,%ymm15
8130 vpshufb %ymm8,%ymm14,%ymm14
8131 vpshufb %ymm8,%ymm13,%ymm13
8132 vpshufb %ymm8,%ymm12,%ymm12
8133 vmovdqa 128(%rbp),%ymm8
8134 vpaddd %ymm15,%ymm11,%ymm11
8135 vpaddd %ymm14,%ymm10,%ymm10
8136 vpaddd %ymm13,%ymm9,%ymm9
8137 vpaddd %ymm12,%ymm8,%ymm8
8138 vpxor %ymm11,%ymm7,%ymm7
8139 vpxor %ymm10,%ymm6,%ymm6
8140 movq 0+0(%rbp),%rdx
8141 movq %rdx,%r15
8142 mulxq %r10,%r13,%r14
8143 mulxq %r11,%rax,%rdx
8144 imulq %r12,%r15
8145 addq %rax,%r14
8146 adcq %rdx,%r15
8147 vpxor %ymm9,%ymm5,%ymm5
8148 vpxor %ymm8,%ymm4,%ymm4
8149 vmovdqa %ymm8,128(%rbp)
8150 vpsrld $25,%ymm7,%ymm8
8151 vpslld $32-25,%ymm7,%ymm7
8152 vpxor %ymm8,%ymm7,%ymm7
8153 vpsrld $25,%ymm6,%ymm8
8154 vpslld $32-25,%ymm6,%ymm6
8155 vpxor %ymm8,%ymm6,%ymm6
8156 vpsrld $25,%ymm5,%ymm8
8157 vpslld $32-25,%ymm5,%ymm5
8158 vpxor %ymm8,%ymm5,%ymm5
8159 vpsrld $25,%ymm4,%ymm8
8160 vpslld $32-25,%ymm4,%ymm4
8161 vpxor %ymm8,%ymm4,%ymm4
8162 vmovdqa 128(%rbp),%ymm8
8163 vpalignr $12,%ymm7,%ymm7,%ymm7
8164 vpalignr $8,%ymm11,%ymm11,%ymm11
8165 vpalignr $4,%ymm15,%ymm15,%ymm15
8166 vpalignr $12,%ymm6,%ymm6,%ymm6
8167 movq 8+0(%rbp),%rdx
8168 mulxq %r10,%r10,%rax
8169 addq %r10,%r14
8170 mulxq %r11,%r11,%r9
8171 adcq %r11,%r15
8172 adcq $0,%r9
8173 imulq %r12,%rdx
8174 vpalignr $8,%ymm10,%ymm10,%ymm10
8175 vpalignr $4,%ymm14,%ymm14,%ymm14
8176 vpalignr $12,%ymm5,%ymm5,%ymm5
8177 vpalignr $8,%ymm9,%ymm9,%ymm9
8178 vpalignr $4,%ymm13,%ymm13,%ymm13
8179 vpalignr $12,%ymm4,%ymm4,%ymm4
8180 vpalignr $8,%ymm8,%ymm8,%ymm8
8181 vpalignr $4,%ymm12,%ymm12,%ymm12
8182
8183
8184
8185
8186
8187
8188
8189
8190
8191
8192
8193
8194 addq %rax,%r15
8195 adcq %rdx,%r9
8196
8197
8198
8199
8200
8201
8202
8203
8204
8205
8206
8207
8208
8209
8210
8211
8212
8213
8214
8215
8216 movq %r13,%r10
8217 movq %r14,%r11
8218 movq %r15,%r12
8219 andq $3,%r12
8220 movq %r15,%r13
8221 andq $-4,%r13
8222 movq %r9,%r14
8223 shrdq $2,%r9,%r15
8224 shrq $2,%r9
8225 addq %r13,%r10
8226 adcq %r14,%r11
8227 adcq $0,%r12
8228 addq %r15,%r10
8229 adcq %r9,%r11
8230 adcq $0,%r12
8231
8232 leaq 32(%rdi),%rdi
8233 decq %rcx
8234 jg 1b
8235 decq %r8
8236 jge 2b
8237 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
8238 vpaddd 64(%rbp),%ymm7,%ymm7
8239 vpaddd 96(%rbp),%ymm11,%ymm11
8240 vpaddd 256(%rbp),%ymm15,%ymm15
8241 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
8242 vpaddd 64(%rbp),%ymm6,%ymm6
8243 vpaddd 96(%rbp),%ymm10,%ymm10
8244 vpaddd 224(%rbp),%ymm14,%ymm14
8245 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
8246 vpaddd 64(%rbp),%ymm5,%ymm5
8247 vpaddd 96(%rbp),%ymm9,%ymm9
8248 vpaddd 192(%rbp),%ymm13,%ymm13
8249 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
8250 vpaddd 64(%rbp),%ymm4,%ymm4
8251 vpaddd 96(%rbp),%ymm8,%ymm8
8252 vpaddd 160(%rbp),%ymm12,%ymm12
8253
8254 vmovdqa %ymm0,128(%rbp)
8255 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
8256 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
8257 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
8258 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
8259 vpxor 0+0(%rsi),%ymm0,%ymm0
8260 vpxor 32+0(%rsi),%ymm3,%ymm3
8261 vpxor 64+0(%rsi),%ymm7,%ymm7
8262 vpxor 96+0(%rsi),%ymm11,%ymm11
8263 vmovdqu %ymm0,0+0(%rdi)
8264 vmovdqu %ymm3,32+0(%rdi)
8265 vmovdqu %ymm7,64+0(%rdi)
8266 vmovdqu %ymm11,96+0(%rdi)
8267
8268 vmovdqa 128(%rbp),%ymm0
8269 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
8270 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
8271 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
8272 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
8273 vpxor 0+128(%rsi),%ymm3,%ymm3
8274 vpxor 32+128(%rsi),%ymm2,%ymm2
8275 vpxor 64+128(%rsi),%ymm6,%ymm6
8276 vpxor 96+128(%rsi),%ymm10,%ymm10
8277 vmovdqu %ymm3,0+128(%rdi)
8278 vmovdqu %ymm2,32+128(%rdi)
8279 vmovdqu %ymm6,64+128(%rdi)
8280 vmovdqu %ymm10,96+128(%rdi)
8281 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
8282 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
8283 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
8284 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
8285 vpxor 0+256(%rsi),%ymm3,%ymm3
8286 vpxor 32+256(%rsi),%ymm1,%ymm1
8287 vpxor 64+256(%rsi),%ymm5,%ymm5
8288 vpxor 96+256(%rsi),%ymm9,%ymm9
8289 vmovdqu %ymm3,0+256(%rdi)
8290 vmovdqu %ymm1,32+256(%rdi)
8291 vmovdqu %ymm5,64+256(%rdi)
8292 vmovdqu %ymm9,96+256(%rdi)
8293 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
8294 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
8295 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
8296 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
8297 vmovdqa %ymm3,%ymm8
8298
8299 movq $384,%rcx
8300 leaq 384(%rsi),%rsi
8301 subq $384,%rbx
8302 jmp seal_avx2_hash
8303
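// seal_avx2_320: messages of at most 320 bytes. Three two-block states
// yield 384 bytes of keystream: 32 bytes are clamped into the Poly1305
// key, 32 are discarded, and 320 encrypt the message via the short path.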
8304 seal_avx2_320:
8305 vmovdqa %ymm0,%ymm1
8306 vmovdqa %ymm0,%ymm2
8307 vmovdqa %ymm4,%ymm5
8308 vmovdqa %ymm4,%ymm6
8309 vmovdqa %ymm8,%ymm9
8310 vmovdqa %ymm8,%ymm10
8311 vpaddd .avx2_inc(%rip),%ymm12,%ymm13
8312 vpaddd .avx2_inc(%rip),%ymm13,%ymm14
8313 vmovdqa %ymm4,%ymm7
8314 vmovdqa %ymm8,%ymm11
8315 vmovdqa %ymm12,160(%rbp)
8316 vmovdqa %ymm13,192(%rbp)
8317 vmovdqa %ymm14,224(%rbp)
8318 movq $10,%r10
8319 1:
8320 vpaddd %ymm4,%ymm0,%ymm0
8321 vpxor %ymm0,%ymm12,%ymm12
8322 vpshufb .rol16(%rip),%ymm12,%ymm12
8323 vpaddd %ymm12,%ymm8,%ymm8
8324 vpxor %ymm8,%ymm4,%ymm4
8325 vpsrld $20,%ymm4,%ymm3
8326 vpslld $12,%ymm4,%ymm4
8327 vpxor %ymm3,%ymm4,%ymm4
8328 vpaddd %ymm4,%ymm0,%ymm0
8329 vpxor %ymm0,%ymm12,%ymm12
8330 vpshufb .rol8(%rip),%ymm12,%ymm12
8331 vpaddd %ymm12,%ymm8,%ymm8
8332 vpxor %ymm8,%ymm4,%ymm4
8333 vpslld $7,%ymm4,%ymm3
8334 vpsrld $25,%ymm4,%ymm4
8335 vpxor %ymm3,%ymm4,%ymm4
8336 vpalignr $12,%ymm12,%ymm12,%ymm12
8337 vpalignr $8,%ymm8,%ymm8,%ymm8
8338 vpalignr $4,%ymm4,%ymm4,%ymm4
8339 vpaddd %ymm5,%ymm1,%ymm1
8340 vpxor %ymm1,%ymm13,%ymm13
8341 vpshufb .rol16(%rip),%ymm13,%ymm13
8342 vpaddd %ymm13,%ymm9,%ymm9
8343 vpxor %ymm9,%ymm5,%ymm5
8344 vpsrld $20,%ymm5,%ymm3
8345 vpslld $12,%ymm5,%ymm5
8346 vpxor %ymm3,%ymm5,%ymm5
8347 vpaddd %ymm5,%ymm1,%ymm1
8348 vpxor %ymm1,%ymm13,%ymm13
8349 vpshufb .rol8(%rip),%ymm13,%ymm13
8350 vpaddd %ymm13,%ymm9,%ymm9
8351 vpxor %ymm9,%ymm5,%ymm5
8352 vpslld $7,%ymm5,%ymm3
8353 vpsrld $25,%ymm5,%ymm5
8354 vpxor %ymm3,%ymm5,%ymm5
8355 vpalignr $12,%ymm13,%ymm13,%ymm13
8356 vpalignr $8,%ymm9,%ymm9,%ymm9
8357 vpalignr $4,%ymm5,%ymm5,%ymm5
8358 vpaddd %ymm6,%ymm2,%ymm2
8359 vpxor %ymm2,%ymm14,%ymm14
8360 vpshufb .rol16(%rip),%ymm14,%ymm14
8361 vpaddd %ymm14,%ymm10,%ymm10
8362 vpxor %ymm10,%ymm6,%ymm6
8363 vpsrld $20,%ymm6,%ymm3
8364 vpslld $12,%ymm6,%ymm6
8365 vpxor %ymm3,%ymm6,%ymm6
8366 vpaddd %ymm6,%ymm2,%ymm2
8367 vpxor %ymm2,%ymm14,%ymm14
8368 vpshufb .rol8(%rip),%ymm14,%ymm14
8369 vpaddd %ymm14,%ymm10,%ymm10
8370 vpxor %ymm10,%ymm6,%ymm6
8371 vpslld $7,%ymm6,%ymm3
8372 vpsrld $25,%ymm6,%ymm6
8373 vpxor %ymm3,%ymm6,%ymm6
8374 vpalignr $12,%ymm14,%ymm14,%ymm14
8375 vpalignr $8,%ymm10,%ymm10,%ymm10
8376 vpalignr $4,%ymm6,%ymm6,%ymm6
8377 vpaddd %ymm4,%ymm0,%ymm0
8378 vpxor %ymm0,%ymm12,%ymm12
8379 vpshufb .rol16(%rip),%ymm12,%ymm12
8380 vpaddd %ymm12,%ymm8,%ymm8
8381 vpxor %ymm8,%ymm4,%ymm4
8382 vpsrld $20,%ymm4,%ymm3
8383 vpslld $12,%ymm4,%ymm4
8384 vpxor %ymm3,%ymm4,%ymm4
8385 vpaddd %ymm4,%ymm0,%ymm0
8386 vpxor %ymm0,%ymm12,%ymm12
8387 vpshufb .rol8(%rip),%ymm12,%ymm12
8388 vpaddd %ymm12,%ymm8,%ymm8
8389 vpxor %ymm8,%ymm4,%ymm4
8390 vpslld $7,%ymm4,%ymm3
8391 vpsrld $25,%ymm4,%ymm4
8392 vpxor %ymm3,%ymm4,%ymm4
8393 vpalignr $4,%ymm12,%ymm12,%ymm12
8394 vpalignr $8,%ymm8,%ymm8,%ymm8
8395 vpalignr $12,%ymm4,%ymm4,%ymm4
8396 vpaddd %ymm5,%ymm1,%ymm1
8397 vpxor %ymm1,%ymm13,%ymm13
8398 vpshufb .rol16(%rip),%ymm13,%ymm13
8399 vpaddd %ymm13,%ymm9,%ymm9
8400 vpxor %ymm9,%ymm5,%ymm5
8401 vpsrld $20,%ymm5,%ymm3
8402 vpslld $12,%ymm5,%ymm5
8403 vpxor %ymm3,%ymm5,%ymm5
8404 vpaddd %ymm5,%ymm1,%ymm1
8405 vpxor %ymm1,%ymm13,%ymm13
8406 vpshufb .rol8(%rip),%ymm13,%ymm13
8407 vpaddd %ymm13,%ymm9,%ymm9
8408 vpxor %ymm9,%ymm5,%ymm5
8409 vpslld $7,%ymm5,%ymm3
8410 vpsrld $25,%ymm5,%ymm5
8411 vpxor %ymm3,%ymm5,%ymm5
8412 vpalignr $4,%ymm13,%ymm13,%ymm13
8413 vpalignr $8,%ymm9,%ymm9,%ymm9
8414 vpalignr $12,%ymm5,%ymm5,%ymm5
8415 vpaddd %ymm6,%ymm2,%ymm2
8416 vpxor %ymm2,%ymm14,%ymm14
8417 vpshufb .rol16(%rip),%ymm14,%ymm14
8418 vpaddd %ymm14,%ymm10,%ymm10
8419 vpxor %ymm10,%ymm6,%ymm6
8420 vpsrld $20,%ymm6,%ymm3
8421 vpslld $12,%ymm6,%ymm6
8422 vpxor %ymm3,%ymm6,%ymm6
8423 vpaddd %ymm6,%ymm2,%ymm2
8424 vpxor %ymm2,%ymm14,%ymm14
8425 vpshufb .rol8(%rip),%ymm14,%ymm14
8426 vpaddd %ymm14,%ymm10,%ymm10
8427 vpxor %ymm10,%ymm6,%ymm6
8428 vpslld $7,%ymm6,%ymm3
8429 vpsrld $25,%ymm6,%ymm6
8430 vpxor %ymm3,%ymm6,%ymm6
8431 vpalignr $4,%ymm14,%ymm14,%ymm14
8432 vpalignr $8,%ymm10,%ymm10,%ymm10
8433 vpalignr $12,%ymm6,%ymm6,%ymm6
8434
8435 decq %r10
8436 jne 1b
8437 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
8438 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
8439 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
8440 vpaddd %ymm7,%ymm4,%ymm4
8441 vpaddd %ymm7,%ymm5,%ymm5
8442 vpaddd %ymm7,%ymm6,%ymm6
8443 vpaddd %ymm11,%ymm8,%ymm8
8444 vpaddd %ymm11,%ymm9,%ymm9
8445 vpaddd %ymm11,%ymm10,%ymm10
8446 vpaddd 160(%rbp),%ymm12,%ymm12
8447 vpaddd 192(%rbp),%ymm13,%ymm13
8448 vpaddd 224(%rbp),%ymm14,%ymm14
8449 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
8450
8451 vpand .clamp(%rip),%ymm3,%ymm3
8452 vmovdqa %ymm3,0(%rbp)
8453
8454 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
8455 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
8456 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
8457 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
8458 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
8459 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
8460 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
8461 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
8462 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
8463 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
8464 jmp seal_avx2_short
8465
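// seal_avx2_192: messages of at most 192 bytes. Two two-block states
// yield 256 bytes of keystream: 32 become the clamped Poly1305 key,
// 32 are discarded, and 192 are queued for the short loop.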
8466 seal_avx2_192:
8467 vmovdqa %ymm0,%ymm1
8468 vmovdqa %ymm0,%ymm2
8469 vmovdqa %ymm4,%ymm5
8470 vmovdqa %ymm4,%ymm6
8471 vmovdqa %ymm8,%ymm9
8472 vmovdqa %ymm8,%ymm10
8473 vpaddd .avx2_inc(%rip),%ymm12,%ymm13
8474 vmovdqa %ymm12,%ymm11
8475 vmovdqa %ymm13,%ymm15
8476 movq $10,%r10
8477 1:
8478 vpaddd %ymm4,%ymm0,%ymm0
8479 vpxor %ymm0,%ymm12,%ymm12
8480 vpshufb .rol16(%rip),%ymm12,%ymm12
8481 vpaddd %ymm12,%ymm8,%ymm8
8482 vpxor %ymm8,%ymm4,%ymm4
8483 vpsrld $20,%ymm4,%ymm3
8484 vpslld $12,%ymm4,%ymm4
8485 vpxor %ymm3,%ymm4,%ymm4
8486 vpaddd %ymm4,%ymm0,%ymm0
8487 vpxor %ymm0,%ymm12,%ymm12
8488 vpshufb .rol8(%rip),%ymm12,%ymm12
8489 vpaddd %ymm12,%ymm8,%ymm8
8490 vpxor %ymm8,%ymm4,%ymm4
8491 vpslld $7,%ymm4,%ymm3
8492 vpsrld $25,%ymm4,%ymm4
8493 vpxor %ymm3,%ymm4,%ymm4
8494 vpalignr $12,%ymm12,%ymm12,%ymm12
8495 vpalignr $8,%ymm8,%ymm8,%ymm8
8496 vpalignr $4,%ymm4,%ymm4,%ymm4
8497 vpaddd %ymm5,%ymm1,%ymm1
8498 vpxor %ymm1,%ymm13,%ymm13
8499 vpshufb .rol16(%rip),%ymm13,%ymm13
8500 vpaddd %ymm13,%ymm9,%ymm9
8501 vpxor %ymm9,%ymm5,%ymm5
8502 vpsrld $20,%ymm5,%ymm3
8503 vpslld $12,%ymm5,%ymm5
8504 vpxor %ymm3,%ymm5,%ymm5
8505 vpaddd %ymm5,%ymm1,%ymm1
8506 vpxor %ymm1,%ymm13,%ymm13
8507 vpshufb .rol8(%rip),%ymm13,%ymm13
8508 vpaddd %ymm13,%ymm9,%ymm9
8509 vpxor %ymm9,%ymm5,%ymm5
8510 vpslld $7,%ymm5,%ymm3
8511 vpsrld $25,%ymm5,%ymm5
8512 vpxor %ymm3,%ymm5,%ymm5
8513 vpalignr $12,%ymm13,%ymm13,%ymm13
8514 vpalignr $8,%ymm9,%ymm9,%ymm9
8515 vpalignr $4,%ymm5,%ymm5,%ymm5
8516 vpaddd %ymm4,%ymm0,%ymm0
8517 vpxor %ymm0,%ymm12,%ymm12
8518 vpshufb .rol16(%rip),%ymm12,%ymm12
8519 vpaddd %ymm12,%ymm8,%ymm8
8520 vpxor %ymm8,%ymm4,%ymm4
8521 vpsrld $20,%ymm4,%ymm3
8522 vpslld $12,%ymm4,%ymm4
8523 vpxor %ymm3,%ymm4,%ymm4
8524 vpaddd %ymm4,%ymm0,%ymm0
8525 vpxor %ymm0,%ymm12,%ymm12
8526 vpshufb .rol8(%rip),%ymm12,%ymm12
8527 vpaddd %ymm12,%ymm8,%ymm8
8528 vpxor %ymm8,%ymm4,%ymm4
8529 vpslld $7,%ymm4,%ymm3
8530 vpsrld $25,%ymm4,%ymm4
8531 vpxor %ymm3,%ymm4,%ymm4
8532 vpalignr $4,%ymm12,%ymm12,%ymm12
8533 vpalignr $8,%ymm8,%ymm8,%ymm8
8534 vpalignr $12,%ymm4,%ymm4,%ymm4
8535 vpaddd %ymm5,%ymm1,%ymm1
8536 vpxor %ymm1,%ymm13,%ymm13
8537 vpshufb .rol16(%rip),%ymm13,%ymm13
8538 vpaddd %ymm13,%ymm9,%ymm9
8539 vpxor %ymm9,%ymm5,%ymm5
8540 vpsrld $20,%ymm5,%ymm3
8541 vpslld $12,%ymm5,%ymm5
8542 vpxor %ymm3,%ymm5,%ymm5
8543 vpaddd %ymm5,%ymm1,%ymm1
8544 vpxor %ymm1,%ymm13,%ymm13
8545 vpshufb .rol8(%rip),%ymm13,%ymm13
8546 vpaddd %ymm13,%ymm9,%ymm9
8547 vpxor %ymm9,%ymm5,%ymm5
8548 vpslld $7,%ymm5,%ymm3
8549 vpsrld $25,%ymm5,%ymm5
8550 vpxor %ymm3,%ymm5,%ymm5
8551 vpalignr $4,%ymm13,%ymm13,%ymm13
8552 vpalignr $8,%ymm9,%ymm9,%ymm9
8553 vpalignr $12,%ymm5,%ymm5,%ymm5
8554
8555 decq %r10
8556 jne 1b
8557 vpaddd %ymm2,%ymm0,%ymm0
8558 vpaddd %ymm2,%ymm1,%ymm1
8559 vpaddd %ymm6,%ymm4,%ymm4
8560 vpaddd %ymm6,%ymm5,%ymm5
8561 vpaddd %ymm10,%ymm8,%ymm8
8562 vpaddd %ymm10,%ymm9,%ymm9
8563 vpaddd %ymm11,%ymm12,%ymm12
8564 vpaddd %ymm15,%ymm13,%ymm13
8565 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
8566
8567 vpand .clamp(%rip),%ymm3,%ymm3
8568 vmovdqa %ymm3,0(%rbp)
8569
8570 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
8571 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
8572 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
8573 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
8574 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
8575 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
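// seal_avx2_short: common entry for the short paths. Hash the
// additional data, then enter the encrypt-and-hash loop with no
// ciphertext pending (%rcx = 0). The movq %r8,%r8 below is a no-op.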
8576 seal_avx2_short:
8577 movq %r8,%r8
8578 call poly_hash_ad_internal
8579 xorq %rcx,%rcx
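// seal_avx2_hash: absorb %rcx bytes of ciphertext already written at
// %rdi into the Poly1305 accumulator, 16 bytes per pass.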
8580 seal_avx2_hash:
8581 cmpq $16,%rcx
8582 jb seal_avx2_short_loop
8583 addq 0(%rdi),%r10
8584 adcq 8+0(%rdi),%r11
8585 adcq $1,%r12
8586 movq 0+0(%rbp),%rax
8587 movq %rax,%r15
8588 mulq %r10
8589 movq %rax,%r13
8590 movq %rdx,%r14
8591 movq 0+0(%rbp),%rax
8592 mulq %r11
8593 imulq %r12,%r15
8594 addq %rax,%r14
8595 adcq %rdx,%r15
8596 movq 8+0(%rbp),%rax
8597 movq %rax,%r9
8598 mulq %r10
8599 addq %rax,%r14
8600 adcq $0,%rdx
8601 movq %rdx,%r10
8602 movq 8+0(%rbp),%rax
8603 mulq %r11
8604 addq %rax,%r15
8605 adcq $0,%rdx
8606 imulq %r12,%r9
8607 addq %r10,%r15
8608 adcq %rdx,%r9
8609 movq %r13,%r10
8610 movq %r14,%r11
8611 movq %r15,%r12
8612 andq $3,%r12
8613 movq %r15,%r13
8614 andq $-4,%r13
8615 movq %r9,%r14
8616 shrdq $2,%r9,%r15
8617 shrq $2,%r9
8618 addq %r13,%r10
8619 adcq %r14,%r11
8620 adcq $0,%r12
8621 addq %r15,%r10
8622 adcq %r9,%r11
8623 adcq $0,%r12
8624
8625 subq $16,%rcx
8626 addq $16,%rdi
8627 jmp seal_avx2_hash
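// seal_avx2_short_loop: encrypt 32 bytes with the keystream in %ymm0,
// hash both 16-byte halves of the new ciphertext, then rotate the
// queued keystream registers down (%ymm4 -> %ymm0, %ymm8 -> %ymm4, ...).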
8628 seal_avx2_short_loop:
8629 cmpq $32,%rbx
8630 jb seal_avx2_short_tail
8631 subq $32,%rbx
8632
8633 vpxor (%rsi),%ymm0,%ymm0
8634 vmovdqu %ymm0,(%rdi)
8635 leaq 32(%rsi),%rsi
8636
8637 addq 0(%rdi),%r10
8638 adcq 8+0(%rdi),%r11
8639 adcq $1,%r12
8640 movq 0+0(%rbp),%rax
8641 movq %rax,%r15
8642 mulq %r10
8643 movq %rax,%r13
8644 movq %rdx,%r14
8645 movq 0+0(%rbp),%rax
8646 mulq %r11
8647 imulq %r12,%r15
8648 addq %rax,%r14
8649 adcq %rdx,%r15
8650 movq 8+0(%rbp),%rax
8651 movq %rax,%r9
8652 mulq %r10
8653 addq %rax,%r14
8654 adcq $0,%rdx
8655 movq %rdx,%r10
8656 movq 8+0(%rbp),%rax
8657 mulq %r11
8658 addq %rax,%r15
8659 adcq $0,%rdx
8660 imulq %r12,%r9
8661 addq %r10,%r15
8662 adcq %rdx,%r9
8663 movq %r13,%r10
8664 movq %r14,%r11
8665 movq %r15,%r12
8666 andq $3,%r12
8667 movq %r15,%r13
8668 andq $-4,%r13
8669 movq %r9,%r14
8670 shrdq $2,%r9,%r15
8671 shrq $2,%r9
8672 addq %r13,%r10
8673 adcq %r14,%r11
8674 adcq $0,%r12
8675 addq %r15,%r10
8676 adcq %r9,%r11
8677 adcq $0,%r12
8678 addq 16(%rdi),%r10
8679 adcq 8+16(%rdi),%r11
8680 adcq $1,%r12
8681 movq 0+0(%rbp),%rax
8682 movq %rax,%r15
8683 mulq %r10
8684 movq %rax,%r13
8685 movq %rdx,%r14
8686 movq 0+0(%rbp),%rax
8687 mulq %r11
8688 imulq %r12,%r15
8689 addq %rax,%r14
8690 adcq %rdx,%r15
8691 movq 8+0(%rbp),%rax
8692 movq %rax,%r9
8693 mulq %r10
8694 addq %rax,%r14
8695 adcq $0,%rdx
8696 movq %rdx,%r10
8697 movq 8+0(%rbp),%rax
8698 mulq %r11
8699 addq %rax,%r15
8700 adcq $0,%rdx
8701 imulq %r12,%r9
8702 addq %r10,%r15
8703 adcq %rdx,%r9
8704 movq %r13,%r10
8705 movq %r14,%r11
8706 movq %r15,%r12
8707 andq $3,%r12
8708 movq %r15,%r13
8709 andq $-4,%r13
8710 movq %r9,%r14
8711 shrdq $2,%r9,%r15
8712 shrq $2,%r9
8713 addq %r13,%r10
8714 adcq %r14,%r11
8715 adcq $0,%r12
8716 addq %r15,%r10
8717 adcq %r9,%r11
8718 adcq $0,%r12
8719
8720 leaq 32(%rdi),%rdi
8721
8722 vmovdqa %ymm4,%ymm0
8723 vmovdqa %ymm8,%ymm4
8724 vmovdqa %ymm12,%ymm8
8725 vmovdqa %ymm1,%ymm12
8726 vmovdqa %ymm5,%ymm1
8727 vmovdqa %ymm9,%ymm5
8728 vmovdqa %ymm13,%ymm9
8729 vmovdqa %ymm2,%ymm13
8730 vmovdqa %ymm6,%ymm2
8731 jmp seal_avx2_short_loop
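// seal_avx2_short_tail: if at least 16 bytes remain, encrypt and hash
// one block from %xmm0, then pull the high lane down for what is left.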
8732 seal_avx2_short_tail:
8733 cmpq $16,%rbx
8734 jb 1f
8735 subq $16,%rbx
8736 vpxor (%rsi),%xmm0,%xmm3
8737 vmovdqu %xmm3,(%rdi)
8738 leaq 16(%rsi),%rsi
8739 addq 0(%rdi),%r10
8740 adcq 8+0(%rdi),%r11
8741 adcq $1,%r12
8742 movq 0+0(%rbp),%rax
8743 movq %rax,%r15
8744 mulq %r10
8745 movq %rax,%r13
8746 movq %rdx,%r14
8747 movq 0+0(%rbp),%rax
8748 mulq %r11
8749 imulq %r12,%r15
8750 addq %rax,%r14
8751 adcq %rdx,%r15
8752 movq 8+0(%rbp),%rax
8753 movq %rax,%r9
8754 mulq %r10
8755 addq %rax,%r14
8756 adcq $0,%rdx
8757 movq %rdx,%r10
8758 movq 8+0(%rbp),%rax
8759 mulq %r11
8760 addq %rax,%r15
8761 adcq $0,%rdx
8762 imulq %r12,%r9
8763 addq %r10,%r15
8764 adcq %rdx,%r9
8765 movq %r13,%r10
8766 movq %r14,%r11
8767 movq %r15,%r12
8768 andq $3,%r12
8769 movq %r15,%r13
8770 andq $-4,%r13
8771 movq %r9,%r14
8772 shrdq $2,%r9,%r15
8773 shrq $2,%r9
8774 addq %r13,%r10
8775 adcq %r14,%r11
8776 adcq $0,%r12
8777 addq %r15,%r10
8778 adcq %r9,%r11
8779 adcq $0,%r12
8780
8781 leaq 16(%rdi),%rdi
8782 vextracti128 $1,%ymm0,%xmm0
8783 1:
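// At most 15 bytes remain: drop the AVX state and finish in the SSE
// tail (seal_sse_tail_16).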
8784 vzeroupper
8785 jmp seal_sse_tail_16
8786 .cfi_endproc
8787 #endif