Chromium Code Reviews

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S

Issue 2829743002: Roll src/third_party/boringssl/src bc6a76b0e..777fdd644 (Closed)
Patch Set: Created 3 years, 8 months ago
1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
2 .text
3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P
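# aesni_encrypt: encrypt a single 16-byte block. Register use (SysV AMD64
# argument registers, as read directly from the code): %rdi = input block,
# %rsi = output block, %rdx = expanded key schedule, with the round count
# loaded from byte offset 240 (matching OpenSSL's AES_KEY layout). The
# key-bearing registers %xmm0-%xmm2 are cleared before returning.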
5 .globl aesni_encrypt
6 .hidden aesni_encrypt
7 .type aesni_encrypt,@function
8 .align 16
9 aesni_encrypt:
10 movups (%rdi),%xmm2
11 movl 240(%rdx),%eax
12 movups (%rdx),%xmm0
13 movups 16(%rdx),%xmm1
14 leaq 32(%rdx),%rdx
15 xorps %xmm0,%xmm2
16 .Loop_enc1_1:
17 .byte 102,15,56,220,209
18 decl %eax
19 movups (%rdx),%xmm1
20 leaq 16(%rdx),%rdx
21 jnz .Loop_enc1_1
22 .byte 102,15,56,221,209
23 pxor %xmm0,%xmm0
24 pxor %xmm1,%xmm1
25 movups %xmm2,(%rsi)
26 pxor %xmm2,%xmm2
27 .byte 0xf3,0xc3
28 .size aesni_encrypt,.-aesni_encrypt
29
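# aesni_decrypt: single-block counterpart of aesni_encrypt, using the
# aesdec/aesdeclast round sequence; same register convention.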
30 .globl aesni_decrypt
31 .hidden aesni_decrypt
32 .type aesni_decrypt,@function
33 .align 16
34 aesni_decrypt:
35 movups (%rdi),%xmm2
36 movl 240(%rdx),%eax
37 movups (%rdx),%xmm0
38 movups 16(%rdx),%xmm1
39 leaq 32(%rdx),%rdx
40 xorps %xmm0,%xmm2
41 .Loop_dec1_2:
42 .byte 102,15,56,222,209
43 decl %eax
44 movups (%rdx),%xmm1
45 leaq 16(%rdx),%rdx
46 jnz .Loop_dec1_2
47 .byte 102,15,56,223,209
48 pxor %xmm0,%xmm0
49 pxor %xmm1,%xmm1
50 movups %xmm2,(%rsi)
51 pxor %xmm2,%xmm2
52 .byte 0xf3,0xc3
53 .size aesni_decrypt, .-aesni_decrypt
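# _aesni_encryptN / _aesni_decryptN: internal helpers that pipeline N blocks
# (N = 2, 3, 4, 6, 8) through the AES rounds in parallel. On entry %rcx points
# at the key schedule, %eax holds the round count, and the blocks live in
# %xmm2..%xmm(N+1). The .byte sequences encode aesenc/aesenclast and
# aesdec/aesdeclast as raw opcodes so the file assembles on toolchains that
# lack the AES-NI mnemonics; the 0xf3,0xc3 epilogue is "rep ret".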
54 .type _aesni_encrypt2,@function
55 .align 16
56 _aesni_encrypt2:
57 movups (%rcx),%xmm0
58 shll $4,%eax
59 movups 16(%rcx),%xmm1
60 xorps %xmm0,%xmm2
61 xorps %xmm0,%xmm3
62 movups 32(%rcx),%xmm0
63 leaq 32(%rcx,%rax,1),%rcx
64 negq %rax
65 addq $16,%rax
66
67 .Lenc_loop2:
68 .byte 102,15,56,220,209
69 .byte 102,15,56,220,217
70 movups (%rcx,%rax,1),%xmm1
71 addq $32,%rax
72 .byte 102,15,56,220,208
73 .byte 102,15,56,220,216
74 movups -16(%rcx,%rax,1),%xmm0
75 jnz .Lenc_loop2
76
77 .byte 102,15,56,220,209
78 .byte 102,15,56,220,217
79 .byte 102,15,56,221,208
80 .byte 102,15,56,221,216
81 .byte 0xf3,0xc3
82 .size _aesni_encrypt2,.-_aesni_encrypt2
83 .type _aesni_decrypt2,@function
84 .align 16
85 _aesni_decrypt2:
86 movups (%rcx),%xmm0
87 shll $4,%eax
88 movups 16(%rcx),%xmm1
89 xorps %xmm0,%xmm2
90 xorps %xmm0,%xmm3
91 movups 32(%rcx),%xmm0
92 leaq 32(%rcx,%rax,1),%rcx
93 negq %rax
94 addq $16,%rax
95
96 .Ldec_loop2:
97 .byte 102,15,56,222,209
98 .byte 102,15,56,222,217
99 movups (%rcx,%rax,1),%xmm1
100 addq $32,%rax
101 .byte 102,15,56,222,208
102 .byte 102,15,56,222,216
103 movups -16(%rcx,%rax,1),%xmm0
104 jnz .Ldec_loop2
105
106 .byte 102,15,56,222,209
107 .byte 102,15,56,222,217
108 .byte 102,15,56,223,208
109 .byte 102,15,56,223,216
110 .byte 0xf3,0xc3
111 .size _aesni_decrypt2,.-_aesni_decrypt2
112 .type _aesni_encrypt3,@function
113 .align 16
114 _aesni_encrypt3:
115 movups (%rcx),%xmm0
116 shll $4,%eax
117 movups 16(%rcx),%xmm1
118 xorps %xmm0,%xmm2
119 xorps %xmm0,%xmm3
120 xorps %xmm0,%xmm4
121 movups 32(%rcx),%xmm0
122 leaq 32(%rcx,%rax,1),%rcx
123 negq %rax
124 addq $16,%rax
125
126 .Lenc_loop3:
127 .byte 102,15,56,220,209
128 .byte 102,15,56,220,217
129 .byte 102,15,56,220,225
130 movups (%rcx,%rax,1),%xmm1
131 addq $32,%rax
132 .byte 102,15,56,220,208
133 .byte 102,15,56,220,216
134 .byte 102,15,56,220,224
135 movups -16(%rcx,%rax,1),%xmm0
136 jnz .Lenc_loop3
137
138 .byte 102,15,56,220,209
139 .byte 102,15,56,220,217
140 .byte 102,15,56,220,225
141 .byte 102,15,56,221,208
142 .byte 102,15,56,221,216
143 .byte 102,15,56,221,224
144 .byte 0xf3,0xc3
145 .size _aesni_encrypt3,.-_aesni_encrypt3
146 .type _aesni_decrypt3,@function
147 .align 16
148 _aesni_decrypt3:
149 movups (%rcx),%xmm0
150 shll $4,%eax
151 movups 16(%rcx),%xmm1
152 xorps %xmm0,%xmm2
153 xorps %xmm0,%xmm3
154 xorps %xmm0,%xmm4
155 movups 32(%rcx),%xmm0
156 leaq 32(%rcx,%rax,1),%rcx
157 negq %rax
158 addq $16,%rax
159
160 .Ldec_loop3:
161 .byte 102,15,56,222,209
162 .byte 102,15,56,222,217
163 .byte 102,15,56,222,225
164 movups (%rcx,%rax,1),%xmm1
165 addq $32,%rax
166 .byte 102,15,56,222,208
167 .byte 102,15,56,222,216
168 .byte 102,15,56,222,224
169 movups -16(%rcx,%rax,1),%xmm0
170 jnz .Ldec_loop3
171
172 .byte 102,15,56,222,209
173 .byte 102,15,56,222,217
174 .byte 102,15,56,222,225
175 .byte 102,15,56,223,208
176 .byte 102,15,56,223,216
177 .byte 102,15,56,223,224
178 .byte 0xf3,0xc3
179 .size _aesni_decrypt3,.-_aesni_decrypt3
180 .type _aesni_encrypt4,@function
181 .align 16
182 _aesni_encrypt4:
183 movups (%rcx),%xmm0
184 shll $4,%eax
185 movups 16(%rcx),%xmm1
186 xorps %xmm0,%xmm2
187 xorps %xmm0,%xmm3
188 xorps %xmm0,%xmm4
189 xorps %xmm0,%xmm5
190 movups 32(%rcx),%xmm0
191 leaq 32(%rcx,%rax,1),%rcx
192 negq %rax
193 .byte 0x0f,0x1f,0x00
194 addq $16,%rax
195
196 .Lenc_loop4:
197 .byte 102,15,56,220,209
198 .byte 102,15,56,220,217
199 .byte 102,15,56,220,225
200 .byte 102,15,56,220,233
201 movups (%rcx,%rax,1),%xmm1
202 addq $32,%rax
203 .byte 102,15,56,220,208
204 .byte 102,15,56,220,216
205 .byte 102,15,56,220,224
206 .byte 102,15,56,220,232
207 movups -16(%rcx,%rax,1),%xmm0
208 jnz .Lenc_loop4
209
210 .byte 102,15,56,220,209
211 .byte 102,15,56,220,217
212 .byte 102,15,56,220,225
213 .byte 102,15,56,220,233
214 .byte 102,15,56,221,208
215 .byte 102,15,56,221,216
216 .byte 102,15,56,221,224
217 .byte 102,15,56,221,232
218 .byte 0xf3,0xc3
219 .size _aesni_encrypt4,.-_aesni_encrypt4
220 .type _aesni_decrypt4,@function
221 .align 16
222 _aesni_decrypt4:
223 movups (%rcx),%xmm0
224 shll $4,%eax
225 movups 16(%rcx),%xmm1
226 xorps %xmm0,%xmm2
227 xorps %xmm0,%xmm3
228 xorps %xmm0,%xmm4
229 xorps %xmm0,%xmm5
230 movups 32(%rcx),%xmm0
231 leaq 32(%rcx,%rax,1),%rcx
232 negq %rax
233 .byte 0x0f,0x1f,0x00
234 addq $16,%rax
235
236 .Ldec_loop4:
237 .byte 102,15,56,222,209
238 .byte 102,15,56,222,217
239 .byte 102,15,56,222,225
240 .byte 102,15,56,222,233
241 movups (%rcx,%rax,1),%xmm1
242 addq $32,%rax
243 .byte 102,15,56,222,208
244 .byte 102,15,56,222,216
245 .byte 102,15,56,222,224
246 .byte 102,15,56,222,232
247 movups -16(%rcx,%rax,1),%xmm0
248 jnz .Ldec_loop4
249
250 .byte 102,15,56,222,209
251 .byte 102,15,56,222,217
252 .byte 102,15,56,222,225
253 .byte 102,15,56,222,233
254 .byte 102,15,56,223,208
255 .byte 102,15,56,223,216
256 .byte 102,15,56,223,224
257 .byte 102,15,56,223,232
258 .byte 0xf3,0xc3
259 .size _aesni_decrypt4,.-_aesni_decrypt4
260 .type _aesni_encrypt6,@function
261 .align 16
262 _aesni_encrypt6:
263 movups (%rcx),%xmm0
264 shll $4,%eax
265 movups 16(%rcx),%xmm1
266 xorps %xmm0,%xmm2
267 pxor %xmm0,%xmm3
268 pxor %xmm0,%xmm4
269 .byte 102,15,56,220,209
270 leaq 32(%rcx,%rax,1),%rcx
271 negq %rax
272 .byte 102,15,56,220,217
273 pxor %xmm0,%xmm5
274 pxor %xmm0,%xmm6
275 .byte 102,15,56,220,225
276 pxor %xmm0,%xmm7
277 movups (%rcx,%rax,1),%xmm0
278 addq $16,%rax
279 jmp .Lenc_loop6_enter
280 .align 16
281 .Lenc_loop6:
282 .byte 102,15,56,220,209
283 .byte 102,15,56,220,217
284 .byte 102,15,56,220,225
285 .Lenc_loop6_enter:
286 .byte 102,15,56,220,233
287 .byte 102,15,56,220,241
288 .byte 102,15,56,220,249
289 movups (%rcx,%rax,1),%xmm1
290 addq $32,%rax
291 .byte 102,15,56,220,208
292 .byte 102,15,56,220,216
293 .byte 102,15,56,220,224
294 .byte 102,15,56,220,232
295 .byte 102,15,56,220,240
296 .byte 102,15,56,220,248
297 movups -16(%rcx,%rax,1),%xmm0
298 jnz .Lenc_loop6
299
300 .byte 102,15,56,220,209
301 .byte 102,15,56,220,217
302 .byte 102,15,56,220,225
303 .byte 102,15,56,220,233
304 .byte 102,15,56,220,241
305 .byte 102,15,56,220,249
306 .byte 102,15,56,221,208
307 .byte 102,15,56,221,216
308 .byte 102,15,56,221,224
309 .byte 102,15,56,221,232
310 .byte 102,15,56,221,240
311 .byte 102,15,56,221,248
312 .byte 0xf3,0xc3
313 .size _aesni_encrypt6,.-_aesni_encrypt6
314 .type _aesni_decrypt6,@function
315 .align 16
316 _aesni_decrypt6:
317 movups (%rcx),%xmm0
318 shll $4,%eax
319 movups 16(%rcx),%xmm1
320 xorps %xmm0,%xmm2
321 pxor %xmm0,%xmm3
322 pxor %xmm0,%xmm4
323 .byte 102,15,56,222,209
324 leaq 32(%rcx,%rax,1),%rcx
325 negq %rax
326 .byte 102,15,56,222,217
327 pxor %xmm0,%xmm5
328 pxor %xmm0,%xmm6
329 .byte 102,15,56,222,225
330 pxor %xmm0,%xmm7
331 movups (%rcx,%rax,1),%xmm0
332 addq $16,%rax
333 jmp .Ldec_loop6_enter
334 .align 16
335 .Ldec_loop6:
336 .byte 102,15,56,222,209
337 .byte 102,15,56,222,217
338 .byte 102,15,56,222,225
339 .Ldec_loop6_enter:
340 .byte 102,15,56,222,233
341 .byte 102,15,56,222,241
342 .byte 102,15,56,222,249
343 movups (%rcx,%rax,1),%xmm1
344 addq $32,%rax
345 .byte 102,15,56,222,208
346 .byte 102,15,56,222,216
347 .byte 102,15,56,222,224
348 .byte 102,15,56,222,232
349 .byte 102,15,56,222,240
350 .byte 102,15,56,222,248
351 movups -16(%rcx,%rax,1),%xmm0
352 jnz .Ldec_loop6
353
354 .byte 102,15,56,222,209
355 .byte 102,15,56,222,217
356 .byte 102,15,56,222,225
357 .byte 102,15,56,222,233
358 .byte 102,15,56,222,241
359 .byte 102,15,56,222,249
360 .byte 102,15,56,223,208
361 .byte 102,15,56,223,216
362 .byte 102,15,56,223,224
363 .byte 102,15,56,223,232
364 .byte 102,15,56,223,240
365 .byte 102,15,56,223,248
366 .byte 0xf3,0xc3
367 .size _aesni_decrypt6,.-_aesni_decrypt6
368 .type _aesni_encrypt8,@function
369 .align 16
370 _aesni_encrypt8:
371 movups (%rcx),%xmm0
372 shll $4,%eax
373 movups 16(%rcx),%xmm1
374 xorps %xmm0,%xmm2
375 xorps %xmm0,%xmm3
376 pxor %xmm0,%xmm4
377 pxor %xmm0,%xmm5
378 pxor %xmm0,%xmm6
379 leaq 32(%rcx,%rax,1),%rcx
380 negq %rax
381 .byte 102,15,56,220,209
382 pxor %xmm0,%xmm7
383 pxor %xmm0,%xmm8
384 .byte 102,15,56,220,217
385 pxor %xmm0,%xmm9
386 movups (%rcx,%rax,1),%xmm0
387 addq $16,%rax
388 jmp .Lenc_loop8_inner
389 .align 16
390 .Lenc_loop8:
391 .byte 102,15,56,220,209
392 .byte 102,15,56,220,217
393 .Lenc_loop8_inner:
394 .byte 102,15,56,220,225
395 .byte 102,15,56,220,233
396 .byte 102,15,56,220,241
397 .byte 102,15,56,220,249
398 .byte 102,68,15,56,220,193
399 .byte 102,68,15,56,220,201
400 .Lenc_loop8_enter:
401 movups (%rcx,%rax,1),%xmm1
402 addq $32,%rax
403 .byte 102,15,56,220,208
404 .byte 102,15,56,220,216
405 .byte 102,15,56,220,224
406 .byte 102,15,56,220,232
407 .byte 102,15,56,220,240
408 .byte 102,15,56,220,248
409 .byte 102,68,15,56,220,192
410 .byte 102,68,15,56,220,200
411 movups -16(%rcx,%rax,1),%xmm0
412 jnz .Lenc_loop8
413
414 .byte 102,15,56,220,209
415 .byte 102,15,56,220,217
416 .byte 102,15,56,220,225
417 .byte 102,15,56,220,233
418 .byte 102,15,56,220,241
419 .byte 102,15,56,220,249
420 .byte 102,68,15,56,220,193
421 .byte 102,68,15,56,220,201
422 .byte 102,15,56,221,208
423 .byte 102,15,56,221,216
424 .byte 102,15,56,221,224
425 .byte 102,15,56,221,232
426 .byte 102,15,56,221,240
427 .byte 102,15,56,221,248
428 .byte 102,68,15,56,221,192
429 .byte 102,68,15,56,221,200
430 .byte 0xf3,0xc3
431 .size _aesni_encrypt8,.-_aesni_encrypt8
432 .type _aesni_decrypt8,@function
433 .align 16
434 _aesni_decrypt8:
435 movups (%rcx),%xmm0
436 shll $4,%eax
437 movups 16(%rcx),%xmm1
438 xorps %xmm0,%xmm2
439 xorps %xmm0,%xmm3
440 pxor %xmm0,%xmm4
441 pxor %xmm0,%xmm5
442 pxor %xmm0,%xmm6
443 leaq 32(%rcx,%rax,1),%rcx
444 negq %rax
445 .byte 102,15,56,222,209
446 pxor %xmm0,%xmm7
447 pxor %xmm0,%xmm8
448 .byte 102,15,56,222,217
449 pxor %xmm0,%xmm9
450 movups (%rcx,%rax,1),%xmm0
451 addq $16,%rax
452 jmp .Ldec_loop8_inner
453 .align 16
454 .Ldec_loop8:
455 .byte 102,15,56,222,209
456 .byte 102,15,56,222,217
457 .Ldec_loop8_inner:
458 .byte 102,15,56,222,225
459 .byte 102,15,56,222,233
460 .byte 102,15,56,222,241
461 .byte 102,15,56,222,249
462 .byte 102,68,15,56,222,193
463 .byte 102,68,15,56,222,201
464 .Ldec_loop8_enter:
465 movups (%rcx,%rax,1),%xmm1
466 addq $32,%rax
467 .byte 102,15,56,222,208
468 .byte 102,15,56,222,216
469 .byte 102,15,56,222,224
470 .byte 102,15,56,222,232
471 .byte 102,15,56,222,240
472 .byte 102,15,56,222,248
473 .byte 102,68,15,56,222,192
474 .byte 102,68,15,56,222,200
475 movups -16(%rcx,%rax,1),%xmm0
476 jnz .Ldec_loop8
477
478 .byte 102,15,56,222,209
479 .byte 102,15,56,222,217
480 .byte 102,15,56,222,225
481 .byte 102,15,56,222,233
482 .byte 102,15,56,222,241
483 .byte 102,15,56,222,249
484 .byte 102,68,15,56,222,193
485 .byte 102,68,15,56,222,201
486 .byte 102,15,56,223,208
487 .byte 102,15,56,223,216
488 .byte 102,15,56,223,224
489 .byte 102,15,56,223,232
490 .byte 102,15,56,223,240
491 .byte 102,15,56,223,248
492 .byte 102,68,15,56,223,192
493 .byte 102,68,15,56,223,200
494 .byte 0xf3,0xc3
495 .size _aesni_decrypt8,.-_aesni_decrypt8
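# aesni_ecb_encrypt: ECB-mode bulk entry point. Likely prototype, per the
# usual OpenSSL perlasm convention:
#   aesni_ecb_encrypt(in=%rdi, out=%rsi, length=%rdx, key=%rcx, enc=%r8d)
# The length is truncated to a multiple of 16; a non-zero enc selects the
# encrypt path, zero selects decrypt. Eight blocks are processed per
# main-loop iteration, a 1-7 block tail goes through the _aesni_encrypt*/
# _aesni_decrypt* helpers, and the round-key registers are cleared at
# .Lecb_ret.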
496 .globl aesni_ecb_encrypt
497 .hidden aesni_ecb_encrypt
498 .type aesni_ecb_encrypt,@function
499 .align 16
500 aesni_ecb_encrypt:
501 andq $-16,%rdx
502 jz .Lecb_ret
503
504 movl 240(%rcx),%eax
505 movups (%rcx),%xmm0
506 movq %rcx,%r11
507 movl %eax,%r10d
508 testl %r8d,%r8d
509 jz .Lecb_decrypt
510
511 cmpq $0x80,%rdx
512 jb .Lecb_enc_tail
513
514 movdqu (%rdi),%xmm2
515 movdqu 16(%rdi),%xmm3
516 movdqu 32(%rdi),%xmm4
517 movdqu 48(%rdi),%xmm5
518 movdqu 64(%rdi),%xmm6
519 movdqu 80(%rdi),%xmm7
520 movdqu 96(%rdi),%xmm8
521 movdqu 112(%rdi),%xmm9
522 leaq 128(%rdi),%rdi
523 subq $0x80,%rdx
524 jmp .Lecb_enc_loop8_enter
525 .align 16
526 .Lecb_enc_loop8:
527 movups %xmm2,(%rsi)
528 movq %r11,%rcx
529 movdqu (%rdi),%xmm2
530 movl %r10d,%eax
531 movups %xmm3,16(%rsi)
532 movdqu 16(%rdi),%xmm3
533 movups %xmm4,32(%rsi)
534 movdqu 32(%rdi),%xmm4
535 movups %xmm5,48(%rsi)
536 movdqu 48(%rdi),%xmm5
537 movups %xmm6,64(%rsi)
538 movdqu 64(%rdi),%xmm6
539 movups %xmm7,80(%rsi)
540 movdqu 80(%rdi),%xmm7
541 movups %xmm8,96(%rsi)
542 movdqu 96(%rdi),%xmm8
543 movups %xmm9,112(%rsi)
544 leaq 128(%rsi),%rsi
545 movdqu 112(%rdi),%xmm9
546 leaq 128(%rdi),%rdi
547 .Lecb_enc_loop8_enter:
548
549 call _aesni_encrypt8
550
551 subq $0x80,%rdx
552 jnc .Lecb_enc_loop8
553
554 movups %xmm2,(%rsi)
555 movq %r11,%rcx
556 movups %xmm3,16(%rsi)
557 movl %r10d,%eax
558 movups %xmm4,32(%rsi)
559 movups %xmm5,48(%rsi)
560 movups %xmm6,64(%rsi)
561 movups %xmm7,80(%rsi)
562 movups %xmm8,96(%rsi)
563 movups %xmm9,112(%rsi)
564 leaq 128(%rsi),%rsi
565 addq $0x80,%rdx
566 jz .Lecb_ret
567
568 .Lecb_enc_tail:
569 movups (%rdi),%xmm2
570 cmpq $0x20,%rdx
571 jb .Lecb_enc_one
572 movups 16(%rdi),%xmm3
573 je .Lecb_enc_two
574 movups 32(%rdi),%xmm4
575 cmpq $0x40,%rdx
576 jb .Lecb_enc_three
577 movups 48(%rdi),%xmm5
578 je .Lecb_enc_four
579 movups 64(%rdi),%xmm6
580 cmpq $0x60,%rdx
581 jb .Lecb_enc_five
582 movups 80(%rdi),%xmm7
583 je .Lecb_enc_six
584 movdqu 96(%rdi),%xmm8
585 xorps %xmm9,%xmm9
586 call _aesni_encrypt8
587 movups %xmm2,(%rsi)
588 movups %xmm3,16(%rsi)
589 movups %xmm4,32(%rsi)
590 movups %xmm5,48(%rsi)
591 movups %xmm6,64(%rsi)
592 movups %xmm7,80(%rsi)
593 movups %xmm8,96(%rsi)
594 jmp .Lecb_ret
595 .align 16
596 .Lecb_enc_one:
597 movups (%rcx),%xmm0
598 movups 16(%rcx),%xmm1
599 leaq 32(%rcx),%rcx
600 xorps %xmm0,%xmm2
601 .Loop_enc1_3:
602 .byte 102,15,56,220,209
603 decl %eax
604 movups (%rcx),%xmm1
605 leaq 16(%rcx),%rcx
606 jnz .Loop_enc1_3
607 .byte 102,15,56,221,209
608 movups %xmm2,(%rsi)
609 jmp .Lecb_ret
610 .align 16
611 .Lecb_enc_two:
612 call _aesni_encrypt2
613 movups %xmm2,(%rsi)
614 movups %xmm3,16(%rsi)
615 jmp .Lecb_ret
616 .align 16
617 .Lecb_enc_three:
618 call _aesni_encrypt3
619 movups %xmm2,(%rsi)
620 movups %xmm3,16(%rsi)
621 movups %xmm4,32(%rsi)
622 jmp .Lecb_ret
623 .align 16
624 .Lecb_enc_four:
625 call _aesni_encrypt4
626 movups %xmm2,(%rsi)
627 movups %xmm3,16(%rsi)
628 movups %xmm4,32(%rsi)
629 movups %xmm5,48(%rsi)
630 jmp .Lecb_ret
631 .align 16
632 .Lecb_enc_five:
633 xorps %xmm7,%xmm7
634 call _aesni_encrypt6
635 movups %xmm2,(%rsi)
636 movups %xmm3,16(%rsi)
637 movups %xmm4,32(%rsi)
638 movups %xmm5,48(%rsi)
639 movups %xmm6,64(%rsi)
640 jmp .Lecb_ret
641 .align 16
642 .Lecb_enc_six:
643 call _aesni_encrypt6
644 movups %xmm2,(%rsi)
645 movups %xmm3,16(%rsi)
646 movups %xmm4,32(%rsi)
647 movups %xmm5,48(%rsi)
648 movups %xmm6,64(%rsi)
649 movups %xmm7,80(%rsi)
650 jmp .Lecb_ret
651
652 .align 16
653 .Lecb_decrypt:
654 cmpq $0x80,%rdx
655 jb .Lecb_dec_tail
656
657 movdqu (%rdi),%xmm2
658 movdqu 16(%rdi),%xmm3
659 movdqu 32(%rdi),%xmm4
660 movdqu 48(%rdi),%xmm5
661 movdqu 64(%rdi),%xmm6
662 movdqu 80(%rdi),%xmm7
663 movdqu 96(%rdi),%xmm8
664 movdqu 112(%rdi),%xmm9
665 leaq 128(%rdi),%rdi
666 subq $0x80,%rdx
667 jmp .Lecb_dec_loop8_enter
668 .align 16
669 .Lecb_dec_loop8:
670 movups %xmm2,(%rsi)
671 movq %r11,%rcx
672 movdqu (%rdi),%xmm2
673 movl %r10d,%eax
674 movups %xmm3,16(%rsi)
675 movdqu 16(%rdi),%xmm3
676 movups %xmm4,32(%rsi)
677 movdqu 32(%rdi),%xmm4
678 movups %xmm5,48(%rsi)
679 movdqu 48(%rdi),%xmm5
680 movups %xmm6,64(%rsi)
681 movdqu 64(%rdi),%xmm6
682 movups %xmm7,80(%rsi)
683 movdqu 80(%rdi),%xmm7
684 movups %xmm8,96(%rsi)
685 movdqu 96(%rdi),%xmm8
686 movups %xmm9,112(%rsi)
687 leaq 128(%rsi),%rsi
688 movdqu 112(%rdi),%xmm9
689 leaq 128(%rdi),%rdi
690 .Lecb_dec_loop8_enter:
691
692 call _aesni_decrypt8
693
694 movups (%r11),%xmm0
695 subq $0x80,%rdx
696 jnc .Lecb_dec_loop8
697
698 movups %xmm2,(%rsi)
699 pxor %xmm2,%xmm2
700 movq %r11,%rcx
701 movups %xmm3,16(%rsi)
702 pxor %xmm3,%xmm3
703 movl %r10d,%eax
704 movups %xmm4,32(%rsi)
705 pxor %xmm4,%xmm4
706 movups %xmm5,48(%rsi)
707 pxor %xmm5,%xmm5
708 movups %xmm6,64(%rsi)
709 pxor %xmm6,%xmm6
710 movups %xmm7,80(%rsi)
711 pxor %xmm7,%xmm7
712 movups %xmm8,96(%rsi)
713 pxor %xmm8,%xmm8
714 movups %xmm9,112(%rsi)
715 pxor %xmm9,%xmm9
716 leaq 128(%rsi),%rsi
717 addq $0x80,%rdx
718 jz .Lecb_ret
719
720 .Lecb_dec_tail:
721 movups (%rdi),%xmm2
722 cmpq $0x20,%rdx
723 jb .Lecb_dec_one
724 movups 16(%rdi),%xmm3
725 je .Lecb_dec_two
726 movups 32(%rdi),%xmm4
727 cmpq $0x40,%rdx
728 jb .Lecb_dec_three
729 movups 48(%rdi),%xmm5
730 je .Lecb_dec_four
731 movups 64(%rdi),%xmm6
732 cmpq $0x60,%rdx
733 jb .Lecb_dec_five
734 movups 80(%rdi),%xmm7
735 je .Lecb_dec_six
736 movups 96(%rdi),%xmm8
737 movups (%rcx),%xmm0
738 xorps %xmm9,%xmm9
739 call _aesni_decrypt8
740 movups %xmm2,(%rsi)
741 pxor %xmm2,%xmm2
742 movups %xmm3,16(%rsi)
743 pxor %xmm3,%xmm3
744 movups %xmm4,32(%rsi)
745 pxor %xmm4,%xmm4
746 movups %xmm5,48(%rsi)
747 pxor %xmm5,%xmm5
748 movups %xmm6,64(%rsi)
749 pxor %xmm6,%xmm6
750 movups %xmm7,80(%rsi)
751 pxor %xmm7,%xmm7
752 movups %xmm8,96(%rsi)
753 pxor %xmm8,%xmm8
754 pxor %xmm9,%xmm9
755 jmp .Lecb_ret
756 .align 16
757 .Lecb_dec_one:
758 movups (%rcx),%xmm0
759 movups 16(%rcx),%xmm1
760 leaq 32(%rcx),%rcx
761 xorps %xmm0,%xmm2
762 .Loop_dec1_4:
763 .byte 102,15,56,222,209
764 decl %eax
765 movups (%rcx),%xmm1
766 leaq 16(%rcx),%rcx
767 jnz .Loop_dec1_4
768 .byte 102,15,56,223,209
769 movups %xmm2,(%rsi)
770 pxor %xmm2,%xmm2
771 jmp .Lecb_ret
772 .align 16
773 .Lecb_dec_two:
774 call _aesni_decrypt2
775 movups %xmm2,(%rsi)
776 pxor %xmm2,%xmm2
777 movups %xmm3,16(%rsi)
778 pxor %xmm3,%xmm3
779 jmp .Lecb_ret
780 .align 16
781 .Lecb_dec_three:
782 call _aesni_decrypt3
783 movups %xmm2,(%rsi)
784 pxor %xmm2,%xmm2
785 movups %xmm3,16(%rsi)
786 pxor %xmm3,%xmm3
787 movups %xmm4,32(%rsi)
788 pxor %xmm4,%xmm4
789 jmp .Lecb_ret
790 .align 16
791 .Lecb_dec_four:
792 call _aesni_decrypt4
793 movups %xmm2,(%rsi)
794 pxor %xmm2,%xmm2
795 movups %xmm3,16(%rsi)
796 pxor %xmm3,%xmm3
797 movups %xmm4,32(%rsi)
798 pxor %xmm4,%xmm4
799 movups %xmm5,48(%rsi)
800 pxor %xmm5,%xmm5
801 jmp .Lecb_ret
802 .align 16
803 .Lecb_dec_five:
804 xorps %xmm7,%xmm7
805 call _aesni_decrypt6
806 movups %xmm2,(%rsi)
807 pxor %xmm2,%xmm2
808 movups %xmm3,16(%rsi)
809 pxor %xmm3,%xmm3
810 movups %xmm4,32(%rsi)
811 pxor %xmm4,%xmm4
812 movups %xmm5,48(%rsi)
813 pxor %xmm5,%xmm5
814 movups %xmm6,64(%rsi)
815 pxor %xmm6,%xmm6
816 pxor %xmm7,%xmm7
817 jmp .Lecb_ret
818 .align 16
819 .Lecb_dec_six:
820 call _aesni_decrypt6
821 movups %xmm2,(%rsi)
822 pxor %xmm2,%xmm2
823 movups %xmm3,16(%rsi)
824 pxor %xmm3,%xmm3
825 movups %xmm4,32(%rsi)
826 pxor %xmm4,%xmm4
827 movups %xmm5,48(%rsi)
828 pxor %xmm5,%xmm5
829 movups %xmm6,64(%rsi)
830 pxor %xmm6,%xmm6
831 movups %xmm7,80(%rsi)
832 pxor %xmm7,%xmm7
833
834 .Lecb_ret:
835 xorps %xmm0,%xmm0
836 pxor %xmm1,%xmm1
837 .byte 0xf3,0xc3
838 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt
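# aesni_ccm64_encrypt_blocks: CCM (CTR + CBC-MAC) helper for a 64-bit block
# counter. Apparent argument mapping: %rdi = in, %rsi = out, %rdx = number of
# 16-byte blocks, %rcx = key, %r8 = counter block, %r9 = running CMAC. Each
# iteration encrypts the counter and folds the plaintext block into the CMAC
# in one two-block AES pass; the counter is advanced via .Lincrement64 (with
# .Lbswap_mask handling endianness), and the updated CMAC is written back
# through %r9 before the XMM registers are wiped.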
839 .globl aesni_ccm64_encrypt_blocks
840 .hidden aesni_ccm64_encrypt_blocks
841 .type aesni_ccm64_encrypt_blocks,@function
842 .align 16
843 aesni_ccm64_encrypt_blocks:
844 movl 240(%rcx),%eax
845 movdqu (%r8),%xmm6
846 movdqa .Lincrement64(%rip),%xmm9
847 movdqa .Lbswap_mask(%rip),%xmm7
848
849 shll $4,%eax
850 movl $16,%r10d
851 leaq 0(%rcx),%r11
852 movdqu (%r9),%xmm3
853 movdqa %xmm6,%xmm2
854 leaq 32(%rcx,%rax,1),%rcx
855 .byte 102,15,56,0,247
856 subq %rax,%r10
857 jmp .Lccm64_enc_outer
858 .align 16
859 .Lccm64_enc_outer:
860 movups (%r11),%xmm0
861 movq %r10,%rax
862 movups (%rdi),%xmm8
863
864 xorps %xmm0,%xmm2
865 movups 16(%r11),%xmm1
866 xorps %xmm8,%xmm0
867 xorps %xmm0,%xmm3
868 movups 32(%r11),%xmm0
869
870 .Lccm64_enc2_loop:
871 .byte 102,15,56,220,209
872 .byte 102,15,56,220,217
873 movups (%rcx,%rax,1),%xmm1
874 addq $32,%rax
875 .byte 102,15,56,220,208
876 .byte 102,15,56,220,216
877 movups -16(%rcx,%rax,1),%xmm0
878 jnz .Lccm64_enc2_loop
879 .byte 102,15,56,220,209
880 .byte 102,15,56,220,217
881 paddq %xmm9,%xmm6
882 decq %rdx
883 .byte 102,15,56,221,208
884 .byte 102,15,56,221,216
885
886 leaq 16(%rdi),%rdi
887 xorps %xmm2,%xmm8
888 movdqa %xmm6,%xmm2
889 movups %xmm8,(%rsi)
890 .byte 102,15,56,0,215
891 leaq 16(%rsi),%rsi
892 jnz .Lccm64_enc_outer
893
894 pxor %xmm0,%xmm0
895 pxor %xmm1,%xmm1
896 pxor %xmm2,%xmm2
897 movups %xmm3,(%r9)
898 pxor %xmm3,%xmm3
899 pxor %xmm8,%xmm8
900 pxor %xmm6,%xmm6
901 .byte 0xf3,0xc3
902 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
903 .globl aesni_ccm64_decrypt_blocks
904 .hidden aesni_ccm64_decrypt_blocks
905 .type aesni_ccm64_decrypt_blocks,@function
906 .align 16
907 aesni_ccm64_decrypt_blocks:
908 movl 240(%rcx),%eax
909 movups (%r8),%xmm6
910 movdqu (%r9),%xmm3
911 movdqa .Lincrement64(%rip),%xmm9
912 movdqa .Lbswap_mask(%rip),%xmm7
913
914 movaps %xmm6,%xmm2
915 movl %eax,%r10d
916 movq %rcx,%r11
917 .byte 102,15,56,0,247
918 movups (%rcx),%xmm0
919 movups 16(%rcx),%xmm1
920 leaq 32(%rcx),%rcx
921 xorps %xmm0,%xmm2
922 .Loop_enc1_5:
923 .byte 102,15,56,220,209
924 decl %eax
925 movups (%rcx),%xmm1
926 leaq 16(%rcx),%rcx
927 jnz .Loop_enc1_5
928 .byte 102,15,56,221,209
929 shll $4,%r10d
930 movl $16,%eax
931 movups (%rdi),%xmm8
932 paddq %xmm9,%xmm6
933 leaq 16(%rdi),%rdi
934 subq %r10,%rax
935 leaq 32(%r11,%r10,1),%rcx
936 movq %rax,%r10
937 jmp .Lccm64_dec_outer
938 .align 16
939 .Lccm64_dec_outer:
940 xorps %xmm2,%xmm8
941 movdqa %xmm6,%xmm2
942 movups %xmm8,(%rsi)
943 leaq 16(%rsi),%rsi
944 .byte 102,15,56,0,215
945
946 subq $1,%rdx
947 jz .Lccm64_dec_break
948
949 movups (%r11),%xmm0
950 movq %r10,%rax
951 movups 16(%r11),%xmm1
952 xorps %xmm0,%xmm8
953 xorps %xmm0,%xmm2
954 xorps %xmm8,%xmm3
955 movups 32(%r11),%xmm0
956 jmp .Lccm64_dec2_loop
957 .align 16
958 .Lccm64_dec2_loop:
959 .byte 102,15,56,220,209
960 .byte 102,15,56,220,217
961 movups (%rcx,%rax,1),%xmm1
962 addq $32,%rax
963 .byte 102,15,56,220,208
964 .byte 102,15,56,220,216
965 movups -16(%rcx,%rax,1),%xmm0
966 jnz .Lccm64_dec2_loop
967 movups (%rdi),%xmm8
968 paddq %xmm9,%xmm6
969 .byte 102,15,56,220,209
970 .byte 102,15,56,220,217
971 .byte 102,15,56,221,208
972 .byte 102,15,56,221,216
973 leaq 16(%rdi),%rdi
974 jmp .Lccm64_dec_outer
975
976 .align 16
977 .Lccm64_dec_break:
978
979 movl 240(%r11),%eax
980 movups (%r11),%xmm0
981 movups 16(%r11),%xmm1
982 xorps %xmm0,%xmm8
983 leaq 32(%r11),%r11
984 xorps %xmm8,%xmm3
985 .Loop_enc1_6:
986 .byte 102,15,56,220,217
987 decl %eax
988 movups (%r11),%xmm1
989 leaq 16(%r11),%r11
990 jnz .Loop_enc1_6
991 .byte 102,15,56,221,217
992 pxor %xmm0,%xmm0
993 pxor %xmm1,%xmm1
994 pxor %xmm2,%xmm2
995 movups %xmm3,(%r9)
996 pxor %xmm3,%xmm3
997 pxor %xmm8,%xmm8
998 pxor %xmm6,%xmm6
999 .byte 0xf3,0xc3
1000 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
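# aesni_ctr32_encrypt_blocks: CTR mode with a 32-bit big-endian counter in
# the last word of the IV. Apparent arguments: %rdi = in, %rsi = out,
# %rdx = number of blocks, %rcx = key, %r8 = 16-byte counter block. A single
# block takes a short path with no stack frame; the bulk path keeps eight
# pre-whitened counter blocks on the stack and consults OPENSSL_ia32cap_P to
# choose between the 8-blocks-per-iteration loop and a 6-block variant
# (apparently tuned for Atom/Silvermont-class cores), then scrubs the stack
# copies and XMM registers at .Lctr32_done.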
1001 .globl aesni_ctr32_encrypt_blocks
1002 .hidden aesni_ctr32_encrypt_blocks
1003 .type aesni_ctr32_encrypt_blocks,@function
1004 .align 16
1005 aesni_ctr32_encrypt_blocks:
1006 cmpq $1,%rdx
1007 jne .Lctr32_bulk
1008
1009
1010
1011 movups (%r8),%xmm2
1012 movups (%rdi),%xmm3
1013 movl 240(%rcx),%edx
1014 movups (%rcx),%xmm0
1015 movups 16(%rcx),%xmm1
1016 leaq 32(%rcx),%rcx
1017 xorps %xmm0,%xmm2
1018 .Loop_enc1_7:
1019 .byte 102,15,56,220,209
1020 decl %edx
1021 movups (%rcx),%xmm1
1022 leaq 16(%rcx),%rcx
1023 jnz .Loop_enc1_7
1024 .byte 102,15,56,221,209
1025 pxor %xmm0,%xmm0
1026 pxor %xmm1,%xmm1
1027 xorps %xmm3,%xmm2
1028 pxor %xmm3,%xmm3
1029 movups %xmm2,(%rsi)
1030 xorps %xmm2,%xmm2
1031 jmp .Lctr32_epilogue
1032
1033 .align 16
1034 .Lctr32_bulk:
1035 leaq (%rsp),%r11
1036 pushq %rbp
1037 subq $128,%rsp
1038 andq $-16,%rsp
1039
1040
1041
1042
1043 movdqu (%r8),%xmm2
1044 movdqu (%rcx),%xmm0
1045 movl 12(%r8),%r8d
1046 pxor %xmm0,%xmm2
1047 movl 12(%rcx),%ebp
1048 movdqa %xmm2,0(%rsp)
1049 bswapl %r8d
1050 movdqa %xmm2,%xmm3
1051 movdqa %xmm2,%xmm4
1052 movdqa %xmm2,%xmm5
1053 movdqa %xmm2,64(%rsp)
1054 movdqa %xmm2,80(%rsp)
1055 movdqa %xmm2,96(%rsp)
1056 movq %rdx,%r10
1057 movdqa %xmm2,112(%rsp)
1058
1059 leaq 1(%r8),%rax
1060 leaq 2(%r8),%rdx
1061 bswapl %eax
1062 bswapl %edx
1063 xorl %ebp,%eax
1064 xorl %ebp,%edx
1065 .byte 102,15,58,34,216,3
1066 leaq 3(%r8),%rax
1067 movdqa %xmm3,16(%rsp)
1068 .byte 102,15,58,34,226,3
1069 bswapl %eax
1070 movq %r10,%rdx
1071 leaq 4(%r8),%r10
1072 movdqa %xmm4,32(%rsp)
1073 xorl %ebp,%eax
1074 bswapl %r10d
1075 .byte 102,15,58,34,232,3
1076 xorl %ebp,%r10d
1077 movdqa %xmm5,48(%rsp)
1078 leaq 5(%r8),%r9
1079 movl %r10d,64+12(%rsp)
1080 bswapl %r9d
1081 leaq 6(%r8),%r10
1082 movl 240(%rcx),%eax
1083 xorl %ebp,%r9d
1084 bswapl %r10d
1085 movl %r9d,80+12(%rsp)
1086 xorl %ebp,%r10d
1087 leaq 7(%r8),%r9
1088 movl %r10d,96+12(%rsp)
1089 bswapl %r9d
1090 movl OPENSSL_ia32cap_P+4(%rip),%r10d
1091 xorl %ebp,%r9d
1092 andl $71303168,%r10d
1093 movl %r9d,112+12(%rsp)
1094
1095 movups 16(%rcx),%xmm1
1096
1097 movdqa 64(%rsp),%xmm6
1098 movdqa 80(%rsp),%xmm7
1099
1100 cmpq $8,%rdx
1101 jb .Lctr32_tail
1102
1103 subq $6,%rdx
1104 cmpl $4194304,%r10d
1105 je .Lctr32_6x
1106
1107 leaq 128(%rcx),%rcx
1108 subq $2,%rdx
1109 jmp .Lctr32_loop8
1110
1111 .align 16
1112 .Lctr32_6x:
1113 shll $4,%eax
1114 movl $48,%r10d
1115 bswapl %ebp
1116 leaq 32(%rcx,%rax,1),%rcx
1117 subq %rax,%r10
1118 jmp .Lctr32_loop6
1119
1120 .align 16
1121 .Lctr32_loop6:
1122 addl $6,%r8d
1123 movups -48(%rcx,%r10,1),%xmm0
1124 .byte 102,15,56,220,209
1125 movl %r8d,%eax
1126 xorl %ebp,%eax
1127 .byte 102,15,56,220,217
1128 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1129 leal 1(%r8),%eax
1130 .byte 102,15,56,220,225
1131 xorl %ebp,%eax
1132 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1133 .byte 102,15,56,220,233
1134 leal 2(%r8),%eax
1135 xorl %ebp,%eax
1136 .byte 102,15,56,220,241
1137 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1138 leal 3(%r8),%eax
1139 .byte 102,15,56,220,249
1140 movups -32(%rcx,%r10,1),%xmm1
1141 xorl %ebp,%eax
1142
1143 .byte 102,15,56,220,208
1144 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1145 leal 4(%r8),%eax
1146 .byte 102,15,56,220,216
1147 xorl %ebp,%eax
1148 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1149 .byte 102,15,56,220,224
1150 leal 5(%r8),%eax
1151 xorl %ebp,%eax
1152 .byte 102,15,56,220,232
1153 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1154 movq %r10,%rax
1155 .byte 102,15,56,220,240
1156 .byte 102,15,56,220,248
1157 movups -16(%rcx,%r10,1),%xmm0
1158
1159 call .Lenc_loop6
1160
1161 movdqu (%rdi),%xmm8
1162 movdqu 16(%rdi),%xmm9
1163 movdqu 32(%rdi),%xmm10
1164 movdqu 48(%rdi),%xmm11
1165 movdqu 64(%rdi),%xmm12
1166 movdqu 80(%rdi),%xmm13
1167 leaq 96(%rdi),%rdi
1168 movups -64(%rcx,%r10,1),%xmm1
1169 pxor %xmm2,%xmm8
1170 movaps 0(%rsp),%xmm2
1171 pxor %xmm3,%xmm9
1172 movaps 16(%rsp),%xmm3
1173 pxor %xmm4,%xmm10
1174 movaps 32(%rsp),%xmm4
1175 pxor %xmm5,%xmm11
1176 movaps 48(%rsp),%xmm5
1177 pxor %xmm6,%xmm12
1178 movaps 64(%rsp),%xmm6
1179 pxor %xmm7,%xmm13
1180 movaps 80(%rsp),%xmm7
1181 movdqu %xmm8,(%rsi)
1182 movdqu %xmm9,16(%rsi)
1183 movdqu %xmm10,32(%rsi)
1184 movdqu %xmm11,48(%rsi)
1185 movdqu %xmm12,64(%rsi)
1186 movdqu %xmm13,80(%rsi)
1187 leaq 96(%rsi),%rsi
1188
1189 subq $6,%rdx
1190 jnc .Lctr32_loop6
1191
1192 addq $6,%rdx
1193 jz .Lctr32_done
1194
1195 leal -48(%r10),%eax
1196 leaq -80(%rcx,%r10,1),%rcx
1197 negl %eax
1198 shrl $4,%eax
1199 jmp .Lctr32_tail
1200
1201 .align 32
1202 .Lctr32_loop8:
1203 addl $8,%r8d
1204 movdqa 96(%rsp),%xmm8
1205 .byte 102,15,56,220,209
1206 movl %r8d,%r9d
1207 movdqa 112(%rsp),%xmm9
1208 .byte 102,15,56,220,217
1209 bswapl %r9d
1210 movups 32-128(%rcx),%xmm0
1211 .byte 102,15,56,220,225
1212 xorl %ebp,%r9d
1213 nop
1214 .byte 102,15,56,220,233
1215 movl %r9d,0+12(%rsp)
1216 leaq 1(%r8),%r9
1217 .byte 102,15,56,220,241
1218 .byte 102,15,56,220,249
1219 .byte 102,68,15,56,220,193
1220 .byte 102,68,15,56,220,201
1221 movups 48-128(%rcx),%xmm1
1222 bswapl %r9d
1223 .byte 102,15,56,220,208
1224 .byte 102,15,56,220,216
1225 xorl %ebp,%r9d
1226 .byte 0x66,0x90
1227 .byte 102,15,56,220,224
1228 .byte 102,15,56,220,232
1229 movl %r9d,16+12(%rsp)
1230 leaq 2(%r8),%r9
1231 .byte 102,15,56,220,240
1232 .byte 102,15,56,220,248
1233 .byte 102,68,15,56,220,192
1234 .byte 102,68,15,56,220,200
1235 movups 64-128(%rcx),%xmm0
1236 bswapl %r9d
1237 .byte 102,15,56,220,209
1238 .byte 102,15,56,220,217
1239 xorl %ebp,%r9d
1240 .byte 0x66,0x90
1241 .byte 102,15,56,220,225
1242 .byte 102,15,56,220,233
1243 movl %r9d,32+12(%rsp)
1244 leaq 3(%r8),%r9
1245 .byte 102,15,56,220,241
1246 .byte 102,15,56,220,249
1247 .byte 102,68,15,56,220,193
1248 .byte 102,68,15,56,220,201
1249 movups 80-128(%rcx),%xmm1
1250 bswapl %r9d
1251 .byte 102,15,56,220,208
1252 .byte 102,15,56,220,216
1253 xorl %ebp,%r9d
1254 .byte 0x66,0x90
1255 .byte 102,15,56,220,224
1256 .byte 102,15,56,220,232
1257 movl %r9d,48+12(%rsp)
1258 leaq 4(%r8),%r9
1259 .byte 102,15,56,220,240
1260 .byte 102,15,56,220,248
1261 .byte 102,68,15,56,220,192
1262 .byte 102,68,15,56,220,200
1263 movups 96-128(%rcx),%xmm0
1264 bswapl %r9d
1265 .byte 102,15,56,220,209
1266 .byte 102,15,56,220,217
1267 xorl %ebp,%r9d
1268 .byte 0x66,0x90
1269 .byte 102,15,56,220,225
1270 .byte 102,15,56,220,233
1271 movl %r9d,64+12(%rsp)
1272 leaq 5(%r8),%r9
1273 .byte 102,15,56,220,241
1274 .byte 102,15,56,220,249
1275 .byte 102,68,15,56,220,193
1276 .byte 102,68,15,56,220,201
1277 movups 112-128(%rcx),%xmm1
1278 bswapl %r9d
1279 .byte 102,15,56,220,208
1280 .byte 102,15,56,220,216
1281 xorl %ebp,%r9d
1282 .byte 0x66,0x90
1283 .byte 102,15,56,220,224
1284 .byte 102,15,56,220,232
1285 movl %r9d,80+12(%rsp)
1286 leaq 6(%r8),%r9
1287 .byte 102,15,56,220,240
1288 .byte 102,15,56,220,248
1289 .byte 102,68,15,56,220,192
1290 .byte 102,68,15,56,220,200
1291 movups 128-128(%rcx),%xmm0
1292 bswapl %r9d
1293 .byte 102,15,56,220,209
1294 .byte 102,15,56,220,217
1295 xorl %ebp,%r9d
1296 .byte 0x66,0x90
1297 .byte 102,15,56,220,225
1298 .byte 102,15,56,220,233
1299 movl %r9d,96+12(%rsp)
1300 leaq 7(%r8),%r9
1301 .byte 102,15,56,220,241
1302 .byte 102,15,56,220,249
1303 .byte 102,68,15,56,220,193
1304 .byte 102,68,15,56,220,201
1305 movups 144-128(%rcx),%xmm1
1306 bswapl %r9d
1307 .byte 102,15,56,220,208
1308 .byte 102,15,56,220,216
1309 .byte 102,15,56,220,224
1310 xorl %ebp,%r9d
1311 movdqu 0(%rdi),%xmm10
1312 .byte 102,15,56,220,232
1313 movl %r9d,112+12(%rsp)
1314 cmpl $11,%eax
1315 .byte 102,15,56,220,240
1316 .byte 102,15,56,220,248
1317 .byte 102,68,15,56,220,192
1318 .byte 102,68,15,56,220,200
1319 movups 160-128(%rcx),%xmm0
1320
1321 jb .Lctr32_enc_done
1322
1323 .byte 102,15,56,220,209
1324 .byte 102,15,56,220,217
1325 .byte 102,15,56,220,225
1326 .byte 102,15,56,220,233
1327 .byte 102,15,56,220,241
1328 .byte 102,15,56,220,249
1329 .byte 102,68,15,56,220,193
1330 .byte 102,68,15,56,220,201
1331 movups 176-128(%rcx),%xmm1
1332
1333 .byte 102,15,56,220,208
1334 .byte 102,15,56,220,216
1335 .byte 102,15,56,220,224
1336 .byte 102,15,56,220,232
1337 .byte 102,15,56,220,240
1338 .byte 102,15,56,220,248
1339 .byte 102,68,15,56,220,192
1340 .byte 102,68,15,56,220,200
1341 movups 192-128(%rcx),%xmm0
1342 je .Lctr32_enc_done
1343
1344 .byte 102,15,56,220,209
1345 .byte 102,15,56,220,217
1346 .byte 102,15,56,220,225
1347 .byte 102,15,56,220,233
1348 .byte 102,15,56,220,241
1349 .byte 102,15,56,220,249
1350 .byte 102,68,15,56,220,193
1351 .byte 102,68,15,56,220,201
1352 movups 208-128(%rcx),%xmm1
1353
1354 .byte 102,15,56,220,208
1355 .byte 102,15,56,220,216
1356 .byte 102,15,56,220,224
1357 .byte 102,15,56,220,232
1358 .byte 102,15,56,220,240
1359 .byte 102,15,56,220,248
1360 .byte 102,68,15,56,220,192
1361 .byte 102,68,15,56,220,200
1362 movups 224-128(%rcx),%xmm0
1363 jmp .Lctr32_enc_done
1364
1365 .align 16
1366 .Lctr32_enc_done:
1367 movdqu 16(%rdi),%xmm11
1368 pxor %xmm0,%xmm10
1369 movdqu 32(%rdi),%xmm12
1370 pxor %xmm0,%xmm11
1371 movdqu 48(%rdi),%xmm13
1372 pxor %xmm0,%xmm12
1373 movdqu 64(%rdi),%xmm14
1374 pxor %xmm0,%xmm13
1375 movdqu 80(%rdi),%xmm15
1376 pxor %xmm0,%xmm14
1377 pxor %xmm0,%xmm15
1378 .byte 102,15,56,220,209
1379 .byte 102,15,56,220,217
1380 .byte 102,15,56,220,225
1381 .byte 102,15,56,220,233
1382 .byte 102,15,56,220,241
1383 .byte 102,15,56,220,249
1384 .byte 102,68,15,56,220,193
1385 .byte 102,68,15,56,220,201
1386 movdqu 96(%rdi),%xmm1
1387 leaq 128(%rdi),%rdi
1388
1389 .byte 102,65,15,56,221,210
1390 pxor %xmm0,%xmm1
1391 movdqu 112-128(%rdi),%xmm10
1392 .byte 102,65,15,56,221,219
1393 pxor %xmm0,%xmm10
1394 movdqa 0(%rsp),%xmm11
1395 .byte 102,65,15,56,221,228
1396 .byte 102,65,15,56,221,237
1397 movdqa 16(%rsp),%xmm12
1398 movdqa 32(%rsp),%xmm13
1399 .byte 102,65,15,56,221,246
1400 .byte 102,65,15,56,221,255
1401 movdqa 48(%rsp),%xmm14
1402 movdqa 64(%rsp),%xmm15
1403 .byte 102,68,15,56,221,193
1404 movdqa 80(%rsp),%xmm0
1405 movups 16-128(%rcx),%xmm1
1406 .byte 102,69,15,56,221,202
1407
1408 movups %xmm2,(%rsi)
1409 movdqa %xmm11,%xmm2
1410 movups %xmm3,16(%rsi)
1411 movdqa %xmm12,%xmm3
1412 movups %xmm4,32(%rsi)
1413 movdqa %xmm13,%xmm4
1414 movups %xmm5,48(%rsi)
1415 movdqa %xmm14,%xmm5
1416 movups %xmm6,64(%rsi)
1417 movdqa %xmm15,%xmm6
1418 movups %xmm7,80(%rsi)
1419 movdqa %xmm0,%xmm7
1420 movups %xmm8,96(%rsi)
1421 movups %xmm9,112(%rsi)
1422 leaq 128(%rsi),%rsi
1423
1424 subq $8,%rdx
1425 jnc .Lctr32_loop8
1426
1427 addq $8,%rdx
1428 jz .Lctr32_done
1429 leaq -128(%rcx),%rcx
1430
1431 .Lctr32_tail:
1432
1433
1434 leaq 16(%rcx),%rcx
1435 cmpq $4,%rdx
1436 jb .Lctr32_loop3
1437 je .Lctr32_loop4
1438
1439
1440 shll $4,%eax
1441 movdqa 96(%rsp),%xmm8
1442 pxor %xmm9,%xmm9
1443
1444 movups 16(%rcx),%xmm0
1445 .byte 102,15,56,220,209
1446 .byte 102,15,56,220,217
1447 leaq 32-16(%rcx,%rax,1),%rcx
1448 negq %rax
1449 .byte 102,15,56,220,225
1450 addq $16,%rax
1451 movups (%rdi),%xmm10
1452 .byte 102,15,56,220,233
1453 .byte 102,15,56,220,241
1454 movups 16(%rdi),%xmm11
1455 movups 32(%rdi),%xmm12
1456 .byte 102,15,56,220,249
1457 .byte 102,68,15,56,220,193
1458
1459 call .Lenc_loop8_enter
1460
1461 movdqu 48(%rdi),%xmm13
1462 pxor %xmm10,%xmm2
1463 movdqu 64(%rdi),%xmm10
1464 pxor %xmm11,%xmm3
1465 movdqu %xmm2,(%rsi)
1466 pxor %xmm12,%xmm4
1467 movdqu %xmm3,16(%rsi)
1468 pxor %xmm13,%xmm5
1469 movdqu %xmm4,32(%rsi)
1470 pxor %xmm10,%xmm6
1471 movdqu %xmm5,48(%rsi)
1472 movdqu %xmm6,64(%rsi)
1473 cmpq $6,%rdx
1474 jb .Lctr32_done
1475
1476 movups 80(%rdi),%xmm11
1477 xorps %xmm11,%xmm7
1478 movups %xmm7,80(%rsi)
1479 je .Lctr32_done
1480
1481 movups 96(%rdi),%xmm12
1482 xorps %xmm12,%xmm8
1483 movups %xmm8,96(%rsi)
1484 jmp .Lctr32_done
1485
1486 .align 32
1487 .Lctr32_loop4:
1488 .byte 102,15,56,220,209
1489 leaq 16(%rcx),%rcx
1490 decl %eax
1491 .byte 102,15,56,220,217
1492 .byte 102,15,56,220,225
1493 .byte 102,15,56,220,233
1494 movups (%rcx),%xmm1
1495 jnz .Lctr32_loop4
1496 .byte 102,15,56,221,209
1497 .byte 102,15,56,221,217
1498 movups (%rdi),%xmm10
1499 movups 16(%rdi),%xmm11
1500 .byte 102,15,56,221,225
1501 .byte 102,15,56,221,233
1502 movups 32(%rdi),%xmm12
1503 movups 48(%rdi),%xmm13
1504
1505 xorps %xmm10,%xmm2
1506 movups %xmm2,(%rsi)
1507 xorps %xmm11,%xmm3
1508 movups %xmm3,16(%rsi)
1509 pxor %xmm12,%xmm4
1510 movdqu %xmm4,32(%rsi)
1511 pxor %xmm13,%xmm5
1512 movdqu %xmm5,48(%rsi)
1513 jmp .Lctr32_done
1514
1515 .align 32
1516 .Lctr32_loop3:
1517 .byte 102,15,56,220,209
1518 leaq 16(%rcx),%rcx
1519 decl %eax
1520 .byte 102,15,56,220,217
1521 .byte 102,15,56,220,225
1522 movups (%rcx),%xmm1
1523 jnz .Lctr32_loop3
1524 .byte 102,15,56,221,209
1525 .byte 102,15,56,221,217
1526 .byte 102,15,56,221,225
1527
1528 movups (%rdi),%xmm10
1529 xorps %xmm10,%xmm2
1530 movups %xmm2,(%rsi)
1531 cmpq $2,%rdx
1532 jb .Lctr32_done
1533
1534 movups 16(%rdi),%xmm11
1535 xorps %xmm11,%xmm3
1536 movups %xmm3,16(%rsi)
1537 je .Lctr32_done
1538
1539 movups 32(%rdi),%xmm12
1540 xorps %xmm12,%xmm4
1541 movups %xmm4,32(%rsi)
1542
1543 .Lctr32_done:
1544 xorps %xmm0,%xmm0
1545 xorl %ebp,%ebp
1546 pxor %xmm1,%xmm1
1547 pxor %xmm2,%xmm2
1548 pxor %xmm3,%xmm3
1549 pxor %xmm4,%xmm4
1550 pxor %xmm5,%xmm5
1551 pxor %xmm6,%xmm6
1552 pxor %xmm7,%xmm7
1553 movaps %xmm0,0(%rsp)
1554 pxor %xmm8,%xmm8
1555 movaps %xmm0,16(%rsp)
1556 pxor %xmm9,%xmm9
1557 movaps %xmm0,32(%rsp)
1558 pxor %xmm10,%xmm10
1559 movaps %xmm0,48(%rsp)
1560 pxor %xmm11,%xmm11
1561 movaps %xmm0,64(%rsp)
1562 pxor %xmm12,%xmm12
1563 movaps %xmm0,80(%rsp)
1564 pxor %xmm13,%xmm13
1565 movaps %xmm0,96(%rsp)
1566 pxor %xmm14,%xmm14
1567 movaps %xmm0,112(%rsp)
1568 pxor %xmm15,%xmm15
1569 movq -8(%r11),%rbp
1570 leaq (%r11),%rsp
1571 .Lctr32_epilogue:
1572 .byte 0xf3,0xc3
1573 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
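# aesni_xts_encrypt: XTS-mode encryption. Apparent arguments: %rdi = in,
# %rsi = out, %rdx = length in bytes, %rcx = data key (key1), %r8 = tweak key
# (key2), %r9 = 16-byte tweak/IV. The IV is first encrypted under key2 to
# form the initial tweak; tweaks are then advanced by doubling in GF(2^128)
# using the .Lxts_magic constant, six blocks are processed per grand loop,
# and a trailing partial block is handled with ciphertext stealing at
# .Lxts_enc_steal. Stack and XMM state is cleared at .Lxts_enc_ret.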
1574 .globl aesni_xts_encrypt
1575 .hidden aesni_xts_encrypt
1576 .type aesni_xts_encrypt,@function
1577 .align 16
1578 aesni_xts_encrypt:
1579 leaq (%rsp),%r11
1580 pushq %rbp
1581 subq $112,%rsp
1582 andq $-16,%rsp
1583 movups (%r9),%xmm2
1584 movl 240(%r8),%eax
1585 movl 240(%rcx),%r10d
1586 movups (%r8),%xmm0
1587 movups 16(%r8),%xmm1
1588 leaq 32(%r8),%r8
1589 xorps %xmm0,%xmm2
1590 .Loop_enc1_8:
1591 .byte 102,15,56,220,209
1592 decl %eax
1593 movups (%r8),%xmm1
1594 leaq 16(%r8),%r8
1595 jnz .Loop_enc1_8
1596 .byte 102,15,56,221,209
1597 movups (%rcx),%xmm0
1598 movq %rcx,%rbp
1599 movl %r10d,%eax
1600 shll $4,%r10d
1601 movq %rdx,%r9
1602 andq $-16,%rdx
1603
1604 movups 16(%rcx,%r10,1),%xmm1
1605
1606 movdqa .Lxts_magic(%rip),%xmm8
1607 movdqa %xmm2,%xmm15
1608 pshufd $0x5f,%xmm2,%xmm9
1609 pxor %xmm0,%xmm1
1610 movdqa %xmm9,%xmm14
1611 paddd %xmm9,%xmm9
1612 movdqa %xmm15,%xmm10
1613 psrad $31,%xmm14
1614 paddq %xmm15,%xmm15
1615 pand %xmm8,%xmm14
1616 pxor %xmm0,%xmm10
1617 pxor %xmm14,%xmm15
1618 movdqa %xmm9,%xmm14
1619 paddd %xmm9,%xmm9
1620 movdqa %xmm15,%xmm11
1621 psrad $31,%xmm14
1622 paddq %xmm15,%xmm15
1623 pand %xmm8,%xmm14
1624 pxor %xmm0,%xmm11
1625 pxor %xmm14,%xmm15
1626 movdqa %xmm9,%xmm14
1627 paddd %xmm9,%xmm9
1628 movdqa %xmm15,%xmm12
1629 psrad $31,%xmm14
1630 paddq %xmm15,%xmm15
1631 pand %xmm8,%xmm14
1632 pxor %xmm0,%xmm12
1633 pxor %xmm14,%xmm15
1634 movdqa %xmm9,%xmm14
1635 paddd %xmm9,%xmm9
1636 movdqa %xmm15,%xmm13
1637 psrad $31,%xmm14
1638 paddq %xmm15,%xmm15
1639 pand %xmm8,%xmm14
1640 pxor %xmm0,%xmm13
1641 pxor %xmm14,%xmm15
1642 movdqa %xmm15,%xmm14
1643 psrad $31,%xmm9
1644 paddq %xmm15,%xmm15
1645 pand %xmm8,%xmm9
1646 pxor %xmm0,%xmm14
1647 pxor %xmm9,%xmm15
1648 movaps %xmm1,96(%rsp)
1649
1650 subq $96,%rdx
1651 jc .Lxts_enc_short
1652
1653 movl $16+96,%eax
1654 leaq 32(%rbp,%r10,1),%rcx
1655 subq %r10,%rax
1656 movups 16(%rbp),%xmm1
1657 movq %rax,%r10
1658 leaq .Lxts_magic(%rip),%r8
1659 jmp .Lxts_enc_grandloop
1660
1661 .align 32
1662 .Lxts_enc_grandloop:
1663 movdqu 0(%rdi),%xmm2
1664 movdqa %xmm0,%xmm8
1665 movdqu 16(%rdi),%xmm3
1666 pxor %xmm10,%xmm2
1667 movdqu 32(%rdi),%xmm4
1668 pxor %xmm11,%xmm3
1669 .byte 102,15,56,220,209
1670 movdqu 48(%rdi),%xmm5
1671 pxor %xmm12,%xmm4
1672 .byte 102,15,56,220,217
1673 movdqu 64(%rdi),%xmm6
1674 pxor %xmm13,%xmm5
1675 .byte 102,15,56,220,225
1676 movdqu 80(%rdi),%xmm7
1677 pxor %xmm15,%xmm8
1678 movdqa 96(%rsp),%xmm9
1679 pxor %xmm14,%xmm6
1680 .byte 102,15,56,220,233
1681 movups 32(%rbp),%xmm0
1682 leaq 96(%rdi),%rdi
1683 pxor %xmm8,%xmm7
1684
1685 pxor %xmm9,%xmm10
1686 .byte 102,15,56,220,241
1687 pxor %xmm9,%xmm11
1688 movdqa %xmm10,0(%rsp)
1689 .byte 102,15,56,220,249
1690 movups 48(%rbp),%xmm1
1691 pxor %xmm9,%xmm12
1692
1693 .byte 102,15,56,220,208
1694 pxor %xmm9,%xmm13
1695 movdqa %xmm11,16(%rsp)
1696 .byte 102,15,56,220,216
1697 pxor %xmm9,%xmm14
1698 movdqa %xmm12,32(%rsp)
1699 .byte 102,15,56,220,224
1700 .byte 102,15,56,220,232
1701 pxor %xmm9,%xmm8
1702 movdqa %xmm14,64(%rsp)
1703 .byte 102,15,56,220,240
1704 .byte 102,15,56,220,248
1705 movups 64(%rbp),%xmm0
1706 movdqa %xmm8,80(%rsp)
1707 pshufd $0x5f,%xmm15,%xmm9
1708 jmp .Lxts_enc_loop6
1709 .align 32
1710 .Lxts_enc_loop6:
1711 .byte 102,15,56,220,209
1712 .byte 102,15,56,220,217
1713 .byte 102,15,56,220,225
1714 .byte 102,15,56,220,233
1715 .byte 102,15,56,220,241
1716 .byte 102,15,56,220,249
1717 movups -64(%rcx,%rax,1),%xmm1
1718 addq $32,%rax
1719
1720 .byte 102,15,56,220,208
1721 .byte 102,15,56,220,216
1722 .byte 102,15,56,220,224
1723 .byte 102,15,56,220,232
1724 .byte 102,15,56,220,240
1725 .byte 102,15,56,220,248
1726 movups -80(%rcx,%rax,1),%xmm0
1727 jnz .Lxts_enc_loop6
1728
1729 movdqa (%r8),%xmm8
1730 movdqa %xmm9,%xmm14
1731 paddd %xmm9,%xmm9
1732 .byte 102,15,56,220,209
1733 paddq %xmm15,%xmm15
1734 psrad $31,%xmm14
1735 .byte 102,15,56,220,217
1736 pand %xmm8,%xmm14
1737 movups (%rbp),%xmm10
1738 .byte 102,15,56,220,225
1739 .byte 102,15,56,220,233
1740 .byte 102,15,56,220,241
1741 pxor %xmm14,%xmm15
1742 movaps %xmm10,%xmm11
1743 .byte 102,15,56,220,249
1744 movups -64(%rcx),%xmm1
1745
1746 movdqa %xmm9,%xmm14
1747 .byte 102,15,56,220,208
1748 paddd %xmm9,%xmm9
1749 pxor %xmm15,%xmm10
1750 .byte 102,15,56,220,216
1751 psrad $31,%xmm14
1752 paddq %xmm15,%xmm15
1753 .byte 102,15,56,220,224
1754 .byte 102,15,56,220,232
1755 pand %xmm8,%xmm14
1756 movaps %xmm11,%xmm12
1757 .byte 102,15,56,220,240
1758 pxor %xmm14,%xmm15
1759 movdqa %xmm9,%xmm14
1760 .byte 102,15,56,220,248
1761 movups -48(%rcx),%xmm0
1762
1763 paddd %xmm9,%xmm9
1764 .byte 102,15,56,220,209
1765 pxor %xmm15,%xmm11
1766 psrad $31,%xmm14
1767 .byte 102,15,56,220,217
1768 paddq %xmm15,%xmm15
1769 pand %xmm8,%xmm14
1770 .byte 102,15,56,220,225
1771 .byte 102,15,56,220,233
1772 movdqa %xmm13,48(%rsp)
1773 pxor %xmm14,%xmm15
1774 .byte 102,15,56,220,241
1775 movaps %xmm12,%xmm13
1776 movdqa %xmm9,%xmm14
1777 .byte 102,15,56,220,249
1778 movups -32(%rcx),%xmm1
1779
1780 paddd %xmm9,%xmm9
1781 .byte 102,15,56,220,208
1782 pxor %xmm15,%xmm12
1783 psrad $31,%xmm14
1784 .byte 102,15,56,220,216
1785 paddq %xmm15,%xmm15
1786 pand %xmm8,%xmm14
1787 .byte 102,15,56,220,224
1788 .byte 102,15,56,220,232
1789 .byte 102,15,56,220,240
1790 pxor %xmm14,%xmm15
1791 movaps %xmm13,%xmm14
1792 .byte 102,15,56,220,248
1793
1794 movdqa %xmm9,%xmm0
1795 paddd %xmm9,%xmm9
1796 .byte 102,15,56,220,209
1797 pxor %xmm15,%xmm13
1798 psrad $31,%xmm0
1799 .byte 102,15,56,220,217
1800 paddq %xmm15,%xmm15
1801 pand %xmm8,%xmm0
1802 .byte 102,15,56,220,225
1803 .byte 102,15,56,220,233
1804 pxor %xmm0,%xmm15
1805 movups (%rbp),%xmm0
1806 .byte 102,15,56,220,241
1807 .byte 102,15,56,220,249
1808 movups 16(%rbp),%xmm1
1809
1810 pxor %xmm15,%xmm14
1811 .byte 102,15,56,221,84,36,0
1812 psrad $31,%xmm9
1813 paddq %xmm15,%xmm15
1814 .byte 102,15,56,221,92,36,16
1815 .byte 102,15,56,221,100,36,32
1816 pand %xmm8,%xmm9
1817 movq %r10,%rax
1818 .byte 102,15,56,221,108,36,48
1819 .byte 102,15,56,221,116,36,64
1820 .byte 102,15,56,221,124,36,80
1821 pxor %xmm9,%xmm15
1822
1823 leaq 96(%rsi),%rsi
1824 movups %xmm2,-96(%rsi)
1825 movups %xmm3,-80(%rsi)
1826 movups %xmm4,-64(%rsi)
1827 movups %xmm5,-48(%rsi)
1828 movups %xmm6,-32(%rsi)
1829 movups %xmm7,-16(%rsi)
1830 subq $96,%rdx
1831 jnc .Lxts_enc_grandloop
1832
1833 movl $16+96,%eax
1834 subl %r10d,%eax
1835 movq %rbp,%rcx
1836 shrl $4,%eax
1837
1838 .Lxts_enc_short:
1839
1840 movl %eax,%r10d
1841 pxor %xmm0,%xmm10
1842 addq $96,%rdx
1843 jz .Lxts_enc_done
1844
1845 pxor %xmm0,%xmm11
1846 cmpq $0x20,%rdx
1847 jb .Lxts_enc_one
1848 pxor %xmm0,%xmm12
1849 je .Lxts_enc_two
1850
1851 pxor %xmm0,%xmm13
1852 cmpq $0x40,%rdx
1853 jb .Lxts_enc_three
1854 pxor %xmm0,%xmm14
1855 je .Lxts_enc_four
1856
1857 movdqu (%rdi),%xmm2
1858 movdqu 16(%rdi),%xmm3
1859 movdqu 32(%rdi),%xmm4
1860 pxor %xmm10,%xmm2
1861 movdqu 48(%rdi),%xmm5
1862 pxor %xmm11,%xmm3
1863 movdqu 64(%rdi),%xmm6
1864 leaq 80(%rdi),%rdi
1865 pxor %xmm12,%xmm4
1866 pxor %xmm13,%xmm5
1867 pxor %xmm14,%xmm6
1868 pxor %xmm7,%xmm7
1869
1870 call _aesni_encrypt6
1871
1872 xorps %xmm10,%xmm2
1873 movdqa %xmm15,%xmm10
1874 xorps %xmm11,%xmm3
1875 xorps %xmm12,%xmm4
1876 movdqu %xmm2,(%rsi)
1877 xorps %xmm13,%xmm5
1878 movdqu %xmm3,16(%rsi)
1879 xorps %xmm14,%xmm6
1880 movdqu %xmm4,32(%rsi)
1881 movdqu %xmm5,48(%rsi)
1882 movdqu %xmm6,64(%rsi)
1883 leaq 80(%rsi),%rsi
1884 jmp .Lxts_enc_done
1885
1886 .align 16
1887 .Lxts_enc_one:
1888 movups (%rdi),%xmm2
1889 leaq 16(%rdi),%rdi
1890 xorps %xmm10,%xmm2
1891 movups (%rcx),%xmm0
1892 movups 16(%rcx),%xmm1
1893 leaq 32(%rcx),%rcx
1894 xorps %xmm0,%xmm2
1895 .Loop_enc1_9:
1896 .byte 102,15,56,220,209
1897 decl %eax
1898 movups (%rcx),%xmm1
1899 leaq 16(%rcx),%rcx
1900 jnz .Loop_enc1_9
1901 .byte 102,15,56,221,209
1902 xorps %xmm10,%xmm2
1903 movdqa %xmm11,%xmm10
1904 movups %xmm2,(%rsi)
1905 leaq 16(%rsi),%rsi
1906 jmp .Lxts_enc_done
1907
1908 .align 16
1909 .Lxts_enc_two:
1910 movups (%rdi),%xmm2
1911 movups 16(%rdi),%xmm3
1912 leaq 32(%rdi),%rdi
1913 xorps %xmm10,%xmm2
1914 xorps %xmm11,%xmm3
1915
1916 call _aesni_encrypt2
1917
1918 xorps %xmm10,%xmm2
1919 movdqa %xmm12,%xmm10
1920 xorps %xmm11,%xmm3
1921 movups %xmm2,(%rsi)
1922 movups %xmm3,16(%rsi)
1923 leaq 32(%rsi),%rsi
1924 jmp .Lxts_enc_done
1925
1926 .align 16
1927 .Lxts_enc_three:
1928 movups (%rdi),%xmm2
1929 movups 16(%rdi),%xmm3
1930 movups 32(%rdi),%xmm4
1931 leaq 48(%rdi),%rdi
1932 xorps %xmm10,%xmm2
1933 xorps %xmm11,%xmm3
1934 xorps %xmm12,%xmm4
1935
1936 call _aesni_encrypt3
1937
1938 xorps %xmm10,%xmm2
1939 movdqa %xmm13,%xmm10
1940 xorps %xmm11,%xmm3
1941 xorps %xmm12,%xmm4
1942 movups %xmm2,(%rsi)
1943 movups %xmm3,16(%rsi)
1944 movups %xmm4,32(%rsi)
1945 leaq 48(%rsi),%rsi
1946 jmp .Lxts_enc_done
1947
1948 .align 16
1949 .Lxts_enc_four:
1950 movups (%rdi),%xmm2
1951 movups 16(%rdi),%xmm3
1952 movups 32(%rdi),%xmm4
1953 xorps %xmm10,%xmm2
1954 movups 48(%rdi),%xmm5
1955 leaq 64(%rdi),%rdi
1956 xorps %xmm11,%xmm3
1957 xorps %xmm12,%xmm4
1958 xorps %xmm13,%xmm5
1959
1960 call _aesni_encrypt4
1961
1962 pxor %xmm10,%xmm2
1963 movdqa %xmm14,%xmm10
1964 pxor %xmm11,%xmm3
1965 pxor %xmm12,%xmm4
1966 movdqu %xmm2,(%rsi)
1967 pxor %xmm13,%xmm5
1968 movdqu %xmm3,16(%rsi)
1969 movdqu %xmm4,32(%rsi)
1970 movdqu %xmm5,48(%rsi)
1971 leaq 64(%rsi),%rsi
1972 jmp .Lxts_enc_done
1973
1974 .align 16
1975 .Lxts_enc_done:
1976 andq $15,%r9
1977 jz .Lxts_enc_ret
1978 movq %r9,%rdx
1979
1980 .Lxts_enc_steal:
1981 movzbl (%rdi),%eax
1982 movzbl -16(%rsi),%ecx
1983 leaq 1(%rdi),%rdi
1984 movb %al,-16(%rsi)
1985 movb %cl,0(%rsi)
1986 leaq 1(%rsi),%rsi
1987 subq $1,%rdx
1988 jnz .Lxts_enc_steal
1989
1990 subq %r9,%rsi
1991 movq %rbp,%rcx
1992 movl %r10d,%eax
1993
1994 movups -16(%rsi),%xmm2
1995 xorps %xmm10,%xmm2
1996 movups (%rcx),%xmm0
1997 movups 16(%rcx),%xmm1
1998 leaq 32(%rcx),%rcx
1999 xorps %xmm0,%xmm2
2000 .Loop_enc1_10:
2001 .byte 102,15,56,220,209
2002 decl %eax
2003 movups (%rcx),%xmm1
2004 leaq 16(%rcx),%rcx
2005 jnz .Loop_enc1_10
2006 .byte 102,15,56,221,209
2007 xorps %xmm10,%xmm2
2008 movups %xmm2,-16(%rsi)
2009
2010 .Lxts_enc_ret:
2011 xorps %xmm0,%xmm0
2012 pxor %xmm1,%xmm1
2013 pxor %xmm2,%xmm2
2014 pxor %xmm3,%xmm3
2015 pxor %xmm4,%xmm4
2016 pxor %xmm5,%xmm5
2017 pxor %xmm6,%xmm6
2018 pxor %xmm7,%xmm7
2019 movaps %xmm0,0(%rsp)
2020 pxor %xmm8,%xmm8
2021 movaps %xmm0,16(%rsp)
2022 pxor %xmm9,%xmm9
2023 movaps %xmm0,32(%rsp)
2024 pxor %xmm10,%xmm10
2025 movaps %xmm0,48(%rsp)
2026 pxor %xmm11,%xmm11
2027 movaps %xmm0,64(%rsp)
2028 pxor %xmm12,%xmm12
2029 movaps %xmm0,80(%rsp)
2030 pxor %xmm13,%xmm13
2031 movaps %xmm0,96(%rsp)
2032 pxor %xmm14,%xmm14
2033 pxor %xmm15,%xmm15
2034 movq -8(%r11),%rbp
2035 leaq (%r11),%rsp
2036 .Lxts_enc_epilogue:
2037 .byte 0xf3,0xc3
2038 .size aesni_xts_encrypt,.-aesni_xts_encrypt
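# aesni_xts_decrypt: XTS-mode decryption with the same argument layout as
# aesni_xts_encrypt. The structural differences are the aesdec round
# sequence and the extra length adjustment made right after the tweak is
# computed, so that ciphertext stealing can swap the final full and partial
# blocks in the .Lxts_dec_steal tail.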
2039 .globl aesni_xts_decrypt
2040 .hidden aesni_xts_decrypt
2041 .type aesni_xts_decrypt,@function
2042 .align 16
2043 aesni_xts_decrypt:
2044 leaq (%rsp),%r11
2045 pushq %rbp
2046 subq $112,%rsp
2047 andq $-16,%rsp
2048 movups (%r9),%xmm2
2049 movl 240(%r8),%eax
2050 movl 240(%rcx),%r10d
2051 movups (%r8),%xmm0
2052 movups 16(%r8),%xmm1
2053 leaq 32(%r8),%r8
2054 xorps %xmm0,%xmm2
2055 .Loop_enc1_11:
2056 .byte 102,15,56,220,209
2057 decl %eax
2058 movups (%r8),%xmm1
2059 leaq 16(%r8),%r8
2060 jnz .Loop_enc1_11
2061 .byte 102,15,56,221,209
2062 xorl %eax,%eax
2063 testq $15,%rdx
2064 setnz %al
2065 shlq $4,%rax
2066 subq %rax,%rdx
2067
2068 movups (%rcx),%xmm0
2069 movq %rcx,%rbp
2070 movl %r10d,%eax
2071 shll $4,%r10d
2072 movq %rdx,%r9
2073 andq $-16,%rdx
2074
2075 movups 16(%rcx,%r10,1),%xmm1
2076
2077 movdqa .Lxts_magic(%rip),%xmm8
2078 movdqa %xmm2,%xmm15
2079 pshufd $0x5f,%xmm2,%xmm9
2080 pxor %xmm0,%xmm1
2081 movdqa %xmm9,%xmm14
2082 paddd %xmm9,%xmm9
2083 movdqa %xmm15,%xmm10
2084 psrad $31,%xmm14
2085 paddq %xmm15,%xmm15
2086 pand %xmm8,%xmm14
2087 pxor %xmm0,%xmm10
2088 pxor %xmm14,%xmm15
2089 movdqa %xmm9,%xmm14
2090 paddd %xmm9,%xmm9
2091 movdqa %xmm15,%xmm11
2092 psrad $31,%xmm14
2093 paddq %xmm15,%xmm15
2094 pand %xmm8,%xmm14
2095 pxor %xmm0,%xmm11
2096 pxor %xmm14,%xmm15
2097 movdqa %xmm9,%xmm14
2098 paddd %xmm9,%xmm9
2099 movdqa %xmm15,%xmm12
2100 psrad $31,%xmm14
2101 paddq %xmm15,%xmm15
2102 pand %xmm8,%xmm14
2103 pxor %xmm0,%xmm12
2104 pxor %xmm14,%xmm15
2105 movdqa %xmm9,%xmm14
2106 paddd %xmm9,%xmm9
2107 movdqa %xmm15,%xmm13
2108 psrad $31,%xmm14
2109 paddq %xmm15,%xmm15
2110 pand %xmm8,%xmm14
2111 pxor %xmm0,%xmm13
2112 pxor %xmm14,%xmm15
2113 movdqa %xmm15,%xmm14
2114 psrad $31,%xmm9
2115 paddq %xmm15,%xmm15
2116 pand %xmm8,%xmm9
2117 pxor %xmm0,%xmm14
2118 pxor %xmm9,%xmm15
2119 movaps %xmm1,96(%rsp)
2120
2121 subq $96,%rdx
2122 jc .Lxts_dec_short
2123
2124 movl $16+96,%eax
2125 leaq 32(%rbp,%r10,1),%rcx
2126 subq %r10,%rax
2127 movups 16(%rbp),%xmm1
2128 movq %rax,%r10
2129 leaq .Lxts_magic(%rip),%r8
2130 jmp .Lxts_dec_grandloop
2131
2132 .align 32
2133 .Lxts_dec_grandloop:
2134 movdqu 0(%rdi),%xmm2
2135 movdqa %xmm0,%xmm8
2136 movdqu 16(%rdi),%xmm3
2137 pxor %xmm10,%xmm2
2138 movdqu 32(%rdi),%xmm4
2139 pxor %xmm11,%xmm3
2140 .byte 102,15,56,222,209
2141 movdqu 48(%rdi),%xmm5
2142 pxor %xmm12,%xmm4
2143 .byte 102,15,56,222,217
2144 movdqu 64(%rdi),%xmm6
2145 pxor %xmm13,%xmm5
2146 .byte 102,15,56,222,225
2147 movdqu 80(%rdi),%xmm7
2148 pxor %xmm15,%xmm8
2149 movdqa 96(%rsp),%xmm9
2150 pxor %xmm14,%xmm6
2151 .byte 102,15,56,222,233
2152 movups 32(%rbp),%xmm0
2153 leaq 96(%rdi),%rdi
2154 pxor %xmm8,%xmm7
2155
2156 pxor %xmm9,%xmm10
2157 .byte 102,15,56,222,241
2158 pxor %xmm9,%xmm11
2159 movdqa %xmm10,0(%rsp)
2160 .byte 102,15,56,222,249
2161 movups 48(%rbp),%xmm1
2162 pxor %xmm9,%xmm12
2163
2164 .byte 102,15,56,222,208
2165 pxor %xmm9,%xmm13
2166 movdqa %xmm11,16(%rsp)
2167 .byte 102,15,56,222,216
2168 pxor %xmm9,%xmm14
2169 movdqa %xmm12,32(%rsp)
2170 .byte 102,15,56,222,224
2171 .byte 102,15,56,222,232
2172 pxor %xmm9,%xmm8
2173 movdqa %xmm14,64(%rsp)
2174 .byte 102,15,56,222,240
2175 .byte 102,15,56,222,248
2176 movups 64(%rbp),%xmm0
2177 movdqa %xmm8,80(%rsp)
2178 pshufd $0x5f,%xmm15,%xmm9
2179 jmp .Lxts_dec_loop6
2180 .align 32
2181 .Lxts_dec_loop6:
2182 .byte 102,15,56,222,209
2183 .byte 102,15,56,222,217
2184 .byte 102,15,56,222,225
2185 .byte 102,15,56,222,233
2186 .byte 102,15,56,222,241
2187 .byte 102,15,56,222,249
2188 movups -64(%rcx,%rax,1),%xmm1
2189 addq $32,%rax
2190
2191 .byte 102,15,56,222,208
2192 .byte 102,15,56,222,216
2193 .byte 102,15,56,222,224
2194 .byte 102,15,56,222,232
2195 .byte 102,15,56,222,240
2196 .byte 102,15,56,222,248
2197 movups -80(%rcx,%rax,1),%xmm0
2198 jnz .Lxts_dec_loop6
2199
2200 movdqa (%r8),%xmm8
2201 movdqa %xmm9,%xmm14
2202 paddd %xmm9,%xmm9
2203 .byte 102,15,56,222,209
2204 paddq %xmm15,%xmm15
2205 psrad $31,%xmm14
2206 .byte 102,15,56,222,217
2207 pand %xmm8,%xmm14
2208 movups (%rbp),%xmm10
2209 .byte 102,15,56,222,225
2210 .byte 102,15,56,222,233
2211 .byte 102,15,56,222,241
2212 pxor %xmm14,%xmm15
2213 movaps %xmm10,%xmm11
2214 .byte 102,15,56,222,249
2215 movups -64(%rcx),%xmm1
2216
2217 movdqa %xmm9,%xmm14
2218 .byte 102,15,56,222,208
2219 paddd %xmm9,%xmm9
2220 pxor %xmm15,%xmm10
2221 .byte 102,15,56,222,216
2222 psrad $31,%xmm14
2223 paddq %xmm15,%xmm15
2224 .byte 102,15,56,222,224
2225 .byte 102,15,56,222,232
2226 pand %xmm8,%xmm14
2227 movaps %xmm11,%xmm12
2228 .byte 102,15,56,222,240
2229 pxor %xmm14,%xmm15
2230 movdqa %xmm9,%xmm14
2231 .byte 102,15,56,222,248
2232 movups -48(%rcx),%xmm0
2233
2234 paddd %xmm9,%xmm9
2235 .byte 102,15,56,222,209
2236 pxor %xmm15,%xmm11
2237 psrad $31,%xmm14
2238 .byte 102,15,56,222,217
2239 paddq %xmm15,%xmm15
2240 pand %xmm8,%xmm14
2241 .byte 102,15,56,222,225
2242 .byte 102,15,56,222,233
2243 movdqa %xmm13,48(%rsp)
2244 pxor %xmm14,%xmm15
2245 .byte 102,15,56,222,241
2246 movaps %xmm12,%xmm13
2247 movdqa %xmm9,%xmm14
2248 .byte 102,15,56,222,249
2249 movups -32(%rcx),%xmm1
2250
2251 paddd %xmm9,%xmm9
2252 .byte 102,15,56,222,208
2253 pxor %xmm15,%xmm12
2254 psrad $31,%xmm14
2255 .byte 102,15,56,222,216
2256 paddq %xmm15,%xmm15
2257 pand %xmm8,%xmm14
2258 .byte 102,15,56,222,224
2259 .byte 102,15,56,222,232
2260 .byte 102,15,56,222,240
2261 pxor %xmm14,%xmm15
2262 movaps %xmm13,%xmm14
2263 .byte 102,15,56,222,248
2264
2265 movdqa %xmm9,%xmm0
2266 paddd %xmm9,%xmm9
2267 .byte 102,15,56,222,209
2268 pxor %xmm15,%xmm13
2269 psrad $31,%xmm0
2270 .byte 102,15,56,222,217
2271 paddq %xmm15,%xmm15
2272 pand %xmm8,%xmm0
2273 .byte 102,15,56,222,225
2274 .byte 102,15,56,222,233
2275 pxor %xmm0,%xmm15
2276 movups (%rbp),%xmm0
2277 .byte 102,15,56,222,241
2278 .byte 102,15,56,222,249
2279 movups 16(%rbp),%xmm1
2280
2281 pxor %xmm15,%xmm14
2282 .byte 102,15,56,223,84,36,0
2283 psrad $31,%xmm9
2284 paddq %xmm15,%xmm15
2285 .byte 102,15,56,223,92,36,16
2286 .byte 102,15,56,223,100,36,32
2287 pand %xmm8,%xmm9
2288 movq %r10,%rax
2289 .byte 102,15,56,223,108,36,48
2290 .byte 102,15,56,223,116,36,64
2291 .byte 102,15,56,223,124,36,80
2292 pxor %xmm9,%xmm15
2293
2294 leaq 96(%rsi),%rsi
2295 movups %xmm2,-96(%rsi)
2296 movups %xmm3,-80(%rsi)
2297 movups %xmm4,-64(%rsi)
2298 movups %xmm5,-48(%rsi)
2299 movups %xmm6,-32(%rsi)
2300 movups %xmm7,-16(%rsi)
2301 subq $96,%rdx
2302 jnc .Lxts_dec_grandloop
2303
2304 movl $16+96,%eax
2305 subl %r10d,%eax
2306 movq %rbp,%rcx
2307 shrl $4,%eax
2308
2309 .Lxts_dec_short:
2310
2311 movl %eax,%r10d
2312 pxor %xmm0,%xmm10
2313 pxor %xmm0,%xmm11
2314 addq $96,%rdx
2315 jz .Lxts_dec_done
2316
2317 pxor %xmm0,%xmm12
2318 cmpq $0x20,%rdx
2319 jb .Lxts_dec_one
2320 pxor %xmm0,%xmm13
2321 je .Lxts_dec_two
2322
2323 pxor %xmm0,%xmm14
2324 cmpq $0x40,%rdx
2325 jb .Lxts_dec_three
2326 je .Lxts_dec_four
2327
2328 movdqu (%rdi),%xmm2
2329 movdqu 16(%rdi),%xmm3
2330 movdqu 32(%rdi),%xmm4
2331 pxor %xmm10,%xmm2
2332 movdqu 48(%rdi),%xmm5
2333 pxor %xmm11,%xmm3
2334 movdqu 64(%rdi),%xmm6
2335 leaq 80(%rdi),%rdi
2336 pxor %xmm12,%xmm4
2337 pxor %xmm13,%xmm5
2338 pxor %xmm14,%xmm6
2339
2340 call _aesni_decrypt6
2341
2342 xorps %xmm10,%xmm2
2343 xorps %xmm11,%xmm3
2344 xorps %xmm12,%xmm4
2345 movdqu %xmm2,(%rsi)
2346 xorps %xmm13,%xmm5
2347 movdqu %xmm3,16(%rsi)
2348 xorps %xmm14,%xmm6
2349 movdqu %xmm4,32(%rsi)
2350 pxor %xmm14,%xmm14
2351 movdqu %xmm5,48(%rsi)
2352 pcmpgtd %xmm15,%xmm14
2353 movdqu %xmm6,64(%rsi)
2354 leaq 80(%rsi),%rsi
2355 pshufd $0x13,%xmm14,%xmm11
2356 andq $15,%r9
2357 jz .Lxts_dec_ret
2358
2359 movdqa %xmm15,%xmm10
2360 paddq %xmm15,%xmm15
2361 pand %xmm8,%xmm11
2362 pxor %xmm15,%xmm11
2363 jmp .Lxts_dec_done2
2364
2365 .align 16
2366 .Lxts_dec_one:
2367 movups (%rdi),%xmm2
2368 leaq 16(%rdi),%rdi
2369 xorps %xmm10,%xmm2
2370 movups (%rcx),%xmm0
2371 movups 16(%rcx),%xmm1
2372 leaq 32(%rcx),%rcx
2373 xorps %xmm0,%xmm2
2374 .Loop_dec1_12:
2375 .byte 102,15,56,222,209
2376 decl %eax
2377 movups (%rcx),%xmm1
2378 leaq 16(%rcx),%rcx
2379 jnz .Loop_dec1_12
2380 .byte 102,15,56,223,209
2381 xorps %xmm10,%xmm2
2382 movdqa %xmm11,%xmm10
2383 movups %xmm2,(%rsi)
2384 movdqa %xmm12,%xmm11
2385 leaq 16(%rsi),%rsi
2386 jmp .Lxts_dec_done
2387
2388 .align 16
2389 .Lxts_dec_two:
2390 movups (%rdi),%xmm2
2391 movups 16(%rdi),%xmm3
2392 leaq 32(%rdi),%rdi
2393 xorps %xmm10,%xmm2
2394 xorps %xmm11,%xmm3
2395
2396 call _aesni_decrypt2
2397
2398 xorps %xmm10,%xmm2
2399 movdqa %xmm12,%xmm10
2400 xorps %xmm11,%xmm3
2401 movdqa %xmm13,%xmm11
2402 movups %xmm2,(%rsi)
2403 movups %xmm3,16(%rsi)
2404 leaq 32(%rsi),%rsi
2405 jmp .Lxts_dec_done
2406
2407 .align 16
2408 .Lxts_dec_three:
2409 movups (%rdi),%xmm2
2410 movups 16(%rdi),%xmm3
2411 movups 32(%rdi),%xmm4
2412 leaq 48(%rdi),%rdi
2413 xorps %xmm10,%xmm2
2414 xorps %xmm11,%xmm3
2415 xorps %xmm12,%xmm4
2416
2417 call _aesni_decrypt3
2418
2419 xorps %xmm10,%xmm2
2420 movdqa %xmm13,%xmm10
2421 xorps %xmm11,%xmm3
2422 movdqa %xmm14,%xmm11
2423 xorps %xmm12,%xmm4
2424 movups %xmm2,(%rsi)
2425 movups %xmm3,16(%rsi)
2426 movups %xmm4,32(%rsi)
2427 leaq 48(%rsi),%rsi
2428 jmp .Lxts_dec_done
2429
2430 .align 16
2431 .Lxts_dec_four:
2432 movups (%rdi),%xmm2
2433 movups 16(%rdi),%xmm3
2434 movups 32(%rdi),%xmm4
2435 xorps %xmm10,%xmm2
2436 movups 48(%rdi),%xmm5
2437 leaq 64(%rdi),%rdi
2438 xorps %xmm11,%xmm3
2439 xorps %xmm12,%xmm4
2440 xorps %xmm13,%xmm5
2441
2442 call _aesni_decrypt4
2443
2444 pxor %xmm10,%xmm2
2445 movdqa %xmm14,%xmm10
2446 pxor %xmm11,%xmm3
2447 movdqa %xmm15,%xmm11
2448 pxor %xmm12,%xmm4
2449 movdqu %xmm2,(%rsi)
2450 pxor %xmm13,%xmm5
2451 movdqu %xmm3,16(%rsi)
2452 movdqu %xmm4,32(%rsi)
2453 movdqu %xmm5,48(%rsi)
2454 leaq 64(%rsi),%rsi
2455 jmp .Lxts_dec_done
2456
2457 .align 16
2458 .Lxts_dec_done:
2459 andq $15,%r9
2460 jz .Lxts_dec_ret
2461 .Lxts_dec_done2:
2462 movq %r9,%rdx
2463 movq %rbp,%rcx
2464 movl %r10d,%eax
2465
2466 movups (%rdi),%xmm2
2467 xorps %xmm11,%xmm2
2468 movups (%rcx),%xmm0
2469 movups 16(%rcx),%xmm1
2470 leaq 32(%rcx),%rcx
2471 xorps %xmm0,%xmm2
2472 .Loop_dec1_13:
2473 .byte 102,15,56,222,209
2474 decl %eax
2475 movups (%rcx),%xmm1
2476 leaq 16(%rcx),%rcx
2477 jnz .Loop_dec1_13
2478 .byte 102,15,56,223,209
2479 xorps %xmm11,%xmm2
2480 movups %xmm2,(%rsi)
2481
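/* XTS ciphertext stealing (editor's annotation): a byte-swap loop that merges
   the trailing partial block with the tail of the block just written, after
   which the rebuilt block is decrypted with the remaining tweak below. */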
2482 .Lxts_dec_steal:
2483 movzbl 16(%rdi),%eax
2484 movzbl (%rsi),%ecx
2485 leaq 1(%rdi),%rdi
2486 movb %al,(%rsi)
2487 movb %cl,16(%rsi)
2488 leaq 1(%rsi),%rsi
2489 subq $1,%rdx
2490 jnz .Lxts_dec_steal
2491
2492 subq %r9,%rsi
2493 movq %rbp,%rcx
2494 movl %r10d,%eax
2495
2496 movups (%rsi),%xmm2
2497 xorps %xmm10,%xmm2
2498 movups (%rcx),%xmm0
2499 movups 16(%rcx),%xmm1
2500 leaq 32(%rcx),%rcx
2501 xorps %xmm0,%xmm2
2502 .Loop_dec1_14:
2503 .byte 102,15,56,222,209
2504 decl %eax
2505 movups (%rcx),%xmm1
2506 leaq 16(%rcx),%rcx
2507 jnz .Loop_dec1_14
2508 .byte 102,15,56,223,209
2509 xorps %xmm10,%xmm2
2510 movups %xmm2,(%rsi)
2511
2512 .Lxts_dec_ret:
2513 xorps %xmm0,%xmm0
2514 pxor %xmm1,%xmm1
2515 pxor %xmm2,%xmm2
2516 pxor %xmm3,%xmm3
2517 pxor %xmm4,%xmm4
2518 pxor %xmm5,%xmm5
2519 pxor %xmm6,%xmm6
2520 pxor %xmm7,%xmm7
2521 movaps %xmm0,0(%rsp)
2522 pxor %xmm8,%xmm8
2523 movaps %xmm0,16(%rsp)
2524 pxor %xmm9,%xmm9
2525 movaps %xmm0,32(%rsp)
2526 pxor %xmm10,%xmm10
2527 movaps %xmm0,48(%rsp)
2528 pxor %xmm11,%xmm11
2529 movaps %xmm0,64(%rsp)
2530 pxor %xmm12,%xmm12
2531 movaps %xmm0,80(%rsp)
2532 pxor %xmm13,%xmm13
2533 movaps %xmm0,96(%rsp)
2534 pxor %xmm14,%xmm14
2535 pxor %xmm15,%xmm15
2536 movq -8(%r11),%rbp
2537 leaq (%r11),%rsp
2538 .Lxts_dec_epilogue:
2539 .byte 0xf3,0xc3
2540 .size aesni_xts_decrypt,.-aesni_xts_decrypt
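/*
 * OCB128 encryption entry point. The register assignments below are an
 * editor's annotation, assuming the usual OpenSSL/BoringSSL prototype:
 *   void aesni_ocb_encrypt(const uint8_t *in, uint8_t *out, size_t blocks,
 *                          const AES_KEY *key, size_t start_block_num,
 *                          uint8_t offset_i[16], const uint8_t L_[][16],
 *                          uint8_t checksum[16]);
 * i.e. %rdi=in, %rsi=out, %rdx=block count, %rcx=key, %r8=block number,
 * %r9=offset, with the L_ table and checksum loaded from the stack into
 * %rbx and %rbp.
 */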
2541 .globl aesni_ocb_encrypt
2542 .hidden aesni_ocb_encrypt
2543 .type aesni_ocb_encrypt,@function
2544 .align 32
2545 aesni_ocb_encrypt:
2546 leaq (%rsp),%rax
2547 pushq %rbx
2548 pushq %rbp
2549 pushq %r12
2550 pushq %r13
2551 pushq %r14
2552 movq 8(%rax),%rbx
2553 movq 8+8(%rax),%rbp
2554
2555 movl 240(%rcx),%r10d
2556 movq %rcx,%r11
2557 shll $4,%r10d
2558 movups (%rcx),%xmm9
2559 movups 16(%rcx,%r10,1),%xmm1
2560
2561 movdqu (%r9),%xmm15
2562 pxor %xmm1,%xmm9
2563 pxor %xmm1,%xmm15
2564
2565 movl $16+32,%eax
2566 leaq 32(%r11,%r10,1),%rcx
2567 movups 16(%r11),%xmm1
2568 subq %r10,%rax
2569 movq %rax,%r10
2570
2571 movdqu (%rbx),%xmm10
2572 movdqu (%rbp),%xmm8
2573
2574 testq $1,%r8
2575 jnz .Locb_enc_odd
2576
2577 bsfq %r8,%r12
2578 addq $1,%r8
2579 shlq $4,%r12
2580 movdqu (%rbx,%r12,1),%xmm7
2581 movdqu (%rdi),%xmm2
2582 leaq 16(%rdi),%rdi
2583
2584 call __ocb_encrypt1
2585
2586 movdqa %xmm7,%xmm15
2587 movups %xmm2,(%rsi)
2588 leaq 16(%rsi),%rsi
2589 subq $1,%rdx
2590 jz .Locb_enc_done
2591
2592 .Locb_enc_odd:
2593 leaq 1(%r8),%r12
2594 leaq 3(%r8),%r13
2595 leaq 5(%r8),%r14
2596 leaq 6(%r8),%r8
2597 bsfq %r12,%r12
2598 bsfq %r13,%r13
2599 bsfq %r14,%r14
2600 shlq $4,%r12
2601 shlq $4,%r13
2602 shlq $4,%r14
2603
2604 subq $6,%rdx
2605 jc .Locb_enc_short
2606 jmp .Locb_enc_grandloop
2607
2608 .align 32
2609 .Locb_enc_grandloop:
2610 movdqu 0(%rdi),%xmm2
2611 movdqu 16(%rdi),%xmm3
2612 movdqu 32(%rdi),%xmm4
2613 movdqu 48(%rdi),%xmm5
2614 movdqu 64(%rdi),%xmm6
2615 movdqu 80(%rdi),%xmm7
2616 leaq 96(%rdi),%rdi
2617
2618 call __ocb_encrypt6
2619
2620 movups %xmm2,0(%rsi)
2621 movups %xmm3,16(%rsi)
2622 movups %xmm4,32(%rsi)
2623 movups %xmm5,48(%rsi)
2624 movups %xmm6,64(%rsi)
2625 movups %xmm7,80(%rsi)
2626 leaq 96(%rsi),%rsi
2627 subq $6,%rdx
2628 jnc .Locb_enc_grandloop
2629
2630 .Locb_enc_short:
2631 addq $6,%rdx
2632 jz .Locb_enc_done
2633
2634 movdqu 0(%rdi),%xmm2
2635 cmpq $2,%rdx
2636 jb .Locb_enc_one
2637 movdqu 16(%rdi),%xmm3
2638 je .Locb_enc_two
2639
2640 movdqu 32(%rdi),%xmm4
2641 cmpq $4,%rdx
2642 jb .Locb_enc_three
2643 movdqu 48(%rdi),%xmm5
2644 je .Locb_enc_four
2645
2646 movdqu 64(%rdi),%xmm6
2647 pxor %xmm7,%xmm7
2648
2649 call __ocb_encrypt6
2650
2651 movdqa %xmm14,%xmm15
2652 movups %xmm2,0(%rsi)
2653 movups %xmm3,16(%rsi)
2654 movups %xmm4,32(%rsi)
2655 movups %xmm5,48(%rsi)
2656 movups %xmm6,64(%rsi)
2657
2658 jmp .Locb_enc_done
2659
2660 .align 16
2661 .Locb_enc_one:
2662 movdqa %xmm10,%xmm7
2663
2664 call __ocb_encrypt1
2665
2666 movdqa %xmm7,%xmm15
2667 movups %xmm2,0(%rsi)
2668 jmp .Locb_enc_done
2669
2670 .align 16
2671 .Locb_enc_two:
2672 pxor %xmm4,%xmm4
2673 pxor %xmm5,%xmm5
2674
2675 call __ocb_encrypt4
2676
2677 movdqa %xmm11,%xmm15
2678 movups %xmm2,0(%rsi)
2679 movups %xmm3,16(%rsi)
2680
2681 jmp .Locb_enc_done
2682
2683 .align 16
2684 .Locb_enc_three:
2685 pxor %xmm5,%xmm5
2686
2687 call __ocb_encrypt4
2688
2689 movdqa %xmm12,%xmm15
2690 movups %xmm2,0(%rsi)
2691 movups %xmm3,16(%rsi)
2692 movups %xmm4,32(%rsi)
2693
2694 jmp .Locb_enc_done
2695
2696 .align 16
2697 .Locb_enc_four:
2698 call __ocb_encrypt4
2699
2700 movdqa %xmm13,%xmm15
2701 movups %xmm2,0(%rsi)
2702 movups %xmm3,16(%rsi)
2703 movups %xmm4,32(%rsi)
2704 movups %xmm5,48(%rsi)
2705
2706 .Locb_enc_done:
2707 pxor %xmm0,%xmm15
2708 movdqu %xmm8,(%rbp)
2709 movdqu %xmm15,(%r9)
2710
2711 xorps %xmm0,%xmm0
2712 pxor %xmm1,%xmm1
2713 pxor %xmm2,%xmm2
2714 pxor %xmm3,%xmm3
2715 pxor %xmm4,%xmm4
2716 pxor %xmm5,%xmm5
2717 pxor %xmm6,%xmm6
2718 pxor %xmm7,%xmm7
2719 pxor %xmm8,%xmm8
2720 pxor %xmm9,%xmm9
2721 pxor %xmm10,%xmm10
2722 pxor %xmm11,%xmm11
2723 pxor %xmm12,%xmm12
2724 pxor %xmm13,%xmm13
2725 pxor %xmm14,%xmm14
2726 pxor %xmm15,%xmm15
2727 leaq 40(%rsp),%rax
2728 movq -40(%rax),%r14
2729 movq -32(%rax),%r13
2730 movq -24(%rax),%r12
2731 movq -16(%rax),%rbp
2732 movq -8(%rax),%rbx
2733 leaq (%rax),%rsp
2734 .Locb_enc_epilogue:
2735 .byte 0xf3,0xc3
2736 .size aesni_ocb_encrypt,.-aesni_ocb_encrypt
2737
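/*
 * Internal 6-block OCB encrypt helper (not a public ABI function).
 * Best-effort annotation: plaintext blocks arrive in %xmm2..%xmm7, the
 * per-block offsets are derived from the L_ table at %rbx, the running
 * checksum is accumulated in %xmm8, and the key schedule is addressed
 * via %r11/%rcx with the remaining-rounds offset restored from %r10.
 */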
2738 .type __ocb_encrypt6,@function
2739 .align 32
2740 __ocb_encrypt6:
2741 pxor %xmm9,%xmm15
2742 movdqu (%rbx,%r12,1),%xmm11
2743 movdqa %xmm10,%xmm12
2744 movdqu (%rbx,%r13,1),%xmm13
2745 movdqa %xmm10,%xmm14
2746 pxor %xmm15,%xmm10
2747 movdqu (%rbx,%r14,1),%xmm15
2748 pxor %xmm10,%xmm11
2749 pxor %xmm2,%xmm8
2750 pxor %xmm10,%xmm2
2751 pxor %xmm11,%xmm12
2752 pxor %xmm3,%xmm8
2753 pxor %xmm11,%xmm3
2754 pxor %xmm12,%xmm13
2755 pxor %xmm4,%xmm8
2756 pxor %xmm12,%xmm4
2757 pxor %xmm13,%xmm14
2758 pxor %xmm5,%xmm8
2759 pxor %xmm13,%xmm5
2760 pxor %xmm14,%xmm15
2761 pxor %xmm6,%xmm8
2762 pxor %xmm14,%xmm6
2763 pxor %xmm7,%xmm8
2764 pxor %xmm15,%xmm7
2765 movups 32(%r11),%xmm0
2766
2767 leaq 1(%r8),%r12
2768 leaq 3(%r8),%r13
2769 leaq 5(%r8),%r14
2770 addq $6,%r8
2771 pxor %xmm9,%xmm10
2772 bsfq %r12,%r12
2773 bsfq %r13,%r13
2774 bsfq %r14,%r14
2775
2776 .byte 102,15,56,220,209
2777 .byte 102,15,56,220,217
2778 .byte 102,15,56,220,225
2779 .byte 102,15,56,220,233
2780 pxor %xmm9,%xmm11
2781 pxor %xmm9,%xmm12
2782 .byte 102,15,56,220,241
2783 pxor %xmm9,%xmm13
2784 pxor %xmm9,%xmm14
2785 .byte 102,15,56,220,249
2786 movups 48(%r11),%xmm1
2787 pxor %xmm9,%xmm15
2788
2789 .byte 102,15,56,220,208
2790 .byte 102,15,56,220,216
2791 .byte 102,15,56,220,224
2792 .byte 102,15,56,220,232
2793 .byte 102,15,56,220,240
2794 .byte 102,15,56,220,248
2795 movups 64(%r11),%xmm0
2796 shlq $4,%r12
2797 shlq $4,%r13
2798 jmp .Locb_enc_loop6
2799
2800 .align 32
2801 .Locb_enc_loop6:
2802 .byte 102,15,56,220,209
2803 .byte 102,15,56,220,217
2804 .byte 102,15,56,220,225
2805 .byte 102,15,56,220,233
2806 .byte 102,15,56,220,241
2807 .byte 102,15,56,220,249
2808 movups (%rcx,%rax,1),%xmm1
2809 addq $32,%rax
2810
2811 .byte 102,15,56,220,208
2812 .byte 102,15,56,220,216
2813 .byte 102,15,56,220,224
2814 .byte 102,15,56,220,232
2815 .byte 102,15,56,220,240
2816 .byte 102,15,56,220,248
2817 movups -16(%rcx,%rax,1),%xmm0
2818 jnz .Locb_enc_loop6
2819
2820 .byte 102,15,56,220,209
2821 .byte 102,15,56,220,217
2822 .byte 102,15,56,220,225
2823 .byte 102,15,56,220,233
2824 .byte 102,15,56,220,241
2825 .byte 102,15,56,220,249
2826 movups 16(%r11),%xmm1
2827 shlq $4,%r14
2828
2829 .byte 102,65,15,56,221,210
2830 movdqu (%rbx),%xmm10
2831 movq %r10,%rax
2832 .byte 102,65,15,56,221,219
2833 .byte 102,65,15,56,221,228
2834 .byte 102,65,15,56,221,237
2835 .byte 102,65,15,56,221,246
2836 .byte 102,65,15,56,221,255
2837 .byte 0xf3,0xc3
2838 .size __ocb_encrypt6,.-__ocb_encrypt6
2839
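/* 4-block variant of the helper above: blocks in %xmm2..%xmm5, same
   offset and checksum bookkeeping with fewer lanes (annotation). */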
2840 .type __ocb_encrypt4,@function
2841 .align 32
2842 __ocb_encrypt4:
2843 pxor %xmm9,%xmm15
2844 movdqu (%rbx,%r12,1),%xmm11
2845 movdqa %xmm10,%xmm12
2846 movdqu (%rbx,%r13,1),%xmm13
2847 pxor %xmm15,%xmm10
2848 pxor %xmm10,%xmm11
2849 pxor %xmm2,%xmm8
2850 pxor %xmm10,%xmm2
2851 pxor %xmm11,%xmm12
2852 pxor %xmm3,%xmm8
2853 pxor %xmm11,%xmm3
2854 pxor %xmm12,%xmm13
2855 pxor %xmm4,%xmm8
2856 pxor %xmm12,%xmm4
2857 pxor %xmm5,%xmm8
2858 pxor %xmm13,%xmm5
2859 movups 32(%r11),%xmm0
2860
2861 pxor %xmm9,%xmm10
2862 pxor %xmm9,%xmm11
2863 pxor %xmm9,%xmm12
2864 pxor %xmm9,%xmm13
2865
2866 .byte 102,15,56,220,209
2867 .byte 102,15,56,220,217
2868 .byte 102,15,56,220,225
2869 .byte 102,15,56,220,233
2870 movups 48(%r11),%xmm1
2871
2872 .byte 102,15,56,220,208
2873 .byte 102,15,56,220,216
2874 .byte 102,15,56,220,224
2875 .byte 102,15,56,220,232
2876 movups 64(%r11),%xmm0
2877 jmp .Locb_enc_loop4
2878
2879 .align 32
2880 .Locb_enc_loop4:
2881 .byte 102,15,56,220,209
2882 .byte 102,15,56,220,217
2883 .byte 102,15,56,220,225
2884 .byte 102,15,56,220,233
2885 movups (%rcx,%rax,1),%xmm1
2886 addq $32,%rax
2887
2888 .byte 102,15,56,220,208
2889 .byte 102,15,56,220,216
2890 .byte 102,15,56,220,224
2891 .byte 102,15,56,220,232
2892 movups -16(%rcx,%rax,1),%xmm0
2893 jnz .Locb_enc_loop4
2894
2895 .byte 102,15,56,220,209
2896 .byte 102,15,56,220,217
2897 .byte 102,15,56,220,225
2898 .byte 102,15,56,220,233
2899 movups 16(%r11),%xmm1
2900 movq %r10,%rax
2901
2902 .byte 102,65,15,56,221,210
2903 .byte 102,65,15,56,221,219
2904 .byte 102,65,15,56,221,228
2905 .byte 102,65,15,56,221,237
2906 .byte 0xf3,0xc3
2907 .size __ocb_encrypt4,.-__ocb_encrypt4
2908
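/* Single-block helper: block in %xmm2, current offset in %xmm7,
   checksum accumulated in %xmm8 (annotation). */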
2909 .type __ocb_encrypt1,@function
2910 .align 32
2911 __ocb_encrypt1:
2912 pxor %xmm15,%xmm7
2913 pxor %xmm9,%xmm7
2914 pxor %xmm2,%xmm8
2915 pxor %xmm7,%xmm2
2916 movups 32(%r11),%xmm0
2917
2918 .byte 102,15,56,220,209
2919 movups 48(%r11),%xmm1
2920 pxor %xmm9,%xmm7
2921
2922 .byte 102,15,56,220,208
2923 movups 64(%r11),%xmm0
2924 jmp .Locb_enc_loop1
2925
2926 .align 32
2927 .Locb_enc_loop1:
2928 .byte 102,15,56,220,209
2929 movups (%rcx,%rax,1),%xmm1
2930 addq $32,%rax
2931
2932 .byte 102,15,56,220,208
2933 movups -16(%rcx,%rax,1),%xmm0
2934 jnz .Locb_enc_loop1
2935
2936 .byte 102,15,56,220,209
2937 movups 16(%r11),%xmm1
2938 movq %r10,%rax
2939
2940 .byte 102,15,56,221,215
2941 .byte 0xf3,0xc3
2942 .size __ocb_encrypt1,.-__ocb_encrypt1
2943
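/*
 * OCB128 decryption entry point; same assumed prototype and register
 * layout as aesni_ocb_encrypt above (editor's annotation). Note that the
 * checksum is XORed with the recovered plaintext after each block is
 * written out, rather than inside the helpers.
 */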
2944 .globl aesni_ocb_decrypt
2945 .hidden aesni_ocb_decrypt
2946 .type aesni_ocb_decrypt,@function
2947 .align 32
2948 aesni_ocb_decrypt:
2949 leaq (%rsp),%rax
2950 pushq %rbx
2951 pushq %rbp
2952 pushq %r12
2953 pushq %r13
2954 pushq %r14
2955 movq 8(%rax),%rbx
2956 movq 8+8(%rax),%rbp
2957
2958 movl 240(%rcx),%r10d
2959 movq %rcx,%r11
2960 shll $4,%r10d
2961 movups (%rcx),%xmm9
2962 movups 16(%rcx,%r10,1),%xmm1
2963
2964 movdqu (%r9),%xmm15
2965 pxor %xmm1,%xmm9
2966 pxor %xmm1,%xmm15
2967
2968 movl $16+32,%eax
2969 leaq 32(%r11,%r10,1),%rcx
2970 movups 16(%r11),%xmm1
2971 subq %r10,%rax
2972 movq %rax,%r10
2973
2974 movdqu (%rbx),%xmm10
2975 movdqu (%rbp),%xmm8
2976
2977 testq $1,%r8
2978 jnz .Locb_dec_odd
2979
2980 bsfq %r8,%r12
2981 addq $1,%r8
2982 shlq $4,%r12
2983 movdqu (%rbx,%r12,1),%xmm7
2984 movdqu (%rdi),%xmm2
2985 leaq 16(%rdi),%rdi
2986
2987 call __ocb_decrypt1
2988
2989 movdqa %xmm7,%xmm15
2990 movups %xmm2,(%rsi)
2991 xorps %xmm2,%xmm8
2992 leaq 16(%rsi),%rsi
2993 subq $1,%rdx
2994 jz .Locb_dec_done
2995
2996 .Locb_dec_odd:
2997 leaq 1(%r8),%r12
2998 leaq 3(%r8),%r13
2999 leaq 5(%r8),%r14
3000 leaq 6(%r8),%r8
3001 bsfq %r12,%r12
3002 bsfq %r13,%r13
3003 bsfq %r14,%r14
3004 shlq $4,%r12
3005 shlq $4,%r13
3006 shlq $4,%r14
3007
3008 subq $6,%rdx
3009 jc .Locb_dec_short
3010 jmp .Locb_dec_grandloop
3011
3012 .align 32
3013 .Locb_dec_grandloop:
3014 movdqu 0(%rdi),%xmm2
3015 movdqu 16(%rdi),%xmm3
3016 movdqu 32(%rdi),%xmm4
3017 movdqu 48(%rdi),%xmm5
3018 movdqu 64(%rdi),%xmm6
3019 movdqu 80(%rdi),%xmm7
3020 leaq 96(%rdi),%rdi
3021
3022 call __ocb_decrypt6
3023
3024 movups %xmm2,0(%rsi)
3025 pxor %xmm2,%xmm8
3026 movups %xmm3,16(%rsi)
3027 pxor %xmm3,%xmm8
3028 movups %xmm4,32(%rsi)
3029 pxor %xmm4,%xmm8
3030 movups %xmm5,48(%rsi)
3031 pxor %xmm5,%xmm8
3032 movups %xmm6,64(%rsi)
3033 pxor %xmm6,%xmm8
3034 movups %xmm7,80(%rsi)
3035 pxor %xmm7,%xmm8
3036 leaq 96(%rsi),%rsi
3037 subq $6,%rdx
3038 jnc .Locb_dec_grandloop
3039
3040 .Locb_dec_short:
3041 addq $6,%rdx
3042 jz .Locb_dec_done
3043
3044 movdqu 0(%rdi),%xmm2
3045 cmpq $2,%rdx
3046 jb .Locb_dec_one
3047 movdqu 16(%rdi),%xmm3
3048 je .Locb_dec_two
3049
3050 movdqu 32(%rdi),%xmm4
3051 cmpq $4,%rdx
3052 jb .Locb_dec_three
3053 movdqu 48(%rdi),%xmm5
3054 je .Locb_dec_four
3055
3056 movdqu 64(%rdi),%xmm6
3057 pxor %xmm7,%xmm7
3058
3059 call __ocb_decrypt6
3060
3061 movdqa %xmm14,%xmm15
3062 movups %xmm2,0(%rsi)
3063 pxor %xmm2,%xmm8
3064 movups %xmm3,16(%rsi)
3065 pxor %xmm3,%xmm8
3066 movups %xmm4,32(%rsi)
3067 pxor %xmm4,%xmm8
3068 movups %xmm5,48(%rsi)
3069 pxor %xmm5,%xmm8
3070 movups %xmm6,64(%rsi)
3071 pxor %xmm6,%xmm8
3072
3073 jmp .Locb_dec_done
3074
3075 .align 16
3076 .Locb_dec_one:
3077 movdqa %xmm10,%xmm7
3078
3079 call __ocb_decrypt1
3080
3081 movdqa %xmm7,%xmm15
3082 movups %xmm2,0(%rsi)
3083 xorps %xmm2,%xmm8
3084 jmp .Locb_dec_done
3085
3086 .align 16
3087 .Locb_dec_two:
3088 pxor %xmm4,%xmm4
3089 pxor %xmm5,%xmm5
3090
3091 call __ocb_decrypt4
3092
3093 movdqa %xmm11,%xmm15
3094 movups %xmm2,0(%rsi)
3095 xorps %xmm2,%xmm8
3096 movups %xmm3,16(%rsi)
3097 xorps %xmm3,%xmm8
3098
3099 jmp .Locb_dec_done
3100
3101 .align 16
3102 .Locb_dec_three:
3103 pxor %xmm5,%xmm5
3104
3105 call __ocb_decrypt4
3106
3107 movdqa %xmm12,%xmm15
3108 movups %xmm2,0(%rsi)
3109 xorps %xmm2,%xmm8
3110 movups %xmm3,16(%rsi)
3111 xorps %xmm3,%xmm8
3112 movups %xmm4,32(%rsi)
3113 xorps %xmm4,%xmm8
3114
3115 jmp .Locb_dec_done
3116
3117 .align 16
3118 .Locb_dec_four:
3119 call __ocb_decrypt4
3120
3121 movdqa %xmm13,%xmm15
3122 movups %xmm2,0(%rsi)
3123 pxor %xmm2,%xmm8
3124 movups %xmm3,16(%rsi)
3125 pxor %xmm3,%xmm8
3126 movups %xmm4,32(%rsi)
3127 pxor %xmm4,%xmm8
3128 movups %xmm5,48(%rsi)
3129 pxor %xmm5,%xmm8
3130
3131 .Locb_dec_done:
3132 pxor %xmm0,%xmm15
3133 movdqu %xmm8,(%rbp)
3134 movdqu %xmm15,(%r9)
3135
3136 xorps %xmm0,%xmm0
3137 pxor %xmm1,%xmm1
3138 pxor %xmm2,%xmm2
3139 pxor %xmm3,%xmm3
3140 pxor %xmm4,%xmm4
3141 pxor %xmm5,%xmm5
3142 pxor %xmm6,%xmm6
3143 pxor %xmm7,%xmm7
3144 pxor %xmm8,%xmm8
3145 pxor %xmm9,%xmm9
3146 pxor %xmm10,%xmm10
3147 pxor %xmm11,%xmm11
3148 pxor %xmm12,%xmm12
3149 pxor %xmm13,%xmm13
3150 pxor %xmm14,%xmm14
3151 pxor %xmm15,%xmm15
3152 leaq 40(%rsp),%rax
3153 movq -40(%rax),%r14
3154 movq -32(%rax),%r13
3155 movq -24(%rax),%r12
3156 movq -16(%rax),%rbp
3157 movq -8(%rax),%rbx
3158 leaq (%rax),%rsp
3159 .Locb_dec_epilogue:
3160 .byte 0xf3,0xc3
3161 .size aesni_ocb_decrypt,.-aesni_ocb_decrypt
3162
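/*
 * Internal 6-block OCB decrypt helper, mirroring __ocb_encrypt6 but using
 * aesdec/aesdeclast; the 4-block and 1-block variants that follow use the
 * same offset bookkeeping (annotation).
 */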
3163 .type __ocb_decrypt6,@function
3164 .align 32
3165 __ocb_decrypt6:
3166 pxor %xmm9,%xmm15
3167 movdqu (%rbx,%r12,1),%xmm11
3168 movdqa %xmm10,%xmm12
3169 movdqu (%rbx,%r13,1),%xmm13
3170 movdqa %xmm10,%xmm14
3171 pxor %xmm15,%xmm10
3172 movdqu (%rbx,%r14,1),%xmm15
3173 pxor %xmm10,%xmm11
3174 pxor %xmm10,%xmm2
3175 pxor %xmm11,%xmm12
3176 pxor %xmm11,%xmm3
3177 pxor %xmm12,%xmm13
3178 pxor %xmm12,%xmm4
3179 pxor %xmm13,%xmm14
3180 pxor %xmm13,%xmm5
3181 pxor %xmm14,%xmm15
3182 pxor %xmm14,%xmm6
3183 pxor %xmm15,%xmm7
3184 movups 32(%r11),%xmm0
3185
3186 leaq 1(%r8),%r12
3187 leaq 3(%r8),%r13
3188 leaq 5(%r8),%r14
3189 addq $6,%r8
3190 pxor %xmm9,%xmm10
3191 bsfq %r12,%r12
3192 bsfq %r13,%r13
3193 bsfq %r14,%r14
3194
3195 .byte 102,15,56,222,209
3196 .byte 102,15,56,222,217
3197 .byte 102,15,56,222,225
3198 .byte 102,15,56,222,233
3199 pxor %xmm9,%xmm11
3200 pxor %xmm9,%xmm12
3201 .byte 102,15,56,222,241
3202 pxor %xmm9,%xmm13
3203 pxor %xmm9,%xmm14
3204 .byte 102,15,56,222,249
3205 movups 48(%r11),%xmm1
3206 pxor %xmm9,%xmm15
3207
3208 .byte 102,15,56,222,208
3209 .byte 102,15,56,222,216
3210 .byte 102,15,56,222,224
3211 .byte 102,15,56,222,232
3212 .byte 102,15,56,222,240
3213 .byte 102,15,56,222,248
3214 movups 64(%r11),%xmm0
3215 shlq $4,%r12
3216 shlq $4,%r13
3217 jmp .Locb_dec_loop6
3218
3219 .align 32
3220 .Locb_dec_loop6:
3221 .byte 102,15,56,222,209
3222 .byte 102,15,56,222,217
3223 .byte 102,15,56,222,225
3224 .byte 102,15,56,222,233
3225 .byte 102,15,56,222,241
3226 .byte 102,15,56,222,249
3227 movups (%rcx,%rax,1),%xmm1
3228 addq $32,%rax
3229
3230 .byte 102,15,56,222,208
3231 .byte 102,15,56,222,216
3232 .byte 102,15,56,222,224
3233 .byte 102,15,56,222,232
3234 .byte 102,15,56,222,240
3235 .byte 102,15,56,222,248
3236 movups -16(%rcx,%rax,1),%xmm0
3237 jnz .Locb_dec_loop6
3238
3239 .byte 102,15,56,222,209
3240 .byte 102,15,56,222,217
3241 .byte 102,15,56,222,225
3242 .byte 102,15,56,222,233
3243 .byte 102,15,56,222,241
3244 .byte 102,15,56,222,249
3245 movups 16(%r11),%xmm1
3246 shlq $4,%r14
3247
3248 .byte 102,65,15,56,223,210
3249 movdqu (%rbx),%xmm10
3250 movq %r10,%rax
3251 .byte 102,65,15,56,223,219
3252 .byte 102,65,15,56,223,228
3253 .byte 102,65,15,56,223,237
3254 .byte 102,65,15,56,223,246
3255 .byte 102,65,15,56,223,255
3256 .byte 0xf3,0xc3
3257 .size __ocb_decrypt6,.-__ocb_decrypt6
3258
3259 .type __ocb_decrypt4,@function
3260 .align 32
3261 __ocb_decrypt4:
3262 pxor %xmm9,%xmm15
3263 movdqu (%rbx,%r12,1),%xmm11
3264 movdqa %xmm10,%xmm12
3265 movdqu (%rbx,%r13,1),%xmm13
3266 pxor %xmm15,%xmm10
3267 pxor %xmm10,%xmm11
3268 pxor %xmm10,%xmm2
3269 pxor %xmm11,%xmm12
3270 pxor %xmm11,%xmm3
3271 pxor %xmm12,%xmm13
3272 pxor %xmm12,%xmm4
3273 pxor %xmm13,%xmm5
3274 movups 32(%r11),%xmm0
3275
3276 pxor %xmm9,%xmm10
3277 pxor %xmm9,%xmm11
3278 pxor %xmm9,%xmm12
3279 pxor %xmm9,%xmm13
3280
3281 .byte 102,15,56,222,209
3282 .byte 102,15,56,222,217
3283 .byte 102,15,56,222,225
3284 .byte 102,15,56,222,233
3285 movups 48(%r11),%xmm1
3286
3287 .byte 102,15,56,222,208
3288 .byte 102,15,56,222,216
3289 .byte 102,15,56,222,224
3290 .byte 102,15,56,222,232
3291 movups 64(%r11),%xmm0
3292 jmp .Locb_dec_loop4
3293
3294 .align 32
3295 .Locb_dec_loop4:
3296 .byte 102,15,56,222,209
3297 .byte 102,15,56,222,217
3298 .byte 102,15,56,222,225
3299 .byte 102,15,56,222,233
3300 movups (%rcx,%rax,1),%xmm1
3301 addq $32,%rax
3302
3303 .byte 102,15,56,222,208
3304 .byte 102,15,56,222,216
3305 .byte 102,15,56,222,224
3306 .byte 102,15,56,222,232
3307 movups -16(%rcx,%rax,1),%xmm0
3308 jnz .Locb_dec_loop4
3309
3310 .byte 102,15,56,222,209
3311 .byte 102,15,56,222,217
3312 .byte 102,15,56,222,225
3313 .byte 102,15,56,222,233
3314 movups 16(%r11),%xmm1
3315 movq %r10,%rax
3316
3317 .byte 102,65,15,56,223,210
3318 .byte 102,65,15,56,223,219
3319 .byte 102,65,15,56,223,228
3320 .byte 102,65,15,56,223,237
3321 .byte 0xf3,0xc3
3322 .size __ocb_decrypt4,.-__ocb_decrypt4
3323
3324 .type __ocb_decrypt1,@function
3325 .align 32
3326 __ocb_decrypt1:
3327 pxor %xmm15,%xmm7
3328 pxor %xmm9,%xmm7
3329 pxor %xmm7,%xmm2
3330 movups 32(%r11),%xmm0
3331
3332 .byte 102,15,56,222,209
3333 movups 48(%r11),%xmm1
3334 pxor %xmm9,%xmm7
3335
3336 .byte 102,15,56,222,208
3337 movups 64(%r11),%xmm0
3338 jmp .Locb_dec_loop1
3339
3340 .align 32
3341 .Locb_dec_loop1:
3342 .byte 102,15,56,222,209
3343 movups (%rcx,%rax,1),%xmm1
3344 addq $32,%rax
3345
3346 .byte 102,15,56,222,208
3347 movups -16(%rcx,%rax,1),%xmm0
3348 jnz .Locb_dec_loop1
3349
3350 .byte 102,15,56,222,209
3351 movups 16(%r11),%xmm1
3352 movq %r10,%rax
3353
3354 .byte 102,15,56,223,215
3355 .byte 0xf3,0xc3
3356 .size __ocb_decrypt1,.-__ocb_decrypt1
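/*
 * CBC entry point. Assumed prototype (editor's annotation):
 *   void aesni_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
 *                          const AES_KEY *key, uint8_t *ivec, int enc);
 * %rdi=in, %rsi=out, %rdx=length in bytes, %rcx=key, %r8=ivec,
 * %r9d=enc (non-zero encrypts; zero takes the interleaved decrypt path).
 */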
3357 .globl aesni_cbc_encrypt
3358 .hidden aesni_cbc_encrypt
3359 .type aesni_cbc_encrypt,@function
3360 .align 16
3361 aesni_cbc_encrypt:
3362 testq %rdx,%rdx
3363 jz .Lcbc_ret
3364
3365 movl 240(%rcx),%r10d
3366 movq %rcx,%r11
3367 testl %r9d,%r9d
3368 jz .Lcbc_decrypt
3369
3370 movups (%r8),%xmm2
3371 movl %r10d,%eax
3372 cmpq $16,%rdx
3373 jb .Lcbc_enc_tail
3374 subq $16,%rdx
3375 jmp .Lcbc_enc_loop
3376 .align 16
3377 .Lcbc_enc_loop:
3378 movups (%rdi),%xmm3
3379 leaq 16(%rdi),%rdi
3380
3381 movups (%rcx),%xmm0
3382 movups 16(%rcx),%xmm1
3383 xorps %xmm0,%xmm3
3384 leaq 32(%rcx),%rcx
3385 xorps %xmm3,%xmm2
3386 .Loop_enc1_15:
3387 .byte 102,15,56,220,209
3388 decl %eax
3389 movups (%rcx),%xmm1
3390 leaq 16(%rcx),%rcx
3391 jnz .Loop_enc1_15
3392 .byte 102,15,56,221,209
3393 movl %r10d,%eax
3394 movq %r11,%rcx
3395 movups %xmm2,0(%rsi)
3396 leaq 16(%rsi),%rsi
3397 subq $16,%rdx
3398 jnc .Lcbc_enc_loop
3399 addq $16,%rdx
3400 jnz .Lcbc_enc_tail
3401 pxor %xmm0,%xmm0
3402 pxor %xmm1,%xmm1
3403 movups %xmm2,(%r8)
3404 pxor %xmm2,%xmm2
3405 pxor %xmm3,%xmm3
3406 jmp .Lcbc_ret
3407
3408 .Lcbc_enc_tail:
3409 movq %rdx,%rcx
3410 xchgq %rdi,%rsi
3411 .long 0x9066A4F3
3412 movl $16,%ecx
3413 subq %rdx,%rcx
3414 xorl %eax,%eax
3415 .long 0x9066AAF3
3416 leaq -16(%rdi),%rdi
3417 movl %r10d,%eax
3418 movq %rdi,%rsi
3419 movq %r11,%rcx
3420 xorq %rdx,%rdx
3421 jmp .Lcbc_enc_loop
3422
3423 .align 16
3424 .Lcbc_decrypt:
3425 cmpq $16,%rdx
3426 jne .Lcbc_decrypt_bulk
3427
3428
3429
3430 movdqu (%rdi),%xmm2
3431 movdqu (%r8),%xmm3
3432 movdqa %xmm2,%xmm4
3433 movups (%rcx),%xmm0
3434 movups 16(%rcx),%xmm1
3435 leaq 32(%rcx),%rcx
3436 xorps %xmm0,%xmm2
3437 .Loop_dec1_16:
3438 .byte 102,15,56,222,209
3439 decl %r10d
3440 movups (%rcx),%xmm1
3441 leaq 16(%rcx),%rcx
3442 jnz .Loop_dec1_16
3443 .byte 102,15,56,223,209
3444 pxor %xmm0,%xmm0
3445 pxor %xmm1,%xmm1
3446 movdqu %xmm4,(%r8)
3447 xorps %xmm3,%xmm2
3448 pxor %xmm3,%xmm3
3449 movups %xmm2,(%rsi)
3450 pxor %xmm2,%xmm2
3451 jmp .Lcbc_ret
3452 .align 16
3453 .Lcbc_decrypt_bulk:
3454 leaq (%rsp),%r11
3455 pushq %rbp
3456 subq $16,%rsp
3457 andq $-16,%rsp
3458 movq %rcx,%rbp
3459 movups (%r8),%xmm10
3460 movl %r10d,%eax
3461 cmpq $0x50,%rdx
3462 jbe .Lcbc_dec_tail
3463
3464 movups (%rcx),%xmm0
3465 movdqu 0(%rdi),%xmm2
3466 movdqu 16(%rdi),%xmm3
3467 movdqa %xmm2,%xmm11
3468 movdqu 32(%rdi),%xmm4
3469 movdqa %xmm3,%xmm12
3470 movdqu 48(%rdi),%xmm5
3471 movdqa %xmm4,%xmm13
3472 movdqu 64(%rdi),%xmm6
3473 movdqa %xmm5,%xmm14
3474 movdqu 80(%rdi),%xmm7
3475 movdqa %xmm6,%xmm15
3476 movl OPENSSL_ia32cap_P+4(%rip),%r9d
3477 cmpq $0x70,%rdx
3478 jbe .Lcbc_dec_six_or_seven
3479
3480 andl $71303168,%r9d
3481 subq $0x50,%rdx
3482 cmpl $4194304,%r9d
3483 je .Lcbc_dec_loop6_enter
3484 subq $0x20,%rdx
3485 leaq 112(%rcx),%rcx
3486 jmp .Lcbc_dec_loop8_enter
3487 .align 16
3488 .Lcbc_dec_loop8:
3489 movups %xmm9,(%rsi)
3490 leaq 16(%rsi),%rsi
3491 .Lcbc_dec_loop8_enter:
3492 movdqu 96(%rdi),%xmm8
3493 pxor %xmm0,%xmm2
3494 movdqu 112(%rdi),%xmm9
3495 pxor %xmm0,%xmm3
3496 movups 16-112(%rcx),%xmm1
3497 pxor %xmm0,%xmm4
3498 movq $-1,%rbp
3499 cmpq $0x70,%rdx
3500 pxor %xmm0,%xmm5
3501 pxor %xmm0,%xmm6
3502 pxor %xmm0,%xmm7
3503 pxor %xmm0,%xmm8
3504
3505 .byte 102,15,56,222,209
3506 pxor %xmm0,%xmm9
3507 movups 32-112(%rcx),%xmm0
3508 .byte 102,15,56,222,217
3509 .byte 102,15,56,222,225
3510 .byte 102,15,56,222,233
3511 .byte 102,15,56,222,241
3512 .byte 102,15,56,222,249
3513 .byte 102,68,15,56,222,193
3514 adcq $0,%rbp
3515 andq $128,%rbp
3516 .byte 102,68,15,56,222,201
3517 addq %rdi,%rbp
3518 movups 48-112(%rcx),%xmm1
3519 .byte 102,15,56,222,208
3520 .byte 102,15,56,222,216
3521 .byte 102,15,56,222,224
3522 .byte 102,15,56,222,232
3523 .byte 102,15,56,222,240
3524 .byte 102,15,56,222,248
3525 .byte 102,68,15,56,222,192
3526 .byte 102,68,15,56,222,200
3527 movups 64-112(%rcx),%xmm0
3528 nop
3529 .byte 102,15,56,222,209
3530 .byte 102,15,56,222,217
3531 .byte 102,15,56,222,225
3532 .byte 102,15,56,222,233
3533 .byte 102,15,56,222,241
3534 .byte 102,15,56,222,249
3535 .byte 102,68,15,56,222,193
3536 .byte 102,68,15,56,222,201
3537 movups 80-112(%rcx),%xmm1
3538 nop
3539 .byte 102,15,56,222,208
3540 .byte 102,15,56,222,216
3541 .byte 102,15,56,222,224
3542 .byte 102,15,56,222,232
3543 .byte 102,15,56,222,240
3544 .byte 102,15,56,222,248
3545 .byte 102,68,15,56,222,192
3546 .byte 102,68,15,56,222,200
3547 movups 96-112(%rcx),%xmm0
3548 nop
3549 .byte 102,15,56,222,209
3550 .byte 102,15,56,222,217
3551 .byte 102,15,56,222,225
3552 .byte 102,15,56,222,233
3553 .byte 102,15,56,222,241
3554 .byte 102,15,56,222,249
3555 .byte 102,68,15,56,222,193
3556 .byte 102,68,15,56,222,201
3557 movups 112-112(%rcx),%xmm1
3558 nop
3559 .byte 102,15,56,222,208
3560 .byte 102,15,56,222,216
3561 .byte 102,15,56,222,224
3562 .byte 102,15,56,222,232
3563 .byte 102,15,56,222,240
3564 .byte 102,15,56,222,248
3565 .byte 102,68,15,56,222,192
3566 .byte 102,68,15,56,222,200
3567 movups 128-112(%rcx),%xmm0
3568 nop
3569 .byte 102,15,56,222,209
3570 .byte 102,15,56,222,217
3571 .byte 102,15,56,222,225
3572 .byte 102,15,56,222,233
3573 .byte 102,15,56,222,241
3574 .byte 102,15,56,222,249
3575 .byte 102,68,15,56,222,193
3576 .byte 102,68,15,56,222,201
3577 movups 144-112(%rcx),%xmm1
3578 cmpl $11,%eax
3579 .byte 102,15,56,222,208
3580 .byte 102,15,56,222,216
3581 .byte 102,15,56,222,224
3582 .byte 102,15,56,222,232
3583 .byte 102,15,56,222,240
3584 .byte 102,15,56,222,248
3585 .byte 102,68,15,56,222,192
3586 .byte 102,68,15,56,222,200
3587 movups 160-112(%rcx),%xmm0
3588 jb .Lcbc_dec_done
3589 .byte 102,15,56,222,209
3590 .byte 102,15,56,222,217
3591 .byte 102,15,56,222,225
3592 .byte 102,15,56,222,233
3593 .byte 102,15,56,222,241
3594 .byte 102,15,56,222,249
3595 .byte 102,68,15,56,222,193
3596 .byte 102,68,15,56,222,201
3597 movups 176-112(%rcx),%xmm1
3598 nop
3599 .byte 102,15,56,222,208
3600 .byte 102,15,56,222,216
3601 .byte 102,15,56,222,224
3602 .byte 102,15,56,222,232
3603 .byte 102,15,56,222,240
3604 .byte 102,15,56,222,248
3605 .byte 102,68,15,56,222,192
3606 .byte 102,68,15,56,222,200
3607 movups 192-112(%rcx),%xmm0
3608 je .Lcbc_dec_done
3609 .byte 102,15,56,222,209
3610 .byte 102,15,56,222,217
3611 .byte 102,15,56,222,225
3612 .byte 102,15,56,222,233
3613 .byte 102,15,56,222,241
3614 .byte 102,15,56,222,249
3615 .byte 102,68,15,56,222,193
3616 .byte 102,68,15,56,222,201
3617 movups 208-112(%rcx),%xmm1
3618 nop
3619 .byte 102,15,56,222,208
3620 .byte 102,15,56,222,216
3621 .byte 102,15,56,222,224
3622 .byte 102,15,56,222,232
3623 .byte 102,15,56,222,240
3624 .byte 102,15,56,222,248
3625 .byte 102,68,15,56,222,192
3626 .byte 102,68,15,56,222,200
3627 movups 224-112(%rcx),%xmm0
3628 jmp .Lcbc_dec_done
3629 .align 16
3630 .Lcbc_dec_done:
3631 .byte 102,15,56,222,209
3632 .byte 102,15,56,222,217
3633 pxor %xmm0,%xmm10
3634 pxor %xmm0,%xmm11
3635 .byte 102,15,56,222,225
3636 .byte 102,15,56,222,233
3637 pxor %xmm0,%xmm12
3638 pxor %xmm0,%xmm13
3639 .byte 102,15,56,222,241
3640 .byte 102,15,56,222,249
3641 pxor %xmm0,%xmm14
3642 pxor %xmm0,%xmm15
3643 .byte 102,68,15,56,222,193
3644 .byte 102,68,15,56,222,201
3645 movdqu 80(%rdi),%xmm1
3646
3647 .byte 102,65,15,56,223,210
3648 movdqu 96(%rdi),%xmm10
3649 pxor %xmm0,%xmm1
3650 .byte 102,65,15,56,223,219
3651 pxor %xmm0,%xmm10
3652 movdqu 112(%rdi),%xmm0
3653 .byte 102,65,15,56,223,228
3654 leaq 128(%rdi),%rdi
3655 movdqu 0(%rbp),%xmm11
3656 .byte 102,65,15,56,223,237
3657 .byte 102,65,15,56,223,246
3658 movdqu 16(%rbp),%xmm12
3659 movdqu 32(%rbp),%xmm13
3660 .byte 102,65,15,56,223,255
3661 .byte 102,68,15,56,223,193
3662 movdqu 48(%rbp),%xmm14
3663 movdqu 64(%rbp),%xmm15
3664 .byte 102,69,15,56,223,202
3665 movdqa %xmm0,%xmm10
3666 movdqu 80(%rbp),%xmm1
3667 movups -112(%rcx),%xmm0
3668
3669 movups %xmm2,(%rsi)
3670 movdqa %xmm11,%xmm2
3671 movups %xmm3,16(%rsi)
3672 movdqa %xmm12,%xmm3
3673 movups %xmm4,32(%rsi)
3674 movdqa %xmm13,%xmm4
3675 movups %xmm5,48(%rsi)
3676 movdqa %xmm14,%xmm5
3677 movups %xmm6,64(%rsi)
3678 movdqa %xmm15,%xmm6
3679 movups %xmm7,80(%rsi)
3680 movdqa %xmm1,%xmm7
3681 movups %xmm8,96(%rsi)
3682 leaq 112(%rsi),%rsi
3683
3684 subq $0x80,%rdx
3685 ja .Lcbc_dec_loop8
3686
3687 movaps %xmm9,%xmm2
3688 leaq -112(%rcx),%rcx
3689 addq $0x70,%rdx
3690 jle .Lcbc_dec_clear_tail_collected
3691 movups %xmm9,(%rsi)
3692 leaq 16(%rsi),%rsi
3693 cmpq $0x50,%rdx
3694 jbe .Lcbc_dec_tail
3695
3696 movaps %xmm11,%xmm2
3697 .Lcbc_dec_six_or_seven:
3698 cmpq $0x60,%rdx
3699 ja .Lcbc_dec_seven
3700
3701 movaps %xmm7,%xmm8
3702 call _aesni_decrypt6
3703 pxor %xmm10,%xmm2
3704 movaps %xmm8,%xmm10
3705 pxor %xmm11,%xmm3
3706 movdqu %xmm2,(%rsi)
3707 pxor %xmm12,%xmm4
3708 movdqu %xmm3,16(%rsi)
3709 pxor %xmm3,%xmm3
3710 pxor %xmm13,%xmm5
3711 movdqu %xmm4,32(%rsi)
3712 pxor %xmm4,%xmm4
3713 pxor %xmm14,%xmm6
3714 movdqu %xmm5,48(%rsi)
3715 pxor %xmm5,%xmm5
3716 pxor %xmm15,%xmm7
3717 movdqu %xmm6,64(%rsi)
3718 pxor %xmm6,%xmm6
3719 leaq 80(%rsi),%rsi
3720 movdqa %xmm7,%xmm2
3721 pxor %xmm7,%xmm7
3722 jmp .Lcbc_dec_tail_collected
3723
3724 .align 16
3725 .Lcbc_dec_seven:
3726 movups 96(%rdi),%xmm8
3727 xorps %xmm9,%xmm9
3728 call _aesni_decrypt8
3729 movups 80(%rdi),%xmm9
3730 pxor %xmm10,%xmm2
3731 movups 96(%rdi),%xmm10
3732 pxor %xmm11,%xmm3
3733 movdqu %xmm2,(%rsi)
3734 pxor %xmm12,%xmm4
3735 movdqu %xmm3,16(%rsi)
3736 pxor %xmm3,%xmm3
3737 pxor %xmm13,%xmm5
3738 movdqu %xmm4,32(%rsi)
3739 pxor %xmm4,%xmm4
3740 pxor %xmm14,%xmm6
3741 movdqu %xmm5,48(%rsi)
3742 pxor %xmm5,%xmm5
3743 pxor %xmm15,%xmm7
3744 movdqu %xmm6,64(%rsi)
3745 pxor %xmm6,%xmm6
3746 pxor %xmm9,%xmm8
3747 movdqu %xmm7,80(%rsi)
3748 pxor %xmm7,%xmm7
3749 leaq 96(%rsi),%rsi
3750 movdqa %xmm8,%xmm2
3751 pxor %xmm8,%xmm8
3752 pxor %xmm9,%xmm9
3753 jmp .Lcbc_dec_tail_collected
3754
3755 .align 16
3756 .Lcbc_dec_loop6:
3757 movups %xmm7,(%rsi)
3758 leaq 16(%rsi),%rsi
3759 movdqu 0(%rdi),%xmm2
3760 movdqu 16(%rdi),%xmm3
3761 movdqa %xmm2,%xmm11
3762 movdqu 32(%rdi),%xmm4
3763 movdqa %xmm3,%xmm12
3764 movdqu 48(%rdi),%xmm5
3765 movdqa %xmm4,%xmm13
3766 movdqu 64(%rdi),%xmm6
3767 movdqa %xmm5,%xmm14
3768 movdqu 80(%rdi),%xmm7
3769 movdqa %xmm6,%xmm15
3770 .Lcbc_dec_loop6_enter:
3771 leaq 96(%rdi),%rdi
3772 movdqa %xmm7,%xmm8
3773
3774 call _aesni_decrypt6
3775
3776 pxor %xmm10,%xmm2
3777 movdqa %xmm8,%xmm10
3778 pxor %xmm11,%xmm3
3779 movdqu %xmm2,(%rsi)
3780 pxor %xmm12,%xmm4
3781 movdqu %xmm3,16(%rsi)
3782 pxor %xmm13,%xmm5
3783 movdqu %xmm4,32(%rsi)
3784 pxor %xmm14,%xmm6
3785 movq %rbp,%rcx
3786 movdqu %xmm5,48(%rsi)
3787 pxor %xmm15,%xmm7
3788 movl %r10d,%eax
3789 movdqu %xmm6,64(%rsi)
3790 leaq 80(%rsi),%rsi
3791 subq $0x60,%rdx
3792 ja .Lcbc_dec_loop6
3793
3794 movdqa %xmm7,%xmm2
3795 addq $0x50,%rdx
3796 jle .Lcbc_dec_clear_tail_collected
3797 movups %xmm7,(%rsi)
3798 leaq 16(%rsi),%rsi
3799
3800 .Lcbc_dec_tail:
3801 movups (%rdi),%xmm2
3802 subq $0x10,%rdx
3803 jbe .Lcbc_dec_one
3804
3805 movups 16(%rdi),%xmm3
3806 movaps %xmm2,%xmm11
3807 subq $0x10,%rdx
3808 jbe .Lcbc_dec_two
3809
3810 movups 32(%rdi),%xmm4
3811 movaps %xmm3,%xmm12
3812 subq $0x10,%rdx
3813 jbe .Lcbc_dec_three
3814
3815 movups 48(%rdi),%xmm5
3816 movaps %xmm4,%xmm13
3817 subq $0x10,%rdx
3818 jbe .Lcbc_dec_four
3819
3820 movups 64(%rdi),%xmm6
3821 movaps %xmm5,%xmm14
3822 movaps %xmm6,%xmm15
3823 xorps %xmm7,%xmm7
3824 call _aesni_decrypt6
3825 pxor %xmm10,%xmm2
3826 movaps %xmm15,%xmm10
3827 pxor %xmm11,%xmm3
3828 movdqu %xmm2,(%rsi)
3829 pxor %xmm12,%xmm4
3830 movdqu %xmm3,16(%rsi)
3831 pxor %xmm3,%xmm3
3832 pxor %xmm13,%xmm5
3833 movdqu %xmm4,32(%rsi)
3834 pxor %xmm4,%xmm4
3835 pxor %xmm14,%xmm6
3836 movdqu %xmm5,48(%rsi)
3837 pxor %xmm5,%xmm5
3838 leaq 64(%rsi),%rsi
3839 movdqa %xmm6,%xmm2
3840 pxor %xmm6,%xmm6
3841 pxor %xmm7,%xmm7
3842 subq $0x10,%rdx
3843 jmp .Lcbc_dec_tail_collected
3844
3845 .align 16
3846 .Lcbc_dec_one:
3847 movaps %xmm2,%xmm11
3848 movups (%rcx),%xmm0
3849 movups 16(%rcx),%xmm1
3850 leaq 32(%rcx),%rcx
3851 xorps %xmm0,%xmm2
3852 .Loop_dec1_17:
3853 .byte 102,15,56,222,209
3854 decl %eax
3855 movups (%rcx),%xmm1
3856 leaq 16(%rcx),%rcx
3857 jnz .Loop_dec1_17
3858 .byte 102,15,56,223,209
3859 xorps %xmm10,%xmm2
3860 movaps %xmm11,%xmm10
3861 jmp .Lcbc_dec_tail_collected
3862 .align 16
3863 .Lcbc_dec_two:
3864 movaps %xmm3,%xmm12
3865 call _aesni_decrypt2
3866 pxor %xmm10,%xmm2
3867 movaps %xmm12,%xmm10
3868 pxor %xmm11,%xmm3
3869 movdqu %xmm2,(%rsi)
3870 movdqa %xmm3,%xmm2
3871 pxor %xmm3,%xmm3
3872 leaq 16(%rsi),%rsi
3873 jmp .Lcbc_dec_tail_collected
3874 .align 16
3875 .Lcbc_dec_three:
3876 movaps %xmm4,%xmm13
3877 call _aesni_decrypt3
3878 pxor %xmm10,%xmm2
3879 movaps %xmm13,%xmm10
3880 pxor %xmm11,%xmm3
3881 movdqu %xmm2,(%rsi)
3882 pxor %xmm12,%xmm4
3883 movdqu %xmm3,16(%rsi)
3884 pxor %xmm3,%xmm3
3885 movdqa %xmm4,%xmm2
3886 pxor %xmm4,%xmm4
3887 leaq 32(%rsi),%rsi
3888 jmp .Lcbc_dec_tail_collected
3889 .align 16
3890 .Lcbc_dec_four:
3891 movaps %xmm5,%xmm14
3892 call _aesni_decrypt4
3893 pxor %xmm10,%xmm2
3894 movaps %xmm14,%xmm10
3895 pxor %xmm11,%xmm3
3896 movdqu %xmm2,(%rsi)
3897 pxor %xmm12,%xmm4
3898 movdqu %xmm3,16(%rsi)
3899 pxor %xmm3,%xmm3
3900 pxor %xmm13,%xmm5
3901 movdqu %xmm4,32(%rsi)
3902 pxor %xmm4,%xmm4
3903 movdqa %xmm5,%xmm2
3904 pxor %xmm5,%xmm5
3905 leaq 48(%rsi),%rsi
3906 jmp .Lcbc_dec_tail_collected
3907
3908 .align 16
3909 .Lcbc_dec_clear_tail_collected:
3910 pxor %xmm3,%xmm3
3911 pxor %xmm4,%xmm4
3912 pxor %xmm5,%xmm5
3913 pxor %xmm6,%xmm6
3914 pxor %xmm7,%xmm7
3915 pxor %xmm8,%xmm8
3916 pxor %xmm9,%xmm9
3917 .Lcbc_dec_tail_collected:
3918 movups %xmm10,(%r8)
3919 andq $15,%rdx
3920 jnz .Lcbc_dec_tail_partial
3921 movups %xmm2,(%rsi)
3922 pxor %xmm2,%xmm2
3923 jmp .Lcbc_dec_ret
3924 .align 16
3925 .Lcbc_dec_tail_partial:
3926 movaps %xmm2,(%rsp)
3927 pxor %xmm2,%xmm2
3928 movq $16,%rcx
3929 movq %rsi,%rdi
3930 subq %rdx,%rcx
3931 leaq (%rsp),%rsi
3932 .long 0x9066A4F3
3933 movdqa %xmm2,(%rsp)
3934
3935 .Lcbc_dec_ret:
3936 xorps %xmm0,%xmm0
3937 pxor %xmm1,%xmm1
3938 movq -8(%r11),%rbp
3939 leaq (%r11),%rsp
3940 .Lcbc_ret:
3941 .byte 0xf3,0xc3
3942 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt
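/*
 * Builds a decryption key schedule: expands the encryption schedule via
 * __aesni_set_encrypt_key, then reverses the round-key order and applies
 * aesimc to the inner rounds. Assumed prototype (annotation):
 *   int aesni_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
 * returning 0 on success, negative on error.
 */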
3943 .globl aesni_set_decrypt_key
3944 .hidden aesni_set_decrypt_key
3945 .type aesni_set_decrypt_key,@function
3946 .align 16
3947 aesni_set_decrypt_key:
3948 .byte 0x48,0x83,0xEC,0x08
3949 call __aesni_set_encrypt_key
3950 shll $4,%esi
3951 testl %eax,%eax
3952 jnz .Ldec_key_ret
3953 leaq 16(%rdx,%rsi,1),%rdi
3954
3955 movups (%rdx),%xmm0
3956 movups (%rdi),%xmm1
3957 movups %xmm0,(%rdi)
3958 movups %xmm1,(%rdx)
3959 leaq 16(%rdx),%rdx
3960 leaq -16(%rdi),%rdi
3961
3962 .Ldec_key_inverse:
3963 movups (%rdx),%xmm0
3964 movups (%rdi),%xmm1
3965 .byte 102,15,56,219,192
3966 .byte 102,15,56,219,201
3967 leaq 16(%rdx),%rdx
3968 leaq -16(%rdi),%rdi
3969 movups %xmm0,16(%rdi)
3970 movups %xmm1,-16(%rdx)
3971 cmpq %rdx,%rdi
3972 ja .Ldec_key_inverse
3973
3974 movups (%rdx),%xmm0
3975 .byte 102,15,56,219,192
3976 pxor %xmm1,%xmm1
3977 movups %xmm0,(%rdi)
3978 pxor %xmm0,%xmm0
3979 .Ldec_key_ret:
3980 addq $8,%rsp
3981 .byte 0xf3,0xc3
3982 .LSEH_end_set_decrypt_key:
3983 .size aesni_set_decrypt_key,.-aesni_set_decrypt_key
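/*
 * Key-schedule expansion. Assumed prototype (annotation):
 *   int aesni_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
 * with %rdi=userKey, %esi=bits (128/192/256), %rdx=key schedule; returns 0
 * on success, -1 on a NULL pointer and -2 on an unsupported key size. The
 * .L*rounds_alt paths are alternative expansions selected from the
 * OPENSSL_ia32cap_P capability bits.
 */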
3984 .globl aesni_set_encrypt_key
3985 .hidden aesni_set_encrypt_key
3986 .type aesni_set_encrypt_key,@function
3987 .align 16
3988 aesni_set_encrypt_key:
3989 __aesni_set_encrypt_key:
3990 .byte 0x48,0x83,0xEC,0x08
3991 movq $-1,%rax
3992 testq %rdi,%rdi
3993 jz .Lenc_key_ret
3994 testq %rdx,%rdx
3995 jz .Lenc_key_ret
3996
3997 movl $268437504,%r10d
3998 movups (%rdi),%xmm0
3999 xorps %xmm4,%xmm4
4000 andl OPENSSL_ia32cap_P+4(%rip),%r10d
4001 leaq 16(%rdx),%rax
4002 cmpl $256,%esi
4003 je .L14rounds
4004 cmpl $192,%esi
4005 je .L12rounds
4006 cmpl $128,%esi
4007 jne .Lbad_keybits
4008
4009 .L10rounds:
4010 movl $9,%esi
4011 cmpl $268435456,%r10d
4012 je .L10rounds_alt
4013
4014 movups %xmm0,(%rdx)
4015 .byte 102,15,58,223,200,1
4016 call .Lkey_expansion_128_cold
4017 .byte 102,15,58,223,200,2
4018 call .Lkey_expansion_128
4019 .byte 102,15,58,223,200,4
4020 call .Lkey_expansion_128
4021 .byte 102,15,58,223,200,8
4022 call .Lkey_expansion_128
4023 .byte 102,15,58,223,200,16
4024 call .Lkey_expansion_128
4025 .byte 102,15,58,223,200,32
4026 call .Lkey_expansion_128
4027 .byte 102,15,58,223,200,64
4028 call .Lkey_expansion_128
4029 .byte 102,15,58,223,200,128
4030 call .Lkey_expansion_128
4031 .byte 102,15,58,223,200,27
4032 call .Lkey_expansion_128
4033 .byte 102,15,58,223,200,54
4034 call .Lkey_expansion_128
4035 movups %xmm0,(%rax)
4036 movl %esi,80(%rax)
4037 xorl %eax,%eax
4038 jmp .Lenc_key_ret
4039
4040 .align 16
4041 .L10rounds_alt:
4042 movdqa .Lkey_rotate(%rip),%xmm5
4043 movl $8,%r10d
4044 movdqa .Lkey_rcon1(%rip),%xmm4
4045 movdqa %xmm0,%xmm2
4046 movdqu %xmm0,(%rdx)
4047 jmp .Loop_key128
4048
4049 .align 16
4050 .Loop_key128:
4051 .byte 102,15,56,0,197
4052 .byte 102,15,56,221,196
4053 pslld $1,%xmm4
4054 leaq 16(%rax),%rax
4055
4056 movdqa %xmm2,%xmm3
4057 pslldq $4,%xmm2
4058 pxor %xmm2,%xmm3
4059 pslldq $4,%xmm2
4060 pxor %xmm2,%xmm3
4061 pslldq $4,%xmm2
4062 pxor %xmm3,%xmm2
4063
4064 pxor %xmm2,%xmm0
4065 movdqu %xmm0,-16(%rax)
4066 movdqa %xmm0,%xmm2
4067
4068 decl %r10d
4069 jnz .Loop_key128
4070
4071 movdqa .Lkey_rcon1b(%rip),%xmm4
4072
4073 .byte 102,15,56,0,197
4074 .byte 102,15,56,221,196
4075 pslld $1,%xmm4
4076
4077 movdqa %xmm2,%xmm3
4078 pslldq $4,%xmm2
4079 pxor %xmm2,%xmm3
4080 pslldq $4,%xmm2
4081 pxor %xmm2,%xmm3
4082 pslldq $4,%xmm2
4083 pxor %xmm3,%xmm2
4084
4085 pxor %xmm2,%xmm0
4086 movdqu %xmm0,(%rax)
4087
4088 movdqa %xmm0,%xmm2
4089 .byte 102,15,56,0,197
4090 .byte 102,15,56,221,196
4091
4092 movdqa %xmm2,%xmm3
4093 pslldq $4,%xmm2
4094 pxor %xmm2,%xmm3
4095 pslldq $4,%xmm2
4096 pxor %xmm2,%xmm3
4097 pslldq $4,%xmm2
4098 pxor %xmm3,%xmm2
4099
4100 pxor %xmm2,%xmm0
4101 movdqu %xmm0,16(%rax)
4102
4103 movl %esi,96(%rax)
4104 xorl %eax,%eax
4105 jmp .Lenc_key_ret
4106
4107 .align 16
4108 .L12rounds:
4109 movq 16(%rdi),%xmm2
4110 movl $11,%esi
4111 cmpl $268435456,%r10d
4112 je .L12rounds_alt
4113
4114 movups %xmm0,(%rdx)
4115 .byte 102,15,58,223,202,1
4116 call .Lkey_expansion_192a_cold
4117 .byte 102,15,58,223,202,2
4118 call .Lkey_expansion_192b
4119 .byte 102,15,58,223,202,4
4120 call .Lkey_expansion_192a
4121 .byte 102,15,58,223,202,8
4122 call .Lkey_expansion_192b
4123 .byte 102,15,58,223,202,16
4124 call .Lkey_expansion_192a
4125 .byte 102,15,58,223,202,32
4126 call .Lkey_expansion_192b
4127 .byte 102,15,58,223,202,64
4128 call .Lkey_expansion_192a
4129 .byte 102,15,58,223,202,128
4130 call .Lkey_expansion_192b
4131 movups %xmm0,(%rax)
4132 movl %esi,48(%rax)
4133 xorq %rax,%rax
4134 jmp .Lenc_key_ret
4135
4136 .align 16
4137 .L12rounds_alt:
4138 movdqa .Lkey_rotate192(%rip),%xmm5
4139 movdqa .Lkey_rcon1(%rip),%xmm4
4140 movl $8,%r10d
4141 movdqu %xmm0,(%rdx)
4142 jmp .Loop_key192
4143
4144 .align 16
4145 .Loop_key192:
4146 movq %xmm2,0(%rax)
4147 movdqa %xmm2,%xmm1
4148 .byte 102,15,56,0,213
4149 .byte 102,15,56,221,212
4150 pslld $1,%xmm4
4151 leaq 24(%rax),%rax
4152
4153 movdqa %xmm0,%xmm3
4154 pslldq $4,%xmm0
4155 pxor %xmm0,%xmm3
4156 pslldq $4,%xmm0
4157 pxor %xmm0,%xmm3
4158 pslldq $4,%xmm0
4159 pxor %xmm3,%xmm0
4160
4161 pshufd $0xff,%xmm0,%xmm3
4162 pxor %xmm1,%xmm3
4163 pslldq $4,%xmm1
4164 pxor %xmm1,%xmm3
4165
4166 pxor %xmm2,%xmm0
4167 pxor %xmm3,%xmm2
4168 movdqu %xmm0,-16(%rax)
4169
4170 decl %r10d
4171 jnz .Loop_key192
4172
4173 movl %esi,32(%rax)
4174 xorl %eax,%eax
4175 jmp .Lenc_key_ret
4176
4177 .align 16
4178 .L14rounds:
4179 movups 16(%rdi),%xmm2
4180 movl $13,%esi
4181 leaq 16(%rax),%rax
4182 cmpl $268435456,%r10d
4183 je .L14rounds_alt
4184
4185 movups %xmm0,(%rdx)
4186 movups %xmm2,16(%rdx)
4187 .byte 102,15,58,223,202,1
4188 call .Lkey_expansion_256a_cold
4189 .byte 102,15,58,223,200,1
4190 call .Lkey_expansion_256b
4191 .byte 102,15,58,223,202,2
4192 call .Lkey_expansion_256a
4193 .byte 102,15,58,223,200,2
4194 call .Lkey_expansion_256b
4195 .byte 102,15,58,223,202,4
4196 call .Lkey_expansion_256a
4197 .byte 102,15,58,223,200,4
4198 call .Lkey_expansion_256b
4199 .byte 102,15,58,223,202,8
4200 call .Lkey_expansion_256a
4201 .byte 102,15,58,223,200,8
4202 call .Lkey_expansion_256b
4203 .byte 102,15,58,223,202,16
4204 call .Lkey_expansion_256a
4205 .byte 102,15,58,223,200,16
4206 call .Lkey_expansion_256b
4207 .byte 102,15,58,223,202,32
4208 call .Lkey_expansion_256a
4209 .byte 102,15,58,223,200,32
4210 call .Lkey_expansion_256b
4211 .byte 102,15,58,223,202,64
4212 call .Lkey_expansion_256a
4213 movups %xmm0,(%rax)
4214 movl %esi,16(%rax)
4215 xorq %rax,%rax
4216 jmp .Lenc_key_ret
4217
4218 .align 16
4219 .L14rounds_alt:
4220 movdqa .Lkey_rotate(%rip),%xmm5
4221 movdqa .Lkey_rcon1(%rip),%xmm4
4222 movl $7,%r10d
4223 movdqu %xmm0,0(%rdx)
4224 movdqa %xmm2,%xmm1
4225 movdqu %xmm2,16(%rdx)
4226 jmp .Loop_key256
4227
4228 .align 16
4229 .Loop_key256:
4230 .byte 102,15,56,0,213
4231 .byte 102,15,56,221,212
4232
4233 movdqa %xmm0,%xmm3
4234 pslldq $4,%xmm0
4235 pxor %xmm0,%xmm3
4236 pslldq $4,%xmm0
4237 pxor %xmm0,%xmm3
4238 pslldq $4,%xmm0
4239 pxor %xmm3,%xmm0
4240 pslld $1,%xmm4
4241
4242 pxor %xmm2,%xmm0
4243 movdqu %xmm0,(%rax)
4244
4245 decl %r10d
4246 jz .Ldone_key256
4247
4248 pshufd $0xff,%xmm0,%xmm2
4249 pxor %xmm3,%xmm3
4250 .byte 102,15,56,221,211
4251
4252 movdqa %xmm1,%xmm3
4253 pslldq $4,%xmm1
4254 pxor %xmm1,%xmm3
4255 pslldq $4,%xmm1
4256 pxor %xmm1,%xmm3
4257 pslldq $4,%xmm1
4258 pxor %xmm3,%xmm1
4259
4260 pxor %xmm1,%xmm2
4261 movdqu %xmm2,16(%rax)
4262 leaq 32(%rax),%rax
4263 movdqa %xmm2,%xmm1
4264
4265 jmp .Loop_key256
4266
4267 .Ldone_key256:
4268 movl %esi,16(%rax)
4269 xorl %eax,%eax
4270 jmp .Lenc_key_ret
4271
4272 .align 16
4273 .Lbad_keybits:
4274 movq $-2,%rax
4275 .Lenc_key_ret:
4276 pxor %xmm0,%xmm0
4277 pxor %xmm1,%xmm1
4278 pxor %xmm2,%xmm2
4279 pxor %xmm3,%xmm3
4280 pxor %xmm4,%xmm4
4281 pxor %xmm5,%xmm5
4282 addq $8,%rsp
4283 .byte 0xf3,0xc3
4284 .LSEH_end_set_encrypt_key:
4285
4286 .align 16
4287 .Lkey_expansion_128:
4288 movups %xmm0,(%rax)
4289 leaq 16(%rax),%rax
4290 .Lkey_expansion_128_cold:
4291 shufps $16,%xmm0,%xmm4
4292 xorps %xmm4,%xmm0
4293 shufps $140,%xmm0,%xmm4
4294 xorps %xmm4,%xmm0
4295 shufps $255,%xmm1,%xmm1
4296 xorps %xmm1,%xmm0
4297 .byte 0xf3,0xc3
4298
4299 .align 16
4300 .Lkey_expansion_192a:
4301 movups %xmm0,(%rax)
4302 leaq 16(%rax),%rax
4303 .Lkey_expansion_192a_cold:
4304 movaps %xmm2,%xmm5
4305 .Lkey_expansion_192b_warm:
4306 shufps $16,%xmm0,%xmm4
4307 movdqa %xmm2,%xmm3
4308 xorps %xmm4,%xmm0
4309 shufps $140,%xmm0,%xmm4
4310 pslldq $4,%xmm3
4311 xorps %xmm4,%xmm0
4312 pshufd $85,%xmm1,%xmm1
4313 pxor %xmm3,%xmm2
4314 pxor %xmm1,%xmm0
4315 pshufd $255,%xmm0,%xmm3
4316 pxor %xmm3,%xmm2
4317 .byte 0xf3,0xc3
4318
4319 .align 16
4320 .Lkey_expansion_192b:
4321 movaps %xmm0,%xmm3
4322 shufps $68,%xmm0,%xmm5
4323 movups %xmm5,(%rax)
4324 shufps $78,%xmm2,%xmm3
4325 movups %xmm3,16(%rax)
4326 leaq 32(%rax),%rax
4327 jmp .Lkey_expansion_192b_warm
4328
4329 .align 16
4330 .Lkey_expansion_256a:
4331 movups %xmm2,(%rax)
4332 leaq 16(%rax),%rax
4333 .Lkey_expansion_256a_cold:
4334 shufps $16,%xmm0,%xmm4
4335 xorps %xmm4,%xmm0
4336 shufps $140,%xmm0,%xmm4
4337 xorps %xmm4,%xmm0
4338 shufps $255,%xmm1,%xmm1
4339 xorps %xmm1,%xmm0
4340 .byte 0xf3,0xc3
4341
4342 .align 16
4343 .Lkey_expansion_256b:
4344 movups %xmm0,(%rax)
4345 leaq 16(%rax),%rax
4346
4347 shufps $16,%xmm2,%xmm4
4348 xorps %xmm4,%xmm2
4349 shufps $140,%xmm2,%xmm4
4350 xorps %xmm4,%xmm2
4351 shufps $170,%xmm1,%xmm1
4352 xorps %xmm1,%xmm2
4353 .byte 0xf3,0xc3
4354 .size aesni_set_encrypt_key,.-aesni_set_encrypt_key
4355 .size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
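/* Constant data: byte-swap mask, block-counter increment vectors, the XTS
   tweak polynomial (0x87), and the rotate/rcon masks used by the alternate
   key-schedule paths, followed by the CRYPTOGAMS attribution string. */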
4356 .align 64
4357 .Lbswap_mask:
4358 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4359 .Lincrement32:
4360 .long 6,6,6,0
4361 .Lincrement64:
4362 .long 1,0,0,0
4363 .Lxts_magic:
4364 .long 0x87,0,1,0
4365 .Lincrement1:
4366 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4367 .Lkey_rotate:
4368 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4369 .Lkey_rotate192:
4370 .long 0x04070605,0x04070605,0x04070605,0x04070605
4371 .Lkey_rcon1:
4372 .long 1,1,1,1
4373 .Lkey_rcon1b:
4374 .long 0x1b,0x1b,0x1b,0x1b
4375
4376 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4377 .align 64
4378 #endif