Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P
5 .globl aesni_encrypt
6 .hidden aesni_encrypt
7 .type aesni_encrypt,@function
8 .align 16
9 aesni_encrypt:
10 movups (%rdi),%xmm2
11 movl 240(%rdx),%eax
12 movups (%rdx),%xmm0
13 movups 16(%rdx),%xmm1
14 leaq 32(%rdx),%rdx
15 xorps %xmm0,%xmm2
16 .Loop_enc1_1:
17 .byte 102,15,56,220,209
18 decl %eax
19 movups (%rdx),%xmm1
20 leaq 16(%rdx),%rdx
21 jnz .Loop_enc1_1
22 .byte 102,15,56,221,209
23 pxor %xmm0,%xmm0
24 pxor %xmm1,%xmm1
25 movups %xmm2,(%rsi)
26 pxor %xmm2,%xmm2
27 .byte 0xf3,0xc3
28 .size aesni_encrypt,.-aesni_encrypt
29
/*
 * void aesni_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
 * Mirror of aesni_encrypt for a single block: one AESDEC per round key
 * (66 0F 38 DE = aesdec, 66 0F 38 DF = aesdeclast).
 */
30 .globl aesni_decrypt
31 .hidden aesni_decrypt
32 .type aesni_decrypt,@function
33 .align 16
34 aesni_decrypt:
35 movups (%rdi),%xmm2 # load input block
36 movl 240(%rdx),%eax # round count
37 movups (%rdx),%xmm0
38 movups 16(%rdx),%xmm1
39 leaq 32(%rdx),%rdx
40 xorps %xmm0,%xmm2 # initial AddRoundKey
41 .Loop_dec1_2:
42 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
43 decl %eax
44 movups (%rdx),%xmm1
45 leaq 16(%rdx),%rdx
46 jnz .Loop_dec1_2
47 .byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
48 pxor %xmm0,%xmm0 # scrub key material
49 pxor %xmm1,%xmm1
50 movups %xmm2,(%rsi) # store plaintext
51 pxor %xmm2,%xmm2
52 .byte 0xf3,0xc3 # rep ret
53 .size aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2: encrypt two blocks (%xmm2,%xmm3) with interleaved
 * rounds to hide AESENC latency.
 * In: %rcx = key schedule, %eax = round count. Clobbers %xmm0,%xmm1,%rax.
 * %rax is turned into a negative byte offset (shll $4 / negq) so that the
 * same register serves as loop counter and round-key index; jnz consumes
 * the flags set by addq.
 */
54 .type _aesni_encrypt2,@function
55 .align 16
56 _aesni_encrypt2:
57 movups (%rcx),%xmm0 # round key 0
58 shll $4,%eax # rounds -> byte offset (x16)
59 movups 16(%rcx),%xmm1
60 xorps %xmm0,%xmm2 # AddRoundKey both blocks
61 xorps %xmm0,%xmm3
62 movups 32(%rcx),%xmm0
63 leaq 32(%rcx,%rax,1),%rcx # point past last round key
64 negq %rax
65 addq $16,%rax
66
67 .Lenc_loop2:
68 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
69 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
70 movups (%rcx,%rax,1),%xmm1
71 addq $32,%rax
72 .byte 102,15,56,220,208 # aesenc %xmm0,%xmm2
73 .byte 102,15,56,220,216 # aesenc %xmm0,%xmm3
74 movups -16(%rcx,%rax,1),%xmm0
75 jnz .Lenc_loop2 # loop until offset reaches 0
76
77 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
78 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
79 .byte 102,15,56,221,208 # aesenclast %xmm0,%xmm2
80 .byte 102,15,56,221,216 # aesenclast %xmm0,%xmm3
81 .byte 0xf3,0xc3 # rep ret
82 .size _aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: decrypt two blocks (%xmm2,%xmm3), interleaved.
 * Same register contract and indexing scheme as _aesni_encrypt2, with
 * AESDEC/AESDECLAST in place of AESENC/AESENCLAST.
 */
83 .type _aesni_decrypt2,@function
84 .align 16
85 _aesni_decrypt2:
86 movups (%rcx),%xmm0
87 shll $4,%eax # rounds -> byte offset
88 movups 16(%rcx),%xmm1
89 xorps %xmm0,%xmm2
90 xorps %xmm0,%xmm3
91 movups 32(%rcx),%xmm0
92 leaq 32(%rcx,%rax,1),%rcx
93 negq %rax
94 addq $16,%rax
95
96 .Ldec_loop2:
97 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
98 .byte 102,15,56,222,217 # aesdec %xmm1,%xmm3
99 movups (%rcx,%rax,1),%xmm1
100 addq $32,%rax
101 .byte 102,15,56,222,208 # aesdec %xmm0,%xmm2
102 .byte 102,15,56,222,216 # aesdec %xmm0,%xmm3
103 movups -16(%rcx,%rax,1),%xmm0
104 jnz .Ldec_loop2
105
106 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
107 .byte 102,15,56,222,217 # aesdec %xmm1,%xmm3
108 .byte 102,15,56,223,208 # aesdeclast %xmm0,%xmm2
109 .byte 102,15,56,223,216 # aesdeclast %xmm0,%xmm3
110 .byte 0xf3,0xc3 # rep ret
111 .size _aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: encrypt three blocks (%xmm2..%xmm4), interleaved.
 * In: %rcx = key schedule, %eax = round count. Clobbers %xmm0,%xmm1,%rax.
 */
112 .type _aesni_encrypt3,@function
113 .align 16
114 _aesni_encrypt3:
115 movups (%rcx),%xmm0
116 shll $4,%eax # rounds -> byte offset
117 movups 16(%rcx),%xmm1
118 xorps %xmm0,%xmm2 # AddRoundKey all three blocks
119 xorps %xmm0,%xmm3
120 xorps %xmm0,%xmm4
121 movups 32(%rcx),%xmm0
122 leaq 32(%rcx,%rax,1),%rcx
123 negq %rax
124 addq $16,%rax
125
126 .Lenc_loop3:
127 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
128 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
129 .byte 102,15,56,220,225 # aesenc %xmm1,%xmm4
130 movups (%rcx,%rax,1),%xmm1
131 addq $32,%rax
132 .byte 102,15,56,220,208 # aesenc %xmm0,%xmm2
133 .byte 102,15,56,220,216 # aesenc %xmm0,%xmm3
134 .byte 102,15,56,220,224 # aesenc %xmm0,%xmm4
135 movups -16(%rcx,%rax,1),%xmm0
136 jnz .Lenc_loop3
137
138 .byte 102,15,56,220,209
139 .byte 102,15,56,220,217
140 .byte 102,15,56,220,225
141 .byte 102,15,56,221,208 # aesenclast x3
142 .byte 102,15,56,221,216
143 .byte 102,15,56,221,224
144 .byte 0xf3,0xc3 # rep ret
145 .size _aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: decrypt three blocks (%xmm2..%xmm4), interleaved.
 * Same contract as _aesni_encrypt3 with AESDEC/AESDECLAST.
 */
146 .type _aesni_decrypt3,@function
147 .align 16
148 _aesni_decrypt3:
149 movups (%rcx),%xmm0
150 shll $4,%eax # rounds -> byte offset
151 movups 16(%rcx),%xmm1
152 xorps %xmm0,%xmm2
153 xorps %xmm0,%xmm3
154 xorps %xmm0,%xmm4
155 movups 32(%rcx),%xmm0
156 leaq 32(%rcx,%rax,1),%rcx
157 negq %rax
158 addq $16,%rax
159
160 .Ldec_loop3:
161 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
162 .byte 102,15,56,222,217 # aesdec %xmm1,%xmm3
163 .byte 102,15,56,222,225 # aesdec %xmm1,%xmm4
164 movups (%rcx,%rax,1),%xmm1
165 addq $32,%rax
166 .byte 102,15,56,222,208
167 .byte 102,15,56,222,216
168 .byte 102,15,56,222,224
169 movups -16(%rcx,%rax,1),%xmm0
170 jnz .Ldec_loop3
171
172 .byte 102,15,56,222,209
173 .byte 102,15,56,222,217
174 .byte 102,15,56,222,225
175 .byte 102,15,56,223,208 # aesdeclast x3
176 .byte 102,15,56,223,216
177 .byte 102,15,56,223,224
178 .byte 0xf3,0xc3 # rep ret
179 .size _aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: encrypt four blocks (%xmm2..%xmm5), interleaved.
 * In: %rcx = key schedule, %eax = round count. Clobbers %xmm0,%xmm1,%rax.
 * The 0F 1F 00 sequence is a 3-byte NOP, present only as decoder padding.
 */
180 .type _aesni_encrypt4,@function
181 .align 16
182 _aesni_encrypt4:
183 movups (%rcx),%xmm0
184 shll $4,%eax # rounds -> byte offset
185 movups 16(%rcx),%xmm1
186 xorps %xmm0,%xmm2
187 xorps %xmm0,%xmm3
188 xorps %xmm0,%xmm4
189 xorps %xmm0,%xmm5
190 movups 32(%rcx),%xmm0
191 leaq 32(%rcx,%rax,1),%rcx
192 negq %rax
193 .byte 0x0f,0x1f,0x00 # 3-byte nop (padding)
194 addq $16,%rax
195
196 .Lenc_loop4:
197 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2..%xmm5
198 .byte 102,15,56,220,217
199 .byte 102,15,56,220,225
200 .byte 102,15,56,220,233
201 movups (%rcx,%rax,1),%xmm1
202 addq $32,%rax
203 .byte 102,15,56,220,208 # aesenc %xmm0,%xmm2..%xmm5
204 .byte 102,15,56,220,216
205 .byte 102,15,56,220,224
206 .byte 102,15,56,220,232
207 movups -16(%rcx,%rax,1),%xmm0
208 jnz .Lenc_loop4
209
210 .byte 102,15,56,220,209
211 .byte 102,15,56,220,217
212 .byte 102,15,56,220,225
213 .byte 102,15,56,220,233
214 .byte 102,15,56,221,208 # aesenclast x4
215 .byte 102,15,56,221,216
216 .byte 102,15,56,221,224
217 .byte 102,15,56,221,232
218 .byte 0xf3,0xc3 # rep ret
219 .size _aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: decrypt four blocks (%xmm2..%xmm5), interleaved.
 * Same contract as _aesni_encrypt4 with AESDEC/AESDECLAST.
 */
220 .type _aesni_decrypt4,@function
221 .align 16
222 _aesni_decrypt4:
223 movups (%rcx),%xmm0
224 shll $4,%eax # rounds -> byte offset
225 movups 16(%rcx),%xmm1
226 xorps %xmm0,%xmm2
227 xorps %xmm0,%xmm3
228 xorps %xmm0,%xmm4
229 xorps %xmm0,%xmm5
230 movups 32(%rcx),%xmm0
231 leaq 32(%rcx,%rax,1),%rcx
232 negq %rax
233 .byte 0x0f,0x1f,0x00 # 3-byte nop (padding)
234 addq $16,%rax
235
236 .Ldec_loop4:
237 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2..%xmm5
238 .byte 102,15,56,222,217
239 .byte 102,15,56,222,225
240 .byte 102,15,56,222,233
241 movups (%rcx,%rax,1),%xmm1
242 addq $32,%rax
243 .byte 102,15,56,222,208 # aesdec %xmm0,%xmm2..%xmm5
244 .byte 102,15,56,222,216
245 .byte 102,15,56,222,224
246 .byte 102,15,56,222,232
247 movups -16(%rcx,%rax,1),%xmm0
248 jnz .Ldec_loop4
249
250 .byte 102,15,56,222,209
251 .byte 102,15,56,222,217
252 .byte 102,15,56,222,225
253 .byte 102,15,56,222,233
254 .byte 102,15,56,223,208 # aesdeclast x4
255 .byte 102,15,56,223,216
256 .byte 102,15,56,223,224
257 .byte 102,15,56,223,232
258 .byte 0xf3,0xc3 # rep ret
259 .size _aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: encrypt six blocks (%xmm2..%xmm7), interleaved.
 * In: %rcx = key schedule, %eax = round count. Clobbers %xmm0,%xmm1,%rax.
 * The first round for the first three blocks is issued during setup and
 * the code then jumps into the middle of the round loop
 * (.Lenc_loop6_enter), overlapping key-whitening of the later blocks with
 * the first AESENCs of the earlier ones.
 */
260 .type _aesni_encrypt6,@function
261 .align 16
262 _aesni_encrypt6:
263 movups (%rcx),%xmm0
264 shll $4,%eax # rounds -> byte offset
265 movups 16(%rcx),%xmm1
266 xorps %xmm0,%xmm2
267 pxor %xmm0,%xmm3
268 pxor %xmm0,%xmm4
269 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2 (round 1, early)
270 leaq 32(%rcx,%rax,1),%rcx
271 negq %rax
272 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
273 pxor %xmm0,%xmm5
274 pxor %xmm0,%xmm6
275 .byte 102,15,56,220,225 # aesenc %xmm1,%xmm4
276 pxor %xmm0,%xmm7
277 movups (%rcx,%rax,1),%xmm0
278 addq $16,%rax
279 jmp .Lenc_loop6_enter # finish round 1 inside the loop
280 .align 16
281 .Lenc_loop6:
282 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2..%xmm4
283 .byte 102,15,56,220,217
284 .byte 102,15,56,220,225
285 .Lenc_loop6_enter:
286 .byte 102,15,56,220,233 # aesenc %xmm1,%xmm5..%xmm7
287 .byte 102,15,56,220,241
288 .byte 102,15,56,220,249
289 movups (%rcx,%rax,1),%xmm1
290 addq $32,%rax
291 .byte 102,15,56,220,208 # aesenc %xmm0,%xmm2..%xmm7
292 .byte 102,15,56,220,216
293 .byte 102,15,56,220,224
294 .byte 102,15,56,220,232
295 .byte 102,15,56,220,240
296 .byte 102,15,56,220,248
297 movups -16(%rcx,%rax,1),%xmm0
298 jnz .Lenc_loop6
299
300 .byte 102,15,56,220,209
301 .byte 102,15,56,220,217
302 .byte 102,15,56,220,225
303 .byte 102,15,56,220,233
304 .byte 102,15,56,220,241
305 .byte 102,15,56,220,249
306 .byte 102,15,56,221,208 # aesenclast x6
307 .byte 102,15,56,221,216
308 .byte 102,15,56,221,224
309 .byte 102,15,56,221,232
310 .byte 102,15,56,221,240
311 .byte 102,15,56,221,248
312 .byte 0xf3,0xc3 # rep ret
313 .size _aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: decrypt six blocks (%xmm2..%xmm7), interleaved.
 * Same structure and contract as _aesni_encrypt6 (including the
 * jump-into-loop entry) with AESDEC/AESDECLAST.
 */
314 .type _aesni_decrypt6,@function
315 .align 16
316 _aesni_decrypt6:
317 movups (%rcx),%xmm0
318 shll $4,%eax # rounds -> byte offset
319 movups 16(%rcx),%xmm1
320 xorps %xmm0,%xmm2
321 pxor %xmm0,%xmm3
322 pxor %xmm0,%xmm4
323 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2 (round 1, early)
324 leaq 32(%rcx,%rax,1),%rcx
325 negq %rax
326 .byte 102,15,56,222,217
327 pxor %xmm0,%xmm5
328 pxor %xmm0,%xmm6
329 .byte 102,15,56,222,225
330 pxor %xmm0,%xmm7
331 movups (%rcx,%rax,1),%xmm0
332 addq $16,%rax
333 jmp .Ldec_loop6_enter # finish round 1 inside the loop
334 .align 16
335 .Ldec_loop6:
336 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2..%xmm4
337 .byte 102,15,56,222,217
338 .byte 102,15,56,222,225
339 .Ldec_loop6_enter:
340 .byte 102,15,56,222,233 # aesdec %xmm1,%xmm5..%xmm7
341 .byte 102,15,56,222,241
342 .byte 102,15,56,222,249
343 movups (%rcx,%rax,1),%xmm1
344 addq $32,%rax
345 .byte 102,15,56,222,208 # aesdec %xmm0,%xmm2..%xmm7
346 .byte 102,15,56,222,216
347 .byte 102,15,56,222,224
348 .byte 102,15,56,222,232
349 .byte 102,15,56,222,240
350 .byte 102,15,56,222,248
351 movups -16(%rcx,%rax,1),%xmm0
352 jnz .Ldec_loop6
353
354 .byte 102,15,56,222,209
355 .byte 102,15,56,222,217
356 .byte 102,15,56,222,225
357 .byte 102,15,56,222,233
358 .byte 102,15,56,222,241
359 .byte 102,15,56,222,249
360 .byte 102,15,56,223,208 # aesdeclast x6
361 .byte 102,15,56,223,216
362 .byte 102,15,56,223,224
363 .byte 102,15,56,223,232
364 .byte 102,15,56,223,240
365 .byte 102,15,56,223,248
366 .byte 0xf3,0xc3 # rep ret
367 .size _aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8: encrypt eight blocks (%xmm2..%xmm9), interleaved.
 * In: %rcx = key schedule, %eax = round count. Clobbers %xmm0,%xmm1,%rax.
 * Rows with a 102,68,... prefix carry a REX.R byte (0x44) and operate on
 * %xmm8/%xmm9. As in the 6-block variant, round 1 of the first two blocks
 * is issued during setup and the code jumps into the loop body.
 */
368 .type _aesni_encrypt8,@function
369 .align 16
370 _aesni_encrypt8:
371 movups (%rcx),%xmm0
372 shll $4,%eax # rounds -> byte offset
373 movups 16(%rcx),%xmm1
374 xorps %xmm0,%xmm2
375 xorps %xmm0,%xmm3
376 pxor %xmm0,%xmm4
377 pxor %xmm0,%xmm5
378 pxor %xmm0,%xmm6
379 leaq 32(%rcx,%rax,1),%rcx
380 negq %rax
381 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2 (round 1, early)
382 pxor %xmm0,%xmm7
383 pxor %xmm0,%xmm8
384 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
385 pxor %xmm0,%xmm9
386 movups (%rcx,%rax,1),%xmm0
387 addq $16,%rax
388 jmp .Lenc_loop8_inner # finish round 1 inside the loop
389 .align 16
390 .Lenc_loop8:
391 .byte 102,15,56,220,209
392 .byte 102,15,56,220,217
393 .Lenc_loop8_inner:
394 .byte 102,15,56,220,225 # aesenc %xmm1,%xmm4..%xmm7
395 .byte 102,15,56,220,233
396 .byte 102,15,56,220,241
397 .byte 102,15,56,220,249
398 .byte 102,68,15,56,220,193 # aesenc %xmm1,%xmm8
399 .byte 102,68,15,56,220,201 # aesenc %xmm1,%xmm9
400 .Lenc_loop8_enter:
401 movups (%rcx,%rax,1),%xmm1
402 addq $32,%rax
403 .byte 102,15,56,220,208 # aesenc %xmm0,%xmm2..%xmm7
404 .byte 102,15,56,220,216
405 .byte 102,15,56,220,224
406 .byte 102,15,56,220,232
407 .byte 102,15,56,220,240
408 .byte 102,15,56,220,248
409 .byte 102,68,15,56,220,192 # aesenc %xmm0,%xmm8
410 .byte 102,68,15,56,220,200 # aesenc %xmm0,%xmm9
411 movups -16(%rcx,%rax,1),%xmm0
412 jnz .Lenc_loop8
413
414 .byte 102,15,56,220,209
415 .byte 102,15,56,220,217
416 .byte 102,15,56,220,225
417 .byte 102,15,56,220,233
418 .byte 102,15,56,220,241
419 .byte 102,15,56,220,249
420 .byte 102,68,15,56,220,193
421 .byte 102,68,15,56,220,201
422 .byte 102,15,56,221,208 # aesenclast x8
423 .byte 102,15,56,221,216
424 .byte 102,15,56,221,224
425 .byte 102,15,56,221,232
426 .byte 102,15,56,221,240
427 .byte 102,15,56,221,248
428 .byte 102,68,15,56,221,192
429 .byte 102,68,15,56,221,200
430 .byte 0xf3,0xc3 # rep ret
431 .size _aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8: decrypt eight blocks (%xmm2..%xmm9), interleaved.
 * Same structure and contract as _aesni_encrypt8 with AESDEC/AESDECLAST.
 */
432 .type _aesni_decrypt8,@function
433 .align 16
434 _aesni_decrypt8:
435 movups (%rcx),%xmm0
436 shll $4,%eax # rounds -> byte offset
437 movups 16(%rcx),%xmm1
438 xorps %xmm0,%xmm2
439 xorps %xmm0,%xmm3
440 pxor %xmm0,%xmm4
441 pxor %xmm0,%xmm5
442 pxor %xmm0,%xmm6
443 leaq 32(%rcx,%rax,1),%rcx
444 negq %rax
445 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2 (round 1, early)
446 pxor %xmm0,%xmm7
447 pxor %xmm0,%xmm8
448 .byte 102,15,56,222,217
449 pxor %xmm0,%xmm9
450 movups (%rcx,%rax,1),%xmm0
451 addq $16,%rax
452 jmp .Ldec_loop8_inner # finish round 1 inside the loop
453 .align 16
454 .Ldec_loop8:
455 .byte 102,15,56,222,209
456 .byte 102,15,56,222,217
457 .Ldec_loop8_inner:
458 .byte 102,15,56,222,225 # aesdec %xmm1,%xmm4..%xmm7
459 .byte 102,15,56,222,233
460 .byte 102,15,56,222,241
461 .byte 102,15,56,222,249
462 .byte 102,68,15,56,222,193 # aesdec %xmm1,%xmm8
463 .byte 102,68,15,56,222,201 # aesdec %xmm1,%xmm9
464 .Ldec_loop8_enter:
465 movups (%rcx,%rax,1),%xmm1
466 addq $32,%rax
467 .byte 102,15,56,222,208 # aesdec %xmm0,%xmm2..%xmm7
468 .byte 102,15,56,222,216
469 .byte 102,15,56,222,224
470 .byte 102,15,56,222,232
471 .byte 102,15,56,222,240
472 .byte 102,15,56,222,248
473 .byte 102,68,15,56,222,192 # aesdec %xmm0,%xmm8
474 .byte 102,68,15,56,222,200 # aesdec %xmm0,%xmm9
475 movups -16(%rcx,%rax,1),%xmm0
476 jnz .Ldec_loop8
477
478 .byte 102,15,56,222,209
479 .byte 102,15,56,222,217
480 .byte 102,15,56,222,225
481 .byte 102,15,56,222,233
482 .byte 102,15,56,222,241
483 .byte 102,15,56,222,249
484 .byte 102,68,15,56,222,193
485 .byte 102,68,15,56,222,201
486 .byte 102,15,56,223,208 # aesdeclast x8
487 .byte 102,15,56,223,216
488 .byte 102,15,56,223,224
489 .byte 102,15,56,223,232
490 .byte 102,15,56,223,240
491 .byte 102,15,56,223,248
492 .byte 102,68,15,56,223,192
493 .byte 102,68,15,56,223,200
494 .byte 0xf3,0xc3 # rep ret
495 .size _aesni_decrypt8,.-_aesni_decrypt8
/*
 * void aesni_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t len,
 *                        const AES_KEY *key, int enc)
 * %rdi = in, %rsi = out, %rdx = len (rounded down to a multiple of 16),
 * %rcx = key, %r8d = enc (nonzero: encrypt, zero: decrypt).
 * Bulk path handles 8 blocks per iteration via _aesni_encrypt8 /
 * _aesni_decrypt8; a tail dispatcher handles the remaining 1..7 blocks.
 * %r11/%r10d cache the key pointer and round count across the helper
 * calls, which clobber %rcx/%eax. The decrypt path zeroes every data-
 * bearing xmm register before returning.
 */
496 .globl aesni_ecb_encrypt
497 .hidden aesni_ecb_encrypt
498 .type aesni_ecb_encrypt,@function
499 .align 16
500 aesni_ecb_encrypt:
501 andq $-16,%rdx # len &= ~15 (whole blocks only)
502 jz .Lecb_ret
503
504 movl 240(%rcx),%eax # round count
505 movups (%rcx),%xmm0
506 movq %rcx,%r11 # save key ptr across helper calls
507 movl %eax,%r10d # save round count likewise
508 testl %r8d,%r8d # enc == 0 -> decrypt
509 jz .Lecb_decrypt
510
511 cmpq $0x80,%rdx # fewer than 8 blocks?
512 jb .Lecb_enc_tail
513
/* Encrypt: preload first 8 blocks, then software-pipeline the loop so
   stores of batch N overlap loads of batch N+1. */
514 movdqu (%rdi),%xmm2
515 movdqu 16(%rdi),%xmm3
516 movdqu 32(%rdi),%xmm4
517 movdqu 48(%rdi),%xmm5
518 movdqu 64(%rdi),%xmm6
519 movdqu 80(%rdi),%xmm7
520 movdqu 96(%rdi),%xmm8
521 movdqu 112(%rdi),%xmm9
522 leaq 128(%rdi),%rdi
523 subq $0x80,%rdx
524 jmp .Lecb_enc_loop8_enter
525 .align 16
526 .Lecb_enc_loop8:
527 movups %xmm2,(%rsi) # store previous batch / load next
528 movq %r11,%rcx # restore key ptr for helper
529 movdqu (%rdi),%xmm2
530 movl %r10d,%eax # restore round count for helper
531 movups %xmm3,16(%rsi)
532 movdqu 16(%rdi),%xmm3
533 movups %xmm4,32(%rsi)
534 movdqu 32(%rdi),%xmm4
535 movups %xmm5,48(%rsi)
536 movdqu 48(%rdi),%xmm5
537 movups %xmm6,64(%rsi)
538 movdqu 64(%rdi),%xmm6
539 movups %xmm7,80(%rsi)
540 movdqu 80(%rdi),%xmm7
541 movups %xmm8,96(%rsi)
542 movdqu 96(%rdi),%xmm8
543 movups %xmm9,112(%rsi)
544 leaq 128(%rsi),%rsi
545 movdqu 112(%rdi),%xmm9
546 leaq 128(%rdi),%rdi
547 .Lecb_enc_loop8_enter:
548
549 call _aesni_encrypt8
550
551 subq $0x80,%rdx
552 jnc .Lecb_enc_loop8
553
/* Flush the final 8-block batch. */
554 movups %xmm2,(%rsi)
555 movq %r11,%rcx
556 movups %xmm3,16(%rsi)
557 movl %r10d,%eax
558 movups %xmm4,32(%rsi)
559 movups %xmm5,48(%rsi)
560 movups %xmm6,64(%rsi)
561 movups %xmm7,80(%rsi)
562 movups %xmm8,96(%rsi)
563 movups %xmm9,112(%rsi)
564 leaq 128(%rsi),%rsi
565 addq $0x80,%rdx # undo the borrow; remaining bytes
566 jz .Lecb_ret
567
/* Tail: 1..7 blocks, dispatched by comparing the byte count. */
568 .Lecb_enc_tail:
569 movups (%rdi),%xmm2
570 cmpq $0x20,%rdx
571 jb .Lecb_enc_one
572 movups 16(%rdi),%xmm3
573 je .Lecb_enc_two
574 movups 32(%rdi),%xmm4
575 cmpq $0x40,%rdx
576 jb .Lecb_enc_three
577 movups 48(%rdi),%xmm5
578 je .Lecb_enc_four
579 movups 64(%rdi),%xmm6
580 cmpq $0x60,%rdx
581 jb .Lecb_enc_five
582 movups 80(%rdi),%xmm7
583 je .Lecb_enc_six
584 movdqu 96(%rdi),%xmm8 # 7 blocks: pad batch with zeroed xmm9
585 xorps %xmm9,%xmm9
586 call _aesni_encrypt8
587 movups %xmm2,(%rsi)
588 movups %xmm3,16(%rsi)
589 movups %xmm4,32(%rsi)
590 movups %xmm5,48(%rsi)
591 movups %xmm6,64(%rsi)
592 movups %xmm7,80(%rsi)
593 movups %xmm8,96(%rsi)
594 jmp .Lecb_ret
595 .align 16
596 .Lecb_enc_one:
597 movups (%rcx),%xmm0 # inline single-block encrypt
598 movups 16(%rcx),%xmm1
599 leaq 32(%rcx),%rcx
600 xorps %xmm0,%xmm2
601 .Loop_enc1_3:
602 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
603 decl %eax
604 movups (%rcx),%xmm1
605 leaq 16(%rcx),%rcx
606 jnz .Loop_enc1_3
607 .byte 102,15,56,221,209 # aesenclast %xmm1,%xmm2
608 movups %xmm2,(%rsi)
609 jmp .Lecb_ret
610 .align 16
611 .Lecb_enc_two:
612 call _aesni_encrypt2
613 movups %xmm2,(%rsi)
614 movups %xmm3,16(%rsi)
615 jmp .Lecb_ret
616 .align 16
617 .Lecb_enc_three:
618 call _aesni_encrypt3
619 movups %xmm2,(%rsi)
620 movups %xmm3,16(%rsi)
621 movups %xmm4,32(%rsi)
622 jmp .Lecb_ret
623 .align 16
624 .Lecb_enc_four:
625 call _aesni_encrypt4
626 movups %xmm2,(%rsi)
627 movups %xmm3,16(%rsi)
628 movups %xmm4,32(%rsi)
629 movups %xmm5,48(%rsi)
630 jmp .Lecb_ret
631 .align 16
632 .Lecb_enc_five:
633 xorps %xmm7,%xmm7 # 5 blocks: pad 6-block helper
634 call _aesni_encrypt6
635 movups %xmm2,(%rsi)
636 movups %xmm3,16(%rsi)
637 movups %xmm4,32(%rsi)
638 movups %xmm5,48(%rsi)
639 movups %xmm6,64(%rsi)
640 jmp .Lecb_ret
641 .align 16
642 .Lecb_enc_six:
643 call _aesni_encrypt6
644 movups %xmm2,(%rsi)
645 movups %xmm3,16(%rsi)
646 movups %xmm4,32(%rsi)
647 movups %xmm5,48(%rsi)
648 movups %xmm6,64(%rsi)
649 movups %xmm7,80(%rsi)
650 jmp .Lecb_ret
651
/* Decrypt side: same pipelined structure; additionally scrubs each xmm
   register with pxor immediately after its plaintext is stored. */
652 .align 16
653 .Lecb_decrypt:
654 cmpq $0x80,%rdx
655 jb .Lecb_dec_tail
656
657 movdqu (%rdi),%xmm2
658 movdqu 16(%rdi),%xmm3
659 movdqu 32(%rdi),%xmm4
660 movdqu 48(%rdi),%xmm5
661 movdqu 64(%rdi),%xmm6
662 movdqu 80(%rdi),%xmm7
663 movdqu 96(%rdi),%xmm8
664 movdqu 112(%rdi),%xmm9
665 leaq 128(%rdi),%rdi
666 subq $0x80,%rdx
667 jmp .Lecb_dec_loop8_enter
668 .align 16
669 .Lecb_dec_loop8:
670 movups %xmm2,(%rsi)
671 movq %r11,%rcx
672 movdqu (%rdi),%xmm2
673 movl %r10d,%eax
674 movups %xmm3,16(%rsi)
675 movdqu 16(%rdi),%xmm3
676 movups %xmm4,32(%rsi)
677 movdqu 32(%rdi),%xmm4
678 movups %xmm5,48(%rsi)
679 movdqu 48(%rdi),%xmm5
680 movups %xmm6,64(%rsi)
681 movdqu 64(%rdi),%xmm6
682 movups %xmm7,80(%rsi)
683 movdqu 80(%rdi),%xmm7
684 movups %xmm8,96(%rsi)
685 movdqu 96(%rdi),%xmm8
686 movups %xmm9,112(%rsi)
687 leaq 128(%rsi),%rsi
688 movdqu 112(%rdi),%xmm9
689 leaq 128(%rdi),%rdi
690 .Lecb_dec_loop8_enter:
691
692 call _aesni_decrypt8
693
694 movups (%r11),%xmm0
695 subq $0x80,%rdx
696 jnc .Lecb_dec_loop8
697
698 movups %xmm2,(%rsi) # flush final batch, scrubbing as we go
699 pxor %xmm2,%xmm2
700 movq %r11,%rcx
701 movups %xmm3,16(%rsi)
702 pxor %xmm3,%xmm3
703 movl %r10d,%eax
704 movups %xmm4,32(%rsi)
705 pxor %xmm4,%xmm4
706 movups %xmm5,48(%rsi)
707 pxor %xmm5,%xmm5
708 movups %xmm6,64(%rsi)
709 pxor %xmm6,%xmm6
710 movups %xmm7,80(%rsi)
711 pxor %xmm7,%xmm7
712 movups %xmm8,96(%rsi)
713 pxor %xmm8,%xmm8
714 movups %xmm9,112(%rsi)
715 pxor %xmm9,%xmm9
716 leaq 128(%rsi),%rsi
717 addq $0x80,%rdx
718 jz .Lecb_ret
719
720 .Lecb_dec_tail:
721 movups (%rdi),%xmm2
722 cmpq $0x20,%rdx
723 jb .Lecb_dec_one
724 movups 16(%rdi),%xmm3
725 je .Lecb_dec_two
726 movups 32(%rdi),%xmm4
727 cmpq $0x40,%rdx
728 jb .Lecb_dec_three
729 movups 48(%rdi),%xmm5
730 je .Lecb_dec_four
731 movups 64(%rdi),%xmm6
732 cmpq $0x60,%rdx
733 jb .Lecb_dec_five
734 movups 80(%rdi),%xmm7
735 je .Lecb_dec_six
736 movups 96(%rdi),%xmm8 # 7 blocks: pad with zeroed xmm9
737 movups (%rcx),%xmm0
738 xorps %xmm9,%xmm9
739 call _aesni_decrypt8
740 movups %xmm2,(%rsi)
741 pxor %xmm2,%xmm2
742 movups %xmm3,16(%rsi)
743 pxor %xmm3,%xmm3
744 movups %xmm4,32(%rsi)
745 pxor %xmm4,%xmm4
746 movups %xmm5,48(%rsi)
747 pxor %xmm5,%xmm5
748 movups %xmm6,64(%rsi)
749 pxor %xmm6,%xmm6
750 movups %xmm7,80(%rsi)
751 pxor %xmm7,%xmm7
752 movups %xmm8,96(%rsi)
753 pxor %xmm8,%xmm8
754 pxor %xmm9,%xmm9
755 jmp .Lecb_ret
756 .align 16
757 .Lecb_dec_one:
758 movups (%rcx),%xmm0 # inline single-block decrypt
759 movups 16(%rcx),%xmm1
760 leaq 32(%rcx),%rcx
761 xorps %xmm0,%xmm2
762 .Loop_dec1_4:
763 .byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
764 decl %eax
765 movups (%rcx),%xmm1
766 leaq 16(%rcx),%rcx
767 jnz .Loop_dec1_4
768 .byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
769 movups %xmm2,(%rsi)
770 pxor %xmm2,%xmm2
771 jmp .Lecb_ret
772 .align 16
773 .Lecb_dec_two:
774 call _aesni_decrypt2
775 movups %xmm2,(%rsi)
776 pxor %xmm2,%xmm2
777 movups %xmm3,16(%rsi)
778 pxor %xmm3,%xmm3
779 jmp .Lecb_ret
780 .align 16
781 .Lecb_dec_three:
782 call _aesni_decrypt3
783 movups %xmm2,(%rsi)
784 pxor %xmm2,%xmm2
785 movups %xmm3,16(%rsi)
786 pxor %xmm3,%xmm3
787 movups %xmm4,32(%rsi)
788 pxor %xmm4,%xmm4
789 jmp .Lecb_ret
790 .align 16
791 .Lecb_dec_four:
792 call _aesni_decrypt4
793 movups %xmm2,(%rsi)
794 pxor %xmm2,%xmm2
795 movups %xmm3,16(%rsi)
796 pxor %xmm3,%xmm3
797 movups %xmm4,32(%rsi)
798 pxor %xmm4,%xmm4
799 movups %xmm5,48(%rsi)
800 pxor %xmm5,%xmm5
801 jmp .Lecb_ret
802 .align 16
803 .Lecb_dec_five:
804 xorps %xmm7,%xmm7 # 5 blocks: pad 6-block helper
805 call _aesni_decrypt6
806 movups %xmm2,(%rsi)
807 pxor %xmm2,%xmm2
808 movups %xmm3,16(%rsi)
809 pxor %xmm3,%xmm3
810 movups %xmm4,32(%rsi)
811 pxor %xmm4,%xmm4
812 movups %xmm5,48(%rsi)
813 pxor %xmm5,%xmm5
814 movups %xmm6,64(%rsi)
815 pxor %xmm6,%xmm6
816 pxor %xmm7,%xmm7
817 jmp .Lecb_ret
818 .align 16
819 .Lecb_dec_six:
820 call _aesni_decrypt6
821 movups %xmm2,(%rsi)
822 pxor %xmm2,%xmm2
823 movups %xmm3,16(%rsi)
824 pxor %xmm3,%xmm3
825 movups %xmm4,32(%rsi)
826 pxor %xmm4,%xmm4
827 movups %xmm5,48(%rsi)
828 pxor %xmm5,%xmm5
829 movups %xmm6,64(%rsi)
830 pxor %xmm6,%xmm6
831 movups %xmm7,80(%rsi)
832 pxor %xmm7,%xmm7
833
834 .Lecb_ret:
835 xorps %xmm0,%xmm0 # scrub round-key copies
836 pxor %xmm1,%xmm1
837 .byte 0xf3,0xc3 # rep ret
838 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks(in=%rdi, out=%rsi, blocks=%rdx, key=%rcx,
 *                            ivec=%r8, cmac=%r9)
 * CCM mode with a 64-bit counter field: each outer iteration runs the
 * counter-block encryption (%xmm2) and the CMAC absorption (%xmm3)
 * through the cipher together, two blocks per pass, sharing round keys.
 * .Lincrement64 / .Lbswap_mask are constant tables defined elsewhere in
 * this file (outside this view). 102,15,56,0,... is pshufb (byte-swap of
 * the counter via .Lbswap_mask); the updated CMAC is written back to
 * (%r9) and all data-bearing xmm registers are scrubbed on exit.
 */
839 .globl aesni_ccm64_encrypt_blocks
840 .hidden aesni_ccm64_encrypt_blocks
841 .type aesni_ccm64_encrypt_blocks,@function
842 .align 16
843 aesni_ccm64_encrypt_blocks:
844 movl 240(%rcx),%eax # round count
845 movdqu (%r8),%xmm6 # counter block (ivec)
846 movdqa .Lincrement64(%rip),%xmm9
847 movdqa .Lbswap_mask(%rip),%xmm7
848
849 shll $4,%eax # rounds -> byte offset
850 movl $16,%r10d
851 leaq 0(%rcx),%r11 # save key ptr
852 movdqu (%r9),%xmm3 # running CMAC
853 movdqa %xmm6,%xmm2
854 leaq 32(%rcx,%rax,1),%rcx # end of key schedule
855 .byte 102,15,56,0,247 # pshufb %xmm7,%xmm6 (byte-swap counter)
856 subq %rax,%r10 # negative key-offset for the loop
857 jmp .Lccm64_enc_outer
858 .align 16
859 .Lccm64_enc_outer:
860 movups (%r11),%xmm0 # round key 0
861 movq %r10,%rax
862 movups (%rdi),%xmm8 # plaintext block
863
864 xorps %xmm0,%xmm2 # whiten counter block
865 movups 16(%r11),%xmm1
866 xorps %xmm8,%xmm0
867 xorps %xmm0,%xmm3 # CMAC ^= plaintext ^ key0 (xor-in)
868 movups 32(%r11),%xmm0
869
870 .Lccm64_enc2_loop: # two blocks per pass, shared keys
871 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
872 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
873 movups (%rcx,%rax,1),%xmm1
874 addq $32,%rax
875 .byte 102,15,56,220,208
876 .byte 102,15,56,220,216
877 movups -16(%rcx,%rax,1),%xmm0
878 jnz .Lccm64_enc2_loop
879 .byte 102,15,56,220,209
880 .byte 102,15,56,220,217
881 paddq %xmm9,%xmm6 # counter += 1 (64-bit lane)
882 decq %rdx # one block consumed
883 .byte 102,15,56,221,208 # aesenclast x2
884 .byte 102,15,56,221,216
885
886 leaq 16(%rdi),%rdi
887 xorps %xmm2,%xmm8 # ciphertext = plaintext ^ keystream
888 movdqa %xmm6,%xmm2
889 movups %xmm8,(%rsi)
890 .byte 102,15,56,0,215 # pshufb %xmm7,%xmm2 (re-swap counter)
891 leaq 16(%rsi),%rsi
892 jnz .Lccm64_enc_outer
893
894 pxor %xmm0,%xmm0 # scrub sensitive registers
895 pxor %xmm1,%xmm1
896 pxor %xmm2,%xmm2
897 movups %xmm3,(%r9) # write back CMAC
898 pxor %xmm3,%xmm3
899 pxor %xmm8,%xmm8
900 pxor %xmm6,%xmm6
901 .byte 0xf3,0xc3 # rep ret
902 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks(in=%rdi, out=%rsi, blocks=%rdx, key=%rcx,
 *                            ivec=%r8, cmac=%r9)
 * CCM decrypt: the first counter block is encrypted stand-alone, then
 * each outer pass decrypts one block and absorbs the recovered plaintext
 * into the CMAC while encrypting the next counter block (two cipher
 * streams sharing round keys). On the final block the loop breaks out to
 * finish only the CMAC (.Lccm64_dec_break). Constants .Lincrement64 /
 * .Lbswap_mask live elsewhere in this file (outside this view).
 */
903 .globl aesni_ccm64_decrypt_blocks
904 .hidden aesni_ccm64_decrypt_blocks
905 .type aesni_ccm64_decrypt_blocks,@function
906 .align 16
907 aesni_ccm64_decrypt_blocks:
908 movl 240(%rcx),%eax # round count
909 movups (%r8),%xmm6 # counter block (ivec)
910 movdqu (%r9),%xmm3 # running CMAC
911 movdqa .Lincrement64(%rip),%xmm9
912 movdqa .Lbswap_mask(%rip),%xmm7
913
914 movaps %xmm6,%xmm2
915 movl %eax,%r10d # save rounds; helper-style loop clobbers %eax
916 movq %rcx,%r11 # save key ptr
917 .byte 102,15,56,0,247 # pshufb %xmm7,%xmm6 (byte-swap counter)
/* Encrypt the first counter block inline. */
918 movups (%rcx),%xmm0
919 movups 16(%rcx),%xmm1
920 leaq 32(%rcx),%rcx
921 xorps %xmm0,%xmm2
922 .Loop_enc1_5:
923 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
924 decl %eax
925 movups (%rcx),%xmm1
926 leaq 16(%rcx),%rcx
927 jnz .Loop_enc1_5
928 .byte 102,15,56,221,209 # aesenclast %xmm1,%xmm2
929 shll $4,%r10d # rounds -> byte offset
930 movl $16,%eax
931 movups (%rdi),%xmm8 # first ciphertext block
932 paddq %xmm9,%xmm6 # counter += 1
933 leaq 16(%rdi),%rdi
934 subq %r10,%rax # negative key-offset for inner loop
935 leaq 32(%r11,%r10,1),%rcx # end of key schedule
936 movq %rax,%r10
937 jmp .Lccm64_dec_outer
938 .align 16
939 .Lccm64_dec_outer:
940 xorps %xmm2,%xmm8 # plaintext = ciphertext ^ keystream
941 movdqa %xmm6,%xmm2
942 movups %xmm8,(%rsi) # emit plaintext
943 leaq 16(%rsi),%rsi
944 .byte 102,15,56,0,215 # pshufb %xmm7,%xmm2 (re-swap counter)
945
946 subq $1,%rdx
947 jz .Lccm64_dec_break # last block: only CMAC remains
948
949 movups (%r11),%xmm0
950 movq %r10,%rax
951 movups 16(%r11),%xmm1
952 xorps %xmm0,%xmm8
953 xorps %xmm0,%xmm2 # whiten next counter block
954 xorps %xmm8,%xmm3 # CMAC ^= plaintext (whitened)
955 movups 32(%r11),%xmm0
956 jmp .Lccm64_dec2_loop
957 .align 16
958 .Lccm64_dec2_loop: # counter + CMAC share round keys
959 .byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
960 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
961 movups (%rcx,%rax,1),%xmm1
962 addq $32,%rax
963 .byte 102,15,56,220,208
964 .byte 102,15,56,220,216
965 movups -16(%rcx,%rax,1),%xmm0
966 jnz .Lccm64_dec2_loop
967 movups (%rdi),%xmm8 # prefetch next ciphertext block
968 paddq %xmm9,%xmm6 # counter += 1
969 .byte 102,15,56,220,209
970 .byte 102,15,56,220,217
971 .byte 102,15,56,221,208 # aesenclast x2
972 .byte 102,15,56,221,216
973 leaq 16(%rdi),%rdi
974 jmp .Lccm64_dec_outer
975
976 .align 16
977 .Lccm64_dec_break:
/* Fold the final plaintext block into the CMAC (single-stream). */
978 movl 240(%r11),%eax
979 movups (%r11),%xmm0
980 movups 16(%r11),%xmm1
981 xorps %xmm0,%xmm8
982 leaq 32(%r11),%r11
983 xorps %xmm8,%xmm3
984 .Loop_enc1_6:
985 .byte 102,15,56,220,217 # aesenc %xmm1,%xmm3
986 decl %eax
987 movups (%r11),%xmm1
988 leaq 16(%r11),%r11
989 jnz .Loop_enc1_6
990 .byte 102,15,56,221,217 # aesenclast %xmm1,%xmm3
991 pxor %xmm0,%xmm0 # scrub sensitive registers
992 pxor %xmm1,%xmm1
993 pxor %xmm2,%xmm2
994 movups %xmm3,(%r9) # write back CMAC
995 pxor %xmm3,%xmm3
996 pxor %xmm8,%xmm8
997 pxor %xmm6,%xmm6
998 .byte 0xf3,0xc3 # rep ret
999 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1001 .globl aesni_ctr32_encrypt_blocks
1002 .hidden aesni_ctr32_encrypt_blocks
1003 .type aesni_ctr32_encrypt_blocks,@function
1004 .align 16
1005 aesni_ctr32_encrypt_blocks:
1006 cmpq $1,%rdx
1007 jne .Lctr32_bulk
1008
1009
1010
1011 movups (%r8),%xmm2
1012 movups (%rdi),%xmm3
1013 movl 240(%rcx),%edx
1014 movups (%rcx),%xmm0
1015 movups 16(%rcx),%xmm1
1016 leaq 32(%rcx),%rcx
1017 xorps %xmm0,%xmm2
1018 .Loop_enc1_7:
1019 .byte 102,15,56,220,209
1020 decl %edx
1021 movups (%rcx),%xmm1
1022 leaq 16(%rcx),%rcx
1023 jnz .Loop_enc1_7
1024 .byte 102,15,56,221,209
1025 pxor %xmm0,%xmm0
1026 pxor %xmm1,%xmm1
1027 xorps %xmm3,%xmm2
1028 pxor %xmm3,%xmm3
1029 movups %xmm2,(%rsi)
1030 xorps %xmm2,%xmm2
1031 jmp .Lctr32_epilogue
1032
1033 .align 16
1034 .Lctr32_bulk:
1035 leaq (%rsp),%rax
1036 pushq %rbp
1037 subq $128,%rsp
1038 andq $-16,%rsp
1039 leaq -8(%rax),%rbp
1040
1041
1042
1043
1044 movdqu (%r8),%xmm2
1045 movdqu (%rcx),%xmm0
1046 movl 12(%r8),%r8d
1047 pxor %xmm0,%xmm2
1048 movl 12(%rcx),%r11d
1049 movdqa %xmm2,0(%rsp)
1050 bswapl %r8d
1051 movdqa %xmm2,%xmm3
1052 movdqa %xmm2,%xmm4
1053 movdqa %xmm2,%xmm5
1054 movdqa %xmm2,64(%rsp)
1055 movdqa %xmm2,80(%rsp)
1056 movdqa %xmm2,96(%rsp)
1057 movq %rdx,%r10
1058 movdqa %xmm2,112(%rsp)
1059
1060 leaq 1(%r8),%rax
1061 leaq 2(%r8),%rdx
1062 bswapl %eax
1063 bswapl %edx
1064 xorl %r11d,%eax
1065 xorl %r11d,%edx
1066 .byte 102,15,58,34,216,3
1067 leaq 3(%r8),%rax
1068 movdqa %xmm3,16(%rsp)
1069 .byte 102,15,58,34,226,3
1070 bswapl %eax
1071 movq %r10,%rdx
1072 leaq 4(%r8),%r10
1073 movdqa %xmm4,32(%rsp)
1074 xorl %r11d,%eax
1075 bswapl %r10d
1076 .byte 102,15,58,34,232,3
1077 xorl %r11d,%r10d
1078 movdqa %xmm5,48(%rsp)
1079 leaq 5(%r8),%r9
1080 movl %r10d,64+12(%rsp)
1081 bswapl %r9d
1082 leaq 6(%r8),%r10
1083 movl 240(%rcx),%eax
1084 xorl %r11d,%r9d
1085 bswapl %r10d
1086 movl %r9d,80+12(%rsp)
1087 xorl %r11d,%r10d
1088 leaq 7(%r8),%r9
1089 movl %r10d,96+12(%rsp)
1090 bswapl %r9d
1091 movl OPENSSL_ia32cap_P+4(%rip),%r10d
1092 xorl %r11d,%r9d
1093 andl $71303168,%r10d
1094 movl %r9d,112+12(%rsp)
1095
1096 movups 16(%rcx),%xmm1
1097
1098 movdqa 64(%rsp),%xmm6
1099 movdqa 80(%rsp),%xmm7
1100
1101 cmpq $8,%rdx
1102 jb .Lctr32_tail
1103
1104 subq $6,%rdx
1105 cmpl $4194304,%r10d
1106 je .Lctr32_6x
1107
1108 leaq 128(%rcx),%rcx
1109 subq $2,%rdx
1110 jmp .Lctr32_loop8
1111
1112 .align 16
1113 .Lctr32_6x:
1114 shll $4,%eax
1115 movl $48,%r10d
1116 bswapl %r11d
1117 leaq 32(%rcx,%rax,1),%rcx
1118 subq %rax,%r10
1119 jmp .Lctr32_loop6
1120
1121 .align 16
1122 .Lctr32_loop6:
1123 addl $6,%r8d
1124 movups -48(%rcx,%r10,1),%xmm0
1125 .byte 102,15,56,220,209
1126 movl %r8d,%eax
1127 xorl %r11d,%eax
1128 .byte 102,15,56,220,217
1129 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1130 leal 1(%r8),%eax
1131 .byte 102,15,56,220,225
1132 xorl %r11d,%eax
1133 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1134 .byte 102,15,56,220,233
1135 leal 2(%r8),%eax
1136 xorl %r11d,%eax
1137 .byte 102,15,56,220,241
1138 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1139 leal 3(%r8),%eax
1140 .byte 102,15,56,220,249
1141 movups -32(%rcx,%r10,1),%xmm1
1142 xorl %r11d,%eax
1143
1144 .byte 102,15,56,220,208
1145 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1146 leal 4(%r8),%eax
1147 .byte 102,15,56,220,216
1148 xorl %r11d,%eax
1149 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1150 .byte 102,15,56,220,224
1151 leal 5(%r8),%eax
1152 xorl %r11d,%eax
1153 .byte 102,15,56,220,232
1154 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1155 movq %r10,%rax
1156 .byte 102,15,56,220,240
1157 .byte 102,15,56,220,248
1158 movups -16(%rcx,%r10,1),%xmm0
1159
1160 call .Lenc_loop6
1161
1162 movdqu (%rdi),%xmm8
1163 movdqu 16(%rdi),%xmm9
1164 movdqu 32(%rdi),%xmm10
1165 movdqu 48(%rdi),%xmm11
1166 movdqu 64(%rdi),%xmm12
1167 movdqu 80(%rdi),%xmm13
1168 leaq 96(%rdi),%rdi
1169 movups -64(%rcx,%r10,1),%xmm1
1170 pxor %xmm2,%xmm8
1171 movaps 0(%rsp),%xmm2
1172 pxor %xmm3,%xmm9
1173 movaps 16(%rsp),%xmm3
1174 pxor %xmm4,%xmm10
1175 movaps 32(%rsp),%xmm4
1176 pxor %xmm5,%xmm11
1177 movaps 48(%rsp),%xmm5
1178 pxor %xmm6,%xmm12
1179 movaps 64(%rsp),%xmm6
1180 pxor %xmm7,%xmm13
1181 movaps 80(%rsp),%xmm7
1182 movdqu %xmm8,(%rsi)
1183 movdqu %xmm9,16(%rsi)
1184 movdqu %xmm10,32(%rsi)
1185 movdqu %xmm11,48(%rsi)
1186 movdqu %xmm12,64(%rsi)
1187 movdqu %xmm13,80(%rsi)
1188 leaq 96(%rsi),%rsi
1189
1190 subq $6,%rdx
1191 jnc .Lctr32_loop6
1192
1193 addq $6,%rdx
1194 jz .Lctr32_done
1195
1196 leal -48(%r10),%eax
1197 leaq -80(%rcx,%r10,1),%rcx
1198 negl %eax
1199 shrl $4,%eax
1200 jmp .Lctr32_tail
1201
1202 .align 32
1203 .Lctr32_loop8:
1204 addl $8,%r8d
1205 movdqa 96(%rsp),%xmm8
1206 .byte 102,15,56,220,209
1207 movl %r8d,%r9d
1208 movdqa 112(%rsp),%xmm9
1209 .byte 102,15,56,220,217
1210 bswapl %r9d
1211 movups 32-128(%rcx),%xmm0
1212 .byte 102,15,56,220,225
1213 xorl %r11d,%r9d
1214 nop
1215 .byte 102,15,56,220,233
1216 movl %r9d,0+12(%rsp)
1217 leaq 1(%r8),%r9
1218 .byte 102,15,56,220,241
1219 .byte 102,15,56,220,249
1220 .byte 102,68,15,56,220,193
1221 .byte 102,68,15,56,220,201
1222 movups 48-128(%rcx),%xmm1
1223 bswapl %r9d
1224 .byte 102,15,56,220,208
1225 .byte 102,15,56,220,216
1226 xorl %r11d,%r9d
1227 .byte 0x66,0x90
1228 .byte 102,15,56,220,224
1229 .byte 102,15,56,220,232
1230 movl %r9d,16+12(%rsp)
1231 leaq 2(%r8),%r9
1232 .byte 102,15,56,220,240
1233 .byte 102,15,56,220,248
1234 .byte 102,68,15,56,220,192
1235 .byte 102,68,15,56,220,200
1236 movups 64-128(%rcx),%xmm0
1237 bswapl %r9d
1238 .byte 102,15,56,220,209
1239 .byte 102,15,56,220,217
1240 xorl %r11d,%r9d
1241 .byte 0x66,0x90
1242 .byte 102,15,56,220,225
1243 .byte 102,15,56,220,233
1244 movl %r9d,32+12(%rsp)
1245 leaq 3(%r8),%r9
1246 .byte 102,15,56,220,241
1247 .byte 102,15,56,220,249
1248 .byte 102,68,15,56,220,193
1249 .byte 102,68,15,56,220,201
1250 movups 80-128(%rcx),%xmm1
1251 bswapl %r9d
1252 .byte 102,15,56,220,208
1253 .byte 102,15,56,220,216
1254 xorl %r11d,%r9d
1255 .byte 0x66,0x90
1256 .byte 102,15,56,220,224
1257 .byte 102,15,56,220,232
1258 movl %r9d,48+12(%rsp)
1259 leaq 4(%r8),%r9
1260 .byte 102,15,56,220,240
1261 .byte 102,15,56,220,248
1262 .byte 102,68,15,56,220,192
1263 .byte 102,68,15,56,220,200
1264 movups 96-128(%rcx),%xmm0
1265 bswapl %r9d
1266 .byte 102,15,56,220,209
1267 .byte 102,15,56,220,217
1268 xorl %r11d,%r9d
1269 .byte 0x66,0x90
1270 .byte 102,15,56,220,225
1271 .byte 102,15,56,220,233
1272 movl %r9d,64+12(%rsp)
1273 leaq 5(%r8),%r9
1274 .byte 102,15,56,220,241
1275 .byte 102,15,56,220,249
1276 .byte 102,68,15,56,220,193
1277 .byte 102,68,15,56,220,201
1278 movups 112-128(%rcx),%xmm1
1279 bswapl %r9d
1280 .byte 102,15,56,220,208
1281 .byte 102,15,56,220,216
1282 xorl %r11d,%r9d
1283 .byte 0x66,0x90
1284 .byte 102,15,56,220,224
1285 .byte 102,15,56,220,232
1286 movl %r9d,80+12(%rsp)
1287 leaq 6(%r8),%r9
1288 .byte 102,15,56,220,240
1289 .byte 102,15,56,220,248
1290 .byte 102,68,15,56,220,192
1291 .byte 102,68,15,56,220,200
1292 movups 128-128(%rcx),%xmm0
1293 bswapl %r9d
1294 .byte 102,15,56,220,209
1295 .byte 102,15,56,220,217
1296 xorl %r11d,%r9d
1297 .byte 0x66,0x90
1298 .byte 102,15,56,220,225
1299 .byte 102,15,56,220,233
1300 movl %r9d,96+12(%rsp)
1301 leaq 7(%r8),%r9
1302 .byte 102,15,56,220,241
1303 .byte 102,15,56,220,249
1304 .byte 102,68,15,56,220,193
1305 .byte 102,68,15,56,220,201
1306 movups 144-128(%rcx),%xmm1
1307 bswapl %r9d
1308 .byte 102,15,56,220,208
1309 .byte 102,15,56,220,216
1310 .byte 102,15,56,220,224
1311 xorl %r11d,%r9d
1312 movdqu 0(%rdi),%xmm10
1313 .byte 102,15,56,220,232
1314 movl %r9d,112+12(%rsp)
1315 cmpl $11,%eax
1316 .byte 102,15,56,220,240
1317 .byte 102,15,56,220,248
1318 .byte 102,68,15,56,220,192
1319 .byte 102,68,15,56,220,200
1320 movups 160-128(%rcx),%xmm0
1321
1322 jb .Lctr32_enc_done
1323
1324 .byte 102,15,56,220,209
1325 .byte 102,15,56,220,217
1326 .byte 102,15,56,220,225
1327 .byte 102,15,56,220,233
1328 .byte 102,15,56,220,241
1329 .byte 102,15,56,220,249
1330 .byte 102,68,15,56,220,193
1331 .byte 102,68,15,56,220,201
1332 movups 176-128(%rcx),%xmm1
1333
1334 .byte 102,15,56,220,208
1335 .byte 102,15,56,220,216
1336 .byte 102,15,56,220,224
1337 .byte 102,15,56,220,232
1338 .byte 102,15,56,220,240
1339 .byte 102,15,56,220,248
1340 .byte 102,68,15,56,220,192
1341 .byte 102,68,15,56,220,200
1342 movups 192-128(%rcx),%xmm0
1343 je .Lctr32_enc_done
1344
1345 .byte 102,15,56,220,209
1346 .byte 102,15,56,220,217
1347 .byte 102,15,56,220,225
1348 .byte 102,15,56,220,233
1349 .byte 102,15,56,220,241
1350 .byte 102,15,56,220,249
1351 .byte 102,68,15,56,220,193
1352 .byte 102,68,15,56,220,201
1353 movups 208-128(%rcx),%xmm1
1354
1355 .byte 102,15,56,220,208
1356 .byte 102,15,56,220,216
1357 .byte 102,15,56,220,224
1358 .byte 102,15,56,220,232
1359 .byte 102,15,56,220,240
1360 .byte 102,15,56,220,248
1361 .byte 102,68,15,56,220,192
1362 .byte 102,68,15,56,220,200
1363 movups 224-128(%rcx),%xmm0
1364 jmp .Lctr32_enc_done
1365
1366 .align 16
1367 .Lctr32_enc_done:
1368 movdqu 16(%rdi),%xmm11
1369 pxor %xmm0,%xmm10
1370 movdqu 32(%rdi),%xmm12
1371 pxor %xmm0,%xmm11
1372 movdqu 48(%rdi),%xmm13
1373 pxor %xmm0,%xmm12
1374 movdqu 64(%rdi),%xmm14
1375 pxor %xmm0,%xmm13
1376 movdqu 80(%rdi),%xmm15
1377 pxor %xmm0,%xmm14
1378 pxor %xmm0,%xmm15
1379 .byte 102,15,56,220,209
1380 .byte 102,15,56,220,217
1381 .byte 102,15,56,220,225
1382 .byte 102,15,56,220,233
1383 .byte 102,15,56,220,241
1384 .byte 102,15,56,220,249
1385 .byte 102,68,15,56,220,193
1386 .byte 102,68,15,56,220,201
1387 movdqu 96(%rdi),%xmm1
1388 leaq 128(%rdi),%rdi
1389
1390 .byte 102,65,15,56,221,210
1391 pxor %xmm0,%xmm1
1392 movdqu 112-128(%rdi),%xmm10
1393 .byte 102,65,15,56,221,219
1394 pxor %xmm0,%xmm10
1395 movdqa 0(%rsp),%xmm11
1396 .byte 102,65,15,56,221,228
1397 .byte 102,65,15,56,221,237
1398 movdqa 16(%rsp),%xmm12
1399 movdqa 32(%rsp),%xmm13
1400 .byte 102,65,15,56,221,246
1401 .byte 102,65,15,56,221,255
1402 movdqa 48(%rsp),%xmm14
1403 movdqa 64(%rsp),%xmm15
1404 .byte 102,68,15,56,221,193
1405 movdqa 80(%rsp),%xmm0
1406 movups 16-128(%rcx),%xmm1
1407 .byte 102,69,15,56,221,202
1408
1409 movups %xmm2,(%rsi)
1410 movdqa %xmm11,%xmm2
1411 movups %xmm3,16(%rsi)
1412 movdqa %xmm12,%xmm3
1413 movups %xmm4,32(%rsi)
1414 movdqa %xmm13,%xmm4
1415 movups %xmm5,48(%rsi)
1416 movdqa %xmm14,%xmm5
1417 movups %xmm6,64(%rsi)
1418 movdqa %xmm15,%xmm6
1419 movups %xmm7,80(%rsi)
1420 movdqa %xmm0,%xmm7
1421 movups %xmm8,96(%rsi)
1422 movups %xmm9,112(%rsi)
1423 leaq 128(%rsi),%rsi
1424
1425 subq $8,%rdx
1426 jnc .Lctr32_loop8
1427
1428 addq $8,%rdx
1429 jz .Lctr32_done
1430 leaq -128(%rcx),%rcx
1431
1432 .Lctr32_tail:
1433
1434
1435 leaq 16(%rcx),%rcx
1436 cmpq $4,%rdx
1437 jb .Lctr32_loop3
1438 je .Lctr32_loop4
1439
1440
1441 shll $4,%eax
1442 movdqa 96(%rsp),%xmm8
1443 pxor %xmm9,%xmm9
1444
1445 movups 16(%rcx),%xmm0
1446 .byte 102,15,56,220,209
1447 .byte 102,15,56,220,217
1448 leaq 32-16(%rcx,%rax,1),%rcx
1449 negq %rax
1450 .byte 102,15,56,220,225
1451 addq $16,%rax
1452 movups (%rdi),%xmm10
1453 .byte 102,15,56,220,233
1454 .byte 102,15,56,220,241
1455 movups 16(%rdi),%xmm11
1456 movups 32(%rdi),%xmm12
1457 .byte 102,15,56,220,249
1458 .byte 102,68,15,56,220,193
1459
1460 call .Lenc_loop8_enter
1461
1462 movdqu 48(%rdi),%xmm13
1463 pxor %xmm10,%xmm2
1464 movdqu 64(%rdi),%xmm10
1465 pxor %xmm11,%xmm3
1466 movdqu %xmm2,(%rsi)
1467 pxor %xmm12,%xmm4
1468 movdqu %xmm3,16(%rsi)
1469 pxor %xmm13,%xmm5
1470 movdqu %xmm4,32(%rsi)
1471 pxor %xmm10,%xmm6
1472 movdqu %xmm5,48(%rsi)
1473 movdqu %xmm6,64(%rsi)
1474 cmpq $6,%rdx
1475 jb .Lctr32_done
1476
1477 movups 80(%rdi),%xmm11
1478 xorps %xmm11,%xmm7
1479 movups %xmm7,80(%rsi)
1480 je .Lctr32_done
1481
1482 movups 96(%rdi),%xmm12
1483 xorps %xmm12,%xmm8
1484 movups %xmm8,96(%rsi)
1485 jmp .Lctr32_done
1486
1487 .align 32
1488 .Lctr32_loop4:
1489 .byte 102,15,56,220,209
1490 leaq 16(%rcx),%rcx
1491 decl %eax
1492 .byte 102,15,56,220,217
1493 .byte 102,15,56,220,225
1494 .byte 102,15,56,220,233
1495 movups (%rcx),%xmm1
1496 jnz .Lctr32_loop4
1497 .byte 102,15,56,221,209
1498 .byte 102,15,56,221,217
1499 movups (%rdi),%xmm10
1500 movups 16(%rdi),%xmm11
1501 .byte 102,15,56,221,225
1502 .byte 102,15,56,221,233
1503 movups 32(%rdi),%xmm12
1504 movups 48(%rdi),%xmm13
1505
1506 xorps %xmm10,%xmm2
1507 movups %xmm2,(%rsi)
1508 xorps %xmm11,%xmm3
1509 movups %xmm3,16(%rsi)
1510 pxor %xmm12,%xmm4
1511 movdqu %xmm4,32(%rsi)
1512 pxor %xmm13,%xmm5
1513 movdqu %xmm5,48(%rsi)
1514 jmp .Lctr32_done
1515
1516 .align 32
1517 .Lctr32_loop3:
1518 .byte 102,15,56,220,209
1519 leaq 16(%rcx),%rcx
1520 decl %eax
1521 .byte 102,15,56,220,217
1522 .byte 102,15,56,220,225
1523 movups (%rcx),%xmm1
1524 jnz .Lctr32_loop3
1525 .byte 102,15,56,221,209
1526 .byte 102,15,56,221,217
1527 .byte 102,15,56,221,225
1528
1529 movups (%rdi),%xmm10
1530 xorps %xmm10,%xmm2
1531 movups %xmm2,(%rsi)
1532 cmpq $2,%rdx
1533 jb .Lctr32_done
1534
1535 movups 16(%rdi),%xmm11
1536 xorps %xmm11,%xmm3
1537 movups %xmm3,16(%rsi)
1538 je .Lctr32_done
1539
1540 movups 32(%rdi),%xmm12
1541 xorps %xmm12,%xmm4
1542 movups %xmm4,32(%rsi)
1543
1544 .Lctr32_done:
1545 xorps %xmm0,%xmm0
1546 xorl %r11d,%r11d
1547 pxor %xmm1,%xmm1
1548 pxor %xmm2,%xmm2
1549 pxor %xmm3,%xmm3
1550 pxor %xmm4,%xmm4
1551 pxor %xmm5,%xmm5
1552 pxor %xmm6,%xmm6
1553 pxor %xmm7,%xmm7
1554 movaps %xmm0,0(%rsp)
1555 pxor %xmm8,%xmm8
1556 movaps %xmm0,16(%rsp)
1557 pxor %xmm9,%xmm9
1558 movaps %xmm0,32(%rsp)
1559 pxor %xmm10,%xmm10
1560 movaps %xmm0,48(%rsp)
1561 pxor %xmm11,%xmm11
1562 movaps %xmm0,64(%rsp)
1563 pxor %xmm12,%xmm12
1564 movaps %xmm0,80(%rsp)
1565 pxor %xmm13,%xmm13
1566 movaps %xmm0,96(%rsp)
1567 pxor %xmm14,%xmm14
1568 movaps %xmm0,112(%rsp)
1569 pxor %xmm15,%xmm15
1570 leaq (%rbp),%rsp
1571 popq %rbp
1572 .Lctr32_epilogue:
1573 .byte 0xf3,0xc3
1574 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
#-----------------------------------------------------------------------
# aesni_xts_encrypt — AES-XTS encryption using AES-NI (SysV AMD64 ABI).
# Machine-generated by perlasm; do not hand-edit the instruction stream.
# In (from register use below): %rdi = input, %rsi = output, %rdx = byte
#   length, %rcx = data key schedule (rounds at 240(%rcx)), %r8 = tweak
#   key schedule (rounds at 240(%r8)), %r9 = 16-byte tweak/IV.
#   NOTE(review): argument roles inferred from the loads below — presumably
#   matches OpenSSL's aesni_xts_encrypt(in,out,len,key1,key2,iv); confirm
#   against the perlasm source.
# The .byte 102,15,56,220/221,... sequences are hand-encoded
# aesenc/aesenclast instructions (pre-AES-NI-assembler encoding).
#-----------------------------------------------------------------------
1575 .globl aesni_xts_encrypt
1576 .hidden aesni_xts_encrypt
1577 .type aesni_xts_encrypt,@function
1578 .align 16
1579 aesni_xts_encrypt:
# Prologue: align %rsp to 16 and reserve 112 bytes of scratch for the
# stashed tweak values; %rbp preserves the original stack frame.
1580 leaq (%rsp),%rax
1581 pushq %rbp
1582 subq $112,%rsp
1583 andq $-16,%rsp
1584 leaq -8(%rax),%rbp
# Encrypt the IV (%r9) with the tweak key (%r8) to form the initial
# XTS tweak in %xmm2.
1585 movups (%r9),%xmm2
1586 movl 240(%r8),%eax
1587 movl 240(%rcx),%r10d
1588 movups (%r8),%xmm0
1589 movups 16(%r8),%xmm1
1590 leaq 32(%r8),%r8
1591 xorps %xmm0,%xmm2
1592 .Loop_enc1_8:
1593 .byte 102,15,56,220,209
1594 decl %eax
1595 movups (%r8),%xmm1
1596 leaq 16(%r8),%r8
1597 jnz .Loop_enc1_8
1598 .byte 102,15,56,221,209
# Save key pointer/round count; %r9 keeps the original length so the
# trailing partial block (ciphertext stealing) can be detected later.
1599 movups (%rcx),%xmm0
1600 movq %rcx,%r11
1601 movl %r10d,%eax
1602 shll $4,%r10d
1603 movq %rdx,%r9
1604 andq $-16,%rdx
1605
1606 movups 16(%rcx,%r10,1),%xmm1
1607
# Derive tweaks for blocks 0..5 (%xmm10..%xmm15): each step multiplies
# the running tweak by x in GF(2^128) — shift, then conditionally fold
# in the .Lxts_magic reduction constant based on the carried-out sign.
1608 movdqa .Lxts_magic(%rip),%xmm8
1609 movdqa %xmm2,%xmm15
1610 pshufd $0x5f,%xmm2,%xmm9
1611 pxor %xmm0,%xmm1
1612 movdqa %xmm9,%xmm14
1613 paddd %xmm9,%xmm9
1614 movdqa %xmm15,%xmm10
1615 psrad $31,%xmm14
1616 paddq %xmm15,%xmm15
1617 pand %xmm8,%xmm14
1618 pxor %xmm0,%xmm10
1619 pxor %xmm14,%xmm15
1620 movdqa %xmm9,%xmm14
1621 paddd %xmm9,%xmm9
1622 movdqa %xmm15,%xmm11
1623 psrad $31,%xmm14
1624 paddq %xmm15,%xmm15
1625 pand %xmm8,%xmm14
1626 pxor %xmm0,%xmm11
1627 pxor %xmm14,%xmm15
1628 movdqa %xmm9,%xmm14
1629 paddd %xmm9,%xmm9
1630 movdqa %xmm15,%xmm12
1631 psrad $31,%xmm14
1632 paddq %xmm15,%xmm15
1633 pand %xmm8,%xmm14
1634 pxor %xmm0,%xmm12
1635 pxor %xmm14,%xmm15
1636 movdqa %xmm9,%xmm14
1637 paddd %xmm9,%xmm9
1638 movdqa %xmm15,%xmm13
1639 psrad $31,%xmm14
1640 paddq %xmm15,%xmm15
1641 pand %xmm8,%xmm14
1642 pxor %xmm0,%xmm13
1643 pxor %xmm14,%xmm15
1644 movdqa %xmm15,%xmm14
1645 psrad $31,%xmm9
1646 paddq %xmm15,%xmm15
1647 pand %xmm8,%xmm9
1648 pxor %xmm0,%xmm14
1649 pxor %xmm9,%xmm15
1650 movaps %xmm1,96(%rsp)
1651
# Fewer than 6 full blocks? Take the short path.
1652 subq $96,%rdx
1653 jc .Lxts_enc_short
1654
1655 movl $16+96,%eax
1656 leaq 32(%r11,%r10,1),%rcx
1657 subq %r10,%rax
1658 movups 16(%r11),%xmm1
1659 movq %rax,%r10
1660 leaq .Lxts_magic(%rip),%r8
1661 jmp .Lxts_enc_grandloop
1662
1663 .align 32
# Main loop: encrypt 6 blocks per iteration, interleaving AES rounds
# with computation of the next 6 tweaks to hide latency.
1664 .Lxts_enc_grandloop:
1665 movdqu 0(%rdi),%xmm2
1666 movdqa %xmm0,%xmm8
1667 movdqu 16(%rdi),%xmm3
1668 pxor %xmm10,%xmm2
1669 movdqu 32(%rdi),%xmm4
1670 pxor %xmm11,%xmm3
1671 .byte 102,15,56,220,209
1672 movdqu 48(%rdi),%xmm5
1673 pxor %xmm12,%xmm4
1674 .byte 102,15,56,220,217
1675 movdqu 64(%rdi),%xmm6
1676 pxor %xmm13,%xmm5
1677 .byte 102,15,56,220,225
1678 movdqu 80(%rdi),%xmm7
1679 pxor %xmm15,%xmm8
1680 movdqa 96(%rsp),%xmm9
1681 pxor %xmm14,%xmm6
1682 .byte 102,15,56,220,233
1683 movups 32(%r11),%xmm0
1684 leaq 96(%rdi),%rdi
1685 pxor %xmm8,%xmm7
1686
# Stash the (key-whitened) tweaks on the stack for the final
# aesenclast-with-memory-operand step below.
1687 pxor %xmm9,%xmm10
1688 .byte 102,15,56,220,241
1689 pxor %xmm9,%xmm11
1690 movdqa %xmm10,0(%rsp)
1691 .byte 102,15,56,220,249
1692 movups 48(%r11),%xmm1
1693 pxor %xmm9,%xmm12
1694
1695 .byte 102,15,56,220,208
1696 pxor %xmm9,%xmm13
1697 movdqa %xmm11,16(%rsp)
1698 .byte 102,15,56,220,216
1699 pxor %xmm9,%xmm14
1700 movdqa %xmm12,32(%rsp)
1701 .byte 102,15,56,220,224
1702 .byte 102,15,56,220,232
1703 pxor %xmm9,%xmm8
1704 movdqa %xmm14,64(%rsp)
1705 .byte 102,15,56,220,240
1706 .byte 102,15,56,220,248
1707 movups 64(%r11),%xmm0
1708 movdqa %xmm8,80(%rsp)
1709 pshufd $0x5f,%xmm15,%xmm9
1710 jmp .Lxts_enc_loop6
1711 .align 32
# Middle AES rounds for all 6 blocks (two rounds per iteration).
1712 .Lxts_enc_loop6:
1713 .byte 102,15,56,220,209
1714 .byte 102,15,56,220,217
1715 .byte 102,15,56,220,225
1716 .byte 102,15,56,220,233
1717 .byte 102,15,56,220,241
1718 .byte 102,15,56,220,249
1719 movups -64(%rcx,%rax,1),%xmm1
1720 addq $32,%rax
1721
1722 .byte 102,15,56,220,208
1723 .byte 102,15,56,220,216
1724 .byte 102,15,56,220,224
1725 .byte 102,15,56,220,232
1726 .byte 102,15,56,220,240
1727 .byte 102,15,56,220,248
1728 movups -80(%rcx,%rax,1),%xmm0
1729 jnz .Lxts_enc_loop6
1730
# Final rounds, interleaved with derivation of the next iteration's
# tweaks (%xmm10..%xmm14 refilled from the advancing %xmm15).
1731 movdqa (%r8),%xmm8
1732 movdqa %xmm9,%xmm14
1733 paddd %xmm9,%xmm9
1734 .byte 102,15,56,220,209
1735 paddq %xmm15,%xmm15
1736 psrad $31,%xmm14
1737 .byte 102,15,56,220,217
1738 pand %xmm8,%xmm14
1739 movups (%r11),%xmm10
1740 .byte 102,15,56,220,225
1741 .byte 102,15,56,220,233
1742 .byte 102,15,56,220,241
1743 pxor %xmm14,%xmm15
1744 movaps %xmm10,%xmm11
1745 .byte 102,15,56,220,249
1746 movups -64(%rcx),%xmm1
1747
1748 movdqa %xmm9,%xmm14
1749 .byte 102,15,56,220,208
1750 paddd %xmm9,%xmm9
1751 pxor %xmm15,%xmm10
1752 .byte 102,15,56,220,216
1753 psrad $31,%xmm14
1754 paddq %xmm15,%xmm15
1755 .byte 102,15,56,220,224
1756 .byte 102,15,56,220,232
1757 pand %xmm8,%xmm14
1758 movaps %xmm11,%xmm12
1759 .byte 102,15,56,220,240
1760 pxor %xmm14,%xmm15
1761 movdqa %xmm9,%xmm14
1762 .byte 102,15,56,220,248
1763 movups -48(%rcx),%xmm0
1764
1765 paddd %xmm9,%xmm9
1766 .byte 102,15,56,220,209
1767 pxor %xmm15,%xmm11
1768 psrad $31,%xmm14
1769 .byte 102,15,56,220,217
1770 paddq %xmm15,%xmm15
1771 pand %xmm8,%xmm14
1772 .byte 102,15,56,220,225
1773 .byte 102,15,56,220,233
1774 movdqa %xmm13,48(%rsp)
1775 pxor %xmm14,%xmm15
1776 .byte 102,15,56,220,241
1777 movaps %xmm12,%xmm13
1778 movdqa %xmm9,%xmm14
1779 .byte 102,15,56,220,249
1780 movups -32(%rcx),%xmm1
1781
1782 paddd %xmm9,%xmm9
1783 .byte 102,15,56,220,208
1784 pxor %xmm15,%xmm12
1785 psrad $31,%xmm14
1786 .byte 102,15,56,220,216
1787 paddq %xmm15,%xmm15
1788 pand %xmm8,%xmm14
1789 .byte 102,15,56,220,224
1790 .byte 102,15,56,220,232
1791 .byte 102,15,56,220,240
1792 pxor %xmm14,%xmm15
1793 movaps %xmm13,%xmm14
1794 .byte 102,15,56,220,248
1795
1796 movdqa %xmm9,%xmm0
1797 paddd %xmm9,%xmm9
1798 .byte 102,15,56,220,209
1799 pxor %xmm15,%xmm13
1800 psrad $31,%xmm0
1801 .byte 102,15,56,220,217
1802 paddq %xmm15,%xmm15
1803 pand %xmm8,%xmm0
1804 .byte 102,15,56,220,225
1805 .byte 102,15,56,220,233
1806 pxor %xmm0,%xmm15
1807 movups (%r11),%xmm0
1808 .byte 102,15,56,220,241
1809 .byte 102,15,56,220,249
1810 movups 16(%r11),%xmm1
1811
# aesenclast with the tweak values stashed at 0..80(%rsp) as the
# round-key operand: performs the final round and tweak XOR at once.
1812 pxor %xmm15,%xmm14
1813 .byte 102,15,56,221,84,36,0
1814 psrad $31,%xmm9
1815 paddq %xmm15,%xmm15
1816 .byte 102,15,56,221,92,36,16
1817 .byte 102,15,56,221,100,36,32
1818 pand %xmm8,%xmm9
1819 movq %r10,%rax
1820 .byte 102,15,56,221,108,36,48
1821 .byte 102,15,56,221,116,36,64
1822 .byte 102,15,56,221,124,36,80
1823 pxor %xmm9,%xmm15
1824
1825 leaq 96(%rsi),%rsi
1826 movups %xmm2,-96(%rsi)
1827 movups %xmm3,-80(%rsi)
1828 movups %xmm4,-64(%rsi)
1829 movups %xmm5,-48(%rsi)
1830 movups %xmm6,-32(%rsi)
1831 movups %xmm7,-16(%rsi)
1832 subq $96,%rdx
1833 jnc .Lxts_enc_grandloop
1834
1835 movl $16+96,%eax
1836 subl %r10d,%eax
1837 movq %r11,%rcx
1838 shrl $4,%eax
1839
# 1..5 full blocks remain; dispatch on remaining length.
1840 .Lxts_enc_short:
1841
1842 movl %eax,%r10d
1843 pxor %xmm0,%xmm10
1844 addq $96,%rdx
1845 jz .Lxts_enc_done
1846
1847 pxor %xmm0,%xmm11
1848 cmpq $0x20,%rdx
1849 jb .Lxts_enc_one
1850 pxor %xmm0,%xmm12
1851 je .Lxts_enc_two
1852
1853 pxor %xmm0,%xmm13
1854 cmpq $0x40,%rdx
1855 jb .Lxts_enc_three
1856 pxor %xmm0,%xmm14
1857 je .Lxts_enc_four
1858
# Five blocks: use the 6-wide helper with a zeroed sixth block.
1859 movdqu (%rdi),%xmm2
1860 movdqu 16(%rdi),%xmm3
1861 movdqu 32(%rdi),%xmm4
1862 pxor %xmm10,%xmm2
1863 movdqu 48(%rdi),%xmm5
1864 pxor %xmm11,%xmm3
1865 movdqu 64(%rdi),%xmm6
1866 leaq 80(%rdi),%rdi
1867 pxor %xmm12,%xmm4
1868 pxor %xmm13,%xmm5
1869 pxor %xmm14,%xmm6
1870 pxor %xmm7,%xmm7
1871
1872 call _aesni_encrypt6
1873
1874 xorps %xmm10,%xmm2
1875 movdqa %xmm15,%xmm10
1876 xorps %xmm11,%xmm3
1877 xorps %xmm12,%xmm4
1878 movdqu %xmm2,(%rsi)
1879 xorps %xmm13,%xmm5
1880 movdqu %xmm3,16(%rsi)
1881 xorps %xmm14,%xmm6
1882 movdqu %xmm4,32(%rsi)
1883 movdqu %xmm5,48(%rsi)
1884 movdqu %xmm6,64(%rsi)
1885 leaq 80(%rsi),%rsi
1886 jmp .Lxts_enc_done
1887
1888 .align 16
# Single remaining block.
1889 .Lxts_enc_one:
1890 movups (%rdi),%xmm2
1891 leaq 16(%rdi),%rdi
1892 xorps %xmm10,%xmm2
1893 movups (%rcx),%xmm0
1894 movups 16(%rcx),%xmm1
1895 leaq 32(%rcx),%rcx
1896 xorps %xmm0,%xmm2
1897 .Loop_enc1_9:
1898 .byte 102,15,56,220,209
1899 decl %eax
1900 movups (%rcx),%xmm1
1901 leaq 16(%rcx),%rcx
1902 jnz .Loop_enc1_9
1903 .byte 102,15,56,221,209
1904 xorps %xmm10,%xmm2
1905 movdqa %xmm11,%xmm10
1906 movups %xmm2,(%rsi)
1907 leaq 16(%rsi),%rsi
1908 jmp .Lxts_enc_done
1909
1910 .align 16
# Two remaining blocks.
1911 .Lxts_enc_two:
1912 movups (%rdi),%xmm2
1913 movups 16(%rdi),%xmm3
1914 leaq 32(%rdi),%rdi
1915 xorps %xmm10,%xmm2
1916 xorps %xmm11,%xmm3
1917
1918 call _aesni_encrypt2
1919
1920 xorps %xmm10,%xmm2
1921 movdqa %xmm12,%xmm10
1922 xorps %xmm11,%xmm3
1923 movups %xmm2,(%rsi)
1924 movups %xmm3,16(%rsi)
1925 leaq 32(%rsi),%rsi
1926 jmp .Lxts_enc_done
1927
1928 .align 16
# Three remaining blocks.
1929 .Lxts_enc_three:
1930 movups (%rdi),%xmm2
1931 movups 16(%rdi),%xmm3
1932 movups 32(%rdi),%xmm4
1933 leaq 48(%rdi),%rdi
1934 xorps %xmm10,%xmm2
1935 xorps %xmm11,%xmm3
1936 xorps %xmm12,%xmm4
1937
1938 call _aesni_encrypt3
1939
1940 xorps %xmm10,%xmm2
1941 movdqa %xmm13,%xmm10
1942 xorps %xmm11,%xmm3
1943 xorps %xmm12,%xmm4
1944 movups %xmm2,(%rsi)
1945 movups %xmm3,16(%rsi)
1946 movups %xmm4,32(%rsi)
1947 leaq 48(%rsi),%rsi
1948 jmp .Lxts_enc_done
1949
1950 .align 16
# Four remaining blocks.
1951 .Lxts_enc_four:
1952 movups (%rdi),%xmm2
1953 movups 16(%rdi),%xmm3
1954 movups 32(%rdi),%xmm4
1955 xorps %xmm10,%xmm2
1956 movups 48(%rdi),%xmm5
1957 leaq 64(%rdi),%rdi
1958 xorps %xmm11,%xmm3
1959 xorps %xmm12,%xmm4
1960 xorps %xmm13,%xmm5
1961
1962 call _aesni_encrypt4
1963
1964 pxor %xmm10,%xmm2
1965 movdqa %xmm14,%xmm10
1966 pxor %xmm11,%xmm3
1967 pxor %xmm12,%xmm4
1968 movdqu %xmm2,(%rsi)
1969 pxor %xmm13,%xmm5
1970 movdqu %xmm3,16(%rsi)
1971 movdqu %xmm4,32(%rsi)
1972 movdqu %xmm5,48(%rsi)
1973 leaq 64(%rsi),%rsi
1974 jmp .Lxts_enc_done
1975
1976 .align 16
# Handle a trailing partial block (length not a multiple of 16) via
# ciphertext stealing: swap tail bytes with the last full ciphertext
# block, then re-encrypt that block.
1977 .Lxts_enc_done:
1978 andq $15,%r9
1979 jz .Lxts_enc_ret
1980 movq %r9,%rdx
1981
1982 .Lxts_enc_steal:
1983 movzbl (%rdi),%eax
1984 movzbl -16(%rsi),%ecx
1985 leaq 1(%rdi),%rdi
1986 movb %al,-16(%rsi)
1987 movb %cl,0(%rsi)
1988 leaq 1(%rsi),%rsi
1989 subq $1,%rdx
1990 jnz .Lxts_enc_steal
1991
1992 subq %r9,%rsi
1993 movq %r11,%rcx
1994 movl %r10d,%eax
1995
1996 movups -16(%rsi),%xmm2
1997 xorps %xmm10,%xmm2
1998 movups (%rcx),%xmm0
1999 movups 16(%rcx),%xmm1
2000 leaq 32(%rcx),%rcx
2001 xorps %xmm0,%xmm2
2002 .Loop_enc1_10:
2003 .byte 102,15,56,220,209
2004 decl %eax
2005 movups (%rcx),%xmm1
2006 leaq 16(%rcx),%rcx
2007 jnz .Loop_enc1_10
2008 .byte 102,15,56,221,209
2009 xorps %xmm10,%xmm2
2010 movups %xmm2,-16(%rsi)
2011
# Scrub all XMM registers and the stack scratch area so no key or
# plaintext material is left behind, then restore %rsp and return.
2012 .Lxts_enc_ret:
2013 xorps %xmm0,%xmm0
2014 pxor %xmm1,%xmm1
2015 pxor %xmm2,%xmm2
2016 pxor %xmm3,%xmm3
2017 pxor %xmm4,%xmm4
2018 pxor %xmm5,%xmm5
2019 pxor %xmm6,%xmm6
2020 pxor %xmm7,%xmm7
2021 movaps %xmm0,0(%rsp)
2022 pxor %xmm8,%xmm8
2023 movaps %xmm0,16(%rsp)
2024 pxor %xmm9,%xmm9
2025 movaps %xmm0,32(%rsp)
2026 pxor %xmm10,%xmm10
2027 movaps %xmm0,48(%rsp)
2028 pxor %xmm11,%xmm11
2029 movaps %xmm0,64(%rsp)
2030 pxor %xmm12,%xmm12
2031 movaps %xmm0,80(%rsp)
2032 pxor %xmm13,%xmm13
2033 movaps %xmm0,96(%rsp)
2034 pxor %xmm14,%xmm14
2035 pxor %xmm15,%xmm15
2036 leaq (%rbp),%rsp
2037 popq %rbp
2038 .Lxts_enc_epilogue:
2039 .byte 0xf3,0xc3
2040 .size aesni_xts_encrypt,.-aesni_xts_encrypt
#-----------------------------------------------------------------------
# aesni_xts_decrypt — AES-XTS decryption using AES-NI (SysV AMD64 ABI).
# Machine-generated by perlasm; do not hand-edit the instruction stream.
# Register roles mirror aesni_xts_encrypt above: %rdi in, %rsi out,
# %rdx length, %rcx decryption key schedule, %r8 tweak (encryption)
# key schedule, %r9 16-byte tweak/IV. NOTE(review): roles inferred from
# the loads below — confirm against the perlasm source.
# .byte 102,15,56,222/223,... are hand-encoded aesdec/aesdeclast;
# the tweak itself is still computed with aesenc (220/221).
#-----------------------------------------------------------------------
2041 .globl aesni_xts_decrypt
2042 .hidden aesni_xts_decrypt
2043 .type aesni_xts_decrypt,@function
2044 .align 16
2045 aesni_xts_decrypt:
# Prologue: align %rsp and reserve 112 bytes of tweak scratch space.
2046 leaq (%rsp),%rax
2047 pushq %rbp
2048 subq $112,%rsp
2049 andq $-16,%rsp
2050 leaq -8(%rax),%rbp
# Encrypt the IV with the tweak key to form the initial tweak in %xmm2.
2051 movups (%r9),%xmm2
2052 movl 240(%r8),%eax
2053 movl 240(%rcx),%r10d
2054 movups (%r8),%xmm0
2055 movups 16(%r8),%xmm1
2056 leaq 32(%r8),%r8
2057 xorps %xmm0,%xmm2
2058 .Loop_enc1_11:
2059 .byte 102,15,56,220,209
2060 decl %eax
2061 movups (%r8),%xmm1
2062 leaq 16(%r8),%r8
2063 jnz .Loop_enc1_11
2064 .byte 102,15,56,221,209
# If the length is not a multiple of 16, hold back one extra full block
# for the ciphertext-stealing fix-up at the end.
2065 xorl %eax,%eax
2066 testq $15,%rdx
2067 setnz %al
2068 shlq $4,%rax
2069 subq %rax,%rdx
2070
2071 movups (%rcx),%xmm0
2072 movq %rcx,%r11
2073 movl %r10d,%eax
2074 shll $4,%r10d
2075 movq %rdx,%r9
2076 andq $-16,%rdx
2077
2078 movups 16(%rcx,%r10,1),%xmm1
2079
# Derive tweaks for blocks 0..5 (%xmm10..%xmm15) by repeated
# multiplication by x in GF(2^128) using the .Lxts_magic constant.
2080 movdqa .Lxts_magic(%rip),%xmm8
2081 movdqa %xmm2,%xmm15
2082 pshufd $0x5f,%xmm2,%xmm9
2083 pxor %xmm0,%xmm1
2084 movdqa %xmm9,%xmm14
2085 paddd %xmm9,%xmm9
2086 movdqa %xmm15,%xmm10
2087 psrad $31,%xmm14
2088 paddq %xmm15,%xmm15
2089 pand %xmm8,%xmm14
2090 pxor %xmm0,%xmm10
2091 pxor %xmm14,%xmm15
2092 movdqa %xmm9,%xmm14
2093 paddd %xmm9,%xmm9
2094 movdqa %xmm15,%xmm11
2095 psrad $31,%xmm14
2096 paddq %xmm15,%xmm15
2097 pand %xmm8,%xmm14
2098 pxor %xmm0,%xmm11
2099 pxor %xmm14,%xmm15
2100 movdqa %xmm9,%xmm14
2101 paddd %xmm9,%xmm9
2102 movdqa %xmm15,%xmm12
2103 psrad $31,%xmm14
2104 paddq %xmm15,%xmm15
2105 pand %xmm8,%xmm14
2106 pxor %xmm0,%xmm12
2107 pxor %xmm14,%xmm15
2108 movdqa %xmm9,%xmm14
2109 paddd %xmm9,%xmm9
2110 movdqa %xmm15,%xmm13
2111 psrad $31,%xmm14
2112 paddq %xmm15,%xmm15
2113 pand %xmm8,%xmm14
2114 pxor %xmm0,%xmm13
2115 pxor %xmm14,%xmm15
2116 movdqa %xmm15,%xmm14
2117 psrad $31,%xmm9
2118 paddq %xmm15,%xmm15
2119 pand %xmm8,%xmm9
2120 pxor %xmm0,%xmm14
2121 pxor %xmm9,%xmm15
2122 movaps %xmm1,96(%rsp)
2123
# Fewer than 6 full blocks? Take the short path.
2124 subq $96,%rdx
2125 jc .Lxts_dec_short
2126
2127 movl $16+96,%eax
2128 leaq 32(%r11,%r10,1),%rcx
2129 subq %r10,%rax
2130 movups 16(%r11),%xmm1
2131 movq %rax,%r10
2132 leaq .Lxts_magic(%rip),%r8
2133 jmp .Lxts_dec_grandloop
2134
2135 .align 32
# Main loop: decrypt 6 blocks per iteration, interleaving AES rounds
# with computation of the next 6 tweaks.
2136 .Lxts_dec_grandloop:
2137 movdqu 0(%rdi),%xmm2
2138 movdqa %xmm0,%xmm8
2139 movdqu 16(%rdi),%xmm3
2140 pxor %xmm10,%xmm2
2141 movdqu 32(%rdi),%xmm4
2142 pxor %xmm11,%xmm3
2143 .byte 102,15,56,222,209
2144 movdqu 48(%rdi),%xmm5
2145 pxor %xmm12,%xmm4
2146 .byte 102,15,56,222,217
2147 movdqu 64(%rdi),%xmm6
2148 pxor %xmm13,%xmm5
2149 .byte 102,15,56,222,225
2150 movdqu 80(%rdi),%xmm7
2151 pxor %xmm15,%xmm8
2152 movdqa 96(%rsp),%xmm9
2153 pxor %xmm14,%xmm6
2154 .byte 102,15,56,222,233
2155 movups 32(%r11),%xmm0
2156 leaq 96(%rdi),%rdi
2157 pxor %xmm8,%xmm7
2158
# Stash key-whitened tweaks on the stack for the aesdeclast-with-memory
# step below.
2159 pxor %xmm9,%xmm10
2160 .byte 102,15,56,222,241
2161 pxor %xmm9,%xmm11
2162 movdqa %xmm10,0(%rsp)
2163 .byte 102,15,56,222,249
2164 movups 48(%r11),%xmm1
2165 pxor %xmm9,%xmm12
2166
2167 .byte 102,15,56,222,208
2168 pxor %xmm9,%xmm13
2169 movdqa %xmm11,16(%rsp)
2170 .byte 102,15,56,222,216
2171 pxor %xmm9,%xmm14
2172 movdqa %xmm12,32(%rsp)
2173 .byte 102,15,56,222,224
2174 .byte 102,15,56,222,232
2175 pxor %xmm9,%xmm8
2176 movdqa %xmm14,64(%rsp)
2177 .byte 102,15,56,222,240
2178 .byte 102,15,56,222,248
2179 movups 64(%r11),%xmm0
2180 movdqa %xmm8,80(%rsp)
2181 pshufd $0x5f,%xmm15,%xmm9
2182 jmp .Lxts_dec_loop6
2183 .align 32
# Middle AES rounds for all 6 blocks (two rounds per iteration).
2184 .Lxts_dec_loop6:
2185 .byte 102,15,56,222,209
2186 .byte 102,15,56,222,217
2187 .byte 102,15,56,222,225
2188 .byte 102,15,56,222,233
2189 .byte 102,15,56,222,241
2190 .byte 102,15,56,222,249
2191 movups -64(%rcx,%rax,1),%xmm1
2192 addq $32,%rax
2193
2194 .byte 102,15,56,222,208
2195 .byte 102,15,56,222,216
2196 .byte 102,15,56,222,224
2197 .byte 102,15,56,222,232
2198 .byte 102,15,56,222,240
2199 .byte 102,15,56,222,248
2200 movups -80(%rcx,%rax,1),%xmm0
2201 jnz .Lxts_dec_loop6
2202
# Final rounds, interleaved with derivation of next iteration's tweaks.
2203 movdqa (%r8),%xmm8
2204 movdqa %xmm9,%xmm14
2205 paddd %xmm9,%xmm9
2206 .byte 102,15,56,222,209
2207 paddq %xmm15,%xmm15
2208 psrad $31,%xmm14
2209 .byte 102,15,56,222,217
2210 pand %xmm8,%xmm14
2211 movups (%r11),%xmm10
2212 .byte 102,15,56,222,225
2213 .byte 102,15,56,222,233
2214 .byte 102,15,56,222,241
2215 pxor %xmm14,%xmm15
2216 movaps %xmm10,%xmm11
2217 .byte 102,15,56,222,249
2218 movups -64(%rcx),%xmm1
2219
2220 movdqa %xmm9,%xmm14
2221 .byte 102,15,56,222,208
2222 paddd %xmm9,%xmm9
2223 pxor %xmm15,%xmm10
2224 .byte 102,15,56,222,216
2225 psrad $31,%xmm14
2226 paddq %xmm15,%xmm15
2227 .byte 102,15,56,222,224
2228 .byte 102,15,56,222,232
2229 pand %xmm8,%xmm14
2230 movaps %xmm11,%xmm12
2231 .byte 102,15,56,222,240
2232 pxor %xmm14,%xmm15
2233 movdqa %xmm9,%xmm14
2234 .byte 102,15,56,222,248
2235 movups -48(%rcx),%xmm0
2236
2237 paddd %xmm9,%xmm9
2238 .byte 102,15,56,222,209
2239 pxor %xmm15,%xmm11
2240 psrad $31,%xmm14
2241 .byte 102,15,56,222,217
2242 paddq %xmm15,%xmm15
2243 pand %xmm8,%xmm14
2244 .byte 102,15,56,222,225
2245 .byte 102,15,56,222,233
2246 movdqa %xmm13,48(%rsp)
2247 pxor %xmm14,%xmm15
2248 .byte 102,15,56,222,241
2249 movaps %xmm12,%xmm13
2250 movdqa %xmm9,%xmm14
2251 .byte 102,15,56,222,249
2252 movups -32(%rcx),%xmm1
2253
2254 paddd %xmm9,%xmm9
2255 .byte 102,15,56,222,208
2256 pxor %xmm15,%xmm12
2257 psrad $31,%xmm14
2258 .byte 102,15,56,222,216
2259 paddq %xmm15,%xmm15
2260 pand %xmm8,%xmm14
2261 .byte 102,15,56,222,224
2262 .byte 102,15,56,222,232
2263 .byte 102,15,56,222,240
2264 pxor %xmm14,%xmm15
2265 movaps %xmm13,%xmm14
2266 .byte 102,15,56,222,248
2267
2268 movdqa %xmm9,%xmm0
2269 paddd %xmm9,%xmm9
2270 .byte 102,15,56,222,209
2271 pxor %xmm15,%xmm13
2272 psrad $31,%xmm0
2273 .byte 102,15,56,222,217
2274 paddq %xmm15,%xmm15
2275 pand %xmm8,%xmm0
2276 .byte 102,15,56,222,225
2277 .byte 102,15,56,222,233
2278 pxor %xmm0,%xmm15
2279 movups (%r11),%xmm0
2280 .byte 102,15,56,222,241
2281 .byte 102,15,56,222,249
2282 movups 16(%r11),%xmm1
2283
# aesdeclast with the tweaks stashed at 0..80(%rsp) as the round-key
# operand: final round and tweak XOR in one instruction per block.
2284 pxor %xmm15,%xmm14
2285 .byte 102,15,56,223,84,36,0
2286 psrad $31,%xmm9
2287 paddq %xmm15,%xmm15
2288 .byte 102,15,56,223,92,36,16
2289 .byte 102,15,56,223,100,36,32
2290 pand %xmm8,%xmm9
2291 movq %r10,%rax
2292 .byte 102,15,56,223,108,36,48
2293 .byte 102,15,56,223,116,36,64
2294 .byte 102,15,56,223,124,36,80
2295 pxor %xmm9,%xmm15
2296
2297 leaq 96(%rsi),%rsi
2298 movups %xmm2,-96(%rsi)
2299 movups %xmm3,-80(%rsi)
2300 movups %xmm4,-64(%rsi)
2301 movups %xmm5,-48(%rsi)
2302 movups %xmm6,-32(%rsi)
2303 movups %xmm7,-16(%rsi)
2304 subq $96,%rdx
2305 jnc .Lxts_dec_grandloop
2306
2307 movl $16+96,%eax
2308 subl %r10d,%eax
2309 movq %r11,%rcx
2310 shrl $4,%eax
2311
# 1..5 full blocks remain; dispatch on remaining length.
2312 .Lxts_dec_short:
2313
2314 movl %eax,%r10d
2315 pxor %xmm0,%xmm10
2316 pxor %xmm0,%xmm11
2317 addq $96,%rdx
2318 jz .Lxts_dec_done
2319
2320 pxor %xmm0,%xmm12
2321 cmpq $0x20,%rdx
2322 jb .Lxts_dec_one
2323 pxor %xmm0,%xmm13
2324 je .Lxts_dec_two
2325
2326 pxor %xmm0,%xmm14
2327 cmpq $0x40,%rdx
2328 jb .Lxts_dec_three
2329 je .Lxts_dec_four
2330
# Five blocks via the 6-wide helper; afterwards compute the next tweak
# in %xmm11 only if a partial block still needs the stealing path.
2331 movdqu (%rdi),%xmm2
2332 movdqu 16(%rdi),%xmm3
2333 movdqu 32(%rdi),%xmm4
2334 pxor %xmm10,%xmm2
2335 movdqu 48(%rdi),%xmm5
2336 pxor %xmm11,%xmm3
2337 movdqu 64(%rdi),%xmm6
2338 leaq 80(%rdi),%rdi
2339 pxor %xmm12,%xmm4
2340 pxor %xmm13,%xmm5
2341 pxor %xmm14,%xmm6
2342
2343 call _aesni_decrypt6
2344
2345 xorps %xmm10,%xmm2
2346 xorps %xmm11,%xmm3
2347 xorps %xmm12,%xmm4
2348 movdqu %xmm2,(%rsi)
2349 xorps %xmm13,%xmm5
2350 movdqu %xmm3,16(%rsi)
2351 xorps %xmm14,%xmm6
2352 movdqu %xmm4,32(%rsi)
2353 pxor %xmm14,%xmm14
2354 movdqu %xmm5,48(%rsi)
2355 pcmpgtd %xmm15,%xmm14
2356 movdqu %xmm6,64(%rsi)
2357 leaq 80(%rsi),%rsi
2358 pshufd $0x13,%xmm14,%xmm11
2359 andq $15,%r9
2360 jz .Lxts_dec_ret
2361
2362 movdqa %xmm15,%xmm10
2363 paddq %xmm15,%xmm15
2364 pand %xmm8,%xmm11
2365 pxor %xmm15,%xmm11
2366 jmp .Lxts_dec_done2
2367
2368 .align 16
# Single remaining block.
2369 .Lxts_dec_one:
2370 movups (%rdi),%xmm2
2371 leaq 16(%rdi),%rdi
2372 xorps %xmm10,%xmm2
2373 movups (%rcx),%xmm0
2374 movups 16(%rcx),%xmm1
2375 leaq 32(%rcx),%rcx
2376 xorps %xmm0,%xmm2
2377 .Loop_dec1_12:
2378 .byte 102,15,56,222,209
2379 decl %eax
2380 movups (%rcx),%xmm1
2381 leaq 16(%rcx),%rcx
2382 jnz .Loop_dec1_12
2383 .byte 102,15,56,223,209
2384 xorps %xmm10,%xmm2
2385 movdqa %xmm11,%xmm10
2386 movups %xmm2,(%rsi)
2387 movdqa %xmm12,%xmm11
2388 leaq 16(%rsi),%rsi
2389 jmp .Lxts_dec_done
2390
2391 .align 16
# Two remaining blocks.
2392 .Lxts_dec_two:
2393 movups (%rdi),%xmm2
2394 movups 16(%rdi),%xmm3
2395 leaq 32(%rdi),%rdi
2396 xorps %xmm10,%xmm2
2397 xorps %xmm11,%xmm3
2398
2399 call _aesni_decrypt2
2400
2401 xorps %xmm10,%xmm2
2402 movdqa %xmm12,%xmm10
2403 xorps %xmm11,%xmm3
2404 movdqa %xmm13,%xmm11
2405 movups %xmm2,(%rsi)
2406 movups %xmm3,16(%rsi)
2407 leaq 32(%rsi),%rsi
2408 jmp .Lxts_dec_done
2409
2410 .align 16
# Three remaining blocks.
2411 .Lxts_dec_three:
2412 movups (%rdi),%xmm2
2413 movups 16(%rdi),%xmm3
2414 movups 32(%rdi),%xmm4
2415 leaq 48(%rdi),%rdi
2416 xorps %xmm10,%xmm2
2417 xorps %xmm11,%xmm3
2418 xorps %xmm12,%xmm4
2419
2420 call _aesni_decrypt3
2421
2422 xorps %xmm10,%xmm2
2423 movdqa %xmm13,%xmm10
2424 xorps %xmm11,%xmm3
2425 movdqa %xmm14,%xmm11
2426 xorps %xmm12,%xmm4
2427 movups %xmm2,(%rsi)
2428 movups %xmm3,16(%rsi)
2429 movups %xmm4,32(%rsi)
2430 leaq 48(%rsi),%rsi
2431 jmp .Lxts_dec_done
2432
2433 .align 16
# Four remaining blocks.
2434 .Lxts_dec_four:
2435 movups (%rdi),%xmm2
2436 movups 16(%rdi),%xmm3
2437 movups 32(%rdi),%xmm4
2438 xorps %xmm10,%xmm2
2439 movups 48(%rdi),%xmm5
2440 leaq 64(%rdi),%rdi
2441 xorps %xmm11,%xmm3
2442 xorps %xmm12,%xmm4
2443 xorps %xmm13,%xmm5
2444
2445 call _aesni_decrypt4
2446
2447 pxor %xmm10,%xmm2
2448 movdqa %xmm14,%xmm10
2449 pxor %xmm11,%xmm3
2450 movdqa %xmm15,%xmm11
2451 pxor %xmm12,%xmm4
2452 movdqu %xmm2,(%rsi)
2453 pxor %xmm13,%xmm5
2454 movdqu %xmm3,16(%rsi)
2455 movdqu %xmm4,32(%rsi)
2456 movdqu %xmm5,48(%rsi)
2457 leaq 64(%rsi),%rsi
2458 jmp .Lxts_dec_done
2459
2460 .align 16
# Ciphertext stealing for a trailing partial block: decrypt the
# second-to-last block with the next tweak (%xmm11), swap tail bytes,
# then decrypt the reassembled block with the current tweak (%xmm10).
2461 .Lxts_dec_done:
2462 andq $15,%r9
2463 jz .Lxts_dec_ret
2464 .Lxts_dec_done2:
2465 movq %r9,%rdx
2466 movq %r11,%rcx
2467 movl %r10d,%eax
2468
2469 movups (%rdi),%xmm2
2470 xorps %xmm11,%xmm2
2471 movups (%rcx),%xmm0
2472 movups 16(%rcx),%xmm1
2473 leaq 32(%rcx),%rcx
2474 xorps %xmm0,%xmm2
2475 .Loop_dec1_13:
2476 .byte 102,15,56,222,209
2477 decl %eax
2478 movups (%rcx),%xmm1
2479 leaq 16(%rcx),%rcx
2480 jnz .Loop_dec1_13
2481 .byte 102,15,56,223,209
2482 xorps %xmm11,%xmm2
2483 movups %xmm2,(%rsi)
2484
2485 .Lxts_dec_steal:
2486 movzbl 16(%rdi),%eax
2487 movzbl (%rsi),%ecx
2488 leaq 1(%rdi),%rdi
2489 movb %al,(%rsi)
2490 movb %cl,16(%rsi)
2491 leaq 1(%rsi),%rsi
2492 subq $1,%rdx
2493 jnz .Lxts_dec_steal
2494
2495 subq %r9,%rsi
2496 movq %r11,%rcx
2497 movl %r10d,%eax
2498
2499 movups (%rsi),%xmm2
2500 xorps %xmm10,%xmm2
2501 movups (%rcx),%xmm0
2502 movups 16(%rcx),%xmm1
2503 leaq 32(%rcx),%rcx
2504 xorps %xmm0,%xmm2
2505 .Loop_dec1_14:
2506 .byte 102,15,56,222,209
2507 decl %eax
2508 movups (%rcx),%xmm1
2509 leaq 16(%rcx),%rcx
2510 jnz .Loop_dec1_14
2511 .byte 102,15,56,223,209
2512 xorps %xmm10,%xmm2
2513 movups %xmm2,(%rsi)
2514
# Scrub all XMM registers and the stack scratch area so no key or
# plaintext material is left behind, then restore %rsp and return.
2515 .Lxts_dec_ret:
2516 xorps %xmm0,%xmm0
2517 pxor %xmm1,%xmm1
2518 pxor %xmm2,%xmm2
2519 pxor %xmm3,%xmm3
2520 pxor %xmm4,%xmm4
2521 pxor %xmm5,%xmm5
2522 pxor %xmm6,%xmm6
2523 pxor %xmm7,%xmm7
2524 movaps %xmm0,0(%rsp)
2525 pxor %xmm8,%xmm8
2526 movaps %xmm0,16(%rsp)
2527 pxor %xmm9,%xmm9
2528 movaps %xmm0,32(%rsp)
2529 pxor %xmm10,%xmm10
2530 movaps %xmm0,48(%rsp)
2531 pxor %xmm11,%xmm11
2532 movaps %xmm0,64(%rsp)
2533 pxor %xmm12,%xmm12
2534 movaps %xmm0,80(%rsp)
2535 pxor %xmm13,%xmm13
2536 movaps %xmm0,96(%rsp)
2537 pxor %xmm14,%xmm14
2538 pxor %xmm15,%xmm15
2539 leaq (%rbp),%rsp
2540 popq %rbp
2541 .Lxts_dec_epilogue:
2542 .byte 0xf3,0xc3
2543 .size aesni_xts_decrypt,.-aesni_xts_decrypt
2544 .globl aesni_cbc_encrypt
2545 .hidden aesni_cbc_encrypt
2546 .type aesni_cbc_encrypt,@function
2547 .align 16
2548 aesni_cbc_encrypt:
2549 testq %rdx,%rdx
2550 jz .Lcbc_ret
2551
2552 movl 240(%rcx),%r10d
2553 movq %rcx,%r11
2554 testl %r9d,%r9d
2555 jz .Lcbc_decrypt
2556
2557 movups (%r8),%xmm2
2558 movl %r10d,%eax
2559 cmpq $16,%rdx
2560 jb .Lcbc_enc_tail
2561 subq $16,%rdx
2562 jmp .Lcbc_enc_loop
2563 .align 16
2564 .Lcbc_enc_loop:
2565 movups (%rdi),%xmm3
2566 leaq 16(%rdi),%rdi
2567
2568 movups (%rcx),%xmm0
2569 movups 16(%rcx),%xmm1
2570 xorps %xmm0,%xmm3
2571 leaq 32(%rcx),%rcx
2572 xorps %xmm3,%xmm2
2573 .Loop_enc1_15:
2574 .byte 102,15,56,220,209
2575 decl %eax
2576 movups (%rcx),%xmm1
2577 leaq 16(%rcx),%rcx
2578 jnz .Loop_enc1_15
2579 .byte 102,15,56,221,209
2580 movl %r10d,%eax
2581 movq %r11,%rcx
2582 movups %xmm2,0(%rsi)
2583 leaq 16(%rsi),%rsi
2584 subq $16,%rdx
2585 jnc .Lcbc_enc_loop
2586 addq $16,%rdx
2587 jnz .Lcbc_enc_tail
2588 pxor %xmm0,%xmm0
2589 pxor %xmm1,%xmm1
2590 movups %xmm2,(%r8)
2591 pxor %xmm2,%xmm2
2592 pxor %xmm3,%xmm3
2593 jmp .Lcbc_ret
2594
2595 .Lcbc_enc_tail:
2596 movq %rdx,%rcx
2597 xchgq %rdi,%rsi
2598 .long 0x9066A4F3
2599 movl $16,%ecx
2600 subq %rdx,%rcx
2601 xorl %eax,%eax
2602 .long 0x9066AAF3
2603 leaq -16(%rdi),%rdi
2604 movl %r10d,%eax
2605 movq %rdi,%rsi
2606 movq %r11,%rcx
2607 xorq %rdx,%rdx
2608 jmp .Lcbc_enc_loop
2609
2610 .align 16
2611 .Lcbc_decrypt:
2612 cmpq $16,%rdx
2613 jne .Lcbc_decrypt_bulk
2614
2615
2616
2617 movdqu (%rdi),%xmm2
2618 movdqu (%r8),%xmm3
2619 movdqa %xmm2,%xmm4
2620 movups (%rcx),%xmm0
2621 movups 16(%rcx),%xmm1
2622 leaq 32(%rcx),%rcx
2623 xorps %xmm0,%xmm2
2624 .Loop_dec1_16:
2625 .byte 102,15,56,222,209
2626 decl %r10d
2627 movups (%rcx),%xmm1
2628 leaq 16(%rcx),%rcx
2629 jnz .Loop_dec1_16
2630 .byte 102,15,56,223,209
2631 pxor %xmm0,%xmm0
2632 pxor %xmm1,%xmm1
2633 movdqu %xmm4,(%r8)
2634 xorps %xmm3,%xmm2
2635 pxor %xmm3,%xmm3
2636 movups %xmm2,(%rsi)
2637 pxor %xmm2,%xmm2
2638 jmp .Lcbc_ret
2639 .align 16
2640 .Lcbc_decrypt_bulk:
2641 leaq (%rsp),%rax
2642 pushq %rbp
2643 subq $16,%rsp
2644 andq $-16,%rsp
2645 leaq -8(%rax),%rbp
2646 movups (%r8),%xmm10
2647 movl %r10d,%eax
2648 cmpq $0x50,%rdx
2649 jbe .Lcbc_dec_tail
2650
2651 movups (%rcx),%xmm0
2652 movdqu 0(%rdi),%xmm2
2653 movdqu 16(%rdi),%xmm3
2654 movdqa %xmm2,%xmm11
2655 movdqu 32(%rdi),%xmm4
2656 movdqa %xmm3,%xmm12
2657 movdqu 48(%rdi),%xmm5
2658 movdqa %xmm4,%xmm13
2659 movdqu 64(%rdi),%xmm6
2660 movdqa %xmm5,%xmm14
2661 movdqu 80(%rdi),%xmm7
2662 movdqa %xmm6,%xmm15
2663 movl OPENSSL_ia32cap_P+4(%rip),%r9d
2664 cmpq $0x70,%rdx
2665 jbe .Lcbc_dec_six_or_seven
2666
2667 andl $71303168,%r9d
2668 subq $0x50,%rdx
2669 cmpl $4194304,%r9d
2670 je .Lcbc_dec_loop6_enter
2671 subq $0x20,%rdx
2672 leaq 112(%rcx),%rcx
2673 jmp .Lcbc_dec_loop8_enter
2674 .align 16
2675 .Lcbc_dec_loop8:
2676 movups %xmm9,(%rsi)
2677 leaq 16(%rsi),%rsi
2678 .Lcbc_dec_loop8_enter:
2679 movdqu 96(%rdi),%xmm8
2680 pxor %xmm0,%xmm2
2681 movdqu 112(%rdi),%xmm9
2682 pxor %xmm0,%xmm3
2683 movups 16-112(%rcx),%xmm1
2684 pxor %xmm0,%xmm4
2685 xorq %r11,%r11
2686 cmpq $0x70,%rdx
2687 pxor %xmm0,%xmm5
2688 pxor %xmm0,%xmm6
2689 pxor %xmm0,%xmm7
2690 pxor %xmm0,%xmm8
2691
2692 .byte 102,15,56,222,209
2693 pxor %xmm0,%xmm9
2694 movups 32-112(%rcx),%xmm0
2695 .byte 102,15,56,222,217
2696 .byte 102,15,56,222,225
2697 .byte 102,15,56,222,233
2698 .byte 102,15,56,222,241
2699 .byte 102,15,56,222,249
2700 .byte 102,68,15,56,222,193
2701 setnc %r11b
2702 shlq $7,%r11
2703 .byte 102,68,15,56,222,201
2704 addq %rdi,%r11
2705 movups 48-112(%rcx),%xmm1
2706 .byte 102,15,56,222,208
2707 .byte 102,15,56,222,216
2708 .byte 102,15,56,222,224
2709 .byte 102,15,56,222,232
2710 .byte 102,15,56,222,240
2711 .byte 102,15,56,222,248
2712 .byte 102,68,15,56,222,192
2713 .byte 102,68,15,56,222,200
2714 movups 64-112(%rcx),%xmm0
2715 nop
2716 .byte 102,15,56,222,209
2717 .byte 102,15,56,222,217
2718 .byte 102,15,56,222,225
2719 .byte 102,15,56,222,233
2720 .byte 102,15,56,222,241
2721 .byte 102,15,56,222,249
2722 .byte 102,68,15,56,222,193
2723 .byte 102,68,15,56,222,201
2724 movups 80-112(%rcx),%xmm1
2725 nop
2726 .byte 102,15,56,222,208
2727 .byte 102,15,56,222,216
2728 .byte 102,15,56,222,224
2729 .byte 102,15,56,222,232
2730 .byte 102,15,56,222,240
2731 .byte 102,15,56,222,248
2732 .byte 102,68,15,56,222,192
2733 .byte 102,68,15,56,222,200
2734 movups 96-112(%rcx),%xmm0
2735 nop
2736 .byte 102,15,56,222,209
2737 .byte 102,15,56,222,217
2738 .byte 102,15,56,222,225
2739 .byte 102,15,56,222,233
2740 .byte 102,15,56,222,241
2741 .byte 102,15,56,222,249
2742 .byte 102,68,15,56,222,193
2743 .byte 102,68,15,56,222,201
2744 movups 112-112(%rcx),%xmm1
2745 nop
2746 .byte 102,15,56,222,208
2747 .byte 102,15,56,222,216
2748 .byte 102,15,56,222,224
2749 .byte 102,15,56,222,232
2750 .byte 102,15,56,222,240
2751 .byte 102,15,56,222,248
2752 .byte 102,68,15,56,222,192
2753 .byte 102,68,15,56,222,200
2754 movups 128-112(%rcx),%xmm0
2755 nop
2756 .byte 102,15,56,222,209
2757 .byte 102,15,56,222,217
2758 .byte 102,15,56,222,225
2759 .byte 102,15,56,222,233
2760 .byte 102,15,56,222,241
2761 .byte 102,15,56,222,249
2762 .byte 102,68,15,56,222,193
2763 .byte 102,68,15,56,222,201
2764 movups 144-112(%rcx),%xmm1
2765 cmpl $11,%eax
2766 .byte 102,15,56,222,208
2767 .byte 102,15,56,222,216
2768 .byte 102,15,56,222,224
2769 .byte 102,15,56,222,232
2770 .byte 102,15,56,222,240
2771 .byte 102,15,56,222,248
2772 .byte 102,68,15,56,222,192
2773 .byte 102,68,15,56,222,200
2774 movups 160-112(%rcx),%xmm0
2775 jb .Lcbc_dec_done
2776 .byte 102,15,56,222,209
2777 .byte 102,15,56,222,217
2778 .byte 102,15,56,222,225
2779 .byte 102,15,56,222,233
2780 .byte 102,15,56,222,241
2781 .byte 102,15,56,222,249
2782 .byte 102,68,15,56,222,193
2783 .byte 102,68,15,56,222,201
2784 movups 176-112(%rcx),%xmm1
2785 nop
2786 .byte 102,15,56,222,208
2787 .byte 102,15,56,222,216
2788 .byte 102,15,56,222,224
2789 .byte 102,15,56,222,232
2790 .byte 102,15,56,222,240
2791 .byte 102,15,56,222,248
2792 .byte 102,68,15,56,222,192
2793 .byte 102,68,15,56,222,200
2794 movups 192-112(%rcx),%xmm0
2795 je .Lcbc_dec_done
2796 .byte 102,15,56,222,209
2797 .byte 102,15,56,222,217
2798 .byte 102,15,56,222,225
2799 .byte 102,15,56,222,233
2800 .byte 102,15,56,222,241
2801 .byte 102,15,56,222,249
2802 .byte 102,68,15,56,222,193
2803 .byte 102,68,15,56,222,201
2804 movups 208-112(%rcx),%xmm1
2805 nop
2806 .byte 102,15,56,222,208
2807 .byte 102,15,56,222,216
2808 .byte 102,15,56,222,224
2809 .byte 102,15,56,222,232
2810 .byte 102,15,56,222,240
2811 .byte 102,15,56,222,248
2812 .byte 102,68,15,56,222,192
2813 .byte 102,68,15,56,222,200
2814 movups 224-112(%rcx),%xmm0
2815 jmp .Lcbc_dec_done
2816 .align 16
2817 .Lcbc_dec_done:
2818 .byte 102,15,56,222,209
2819 .byte 102,15,56,222,217
2820 pxor %xmm0,%xmm10
2821 pxor %xmm0,%xmm11
2822 .byte 102,15,56,222,225
2823 .byte 102,15,56,222,233
2824 pxor %xmm0,%xmm12
2825 pxor %xmm0,%xmm13
2826 .byte 102,15,56,222,241
2827 .byte 102,15,56,222,249
2828 pxor %xmm0,%xmm14
2829 pxor %xmm0,%xmm15
2830 .byte 102,68,15,56,222,193
2831 .byte 102,68,15,56,222,201
2832 movdqu 80(%rdi),%xmm1
2833
2834 .byte 102,65,15,56,223,210
2835 movdqu 96(%rdi),%xmm10
2836 pxor %xmm0,%xmm1
2837 .byte 102,65,15,56,223,219
2838 pxor %xmm0,%xmm10
2839 movdqu 112(%rdi),%xmm0
2840 .byte 102,65,15,56,223,228
2841 leaq 128(%rdi),%rdi
2842 movdqu 0(%r11),%xmm11
2843 .byte 102,65,15,56,223,237
2844 .byte 102,65,15,56,223,246
2845 movdqu 16(%r11),%xmm12
2846 movdqu 32(%r11),%xmm13
2847 .byte 102,65,15,56,223,255
2848 .byte 102,68,15,56,223,193
2849 movdqu 48(%r11),%xmm14
2850 movdqu 64(%r11),%xmm15
2851 .byte 102,69,15,56,223,202
2852 movdqa %xmm0,%xmm10
2853 movdqu 80(%r11),%xmm1
2854 movups -112(%rcx),%xmm0
2855
2856 movups %xmm2,(%rsi)
2857 movdqa %xmm11,%xmm2
2858 movups %xmm3,16(%rsi)
2859 movdqa %xmm12,%xmm3
2860 movups %xmm4,32(%rsi)
2861 movdqa %xmm13,%xmm4
2862 movups %xmm5,48(%rsi)
2863 movdqa %xmm14,%xmm5
2864 movups %xmm6,64(%rsi)
2865 movdqa %xmm15,%xmm6
2866 movups %xmm7,80(%rsi)
2867 movdqa %xmm1,%xmm7
2868 movups %xmm8,96(%rsi)
2869 leaq 112(%rsi),%rsi
2870
2871 subq $0x80,%rdx
2872 ja .Lcbc_dec_loop8
2873
2874 movaps %xmm9,%xmm2
2875 leaq -112(%rcx),%rcx
2876 addq $0x70,%rdx
2877 jle .Lcbc_dec_clear_tail_collected
2878 movups %xmm9,(%rsi)
2879 leaq 16(%rsi),%rsi
2880 cmpq $0x50,%rdx
2881 jbe .Lcbc_dec_tail
2882
2883 movaps %xmm11,%xmm2
2884 .Lcbc_dec_six_or_seven:
2885 cmpq $0x60,%rdx
2886 ja .Lcbc_dec_seven
2887
2888 movaps %xmm7,%xmm8
2889 call _aesni_decrypt6
2890 pxor %xmm10,%xmm2
2891 movaps %xmm8,%xmm10
2892 pxor %xmm11,%xmm3
2893 movdqu %xmm2,(%rsi)
2894 pxor %xmm12,%xmm4
2895 movdqu %xmm3,16(%rsi)
2896 pxor %xmm3,%xmm3
2897 pxor %xmm13,%xmm5
2898 movdqu %xmm4,32(%rsi)
2899 pxor %xmm4,%xmm4
2900 pxor %xmm14,%xmm6
2901 movdqu %xmm5,48(%rsi)
2902 pxor %xmm5,%xmm5
2903 pxor %xmm15,%xmm7
2904 movdqu %xmm6,64(%rsi)
2905 pxor %xmm6,%xmm6
2906 leaq 80(%rsi),%rsi
2907 movdqa %xmm7,%xmm2
2908 pxor %xmm7,%xmm7
2909 jmp .Lcbc_dec_tail_collected
2910
2911 .align 16
2912 .Lcbc_dec_seven:
2913 movups 96(%rdi),%xmm8
2914 xorps %xmm9,%xmm9
2915 call _aesni_decrypt8
2916 movups 80(%rdi),%xmm9
2917 pxor %xmm10,%xmm2
2918 movups 96(%rdi),%xmm10
2919 pxor %xmm11,%xmm3
2920 movdqu %xmm2,(%rsi)
2921 pxor %xmm12,%xmm4
2922 movdqu %xmm3,16(%rsi)
2923 pxor %xmm3,%xmm3
2924 pxor %xmm13,%xmm5
2925 movdqu %xmm4,32(%rsi)
2926 pxor %xmm4,%xmm4
2927 pxor %xmm14,%xmm6
2928 movdqu %xmm5,48(%rsi)
2929 pxor %xmm5,%xmm5
2930 pxor %xmm15,%xmm7
2931 movdqu %xmm6,64(%rsi)
2932 pxor %xmm6,%xmm6
2933 pxor %xmm9,%xmm8
2934 movdqu %xmm7,80(%rsi)
2935 pxor %xmm7,%xmm7
2936 leaq 96(%rsi),%rsi
2937 movdqa %xmm8,%xmm2
2938 pxor %xmm8,%xmm8
2939 pxor %xmm9,%xmm9
2940 jmp .Lcbc_dec_tail_collected
2941
2942 .align 16
2943 .Lcbc_dec_loop6:
2944 movups %xmm7,(%rsi)
2945 leaq 16(%rsi),%rsi
2946 movdqu 0(%rdi),%xmm2
2947 movdqu 16(%rdi),%xmm3
2948 movdqa %xmm2,%xmm11
2949 movdqu 32(%rdi),%xmm4
2950 movdqa %xmm3,%xmm12
2951 movdqu 48(%rdi),%xmm5
2952 movdqa %xmm4,%xmm13
2953 movdqu 64(%rdi),%xmm6
2954 movdqa %xmm5,%xmm14
2955 movdqu 80(%rdi),%xmm7
2956 movdqa %xmm6,%xmm15
2957 .Lcbc_dec_loop6_enter:
2958 leaq 96(%rdi),%rdi
2959 movdqa %xmm7,%xmm8
2960
2961 call _aesni_decrypt6
2962
2963 pxor %xmm10,%xmm2
2964 movdqa %xmm8,%xmm10
2965 pxor %xmm11,%xmm3
2966 movdqu %xmm2,(%rsi)
2967 pxor %xmm12,%xmm4
2968 movdqu %xmm3,16(%rsi)
2969 pxor %xmm13,%xmm5
2970 movdqu %xmm4,32(%rsi)
2971 pxor %xmm14,%xmm6
2972 movq %r11,%rcx
2973 movdqu %xmm5,48(%rsi)
2974 pxor %xmm15,%xmm7
2975 movl %r10d,%eax
2976 movdqu %xmm6,64(%rsi)
2977 leaq 80(%rsi),%rsi
2978 subq $0x60,%rdx
2979 ja .Lcbc_dec_loop6
2980
2981 movdqa %xmm7,%xmm2
2982 addq $0x50,%rdx
2983 jle .Lcbc_dec_clear_tail_collected
2984 movups %xmm7,(%rsi)
2985 leaq 16(%rsi),%rsi
2986
2987 .Lcbc_dec_tail:
2988 movups (%rdi),%xmm2
2989 subq $0x10,%rdx
2990 jbe .Lcbc_dec_one
2991
2992 movups 16(%rdi),%xmm3
2993 movaps %xmm2,%xmm11
2994 subq $0x10,%rdx
2995 jbe .Lcbc_dec_two
2996
2997 movups 32(%rdi),%xmm4
2998 movaps %xmm3,%xmm12
2999 subq $0x10,%rdx
3000 jbe .Lcbc_dec_three
3001
3002 movups 48(%rdi),%xmm5
3003 movaps %xmm4,%xmm13
3004 subq $0x10,%rdx
3005 jbe .Lcbc_dec_four
3006
3007 movups 64(%rdi),%xmm6
3008 movaps %xmm5,%xmm14
3009 movaps %xmm6,%xmm15
3010 xorps %xmm7,%xmm7
3011 call _aesni_decrypt6
3012 pxor %xmm10,%xmm2
3013 movaps %xmm15,%xmm10
3014 pxor %xmm11,%xmm3
3015 movdqu %xmm2,(%rsi)
3016 pxor %xmm12,%xmm4
3017 movdqu %xmm3,16(%rsi)
3018 pxor %xmm3,%xmm3
3019 pxor %xmm13,%xmm5
3020 movdqu %xmm4,32(%rsi)
3021 pxor %xmm4,%xmm4
3022 pxor %xmm14,%xmm6
3023 movdqu %xmm5,48(%rsi)
3024 pxor %xmm5,%xmm5
3025 leaq 64(%rsi),%rsi
3026 movdqa %xmm6,%xmm2
3027 pxor %xmm6,%xmm6
3028 pxor %xmm7,%xmm7
3029 subq $0x10,%rdx
3030 jmp .Lcbc_dec_tail_collected
3031
3032 .align 16
3033 .Lcbc_dec_one:
3034 movaps %xmm2,%xmm11
3035 movups (%rcx),%xmm0
3036 movups 16(%rcx),%xmm1
3037 leaq 32(%rcx),%rcx
3038 xorps %xmm0,%xmm2
3039 .Loop_dec1_17:
3040 .byte 102,15,56,222,209
3041 decl %eax
3042 movups (%rcx),%xmm1
3043 leaq 16(%rcx),%rcx
3044 jnz .Loop_dec1_17
3045 .byte 102,15,56,223,209
3046 xorps %xmm10,%xmm2
3047 movaps %xmm11,%xmm10
3048 jmp .Lcbc_dec_tail_collected
3049 .align 16
3050 .Lcbc_dec_two:
3051 movaps %xmm3,%xmm12
3052 call _aesni_decrypt2
3053 pxor %xmm10,%xmm2
3054 movaps %xmm12,%xmm10
3055 pxor %xmm11,%xmm3
3056 movdqu %xmm2,(%rsi)
3057 movdqa %xmm3,%xmm2
3058 pxor %xmm3,%xmm3
3059 leaq 16(%rsi),%rsi
3060 jmp .Lcbc_dec_tail_collected
3061 .align 16
3062 .Lcbc_dec_three:
3063 movaps %xmm4,%xmm13
3064 call _aesni_decrypt3
3065 pxor %xmm10,%xmm2
3066 movaps %xmm13,%xmm10
3067 pxor %xmm11,%xmm3
3068 movdqu %xmm2,(%rsi)
3069 pxor %xmm12,%xmm4
3070 movdqu %xmm3,16(%rsi)
3071 pxor %xmm3,%xmm3
3072 movdqa %xmm4,%xmm2
3073 pxor %xmm4,%xmm4
3074 leaq 32(%rsi),%rsi
3075 jmp .Lcbc_dec_tail_collected
3076 .align 16
3077 .Lcbc_dec_four:
3078 movaps %xmm5,%xmm14
3079 call _aesni_decrypt4
3080 pxor %xmm10,%xmm2
3081 movaps %xmm14,%xmm10
3082 pxor %xmm11,%xmm3
3083 movdqu %xmm2,(%rsi)
3084 pxor %xmm12,%xmm4
3085 movdqu %xmm3,16(%rsi)
3086 pxor %xmm3,%xmm3
3087 pxor %xmm13,%xmm5
3088 movdqu %xmm4,32(%rsi)
3089 pxor %xmm4,%xmm4
3090 movdqa %xmm5,%xmm2
3091 pxor %xmm5,%xmm5
3092 leaq 48(%rsi),%rsi
3093 jmp .Lcbc_dec_tail_collected
3094
3095 .align 16
3096 .Lcbc_dec_clear_tail_collected:
3097 pxor %xmm3,%xmm3
3098 pxor %xmm4,%xmm4
3099 pxor %xmm5,%xmm5
3100 pxor %xmm6,%xmm6
3101 pxor %xmm7,%xmm7
3102 pxor %xmm8,%xmm8
3103 pxor %xmm9,%xmm9
3104 .Lcbc_dec_tail_collected:
3105 movups %xmm10,(%r8)
3106 andq $15,%rdx
3107 jnz .Lcbc_dec_tail_partial
3108 movups %xmm2,(%rsi)
3109 pxor %xmm2,%xmm2
3110 jmp .Lcbc_dec_ret
3111 .align 16
3112 .Lcbc_dec_tail_partial:
3113 movaps %xmm2,(%rsp)
3114 pxor %xmm2,%xmm2
3115 movq $16,%rcx
3116 movq %rsi,%rdi
3117 subq %rdx,%rcx
3118 leaq (%rsp),%rsi
3119 .long 0x9066A4F3
3120 movdqa %xmm2,(%rsp)
3121
3122 .Lcbc_dec_ret:
3123 xorps %xmm0,%xmm0
3124 pxor %xmm1,%xmm1
3125 leaq (%rbp),%rsp
3126 popq %rbp
3127 .Lcbc_ret:
3128 .byte 0xf3,0xc3
3129 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt
/*----------------------------------------------------------------------
 * int aesni_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
 * SysV AMD64: %rdi = userKey, %esi = bits, %rdx = key schedule.
 * Expands the encryption schedule via __aesni_set_encrypt_key, then
 * converts it in place to the form AESDEC expects ("equivalent inverse
 * cipher"): swap the first and last round keys and apply AESIMC
 * (InvMixColumns) to every middle round key.
 * Returns %eax = 0 on success, or the error code from the expansion.
 *--------------------------------------------------------------------*/
3130 .globl aesni_set_decrypt_key
3131 .hidden aesni_set_decrypt_key
3132 .type aesni_set_decrypt_key,@function
3133 .align 16
3134 aesni_set_decrypt_key:
3135 .byte 0x48,0x83,0xEC,0x08	/* subq $8,%rsp (hand-encoded to pin the exact encoding) */
3136 call __aesni_set_encrypt_key	/* build encrypt schedule; leaves its rounds value (9/11/13) in %esi */
3137 shll $4,%esi	/* %esi = 16 * rounds-value */
3138 testl %eax,%eax
3139 jnz .Ldec_key_ret	/* propagate expansion failure (%eax != 0) */
3140 leaq 16(%rdx,%rsi,1),%rdi	/* %rdi -> last round key; %rdx -> first */
3141 
/* Swap the outermost pair of round keys, then step both pointers inward. */
3142 movups (%rdx),%xmm0
3143 movups (%rdi),%xmm1
3144 movups %xmm0,(%rdi)
3145 movups %xmm1,(%rdx)
3146 leaq 16(%rdx),%rdx
3147 leaq -16(%rdi),%rdi
3148 
/* Walk inward from both ends: AESIMC each key and store it at the
 * mirror position (the 16/-16 offsets undo the pointer bumps above). */
3149 .Ldec_key_inverse:
3150 movups (%rdx),%xmm0
3151 movups (%rdi),%xmm1
3152 .byte 102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
3153 .byte 102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
3154 leaq 16(%rdx),%rdx
3155 leaq -16(%rdi),%rdi
3156 movups %xmm0,16(%rdi)
3157 movups %xmm1,-16(%rdx)
3158 cmpq %rdx,%rdi
3159 ja .Ldec_key_inverse	/* loop while the pointers have not crossed */
3160 
/* Odd key count: transform the single middle round key in place,
 * then scrub key material from the xmm registers. */
3161 movups (%rdx),%xmm0
3162 .byte 102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
3163 pxor %xmm1,%xmm1
3164 movups %xmm0,(%rdi)
3165 pxor %xmm0,%xmm0
3166 .Ldec_key_ret:
3167 addq $8,%rsp
3168 .byte 0xf3,0xc3	/* rep ret (two-byte return, branch-predictor friendly) */
3169 .LSEH_end_set_decrypt_key:
3170 .size aesni_set_decrypt_key,.-aesni_set_decrypt_key
3171 .globl aesni_set_encrypt_key
3172 .hidden aesni_set_encrypt_key
3173 .type aesni_set_encrypt_key,@function
3174 .align 16
/*----------------------------------------------------------------------
 * int aesni_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
 * SysV AMD64: %rdi = userKey, %esi = bits (128/192/256), %rdx = schedule.
 * Expands userKey into round keys at %rdx and stores the rounds value
 * (9/11/13) in the schedule's last slot. Returns %eax: 0 = ok,
 * -1 = NULL argument, -2 = unsupported key size.
 * Each key size has two code paths: the classic AESKEYGENASSIST path,
 * and an "_alt" path built from PSHUFB + AESENCLAST, selected from
 * OPENSSL_ia32cap_P feature bits.
 *--------------------------------------------------------------------*/
3175 aesni_set_encrypt_key:
3176 __aesni_set_encrypt_key:
3177 .byte 0x48,0x83,0xEC,0x08	/* subq $8,%rsp (hand-encoded) */
3178 movq $-1,%rax	/* default return: -1 (bad pointer) */
3179 testq %rdi,%rdi
3180 jz .Lenc_key_ret	/* NULL user key */
3181 testq %rdx,%rdx
3182 jz .Lenc_key_ret	/* NULL schedule */
3183 
3184 movl $268437504,%r10d	/* 0x10000800: capability mask (bit 28 = AVX; bit 11 semantics per ia32cap encoding - confirm against cpu-intel code) */
3185 movups (%rdi),%xmm0	/* xmm0 = first 128 key bits */
3186 xorps %xmm4,%xmm4	/* zero scratch for the shufps accumulator trick below */
3187 andl OPENSSL_ia32cap_P+4(%rip),%r10d
3188 leaq 16(%rdx),%rax	/* %rax walks the schedule from slot 1 */
3189 cmpl $256,%esi
3190 je .L14rounds
3191 cmpl $192,%esi
3192 je .L12rounds
3193 cmpl $128,%esi
3194 jne .Lbad_keybits
3195 
/* AES-128: stored rounds value 9 (the en/decrypt loops run 9 middle
 * rounds plus one AESENCLAST/AESDECLAST; 11 round keys total). */
3196 .L10rounds:
3197 movl $9,%esi
3198 cmpl $268435456,%r10d	/* 0x10000000: AVX-capable -> constant-time alt path */
3199 je .L10rounds_alt
3200 
/* Classic path: each .byte group is aeskeygenassist $rcon,%xmm0,%xmm1
 * (102,15,58,223,200,rcon); rcon runs 1,2,4,...,0x1b,0x36. */
3201 movups %xmm0,(%rdx)	/* round key 0 = raw user key */
3202 .byte 102,15,58,223,200,1
3203 call .Lkey_expansion_128_cold
3204 .byte 102,15,58,223,200,2
3205 call .Lkey_expansion_128
3206 .byte 102,15,58,223,200,4
3207 call .Lkey_expansion_128
3208 .byte 102,15,58,223,200,8
3209 call .Lkey_expansion_128
3210 .byte 102,15,58,223,200,16
3211 call .Lkey_expansion_128
3212 .byte 102,15,58,223,200,32
3213 call .Lkey_expansion_128
3214 .byte 102,15,58,223,200,64
3215 call .Lkey_expansion_128
3216 .byte 102,15,58,223,200,128
3217 call .Lkey_expansion_128
3218 .byte 102,15,58,223,200,27
3219 call .Lkey_expansion_128
3220 .byte 102,15,58,223,200,54
3221 call .Lkey_expansion_128
3222 movups %xmm0,(%rax)	/* store final round key (offset 160) */
3223 movl %esi,80(%rax)	/* rounds slot at %rdx+240 */
3224 xorl %eax,%eax	/* success */
3225 jmp .Lenc_key_ret
3226 
/* Alt path (no AESKEYGENASSIST): broadcast RotWord(w3) to all four
 * lanes with pshufb - with identical columns AESENCLAST's ShiftRows is
 * a no-op, so it performs SubBytes and xors the rcon held in xmm4. */
3227 .align 16
3228 .L10rounds_alt:
3229 movdqa .Lkey_rotate(%rip),%xmm5	/* pshufb control */
3230 movl $8,%r10d	/* 8 looped rounds + 2 unrolled below */
3231 movdqa .Lkey_rcon1(%rip),%xmm4	/* rcon = 1,1,1,1 */
3232 movdqa %xmm0,%xmm2
3233 movdqu %xmm0,(%rdx)	/* round key 0 */
3234 jmp .Loop_key128
3235 
3236 .align 16
3237 .Loop_key128:
3238 .byte 102,15,56,0,197	/* pshufb %xmm5,%xmm0: broadcast RotWord(w3) */
3239 .byte 102,15,56,221,196	/* aesenclast %xmm4,%xmm0: SubWord ^ rcon */
3240 pslld $1,%xmm4	/* double rcon for the next round */
3241 leaq 16(%rax),%rax
3242 
/* Sliding-xor: xmm2 = prefix-xor of the previous key's four words. */
3243 movdqa %xmm2,%xmm3
3244 pslldq $4,%xmm2
3245 pxor %xmm2,%xmm3
3246 pslldq $4,%xmm2
3247 pxor %xmm2,%xmm3
3248 pslldq $4,%xmm2
3249 pxor %xmm3,%xmm2
3250 
3251 pxor %xmm2,%xmm0	/* next round key */
3252 movdqu %xmm0,-16(%rax)
3253 movdqa %xmm0,%xmm2
3254 
3255 decl %r10d
3256 jnz .Loop_key128
3257 
3258 movdqa .Lkey_rcon1b(%rip),%xmm4	/* rcon wraps to 0x1b for round 9 */
3259 
3260 .byte 102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
3261 .byte 102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
3262 pslld $1,%xmm4	/* rcon 0x1b -> 0x36 for the last round */
3263 
3264 movdqa %xmm2,%xmm3
3265 pslldq $4,%xmm2
3266 pxor %xmm2,%xmm3
3267 pslldq $4,%xmm2
3268 pxor %xmm2,%xmm3
3269 pslldq $4,%xmm2
3270 pxor %xmm3,%xmm2
3271 
3272 pxor %xmm2,%xmm0	/* round key 9 */
3273 movdqu %xmm0,(%rax)
3274 
3275 movdqa %xmm0,%xmm2
3276 .byte 102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
3277 .byte 102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
3278 
3279 movdqa %xmm2,%xmm3
3280 pslldq $4,%xmm2
3281 pxor %xmm2,%xmm3
3282 pslldq $4,%xmm2
3283 pxor %xmm2,%xmm3
3284 pslldq $4,%xmm2
3285 pxor %xmm3,%xmm2
3286 
3287 pxor %xmm2,%xmm0	/* round key 10 */
3288 movdqu %xmm0,16(%rax)
3289 
3290 movl %esi,96(%rax)	/* rounds slot at %rdx+240 */
3291 xorl %eax,%eax
3292 jmp .Lenc_key_ret
3293 
/* AES-192: key material arrives as 128 + 64 bits (xmm0, low half of xmm2). */
3294 .align 16
3295 .L12rounds:
3296 movq 16(%rdi),%xmm2	/* remaining 64 key bits */
3297 movl $11,%esi	/* stored rounds value */
3298 cmpl $268435456,%r10d
3299 je .L12rounds_alt
3300 
/* .byte groups: aeskeygenassist $rcon,%xmm2,%xmm1 (102,15,58,223,202,rcon). */
3301 movups %xmm0,(%rdx)
3302 .byte 102,15,58,223,202,1
3303 call .Lkey_expansion_192a_cold
3304 .byte 102,15,58,223,202,2
3305 call .Lkey_expansion_192b
3306 .byte 102,15,58,223,202,4
3307 call .Lkey_expansion_192a
3308 .byte 102,15,58,223,202,8
3309 call .Lkey_expansion_192b
3310 .byte 102,15,58,223,202,16
3311 call .Lkey_expansion_192a
3312 .byte 102,15,58,223,202,32
3313 call .Lkey_expansion_192b
3314 .byte 102,15,58,223,202,64
3315 call .Lkey_expansion_192a
3316 .byte 102,15,58,223,202,128
3317 call .Lkey_expansion_192b
3318 movups %xmm0,(%rax)	/* final round key */
3319 movl %esi,48(%rax)	/* rounds slot at %rdx+240 */
3320 xorq %rax,%rax
3321 jmp .Lenc_key_ret
3322 
3323 .align 16
3324 .L12rounds_alt:
3325 movdqa .Lkey_rotate192(%rip),%xmm5	/* broadcast RotWord(w1) control */
3326 movdqa .Lkey_rcon1(%rip),%xmm4
3327 movl $8,%r10d	/* 8 iterations, each emits 24 schedule bytes */
3328 movdqu %xmm0,(%rdx)
3329 jmp .Loop_key192
3330 
3331 .align 16
3332 .Loop_key192:
3333 movq %xmm2,0(%rax)	/* store the 64-bit half-key */
3334 movdqa %xmm2,%xmm1
3335 .byte 102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
3336 .byte 102,15,56,221,212	/* aesenclast %xmm4,%xmm2: SubWord ^ rcon */
3337 pslld $1,%xmm4	/* next rcon */
3338 leaq 24(%rax),%rax	/* 192-bit stride: 1.5 xmm blocks per iteration */
3339 
/* Prefix-xor of the previous 128-bit block (same sliding trick). */
3340 movdqa %xmm0,%xmm3
3341 pslldq $4,%xmm0
3342 pxor %xmm0,%xmm3
3343 pslldq $4,%xmm0
3344 pxor %xmm0,%xmm3
3345 pslldq $4,%xmm0
3346 pxor %xmm3,%xmm0
3347 
/* Fold in the carried 64-bit half (saved in xmm1). */
3348 pshufd $0xff,%xmm0,%xmm3
3349 pxor %xmm1,%xmm3
3350 pslldq $4,%xmm1
3351 pxor %xmm1,%xmm3
3352 
3353 pxor %xmm2,%xmm0	/* next 128 schedule bits */
3354 pxor %xmm3,%xmm2	/* next 64-bit carry */
3355 movdqu %xmm0,-16(%rax)
3356 
3357 decl %r10d
3358 jnz .Loop_key192
3359 
3360 movl %esi,32(%rax)	/* rounds slot at %rdx+240 */
3361 xorl %eax,%eax
3362 jmp .Lenc_key_ret
3363 
/* AES-256: two 128-bit key halves (xmm0, xmm2); 15 round keys. */
3364 .align 16
3365 .L14rounds:
3366 movups 16(%rdi),%xmm2	/* second 128 key bits */
3367 movl $13,%esi	/* stored rounds value */
3368 leaq 16(%rax),%rax	/* schedule cursor past the two raw halves */
3369 cmpl $268435456,%r10d
3370 je .L14rounds_alt
3371 
/* Alternating keygenassist sources: ...,202,rcon reads xmm2 (even keys),
 * ...,200,rcon reads xmm0 (odd keys, SubWord only - see _256b). */
3372 movups %xmm0,(%rdx)
3373 movups %xmm2,16(%rdx)
3374 .byte 102,15,58,223,202,1
3375 call .Lkey_expansion_256a_cold
3376 .byte 102,15,58,223,200,1
3377 call .Lkey_expansion_256b
3378 .byte 102,15,58,223,202,2
3379 call .Lkey_expansion_256a
3380 .byte 102,15,58,223,200,2
3381 call .Lkey_expansion_256b
3382 .byte 102,15,58,223,202,4
3383 call .Lkey_expansion_256a
3384 .byte 102,15,58,223,200,4
3385 call .Lkey_expansion_256b
3386 .byte 102,15,58,223,202,8
3387 call .Lkey_expansion_256a
3388 .byte 102,15,58,223,200,8
3389 call .Lkey_expansion_256b
3390 .byte 102,15,58,223,202,16
3391 call .Lkey_expansion_256a
3392 .byte 102,15,58,223,200,16
3393 call .Lkey_expansion_256b
3394 .byte 102,15,58,223,202,32
3395 call .Lkey_expansion_256a
3396 .byte 102,15,58,223,200,32
3397 call .Lkey_expansion_256b
3398 .byte 102,15,58,223,202,64
3399 call .Lkey_expansion_256a
3400 movups %xmm0,(%rax)	/* final round key */
3401 movl %esi,16(%rax)	/* rounds slot at %rdx+240 */
3402 xorq %rax,%rax
3403 jmp .Lenc_key_ret
3404 
3405 .align 16
3406 .L14rounds_alt:
3407 movdqa .Lkey_rotate(%rip),%xmm5
3408 movdqa .Lkey_rcon1(%rip),%xmm4
3409 movl $7,%r10d	/* 7 iterations, two round keys each */
3410 movdqu %xmm0,0(%rdx)
3411 movdqa %xmm2,%xmm1
3412 movdqu %xmm2,16(%rdx)
3413 jmp .Loop_key256
3414 
3415 .align 16
3416 .Loop_key256:
3417 .byte 102,15,56,0,213	/* pshufb %xmm5,%xmm2: broadcast RotWord(w3) */
3418 .byte 102,15,56,221,212	/* aesenclast %xmm4,%xmm2: SubWord ^ rcon */
3419 
3420 movdqa %xmm0,%xmm3
3421 pslldq $4,%xmm0
3422 pxor %xmm0,%xmm3
3423 pslldq $4,%xmm0
3424 pxor %xmm0,%xmm3
3425 pslldq $4,%xmm0
3426 pxor %xmm3,%xmm0
3427 pslld $1,%xmm4	/* next rcon */
3428 
3429 pxor %xmm2,%xmm0	/* even round key */
3430 movdqu %xmm0,(%rax)
3431 
3432 decl %r10d
3433 jz .Ldone_key256
3434 
/* Odd round key: SubWord only (aesenclast with a zero "round key"
 * adds no rcon), per the AES-256 schedule definition. */
3435 pshufd $0xff,%xmm0,%xmm2	/* broadcast w3; ShiftRows again a no-op */
3436 pxor %xmm3,%xmm3
3437 .byte 102,15,56,221,211	/* aesenclast %xmm3,%xmm2 */
3438 
3439 movdqa %xmm1,%xmm3
3440 pslldq $4,%xmm1
3441 pxor %xmm1,%xmm3
3442 pslldq $4,%xmm1
3443 pxor %xmm1,%xmm3
3444 pslldq $4,%xmm1
3445 pxor %xmm3,%xmm1
3446 
3447 pxor %xmm1,%xmm2	/* odd round key */
3448 movdqu %xmm2,16(%rax)
3449 leaq 32(%rax),%rax
3450 movdqa %xmm2,%xmm1
3451 
3452 jmp .Loop_key256
3453 
3454 .Ldone_key256:
3455 movl %esi,16(%rax)	/* rounds slot at %rdx+240 */
3456 xorl %eax,%eax
3457 jmp .Lenc_key_ret
3458 
3459 .align 16
3460 .Lbad_keybits:
3461 movq $-2,%rax	/* -2 = unsupported key size */
/* Common exit: scrub key material from xmm registers, restore stack. */
3462 .Lenc_key_ret:
3463 pxor %xmm0,%xmm0
3464 pxor %xmm1,%xmm1
3465 pxor %xmm2,%xmm2
3466 pxor %xmm3,%xmm3
3467 pxor %xmm4,%xmm4
3468 pxor %xmm5,%xmm5
3469 addq $8,%rsp
3470 .byte 0xf3,0xc3	/* rep ret */
3471 .LSEH_end_set_encrypt_key:
3472 
/* Subroutine for the classic AES-128 path. In: xmm0 = previous round
 * key, xmm1 = AESKEYGENASSIST result, xmm4 = sliding zero/scratch.
 * The shufps $16 / $140 pair builds the prefix-xor of xmm0's words;
 * shufps $255 broadcasts the RotWord+SubWord+rcon dword from xmm1. */
3473 .align 16
3474 .Lkey_expansion_128:
3475 movups %xmm0,(%rax)	/* store previous round key */
3476 leaq 16(%rax),%rax
3477 .Lkey_expansion_128_cold:	/* first call: nothing to store yet */
3478 shufps $16,%xmm0,%xmm4
3479 xorps %xmm4,%xmm0
3480 shufps $140,%xmm0,%xmm4
3481 xorps %xmm4,%xmm0
3482 shufps $255,%xmm1,%xmm1	/* broadcast dword 3 of keygenassist */
3483 xorps %xmm1,%xmm0	/* xmm0 = next round key */
3484 .byte 0xf3,0xc3	/* rep ret */
3485 
/* AES-192 subroutines: xmm0 holds the main 128 bits, xmm2 the 64-bit
 * carry, xmm1 the keygenassist result. 192a stores one block then
 * falls into the shared "warm" expansion; 192b (below) first lays down
 * the straddling 1.5-block pair. */
3486 .align 16
3487 .Lkey_expansion_192a:
3488 movups %xmm0,(%rax)
3489 leaq 16(%rax),%rax
3490 .Lkey_expansion_192a_cold:
3491 movaps %xmm2,%xmm5
3492 .Lkey_expansion_192b_warm:
3493 shufps $16,%xmm0,%xmm4
3494 movdqa %xmm2,%xmm3
3495 xorps %xmm4,%xmm0
3496 shufps $140,%xmm0,%xmm4
3497 pslldq $4,%xmm3
3498 xorps %xmm4,%xmm0
3499 pshufd $85,%xmm1,%xmm1	/* broadcast dword 1 (RotWord path for 192) */
3500 pxor %xmm3,%xmm2
3501 pxor %xmm1,%xmm0
3502 pshufd $255,%xmm0,%xmm3	/* propagate new w3 into the carry */
3503 pxor %xmm3,%xmm2
3504 .byte 0xf3,0xc3	/* rep ret */
3505 
3506 .align 16
3507 .Lkey_expansion_192b:
3508 movaps %xmm0,%xmm3
3509 shufps $68,%xmm0,%xmm5	/* pack carry + low half of xmm0 */
3510 movups %xmm5,(%rax)
3511 shufps $78,%xmm2,%xmm3	/* pack high half of xmm0 + carry */
3512 movups %xmm3,16(%rax)
3513 leaq 32(%rax),%rax
3514 jmp .Lkey_expansion_192b_warm
3515 
/* AES-256 subroutines: 256a expands the even keys (rcon'd keygenassist
 * of xmm2 into xmm0); 256b the odd keys (SubWord-only keygenassist of
 * xmm0 into xmm2, hence shufps $170 selecting dword 2). */
3516 .align 16
3517 .Lkey_expansion_256a:
3518 movups %xmm2,(%rax)	/* store previous odd round key */
3519 leaq 16(%rax),%rax
3520 .Lkey_expansion_256a_cold:
3521 shufps $16,%xmm0,%xmm4
3522 xorps %xmm4,%xmm0
3523 shufps $140,%xmm0,%xmm4
3524 xorps %xmm4,%xmm0
3525 shufps $255,%xmm1,%xmm1	/* broadcast dword 3 (RotWord+SubWord+rcon) */
3526 xorps %xmm1,%xmm0
3527 .byte 0xf3,0xc3	/* rep ret */
3528 
3529 .align 16
3530 .Lkey_expansion_256b:
3531 movups %xmm0,(%rax)	/* store previous even round key */
3532 leaq 16(%rax),%rax
3533 
3534 shufps $16,%xmm2,%xmm4
3535 xorps %xmm4,%xmm2
3536 shufps $140,%xmm2,%xmm4
3537 xorps %xmm4,%xmm2
3538 shufps $170,%xmm1,%xmm1	/* broadcast dword 2 (SubWord only, no rcon) */
3539 xorps %xmm1,%xmm2
3540 .byte 0xf3,0xc3	/* rep ret */
3541 .size aesni_set_encrypt_key,.-aesni_set_encrypt_key
3542 .size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
3543 .align 64
/* pshufb control that reverses all 16 bytes (big-endian <-> little-endian). */
3544 .Lbswap_mask:
3545 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* Adds 6 to 32-bit counter lanes (used by the interleaved-by-6 CTR path). */
3546 .Lincrement32:
3547 .long 6,6,6,0
/* 128-bit little-endian increment by 1 (low qword +1). */
3548 .Lincrement64:
3549 .long 1,0,0,0
/* XTS tweak update constant: 0x87 is the GF(2^128) reduction polynomial. */
3550 .Lxts_magic:
3551 .long 0x87,0,1,0
/* Big-endian 128-bit +1 (applied to a byte-swapped counter block). */
3552 .Lincrement1:
3553 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
/* pshufb control for the _alt key schedules: broadcasts RotWord(w3) into
 * every dword lane, making AESENCLAST's ShiftRows a no-op. */
3554 .Lkey_rotate:
3555 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
/* Variant for the 192-bit _alt schedule: broadcasts RotWord(w1). */
3556 .Lkey_rotate192:
3557 .long 0x04070605,0x04070605,0x04070605,0x04070605
/* AES round-constant seed; doubled each round via pslld $1. */
3558 .Lkey_rcon1:
3559 .long 1,1,1,1
/* Round constant after the rcon sequence wraps (x^8 mod AES polynomial). */
3560 .Lkey_rcon1b:
3561 .long 0x1b,0x1b,0x1b,0x1b
3562 
/* ASCII attribution: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
3563 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32 ,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101 ,110,115,115,108,46,111,114,103,62,0
3564 .align 64
3565 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698