Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(288)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/aes/aesni-x86_64.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
# void aesni_encrypt(const uint8_t *in (%rdi), uint8_t *out (%rsi),
#                    const AES_KEY *key (%rdx))
# Encrypts a single 16-byte block with AES-NI.
# 240(key) is loaded as the loop counter — presumably the round count
# stored at the end of the key schedule (confirm against the AES_KEY layout).
# The .byte sequences are hand-encoded AES-NI instructions so the file
# assembles on toolchains without AES-NI mnemonic support:
#   102,15,56,220,209 = aesenc %xmm1,%xmm2
#   102,15,56,221,209 = aesenclast %xmm1,%xmm2
4 .globl _aesni_encrypt
5 .private_extern _aesni_encrypt
6
7 .p2align 4
8 _aesni_encrypt:
# xmm2 = plaintext block; eax = round counter; rdx walks the key schedule.
9 movups (%rdi),%xmm2
10 movl 240(%rdx),%eax
11 movups (%rdx),%xmm0
12 movups 16(%rdx),%xmm1
13 leaq 32(%rdx),%rdx
# Whitening: xor in round key 0, then loop one aesenc per remaining round.
14 xorps %xmm0,%xmm2
15 L$oop_enc1_1:
16 .byte 102,15,56,220,209
17 decl %eax
18 movups (%rdx),%xmm1
19 leaq 16(%rdx),%rdx
20 jnz L$oop_enc1_1
21 .byte 102,15,56,221,209
# Scrub round-key material from registers before returning.
22 pxor %xmm0,%xmm0
23 pxor %xmm1,%xmm1
24 movups %xmm2,(%rsi)
25 pxor %xmm2,%xmm2
# 0xf3,0xc3 = rep ret.
26 .byte 0xf3,0xc3
27
28
# void aesni_decrypt(const uint8_t *in (%rdi), uint8_t *out (%rsi),
#                    const AES_KEY *key (%rdx))
# Decrypts a single 16-byte block; mirror of _aesni_encrypt using
# hand-encoded aesdec/aesdeclast:
#   102,15,56,222,209 = aesdec %xmm1,%xmm2
#   102,15,56,223,209 = aesdeclast %xmm1,%xmm2
29 .globl _aesni_decrypt
30 .private_extern _aesni_decrypt
31
32 .p2align 4
33 _aesni_decrypt:
# xmm2 = ciphertext block; eax = round counter from 240(key).
34 movups (%rdi),%xmm2
35 movl 240(%rdx),%eax
36 movups (%rdx),%xmm0
37 movups 16(%rdx),%xmm1
38 leaq 32(%rdx),%rdx
39 xorps %xmm0,%xmm2
40 L$oop_dec1_2:
41 .byte 102,15,56,222,209
42 decl %eax
43 movups (%rdx),%xmm1
44 leaq 16(%rdx),%rdx
45 jnz L$oop_dec1_2
46 .byte 102,15,56,223,209
# Scrub key material from registers before returning.
47 pxor %xmm0,%xmm0
48 pxor %xmm1,%xmm1
49 movups %xmm2,(%rsi)
50 pxor %xmm2,%xmm2
# 0xf3,0xc3 = rep ret.
51 .byte 0xf3,0xc3
52
53
# Internal helper: encrypt 2 blocks (%xmm2,%xmm3) in parallel so the
# aesenc latency of one block hides behind the other.
# In: %rcx = key schedule, %eax = round count (callers load 240(key)).
# Clobbers %xmm0/%xmm1/%rax; %rcx is advanced past the schedule.
# .byte 102,15,56,220,* = aesenc; 102,15,56,221,* = aesenclast.
54 .p2align 4
55 _aesni_encrypt2:
56 movups (%rcx),%xmm0
# rax = rounds*16; rcx points past the schedule, rax becomes a negative
# offset that counts up to zero as rounds are consumed.
57 shll $4,%eax
58 movups 16(%rcx),%xmm1
59 xorps %xmm0,%xmm2
60 xorps %xmm0,%xmm3
61 movups 32(%rcx),%xmm0
62 leaq 32(%rcx,%rax,1),%rcx
63 negq %rax
64 addq $16,%rax
65
66 L$enc_loop2:
# Two rounds per iteration (keys in xmm1 then xmm0).
67 .byte 102,15,56,220,209
68 .byte 102,15,56,220,217
69 movups (%rcx,%rax,1),%xmm1
70 addq $32,%rax
71 .byte 102,15,56,220,208
72 .byte 102,15,56,220,216
73 movups -16(%rcx,%rax,1),%xmm0
# jnz uses the flags from the addq above: loop until rax reaches 0.
74 jnz L$enc_loop2
75
76 .byte 102,15,56,220,209
77 .byte 102,15,56,220,217
78 .byte 102,15,56,221,208
79 .byte 102,15,56,221,216
# 0xf3,0xc3 = rep ret.
80 .byte 0xf3,0xc3
81
82
# Internal helper: decrypt 2 blocks (%xmm2,%xmm3) in parallel.
# In: %rcx = key schedule, %eax = round count.  Mirror of _aesni_encrypt2
# with aesdec/aesdeclast (.byte 102,15,56,222,* / 223,*).
83 .p2align 4
84 _aesni_decrypt2:
85 movups (%rcx),%xmm0
86 shll $4,%eax
87 movups 16(%rcx),%xmm1
88 xorps %xmm0,%xmm2
89 xorps %xmm0,%xmm3
90 movups 32(%rcx),%xmm0
# rcx past the schedule; rax = negative byte offset counting up to zero.
91 leaq 32(%rcx,%rax,1),%rcx
92 negq %rax
93 addq $16,%rax
94
95 L$dec_loop2:
96 .byte 102,15,56,222,209
97 .byte 102,15,56,222,217
98 movups (%rcx,%rax,1),%xmm1
99 addq $32,%rax
100 .byte 102,15,56,222,208
101 .byte 102,15,56,222,216
102 movups -16(%rcx,%rax,1),%xmm0
103 jnz L$dec_loop2
104
105 .byte 102,15,56,222,209
106 .byte 102,15,56,222,217
107 .byte 102,15,56,223,208
108 .byte 102,15,56,223,216
# 0xf3,0xc3 = rep ret.
109 .byte 0xf3,0xc3
110
111
# Internal helper: encrypt 3 blocks (%xmm2..%xmm4) in parallel.
# In: %rcx = key schedule, %eax = round count.  Same loop shape as
# _aesni_encrypt2 with one more interleaved aesenc stream.
112 .p2align 4
113 _aesni_encrypt3:
114 movups (%rcx),%xmm0
115 shll $4,%eax
116 movups 16(%rcx),%xmm1
117 xorps %xmm0,%xmm2
118 xorps %xmm0,%xmm3
119 xorps %xmm0,%xmm4
120 movups 32(%rcx),%xmm0
# rcx past the schedule; rax = negative byte offset counting up to zero.
121 leaq 32(%rcx,%rax,1),%rcx
122 negq %rax
123 addq $16,%rax
124
125 L$enc_loop3:
126 .byte 102,15,56,220,209
127 .byte 102,15,56,220,217
128 .byte 102,15,56,220,225
129 movups (%rcx,%rax,1),%xmm1
130 addq $32,%rax
131 .byte 102,15,56,220,208
132 .byte 102,15,56,220,216
133 .byte 102,15,56,220,224
134 movups -16(%rcx,%rax,1),%xmm0
135 jnz L$enc_loop3
136
137 .byte 102,15,56,220,209
138 .byte 102,15,56,220,217
139 .byte 102,15,56,220,225
140 .byte 102,15,56,221,208
141 .byte 102,15,56,221,216
142 .byte 102,15,56,221,224
# 0xf3,0xc3 = rep ret.
143 .byte 0xf3,0xc3
144
145
# Internal helper: decrypt 3 blocks (%xmm2..%xmm4) in parallel.
# In: %rcx = key schedule, %eax = round count.
146 .p2align 4
147 _aesni_decrypt3:
148 movups (%rcx),%xmm0
149 shll $4,%eax
150 movups 16(%rcx),%xmm1
151 xorps %xmm0,%xmm2
152 xorps %xmm0,%xmm3
153 xorps %xmm0,%xmm4
154 movups 32(%rcx),%xmm0
# rcx past the schedule; rax = negative byte offset counting up to zero.
155 leaq 32(%rcx,%rax,1),%rcx
156 negq %rax
157 addq $16,%rax
158
159 L$dec_loop3:
160 .byte 102,15,56,222,209
161 .byte 102,15,56,222,217
162 .byte 102,15,56,222,225
163 movups (%rcx,%rax,1),%xmm1
164 addq $32,%rax
165 .byte 102,15,56,222,208
166 .byte 102,15,56,222,216
167 .byte 102,15,56,222,224
168 movups -16(%rcx,%rax,1),%xmm0
169 jnz L$dec_loop3
170
171 .byte 102,15,56,222,209
172 .byte 102,15,56,222,217
173 .byte 102,15,56,222,225
174 .byte 102,15,56,223,208
175 .byte 102,15,56,223,216
176 .byte 102,15,56,223,224
# 0xf3,0xc3 = rep ret.
177 .byte 0xf3,0xc3
178
179
# Internal helper: encrypt 4 blocks (%xmm2..%xmm5) in parallel.
# In: %rcx = key schedule, %eax = round count.
180 .p2align 4
181 _aesni_encrypt4:
182 movups (%rcx),%xmm0
183 shll $4,%eax
184 movups 16(%rcx),%xmm1
185 xorps %xmm0,%xmm2
186 xorps %xmm0,%xmm3
187 xorps %xmm0,%xmm4
188 xorps %xmm0,%xmm5
189 movups 32(%rcx),%xmm0
190 leaq 32(%rcx,%rax,1),%rcx
191 negq %rax
# 0x0f,0x1f,0x00 is a 3-byte NOP — decoder/alignment padding only.
192 .byte 0x0f,0x1f,0x00
193 addq $16,%rax
194
195 L$enc_loop4:
196 .byte 102,15,56,220,209
197 .byte 102,15,56,220,217
198 .byte 102,15,56,220,225
199 .byte 102,15,56,220,233
200 movups (%rcx,%rax,1),%xmm1
201 addq $32,%rax
202 .byte 102,15,56,220,208
203 .byte 102,15,56,220,216
204 .byte 102,15,56,220,224
205 .byte 102,15,56,220,232
206 movups -16(%rcx,%rax,1),%xmm0
207 jnz L$enc_loop4
208
209 .byte 102,15,56,220,209
210 .byte 102,15,56,220,217
211 .byte 102,15,56,220,225
212 .byte 102,15,56,220,233
213 .byte 102,15,56,221,208
214 .byte 102,15,56,221,216
215 .byte 102,15,56,221,224
216 .byte 102,15,56,221,232
# 0xf3,0xc3 = rep ret.
217 .byte 0xf3,0xc3
218
219
# Internal helper: decrypt 4 blocks (%xmm2..%xmm5) in parallel.
# In: %rcx = key schedule, %eax = round count.
220 .p2align 4
221 _aesni_decrypt4:
222 movups (%rcx),%xmm0
223 shll $4,%eax
224 movups 16(%rcx),%xmm1
225 xorps %xmm0,%xmm2
226 xorps %xmm0,%xmm3
227 xorps %xmm0,%xmm4
228 xorps %xmm0,%xmm5
229 movups 32(%rcx),%xmm0
230 leaq 32(%rcx,%rax,1),%rcx
231 negq %rax
# 0x0f,0x1f,0x00 is a 3-byte NOP — decoder/alignment padding only.
232 .byte 0x0f,0x1f,0x00
233 addq $16,%rax
234
235 L$dec_loop4:
236 .byte 102,15,56,222,209
237 .byte 102,15,56,222,217
238 .byte 102,15,56,222,225
239 .byte 102,15,56,222,233
240 movups (%rcx,%rax,1),%xmm1
241 addq $32,%rax
242 .byte 102,15,56,222,208
243 .byte 102,15,56,222,216
244 .byte 102,15,56,222,224
245 .byte 102,15,56,222,232
246 movups -16(%rcx,%rax,1),%xmm0
247 jnz L$dec_loop4
248
249 .byte 102,15,56,222,209
250 .byte 102,15,56,222,217
251 .byte 102,15,56,222,225
252 .byte 102,15,56,222,233
253 .byte 102,15,56,223,208
254 .byte 102,15,56,223,216
255 .byte 102,15,56,223,224
256 .byte 102,15,56,223,232
# 0xf3,0xc3 = rep ret.
257 .byte 0xf3,0xc3
258
259
# Internal helper: encrypt 6 blocks (%xmm2..%xmm7) in parallel.
# In: %rcx = key schedule, %eax = round count.
# The first round for the early registers is issued during setup, so the
# main loop is entered mid-round at L$enc_loop6_enter.
260 .p2align 4
261 _aesni_encrypt6:
262 movups (%rcx),%xmm0
263 shll $4,%eax
264 movups 16(%rcx),%xmm1
265 xorps %xmm0,%xmm2
266 pxor %xmm0,%xmm3
267 pxor %xmm0,%xmm4
# Start round 1 on xmm2/xmm3/xmm4 while the rest are still being whitened.
268 .byte 102,15,56,220,209
269 leaq 32(%rcx,%rax,1),%rcx
270 negq %rax
271 .byte 102,15,56,220,217
272 pxor %xmm0,%xmm5
273 pxor %xmm0,%xmm6
274 .byte 102,15,56,220,225
275 pxor %xmm0,%xmm7
276 movups (%rcx,%rax,1),%xmm0
277 addq $16,%rax
278 jmp L$enc_loop6_enter
279 .p2align 4
280 L$enc_loop6:
281 .byte 102,15,56,220,209
282 .byte 102,15,56,220,217
283 .byte 102,15,56,220,225
284 L$enc_loop6_enter:
285 .byte 102,15,56,220,233
286 .byte 102,15,56,220,241
287 .byte 102,15,56,220,249
288 movups (%rcx,%rax,1),%xmm1
289 addq $32,%rax
290 .byte 102,15,56,220,208
291 .byte 102,15,56,220,216
292 .byte 102,15,56,220,224
293 .byte 102,15,56,220,232
294 .byte 102,15,56,220,240
295 .byte 102,15,56,220,248
296 movups -16(%rcx,%rax,1),%xmm0
297 jnz L$enc_loop6
298
299 .byte 102,15,56,220,209
300 .byte 102,15,56,220,217
301 .byte 102,15,56,220,225
302 .byte 102,15,56,220,233
303 .byte 102,15,56,220,241
304 .byte 102,15,56,220,249
305 .byte 102,15,56,221,208
306 .byte 102,15,56,221,216
307 .byte 102,15,56,221,224
308 .byte 102,15,56,221,232
309 .byte 102,15,56,221,240
310 .byte 102,15,56,221,248
# 0xf3,0xc3 = rep ret.
311 .byte 0xf3,0xc3
312
313
# Internal helper: decrypt 6 blocks (%xmm2..%xmm7) in parallel.
# In: %rcx = key schedule, %eax = round count.  Mirror of _aesni_encrypt6
# using aesdec/aesdeclast.
314 .p2align 4
315 _aesni_decrypt6:
316 movups (%rcx),%xmm0
317 shll $4,%eax
318 movups 16(%rcx),%xmm1
319 xorps %xmm0,%xmm2
320 pxor %xmm0,%xmm3
321 pxor %xmm0,%xmm4
# Round 1 starts on the early registers during setup.
322 .byte 102,15,56,222,209
323 leaq 32(%rcx,%rax,1),%rcx
324 negq %rax
325 .byte 102,15,56,222,217
326 pxor %xmm0,%xmm5
327 pxor %xmm0,%xmm6
328 .byte 102,15,56,222,225
329 pxor %xmm0,%xmm7
330 movups (%rcx,%rax,1),%xmm0
331 addq $16,%rax
332 jmp L$dec_loop6_enter
333 .p2align 4
334 L$dec_loop6:
335 .byte 102,15,56,222,209
336 .byte 102,15,56,222,217
337 .byte 102,15,56,222,225
338 L$dec_loop6_enter:
339 .byte 102,15,56,222,233
340 .byte 102,15,56,222,241
341 .byte 102,15,56,222,249
342 movups (%rcx,%rax,1),%xmm1
343 addq $32,%rax
344 .byte 102,15,56,222,208
345 .byte 102,15,56,222,216
346 .byte 102,15,56,222,224
347 .byte 102,15,56,222,232
348 .byte 102,15,56,222,240
349 .byte 102,15,56,222,248
350 movups -16(%rcx,%rax,1),%xmm0
351 jnz L$dec_loop6
352
353 .byte 102,15,56,222,209
354 .byte 102,15,56,222,217
355 .byte 102,15,56,222,225
356 .byte 102,15,56,222,233
357 .byte 102,15,56,222,241
358 .byte 102,15,56,222,249
359 .byte 102,15,56,223,208
360 .byte 102,15,56,223,216
361 .byte 102,15,56,223,224
362 .byte 102,15,56,223,232
363 .byte 102,15,56,223,240
364 .byte 102,15,56,223,248
# 0xf3,0xc3 = rep ret.
365 .byte 0xf3,0xc3
366
367
# Internal helper: encrypt 8 blocks (%xmm2..%xmm9) in parallel.
# In: %rcx = key schedule, %eax = round count.
# The 102,68,... encodings carry a REX prefix to reach xmm8/xmm9.
368 .p2align 4
369 _aesni_encrypt8:
370 movups (%rcx),%xmm0
371 shll $4,%eax
372 movups 16(%rcx),%xmm1
373 xorps %xmm0,%xmm2
374 xorps %xmm0,%xmm3
375 pxor %xmm0,%xmm4
376 pxor %xmm0,%xmm5
377 pxor %xmm0,%xmm6
378 leaq 32(%rcx,%rax,1),%rcx
379 negq %rax
# Round 1 starts on xmm2/xmm3 while the remaining blocks are whitened.
380 .byte 102,15,56,220,209
381 pxor %xmm0,%xmm7
382 pxor %xmm0,%xmm8
383 .byte 102,15,56,220,217
384 pxor %xmm0,%xmm9
385 movups (%rcx,%rax,1),%xmm0
386 addq $16,%rax
387 jmp L$enc_loop8_inner
388 .p2align 4
389 L$enc_loop8:
390 .byte 102,15,56,220,209
391 .byte 102,15,56,220,217
392 L$enc_loop8_inner:
393 .byte 102,15,56,220,225
394 .byte 102,15,56,220,233
395 .byte 102,15,56,220,241
396 .byte 102,15,56,220,249
397 .byte 102,68,15,56,220,193
398 .byte 102,68,15,56,220,201
399 L$enc_loop8_enter:
400 movups (%rcx,%rax,1),%xmm1
401 addq $32,%rax
402 .byte 102,15,56,220,208
403 .byte 102,15,56,220,216
404 .byte 102,15,56,220,224
405 .byte 102,15,56,220,232
406 .byte 102,15,56,220,240
407 .byte 102,15,56,220,248
408 .byte 102,68,15,56,220,192
409 .byte 102,68,15,56,220,200
410 movups -16(%rcx,%rax,1),%xmm0
411 jnz L$enc_loop8
412
413 .byte 102,15,56,220,209
414 .byte 102,15,56,220,217
415 .byte 102,15,56,220,225
416 .byte 102,15,56,220,233
417 .byte 102,15,56,220,241
418 .byte 102,15,56,220,249
419 .byte 102,68,15,56,220,193
420 .byte 102,68,15,56,220,201
421 .byte 102,15,56,221,208
422 .byte 102,15,56,221,216
423 .byte 102,15,56,221,224
424 .byte 102,15,56,221,232
425 .byte 102,15,56,221,240
426 .byte 102,15,56,221,248
427 .byte 102,68,15,56,221,192
428 .byte 102,68,15,56,221,200
# 0xf3,0xc3 = rep ret.
429 .byte 0xf3,0xc3
430
431
# Internal helper: decrypt 8 blocks (%xmm2..%xmm9) in parallel.
# In: %rcx = key schedule, %eax = round count.  Mirror of _aesni_encrypt8
# using aesdec/aesdeclast; 102,68,... forms carry REX for xmm8/xmm9.
432 .p2align 4
433 _aesni_decrypt8:
434 movups (%rcx),%xmm0
435 shll $4,%eax
436 movups 16(%rcx),%xmm1
437 xorps %xmm0,%xmm2
438 xorps %xmm0,%xmm3
439 pxor %xmm0,%xmm4
440 pxor %xmm0,%xmm5
441 pxor %xmm0,%xmm6
442 leaq 32(%rcx,%rax,1),%rcx
443 negq %rax
444 .byte 102,15,56,222,209
445 pxor %xmm0,%xmm7
446 pxor %xmm0,%xmm8
447 .byte 102,15,56,222,217
448 pxor %xmm0,%xmm9
449 movups (%rcx,%rax,1),%xmm0
450 addq $16,%rax
451 jmp L$dec_loop8_inner
452 .p2align 4
453 L$dec_loop8:
454 .byte 102,15,56,222,209
455 .byte 102,15,56,222,217
456 L$dec_loop8_inner:
457 .byte 102,15,56,222,225
458 .byte 102,15,56,222,233
459 .byte 102,15,56,222,241
460 .byte 102,15,56,222,249
461 .byte 102,68,15,56,222,193
462 .byte 102,68,15,56,222,201
463 L$dec_loop8_enter:
464 movups (%rcx,%rax,1),%xmm1
465 addq $32,%rax
466 .byte 102,15,56,222,208
467 .byte 102,15,56,222,216
468 .byte 102,15,56,222,224
469 .byte 102,15,56,222,232
470 .byte 102,15,56,222,240
471 .byte 102,15,56,222,248
472 .byte 102,68,15,56,222,192
473 .byte 102,68,15,56,222,200
474 movups -16(%rcx,%rax,1),%xmm0
475 jnz L$dec_loop8
476
477 .byte 102,15,56,222,209
478 .byte 102,15,56,222,217
479 .byte 102,15,56,222,225
480 .byte 102,15,56,222,233
481 .byte 102,15,56,222,241
482 .byte 102,15,56,222,249
483 .byte 102,68,15,56,222,193
484 .byte 102,68,15,56,222,201
485 .byte 102,15,56,223,208
486 .byte 102,15,56,223,216
487 .byte 102,15,56,223,224
488 .byte 102,15,56,223,232
489 .byte 102,15,56,223,240
490 .byte 102,15,56,223,248
491 .byte 102,68,15,56,223,192
492 .byte 102,68,15,56,223,200
# 0xf3,0xc3 = rep ret.
493 .byte 0xf3,0xc3
494
# void aesni_ecb_encrypt(const uint8_t *in (%rdi), uint8_t *out (%rsi),
#                        size_t len (%rdx), const AES_KEY *key (%rcx),
#                        int enc (%r8d))
# ECB mode: processes 8 blocks per iteration, then a 1..7-block tail.
# enc == 0 selects the decrypt path.  len is rounded DOWN to a whole
# number of 16-byte blocks.  The decrypt path scrubs plaintext from
# registers before returning; the encrypt path only clears xmm0/xmm1.
495 .globl _aesni_ecb_encrypt
496 .private_extern _aesni_ecb_encrypt
497
498 .p2align 4
499 _aesni_ecb_encrypt:
# Round length down to a multiple of 16; nothing to do if < 1 block.
500 andq $-16,%rdx
501 jz L$ecb_ret
502
# r11/r10d preserve key pointer and round count across the helper calls
# (the helpers advance %rcx and consume %eax).
503 movl 240(%rcx),%eax
504 movups (%rcx),%xmm0
505 movq %rcx,%r11
506 movl %eax,%r10d
507 testl %r8d,%r8d
508 jz L$ecb_decrypt
509
510 cmpq $0x80,%rdx
511 jb L$ecb_enc_tail
512
# Prime the pipeline: load the first 8 blocks before entering the loop.
513 movdqu (%rdi),%xmm2
514 movdqu 16(%rdi),%xmm3
515 movdqu 32(%rdi),%xmm4
516 movdqu 48(%rdi),%xmm5
517 movdqu 64(%rdi),%xmm6
518 movdqu 80(%rdi),%xmm7
519 movdqu 96(%rdi),%xmm8
520 movdqu 112(%rdi),%xmm9
521 leaq 128(%rdi),%rdi
522 subq $0x80,%rdx
523 jmp L$ecb_enc_loop8_enter
524 .p2align 4
525 L$ecb_enc_loop8:
# Store the previous 8 ciphertext blocks while loading the next 8
# plaintext blocks (interleaved to overlap memory traffic).
526 movups %xmm2,(%rsi)
527 movq %r11,%rcx
528 movdqu (%rdi),%xmm2
529 movl %r10d,%eax
530 movups %xmm3,16(%rsi)
531 movdqu 16(%rdi),%xmm3
532 movups %xmm4,32(%rsi)
533 movdqu 32(%rdi),%xmm4
534 movups %xmm5,48(%rsi)
535 movdqu 48(%rdi),%xmm5
536 movups %xmm6,64(%rsi)
537 movdqu 64(%rdi),%xmm6
538 movups %xmm7,80(%rsi)
539 movdqu 80(%rdi),%xmm7
540 movups %xmm8,96(%rsi)
541 movdqu 96(%rdi),%xmm8
542 movups %xmm9,112(%rsi)
543 leaq 128(%rsi),%rsi
544 movdqu 112(%rdi),%xmm9
545 leaq 128(%rdi),%rdi
546 L$ecb_enc_loop8_enter:
547
548 call _aesni_encrypt8
549
550 subq $0x80,%rdx
551 jnc L$ecb_enc_loop8
552
# Flush the final 8 ciphertext blocks; restore key/rounds for the tail.
553 movups %xmm2,(%rsi)
554 movq %r11,%rcx
555 movups %xmm3,16(%rsi)
556 movl %r10d,%eax
557 movups %xmm4,32(%rsi)
558 movups %xmm5,48(%rsi)
559 movups %xmm6,64(%rsi)
560 movups %xmm7,80(%rsi)
561 movups %xmm8,96(%rsi)
562 movups %xmm9,112(%rsi)
563 leaq 128(%rsi),%rsi
564 addq $0x80,%rdx
565 jz L$ecb_ret
566
# Tail: dispatch on remaining length (0x20/0x40/0x60 = 2/4/6 blocks).
567 L$ecb_enc_tail:
568 movups (%rdi),%xmm2
569 cmpq $0x20,%rdx
570 jb L$ecb_enc_one
571 movups 16(%rdi),%xmm3
572 je L$ecb_enc_two
573 movups 32(%rdi),%xmm4
574 cmpq $0x40,%rdx
575 jb L$ecb_enc_three
576 movups 48(%rdi),%xmm5
577 je L$ecb_enc_four
578 movups 64(%rdi),%xmm6
579 cmpq $0x60,%rdx
580 jb L$ecb_enc_five
581 movups 80(%rdi),%xmm7
582 je L$ecb_enc_six
# 7 blocks: run the 8-wide helper with a zeroed 8th block.
583 movdqu 96(%rdi),%xmm8
584 xorps %xmm9,%xmm9
585 call _aesni_encrypt8
586 movups %xmm2,(%rsi)
587 movups %xmm3,16(%rsi)
588 movups %xmm4,32(%rsi)
589 movups %xmm5,48(%rsi)
590 movups %xmm6,64(%rsi)
591 movups %xmm7,80(%rsi)
592 movups %xmm8,96(%rsi)
593 jmp L$ecb_ret
594 .p2align 4
595 L$ecb_enc_one:
# Inlined single-block encrypt (same loop as _aesni_encrypt).
596 movups (%rcx),%xmm0
597 movups 16(%rcx),%xmm1
598 leaq 32(%rcx),%rcx
599 xorps %xmm0,%xmm2
600 L$oop_enc1_3:
601 .byte 102,15,56,220,209
602 decl %eax
603 movups (%rcx),%xmm1
604 leaq 16(%rcx),%rcx
605 jnz L$oop_enc1_3
606 .byte 102,15,56,221,209
607 movups %xmm2,(%rsi)
608 jmp L$ecb_ret
609 .p2align 4
610 L$ecb_enc_two:
611 call _aesni_encrypt2
612 movups %xmm2,(%rsi)
613 movups %xmm3,16(%rsi)
614 jmp L$ecb_ret
615 .p2align 4
616 L$ecb_enc_three:
617 call _aesni_encrypt3
618 movups %xmm2,(%rsi)
619 movups %xmm3,16(%rsi)
620 movups %xmm4,32(%rsi)
621 jmp L$ecb_ret
622 .p2align 4
623 L$ecb_enc_four:
624 call _aesni_encrypt4
625 movups %xmm2,(%rsi)
626 movups %xmm3,16(%rsi)
627 movups %xmm4,32(%rsi)
628 movups %xmm5,48(%rsi)
629 jmp L$ecb_ret
630 .p2align 4
631 L$ecb_enc_five:
# 5 blocks: run the 6-wide helper with a zeroed 6th block.
632 xorps %xmm7,%xmm7
633 call _aesni_encrypt6
634 movups %xmm2,(%rsi)
635 movups %xmm3,16(%rsi)
636 movups %xmm4,32(%rsi)
637 movups %xmm5,48(%rsi)
638 movups %xmm6,64(%rsi)
639 jmp L$ecb_ret
640 .p2align 4
641 L$ecb_enc_six:
642 call _aesni_encrypt6
643 movups %xmm2,(%rsi)
644 movups %xmm3,16(%rsi)
645 movups %xmm4,32(%rsi)
646 movups %xmm5,48(%rsi)
647 movups %xmm6,64(%rsi)
648 movups %xmm7,80(%rsi)
649 jmp L$ecb_ret
650
# ----- decrypt path (enc == 0): same structure, plus register scrubbing
# of decrypted plaintext after each store -----
651 .p2align 4
652 L$ecb_decrypt:
653 cmpq $0x80,%rdx
654 jb L$ecb_dec_tail
655
656 movdqu (%rdi),%xmm2
657 movdqu 16(%rdi),%xmm3
658 movdqu 32(%rdi),%xmm4
659 movdqu 48(%rdi),%xmm5
660 movdqu 64(%rdi),%xmm6
661 movdqu 80(%rdi),%xmm7
662 movdqu 96(%rdi),%xmm8
663 movdqu 112(%rdi),%xmm9
664 leaq 128(%rdi),%rdi
665 subq $0x80,%rdx
666 jmp L$ecb_dec_loop8_enter
667 .p2align 4
668 L$ecb_dec_loop8:
669 movups %xmm2,(%rsi)
670 movq %r11,%rcx
671 movdqu (%rdi),%xmm2
672 movl %r10d,%eax
673 movups %xmm3,16(%rsi)
674 movdqu 16(%rdi),%xmm3
675 movups %xmm4,32(%rsi)
676 movdqu 32(%rdi),%xmm4
677 movups %xmm5,48(%rsi)
678 movdqu 48(%rdi),%xmm5
679 movups %xmm6,64(%rsi)
680 movdqu 64(%rdi),%xmm6
681 movups %xmm7,80(%rsi)
682 movdqu 80(%rdi),%xmm7
683 movups %xmm8,96(%rsi)
684 movdqu 96(%rdi),%xmm8
685 movups %xmm9,112(%rsi)
686 leaq 128(%rsi),%rsi
687 movdqu 112(%rdi),%xmm9
688 leaq 128(%rdi),%rdi
689 L$ecb_dec_loop8_enter:
690
691 call _aesni_decrypt8
692
693 movups (%r11),%xmm0
694 subq $0x80,%rdx
695 jnc L$ecb_dec_loop8
696
# Flush last 8 plaintext blocks and scrub each register after its store.
697 movups %xmm2,(%rsi)
698 pxor %xmm2,%xmm2
699 movq %r11,%rcx
700 movups %xmm3,16(%rsi)
701 pxor %xmm3,%xmm3
702 movl %r10d,%eax
703 movups %xmm4,32(%rsi)
704 pxor %xmm4,%xmm4
705 movups %xmm5,48(%rsi)
706 pxor %xmm5,%xmm5
707 movups %xmm6,64(%rsi)
708 pxor %xmm6,%xmm6
709 movups %xmm7,80(%rsi)
710 pxor %xmm7,%xmm7
711 movups %xmm8,96(%rsi)
712 pxor %xmm8,%xmm8
713 movups %xmm9,112(%rsi)
714 pxor %xmm9,%xmm9
715 leaq 128(%rsi),%rsi
716 addq $0x80,%rdx
717 jz L$ecb_ret
718
719 L$ecb_dec_tail:
720 movups (%rdi),%xmm2
721 cmpq $0x20,%rdx
722 jb L$ecb_dec_one
723 movups 16(%rdi),%xmm3
724 je L$ecb_dec_two
725 movups 32(%rdi),%xmm4
726 cmpq $0x40,%rdx
727 jb L$ecb_dec_three
728 movups 48(%rdi),%xmm5
729 je L$ecb_dec_four
730 movups 64(%rdi),%xmm6
731 cmpq $0x60,%rdx
732 jb L$ecb_dec_five
733 movups 80(%rdi),%xmm7
734 je L$ecb_dec_six
735 movups 96(%rdi),%xmm8
736 movups (%rcx),%xmm0
737 xorps %xmm9,%xmm9
738 call _aesni_decrypt8
739 movups %xmm2,(%rsi)
740 pxor %xmm2,%xmm2
741 movups %xmm3,16(%rsi)
742 pxor %xmm3,%xmm3
743 movups %xmm4,32(%rsi)
744 pxor %xmm4,%xmm4
745 movups %xmm5,48(%rsi)
746 pxor %xmm5,%xmm5
747 movups %xmm6,64(%rsi)
748 pxor %xmm6,%xmm6
749 movups %xmm7,80(%rsi)
750 pxor %xmm7,%xmm7
751 movups %xmm8,96(%rsi)
752 pxor %xmm8,%xmm8
753 pxor %xmm9,%xmm9
754 jmp L$ecb_ret
755 .p2align 4
756 L$ecb_dec_one:
# Inlined single-block decrypt (same loop as _aesni_decrypt).
757 movups (%rcx),%xmm0
758 movups 16(%rcx),%xmm1
759 leaq 32(%rcx),%rcx
760 xorps %xmm0,%xmm2
761 L$oop_dec1_4:
762 .byte 102,15,56,222,209
763 decl %eax
764 movups (%rcx),%xmm1
765 leaq 16(%rcx),%rcx
766 jnz L$oop_dec1_4
767 .byte 102,15,56,223,209
768 movups %xmm2,(%rsi)
769 pxor %xmm2,%xmm2
770 jmp L$ecb_ret
771 .p2align 4
772 L$ecb_dec_two:
773 call _aesni_decrypt2
774 movups %xmm2,(%rsi)
775 pxor %xmm2,%xmm2
776 movups %xmm3,16(%rsi)
777 pxor %xmm3,%xmm3
778 jmp L$ecb_ret
779 .p2align 4
780 L$ecb_dec_three:
781 call _aesni_decrypt3
782 movups %xmm2,(%rsi)
783 pxor %xmm2,%xmm2
784 movups %xmm3,16(%rsi)
785 pxor %xmm3,%xmm3
786 movups %xmm4,32(%rsi)
787 pxor %xmm4,%xmm4
788 jmp L$ecb_ret
789 .p2align 4
790 L$ecb_dec_four:
791 call _aesni_decrypt4
792 movups %xmm2,(%rsi)
793 pxor %xmm2,%xmm2
794 movups %xmm3,16(%rsi)
795 pxor %xmm3,%xmm3
796 movups %xmm4,32(%rsi)
797 pxor %xmm4,%xmm4
798 movups %xmm5,48(%rsi)
799 pxor %xmm5,%xmm5
800 jmp L$ecb_ret
801 .p2align 4
802 L$ecb_dec_five:
803 xorps %xmm7,%xmm7
804 call _aesni_decrypt6
805 movups %xmm2,(%rsi)
806 pxor %xmm2,%xmm2
807 movups %xmm3,16(%rsi)
808 pxor %xmm3,%xmm3
809 movups %xmm4,32(%rsi)
810 pxor %xmm4,%xmm4
811 movups %xmm5,48(%rsi)
812 pxor %xmm5,%xmm5
813 movups %xmm6,64(%rsi)
814 pxor %xmm6,%xmm6
815 pxor %xmm7,%xmm7
816 jmp L$ecb_ret
817 .p2align 4
818 L$ecb_dec_six:
819 call _aesni_decrypt6
820 movups %xmm2,(%rsi)
821 pxor %xmm2,%xmm2
822 movups %xmm3,16(%rsi)
823 pxor %xmm3,%xmm3
824 movups %xmm4,32(%rsi)
825 pxor %xmm4,%xmm4
826 movups %xmm5,48(%rsi)
827 pxor %xmm5,%xmm5
828 movups %xmm6,64(%rsi)
829 pxor %xmm6,%xmm6
830 movups %xmm7,80(%rsi)
831 pxor %xmm7,%xmm7
832
# Common exit: clear the round-key registers.
833 L$ecb_ret:
834 xorps %xmm0,%xmm0
835 pxor %xmm1,%xmm1
# 0xf3,0xc3 = rep ret.
836 .byte 0xf3,0xc3
837
# void aesni_ccm64_encrypt_blocks(const uint8_t *in (%rdi),
#         uint8_t *out (%rsi), size_t blocks (%rdx),
#         const AES_KEY *key (%rcx), const uint8_t *ivec (%r8),
#         uint8_t *cmac (%r9))
# CCM with a 64-bit counter: each iteration encrypts the counter block
# (%xmm2, derived from *ivec in %xmm6) and, in the same interleaved
# key-schedule walk, advances the CBC-MAC state (%xmm3, from/to *cmac).
# L$increment64 / L$bswap_mask are RIP-relative constants defined
# elsewhere in this file (not visible in this chunk).
# .byte 102,15,56,0,* = pshufb (byte-swap via L$bswap_mask).
838 .globl _aesni_ccm64_encrypt_blocks
839 .private_extern _aesni_ccm64_encrypt_blocks
840
841 .p2align 4
842 _aesni_ccm64_encrypt_blocks:
843 movl 240(%rcx),%eax
844 movdqu (%r8),%xmm6
845 movdqa L$increment64(%rip),%xmm9
846 movdqa L$bswap_mask(%rip),%xmm7
847
# r11 = key base; rcx points past the schedule and r10 becomes the
# negative byte offset used by the inner round loop.
848 shll $4,%eax
849 movl $16,%r10d
850 leaq 0(%rcx),%r11
851 movdqu (%r9),%xmm3
852 movdqa %xmm6,%xmm2
853 leaq 32(%rcx,%rax,1),%rcx
854 .byte 102,15,56,0,247
855 subq %rax,%r10
856 jmp L$ccm64_enc_outer
857 .p2align 4
858 L$ccm64_enc_outer:
859 movups (%r11),%xmm0
860 movq %r10,%rax
861 movups (%rdi),%xmm8
862
# Whiten counter (xmm2); fold the plaintext block into the MAC state
# (xmm3 ^= plaintext ^ rk0, matching the counter's whitening).
863 xorps %xmm0,%xmm2
864 movups 16(%r11),%xmm1
865 xorps %xmm8,%xmm0
866 xorps %xmm0,%xmm3
867 movups 32(%r11),%xmm0
868
869 L$ccm64_enc2_loop:
# Two AES streams per iteration: xmm2 = CTR block, xmm3 = CBC-MAC.
870 .byte 102,15,56,220,209
871 .byte 102,15,56,220,217
872 movups (%rcx,%rax,1),%xmm1
873 addq $32,%rax
874 .byte 102,15,56,220,208
875 .byte 102,15,56,220,216
876 movups -16(%rcx,%rax,1),%xmm0
877 jnz L$ccm64_enc2_loop
878 .byte 102,15,56,220,209
879 .byte 102,15,56,220,217
# Bump the 64-bit counter and decrement the block count.
880 paddq %xmm9,%xmm6
881 decq %rdx
882 .byte 102,15,56,221,208
883 .byte 102,15,56,221,216
884
# Ciphertext = plaintext ^ E(counter); reload next counter into xmm2.
885 leaq 16(%rdi),%rdi
886 xorps %xmm2,%xmm8
887 movdqa %xmm6,%xmm2
888 movups %xmm8,(%rsi)
889 .byte 102,15,56,0,215
890 leaq 16(%rsi),%rsi
891 jnz L$ccm64_enc_outer
892
# Write back the MAC state and scrub sensitive registers.
893 pxor %xmm0,%xmm0
894 pxor %xmm1,%xmm1
895 pxor %xmm2,%xmm2
896 movups %xmm3,(%r9)
897 pxor %xmm3,%xmm3
898 pxor %xmm8,%xmm8
899 pxor %xmm6,%xmm6
# 0xf3,0xc3 = rep ret.
900 .byte 0xf3,0xc3
901
# void aesni_ccm64_decrypt_blocks(const uint8_t *in (%rdi),
#         uint8_t *out (%rsi), size_t blocks (%rdx),
#         const AES_KEY *key (%rcx), const uint8_t *ivec (%r8),
#         uint8_t *cmac (%r9))
# CCM decrypt: the first counter block is encrypted up front with the
# single-block loop; thereafter each iteration recovers a plaintext
# block (ciphertext ^ E(counter)) and folds it into the CBC-MAC (%xmm3),
# running the next counter encryption and the MAC update interleaved.
902 .globl _aesni_ccm64_decrypt_blocks
903 .private_extern _aesni_ccm64_decrypt_blocks
904
905 .p2align 4
906 _aesni_ccm64_decrypt_blocks:
907 movl 240(%rcx),%eax
908 movups (%r8),%xmm6
909 movdqu (%r9),%xmm3
910 movdqa L$increment64(%rip),%xmm9
911 movdqa L$bswap_mask(%rip),%xmm7
912
# Encrypt the initial counter block (xmm2) with the 1-block loop.
913 movaps %xmm6,%xmm2
914 movl %eax,%r10d
915 movq %rcx,%r11
916 .byte 102,15,56,0,247
917 movups (%rcx),%xmm0
918 movups 16(%rcx),%xmm1
919 leaq 32(%rcx),%rcx
920 xorps %xmm0,%xmm2
921 L$oop_enc1_5:
922 .byte 102,15,56,220,209
923 decl %eax
924 movups (%rcx),%xmm1
925 leaq 16(%rcx),%rcx
926 jnz L$oop_enc1_5
927 .byte 102,15,56,221,209
# Set up the negative-offset schedule walk (r10) and advance the counter.
928 shll $4,%r10d
929 movl $16,%eax
930 movups (%rdi),%xmm8
931 paddq %xmm9,%xmm6
932 leaq 16(%rdi),%rdi
933 subq %r10,%rax
934 leaq 32(%r11,%r10,1),%rcx
935 movq %rax,%r10
936 jmp L$ccm64_dec_outer
937 .p2align 4
938 L$ccm64_dec_outer:
# plaintext = ciphertext ^ E(counter); store it, then byte-swap the
# next counter into position.
939 xorps %xmm2,%xmm8
940 movdqa %xmm6,%xmm2
941 movups %xmm8,(%rsi)
942 leaq 16(%rsi),%rsi
943 .byte 102,15,56,0,215
944
945 subq $1,%rdx
946 jz L$ccm64_dec_break
947
# Whiten both streams: xmm2 = next counter, xmm3 ^= plaintext (MAC).
948 movups (%r11),%xmm0
949 movq %r10,%rax
950 movups 16(%r11),%xmm1
951 xorps %xmm0,%xmm8
952 xorps %xmm0,%xmm2
953 xorps %xmm8,%xmm3
954 movups 32(%r11),%xmm0
955 jmp L$ccm64_dec2_loop
956 .p2align 4
957 L$ccm64_dec2_loop:
958 .byte 102,15,56,220,209
959 .byte 102,15,56,220,217
960 movups (%rcx,%rax,1),%xmm1
961 addq $32,%rax
962 .byte 102,15,56,220,208
963 .byte 102,15,56,220,216
964 movups -16(%rcx,%rax,1),%xmm0
965 jnz L$ccm64_dec2_loop
# Load next ciphertext block and bump the counter before the last rounds.
966 movups (%rdi),%xmm8
967 paddq %xmm9,%xmm6
968 .byte 102,15,56,220,209
969 .byte 102,15,56,220,217
970 .byte 102,15,56,221,208
971 .byte 102,15,56,221,216
972 leaq 16(%rdi),%rdi
973 jmp L$ccm64_dec_outer
974
975 .p2align 4
976 L$ccm64_dec_break:
977
# Final MAC block: fold the last plaintext into xmm3 and run one more
# single-block encryption over the MAC state.
978 movl 240(%r11),%eax
979 movups (%r11),%xmm0
980 movups 16(%r11),%xmm1
981 xorps %xmm0,%xmm8
982 leaq 32(%r11),%r11
983 xorps %xmm8,%xmm3
984 L$oop_enc1_6:
985 .byte 102,15,56,220,217
986 decl %eax
987 movups (%r11),%xmm1
988 leaq 16(%r11),%r11
989 jnz L$oop_enc1_6
990 .byte 102,15,56,221,217
# Write back the MAC and scrub sensitive registers.
991 pxor %xmm0,%xmm0
992 pxor %xmm1,%xmm1
993 pxor %xmm2,%xmm2
994 movups %xmm3,(%r9)
995 pxor %xmm3,%xmm3
996 pxor %xmm8,%xmm8
997 pxor %xmm6,%xmm6
# 0xf3,0xc3 = rep ret.
998 .byte 0xf3,0xc3
999
1000 .globl _aesni_ctr32_encrypt_blocks
1001 .private_extern _aesni_ctr32_encrypt_blocks
1002
1003 .p2align 4
1004 _aesni_ctr32_encrypt_blocks:
1005 cmpq $1,%rdx
1006 jne L$ctr32_bulk
1007
1008
1009
1010 movups (%r8),%xmm2
1011 movups (%rdi),%xmm3
1012 movl 240(%rcx),%edx
1013 movups (%rcx),%xmm0
1014 movups 16(%rcx),%xmm1
1015 leaq 32(%rcx),%rcx
1016 xorps %xmm0,%xmm2
1017 L$oop_enc1_7:
1018 .byte 102,15,56,220,209
1019 decl %edx
1020 movups (%rcx),%xmm1
1021 leaq 16(%rcx),%rcx
1022 jnz L$oop_enc1_7
1023 .byte 102,15,56,221,209
1024 pxor %xmm0,%xmm0
1025 pxor %xmm1,%xmm1
1026 xorps %xmm3,%xmm2
1027 pxor %xmm3,%xmm3
1028 movups %xmm2,(%rsi)
1029 xorps %xmm2,%xmm2
1030 jmp L$ctr32_epilogue
1031
1032 .p2align 4
1033 L$ctr32_bulk:
1034 leaq (%rsp),%rax
1035 pushq %rbp
1036 subq $128,%rsp
1037 andq $-16,%rsp
1038 leaq -8(%rax),%rbp
1039
1040
1041
1042
1043 movdqu (%r8),%xmm2
1044 movdqu (%rcx),%xmm0
1045 movl 12(%r8),%r8d
1046 pxor %xmm0,%xmm2
1047 movl 12(%rcx),%r11d
1048 movdqa %xmm2,0(%rsp)
1049 bswapl %r8d
1050 movdqa %xmm2,%xmm3
1051 movdqa %xmm2,%xmm4
1052 movdqa %xmm2,%xmm5
1053 movdqa %xmm2,64(%rsp)
1054 movdqa %xmm2,80(%rsp)
1055 movdqa %xmm2,96(%rsp)
1056 movq %rdx,%r10
1057 movdqa %xmm2,112(%rsp)
1058
1059 leaq 1(%r8),%rax
1060 leaq 2(%r8),%rdx
1061 bswapl %eax
1062 bswapl %edx
1063 xorl %r11d,%eax
1064 xorl %r11d,%edx
1065 .byte 102,15,58,34,216,3
1066 leaq 3(%r8),%rax
1067 movdqa %xmm3,16(%rsp)
1068 .byte 102,15,58,34,226,3
1069 bswapl %eax
1070 movq %r10,%rdx
1071 leaq 4(%r8),%r10
1072 movdqa %xmm4,32(%rsp)
1073 xorl %r11d,%eax
1074 bswapl %r10d
1075 .byte 102,15,58,34,232,3
1076 xorl %r11d,%r10d
1077 movdqa %xmm5,48(%rsp)
1078 leaq 5(%r8),%r9
1079 movl %r10d,64+12(%rsp)
1080 bswapl %r9d
1081 leaq 6(%r8),%r10
1082 movl 240(%rcx),%eax
1083 xorl %r11d,%r9d
1084 bswapl %r10d
1085 movl %r9d,80+12(%rsp)
1086 xorl %r11d,%r10d
1087 leaq 7(%r8),%r9
1088 movl %r10d,96+12(%rsp)
1089 bswapl %r9d
1090 movl _OPENSSL_ia32cap_P+4(%rip),%r10d
1091 xorl %r11d,%r9d
1092 andl $71303168,%r10d
1093 movl %r9d,112+12(%rsp)
1094
1095 movups 16(%rcx),%xmm1
1096
1097 movdqa 64(%rsp),%xmm6
1098 movdqa 80(%rsp),%xmm7
1099
1100 cmpq $8,%rdx
1101 jb L$ctr32_tail
1102
1103 subq $6,%rdx
1104 cmpl $4194304,%r10d
1105 je L$ctr32_6x
1106
1107 leaq 128(%rcx),%rcx
1108 subq $2,%rdx
1109 jmp L$ctr32_loop8
1110
1111 .p2align 4
1112 L$ctr32_6x:
1113 shll $4,%eax
1114 movl $48,%r10d
1115 bswapl %r11d
1116 leaq 32(%rcx,%rax,1),%rcx
1117 subq %rax,%r10
1118 jmp L$ctr32_loop6
1119
1120 .p2align 4
1121 L$ctr32_loop6:
1122 addl $6,%r8d
1123 movups -48(%rcx,%r10,1),%xmm0
1124 .byte 102,15,56,220,209
1125 movl %r8d,%eax
1126 xorl %r11d,%eax
1127 .byte 102,15,56,220,217
1128 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1129 leal 1(%r8),%eax
1130 .byte 102,15,56,220,225
1131 xorl %r11d,%eax
1132 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1133 .byte 102,15,56,220,233
1134 leal 2(%r8),%eax
1135 xorl %r11d,%eax
1136 .byte 102,15,56,220,241
1137 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1138 leal 3(%r8),%eax
1139 .byte 102,15,56,220,249
1140 movups -32(%rcx,%r10,1),%xmm1
1141 xorl %r11d,%eax
1142
1143 .byte 102,15,56,220,208
1144 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1145 leal 4(%r8),%eax
1146 .byte 102,15,56,220,216
1147 xorl %r11d,%eax
1148 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1149 .byte 102,15,56,220,224
1150 leal 5(%r8),%eax
1151 xorl %r11d,%eax
1152 .byte 102,15,56,220,232
1153 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1154 movq %r10,%rax
1155 .byte 102,15,56,220,240
1156 .byte 102,15,56,220,248
1157 movups -16(%rcx,%r10,1),%xmm0
1158
1159 call L$enc_loop6
1160
1161 movdqu (%rdi),%xmm8
1162 movdqu 16(%rdi),%xmm9
1163 movdqu 32(%rdi),%xmm10
1164 movdqu 48(%rdi),%xmm11
1165 movdqu 64(%rdi),%xmm12
1166 movdqu 80(%rdi),%xmm13
1167 leaq 96(%rdi),%rdi
1168 movups -64(%rcx,%r10,1),%xmm1
1169 pxor %xmm2,%xmm8
1170 movaps 0(%rsp),%xmm2
1171 pxor %xmm3,%xmm9
1172 movaps 16(%rsp),%xmm3
1173 pxor %xmm4,%xmm10
1174 movaps 32(%rsp),%xmm4
1175 pxor %xmm5,%xmm11
1176 movaps 48(%rsp),%xmm5
1177 pxor %xmm6,%xmm12
1178 movaps 64(%rsp),%xmm6
1179 pxor %xmm7,%xmm13
1180 movaps 80(%rsp),%xmm7
1181 movdqu %xmm8,(%rsi)
1182 movdqu %xmm9,16(%rsi)
1183 movdqu %xmm10,32(%rsi)
1184 movdqu %xmm11,48(%rsi)
1185 movdqu %xmm12,64(%rsi)
1186 movdqu %xmm13,80(%rsi)
1187 leaq 96(%rsi),%rsi
1188
1189 subq $6,%rdx
1190 jnc L$ctr32_loop6
1191
1192 addq $6,%rdx
1193 jz L$ctr32_done
1194
1195 leal -48(%r10),%eax
1196 leaq -80(%rcx,%r10,1),%rcx
1197 negl %eax
1198 shrl $4,%eax
1199 jmp L$ctr32_tail
1200
1201 .p2align 5
1202 L$ctr32_loop8:
1203 addl $8,%r8d
1204 movdqa 96(%rsp),%xmm8
1205 .byte 102,15,56,220,209
1206 movl %r8d,%r9d
1207 movdqa 112(%rsp),%xmm9
1208 .byte 102,15,56,220,217
1209 bswapl %r9d
1210 movups 32-128(%rcx),%xmm0
1211 .byte 102,15,56,220,225
1212 xorl %r11d,%r9d
1213 nop
1214 .byte 102,15,56,220,233
1215 movl %r9d,0+12(%rsp)
1216 leaq 1(%r8),%r9
1217 .byte 102,15,56,220,241
1218 .byte 102,15,56,220,249
1219 .byte 102,68,15,56,220,193
1220 .byte 102,68,15,56,220,201
1221 movups 48-128(%rcx),%xmm1
1222 bswapl %r9d
1223 .byte 102,15,56,220,208
1224 .byte 102,15,56,220,216
1225 xorl %r11d,%r9d
1226 .byte 0x66,0x90
1227 .byte 102,15,56,220,224
1228 .byte 102,15,56,220,232
1229 movl %r9d,16+12(%rsp)
1230 leaq 2(%r8),%r9
1231 .byte 102,15,56,220,240
1232 .byte 102,15,56,220,248
1233 .byte 102,68,15,56,220,192
1234 .byte 102,68,15,56,220,200
1235 movups 64-128(%rcx),%xmm0
1236 bswapl %r9d
1237 .byte 102,15,56,220,209
1238 .byte 102,15,56,220,217
1239 xorl %r11d,%r9d
1240 .byte 0x66,0x90
1241 .byte 102,15,56,220,225
1242 .byte 102,15,56,220,233
1243 movl %r9d,32+12(%rsp)
1244 leaq 3(%r8),%r9
1245 .byte 102,15,56,220,241
1246 .byte 102,15,56,220,249
1247 .byte 102,68,15,56,220,193
1248 .byte 102,68,15,56,220,201
1249 movups 80-128(%rcx),%xmm1
1250 bswapl %r9d
1251 .byte 102,15,56,220,208
1252 .byte 102,15,56,220,216
1253 xorl %r11d,%r9d
1254 .byte 0x66,0x90
1255 .byte 102,15,56,220,224
1256 .byte 102,15,56,220,232
1257 movl %r9d,48+12(%rsp)
1258 leaq 4(%r8),%r9
1259 .byte 102,15,56,220,240
1260 .byte 102,15,56,220,248
1261 .byte 102,68,15,56,220,192
1262 .byte 102,68,15,56,220,200
1263 movups 96-128(%rcx),%xmm0
1264 bswapl %r9d
1265 .byte 102,15,56,220,209
1266 .byte 102,15,56,220,217
1267 xorl %r11d,%r9d
1268 .byte 0x66,0x90
1269 .byte 102,15,56,220,225
1270 .byte 102,15,56,220,233
1271 movl %r9d,64+12(%rsp)
1272 leaq 5(%r8),%r9
1273 .byte 102,15,56,220,241
1274 .byte 102,15,56,220,249
1275 .byte 102,68,15,56,220,193
1276 .byte 102,68,15,56,220,201
1277 movups 112-128(%rcx),%xmm1
1278 bswapl %r9d
1279 .byte 102,15,56,220,208
1280 .byte 102,15,56,220,216
1281 xorl %r11d,%r9d
1282 .byte 0x66,0x90
1283 .byte 102,15,56,220,224
1284 .byte 102,15,56,220,232
1285 movl %r9d,80+12(%rsp)
1286 leaq 6(%r8),%r9
1287 .byte 102,15,56,220,240
1288 .byte 102,15,56,220,248
1289 .byte 102,68,15,56,220,192
1290 .byte 102,68,15,56,220,200
1291 movups 128-128(%rcx),%xmm0
1292 bswapl %r9d
1293 .byte 102,15,56,220,209
1294 .byte 102,15,56,220,217
1295 xorl %r11d,%r9d
1296 .byte 0x66,0x90
1297 .byte 102,15,56,220,225
1298 .byte 102,15,56,220,233
1299 movl %r9d,96+12(%rsp)
1300 leaq 7(%r8),%r9
1301 .byte 102,15,56,220,241
1302 .byte 102,15,56,220,249
1303 .byte 102,68,15,56,220,193
1304 .byte 102,68,15,56,220,201
1305 movups 144-128(%rcx),%xmm1
1306 bswapl %r9d
1307 .byte 102,15,56,220,208
1308 .byte 102,15,56,220,216
1309 .byte 102,15,56,220,224
1310 xorl %r11d,%r9d
1311 movdqu 0(%rdi),%xmm10
1312 .byte 102,15,56,220,232
1313 movl %r9d,112+12(%rsp)
1314 cmpl $11,%eax
1315 .byte 102,15,56,220,240
1316 .byte 102,15,56,220,248
1317 .byte 102,68,15,56,220,192
1318 .byte 102,68,15,56,220,200
1319 movups 160-128(%rcx),%xmm0
1320
1321 jb L$ctr32_enc_done
1322
1323 .byte 102,15,56,220,209
1324 .byte 102,15,56,220,217
1325 .byte 102,15,56,220,225
1326 .byte 102,15,56,220,233
1327 .byte 102,15,56,220,241
1328 .byte 102,15,56,220,249
1329 .byte 102,68,15,56,220,193
1330 .byte 102,68,15,56,220,201
1331 movups 176-128(%rcx),%xmm1
1332
1333 .byte 102,15,56,220,208
1334 .byte 102,15,56,220,216
1335 .byte 102,15,56,220,224
1336 .byte 102,15,56,220,232
1337 .byte 102,15,56,220,240
1338 .byte 102,15,56,220,248
1339 .byte 102,68,15,56,220,192
1340 .byte 102,68,15,56,220,200
1341 movups 192-128(%rcx),%xmm0
1342 je L$ctr32_enc_done
1343
1344 .byte 102,15,56,220,209
1345 .byte 102,15,56,220,217
1346 .byte 102,15,56,220,225
1347 .byte 102,15,56,220,233
1348 .byte 102,15,56,220,241
1349 .byte 102,15,56,220,249
1350 .byte 102,68,15,56,220,193
1351 .byte 102,68,15,56,220,201
1352 movups 208-128(%rcx),%xmm1
1353
1354 .byte 102,15,56,220,208
1355 .byte 102,15,56,220,216
1356 .byte 102,15,56,220,224
1357 .byte 102,15,56,220,232
1358 .byte 102,15,56,220,240
1359 .byte 102,15,56,220,248
1360 .byte 102,68,15,56,220,192
1361 .byte 102,68,15,56,220,200
1362 movups 224-128(%rcx),%xmm0
1363 jmp L$ctr32_enc_done
1364
1365 .p2align 4
1366 L$ctr32_enc_done:
1367 movdqu 16(%rdi),%xmm11
1368 pxor %xmm0,%xmm10
1369 movdqu 32(%rdi),%xmm12
1370 pxor %xmm0,%xmm11
1371 movdqu 48(%rdi),%xmm13
1372 pxor %xmm0,%xmm12
1373 movdqu 64(%rdi),%xmm14
1374 pxor %xmm0,%xmm13
1375 movdqu 80(%rdi),%xmm15
1376 pxor %xmm0,%xmm14
1377 pxor %xmm0,%xmm15
1378 .byte 102,15,56,220,209
1379 .byte 102,15,56,220,217
1380 .byte 102,15,56,220,225
1381 .byte 102,15,56,220,233
1382 .byte 102,15,56,220,241
1383 .byte 102,15,56,220,249
1384 .byte 102,68,15,56,220,193
1385 .byte 102,68,15,56,220,201
1386 movdqu 96(%rdi),%xmm1
1387 leaq 128(%rdi),%rdi
1388
1389 .byte 102,65,15,56,221,210
1390 pxor %xmm0,%xmm1
1391 movdqu 112-128(%rdi),%xmm10
1392 .byte 102,65,15,56,221,219
1393 pxor %xmm0,%xmm10
1394 movdqa 0(%rsp),%xmm11
1395 .byte 102,65,15,56,221,228
1396 .byte 102,65,15,56,221,237
1397 movdqa 16(%rsp),%xmm12
1398 movdqa 32(%rsp),%xmm13
1399 .byte 102,65,15,56,221,246
1400 .byte 102,65,15,56,221,255
1401 movdqa 48(%rsp),%xmm14
1402 movdqa 64(%rsp),%xmm15
1403 .byte 102,68,15,56,221,193
1404 movdqa 80(%rsp),%xmm0
1405 movups 16-128(%rcx),%xmm1
1406 .byte 102,69,15,56,221,202
1407
1408 movups %xmm2,(%rsi)
1409 movdqa %xmm11,%xmm2
1410 movups %xmm3,16(%rsi)
1411 movdqa %xmm12,%xmm3
1412 movups %xmm4,32(%rsi)
1413 movdqa %xmm13,%xmm4
1414 movups %xmm5,48(%rsi)
1415 movdqa %xmm14,%xmm5
1416 movups %xmm6,64(%rsi)
1417 movdqa %xmm15,%xmm6
1418 movups %xmm7,80(%rsi)
1419 movdqa %xmm0,%xmm7
1420 movups %xmm8,96(%rsi)
1421 movups %xmm9,112(%rsi)
1422 leaq 128(%rsi),%rsi
1423
1424 subq $8,%rdx
1425 jnc L$ctr32_loop8
1426
1427 addq $8,%rdx
1428 jz L$ctr32_done
1429 leaq -128(%rcx),%rcx
1430
1431 L$ctr32_tail:
1432
1433
1434 leaq 16(%rcx),%rcx
1435 cmpq $4,%rdx
1436 jb L$ctr32_loop3
1437 je L$ctr32_loop4
1438
1439
1440 shll $4,%eax
1441 movdqa 96(%rsp),%xmm8
1442 pxor %xmm9,%xmm9
1443
1444 movups 16(%rcx),%xmm0
1445 .byte 102,15,56,220,209
1446 .byte 102,15,56,220,217
1447 leaq 32-16(%rcx,%rax,1),%rcx
1448 negq %rax
1449 .byte 102,15,56,220,225
1450 addq $16,%rax
1451 movups (%rdi),%xmm10
1452 .byte 102,15,56,220,233
1453 .byte 102,15,56,220,241
1454 movups 16(%rdi),%xmm11
1455 movups 32(%rdi),%xmm12
1456 .byte 102,15,56,220,249
1457 .byte 102,68,15,56,220,193
1458
1459 call L$enc_loop8_enter
1460
1461 movdqu 48(%rdi),%xmm13
1462 pxor %xmm10,%xmm2
1463 movdqu 64(%rdi),%xmm10
1464 pxor %xmm11,%xmm3
1465 movdqu %xmm2,(%rsi)
1466 pxor %xmm12,%xmm4
1467 movdqu %xmm3,16(%rsi)
1468 pxor %xmm13,%xmm5
1469 movdqu %xmm4,32(%rsi)
1470 pxor %xmm10,%xmm6
1471 movdqu %xmm5,48(%rsi)
1472 movdqu %xmm6,64(%rsi)
1473 cmpq $6,%rdx
1474 jb L$ctr32_done
1475
1476 movups 80(%rdi),%xmm11
1477 xorps %xmm11,%xmm7
1478 movups %xmm7,80(%rsi)
1479 je L$ctr32_done
1480
1481 movups 96(%rdi),%xmm12
1482 xorps %xmm12,%xmm8
1483 movups %xmm8,96(%rsi)
1484 jmp L$ctr32_done
1485
1486 .p2align 5
1487 L$ctr32_loop4:
1488 .byte 102,15,56,220,209
1489 leaq 16(%rcx),%rcx
1490 decl %eax
1491 .byte 102,15,56,220,217
1492 .byte 102,15,56,220,225
1493 .byte 102,15,56,220,233
1494 movups (%rcx),%xmm1
1495 jnz L$ctr32_loop4
1496 .byte 102,15,56,221,209
1497 .byte 102,15,56,221,217
1498 movups (%rdi),%xmm10
1499 movups 16(%rdi),%xmm11
1500 .byte 102,15,56,221,225
1501 .byte 102,15,56,221,233
1502 movups 32(%rdi),%xmm12
1503 movups 48(%rdi),%xmm13
1504
1505 xorps %xmm10,%xmm2
1506 movups %xmm2,(%rsi)
1507 xorps %xmm11,%xmm3
1508 movups %xmm3,16(%rsi)
1509 pxor %xmm12,%xmm4
1510 movdqu %xmm4,32(%rsi)
1511 pxor %xmm13,%xmm5
1512 movdqu %xmm5,48(%rsi)
1513 jmp L$ctr32_done
1514
1515 .p2align 5
1516 L$ctr32_loop3:
1517 .byte 102,15,56,220,209
1518 leaq 16(%rcx),%rcx
1519 decl %eax
1520 .byte 102,15,56,220,217
1521 .byte 102,15,56,220,225
1522 movups (%rcx),%xmm1
1523 jnz L$ctr32_loop3
1524 .byte 102,15,56,221,209
1525 .byte 102,15,56,221,217
1526 .byte 102,15,56,221,225
1527
1528 movups (%rdi),%xmm10
1529 xorps %xmm10,%xmm2
1530 movups %xmm2,(%rsi)
1531 cmpq $2,%rdx
1532 jb L$ctr32_done
1533
1534 movups 16(%rdi),%xmm11
1535 xorps %xmm11,%xmm3
1536 movups %xmm3,16(%rsi)
1537 je L$ctr32_done
1538
1539 movups 32(%rdi),%xmm12
1540 xorps %xmm12,%xmm4
1541 movups %xmm4,32(%rsi)
1542
1543 L$ctr32_done:
1544 xorps %xmm0,%xmm0
1545 xorl %r11d,%r11d
1546 pxor %xmm1,%xmm1
1547 pxor %xmm2,%xmm2
1548 pxor %xmm3,%xmm3
1549 pxor %xmm4,%xmm4
1550 pxor %xmm5,%xmm5
1551 pxor %xmm6,%xmm6
1552 pxor %xmm7,%xmm7
1553 movaps %xmm0,0(%rsp)
1554 pxor %xmm8,%xmm8
1555 movaps %xmm0,16(%rsp)
1556 pxor %xmm9,%xmm9
1557 movaps %xmm0,32(%rsp)
1558 pxor %xmm10,%xmm10
1559 movaps %xmm0,48(%rsp)
1560 pxor %xmm11,%xmm11
1561 movaps %xmm0,64(%rsp)
1562 pxor %xmm12,%xmm12
1563 movaps %xmm0,80(%rsp)
1564 pxor %xmm13,%xmm13
1565 movaps %xmm0,96(%rsp)
1566 pxor %xmm14,%xmm14
1567 movaps %xmm0,112(%rsp)
1568 pxor %xmm15,%xmm15
1569 leaq (%rbp),%rsp
1570 popq %rbp
1571 L$ctr32_epilogue:
1572 .byte 0xf3,0xc3
1573
# void aesni_xts_encrypt(const uint8_t *inp  /* %rdi */, uint8_t *out /* %rsi */,
#                        size_t len          /* %rdx */, const AES_KEY *key1 /* %rcx */,
#                        const AES_KEY *key2 /* %r8  */, const uint8_t iv[16] /* %r9 */)
#
# AES-XTS encryption, SysV AMD64 ABI (Mach-O underscored symbol).
# NOTE(review): perlasm-generated output — do not hand-edit; regenerate from
# aesni-x86_64.pl instead. The leading decimal on each line is a
# review-tool rendering artifact. Hand-encoded opcodes:
#   .byte 102,15,56,220,*  = aesenc      (66 0F 38 DC /r)
#   .byte 102,15,56,221,*  = aesenclast  (66 0F 38 DD /r)
# 240(%key) is the rounds field of the AES_KEY structure.
1574 .globl _aesni_xts_encrypt
1575 .private_extern _aesni_xts_encrypt
1576
1577 .p2align 4
1578 _aesni_xts_encrypt:
# Prologue: 16-align %rsp and reserve 112 bytes of scratch (tweak spill
# area, 0..96(%rsp)); %rbp remembers the caller frame for the epilogue.
1579 leaq (%rsp),%rax
1580 pushq %rbp
1581 subq $112,%rsp
1582 andq $-16,%rsp
1583 leaq -8(%rax),%rbp
# Encrypt the IV with the tweak key (key2, %r8) to form the initial
# XTS tweak in %xmm2; %eax counts down key2's rounds.
1584 movups (%r9),%xmm2
1585 movl 240(%r8),%eax
1586 movl 240(%rcx),%r10d
1587 movups (%r8),%xmm0
1588 movups 16(%r8),%xmm1
1589 leaq 32(%r8),%r8
1590 xorps %xmm0,%xmm2
1591 L$oop_enc1_8:
1592 .byte 102,15,56,220,209
1593 decl %eax
1594 movups (%r8),%xmm1
1595 leaq 16(%r8),%r8
1596 jnz L$oop_enc1_8
1597 .byte 102,15,56,221,209
# Set up the data key (key1): %r11 = key pointer, %eax = rounds,
# %r10d = rounds*16 (byte offset to the last round key). %r9 keeps the
# original length so the <16-byte remainder can be ciphertext-stolen;
# %rdx is rounded down to whole blocks.
1598 movups (%rcx),%xmm0
1599 movq %rcx,%r11
1600 movl %r10d,%eax
1601 shll $4,%r10d
1602 movq %rdx,%r9
1603 andq $-16,%rdx
1604
1605 movups 16(%rcx,%r10,1),%xmm1
1606
# Derive six consecutive tweaks (%xmm10..%xmm14, next in %xmm15) by
# doubling in GF(2^128): shift left 1 (paddq) and conditionally XOR the
# reduction constant selected by the sign mask (psrad $31 + pand).
# L$xts_magic is defined elsewhere in this file — presumably the
# standard XTS 0x87 reduction polynomial; TODO confirm in full source.
# Each tweak is pre-XORed with round key 0 (%xmm0) to fold one XOR.
1607 movdqa L$xts_magic(%rip),%xmm8
1608 movdqa %xmm2,%xmm15
1609 pshufd $0x5f,%xmm2,%xmm9
1610 pxor %xmm0,%xmm1
1611 movdqa %xmm9,%xmm14
1612 paddd %xmm9,%xmm9
1613 movdqa %xmm15,%xmm10
1614 psrad $31,%xmm14
1615 paddq %xmm15,%xmm15
1616 pand %xmm8,%xmm14
1617 pxor %xmm0,%xmm10
1618 pxor %xmm14,%xmm15
1619 movdqa %xmm9,%xmm14
1620 paddd %xmm9,%xmm9
1621 movdqa %xmm15,%xmm11
1622 psrad $31,%xmm14
1623 paddq %xmm15,%xmm15
1624 pand %xmm8,%xmm14
1625 pxor %xmm0,%xmm11
1626 pxor %xmm14,%xmm15
1627 movdqa %xmm9,%xmm14
1628 paddd %xmm9,%xmm9
1629 movdqa %xmm15,%xmm12
1630 psrad $31,%xmm14
1631 paddq %xmm15,%xmm15
1632 pand %xmm8,%xmm14
1633 pxor %xmm0,%xmm12
1634 pxor %xmm14,%xmm15
1635 movdqa %xmm9,%xmm14
1636 paddd %xmm9,%xmm9
1637 movdqa %xmm15,%xmm13
1638 psrad $31,%xmm14
1639 paddq %xmm15,%xmm15
1640 pand %xmm8,%xmm14
1641 pxor %xmm0,%xmm13
1642 pxor %xmm14,%xmm15
1643 movdqa %xmm15,%xmm14
1644 psrad $31,%xmm9
1645 paddq %xmm15,%xmm15
1646 pand %xmm8,%xmm9
1647 pxor %xmm0,%xmm14
1648 pxor %xmm9,%xmm15
1649 movaps %xmm1,96(%rsp)
1650
# Fewer than 6 whole blocks? Take the short path.
1651 subq $96,%rdx
1652 jc L$xts_enc_short
1653
# Bulk setup: %rcx -> key schedule past the first two round keys,
# %r10/%rax become the loop displacement, %r8 -> L$xts_magic.
1654 movl $16+96,%eax
1655 leaq 32(%r11,%r10,1),%rcx
1656 subq %r10,%rax
1657 movups 16(%r11),%xmm1
1658 movq %rax,%r10
1659 leaq L$xts_magic(%rip),%r8
1660 jmp L$xts_enc_grandloop
1661
# Main loop: encrypt 6 blocks per iteration (%xmm2..%xmm7), interleaving
# AES rounds with the computation of the next 6 tweaks.
1662 .p2align 5
1663 L$xts_enc_grandloop:
1664 movdqu 0(%rdi),%xmm2
1665 movdqa %xmm0,%xmm8
1666 movdqu 16(%rdi),%xmm3
1667 pxor %xmm10,%xmm2
1668 movdqu 32(%rdi),%xmm4
1669 pxor %xmm11,%xmm3
1670 .byte 102,15,56,220,209
1671 movdqu 48(%rdi),%xmm5
1672 pxor %xmm12,%xmm4
1673 .byte 102,15,56,220,217
1674 movdqu 64(%rdi),%xmm6
1675 pxor %xmm13,%xmm5
1676 .byte 102,15,56,220,225
1677 movdqu 80(%rdi),%xmm7
1678 pxor %xmm15,%xmm8
1679 movdqa 96(%rsp),%xmm9
1680 pxor %xmm14,%xmm6
1681 .byte 102,15,56,220,233
1682 movups 32(%r11),%xmm0
1683 leaq 96(%rdi),%rdi
1684 pxor %xmm8,%xmm7
1685
# Spill the (round-key-0-free) tweaks to 0..80(%rsp) for the final XOR.
1686 pxor %xmm9,%xmm10
1687 .byte 102,15,56,220,241
1688 pxor %xmm9,%xmm11
1689 movdqa %xmm10,0(%rsp)
1690 .byte 102,15,56,220,249
1691 movups 48(%r11),%xmm1
1692 pxor %xmm9,%xmm12
1693
1694 .byte 102,15,56,220,208
1695 pxor %xmm9,%xmm13
1696 movdqa %xmm11,16(%rsp)
1697 .byte 102,15,56,220,216
1698 pxor %xmm9,%xmm14
1699 movdqa %xmm12,32(%rsp)
1700 .byte 102,15,56,220,224
1701 .byte 102,15,56,220,232
1702 pxor %xmm9,%xmm8
1703 movdqa %xmm14,64(%rsp)
1704 .byte 102,15,56,220,240
1705 .byte 102,15,56,220,248
1706 movups 64(%r11),%xmm0
1707 movdqa %xmm8,80(%rsp)
1708 pshufd $0x5f,%xmm15,%xmm9
1709 jmp L$xts_enc_loop6
# Middle AES rounds, two per iteration; %rax walks the key schedule.
1710 .p2align 5
1711 L$xts_enc_loop6:
1712 .byte 102,15,56,220,209
1713 .byte 102,15,56,220,217
1714 .byte 102,15,56,220,225
1715 .byte 102,15,56,220,233
1716 .byte 102,15,56,220,241
1717 .byte 102,15,56,220,249
1718 movups -64(%rcx,%rax,1),%xmm1
1719 addq $32,%rax
1720
1721 .byte 102,15,56,220,208
1722 .byte 102,15,56,220,216
1723 .byte 102,15,56,220,224
1724 .byte 102,15,56,220,232
1725 .byte 102,15,56,220,240
1726 .byte 102,15,56,220,248
1727 movups -80(%rcx,%rax,1),%xmm0
1728 jnz L$xts_enc_loop6
1729
# Last rounds, interleaved with deriving the next 6 tweaks for the
# following grandloop iteration (same GF(2^128) doubling as above).
1730 movdqa (%r8),%xmm8
1731 movdqa %xmm9,%xmm14
1732 paddd %xmm9,%xmm9
1733 .byte 102,15,56,220,209
1734 paddq %xmm15,%xmm15
1735 psrad $31,%xmm14
1736 .byte 102,15,56,220,217
1737 pand %xmm8,%xmm14
1738 movups (%r11),%xmm10
1739 .byte 102,15,56,220,225
1740 .byte 102,15,56,220,233
1741 .byte 102,15,56,220,241
1742 pxor %xmm14,%xmm15
1743 movaps %xmm10,%xmm11
1744 .byte 102,15,56,220,249
1745 movups -64(%rcx),%xmm1
1746
1747 movdqa %xmm9,%xmm14
1748 .byte 102,15,56,220,208
1749 paddd %xmm9,%xmm9
1750 pxor %xmm15,%xmm10
1751 .byte 102,15,56,220,216
1752 psrad $31,%xmm14
1753 paddq %xmm15,%xmm15
1754 .byte 102,15,56,220,224
1755 .byte 102,15,56,220,232
1756 pand %xmm8,%xmm14
1757 movaps %xmm11,%xmm12
1758 .byte 102,15,56,220,240
1759 pxor %xmm14,%xmm15
1760 movdqa %xmm9,%xmm14
1761 .byte 102,15,56,220,248
1762 movups -48(%rcx),%xmm0
1763
1764 paddd %xmm9,%xmm9
1765 .byte 102,15,56,220,209
1766 pxor %xmm15,%xmm11
1767 psrad $31,%xmm14
1768 .byte 102,15,56,220,217
1769 paddq %xmm15,%xmm15
1770 pand %xmm8,%xmm14
1771 .byte 102,15,56,220,225
1772 .byte 102,15,56,220,233
1773 movdqa %xmm13,48(%rsp)
1774 pxor %xmm14,%xmm15
1775 .byte 102,15,56,220,241
1776 movaps %xmm12,%xmm13
1777 movdqa %xmm9,%xmm14
1778 .byte 102,15,56,220,249
1779 movups -32(%rcx),%xmm1
1780
1781 paddd %xmm9,%xmm9
1782 .byte 102,15,56,220,208
1783 pxor %xmm15,%xmm12
1784 psrad $31,%xmm14
1785 .byte 102,15,56,220,216
1786 paddq %xmm15,%xmm15
1787 pand %xmm8,%xmm14
1788 .byte 102,15,56,220,224
1789 .byte 102,15,56,220,232
1790 .byte 102,15,56,220,240
1791 pxor %xmm14,%xmm15
1792 movaps %xmm13,%xmm14
1793 .byte 102,15,56,220,248
1794
1795 movdqa %xmm9,%xmm0
1796 paddd %xmm9,%xmm9
1797 .byte 102,15,56,220,209
1798 pxor %xmm15,%xmm13
1799 psrad $31,%xmm0
1800 .byte 102,15,56,220,217
1801 paddq %xmm15,%xmm15
1802 pand %xmm8,%xmm0
1803 .byte 102,15,56,220,225
1804 .byte 102,15,56,220,233
1805 pxor %xmm0,%xmm15
1806 movups (%r11),%xmm0
1807 .byte 102,15,56,220,241
1808 .byte 102,15,56,220,249
1809 movups 16(%r11),%xmm1
1810
# aesenclast against the spilled tweaks (memory operands 0..80(%rsp)):
# .byte 102,15,56,221,84,36,N = aesenclast N(%rsp),%xmmX.
1811 pxor %xmm15,%xmm14
1812 .byte 102,15,56,221,84,36,0
1813 psrad $31,%xmm9
1814 paddq %xmm15,%xmm15
1815 .byte 102,15,56,221,92,36,16
1816 .byte 102,15,56,221,100,36,32
1817 pand %xmm8,%xmm9
1818 movq %r10,%rax
1819 .byte 102,15,56,221,108,36,48
1820 .byte 102,15,56,221,116,36,64
1821 .byte 102,15,56,221,124,36,80
1822 pxor %xmm9,%xmm15
1823
# Store the 6 ciphertext blocks and loop while >= 6 blocks remain.
1824 leaq 96(%rsi),%rsi
1825 movups %xmm2,-96(%rsi)
1826 movups %xmm3,-80(%rsi)
1827 movups %xmm4,-64(%rsi)
1828 movups %xmm5,-48(%rsi)
1829 movups %xmm6,-32(%rsi)
1830 movups %xmm7,-16(%rsi)
1831 subq $96,%rdx
1832 jnc L$xts_enc_grandloop
1833
# Restore %eax = rounds and %rcx = key pointer for the tail paths.
1834 movl $16+96,%eax
1835 subl %r10d,%eax
1836 movq %r11,%rcx
1837 shrl $4,%eax
1838
# Tail: 0..5 whole blocks remain; dispatch on the residual byte count.
1839 L$xts_enc_short:
1840
1841 movl %eax,%r10d
1842 pxor %xmm0,%xmm10
1843 addq $96,%rdx
1844 jz L$xts_enc_done
1845
1846 pxor %xmm0,%xmm11
1847 cmpq $0x20,%rdx
1848 jb L$xts_enc_one
1849 pxor %xmm0,%xmm12
1850 je L$xts_enc_two
1851
1852 pxor %xmm0,%xmm13
1853 cmpq $0x40,%rdx
1854 jb L$xts_enc_three
1855 pxor %xmm0,%xmm14
1856 je L$xts_enc_four
1857
# Five blocks: use the 6-wide helper with a zeroed 6th lane.
1858 movdqu (%rdi),%xmm2
1859 movdqu 16(%rdi),%xmm3
1860 movdqu 32(%rdi),%xmm4
1861 pxor %xmm10,%xmm2
1862 movdqu 48(%rdi),%xmm5
1863 pxor %xmm11,%xmm3
1864 movdqu 64(%rdi),%xmm6
1865 leaq 80(%rdi),%rdi
1866 pxor %xmm12,%xmm4
1867 pxor %xmm13,%xmm5
1868 pxor %xmm14,%xmm6
1869 pxor %xmm7,%xmm7
1870
1871 call _aesni_encrypt6
1872
1873 xorps %xmm10,%xmm2
1874 movdqa %xmm15,%xmm10
1875 xorps %xmm11,%xmm3
1876 xorps %xmm12,%xmm4
1877 movdqu %xmm2,(%rsi)
1878 xorps %xmm13,%xmm5
1879 movdqu %xmm3,16(%rsi)
1880 xorps %xmm14,%xmm6
1881 movdqu %xmm4,32(%rsi)
1882 movdqu %xmm5,48(%rsi)
1883 movdqu %xmm6,64(%rsi)
1884 leaq 80(%rsi),%rsi
1885 jmp L$xts_enc_done
1886
# One block: inline single-block AES; %xmm10 is the tweak for it,
# and the next tweak (%xmm11) is promoted for ciphertext stealing.
1887 .p2align 4
1888 L$xts_enc_one:
1889 movups (%rdi),%xmm2
1890 leaq 16(%rdi),%rdi
1891 xorps %xmm10,%xmm2
1892 movups (%rcx),%xmm0
1893 movups 16(%rcx),%xmm1
1894 leaq 32(%rcx),%rcx
1895 xorps %xmm0,%xmm2
1896 L$oop_enc1_9:
1897 .byte 102,15,56,220,209
1898 decl %eax
1899 movups (%rcx),%xmm1
1900 leaq 16(%rcx),%rcx
1901 jnz L$oop_enc1_9
1902 .byte 102,15,56,221,209
1903 xorps %xmm10,%xmm2
1904 movdqa %xmm11,%xmm10
1905 movups %xmm2,(%rsi)
1906 leaq 16(%rsi),%rsi
1907 jmp L$xts_enc_done
1908
# Two blocks.
1909 .p2align 4
1910 L$xts_enc_two:
1911 movups (%rdi),%xmm2
1912 movups 16(%rdi),%xmm3
1913 leaq 32(%rdi),%rdi
1914 xorps %xmm10,%xmm2
1915 xorps %xmm11,%xmm3
1916
1917 call _aesni_encrypt2
1918
1919 xorps %xmm10,%xmm2
1920 movdqa %xmm12,%xmm10
1921 xorps %xmm11,%xmm3
1922 movups %xmm2,(%rsi)
1923 movups %xmm3,16(%rsi)
1924 leaq 32(%rsi),%rsi
1925 jmp L$xts_enc_done
1926
# Three blocks.
1927 .p2align 4
1928 L$xts_enc_three:
1929 movups (%rdi),%xmm2
1930 movups 16(%rdi),%xmm3
1931 movups 32(%rdi),%xmm4
1932 leaq 48(%rdi),%rdi
1933 xorps %xmm10,%xmm2
1934 xorps %xmm11,%xmm3
1935 xorps %xmm12,%xmm4
1936
1937 call _aesni_encrypt3
1938
1939 xorps %xmm10,%xmm2
1940 movdqa %xmm13,%xmm10
1941 xorps %xmm11,%xmm3
1942 xorps %xmm12,%xmm4
1943 movups %xmm2,(%rsi)
1944 movups %xmm3,16(%rsi)
1945 movups %xmm4,32(%rsi)
1946 leaq 48(%rsi),%rsi
1947 jmp L$xts_enc_done
1948
# Four blocks.
1949 .p2align 4
1950 L$xts_enc_four:
1951 movups (%rdi),%xmm2
1952 movups 16(%rdi),%xmm3
1953 movups 32(%rdi),%xmm4
1954 xorps %xmm10,%xmm2
1955 movups 48(%rdi),%xmm5
1956 leaq 64(%rdi),%rdi
1957 xorps %xmm11,%xmm3
1958 xorps %xmm12,%xmm4
1959 xorps %xmm13,%xmm5
1960
1961 call _aesni_encrypt4
1962
1963 pxor %xmm10,%xmm2
1964 movdqa %xmm14,%xmm10
1965 pxor %xmm11,%xmm3
1966 pxor %xmm12,%xmm4
1967 movdqu %xmm2,(%rsi)
1968 pxor %xmm13,%xmm5
1969 movdqu %xmm3,16(%rsi)
1970 movdqu %xmm4,32(%rsi)
1971 movdqu %xmm5,48(%rsi)
1972 leaq 64(%rsi),%rsi
1973 jmp L$xts_enc_done
1974
# Ciphertext stealing for a final partial block (len % 16 != 0):
# swap the tail bytes with the end of the previous ciphertext block,
# then re-encrypt that block under tweak %xmm10.
1975 .p2align 4
1976 L$xts_enc_done:
1977 andq $15,%r9
1978 jz L$xts_enc_ret
1979 movq %r9,%rdx
1980
1981 L$xts_enc_steal:
1982 movzbl (%rdi),%eax
1983 movzbl -16(%rsi),%ecx
1984 leaq 1(%rdi),%rdi
1985 movb %al,-16(%rsi)
1986 movb %cl,0(%rsi)
1987 leaq 1(%rsi),%rsi
1988 subq $1,%rdx
1989 jnz L$xts_enc_steal
1990
1991 subq %r9,%rsi
1992 movq %r11,%rcx
1993 movl %r10d,%eax
1994
1995 movups -16(%rsi),%xmm2
1996 xorps %xmm10,%xmm2
1997 movups (%rcx),%xmm0
1998 movups 16(%rcx),%xmm1
1999 leaq 32(%rcx),%rcx
2000 xorps %xmm0,%xmm2
2001 L$oop_enc1_10:
2002 .byte 102,15,56,220,209
2003 decl %eax
2004 movups (%rcx),%xmm1
2005 leaq 16(%rcx),%rcx
2006 jnz L$oop_enc1_10
2007 .byte 102,15,56,221,209
2008 xorps %xmm10,%xmm2
2009 movups %xmm2,-16(%rsi)
2010
# Scrub key/tweak material: zero all xmm registers and the 112-byte
# stack scratch before returning (defense against key leakage).
2011 L$xts_enc_ret:
2012 xorps %xmm0,%xmm0
2013 pxor %xmm1,%xmm1
2014 pxor %xmm2,%xmm2
2015 pxor %xmm3,%xmm3
2016 pxor %xmm4,%xmm4
2017 pxor %xmm5,%xmm5
2018 pxor %xmm6,%xmm6
2019 pxor %xmm7,%xmm7
2020 movaps %xmm0,0(%rsp)
2021 pxor %xmm8,%xmm8
2022 movaps %xmm0,16(%rsp)
2023 pxor %xmm9,%xmm9
2024 movaps %xmm0,32(%rsp)
2025 pxor %xmm10,%xmm10
2026 movaps %xmm0,48(%rsp)
2027 pxor %xmm11,%xmm11
2028 movaps %xmm0,64(%rsp)
2029 pxor %xmm12,%xmm12
2030 movaps %xmm0,80(%rsp)
2031 pxor %xmm13,%xmm13
2032 movaps %xmm0,96(%rsp)
2033 pxor %xmm14,%xmm14
2034 pxor %xmm15,%xmm15
2035 leaq (%rbp),%rsp
2036 popq %rbp
2037 L$xts_enc_epilogue:
# .byte 0xf3,0xc3 = rep ret (hand-encoded return).
2038 .byte 0xf3,0xc3
2039
# void aesni_xts_decrypt(const uint8_t *inp  /* %rdi */, uint8_t *out /* %rsi */,
#                        size_t len          /* %rdx */, const AES_KEY *key1 /* %rcx */,
#                        const AES_KEY *key2 /* %r8  */, const uint8_t iv[16] /* %r9 */)
#
# AES-XTS decryption, SysV AMD64 ABI (Mach-O underscored symbol).
# Mirrors _aesni_xts_encrypt but uses the decryption rounds; the tweak
# itself is still produced with aesenc (encryption) under key2.
# NOTE(review): perlasm-generated output — do not hand-edit; regenerate
# from aesni-x86_64.pl. Hand-encoded opcodes:
#   .byte 102,15,56,220/221,* = aesenc/aesenclast (tweak setup)
#   .byte 102,15,56,222,*     = aesdec      (66 0F 38 DE /r)
#   .byte 102,15,56,223,*     = aesdeclast  (66 0F 38 DF /r)
2040 .globl _aesni_xts_decrypt
2041 .private_extern _aesni_xts_decrypt
2042
2043 .p2align 4
2044 _aesni_xts_decrypt:
# Prologue: 16-align %rsp, reserve 112 bytes of tweak scratch.
2045 leaq (%rsp),%rax
2046 pushq %rbp
2047 subq $112,%rsp
2048 andq $-16,%rsp
2049 leaq -8(%rax),%rbp
# Encrypt the IV with the tweak key (key2, %r8) -> initial tweak %xmm2.
2050 movups (%r9),%xmm2
2051 movl 240(%r8),%eax
2052 movl 240(%rcx),%r10d
2053 movups (%r8),%xmm0
2054 movups 16(%r8),%xmm1
2055 leaq 32(%r8),%r8
2056 xorps %xmm0,%xmm2
2057 L$oop_enc1_11:
2058 .byte 102,15,56,220,209
2059 decl %eax
2060 movups (%r8),%xmm1
2061 leaq 16(%r8),%r8
2062 jnz L$oop_enc1_11
2063 .byte 102,15,56,221,209
# If len is not a multiple of 16, hold back one extra full block so the
# last complete block can be decrypted under the stealing tweak later:
# %rax = 16 when a partial tail exists, else 0.
2064 xorl %eax,%eax
2065 testq $15,%rdx
2066 setnz %al
2067 shlq $4,%rax
2068 subq %rax,%rdx
2069
# Data key (key1): %r11 = pointer, %eax = rounds, %r10d = rounds*16;
# %r9 preserves the original length for the stealing logic.
2070 movups (%rcx),%xmm0
2071 movq %rcx,%r11
2072 movl %r10d,%eax
2073 shll $4,%r10d
2074 movq %rdx,%r9
2075 andq $-16,%rdx
2076
2077 movups 16(%rcx,%r10,1),%xmm1
2078
# Derive six consecutive tweaks (%xmm10..%xmm14, next in %xmm15) via
# GF(2^128) doubling; L$xts_magic (defined elsewhere in this file) is
# presumably the 0x87 reduction constant — TODO confirm in full source.
2079 movdqa L$xts_magic(%rip),%xmm8
2080 movdqa %xmm2,%xmm15
2081 pshufd $0x5f,%xmm2,%xmm9
2082 pxor %xmm0,%xmm1
2083 movdqa %xmm9,%xmm14
2084 paddd %xmm9,%xmm9
2085 movdqa %xmm15,%xmm10
2086 psrad $31,%xmm14
2087 paddq %xmm15,%xmm15
2088 pand %xmm8,%xmm14
2089 pxor %xmm0,%xmm10
2090 pxor %xmm14,%xmm15
2091 movdqa %xmm9,%xmm14
2092 paddd %xmm9,%xmm9
2093 movdqa %xmm15,%xmm11
2094 psrad $31,%xmm14
2095 paddq %xmm15,%xmm15
2096 pand %xmm8,%xmm14
2097 pxor %xmm0,%xmm11
2098 pxor %xmm14,%xmm15
2099 movdqa %xmm9,%xmm14
2100 paddd %xmm9,%xmm9
2101 movdqa %xmm15,%xmm12
2102 psrad $31,%xmm14
2103 paddq %xmm15,%xmm15
2104 pand %xmm8,%xmm14
2105 pxor %xmm0,%xmm12
2106 pxor %xmm14,%xmm15
2107 movdqa %xmm9,%xmm14
2108 paddd %xmm9,%xmm9
2109 movdqa %xmm15,%xmm13
2110 psrad $31,%xmm14
2111 paddq %xmm15,%xmm15
2112 pand %xmm8,%xmm14
2113 pxor %xmm0,%xmm13
2114 pxor %xmm14,%xmm15
2115 movdqa %xmm15,%xmm14
2116 psrad $31,%xmm9
2117 paddq %xmm15,%xmm15
2118 pand %xmm8,%xmm9
2119 pxor %xmm0,%xmm14
2120 pxor %xmm9,%xmm15
2121 movaps %xmm1,96(%rsp)
2122
# Fewer than 6 whole blocks? Take the short path.
2123 subq $96,%rdx
2124 jc L$xts_dec_short
2125
2126 movl $16+96,%eax
2127 leaq 32(%r11,%r10,1),%rcx
2128 subq %r10,%rax
2129 movups 16(%r11),%xmm1
2130 movq %rax,%r10
2131 leaq L$xts_magic(%rip),%r8
2132 jmp L$xts_dec_grandloop
2133
# Main loop: decrypt 6 blocks per iteration, interleaved with
# next-tweak generation (structure identical to the encrypt side).
2134 .p2align 5
2135 L$xts_dec_grandloop:
2136 movdqu 0(%rdi),%xmm2
2137 movdqa %xmm0,%xmm8
2138 movdqu 16(%rdi),%xmm3
2139 pxor %xmm10,%xmm2
2140 movdqu 32(%rdi),%xmm4
2141 pxor %xmm11,%xmm3
2142 .byte 102,15,56,222,209
2143 movdqu 48(%rdi),%xmm5
2144 pxor %xmm12,%xmm4
2145 .byte 102,15,56,222,217
2146 movdqu 64(%rdi),%xmm6
2147 pxor %xmm13,%xmm5
2148 .byte 102,15,56,222,225
2149 movdqu 80(%rdi),%xmm7
2150 pxor %xmm15,%xmm8
2151 movdqa 96(%rsp),%xmm9
2152 pxor %xmm14,%xmm6
2153 .byte 102,15,56,222,233
2154 movups 32(%r11),%xmm0
2155 leaq 96(%rdi),%rdi
2156 pxor %xmm8,%xmm7
2157
# Spill the (round-key-0-free) tweaks to 0..80(%rsp) for the final XOR.
2158 pxor %xmm9,%xmm10
2159 .byte 102,15,56,222,241
2160 pxor %xmm9,%xmm11
2161 movdqa %xmm10,0(%rsp)
2162 .byte 102,15,56,222,249
2163 movups 48(%r11),%xmm1
2164 pxor %xmm9,%xmm12
2165
2166 .byte 102,15,56,222,208
2167 pxor %xmm9,%xmm13
2168 movdqa %xmm11,16(%rsp)
2169 .byte 102,15,56,222,216
2170 pxor %xmm9,%xmm14
2171 movdqa %xmm12,32(%rsp)
2172 .byte 102,15,56,222,224
2173 .byte 102,15,56,222,232
2174 pxor %xmm9,%xmm8
2175 movdqa %xmm14,64(%rsp)
2176 .byte 102,15,56,222,240
2177 .byte 102,15,56,222,248
2178 movups 64(%r11),%xmm0
2179 movdqa %xmm8,80(%rsp)
2180 pshufd $0x5f,%xmm15,%xmm9
2181 jmp L$xts_dec_loop6
# Middle AES decryption rounds, two per iteration.
2182 .p2align 5
2183 L$xts_dec_loop6:
2184 .byte 102,15,56,222,209
2185 .byte 102,15,56,222,217
2186 .byte 102,15,56,222,225
2187 .byte 102,15,56,222,233
2188 .byte 102,15,56,222,241
2189 .byte 102,15,56,222,249
2190 movups -64(%rcx,%rax,1),%xmm1
2191 addq $32,%rax
2192
2193 .byte 102,15,56,222,208
2194 .byte 102,15,56,222,216
2195 .byte 102,15,56,222,224
2196 .byte 102,15,56,222,232
2197 .byte 102,15,56,222,240
2198 .byte 102,15,56,222,248
2199 movups -80(%rcx,%rax,1),%xmm0
2200 jnz L$xts_dec_loop6
2201
# Last rounds, interleaved with deriving the next 6 tweaks.
2202 movdqa (%r8),%xmm8
2203 movdqa %xmm9,%xmm14
2204 paddd %xmm9,%xmm9
2205 .byte 102,15,56,222,209
2206 paddq %xmm15,%xmm15
2207 psrad $31,%xmm14
2208 .byte 102,15,56,222,217
2209 pand %xmm8,%xmm14
2210 movups (%r11),%xmm10
2211 .byte 102,15,56,222,225
2212 .byte 102,15,56,222,233
2213 .byte 102,15,56,222,241
2214 pxor %xmm14,%xmm15
2215 movaps %xmm10,%xmm11
2216 .byte 102,15,56,222,249
2217 movups -64(%rcx),%xmm1
2218
2219 movdqa %xmm9,%xmm14
2220 .byte 102,15,56,222,208
2221 paddd %xmm9,%xmm9
2222 pxor %xmm15,%xmm10
2223 .byte 102,15,56,222,216
2224 psrad $31,%xmm14
2225 paddq %xmm15,%xmm15
2226 .byte 102,15,56,222,224
2227 .byte 102,15,56,222,232
2228 pand %xmm8,%xmm14
2229 movaps %xmm11,%xmm12
2230 .byte 102,15,56,222,240
2231 pxor %xmm14,%xmm15
2232 movdqa %xmm9,%xmm14
2233 .byte 102,15,56,222,248
2234 movups -48(%rcx),%xmm0
2235
2236 paddd %xmm9,%xmm9
2237 .byte 102,15,56,222,209
2238 pxor %xmm15,%xmm11
2239 psrad $31,%xmm14
2240 .byte 102,15,56,222,217
2241 paddq %xmm15,%xmm15
2242 pand %xmm8,%xmm14
2243 .byte 102,15,56,222,225
2244 .byte 102,15,56,222,233
2245 movdqa %xmm13,48(%rsp)
2246 pxor %xmm14,%xmm15
2247 .byte 102,15,56,222,241
2248 movaps %xmm12,%xmm13
2249 movdqa %xmm9,%xmm14
2250 .byte 102,15,56,222,249
2251 movups -32(%rcx),%xmm1
2252
2253 paddd %xmm9,%xmm9
2254 .byte 102,15,56,222,208
2255 pxor %xmm15,%xmm12
2256 psrad $31,%xmm14
2257 .byte 102,15,56,222,216
2258 paddq %xmm15,%xmm15
2259 pand %xmm8,%xmm14
2260 .byte 102,15,56,222,224
2261 .byte 102,15,56,222,232
2262 .byte 102,15,56,222,240
2263 pxor %xmm14,%xmm15
2264 movaps %xmm13,%xmm14
2265 .byte 102,15,56,222,248
2266
2267 movdqa %xmm9,%xmm0
2268 paddd %xmm9,%xmm9
2269 .byte 102,15,56,222,209
2270 pxor %xmm15,%xmm13
2271 psrad $31,%xmm0
2272 .byte 102,15,56,222,217
2273 paddq %xmm15,%xmm15
2274 pand %xmm8,%xmm0
2275 .byte 102,15,56,222,225
2276 .byte 102,15,56,222,233
2277 pxor %xmm0,%xmm15
2278 movups (%r11),%xmm0
2279 .byte 102,15,56,222,241
2280 .byte 102,15,56,222,249
2281 movups 16(%r11),%xmm1
2282
# aesdeclast against the spilled tweaks:
# .byte 102,15,56,223,84,36,N = aesdeclast N(%rsp),%xmmX.
2283 pxor %xmm15,%xmm14
2284 .byte 102,15,56,223,84,36,0
2285 psrad $31,%xmm9
2286 paddq %xmm15,%xmm15
2287 .byte 102,15,56,223,92,36,16
2288 .byte 102,15,56,223,100,36,32
2289 pand %xmm8,%xmm9
2290 movq %r10,%rax
2291 .byte 102,15,56,223,108,36,48
2292 .byte 102,15,56,223,116,36,64
2293 .byte 102,15,56,223,124,36,80
2294 pxor %xmm9,%xmm15
2295
# Store the 6 plaintext blocks and loop while >= 6 blocks remain.
2296 leaq 96(%rsi),%rsi
2297 movups %xmm2,-96(%rsi)
2298 movups %xmm3,-80(%rsi)
2299 movups %xmm4,-64(%rsi)
2300 movups %xmm5,-48(%rsi)
2301 movups %xmm6,-32(%rsi)
2302 movups %xmm7,-16(%rsi)
2303 subq $96,%rdx
2304 jnc L$xts_dec_grandloop
2305
2306 movl $16+96,%eax
2307 subl %r10d,%eax
2308 movq %r11,%rcx
2309 shrl $4,%eax
2310
# Tail: 0..5 whole blocks remain; dispatch on the residual byte count.
2311 L$xts_dec_short:
2312
2313 movl %eax,%r10d
2314 pxor %xmm0,%xmm10
2315 pxor %xmm0,%xmm11
2316 addq $96,%rdx
2317 jz L$xts_dec_done
2318
2319 pxor %xmm0,%xmm12
2320 cmpq $0x20,%rdx
2321 jb L$xts_dec_one
2322 pxor %xmm0,%xmm13
2323 je L$xts_dec_two
2324
2325 pxor %xmm0,%xmm14
2326 cmpq $0x40,%rdx
2327 jb L$xts_dec_three
2328 je L$xts_dec_four
2329
# Five blocks; afterwards derive the stealing tweak (%xmm11) from
# %xmm15 inline (sign-mask doubling, same GF(2^128) step as above).
2330 movdqu (%rdi),%xmm2
2331 movdqu 16(%rdi),%xmm3
2332 movdqu 32(%rdi),%xmm4
2333 pxor %xmm10,%xmm2
2334 movdqu 48(%rdi),%xmm5
2335 pxor %xmm11,%xmm3
2336 movdqu 64(%rdi),%xmm6
2337 leaq 80(%rdi),%rdi
2338 pxor %xmm12,%xmm4
2339 pxor %xmm13,%xmm5
2340 pxor %xmm14,%xmm6
2341
2342 call _aesni_decrypt6
2343
2344 xorps %xmm10,%xmm2
2345 xorps %xmm11,%xmm3
2346 xorps %xmm12,%xmm4
2347 movdqu %xmm2,(%rsi)
2348 xorps %xmm13,%xmm5
2349 movdqu %xmm3,16(%rsi)
2350 xorps %xmm14,%xmm6
2351 movdqu %xmm4,32(%rsi)
2352 pxor %xmm14,%xmm14
2353 movdqu %xmm5,48(%rsi)
2354 pcmpgtd %xmm15,%xmm14
2355 movdqu %xmm6,64(%rsi)
2356 leaq 80(%rsi),%rsi
2357 pshufd $0x13,%xmm14,%xmm11
2358 andq $15,%r9
2359 jz L$xts_dec_ret
2360
2361 movdqa %xmm15,%xmm10
2362 paddq %xmm15,%xmm15
2363 pand %xmm8,%xmm11
2364 pxor %xmm15,%xmm11
2365 jmp L$xts_dec_done2
2366
# One block: inline single-block AES decrypt; tweaks shift down
# (%xmm11 -> %xmm10, %xmm12 -> %xmm11) for the stealing path.
2367 .p2align 4
2368 L$xts_dec_one:
2369 movups (%rdi),%xmm2
2370 leaq 16(%rdi),%rdi
2371 xorps %xmm10,%xmm2
2372 movups (%rcx),%xmm0
2373 movups 16(%rcx),%xmm1
2374 leaq 32(%rcx),%rcx
2375 xorps %xmm0,%xmm2
2376 L$oop_dec1_12:
2377 .byte 102,15,56,222,209
2378 decl %eax
2379 movups (%rcx),%xmm1
2380 leaq 16(%rcx),%rcx
2381 jnz L$oop_dec1_12
2382 .byte 102,15,56,223,209
2383 xorps %xmm10,%xmm2
2384 movdqa %xmm11,%xmm10
2385 movups %xmm2,(%rsi)
2386 movdqa %xmm12,%xmm11
2387 leaq 16(%rsi),%rsi
2388 jmp L$xts_dec_done
2389
# Two blocks.
2390 .p2align 4
2391 L$xts_dec_two:
2392 movups (%rdi),%xmm2
2393 movups 16(%rdi),%xmm3
2394 leaq 32(%rdi),%rdi
2395 xorps %xmm10,%xmm2
2396 xorps %xmm11,%xmm3
2397
2398 call _aesni_decrypt2
2399
2400 xorps %xmm10,%xmm2
2401 movdqa %xmm12,%xmm10
2402 xorps %xmm11,%xmm3
2403 movdqa %xmm13,%xmm11
2404 movups %xmm2,(%rsi)
2405 movups %xmm3,16(%rsi)
2406 leaq 32(%rsi),%rsi
2407 jmp L$xts_dec_done
2408
# Three blocks.
2409 .p2align 4
2410 L$xts_dec_three:
2411 movups (%rdi),%xmm2
2412 movups 16(%rdi),%xmm3
2413 movups 32(%rdi),%xmm4
2414 leaq 48(%rdi),%rdi
2415 xorps %xmm10,%xmm2
2416 xorps %xmm11,%xmm3
2417 xorps %xmm12,%xmm4
2418
2419 call _aesni_decrypt3
2420
2421 xorps %xmm10,%xmm2
2422 movdqa %xmm13,%xmm10
2423 xorps %xmm11,%xmm3
2424 movdqa %xmm14,%xmm11
2425 xorps %xmm12,%xmm4
2426 movups %xmm2,(%rsi)
2427 movups %xmm3,16(%rsi)
2428 movups %xmm4,32(%rsi)
2429 leaq 48(%rsi),%rsi
2430 jmp L$xts_dec_done
2431
# Four blocks.
2432 .p2align 4
2433 L$xts_dec_four:
2434 movups (%rdi),%xmm2
2435 movups 16(%rdi),%xmm3
2436 movups 32(%rdi),%xmm4
2437 xorps %xmm10,%xmm2
2438 movups 48(%rdi),%xmm5
2439 leaq 64(%rdi),%rdi
2440 xorps %xmm11,%xmm3
2441 xorps %xmm12,%xmm4
2442 xorps %xmm13,%xmm5
2443
2444 call _aesni_decrypt4
2445
2446 pxor %xmm10,%xmm2
2447 movdqa %xmm14,%xmm10
2448 pxor %xmm11,%xmm3
2449 movdqa %xmm15,%xmm11
2450 pxor %xmm12,%xmm4
2451 movdqu %xmm2,(%rsi)
2452 pxor %xmm13,%xmm5
2453 movdqu %xmm3,16(%rsi)
2454 movdqu %xmm4,32(%rsi)
2455 movdqu %xmm5,48(%rsi)
2456 leaq 64(%rsi),%rsi
2457 jmp L$xts_dec_done
2458
# Ciphertext stealing (decrypt side): decrypt the held-back full block
# under the *next* tweak (%xmm11), swap tail bytes with the partial
# block, then decrypt the reassembled block under tweak %xmm10.
2459 .p2align 4
2460 L$xts_dec_done:
2461 andq $15,%r9
2462 jz L$xts_dec_ret
2463 L$xts_dec_done2:
2464 movq %r9,%rdx
2465 movq %r11,%rcx
2466 movl %r10d,%eax
2467
2468 movups (%rdi),%xmm2
2469 xorps %xmm11,%xmm2
2470 movups (%rcx),%xmm0
2471 movups 16(%rcx),%xmm1
2472 leaq 32(%rcx),%rcx
2473 xorps %xmm0,%xmm2
2474 L$oop_dec1_13:
2475 .byte 102,15,56,222,209
2476 decl %eax
2477 movups (%rcx),%xmm1
2478 leaq 16(%rcx),%rcx
2479 jnz L$oop_dec1_13
2480 .byte 102,15,56,223,209
2481 xorps %xmm11,%xmm2
2482 movups %xmm2,(%rsi)
2483
2484 L$xts_dec_steal:
2485 movzbl 16(%rdi),%eax
2486 movzbl (%rsi),%ecx
2487 leaq 1(%rdi),%rdi
2488 movb %al,(%rsi)
2489 movb %cl,16(%rsi)
2490 leaq 1(%rsi),%rsi
2491 subq $1,%rdx
2492 jnz L$xts_dec_steal
2493
2494 subq %r9,%rsi
2495 movq %r11,%rcx
2496 movl %r10d,%eax
2497
2498 movups (%rsi),%xmm2
2499 xorps %xmm10,%xmm2
2500 movups (%rcx),%xmm0
2501 movups 16(%rcx),%xmm1
2502 leaq 32(%rcx),%rcx
2503 xorps %xmm0,%xmm2
2504 L$oop_dec1_14:
2505 .byte 102,15,56,222,209
2506 decl %eax
2507 movups (%rcx),%xmm1
2508 leaq 16(%rcx),%rcx
2509 jnz L$oop_dec1_14
2510 .byte 102,15,56,223,209
2511 xorps %xmm10,%xmm2
2512 movups %xmm2,(%rsi)
2513
# Scrub key/tweak material: zero all xmm registers and the stack
# scratch before returning (defense against key leakage).
2514 L$xts_dec_ret:
2515 xorps %xmm0,%xmm0
2516 pxor %xmm1,%xmm1
2517 pxor %xmm2,%xmm2
2518 pxor %xmm3,%xmm3
2519 pxor %xmm4,%xmm4
2520 pxor %xmm5,%xmm5
2521 pxor %xmm6,%xmm6
2522 pxor %xmm7,%xmm7
2523 movaps %xmm0,0(%rsp)
2524 pxor %xmm8,%xmm8
2525 movaps %xmm0,16(%rsp)
2526 pxor %xmm9,%xmm9
2527 movaps %xmm0,32(%rsp)
2528 pxor %xmm10,%xmm10
2529 movaps %xmm0,48(%rsp)
2530 pxor %xmm11,%xmm11
2531 movaps %xmm0,64(%rsp)
2532 pxor %xmm12,%xmm12
2533 movaps %xmm0,80(%rsp)
2534 pxor %xmm13,%xmm13
2535 movaps %xmm0,96(%rsp)
2536 pxor %xmm14,%xmm14
2537 pxor %xmm15,%xmm15
2538 leaq (%rbp),%rsp
2539 popq %rbp
2540 L$xts_dec_epilogue:
# .byte 0xf3,0xc3 = rep ret (hand-encoded return).
2541 .byte 0xf3,0xc3
2542
2543 .globl _aesni_cbc_encrypt
2544 .private_extern _aesni_cbc_encrypt
2545
2546 .p2align 4
2547 _aesni_cbc_encrypt:
2548 testq %rdx,%rdx
2549 jz L$cbc_ret
2550
2551 movl 240(%rcx),%r10d
2552 movq %rcx,%r11
2553 testl %r9d,%r9d
2554 jz L$cbc_decrypt
2555
2556 movups (%r8),%xmm2
2557 movl %r10d,%eax
2558 cmpq $16,%rdx
2559 jb L$cbc_enc_tail
2560 subq $16,%rdx
2561 jmp L$cbc_enc_loop
2562 .p2align 4
2563 L$cbc_enc_loop:
2564 movups (%rdi),%xmm3
2565 leaq 16(%rdi),%rdi
2566
2567 movups (%rcx),%xmm0
2568 movups 16(%rcx),%xmm1
2569 xorps %xmm0,%xmm3
2570 leaq 32(%rcx),%rcx
2571 xorps %xmm3,%xmm2
2572 L$oop_enc1_15:
2573 .byte 102,15,56,220,209
2574 decl %eax
2575 movups (%rcx),%xmm1
2576 leaq 16(%rcx),%rcx
2577 jnz L$oop_enc1_15
2578 .byte 102,15,56,221,209
2579 movl %r10d,%eax
2580 movq %r11,%rcx
2581 movups %xmm2,0(%rsi)
2582 leaq 16(%rsi),%rsi
2583 subq $16,%rdx
2584 jnc L$cbc_enc_loop
2585 addq $16,%rdx
2586 jnz L$cbc_enc_tail
2587 pxor %xmm0,%xmm0
2588 pxor %xmm1,%xmm1
2589 movups %xmm2,(%r8)
2590 pxor %xmm2,%xmm2
2591 pxor %xmm3,%xmm3
2592 jmp L$cbc_ret
2593
2594 L$cbc_enc_tail:
2595 movq %rdx,%rcx
2596 xchgq %rdi,%rsi
2597 .long 0x9066A4F3
2598 movl $16,%ecx
2599 subq %rdx,%rcx
2600 xorl %eax,%eax
2601 .long 0x9066AAF3
2602 leaq -16(%rdi),%rdi
2603 movl %r10d,%eax
2604 movq %rdi,%rsi
2605 movq %r11,%rcx
2606 xorq %rdx,%rdx
2607 jmp L$cbc_enc_loop
2608
2609 .p2align 4
2610 L$cbc_decrypt:
2611 cmpq $16,%rdx
2612 jne L$cbc_decrypt_bulk
2613
2614
2615
2616 movdqu (%rdi),%xmm2
2617 movdqu (%r8),%xmm3
2618 movdqa %xmm2,%xmm4
2619 movups (%rcx),%xmm0
2620 movups 16(%rcx),%xmm1
2621 leaq 32(%rcx),%rcx
2622 xorps %xmm0,%xmm2
2623 L$oop_dec1_16:
2624 .byte 102,15,56,222,209
2625 decl %r10d
2626 movups (%rcx),%xmm1
2627 leaq 16(%rcx),%rcx
2628 jnz L$oop_dec1_16
2629 .byte 102,15,56,223,209
2630 pxor %xmm0,%xmm0
2631 pxor %xmm1,%xmm1
2632 movdqu %xmm4,(%r8)
2633 xorps %xmm3,%xmm2
2634 pxor %xmm3,%xmm3
2635 movups %xmm2,(%rsi)
2636 pxor %xmm2,%xmm2
2637 jmp L$cbc_ret
2638 .p2align 4
2639 L$cbc_decrypt_bulk:
2640 leaq (%rsp),%rax
2641 pushq %rbp
2642 subq $16,%rsp
2643 andq $-16,%rsp
2644 leaq -8(%rax),%rbp
2645 movups (%r8),%xmm10
2646 movl %r10d,%eax
2647 cmpq $0x50,%rdx
2648 jbe L$cbc_dec_tail
2649
2650 movups (%rcx),%xmm0
2651 movdqu 0(%rdi),%xmm2
2652 movdqu 16(%rdi),%xmm3
2653 movdqa %xmm2,%xmm11
2654 movdqu 32(%rdi),%xmm4
2655 movdqa %xmm3,%xmm12
2656 movdqu 48(%rdi),%xmm5
2657 movdqa %xmm4,%xmm13
2658 movdqu 64(%rdi),%xmm6
2659 movdqa %xmm5,%xmm14
2660 movdqu 80(%rdi),%xmm7
2661 movdqa %xmm6,%xmm15
2662 movl _OPENSSL_ia32cap_P+4(%rip),%r9d
2663 cmpq $0x70,%rdx
2664 jbe L$cbc_dec_six_or_seven
2665
2666 andl $71303168,%r9d
2667 subq $0x50,%rdx
2668 cmpl $4194304,%r9d
2669 je L$cbc_dec_loop6_enter
2670 subq $0x20,%rdx
2671 leaq 112(%rcx),%rcx
2672 jmp L$cbc_dec_loop8_enter
2673 .p2align 4
2674 L$cbc_dec_loop8:
2675 movups %xmm9,(%rsi)
2676 leaq 16(%rsi),%rsi
2677 L$cbc_dec_loop8_enter:
2678 movdqu 96(%rdi),%xmm8
2679 pxor %xmm0,%xmm2
2680 movdqu 112(%rdi),%xmm9
2681 pxor %xmm0,%xmm3
2682 movups 16-112(%rcx),%xmm1
2683 pxor %xmm0,%xmm4
2684 xorq %r11,%r11
2685 cmpq $0x70,%rdx
2686 pxor %xmm0,%xmm5
2687 pxor %xmm0,%xmm6
2688 pxor %xmm0,%xmm7
2689 pxor %xmm0,%xmm8
2690
2691 .byte 102,15,56,222,209
2692 pxor %xmm0,%xmm9
2693 movups 32-112(%rcx),%xmm0
2694 .byte 102,15,56,222,217
2695 .byte 102,15,56,222,225
2696 .byte 102,15,56,222,233
2697 .byte 102,15,56,222,241
2698 .byte 102,15,56,222,249
2699 .byte 102,68,15,56,222,193
2700 setnc %r11b
2701 shlq $7,%r11
2702 .byte 102,68,15,56,222,201
2703 addq %rdi,%r11
2704 movups 48-112(%rcx),%xmm1
2705 .byte 102,15,56,222,208
2706 .byte 102,15,56,222,216
2707 .byte 102,15,56,222,224
2708 .byte 102,15,56,222,232
2709 .byte 102,15,56,222,240
2710 .byte 102,15,56,222,248
2711 .byte 102,68,15,56,222,192
2712 .byte 102,68,15,56,222,200
2713 movups 64-112(%rcx),%xmm0
2714 nop
2715 .byte 102,15,56,222,209
2716 .byte 102,15,56,222,217
2717 .byte 102,15,56,222,225
2718 .byte 102,15,56,222,233
2719 .byte 102,15,56,222,241
2720 .byte 102,15,56,222,249
2721 .byte 102,68,15,56,222,193
2722 .byte 102,68,15,56,222,201
2723 movups 80-112(%rcx),%xmm1
2724 nop
2725 .byte 102,15,56,222,208
2726 .byte 102,15,56,222,216
2727 .byte 102,15,56,222,224
2728 .byte 102,15,56,222,232
2729 .byte 102,15,56,222,240
2730 .byte 102,15,56,222,248
2731 .byte 102,68,15,56,222,192
2732 .byte 102,68,15,56,222,200
2733 movups 96-112(%rcx),%xmm0
2734 nop
2735 .byte 102,15,56,222,209
2736 .byte 102,15,56,222,217
2737 .byte 102,15,56,222,225
2738 .byte 102,15,56,222,233
2739 .byte 102,15,56,222,241
2740 .byte 102,15,56,222,249
2741 .byte 102,68,15,56,222,193
2742 .byte 102,68,15,56,222,201
2743 movups 112-112(%rcx),%xmm1
2744 nop
2745 .byte 102,15,56,222,208
2746 .byte 102,15,56,222,216
2747 .byte 102,15,56,222,224
2748 .byte 102,15,56,222,232
2749 .byte 102,15,56,222,240
2750 .byte 102,15,56,222,248
2751 .byte 102,68,15,56,222,192
2752 .byte 102,68,15,56,222,200
2753 movups 128-112(%rcx),%xmm0
2754 nop
2755 .byte 102,15,56,222,209
2756 .byte 102,15,56,222,217
2757 .byte 102,15,56,222,225
2758 .byte 102,15,56,222,233
2759 .byte 102,15,56,222,241
2760 .byte 102,15,56,222,249
2761 .byte 102,68,15,56,222,193
2762 .byte 102,68,15,56,222,201
2763 movups 144-112(%rcx),%xmm1
2764 cmpl $11,%eax
2765 .byte 102,15,56,222,208
2766 .byte 102,15,56,222,216
2767 .byte 102,15,56,222,224
2768 .byte 102,15,56,222,232
2769 .byte 102,15,56,222,240
2770 .byte 102,15,56,222,248
2771 .byte 102,68,15,56,222,192
2772 .byte 102,68,15,56,222,200
2773 movups 160-112(%rcx),%xmm0
2774 jb L$cbc_dec_done
2775 .byte 102,15,56,222,209
2776 .byte 102,15,56,222,217
2777 .byte 102,15,56,222,225
2778 .byte 102,15,56,222,233
2779 .byte 102,15,56,222,241
2780 .byte 102,15,56,222,249
2781 .byte 102,68,15,56,222,193
2782 .byte 102,68,15,56,222,201
2783 movups 176-112(%rcx),%xmm1
2784 nop
2785 .byte 102,15,56,222,208
2786 .byte 102,15,56,222,216
2787 .byte 102,15,56,222,224
2788 .byte 102,15,56,222,232
2789 .byte 102,15,56,222,240
2790 .byte 102,15,56,222,248
2791 .byte 102,68,15,56,222,192
2792 .byte 102,68,15,56,222,200
2793 movups 192-112(%rcx),%xmm0
2794 je L$cbc_dec_done
2795 .byte 102,15,56,222,209
2796 .byte 102,15,56,222,217
2797 .byte 102,15,56,222,225
2798 .byte 102,15,56,222,233
2799 .byte 102,15,56,222,241
2800 .byte 102,15,56,222,249
2801 .byte 102,68,15,56,222,193
2802 .byte 102,68,15,56,222,201
2803 movups 208-112(%rcx),%xmm1
2804 nop
2805 .byte 102,15,56,222,208
2806 .byte 102,15,56,222,216
2807 .byte 102,15,56,222,224
2808 .byte 102,15,56,222,232
2809 .byte 102,15,56,222,240
2810 .byte 102,15,56,222,248
2811 .byte 102,68,15,56,222,192
2812 .byte 102,68,15,56,222,200
2813 movups 224-112(%rcx),%xmm0
2814 jmp L$cbc_dec_done
2815 .p2align 4
2816 L$cbc_dec_done:
2817 .byte 102,15,56,222,209
2818 .byte 102,15,56,222,217
2819 pxor %xmm0,%xmm10
2820 pxor %xmm0,%xmm11
2821 .byte 102,15,56,222,225
2822 .byte 102,15,56,222,233
2823 pxor %xmm0,%xmm12
2824 pxor %xmm0,%xmm13
2825 .byte 102,15,56,222,241
2826 .byte 102,15,56,222,249
2827 pxor %xmm0,%xmm14
2828 pxor %xmm0,%xmm15
2829 .byte 102,68,15,56,222,193
2830 .byte 102,68,15,56,222,201
2831 movdqu 80(%rdi),%xmm1
2832
2833 .byte 102,65,15,56,223,210
2834 movdqu 96(%rdi),%xmm10
2835 pxor %xmm0,%xmm1
2836 .byte 102,65,15,56,223,219
2837 pxor %xmm0,%xmm10
2838 movdqu 112(%rdi),%xmm0
2839 .byte 102,65,15,56,223,228
2840 leaq 128(%rdi),%rdi
2841 movdqu 0(%r11),%xmm11
2842 .byte 102,65,15,56,223,237
2843 .byte 102,65,15,56,223,246
2844 movdqu 16(%r11),%xmm12
2845 movdqu 32(%r11),%xmm13
2846 .byte 102,65,15,56,223,255
2847 .byte 102,68,15,56,223,193
2848 movdqu 48(%r11),%xmm14
2849 movdqu 64(%r11),%xmm15
2850 .byte 102,69,15,56,223,202
2851 movdqa %xmm0,%xmm10
2852 movdqu 80(%r11),%xmm1
2853 movups -112(%rcx),%xmm0
2854
2855 movups %xmm2,(%rsi)
2856 movdqa %xmm11,%xmm2
2857 movups %xmm3,16(%rsi)
2858 movdqa %xmm12,%xmm3
2859 movups %xmm4,32(%rsi)
2860 movdqa %xmm13,%xmm4
2861 movups %xmm5,48(%rsi)
2862 movdqa %xmm14,%xmm5
2863 movups %xmm6,64(%rsi)
2864 movdqa %xmm15,%xmm6
2865 movups %xmm7,80(%rsi)
2866 movdqa %xmm1,%xmm7
2867 movups %xmm8,96(%rsi)
2868 leaq 112(%rsi),%rsi
2869
2870 subq $0x80,%rdx
2871 ja L$cbc_dec_loop8
2872
2873 movaps %xmm9,%xmm2
2874 leaq -112(%rcx),%rcx
2875 addq $0x70,%rdx
2876 jle L$cbc_dec_clear_tail_collected
2877 movups %xmm9,(%rsi)
2878 leaq 16(%rsi),%rsi
2879 cmpq $0x50,%rdx
2880 jbe L$cbc_dec_tail
2881
2882 movaps %xmm11,%xmm2
2883 L$cbc_dec_six_or_seven:
2884 cmpq $0x60,%rdx
2885 ja L$cbc_dec_seven
2886
2887 movaps %xmm7,%xmm8
2888 call _aesni_decrypt6
2889 pxor %xmm10,%xmm2
2890 movaps %xmm8,%xmm10
2891 pxor %xmm11,%xmm3
2892 movdqu %xmm2,(%rsi)
2893 pxor %xmm12,%xmm4
2894 movdqu %xmm3,16(%rsi)
2895 pxor %xmm3,%xmm3
2896 pxor %xmm13,%xmm5
2897 movdqu %xmm4,32(%rsi)
2898 pxor %xmm4,%xmm4
2899 pxor %xmm14,%xmm6
2900 movdqu %xmm5,48(%rsi)
2901 pxor %xmm5,%xmm5
2902 pxor %xmm15,%xmm7
2903 movdqu %xmm6,64(%rsi)
2904 pxor %xmm6,%xmm6
2905 leaq 80(%rsi),%rsi
2906 movdqa %xmm7,%xmm2
2907 pxor %xmm7,%xmm7
2908 jmp L$cbc_dec_tail_collected
2909
2910 .p2align 4
2911 L$cbc_dec_seven:
2912 movups 96(%rdi),%xmm8
2913 xorps %xmm9,%xmm9
2914 call _aesni_decrypt8
2915 movups 80(%rdi),%xmm9
2916 pxor %xmm10,%xmm2
2917 movups 96(%rdi),%xmm10
2918 pxor %xmm11,%xmm3
2919 movdqu %xmm2,(%rsi)
2920 pxor %xmm12,%xmm4
2921 movdqu %xmm3,16(%rsi)
2922 pxor %xmm3,%xmm3
2923 pxor %xmm13,%xmm5
2924 movdqu %xmm4,32(%rsi)
2925 pxor %xmm4,%xmm4
2926 pxor %xmm14,%xmm6
2927 movdqu %xmm5,48(%rsi)
2928 pxor %xmm5,%xmm5
2929 pxor %xmm15,%xmm7
2930 movdqu %xmm6,64(%rsi)
2931 pxor %xmm6,%xmm6
2932 pxor %xmm9,%xmm8
2933 movdqu %xmm7,80(%rsi)
2934 pxor %xmm7,%xmm7
2935 leaq 96(%rsi),%rsi
2936 movdqa %xmm8,%xmm2
2937 pxor %xmm8,%xmm8
2938 pxor %xmm9,%xmm9
2939 jmp L$cbc_dec_tail_collected
2940
2941 .p2align 4
2942 L$cbc_dec_loop6:
2943 movups %xmm7,(%rsi)
2944 leaq 16(%rsi),%rsi
2945 movdqu 0(%rdi),%xmm2
2946 movdqu 16(%rdi),%xmm3
2947 movdqa %xmm2,%xmm11
2948 movdqu 32(%rdi),%xmm4
2949 movdqa %xmm3,%xmm12
2950 movdqu 48(%rdi),%xmm5
2951 movdqa %xmm4,%xmm13
2952 movdqu 64(%rdi),%xmm6
2953 movdqa %xmm5,%xmm14
2954 movdqu 80(%rdi),%xmm7
2955 movdqa %xmm6,%xmm15
2956 L$cbc_dec_loop6_enter:
2957 leaq 96(%rdi),%rdi
2958 movdqa %xmm7,%xmm8
2959
2960 call _aesni_decrypt6
2961
2962 pxor %xmm10,%xmm2
2963 movdqa %xmm8,%xmm10
2964 pxor %xmm11,%xmm3
2965 movdqu %xmm2,(%rsi)
2966 pxor %xmm12,%xmm4
2967 movdqu %xmm3,16(%rsi)
2968 pxor %xmm13,%xmm5
2969 movdqu %xmm4,32(%rsi)
2970 pxor %xmm14,%xmm6
2971 movq %r11,%rcx
2972 movdqu %xmm5,48(%rsi)
2973 pxor %xmm15,%xmm7
2974 movl %r10d,%eax
2975 movdqu %xmm6,64(%rsi)
2976 leaq 80(%rsi),%rsi
2977 subq $0x60,%rdx
2978 ja L$cbc_dec_loop6
2979
2980 movdqa %xmm7,%xmm2
2981 addq $0x50,%rdx
2982 jle L$cbc_dec_clear_tail_collected
2983 movups %xmm7,(%rsi)
2984 leaq 16(%rsi),%rsi
2985
2986 L$cbc_dec_tail:
2987 movups (%rdi),%xmm2
2988 subq $0x10,%rdx
2989 jbe L$cbc_dec_one
2990
2991 movups 16(%rdi),%xmm3
2992 movaps %xmm2,%xmm11
2993 subq $0x10,%rdx
2994 jbe L$cbc_dec_two
2995
2996 movups 32(%rdi),%xmm4
2997 movaps %xmm3,%xmm12
2998 subq $0x10,%rdx
2999 jbe L$cbc_dec_three
3000
3001 movups 48(%rdi),%xmm5
3002 movaps %xmm4,%xmm13
3003 subq $0x10,%rdx
3004 jbe L$cbc_dec_four
3005
3006 movups 64(%rdi),%xmm6
3007 movaps %xmm5,%xmm14
3008 movaps %xmm6,%xmm15
3009 xorps %xmm7,%xmm7
3010 call _aesni_decrypt6
3011 pxor %xmm10,%xmm2
3012 movaps %xmm15,%xmm10
3013 pxor %xmm11,%xmm3
3014 movdqu %xmm2,(%rsi)
3015 pxor %xmm12,%xmm4
3016 movdqu %xmm3,16(%rsi)
3017 pxor %xmm3,%xmm3
3018 pxor %xmm13,%xmm5
3019 movdqu %xmm4,32(%rsi)
3020 pxor %xmm4,%xmm4
3021 pxor %xmm14,%xmm6
3022 movdqu %xmm5,48(%rsi)
3023 pxor %xmm5,%xmm5
3024 leaq 64(%rsi),%rsi
3025 movdqa %xmm6,%xmm2
3026 pxor %xmm6,%xmm6
3027 pxor %xmm7,%xmm7
3028 subq $0x10,%rdx
3029 jmp L$cbc_dec_tail_collected
3030
3031 .p2align 4
3032 L$cbc_dec_one:
3033 movaps %xmm2,%xmm11
3034 movups (%rcx),%xmm0
3035 movups 16(%rcx),%xmm1
3036 leaq 32(%rcx),%rcx
3037 xorps %xmm0,%xmm2
3038 L$oop_dec1_17:
3039 .byte 102,15,56,222,209
3040 decl %eax
3041 movups (%rcx),%xmm1
3042 leaq 16(%rcx),%rcx
3043 jnz L$oop_dec1_17
3044 .byte 102,15,56,223,209
3045 xorps %xmm10,%xmm2
3046 movaps %xmm11,%xmm10
3047 jmp L$cbc_dec_tail_collected
3048 .p2align 4
3049 L$cbc_dec_two:
3050 movaps %xmm3,%xmm12
3051 call _aesni_decrypt2
3052 pxor %xmm10,%xmm2
3053 movaps %xmm12,%xmm10
3054 pxor %xmm11,%xmm3
3055 movdqu %xmm2,(%rsi)
3056 movdqa %xmm3,%xmm2
3057 pxor %xmm3,%xmm3
3058 leaq 16(%rsi),%rsi
3059 jmp L$cbc_dec_tail_collected
3060 .p2align 4
3061 L$cbc_dec_three:
3062 movaps %xmm4,%xmm13
3063 call _aesni_decrypt3
3064 pxor %xmm10,%xmm2
3065 movaps %xmm13,%xmm10
3066 pxor %xmm11,%xmm3
3067 movdqu %xmm2,(%rsi)
3068 pxor %xmm12,%xmm4
3069 movdqu %xmm3,16(%rsi)
3070 pxor %xmm3,%xmm3
3071 movdqa %xmm4,%xmm2
3072 pxor %xmm4,%xmm4
3073 leaq 32(%rsi),%rsi
3074 jmp L$cbc_dec_tail_collected
3075 .p2align 4
3076 L$cbc_dec_four:
3077 movaps %xmm5,%xmm14
3078 call _aesni_decrypt4
3079 pxor %xmm10,%xmm2
3080 movaps %xmm14,%xmm10
3081 pxor %xmm11,%xmm3
3082 movdqu %xmm2,(%rsi)
3083 pxor %xmm12,%xmm4
3084 movdqu %xmm3,16(%rsi)
3085 pxor %xmm3,%xmm3
3086 pxor %xmm13,%xmm5
3087 movdqu %xmm4,32(%rsi)
3088 pxor %xmm4,%xmm4
3089 movdqa %xmm5,%xmm2
3090 pxor %xmm5,%xmm5
3091 leaq 48(%rsi),%rsi
3092 jmp L$cbc_dec_tail_collected
3093
3094 .p2align 4
3095 L$cbc_dec_clear_tail_collected:
3096 pxor %xmm3,%xmm3
3097 pxor %xmm4,%xmm4
3098 pxor %xmm5,%xmm5
3099 pxor %xmm6,%xmm6
3100 pxor %xmm7,%xmm7
3101 pxor %xmm8,%xmm8
3102 pxor %xmm9,%xmm9
3103 L$cbc_dec_tail_collected:
3104 movups %xmm10,(%r8)
3105 andq $15,%rdx
3106 jnz L$cbc_dec_tail_partial
3107 movups %xmm2,(%rsi)
3108 pxor %xmm2,%xmm2
3109 jmp L$cbc_dec_ret
3110 .p2align 4
3111 L$cbc_dec_tail_partial:
3112 movaps %xmm2,(%rsp)
3113 pxor %xmm2,%xmm2
3114 movq $16,%rcx
3115 movq %rsi,%rdi
3116 subq %rdx,%rcx
3117 leaq (%rsp),%rsi
3118 .long 0x9066A4F3
3119 movdqa %xmm2,(%rsp)
3120
3121 L$cbc_dec_ret:
3122 xorps %xmm0,%xmm0
3123 pxor %xmm1,%xmm1
3124 leaq (%rbp),%rsp
3125 popq %rbp
3126 L$cbc_ret:
3127 .byte 0xf3,0xc3
3128
3129 .globl _aesni_set_decrypt_key
3130 .private_extern _aesni_set_decrypt_key
3131
/*
 * int aesni_set_decrypt_key(const uint8_t *userKey = %rdi, int bits = %esi,
 *                           AES_KEY *key = %rdx)
 * Builds the AES decryption key schedule: first expands the encryption
 * schedule via __aesni_set_encrypt_key, then reverses the order of the
 * round keys in place, applying AESIMC (InvMixColumns) to every round key
 * except the outermost two -- the "equivalent inverse cipher" layout that
 * the AESDEC/AESDECLAST instructions require.
 * Returns 0 in %eax on success; non-zero error codes pass through
 * unchanged from the encrypt-key routine.
 */
3132 .p2align 4
3133 _aesni_set_decrypt_key:
3134 .byte 0x48,0x83,0xEC,0x08       # subq $8,%rsp, emitted as raw bytes
3135 call __aesni_set_encrypt_key    # expand the encryption schedule first
3136 shll $4,%esi                    # esi = 16 * rounds-word left by the call
3137 testl %eax,%eax                 # non-zero return => key setup failed
3138 jnz L$dec_key_ret
3139 leaq 16(%rdx,%rsi,1),%rdi       # rdi -> last round key; rdx -> first
3140
3141 movups (%rdx),%xmm0             # swap first and last round keys
3142 movups (%rdi),%xmm1             # (no InvMixColumns on these two)
3143 movups %xmm0,(%rdi)
3144 movups %xmm1,(%rdx)
3145 leaq 16(%rdx),%rdx
3146 leaq -16(%rdi),%rdi
3147
3148 L$dec_key_inverse:              # walk inward from both ends of the schedule
3149 movups (%rdx),%xmm0
3150 movups (%rdi),%xmm1
3151 .byte 102,15,56,219,192         # aesimc %xmm0,%xmm0
3152 .byte 102,15,56,219,201         # aesimc %xmm1,%xmm1
3153 leaq 16(%rdx),%rdx
3154 leaq -16(%rdi),%rdi
3155 movups %xmm0,16(%rdi)           # store each key at the mirrored slot
3156 movups %xmm1,-16(%rdx)
3157 cmpq %rdx,%rdi
3158 ja L$dec_key_inverse
3159
3160 movups (%rdx),%xmm0             # middle round key: InvMixColumns in place
3161 .byte 102,15,56,219,192         # aesimc %xmm0,%xmm0
3162 pxor %xmm1,%xmm1                # scrub key material from registers
3163 movups %xmm0,(%rdi)
3164 pxor %xmm0,%xmm0
3165 L$dec_key_ret:
3166 addq $8,%rsp
3167 .byte 0xf3,0xc3                 # rep ret
3168 L$SEH_end_set_decrypt_key:
3170 .globl _aesni_set_encrypt_key
3171 .private_extern _aesni_set_encrypt_key
3172
/*
 * int aesni_set_encrypt_key(const uint8_t *userKey = %rdi, int bits = %esi,
 *                           AES_KEY *key = %rdx)
 * Expands a 128-, 192- or 256-bit AES key into the encryption schedule at
 * %rdx.  Two code paths exist per key size: the classic one built on
 * AESKEYGENASSIST, and an "_alt" one built on PSHUFB + AESENCLAST that is
 * selected from bits of _OPENSSL_ia32cap_P (NOTE(review): presumably for
 * CPUs where AESKEYGENASSIST is slow -- confirm against the perlasm
 * source).  In every path the round-count word (9/11/13) ends up at byte
 * offset 240 of the schedule.  Returns 0 in %rax on success, -1 for NULL
 * arguments, -2 for an unsupported key size.
 */
3173 .p2align 4
3174 _aesni_set_encrypt_key:
3175 __aesni_set_encrypt_key:
3176 .byte 0x48,0x83,0xEC,0x08       # subq $8,%rsp, emitted as raw bytes
3177 movq $-1,%rax                   # default return: -1 (bad pointer)
3178 testq %rdi,%rdi
3179 jz L$enc_key_ret                # NULL key material
3180 testq %rdx,%rdx
3181 jz L$enc_key_ret                # NULL schedule
3182
3183 movl $268437504,%r10d           # 0x10000800: CPU-capability mask
3184 movups (%rdi),%xmm0             # xmm0 = first 16 key bytes
3185 xorps %xmm4,%xmm4               # scratch for the expansion helpers
3186 andl _OPENSSL_ia32cap_P+4(%rip),%r10d
3187 leaq 16(%rdx),%rax              # rax -> slot for round key 1
3188 cmpl $256,%esi
3189 je L$14rounds
3190 cmpl $192,%esi
3191 je L$12rounds
3192 cmpl $128,%esi
3193 jne L$bad_keybits
3194
3195 L$10rounds:
3196 movl $9,%esi                    # round-count word for a 128-bit key
3197 cmpl $268435456,%r10d           # == 0x10000000 => take the alt path
3198 je L$10rounds_alt
3199
3200 movups %xmm0,(%rdx)             # round key 0 = raw user key
3201 .byte 102,15,58,223,200,1       # aeskeygenassist $1,%xmm0,%xmm1
3202 call L$key_expansion_128_cold
3203 .byte 102,15,58,223,200,2       # aeskeygenassist $2,%xmm0,%xmm1
3204 call L$key_expansion_128
3205 .byte 102,15,58,223,200,4       # rcon = 4
3206 call L$key_expansion_128
3207 .byte 102,15,58,223,200,8       # rcon = 8
3208 call L$key_expansion_128
3209 .byte 102,15,58,223,200,16      # rcon = 16
3210 call L$key_expansion_128
3211 .byte 102,15,58,223,200,32      # rcon = 32
3212 call L$key_expansion_128
3213 .byte 102,15,58,223,200,64      # rcon = 64
3214 call L$key_expansion_128
3215 .byte 102,15,58,223,200,128     # rcon = 128
3216 call L$key_expansion_128
3217 .byte 102,15,58,223,200,27      # rcon = 0x1b
3218 call L$key_expansion_128
3219 .byte 102,15,58,223,200,54      # rcon = 0x36
3220 call L$key_expansion_128
3221 movups %xmm0,(%rax)             # round key 10
3222 movl %esi,80(%rax)              # rounds word at schedule offset 240
3223 xorl %eax,%eax                  # return 0
3224 jmp L$enc_key_ret
3225
3226 .p2align 4
3227 L$10rounds_alt:
3228 movdqa L$key_rotate(%rip),%xmm5 # pshufb mask: RotWord byte rotation
3229 movl $8,%r10d                   # 8 iterations of the main loop
3230 movdqa L$key_rcon1(%rip),%xmm4  # running round constant
3231 movdqa %xmm0,%xmm2
3232 movdqu %xmm0,(%rdx)             # round key 0 = raw user key
3233 jmp L$oop_key128
3234
3235 .p2align 4
3236 L$oop_key128:
3237 .byte 102,15,56,0,197           # pshufb %xmm5,%xmm0
3238 .byte 102,15,56,221,196         # aesenclast %xmm4,%xmm0 (SubWord + rcon)
3239 pslld $1,%xmm4                  # double rcon for the next round
3240 leaq 16(%rax),%rax
3241
3242 movdqa %xmm2,%xmm3              # fold previous key with shifted copies
3243 pslldq $4,%xmm2
3244 pxor %xmm2,%xmm3
3245 pslldq $4,%xmm2
3246 pxor %xmm2,%xmm3
3247 pslldq $4,%xmm2
3248 pxor %xmm3,%xmm2
3249
3250 pxor %xmm2,%xmm0                # xmm0 = next round key
3251 movdqu %xmm0,-16(%rax)
3252 movdqa %xmm0,%xmm2
3253
3254 decl %r10d
3255 jnz L$oop_key128
3256
3257 movdqa L$key_rcon1b(%rip),%xmm4 # rcon sequence wraps to 0x1b here
3258
3259 .byte 102,15,56,0,197           # pshufb %xmm5,%xmm0
3260 .byte 102,15,56,221,196         # aesenclast %xmm4,%xmm0
3261 pslld $1,%xmm4
3262
3263 movdqa %xmm2,%xmm3
3264 pslldq $4,%xmm2
3265 pxor %xmm2,%xmm3
3266 pslldq $4,%xmm2
3267 pxor %xmm2,%xmm3
3268 pslldq $4,%xmm2
3269 pxor %xmm3,%xmm2
3270
3271 pxor %xmm2,%xmm0
3272 movdqu %xmm0,(%rax)             # round key 9
3273
3274 movdqa %xmm0,%xmm2
3275 .byte 102,15,56,0,197           # pshufb %xmm5,%xmm0
3276 .byte 102,15,56,221,196         # aesenclast %xmm4,%xmm0
3277
3278 movdqa %xmm2,%xmm3
3279 pslldq $4,%xmm2
3280 pxor %xmm2,%xmm3
3281 pslldq $4,%xmm2
3282 pxor %xmm2,%xmm3
3283 pslldq $4,%xmm2
3284 pxor %xmm3,%xmm2
3285
3286 pxor %xmm2,%xmm0
3287 movdqu %xmm0,16(%rax)           # round key 10
3288
3289 movl %esi,96(%rax)              # rounds word at schedule offset 240
3290 xorl %eax,%eax                  # return 0
3291 jmp L$enc_key_ret
3292
3293 .p2align 4
3294 L$12rounds:
3295 movq 16(%rdi),%xmm2             # key bytes 16..23 (last 64 bits of key)
3296 movl $11,%esi                   # round-count word for a 192-bit key
3297 cmpl $268435456,%r10d
3298 je L$12rounds_alt
3299
3300 movups %xmm0,(%rdx)             # round key 0 = first 16 key bytes
3301 .byte 102,15,58,223,202,1       # aeskeygenassist $1,%xmm2,%xmm1
3302 call L$key_expansion_192a_cold
3303 .byte 102,15,58,223,202,2       # rcon = 2
3304 call L$key_expansion_192b
3305 .byte 102,15,58,223,202,4       # rcon = 4
3306 call L$key_expansion_192a
3307 .byte 102,15,58,223,202,8       # rcon = 8
3308 call L$key_expansion_192b
3309 .byte 102,15,58,223,202,16      # rcon = 16
3310 call L$key_expansion_192a
3311 .byte 102,15,58,223,202,32      # rcon = 32
3312 call L$key_expansion_192b
3313 .byte 102,15,58,223,202,64      # rcon = 64
3314 call L$key_expansion_192a
3315 .byte 102,15,58,223,202,128     # rcon = 128
3316 call L$key_expansion_192b
3317 movups %xmm0,(%rax)             # final round key
3318 movl %esi,48(%rax)              # rounds word at schedule offset 240
3319 xorq %rax,%rax                  # return 0
3320 jmp L$enc_key_ret
3321
3322 .p2align 4
3323 L$12rounds_alt:
3324 movdqa L$key_rotate192(%rip),%xmm5
3325 movdqa L$key_rcon1(%rip),%xmm4
3326 movl $8,%r10d                   # 8 iterations, 24 schedule bytes each
3327 movdqu %xmm0,(%rdx)
3328 jmp L$oop_key192
3329
3330 .p2align 4
3331 L$oop_key192:
3332 movq %xmm2,0(%rax)              # store 8-byte tail of the key state
3333 movdqa %xmm2,%xmm1
3334 .byte 102,15,56,0,213           # pshufb %xmm5,%xmm2
3335 .byte 102,15,56,221,212         # aesenclast %xmm4,%xmm2 (SubWord + rcon)
3336 pslld $1,%xmm4                  # next round constant
3337 leaq 24(%rax),%rax              # 192-bit schedule advances 24 bytes/iter
3338
3339 movdqa %xmm0,%xmm3              # fold key state with shifted copies
3340 pslldq $4,%xmm0
3341 pxor %xmm0,%xmm3
3342 pslldq $4,%xmm0
3343 pxor %xmm0,%xmm3
3344 pslldq $4,%xmm0
3345 pxor %xmm3,%xmm0
3346
3347 pshufd $0xff,%xmm0,%xmm3        # broadcast newest word
3348 pxor %xmm1,%xmm3
3349 pslldq $4,%xmm1
3350 pxor %xmm1,%xmm3
3351
3352 pxor %xmm2,%xmm0
3353 pxor %xmm3,%xmm2
3354 movdqu %xmm0,-16(%rax)
3355
3356 decl %r10d
3357 jnz L$oop_key192
3358
3359 movl %esi,32(%rax)              # rounds word at schedule offset 240
3360 xorl %eax,%eax                  # return 0
3361 jmp L$enc_key_ret
3362
3363 .p2align 4
3364 L$14rounds:
3365 movups 16(%rdi),%xmm2           # second 16 key bytes
3366 movl $13,%esi                   # round-count word for a 256-bit key
3367 leaq 16(%rax),%rax              # first derived key goes at offset 32
3368 cmpl $268435456,%r10d
3369 je L$14rounds_alt
3370
3371 movups %xmm0,(%rdx)             # round keys 0 and 1 = raw user key
3372 movups %xmm2,16(%rdx)
3373 .byte 102,15,58,223,202,1       # aeskeygenassist $1,%xmm2,%xmm1
3374 call L$key_expansion_256a_cold
3375 .byte 102,15,58,223,200,1       # aeskeygenassist $1,%xmm0,%xmm1
3376 call L$key_expansion_256b
3377 .byte 102,15,58,223,202,2       # rcon = 2
3378 call L$key_expansion_256a
3379 .byte 102,15,58,223,200,2
3380 call L$key_expansion_256b
3381 .byte 102,15,58,223,202,4       # rcon = 4
3382 call L$key_expansion_256a
3383 .byte 102,15,58,223,200,4
3384 call L$key_expansion_256b
3385 .byte 102,15,58,223,202,8       # rcon = 8
3386 call L$key_expansion_256a
3387 .byte 102,15,58,223,200,8
3388 call L$key_expansion_256b
3389 .byte 102,15,58,223,202,16      # rcon = 16
3390 call L$key_expansion_256a
3391 .byte 102,15,58,223,200,16
3392 call L$key_expansion_256b
3393 .byte 102,15,58,223,202,32      # rcon = 32
3394 call L$key_expansion_256a
3395 .byte 102,15,58,223,200,32
3396 call L$key_expansion_256b
3397 .byte 102,15,58,223,202,64      # rcon = 64
3398 call L$key_expansion_256a
3399 movups %xmm0,(%rax)             # round key 14
3400 movl %esi,16(%rax)              # rounds word at schedule offset 240
3401 xorq %rax,%rax                  # return 0
3402 jmp L$enc_key_ret
3403
3404 .p2align 4
3405 L$14rounds_alt:
3406 movdqa L$key_rotate(%rip),%xmm5
3407 movdqa L$key_rcon1(%rip),%xmm4
3408 movl $7,%r10d                   # 7 even round keys still to derive
3409 movdqu %xmm0,0(%rdx)            # round keys 0 and 1 = raw user key
3410 movdqa %xmm2,%xmm1
3411 movdqu %xmm2,16(%rdx)
3412 jmp L$oop_key256
3413
3414 .p2align 4
3415 L$oop_key256:
3416 .byte 102,15,56,0,213           # pshufb %xmm5,%xmm2
3417 .byte 102,15,56,221,212         # aesenclast %xmm4,%xmm2 (SubWord + rcon)
3418
3419 movdqa %xmm0,%xmm3
3420 pslldq $4,%xmm0
3421 pxor %xmm0,%xmm3
3422 pslldq $4,%xmm0
3423 pxor %xmm0,%xmm3
3424 pslldq $4,%xmm0
3425 pxor %xmm3,%xmm0
3426 pslld $1,%xmm4                  # next round constant
3427
3428 pxor %xmm2,%xmm0
3429 movdqu %xmm0,(%rax)             # even-numbered round key
3430
3431 decl %r10d
3432 jz L$done_key256
3433
3434 pshufd $0xff,%xmm0,%xmm2        # broadcast newest word
3435 pxor %xmm3,%xmm3
3436 .byte 102,15,56,221,211         # aesenclast %xmm3,%xmm2 (SubWord, rcon 0)
3437
3438 movdqa %xmm1,%xmm3
3439 pslldq $4,%xmm1
3440 pxor %xmm1,%xmm3
3441 pslldq $4,%xmm1
3442 pxor %xmm1,%xmm3
3443 pslldq $4,%xmm1
3444 pxor %xmm3,%xmm1
3445
3446 pxor %xmm1,%xmm2
3447 movdqu %xmm2,16(%rax)           # odd-numbered round key
3448 leaq 32(%rax),%rax
3449 movdqa %xmm2,%xmm1
3450
3451 jmp L$oop_key256
3452
3453 L$done_key256:
3454 movl %esi,16(%rax)              # rounds word at schedule offset 240
3455 xorl %eax,%eax                  # return 0
3456 jmp L$enc_key_ret
3457
3458 .p2align 4
3459 L$bad_keybits:
3460 movq $-2,%rax                   # return -2: unsupported key size
3461 L$enc_key_ret:
3462 pxor %xmm0,%xmm0                # scrub key material from xmm registers
3463 pxor %xmm1,%xmm1
3464 pxor %xmm2,%xmm2
3465 pxor %xmm3,%xmm3
3466 pxor %xmm4,%xmm4
3467 pxor %xmm5,%xmm5
3468 addq $8,%rsp
3469 .byte 0xf3,0xc3                 # rep ret
3470 L$SEH_end_set_encrypt_key:
3471
3472 .p2align 4
/*
 * L$key_expansion_128: derive the next AES-128 round key.
 * In:  xmm0 = previous round key, xmm1 = AESKEYGENASSIST result,
 *      xmm4 = shuffle scratch carried between calls, rax -> output slot.
 * Out: xmm0 = next round key; previous key stored; rax advanced 16 bytes.
 * The _cold entry skips the store (round key 0 was stored by the caller).
 */
3473 L$key_expansion_128:
3474 movups %xmm0,(%rax)             # store previous round key
3475 leaq 16(%rax),%rax
3476 L$key_expansion_128_cold:
3477 shufps $16,%xmm0,%xmm4          # key ^= key<<32 ^ key<<64 ^ key<<96,
3478 xorps %xmm4,%xmm0               # done with two shuffle/xor pairs
3479 shufps $140,%xmm0,%xmm4
3480 xorps %xmm4,%xmm0
3481 shufps $255,%xmm1,%xmm1         # broadcast SubWord(RotWord(w)) ^ rcon
3482 xorps %xmm1,%xmm0
3483 .byte 0xf3,0xc3                 # rep ret
3484
3485 .p2align 4
/*
 * L$key_expansion_192a: derive the next 192-bit key state.
 * In:  xmm0/xmm2 = current 192-bit key state (128 + 64 bits),
 *      xmm1 = AESKEYGENASSIST result, rax -> output position.
 * Stores one 16-byte block, then falls through into the shared
 * L$key_expansion_192b_warm tail which updates both state halves.
 * The _cold entry (first call) skips the store.
 */
3486 L$key_expansion_192a:
3487 movups %xmm0,(%rax)
3488 leaq 16(%rax),%rax
3489 L$key_expansion_192a_cold:
3490 movaps %xmm2,%xmm5              # keep upper half for 192b's repacking
3491 L$key_expansion_192b_warm:
3492 shufps $16,%xmm0,%xmm4
3493 movdqa %xmm2,%xmm3
3494 xorps %xmm4,%xmm0               # fold xmm0 with shifted copies of itself
3495 shufps $140,%xmm0,%xmm4
3496 pslldq $4,%xmm3
3497 xorps %xmm4,%xmm0
3498 pshufd $85,%xmm1,%xmm1          # broadcast keygenassist word 1
3499 pxor %xmm3,%xmm2
3500 pxor %xmm1,%xmm0                # xmm0 = next 128 bits of the schedule
3501 pshufd $255,%xmm0,%xmm3         # broadcast newest word
3502 pxor %xmm3,%xmm2                # xmm2 = next upper 64 bits
3503 .byte 0xf3,0xc3                 # rep ret
3504
3505 .p2align 4
/*
 * L$key_expansion_192b: store two 16-byte blocks built by repacking the
 * 192-bit key state (xmm5 = saved upper half, xmm0/xmm2 = current state)
 * into the schedule layout, then continue in L$key_expansion_192b_warm.
 */
3506 L$key_expansion_192b:
3507 movaps %xmm0,%xmm3
3508 shufps $68,%xmm0,%xmm5          # pack saved upper half with low words
3509 movups %xmm5,(%rax)
3510 shufps $78,%xmm2,%xmm3          # pack high words with xmm2's low words
3511 movups %xmm3,16(%rax)
3512 leaq 32(%rax),%rax
3513 jmp L$key_expansion_192b_warm
3514
3515 .p2align 4
/*
 * L$key_expansion_256a: derive the next even AES-256 round key into xmm0.
 * In:  xmm0 = previous even key, xmm2 = previous odd key,
 *      xmm1 = AESKEYGENASSIST result, rax -> output slot.
 * Stores the previous odd key; the _cold entry skips the store.
 */
3516 L$key_expansion_256a:
3517 movups %xmm2,(%rax)             # store previous odd round key
3518 leaq 16(%rax),%rax
3519 L$key_expansion_256a_cold:
3520 shufps $16,%xmm0,%xmm4          # fold xmm0 with shifted copies of itself
3521 xorps %xmm4,%xmm0
3522 shufps $140,%xmm0,%xmm4
3523 xorps %xmm4,%xmm0
3524 shufps $255,%xmm1,%xmm1         # broadcast SubWord(RotWord(w)) ^ rcon
3525 xorps %xmm1,%xmm0               # xmm0 = next even round key
3526 .byte 0xf3,0xc3                 # rep ret
3527
3528 .p2align 4
/*
 * L$key_expansion_256b: derive the next odd AES-256 round key into xmm2.
 * In:  xmm0 = current even key (stored here), xmm2 = previous odd key,
 *      xmm1 = AESKEYGENASSIST result, rax -> output slot.
 */
3529 L$key_expansion_256b:
3530 movups %xmm0,(%rax)             # store current even round key
3531 leaq 16(%rax),%rax
3532
3533 shufps $16,%xmm2,%xmm4          # fold xmm2 with shifted copies of itself
3534 xorps %xmm4,%xmm2
3535 shufps $140,%xmm2,%xmm4
3536 xorps %xmm4,%xmm2
3537 shufps $170,%xmm1,%xmm1         # broadcast SubWord(w); no rotate, no rcon
3538 xorps %xmm1,%xmm2               # xmm2 = next odd round key
3539 .byte 0xf3,0xc3                 # rep ret
3540
3541
3542 .p2align 6
/* Constant pool shared by the AES-NI routines in this file. */
3543 L$bswap_mask:                   # pshufb mask: byte-reverse a 128-bit lane
3544 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
3545 L$increment32:                  # NOTE(review): counter increments,
3546 .long 6,6,6,0                   # presumably for the CTR path -- confirm
3547 L$increment64:
3548 .long 1,0,0,0
3549 L$xts_magic:                    # NOTE(review): presumably the GF(2^128)
3550 .long 0x87,0,1,0                # tweak constant for XTS -- confirm
3551 L$increment1:                   # big-endian 128-bit +1
3552 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3553 L$key_rotate:                   # pshufb masks used by the _alt key
3554 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
3555 L$key_rotate192:                # schedules above
3556 .long 0x04070605,0x04070605,0x04070605,0x04070605
3557 L$key_rcon1:                    # initial AES round constant
3558 .long 1,1,1,1
3559 L$key_rcon1b:                   # wrapped round constant (0x1b)
3560 .long 0x1b,0x1b,0x1b,0x1b
3561
/* ASCII credits string: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
3562 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32 ,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101 ,110,115,115,108,46,111,114,103,62,0
3563 .p2align 6
3564 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/aes/aes-x86_64.S ('k') | third_party/boringssl/mac-x86_64/crypto/aes/bsaes-x86_64.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698