Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1257)

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/aes/aesni-x86_64.S

Issue 377783004: Add BoringSSL GYP files. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Final Python fix. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
#-----------------------------------------------------------------------
# _aesni_encrypt -- encrypt one 16-byte block with AES-NI.
# Presumed C signature (confirm against aes.h):
#   void aesni_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
# In:  rdi = in, rsi = out, rdx = key schedule; round count is read from
#      offset 240 of the key structure.
# AES round instructions are hand-encoded with .byte so the file builds
# with assemblers that predate the AES-NI mnemonics:
#   102,15,56,220 = aesenc; 102,15,56,221 = aesenclast.
#-----------------------------------------------------------------------
4 .globl _aesni_encrypt
5
6 .p2align 4
7 _aesni_encrypt:
8 movups (%rdi),%xmm2
9 movl 240(%rdx),%eax
10 movups (%rdx),%xmm0
11 movups 16(%rdx),%xmm1
12 leaq 32(%rdx),%rdx
13 xorps %xmm0,%xmm2
# Round loop: eax counts remaining rounds; xmm1 is reloaded with the
# next round key each iteration.
14 L$oop_enc1_1:
15 .byte 102,15,56,220,209
16 decl %eax
17 movups (%rdx),%xmm1
18 leaq 16(%rdx),%rdx
19 jnz L$oop_enc1_1
# aesenclast %xmm1,%xmm2 -- final round, then store the ciphertext.
20 .byte 102,15,56,221,209
21 movups %xmm2,(%rsi)
# 0xf3,0xc3 decodes as "rep ret" (branch-predictor-friendly return).
22 .byte 0xf3,0xc3
23
24
#-----------------------------------------------------------------------
# _aesni_decrypt -- decrypt one 16-byte block with AES-NI.
# Presumed C signature (confirm against aes.h):
#   void aesni_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
# In:  rdi = in, rsi = out, rdx = (decryption) key schedule; round count
#      at offset 240.  Mirror image of _aesni_encrypt using
#   102,15,56,222 = aesdec; 102,15,56,223 = aesdeclast.
#-----------------------------------------------------------------------
25 .globl _aesni_decrypt
26
27 .p2align 4
28 _aesni_decrypt:
29 movups (%rdi),%xmm2
30 movl 240(%rdx),%eax
31 movups (%rdx),%xmm0
32 movups 16(%rdx),%xmm1
33 leaq 32(%rdx),%rdx
34 xorps %xmm0,%xmm2
# Round loop: one aesdec per remaining round, key reloaded each pass.
35 L$oop_dec1_2:
36 .byte 102,15,56,222,209
37 decl %eax
38 movups (%rdx),%xmm1
39 leaq 16(%rdx),%rdx
40 jnz L$oop_dec1_2
# aesdeclast %xmm1,%xmm2 -- final round, then store the plaintext.
41 .byte 102,15,56,223,209
42 movups %xmm2,(%rsi)
43 .byte 0xf3,0xc3
44
45
#-----------------------------------------------------------------------
# _aesni_encrypt2 -- encrypt two blocks (xmm2, xmm3) in parallel.
# Internal helper (no .globl).  In: rcx = key schedule, eax = rounds;
# blocks enter/leave in xmm2..xmm3.  shll $4 turns the round count into
# a byte offset; rcx is advanced past the schedule and rax becomes a
# negative index that walks forward two round keys per iteration, so
# "addq $32" setting ZF terminates the loop.
#-----------------------------------------------------------------------
46 .p2align 4
47 _aesni_encrypt2:
48 movups (%rcx),%xmm0
49 shll $4,%eax
50 movups 16(%rcx),%xmm1
51 xorps %xmm0,%xmm2
52 xorps %xmm0,%xmm3
53 movups 32(%rcx),%xmm0
54 leaq 32(%rcx,%rax,1),%rcx
55 negq %rax
56 addq $16,%rax
57
# Two rounds per iteration, alternating round keys in xmm1/xmm0.
58 L$enc_loop2:
59 .byte 102,15,56,220,209
60 .byte 102,15,56,220,217
61 movups (%rcx,%rax,1),%xmm1
62 addq $32,%rax
63 .byte 102,15,56,220,208
64 .byte 102,15,56,220,216
65 movups -16(%rcx,%rax,1),%xmm0
66 jnz L$enc_loop2
67
# Penultimate round (aesenc) then final round (aesenclast) per block.
68 .byte 102,15,56,220,209
69 .byte 102,15,56,220,217
70 .byte 102,15,56,221,208
71 .byte 102,15,56,221,216
72 .byte 0xf3,0xc3
73
74
#-----------------------------------------------------------------------
# _aesni_decrypt2 -- decrypt two blocks (xmm2, xmm3) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds; blocks in
# xmm2..xmm3.  Same negative-index loop as _aesni_encrypt2, with
# aesdec (222) / aesdeclast (223).
#-----------------------------------------------------------------------
75 .p2align 4
76 _aesni_decrypt2:
77 movups (%rcx),%xmm0
78 shll $4,%eax
79 movups 16(%rcx),%xmm1
80 xorps %xmm0,%xmm2
81 xorps %xmm0,%xmm3
82 movups 32(%rcx),%xmm0
83 leaq 32(%rcx,%rax,1),%rcx
84 negq %rax
85 addq $16,%rax
86
# Two rounds per iteration; jnz tests the flags from "addq $32,%rax".
87 L$dec_loop2:
88 .byte 102,15,56,222,209
89 .byte 102,15,56,222,217
90 movups (%rcx,%rax,1),%xmm1
91 addq $32,%rax
92 .byte 102,15,56,222,208
93 .byte 102,15,56,222,216
94 movups -16(%rcx,%rax,1),%xmm0
95 jnz L$dec_loop2
96
97 .byte 102,15,56,222,209
98 .byte 102,15,56,222,217
99 .byte 102,15,56,223,208
100 .byte 102,15,56,223,216
101 .byte 0xf3,0xc3
102
103
#-----------------------------------------------------------------------
# _aesni_encrypt3 -- encrypt three blocks (xmm2..xmm4) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
# Same schedule-walking scheme as _aesni_encrypt2.
#-----------------------------------------------------------------------
104 .p2align 4
105 _aesni_encrypt3:
106 movups (%rcx),%xmm0
107 shll $4,%eax
108 movups 16(%rcx),%xmm1
109 xorps %xmm0,%xmm2
110 xorps %xmm0,%xmm3
111 xorps %xmm0,%xmm4
112 movups 32(%rcx),%xmm0
113 leaq 32(%rcx,%rax,1),%rcx
114 negq %rax
115 addq $16,%rax
116
# Two rounds for all three blocks per iteration.
117 L$enc_loop3:
118 .byte 102,15,56,220,209
119 .byte 102,15,56,220,217
120 .byte 102,15,56,220,225
121 movups (%rcx,%rax,1),%xmm1
122 addq $32,%rax
123 .byte 102,15,56,220,208
124 .byte 102,15,56,220,216
125 .byte 102,15,56,220,224
126 movups -16(%rcx,%rax,1),%xmm0
127 jnz L$enc_loop3
128
# Final aesenc + aesenclast for each of xmm2..xmm4.
129 .byte 102,15,56,220,209
130 .byte 102,15,56,220,217
131 .byte 102,15,56,220,225
132 .byte 102,15,56,221,208
133 .byte 102,15,56,221,216
134 .byte 102,15,56,221,224
135 .byte 0xf3,0xc3
136
137
#-----------------------------------------------------------------------
# _aesni_decrypt3 -- decrypt three blocks (xmm2..xmm4) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
#-----------------------------------------------------------------------
138 .p2align 4
139 _aesni_decrypt3:
140 movups (%rcx),%xmm0
141 shll $4,%eax
142 movups 16(%rcx),%xmm1
143 xorps %xmm0,%xmm2
144 xorps %xmm0,%xmm3
145 xorps %xmm0,%xmm4
146 movups 32(%rcx),%xmm0
147 leaq 32(%rcx,%rax,1),%rcx
148 negq %rax
149 addq $16,%rax
150
151 L$dec_loop3:
152 .byte 102,15,56,222,209
153 .byte 102,15,56,222,217
154 .byte 102,15,56,222,225
155 movups (%rcx,%rax,1),%xmm1
156 addq $32,%rax
157 .byte 102,15,56,222,208
158 .byte 102,15,56,222,216
159 .byte 102,15,56,222,224
160 movups -16(%rcx,%rax,1),%xmm0
161 jnz L$dec_loop3
162
# Final aesdec + aesdeclast for each of xmm2..xmm4.
163 .byte 102,15,56,222,209
164 .byte 102,15,56,222,217
165 .byte 102,15,56,222,225
166 .byte 102,15,56,223,208
167 .byte 102,15,56,223,216
168 .byte 102,15,56,223,224
169 .byte 0xf3,0xc3
170
171
#-----------------------------------------------------------------------
# _aesni_encrypt4 -- encrypt four blocks (xmm2..xmm5) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
#-----------------------------------------------------------------------
172 .p2align 4
173 _aesni_encrypt4:
174 movups (%rcx),%xmm0
175 shll $4,%eax
176 movups 16(%rcx),%xmm1
177 xorps %xmm0,%xmm2
178 xorps %xmm0,%xmm3
179 xorps %xmm0,%xmm4
180 xorps %xmm0,%xmm5
181 movups 32(%rcx),%xmm0
182 leaq 32(%rcx,%rax,1),%rcx
183 negq %rax
# 0F 1F /0 is a multi-byte NOP (code-alignment padding).
184 .byte 0x0f,0x1f,0x00
185 addq $16,%rax
186
187 L$enc_loop4:
188 .byte 102,15,56,220,209
189 .byte 102,15,56,220,217
190 .byte 102,15,56,220,225
191 .byte 102,15,56,220,233
192 movups (%rcx,%rax,1),%xmm1
193 addq $32,%rax
194 .byte 102,15,56,220,208
195 .byte 102,15,56,220,216
196 .byte 102,15,56,220,224
197 .byte 102,15,56,220,232
198 movups -16(%rcx,%rax,1),%xmm0
199 jnz L$enc_loop4
200
# Final aesenc + aesenclast for each of xmm2..xmm5.
201 .byte 102,15,56,220,209
202 .byte 102,15,56,220,217
203 .byte 102,15,56,220,225
204 .byte 102,15,56,220,233
205 .byte 102,15,56,221,208
206 .byte 102,15,56,221,216
207 .byte 102,15,56,221,224
208 .byte 102,15,56,221,232
209 .byte 0xf3,0xc3
210
211
#-----------------------------------------------------------------------
# _aesni_decrypt4 -- decrypt four blocks (xmm2..xmm5) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
#-----------------------------------------------------------------------
212 .p2align 4
213 _aesni_decrypt4:
214 movups (%rcx),%xmm0
215 shll $4,%eax
216 movups 16(%rcx),%xmm1
217 xorps %xmm0,%xmm2
218 xorps %xmm0,%xmm3
219 xorps %xmm0,%xmm4
220 xorps %xmm0,%xmm5
221 movups 32(%rcx),%xmm0
222 leaq 32(%rcx,%rax,1),%rcx
223 negq %rax
# Multi-byte NOP for alignment, as in _aesni_encrypt4.
224 .byte 0x0f,0x1f,0x00
225 addq $16,%rax
226
227 L$dec_loop4:
228 .byte 102,15,56,222,209
229 .byte 102,15,56,222,217
230 .byte 102,15,56,222,225
231 .byte 102,15,56,222,233
232 movups (%rcx,%rax,1),%xmm1
233 addq $32,%rax
234 .byte 102,15,56,222,208
235 .byte 102,15,56,222,216
236 .byte 102,15,56,222,224
237 .byte 102,15,56,222,232
238 movups -16(%rcx,%rax,1),%xmm0
239 jnz L$dec_loop4
240
241 .byte 102,15,56,222,209
242 .byte 102,15,56,222,217
243 .byte 102,15,56,222,225
244 .byte 102,15,56,222,233
245 .byte 102,15,56,223,208
246 .byte 102,15,56,223,216
247 .byte 102,15,56,223,224
248 .byte 102,15,56,223,232
249 .byte 0xf3,0xc3
250
251
#-----------------------------------------------------------------------
# _aesni_encrypt6 -- encrypt six blocks (xmm2..xmm7) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
# The key-whitening XORs are interleaved with the first aesenc of
# already-whitened blocks before jumping into the shared loop body.
# Note: L$enc_loop6 is also entered via "call" from the CTR32 code
# later in this file.
#-----------------------------------------------------------------------
252 .p2align 4
253 _aesni_encrypt6:
254 movups (%rcx),%xmm0
255 shll $4,%eax
256 movups 16(%rcx),%xmm1
257 xorps %xmm0,%xmm2
258 pxor %xmm0,%xmm3
259 pxor %xmm0,%xmm4
260 .byte 102,15,56,220,209
261 leaq 32(%rcx,%rax,1),%rcx
262 negq %rax
263 .byte 102,15,56,220,217
264 pxor %xmm0,%xmm5
265 pxor %xmm0,%xmm6
266 .byte 102,15,56,220,225
267 pxor %xmm0,%xmm7
268 addq $16,%rax
269 .byte 102,15,56,220,233
270 .byte 102,15,56,220,241
271 .byte 102,15,56,220,249
272 movups -16(%rcx,%rax,1),%xmm0
273 jmp L$enc_loop6_enter
274 .p2align 4
275 L$enc_loop6:
276 .byte 102,15,56,220,209
277 .byte 102,15,56,220,217
278 .byte 102,15,56,220,225
279 .byte 102,15,56,220,233
280 .byte 102,15,56,220,241
281 .byte 102,15,56,220,249
# Entry point skips the first half-round already done in the prologue.
282 L$enc_loop6_enter:
283 movups (%rcx,%rax,1),%xmm1
284 addq $32,%rax
285 .byte 102,15,56,220,208
286 .byte 102,15,56,220,216
287 .byte 102,15,56,220,224
288 .byte 102,15,56,220,232
289 .byte 102,15,56,220,240
290 .byte 102,15,56,220,248
291 movups -16(%rcx,%rax,1),%xmm0
292 jnz L$enc_loop6
293
# Final aesenc + aesenclast for each of xmm2..xmm7.
294 .byte 102,15,56,220,209
295 .byte 102,15,56,220,217
296 .byte 102,15,56,220,225
297 .byte 102,15,56,220,233
298 .byte 102,15,56,220,241
299 .byte 102,15,56,220,249
300 .byte 102,15,56,221,208
301 .byte 102,15,56,221,216
302 .byte 102,15,56,221,224
303 .byte 102,15,56,221,232
304 .byte 102,15,56,221,240
305 .byte 102,15,56,221,248
306 .byte 0xf3,0xc3
307
308
#-----------------------------------------------------------------------
# _aesni_decrypt6 -- decrypt six blocks (xmm2..xmm7) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
# Mirror of _aesni_encrypt6 with aesdec/aesdeclast.
#-----------------------------------------------------------------------
309 .p2align 4
310 _aesni_decrypt6:
311 movups (%rcx),%xmm0
312 shll $4,%eax
313 movups 16(%rcx),%xmm1
314 xorps %xmm0,%xmm2
315 pxor %xmm0,%xmm3
316 pxor %xmm0,%xmm4
317 .byte 102,15,56,222,209
318 leaq 32(%rcx,%rax,1),%rcx
319 negq %rax
320 .byte 102,15,56,222,217
321 pxor %xmm0,%xmm5
322 pxor %xmm0,%xmm6
323 .byte 102,15,56,222,225
324 pxor %xmm0,%xmm7
325 addq $16,%rax
326 .byte 102,15,56,222,233
327 .byte 102,15,56,222,241
328 .byte 102,15,56,222,249
329 movups -16(%rcx,%rax,1),%xmm0
330 jmp L$dec_loop6_enter
331 .p2align 4
332 L$dec_loop6:
333 .byte 102,15,56,222,209
334 .byte 102,15,56,222,217
335 .byte 102,15,56,222,225
336 .byte 102,15,56,222,233
337 .byte 102,15,56,222,241
338 .byte 102,15,56,222,249
339 L$dec_loop6_enter:
340 movups (%rcx,%rax,1),%xmm1
341 addq $32,%rax
342 .byte 102,15,56,222,208
343 .byte 102,15,56,222,216
344 .byte 102,15,56,222,224
345 .byte 102,15,56,222,232
346 .byte 102,15,56,222,240
347 .byte 102,15,56,222,248
348 movups -16(%rcx,%rax,1),%xmm0
349 jnz L$dec_loop6
350
351 .byte 102,15,56,222,209
352 .byte 102,15,56,222,217
353 .byte 102,15,56,222,225
354 .byte 102,15,56,222,233
355 .byte 102,15,56,222,241
356 .byte 102,15,56,222,249
357 .byte 102,15,56,223,208
358 .byte 102,15,56,223,216
359 .byte 102,15,56,223,224
360 .byte 102,15,56,223,232
361 .byte 102,15,56,223,240
362 .byte 102,15,56,223,248
363 .byte 0xf3,0xc3
364
365
#-----------------------------------------------------------------------
# _aesni_encrypt8 -- encrypt eight blocks (xmm2..xmm9) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
# Encodings with a 68 (0x44 = REX.R) byte target xmm8/xmm9, e.g.
# 102,68,15,56,220,193 = aesenc %xmm1,%xmm8.
# L$enc_loop8_enter is also entered via "call" from the CTR32 tail.
#-----------------------------------------------------------------------
366 .p2align 4
367 _aesni_encrypt8:
368 movups (%rcx),%xmm0
369 shll $4,%eax
370 movups 16(%rcx),%xmm1
371 xorps %xmm0,%xmm2
372 xorps %xmm0,%xmm3
373 pxor %xmm0,%xmm4
374 pxor %xmm0,%xmm5
375 pxor %xmm0,%xmm6
376 leaq 32(%rcx,%rax,1),%rcx
377 negq %rax
378 .byte 102,15,56,220,209
379 addq $16,%rax
380 pxor %xmm0,%xmm7
381 .byte 102,15,56,220,217
382 pxor %xmm0,%xmm8
383 pxor %xmm0,%xmm9
384 .byte 102,15,56,220,225
385 .byte 102,15,56,220,233
386 .byte 102,15,56,220,241
387 .byte 102,15,56,220,249
388 .byte 102,68,15,56,220,193
389 .byte 102,68,15,56,220,201
390 movups -16(%rcx,%rax,1),%xmm0
391 jmp L$enc_loop8_enter
392 .p2align 4
393 L$enc_loop8:
394 .byte 102,15,56,220,209
395 .byte 102,15,56,220,217
396 .byte 102,15,56,220,225
397 .byte 102,15,56,220,233
398 .byte 102,15,56,220,241
399 .byte 102,15,56,220,249
400 .byte 102,68,15,56,220,193
401 .byte 102,68,15,56,220,201
402 L$enc_loop8_enter:
403 movups (%rcx,%rax,1),%xmm1
404 addq $32,%rax
405 .byte 102,15,56,220,208
406 .byte 102,15,56,220,216
407 .byte 102,15,56,220,224
408 .byte 102,15,56,220,232
409 .byte 102,15,56,220,240
410 .byte 102,15,56,220,248
411 .byte 102,68,15,56,220,192
412 .byte 102,68,15,56,220,200
413 movups -16(%rcx,%rax,1),%xmm0
414 jnz L$enc_loop8
415
# Final aesenc + aesenclast for each of xmm2..xmm9.
416 .byte 102,15,56,220,209
417 .byte 102,15,56,220,217
418 .byte 102,15,56,220,225
419 .byte 102,15,56,220,233
420 .byte 102,15,56,220,241
421 .byte 102,15,56,220,249
422 .byte 102,68,15,56,220,193
423 .byte 102,68,15,56,220,201
424 .byte 102,15,56,221,208
425 .byte 102,15,56,221,216
426 .byte 102,15,56,221,224
427 .byte 102,15,56,221,232
428 .byte 102,15,56,221,240
429 .byte 102,15,56,221,248
430 .byte 102,68,15,56,221,192
431 .byte 102,68,15,56,221,200
432 .byte 0xf3,0xc3
433
434
#-----------------------------------------------------------------------
# _aesni_decrypt8 -- decrypt eight blocks (xmm2..xmm9) in parallel.
# Internal helper.  In: rcx = key schedule, eax = rounds.
# Mirror of _aesni_encrypt8 with aesdec/aesdeclast (222/223).
#-----------------------------------------------------------------------
435 .p2align 4
436 _aesni_decrypt8:
437 movups (%rcx),%xmm0
438 shll $4,%eax
439 movups 16(%rcx),%xmm1
440 xorps %xmm0,%xmm2
441 xorps %xmm0,%xmm3
442 pxor %xmm0,%xmm4
443 pxor %xmm0,%xmm5
444 pxor %xmm0,%xmm6
445 leaq 32(%rcx,%rax,1),%rcx
446 negq %rax
447 .byte 102,15,56,222,209
448 addq $16,%rax
449 pxor %xmm0,%xmm7
450 .byte 102,15,56,222,217
451 pxor %xmm0,%xmm8
452 pxor %xmm0,%xmm9
453 .byte 102,15,56,222,225
454 .byte 102,15,56,222,233
455 .byte 102,15,56,222,241
456 .byte 102,15,56,222,249
457 .byte 102,68,15,56,222,193
458 .byte 102,68,15,56,222,201
459 movups -16(%rcx,%rax,1),%xmm0
460 jmp L$dec_loop8_enter
461 .p2align 4
462 L$dec_loop8:
463 .byte 102,15,56,222,209
464 .byte 102,15,56,222,217
465 .byte 102,15,56,222,225
466 .byte 102,15,56,222,233
467 .byte 102,15,56,222,241
468 .byte 102,15,56,222,249
469 .byte 102,68,15,56,222,193
470 .byte 102,68,15,56,222,201
471 L$dec_loop8_enter:
472 movups (%rcx,%rax,1),%xmm1
473 addq $32,%rax
474 .byte 102,15,56,222,208
475 .byte 102,15,56,222,216
476 .byte 102,15,56,222,224
477 .byte 102,15,56,222,232
478 .byte 102,15,56,222,240
479 .byte 102,15,56,222,248
480 .byte 102,68,15,56,222,192
481 .byte 102,68,15,56,222,200
482 movups -16(%rcx,%rax,1),%xmm0
483 jnz L$dec_loop8
484
485 .byte 102,15,56,222,209
486 .byte 102,15,56,222,217
487 .byte 102,15,56,222,225
488 .byte 102,15,56,222,233
489 .byte 102,15,56,222,241
490 .byte 102,15,56,222,249
491 .byte 102,68,15,56,222,193
492 .byte 102,68,15,56,222,201
493 .byte 102,15,56,223,208
494 .byte 102,15,56,223,216
495 .byte 102,15,56,223,224
496 .byte 102,15,56,223,232
497 .byte 102,15,56,223,240
498 .byte 102,15,56,223,248
499 .byte 102,68,15,56,223,192
500 .byte 102,68,15,56,223,200
501 .byte 0xf3,0xc3
502
#-----------------------------------------------------------------------
# _aesni_ecb_encrypt -- ECB-mode bulk en/decryption.
# Presumed C signature (confirm against the header):
#   void aesni_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t len,
#                          const AES_KEY *key, int enc)
# In:  rdi = in, rsi = out, rdx = len (truncated below to a multiple of
#      16), rcx = key, r8d = enc flag (zero selects the decrypt path).
# r11 / r10d keep the key pointer and round count across the helper
# calls, which clobber rcx / eax.  Main loop handles 8 blocks (128
# bytes) per iteration; a 1..7-block tail is dispatched to the
# appropriately sized helper.
#-----------------------------------------------------------------------
503 .globl _aesni_ecb_encrypt
504
505 .p2align 4
506 _aesni_ecb_encrypt:
# Round len down to whole blocks; nothing to do if the result is 0.
507 andq $-16,%rdx
508 jz L$ecb_ret
509
510 movl 240(%rcx),%eax
511 movups (%rcx),%xmm0
512 movq %rcx,%r11
513 movl %eax,%r10d
514 testl %r8d,%r8d
515 jz L$ecb_decrypt
516
# --- encrypt path ---
517 cmpq $128,%rdx
518 jb L$ecb_enc_tail
519
# Preload the first 8 input blocks, then enter the software-pipelined
# loop: each iteration stores the previous batch while loading the next.
520 movdqu (%rdi),%xmm2
521 movdqu 16(%rdi),%xmm3
522 movdqu 32(%rdi),%xmm4
523 movdqu 48(%rdi),%xmm5
524 movdqu 64(%rdi),%xmm6
525 movdqu 80(%rdi),%xmm7
526 movdqu 96(%rdi),%xmm8
527 movdqu 112(%rdi),%xmm9
528 leaq 128(%rdi),%rdi
529 subq $128,%rdx
530 jmp L$ecb_enc_loop8_enter
531 .p2align 4
532 L$ecb_enc_loop8:
533 movups %xmm2,(%rsi)
534 movq %r11,%rcx
535 movdqu (%rdi),%xmm2
536 movl %r10d,%eax
537 movups %xmm3,16(%rsi)
538 movdqu 16(%rdi),%xmm3
539 movups %xmm4,32(%rsi)
540 movdqu 32(%rdi),%xmm4
541 movups %xmm5,48(%rsi)
542 movdqu 48(%rdi),%xmm5
543 movups %xmm6,64(%rsi)
544 movdqu 64(%rdi),%xmm6
545 movups %xmm7,80(%rsi)
546 movdqu 80(%rdi),%xmm7
547 movups %xmm8,96(%rsi)
548 movdqu 96(%rdi),%xmm8
549 movups %xmm9,112(%rsi)
550 leaq 128(%rsi),%rsi
551 movdqu 112(%rdi),%xmm9
552 leaq 128(%rdi),%rdi
553 L$ecb_enc_loop8_enter:
554
555 call _aesni_encrypt8
556
557 subq $128,%rdx
558 jnc L$ecb_enc_loop8
559
# Flush the final 8-block batch; restore key/rounds for the tail.
560 movups %xmm2,(%rsi)
561 movq %r11,%rcx
562 movups %xmm3,16(%rsi)
563 movl %r10d,%eax
564 movups %xmm4,32(%rsi)
565 movups %xmm5,48(%rsi)
566 movups %xmm6,64(%rsi)
567 movups %xmm7,80(%rsi)
568 movups %xmm8,96(%rsi)
569 movups %xmm9,112(%rsi)
570 leaq 128(%rsi),%rsi
571 addq $128,%rdx
572 jz L$ecb_ret
573
# Dispatch the remaining 1..7 blocks by comparing the byte count.
574 L$ecb_enc_tail:
575 movups (%rdi),%xmm2
576 cmpq $32,%rdx
577 jb L$ecb_enc_one
578 movups 16(%rdi),%xmm3
579 je L$ecb_enc_two
580 movups 32(%rdi),%xmm4
581 cmpq $64,%rdx
582 jb L$ecb_enc_three
583 movups 48(%rdi),%xmm5
584 je L$ecb_enc_four
585 movups 64(%rdi),%xmm6
586 cmpq $96,%rdx
587 jb L$ecb_enc_five
588 movups 80(%rdi),%xmm7
589 je L$ecb_enc_six
# 7 blocks: run the 8-block helper with xmm9 as a don't-care lane.
590 movdqu 96(%rdi),%xmm8
591 call _aesni_encrypt8
592 movups %xmm2,(%rsi)
593 movups %xmm3,16(%rsi)
594 movups %xmm4,32(%rsi)
595 movups %xmm5,48(%rsi)
596 movups %xmm6,64(%rsi)
597 movups %xmm7,80(%rsi)
598 movups %xmm8,96(%rsi)
599 jmp L$ecb_ret
600 .p2align 4
# Single block: inline round loop (same shape as _aesni_encrypt).
601 L$ecb_enc_one:
602 movups (%rcx),%xmm0
603 movups 16(%rcx),%xmm1
604 leaq 32(%rcx),%rcx
605 xorps %xmm0,%xmm2
606 L$oop_enc1_3:
607 .byte 102,15,56,220,209
608 decl %eax
609 movups (%rcx),%xmm1
610 leaq 16(%rcx),%rcx
611 jnz L$oop_enc1_3
612 .byte 102,15,56,221,209
613 movups %xmm2,(%rsi)
614 jmp L$ecb_ret
615 .p2align 4
616 L$ecb_enc_two:
617 call _aesni_encrypt2
618 movups %xmm2,(%rsi)
619 movups %xmm3,16(%rsi)
620 jmp L$ecb_ret
621 .p2align 4
622 L$ecb_enc_three:
623 call _aesni_encrypt3
624 movups %xmm2,(%rsi)
625 movups %xmm3,16(%rsi)
626 movups %xmm4,32(%rsi)
627 jmp L$ecb_ret
628 .p2align 4
629 L$ecb_enc_four:
630 call _aesni_encrypt4
631 movups %xmm2,(%rsi)
632 movups %xmm3,16(%rsi)
633 movups %xmm4,32(%rsi)
634 movups %xmm5,48(%rsi)
635 jmp L$ecb_ret
636 .p2align 4
# 5 blocks: zero xmm7 and use the 6-block helper, ignoring lane 6.
637 L$ecb_enc_five:
638 xorps %xmm7,%xmm7
639 call _aesni_encrypt6
640 movups %xmm2,(%rsi)
641 movups %xmm3,16(%rsi)
642 movups %xmm4,32(%rsi)
643 movups %xmm5,48(%rsi)
644 movups %xmm6,64(%rsi)
645 jmp L$ecb_ret
646 .p2align 4
647 L$ecb_enc_six:
648 call _aesni_encrypt6
649 movups %xmm2,(%rsi)
650 movups %xmm3,16(%rsi)
651 movups %xmm4,32(%rsi)
652 movups %xmm5,48(%rsi)
653 movups %xmm6,64(%rsi)
654 movups %xmm7,80(%rsi)
655 jmp L$ecb_ret
656
657 .p2align 4
# --- decrypt path: same structure, using the decryptN helpers ---
658 L$ecb_decrypt:
659 cmpq $128,%rdx
660 jb L$ecb_dec_tail
661
662 movdqu (%rdi),%xmm2
663 movdqu 16(%rdi),%xmm3
664 movdqu 32(%rdi),%xmm4
665 movdqu 48(%rdi),%xmm5
666 movdqu 64(%rdi),%xmm6
667 movdqu 80(%rdi),%xmm7
668 movdqu 96(%rdi),%xmm8
669 movdqu 112(%rdi),%xmm9
670 leaq 128(%rdi),%rdi
671 subq $128,%rdx
672 jmp L$ecb_dec_loop8_enter
673 .p2align 4
674 L$ecb_dec_loop8:
675 movups %xmm2,(%rsi)
676 movq %r11,%rcx
677 movdqu (%rdi),%xmm2
678 movl %r10d,%eax
679 movups %xmm3,16(%rsi)
680 movdqu 16(%rdi),%xmm3
681 movups %xmm4,32(%rsi)
682 movdqu 32(%rdi),%xmm4
683 movups %xmm5,48(%rsi)
684 movdqu 48(%rdi),%xmm5
685 movups %xmm6,64(%rsi)
686 movdqu 64(%rdi),%xmm6
687 movups %xmm7,80(%rsi)
688 movdqu 80(%rdi),%xmm7
689 movups %xmm8,96(%rsi)
690 movdqu 96(%rdi),%xmm8
691 movups %xmm9,112(%rsi)
692 leaq 128(%rsi),%rsi
693 movdqu 112(%rdi),%xmm9
694 leaq 128(%rdi),%rdi
695 L$ecb_dec_loop8_enter:
696
697 call _aesni_decrypt8
698
699 movups (%r11),%xmm0
700 subq $128,%rdx
701 jnc L$ecb_dec_loop8
702
703 movups %xmm2,(%rsi)
704 movq %r11,%rcx
705 movups %xmm3,16(%rsi)
706 movl %r10d,%eax
707 movups %xmm4,32(%rsi)
708 movups %xmm5,48(%rsi)
709 movups %xmm6,64(%rsi)
710 movups %xmm7,80(%rsi)
711 movups %xmm8,96(%rsi)
712 movups %xmm9,112(%rsi)
713 leaq 128(%rsi),%rsi
714 addq $128,%rdx
715 jz L$ecb_ret
716
717 L$ecb_dec_tail:
718 movups (%rdi),%xmm2
719 cmpq $32,%rdx
720 jb L$ecb_dec_one
721 movups 16(%rdi),%xmm3
722 je L$ecb_dec_two
723 movups 32(%rdi),%xmm4
724 cmpq $64,%rdx
725 jb L$ecb_dec_three
726 movups 48(%rdi),%xmm5
727 je L$ecb_dec_four
728 movups 64(%rdi),%xmm6
729 cmpq $96,%rdx
730 jb L$ecb_dec_five
731 movups 80(%rdi),%xmm7
732 je L$ecb_dec_six
733 movups 96(%rdi),%xmm8
734 movups (%rcx),%xmm0
735 call _aesni_decrypt8
736 movups %xmm2,(%rsi)
737 movups %xmm3,16(%rsi)
738 movups %xmm4,32(%rsi)
739 movups %xmm5,48(%rsi)
740 movups %xmm6,64(%rsi)
741 movups %xmm7,80(%rsi)
742 movups %xmm8,96(%rsi)
743 jmp L$ecb_ret
744 .p2align 4
# Single block: inline aesdec round loop.
745 L$ecb_dec_one:
746 movups (%rcx),%xmm0
747 movups 16(%rcx),%xmm1
748 leaq 32(%rcx),%rcx
749 xorps %xmm0,%xmm2
750 L$oop_dec1_4:
751 .byte 102,15,56,222,209
752 decl %eax
753 movups (%rcx),%xmm1
754 leaq 16(%rcx),%rcx
755 jnz L$oop_dec1_4
756 .byte 102,15,56,223,209
757 movups %xmm2,(%rsi)
758 jmp L$ecb_ret
759 .p2align 4
760 L$ecb_dec_two:
761 call _aesni_decrypt2
762 movups %xmm2,(%rsi)
763 movups %xmm3,16(%rsi)
764 jmp L$ecb_ret
765 .p2align 4
766 L$ecb_dec_three:
767 call _aesni_decrypt3
768 movups %xmm2,(%rsi)
769 movups %xmm3,16(%rsi)
770 movups %xmm4,32(%rsi)
771 jmp L$ecb_ret
772 .p2align 4
773 L$ecb_dec_four:
774 call _aesni_decrypt4
775 movups %xmm2,(%rsi)
776 movups %xmm3,16(%rsi)
777 movups %xmm4,32(%rsi)
778 movups %xmm5,48(%rsi)
779 jmp L$ecb_ret
780 .p2align 4
781 L$ecb_dec_five:
782 xorps %xmm7,%xmm7
783 call _aesni_decrypt6
784 movups %xmm2,(%rsi)
785 movups %xmm3,16(%rsi)
786 movups %xmm4,32(%rsi)
787 movups %xmm5,48(%rsi)
788 movups %xmm6,64(%rsi)
789 jmp L$ecb_ret
790 .p2align 4
791 L$ecb_dec_six:
792 call _aesni_decrypt6
793 movups %xmm2,(%rsi)
794 movups %xmm3,16(%rsi)
795 movups %xmm4,32(%rsi)
796 movups %xmm5,48(%rsi)
797 movups %xmm6,64(%rsi)
798 movups %xmm7,80(%rsi)
799
800 L$ecb_ret:
801 .byte 0xf3,0xc3
802
#-----------------------------------------------------------------------
# _aesni_ccm64_encrypt_blocks -- CCM (64-bit L) encrypt + CMAC update.
# Presumed C signature (confirm against the header):
#   void aesni_ccm64_encrypt_blocks(const uint8_t *in, uint8_t *out,
#       size_t blocks, const AES_KEY *key, const uint8_t ivec[16],
#       uint8_t cmac[16])
# rdi=in, rsi=out, rdx=block count, rcx=key, r8=ivec, r9=cmac.
# xmm6 = counter block, xmm3 = running CMAC, xmm9/xmm7 = the
# L$increment64 / L$bswap_mask constants (defined elsewhere in this
# file, not visible in this chunk).  Each outer iteration encrypts the
# counter (-> keystream for one block) and the CMAC XOR plaintext in
# a shared two-lane round loop.
#-----------------------------------------------------------------------
803 .globl _aesni_ccm64_encrypt_blocks
804
805 .p2align 4
806 _aesni_ccm64_encrypt_blocks:
807 movl 240(%rcx),%eax
808 movdqu (%r8),%xmm6
809 movdqa L$increment64(%rip),%xmm9
810 movdqa L$bswap_mask(%rip),%xmm7
811
812 shll $4,%eax
813 movl $16,%r10d
814 leaq 0(%rcx),%r11
815 movdqu (%r9),%xmm3
816 movdqa %xmm6,%xmm2
817 leaq 32(%rcx,%rax,1),%rcx
# pshufb %xmm7,%xmm6 -- byte-swap the counter via the bswap mask.
818 .byte 102,15,56,0,247
819 subq %rax,%r10
820 jmp L$ccm64_enc_outer
821 .p2align 4
# One plaintext block per outer iteration.
822 L$ccm64_enc_outer:
823 movups (%r11),%xmm0
824 movq %r10,%rax
825 movups (%rdi),%xmm8
826
# Whiten counter (xmm2); fold plaintext into CMAC: xmm3 ^= pt ^ key0.
827 xorps %xmm0,%xmm2
828 movups 16(%r11),%xmm1
829 xorps %xmm8,%xmm0
830 xorps %xmm0,%xmm3
831 movups 32(%r11),%xmm0
832
# Encrypt counter and CMAC together, two rounds per iteration.
833 L$ccm64_enc2_loop:
834 .byte 102,15,56,220,209
835 .byte 102,15,56,220,217
836 movups (%rcx,%rax,1),%xmm1
837 addq $32,%rax
838 .byte 102,15,56,220,208
839 .byte 102,15,56,220,216
840 movups -16(%rcx,%rax,1),%xmm0
841 jnz L$ccm64_enc2_loop
842 .byte 102,15,56,220,209
843 .byte 102,15,56,220,217
# Bump the (byte-swapped) counter and count down the blocks.
844 paddq %xmm9,%xmm6
845 decq %rdx
846 .byte 102,15,56,221,208
847 .byte 102,15,56,221,216
848
# ciphertext = plaintext ^ encrypted counter; reload counter for next.
849 leaq 16(%rdi),%rdi
850 xorps %xmm2,%xmm8
851 movdqa %xmm6,%xmm2
852 movups %xmm8,(%rsi)
# pshufb %xmm7,%xmm2 -- swap the new counter back for encryption.
853 .byte 102,15,56,0,215
854 leaq 16(%rsi),%rsi
855 jnz L$ccm64_enc_outer
856
# Write back the final CMAC value.
857 movups %xmm3,(%r9)
858 .byte 0xf3,0xc3
859
#-----------------------------------------------------------------------
# _aesni_ccm64_decrypt_blocks -- CCM (64-bit L) decrypt + CMAC update.
# Presumed C signature mirrors _aesni_ccm64_encrypt_blocks:
#   rdi=in (ciphertext), rsi=out (plaintext), rdx=block count,
#   rcx=key, r8=ivec, r9=cmac.
# The first counter block is encrypted up front (L$oop_enc1_5); each
# outer iteration then releases one plaintext block (ct ^ keystream),
# encrypts the next counter together with the CMAC (which absorbs the
# recovered plaintext), and finishes the CMAC alone at the break.
# Uses the file's L$increment64 / L$bswap_mask constants (defined
# outside this chunk).
#-----------------------------------------------------------------------
860 .globl _aesni_ccm64_decrypt_blocks
861
862 .p2align 4
863 _aesni_ccm64_decrypt_blocks:
864 movl 240(%rcx),%eax
865 movups (%r8),%xmm6
866 movdqu (%r9),%xmm3
867 movdqa L$increment64(%rip),%xmm9
868 movdqa L$bswap_mask(%rip),%xmm7
869
870 movaps %xmm6,%xmm2
871 movl %eax,%r10d
872 movq %rcx,%r11
# pshufb %xmm7,%xmm6 -- byte-swap the counter once up front.
873 .byte 102,15,56,0,247
874 movups (%rcx),%xmm0
875 movups 16(%rcx),%xmm1
876 leaq 32(%rcx),%rcx
877 xorps %xmm0,%xmm2
# Encrypt the first counter block (single-lane round loop).
878 L$oop_enc1_5:
879 .byte 102,15,56,220,209
880 decl %eax
881 movups (%rcx),%xmm1
882 leaq 16(%rcx),%rcx
883 jnz L$oop_enc1_5
884 .byte 102,15,56,221,209
# Set up the negative schedule index (as in the encryptN helpers) and
# advance the counter past block 0.
885 shll $4,%r10d
886 movl $16,%eax
887 movups (%rdi),%xmm8
888 paddq %xmm9,%xmm6
889 leaq 16(%rdi),%rdi
890 subq %r10,%rax
891 leaq 32(%r11,%r10,1),%rcx
892 movq %rax,%r10
893 jmp L$ccm64_dec_outer
894 .p2align 4
# plaintext = ciphertext ^ encrypted counter; emit it, then prep next.
895 L$ccm64_dec_outer:
896 xorps %xmm2,%xmm8
897 movdqa %xmm6,%xmm2
898 movups %xmm8,(%rsi)
899 leaq 16(%rsi),%rsi
# pshufb %xmm7,%xmm2 -- swap counter back for encryption.
900 .byte 102,15,56,0,215
901
902 subq $1,%rdx
903 jz L$ccm64_dec_break
904
# Whiten counter and fold recovered plaintext into the CMAC lane.
905 movups (%r11),%xmm0
906 movq %r10,%rax
907 movups 16(%r11),%xmm1
908 xorps %xmm0,%xmm8
909 xorps %xmm0,%xmm2
910 xorps %xmm8,%xmm3
911 movups 32(%r11),%xmm0
912 jmp L$ccm64_dec2_loop
913 .p2align 4
# Encrypt counter and CMAC together, two rounds per iteration.
914 L$ccm64_dec2_loop:
915 .byte 102,15,56,220,209
916 .byte 102,15,56,220,217
917 movups (%rcx,%rax,1),%xmm1
918 addq $32,%rax
919 .byte 102,15,56,220,208
920 .byte 102,15,56,220,216
921 movups -16(%rcx,%rax,1),%xmm0
922 jnz L$ccm64_dec2_loop
# Load next ciphertext block and bump the counter before the last
# two rounds.
923 movups (%rdi),%xmm8
924 paddq %xmm9,%xmm6
925 .byte 102,15,56,220,209
926 .byte 102,15,56,220,217
927 .byte 102,15,56,221,208
928 .byte 102,15,56,221,216
929 leaq 16(%rdi),%rdi
930 jmp L$ccm64_dec_outer
931
932 .p2align 4
# Last block: fold its plaintext into the CMAC and encrypt the CMAC
# alone (single-lane loop on xmm3), then store it.
933 L$ccm64_dec_break:
934
935 movl 240(%r11),%eax
936 movups (%r11),%xmm0
937 movups 16(%r11),%xmm1
938 xorps %xmm0,%xmm8
939 leaq 32(%r11),%r11
940 xorps %xmm8,%xmm3
941 L$oop_enc1_6:
942 .byte 102,15,56,220,217
943 decl %eax
944 movups (%r11),%xmm1
945 leaq 16(%r11),%r11
946 jnz L$oop_enc1_6
947 .byte 102,15,56,221,217
948 movups %xmm3,(%r9)
949 .byte 0xf3,0xc3
950
951 .globl _aesni_ctr32_encrypt_blocks
952
953 .p2align 4
954 _aesni_ctr32_encrypt_blocks:
955 leaq (%rsp),%rax
956 pushq %rbp
957 subq $128,%rsp
958 andq $-16,%rsp
959 leaq -8(%rax),%rbp
960
961 cmpq $1,%rdx
962 je L$ctr32_one_shortcut
963
964 movdqu (%r8),%xmm2
965 movdqu (%rcx),%xmm0
966 movl 12(%r8),%r8d
967 pxor %xmm0,%xmm2
968 movl 12(%rcx),%r11d
969 movdqa %xmm2,0(%rsp)
970 bswapl %r8d
971 movdqa %xmm2,%xmm3
972 movdqa %xmm2,%xmm4
973 movdqa %xmm2,%xmm5
974 movdqa %xmm2,64(%rsp)
975 movdqa %xmm2,80(%rsp)
976 movdqa %xmm2,96(%rsp)
977 movq %rdx,%r10
978 movdqa %xmm2,112(%rsp)
979
980 leaq 1(%r8),%rax
981 leaq 2(%r8),%rdx
982 bswapl %eax
983 bswapl %edx
984 xorl %r11d,%eax
985 xorl %r11d,%edx
986 .byte 102,15,58,34,216,3
987 leaq 3(%r8),%rax
988 movdqa %xmm3,16(%rsp)
989 .byte 102,15,58,34,226,3
990 bswapl %eax
991 movq %r10,%rdx
992 leaq 4(%r8),%r10
993 movdqa %xmm4,32(%rsp)
994 xorl %r11d,%eax
995 bswapl %r10d
996 .byte 102,15,58,34,232,3
997 xorl %r11d,%r10d
998 movdqa %xmm5,48(%rsp)
999 leaq 5(%r8),%r9
1000 movl %r10d,64+12(%rsp)
1001 bswapl %r9d
1002 leaq 6(%r8),%r10
1003 movl 240(%rcx),%eax
1004 xorl %r11d,%r9d
1005 bswapl %r10d
1006 movl %r9d,80+12(%rsp)
1007 xorl %r11d,%r10d
1008 leaq 7(%r8),%r9
1009 movl %r10d,96+12(%rsp)
1010 bswapl %r9d
1011 movl _OPENSSL_ia32cap_P+4(%rip),%r10d
1012 xorl %r11d,%r9d
1013 andl $71303168,%r10d
1014 movl %r9d,112+12(%rsp)
1015
1016 movups 16(%rcx),%xmm1
1017
1018 movdqa 64(%rsp),%xmm6
1019 movdqa 80(%rsp),%xmm7
1020
1021 cmpq $8,%rdx
1022 jb L$ctr32_tail
1023
1024 subq $6,%rdx
1025 cmpl $4194304,%r10d
1026 je L$ctr32_6x
1027
1028 leaq 128(%rcx),%rcx
1029 subq $2,%rdx
1030 jmp L$ctr32_loop8
1031
1032 .p2align 4
1033 L$ctr32_6x:
1034 shll $4,%eax
1035 movl $48,%r10d
1036 bswapl %r11d
1037 leaq 32(%rcx,%rax,1),%rcx
1038 subq %rax,%r10
1039 jmp L$ctr32_loop6
1040
1041 .p2align 4
1042 L$ctr32_loop6:
1043 addl $6,%r8d
1044 movups -48(%rcx,%r10,1),%xmm0
1045 .byte 102,15,56,220,209
1046 movl %r8d,%eax
1047 xorl %r11d,%eax
1048 .byte 102,15,56,220,217
1049 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1050 leal 1(%r8),%eax
1051 .byte 102,15,56,220,225
1052 xorl %r11d,%eax
1053 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1054 .byte 102,15,56,220,233
1055 leal 2(%r8),%eax
1056 xorl %r11d,%eax
1057 .byte 102,15,56,220,241
1058 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1059 leal 3(%r8),%eax
1060 .byte 102,15,56,220,249
1061 movups -32(%rcx,%r10,1),%xmm1
1062 xorl %r11d,%eax
1063
1064 .byte 102,15,56,220,208
1065 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1066 leal 4(%r8),%eax
1067 .byte 102,15,56,220,216
1068 xorl %r11d,%eax
1069 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1070 .byte 102,15,56,220,224
1071 leal 5(%r8),%eax
1072 xorl %r11d,%eax
1073 .byte 102,15,56,220,232
1074 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1075 movq %r10,%rax
1076 .byte 102,15,56,220,240
1077 .byte 102,15,56,220,248
1078 movups -16(%rcx,%r10,1),%xmm0
1079
1080 call L$enc_loop6
1081
1082 movdqu (%rdi),%xmm8
1083 movdqu 16(%rdi),%xmm9
1084 movdqu 32(%rdi),%xmm10
1085 movdqu 48(%rdi),%xmm11
1086 movdqu 64(%rdi),%xmm12
1087 movdqu 80(%rdi),%xmm13
1088 leaq 96(%rdi),%rdi
1089 movups -64(%rcx,%r10,1),%xmm1
1090 pxor %xmm2,%xmm8
1091 movaps 0(%rsp),%xmm2
1092 pxor %xmm3,%xmm9
1093 movaps 16(%rsp),%xmm3
1094 pxor %xmm4,%xmm10
1095 movaps 32(%rsp),%xmm4
1096 pxor %xmm5,%xmm11
1097 movaps 48(%rsp),%xmm5
1098 pxor %xmm6,%xmm12
1099 movaps 64(%rsp),%xmm6
1100 pxor %xmm7,%xmm13
1101 movaps 80(%rsp),%xmm7
1102 movdqu %xmm8,(%rsi)
1103 movdqu %xmm9,16(%rsi)
1104 movdqu %xmm10,32(%rsi)
1105 movdqu %xmm11,48(%rsi)
1106 movdqu %xmm12,64(%rsi)
1107 movdqu %xmm13,80(%rsi)
1108 leaq 96(%rsi),%rsi
1109
1110 subq $6,%rdx
1111 jnc L$ctr32_loop6
1112
1113 addq $6,%rdx
1114 jz L$ctr32_done
1115
1116 leal -48(%r10),%eax
1117 leaq -80(%rcx,%r10,1),%rcx
1118 negl %eax
1119 shrl $4,%eax
1120 jmp L$ctr32_tail
1121
1122 .p2align 5
1123 L$ctr32_loop8:
1124 addl $8,%r8d
1125 movdqa 96(%rsp),%xmm8
1126 .byte 102,15,56,220,209
1127 movl %r8d,%r9d
1128 movdqa 112(%rsp),%xmm9
1129 .byte 102,15,56,220,217
1130 bswapl %r9d
1131 movups 32-128(%rcx),%xmm0
1132 .byte 102,15,56,220,225
1133 xorl %r11d,%r9d
1134 nop
1135 .byte 102,15,56,220,233
1136 movl %r9d,0+12(%rsp)
1137 leaq 1(%r8),%r9
1138 .byte 102,15,56,220,241
1139 .byte 102,15,56,220,249
1140 .byte 102,68,15,56,220,193
1141 .byte 102,68,15,56,220,201
1142 movups 48-128(%rcx),%xmm1
1143 bswapl %r9d
1144 .byte 102,15,56,220,208
1145 .byte 102,15,56,220,216
1146 xorl %r11d,%r9d
1147 .byte 0x66,0x90
1148 .byte 102,15,56,220,224
1149 .byte 102,15,56,220,232
1150 movl %r9d,16+12(%rsp)
1151 leaq 2(%r8),%r9
1152 .byte 102,15,56,220,240
1153 .byte 102,15,56,220,248
1154 .byte 102,68,15,56,220,192
1155 .byte 102,68,15,56,220,200
1156 movups 64-128(%rcx),%xmm0
1157 bswapl %r9d
1158 .byte 102,15,56,220,209
1159 .byte 102,15,56,220,217
1160 xorl %r11d,%r9d
1161 .byte 0x66,0x90
1162 .byte 102,15,56,220,225
1163 .byte 102,15,56,220,233
1164 movl %r9d,32+12(%rsp)
1165 leaq 3(%r8),%r9
1166 .byte 102,15,56,220,241
1167 .byte 102,15,56,220,249
1168 .byte 102,68,15,56,220,193
1169 .byte 102,68,15,56,220,201
1170 movups 80-128(%rcx),%xmm1
1171 bswapl %r9d
1172 .byte 102,15,56,220,208
1173 .byte 102,15,56,220,216
1174 xorl %r11d,%r9d
1175 .byte 0x66,0x90
1176 .byte 102,15,56,220,224
1177 .byte 102,15,56,220,232
1178 movl %r9d,48+12(%rsp)
1179 leaq 4(%r8),%r9
1180 .byte 102,15,56,220,240
1181 .byte 102,15,56,220,248
1182 .byte 102,68,15,56,220,192
1183 .byte 102,68,15,56,220,200
1184 movups 96-128(%rcx),%xmm0
1185 bswapl %r9d
1186 .byte 102,15,56,220,209
1187 .byte 102,15,56,220,217
1188 xorl %r11d,%r9d
1189 .byte 0x66,0x90
1190 .byte 102,15,56,220,225
1191 .byte 102,15,56,220,233
1192 movl %r9d,64+12(%rsp)
1193 leaq 5(%r8),%r9
1194 .byte 102,15,56,220,241
1195 .byte 102,15,56,220,249
1196 .byte 102,68,15,56,220,193
1197 .byte 102,68,15,56,220,201
1198 movups 112-128(%rcx),%xmm1
1199 bswapl %r9d
1200 .byte 102,15,56,220,208
1201 .byte 102,15,56,220,216
1202 xorl %r11d,%r9d
1203 .byte 0x66,0x90
1204 .byte 102,15,56,220,224
1205 .byte 102,15,56,220,232
1206 movl %r9d,80+12(%rsp)
1207 leaq 6(%r8),%r9
1208 .byte 102,15,56,220,240
1209 .byte 102,15,56,220,248
1210 .byte 102,68,15,56,220,192
1211 .byte 102,68,15,56,220,200
1212 movups 128-128(%rcx),%xmm0
1213 bswapl %r9d
1214 .byte 102,15,56,220,209
1215 .byte 102,15,56,220,217
1216 xorl %r11d,%r9d
1217 .byte 0x66,0x90
1218 .byte 102,15,56,220,225
1219 .byte 102,15,56,220,233
1220 movl %r9d,96+12(%rsp)
1221 leaq 7(%r8),%r9
1222 .byte 102,15,56,220,241
1223 .byte 102,15,56,220,249
1224 .byte 102,68,15,56,220,193
1225 .byte 102,68,15,56,220,201
1226 movups 144-128(%rcx),%xmm1
1227 bswapl %r9d
1228 .byte 102,15,56,220,208
1229 .byte 102,15,56,220,216
1230 .byte 102,15,56,220,224
1231 xorl %r11d,%r9d
1232 movdqu 0(%rdi),%xmm10
1233 .byte 102,15,56,220,232
1234 movl %r9d,112+12(%rsp)
1235 cmpl $11,%eax
1236 .byte 102,15,56,220,240
1237 .byte 102,15,56,220,248
1238 .byte 102,68,15,56,220,192
1239 .byte 102,68,15,56,220,200
1240 movups 160-128(%rcx),%xmm0
1241
1242 jb L$ctr32_enc_done
1243
1244 .byte 102,15,56,220,209
1245 .byte 102,15,56,220,217
1246 .byte 102,15,56,220,225
1247 .byte 102,15,56,220,233
1248 .byte 102,15,56,220,241
1249 .byte 102,15,56,220,249
1250 .byte 102,68,15,56,220,193
1251 .byte 102,68,15,56,220,201
1252 movups 176-128(%rcx),%xmm1
1253
1254 .byte 102,15,56,220,208
1255 .byte 102,15,56,220,216
1256 .byte 102,15,56,220,224
1257 .byte 102,15,56,220,232
1258 .byte 102,15,56,220,240
1259 .byte 102,15,56,220,248
1260 .byte 102,68,15,56,220,192
1261 .byte 102,68,15,56,220,200
1262 movups 192-128(%rcx),%xmm0
1263 je L$ctr32_enc_done
1264
1265 .byte 102,15,56,220,209
1266 .byte 102,15,56,220,217
1267 .byte 102,15,56,220,225
1268 .byte 102,15,56,220,233
1269 .byte 102,15,56,220,241
1270 .byte 102,15,56,220,249
1271 .byte 102,68,15,56,220,193
1272 .byte 102,68,15,56,220,201
1273 movups 208-128(%rcx),%xmm1
1274
1275 .byte 102,15,56,220,208
1276 .byte 102,15,56,220,216
1277 .byte 102,15,56,220,224
1278 .byte 102,15,56,220,232
1279 .byte 102,15,56,220,240
1280 .byte 102,15,56,220,248
1281 .byte 102,68,15,56,220,192
1282 .byte 102,68,15,56,220,200
1283 movups 224-128(%rcx),%xmm0
1284 jmp L$ctr32_enc_done
1285
1286 .p2align 4
1287 L$ctr32_enc_done:
1288 movdqu 16(%rdi),%xmm11
1289 pxor %xmm0,%xmm10
1290 movdqu 32(%rdi),%xmm12
1291 pxor %xmm0,%xmm11
1292 movdqu 48(%rdi),%xmm13
1293 pxor %xmm0,%xmm12
1294 movdqu 64(%rdi),%xmm14
1295 pxor %xmm0,%xmm13
1296 movdqu 80(%rdi),%xmm15
1297 pxor %xmm0,%xmm14
1298 pxor %xmm0,%xmm15
1299 .byte 102,15,56,220,209
1300 .byte 102,15,56,220,217
1301 .byte 102,15,56,220,225
1302 .byte 102,15,56,220,233
1303 .byte 102,15,56,220,241
1304 .byte 102,15,56,220,249
1305 .byte 102,68,15,56,220,193
1306 .byte 102,68,15,56,220,201
1307 movdqu 96(%rdi),%xmm1
1308 leaq 128(%rdi),%rdi
1309
1310 .byte 102,65,15,56,221,210
1311 pxor %xmm0,%xmm1
1312 movdqu 112-128(%rdi),%xmm10
1313 .byte 102,65,15,56,221,219
1314 pxor %xmm0,%xmm10
1315 movdqa 0(%rsp),%xmm11
1316 .byte 102,65,15,56,221,228
1317 .byte 102,65,15,56,221,237
1318 movdqa 16(%rsp),%xmm12
1319 movdqa 32(%rsp),%xmm13
1320 .byte 102,65,15,56,221,246
1321 .byte 102,65,15,56,221,255
1322 movdqa 48(%rsp),%xmm14
1323 movdqa 64(%rsp),%xmm15
1324 .byte 102,68,15,56,221,193
1325 movdqa 80(%rsp),%xmm0
1326 movups 16-128(%rcx),%xmm1
1327 .byte 102,69,15,56,221,202
1328
1329 movups %xmm2,(%rsi)
1330 movdqa %xmm11,%xmm2
1331 movups %xmm3,16(%rsi)
1332 movdqa %xmm12,%xmm3
1333 movups %xmm4,32(%rsi)
1334 movdqa %xmm13,%xmm4
1335 movups %xmm5,48(%rsi)
1336 movdqa %xmm14,%xmm5
1337 movups %xmm6,64(%rsi)
1338 movdqa %xmm15,%xmm6
1339 movups %xmm7,80(%rsi)
1340 movdqa %xmm0,%xmm7
1341 movups %xmm8,96(%rsi)
1342 movups %xmm9,112(%rsi)
1343 leaq 128(%rsi),%rsi
1344
1345 subq $8,%rdx
1346 jnc L$ctr32_loop8
1347
1348 addq $8,%rdx
1349 jz L$ctr32_done
1350 leaq -128(%rcx),%rcx
1351
1352 L$ctr32_tail:
1353 leaq 16(%rcx),%rcx
1354 cmpq $4,%rdx
1355 jb L$ctr32_loop3
1356 je L$ctr32_loop4
1357
1358 shll $4,%eax
1359 movdqa 96(%rsp),%xmm8
1360 pxor %xmm9,%xmm9
1361
1362 movups 16(%rcx),%xmm0
1363 .byte 102,15,56,220,209
1364 .byte 102,15,56,220,217
1365 leaq 32-16(%rcx,%rax,1),%rcx
1366 negq %rax
1367 .byte 102,15,56,220,225
1368 addq $16,%rax
1369 movups (%rdi),%xmm10
1370 .byte 102,15,56,220,233
1371 .byte 102,15,56,220,241
1372 movups 16(%rdi),%xmm11
1373 movups 32(%rdi),%xmm12
1374 .byte 102,15,56,220,249
1375 .byte 102,68,15,56,220,193
1376
1377 call L$enc_loop8_enter
1378
1379 movdqu 48(%rdi),%xmm13
1380 pxor %xmm10,%xmm2
1381 movdqu 64(%rdi),%xmm10
1382 pxor %xmm11,%xmm3
1383 movdqu %xmm2,(%rsi)
1384 pxor %xmm12,%xmm4
1385 movdqu %xmm3,16(%rsi)
1386 pxor %xmm13,%xmm5
1387 movdqu %xmm4,32(%rsi)
1388 pxor %xmm10,%xmm6
1389 movdqu %xmm5,48(%rsi)
1390 movdqu %xmm6,64(%rsi)
1391 cmpq $6,%rdx
1392 jb L$ctr32_done
1393
1394 movups 80(%rdi),%xmm11
1395 xorps %xmm11,%xmm7
1396 movups %xmm7,80(%rsi)
1397 je L$ctr32_done
1398
1399 movups 96(%rdi),%xmm12
1400 xorps %xmm12,%xmm8
1401 movups %xmm8,96(%rsi)
1402 jmp L$ctr32_done
1403
1404 .p2align 5
1405 L$ctr32_loop4:
1406 .byte 102,15,56,220,209
1407 leaq 16(%rcx),%rcx
1408 decl %eax
1409 .byte 102,15,56,220,217
1410 .byte 102,15,56,220,225
1411 .byte 102,15,56,220,233
1412 movups (%rcx),%xmm1
1413 jnz L$ctr32_loop4
1414 .byte 102,15,56,221,209
1415 .byte 102,15,56,221,217
1416 movups (%rdi),%xmm10
1417 movups 16(%rdi),%xmm11
1418 .byte 102,15,56,221,225
1419 .byte 102,15,56,221,233
1420 movups 32(%rdi),%xmm12
1421 movups 48(%rdi),%xmm13
1422
1423 xorps %xmm10,%xmm2
1424 movups %xmm2,(%rsi)
1425 xorps %xmm11,%xmm3
1426 movups %xmm3,16(%rsi)
1427 pxor %xmm12,%xmm4
1428 movdqu %xmm4,32(%rsi)
1429 pxor %xmm13,%xmm5
1430 movdqu %xmm5,48(%rsi)
1431 jmp L$ctr32_done
1432
1433 .p2align 5
1434 L$ctr32_loop3:
1435 .byte 102,15,56,220,209
1436 leaq 16(%rcx),%rcx
1437 decl %eax
1438 .byte 102,15,56,220,217
1439 .byte 102,15,56,220,225
1440 movups (%rcx),%xmm1
1441 jnz L$ctr32_loop3
1442 .byte 102,15,56,221,209
1443 .byte 102,15,56,221,217
1444 .byte 102,15,56,221,225
1445
1446 movups (%rdi),%xmm10
1447 xorps %xmm10,%xmm2
1448 movups %xmm2,(%rsi)
1449 cmpq $2,%rdx
1450 jb L$ctr32_done
1451
1452 movups 16(%rdi),%xmm11
1453 xorps %xmm11,%xmm3
1454 movups %xmm3,16(%rsi)
1455 je L$ctr32_done
1456
1457 movups 32(%rdi),%xmm12
1458 xorps %xmm12,%xmm4
1459 movups %xmm4,32(%rsi)
1460 jmp L$ctr32_done
1461
1462 .p2align 4
1463 L$ctr32_one_shortcut:
1464 movups (%r8),%xmm2
1465 movups (%rdi),%xmm10
1466 movl 240(%rcx),%eax
1467 movups (%rcx),%xmm0
1468 movups 16(%rcx),%xmm1
1469 leaq 32(%rcx),%rcx
1470 xorps %xmm0,%xmm2
1471 L$oop_enc1_7:
1472 .byte 102,15,56,220,209
1473 decl %eax
1474 movups (%rcx),%xmm1
1475 leaq 16(%rcx),%rcx
1476 jnz L$oop_enc1_7
1477 .byte 102,15,56,221,209
1478 xorps %xmm10,%xmm2
1479 movups %xmm2,(%rsi)
1480 jmp L$ctr32_done
1481
1482 .p2align 4
1483 L$ctr32_done:
1484 leaq (%rbp),%rsp
1485 popq %rbp
1486 L$ctr32_epilogue:
1487 .byte 0xf3,0xc3
1488
/*
 * _aesni_xts_encrypt — AES-NI XTS-mode encryption (machine-generated perlasm
 * output from OpenSSL's aesni-x86_64.pl; do not hand-edit instructions).
 *
 * Presumed C signature (confirm against the BoringSSL header):
 *   void aesni_xts_encrypt(const uint8_t *inp, uint8_t *out, size_t len,
 *                          const AES_KEY *key1, const AES_KEY *key2,
 *                          const uint8_t iv[16]);
 * SysV AMD64 ABI: rdi=inp, rsi=out, rdx=len, rcx=key1 (data key),
 * r8=key2 (tweak key), r9=iv.  240(key) holds the round count, per the
 * AES_KEY layout this file reads everywhere.
 *
 * The raw .byte sequences encode AES-NI instructions for old assemblers:
 *   102,15,56,220,x = aesenc    102,15,56,221,x = aesenclast
 */
1489 .globl _aesni_xts_encrypt
1490 
1491 .p2align 4
1492 _aesni_xts_encrypt:
/* Build an aligned 112-byte scratch frame; rbp remembers the caller frame. */
1493 leaq (%rsp),%rax
1494 pushq %rbp
1495 subq $112,%rsp
1496 andq $-16,%rsp
1497 leaq -8(%rax),%rbp
/* Encrypt the IV with the tweak key (key2, in r8) to form the first tweak. */
1498 movups (%r9),%xmm2
1499 movl 240(%r8),%eax
1500 movl 240(%rcx),%r10d
1501 movups (%r8),%xmm0
1502 movups 16(%r8),%xmm1
1503 leaq 32(%r8),%r8
1504 xorps %xmm0,%xmm2
1505 L$oop_enc1_8:
1506 .byte 102,15,56,220,209
1507 decl %eax
1508 movups (%r8),%xmm1
1509 leaq 16(%r8),%r8
1510 jnz L$oop_enc1_8
1511 .byte 102,15,56,221,209
/* Tweak is now in xmm2.  Switch to the data key (key1): r11 = key1,
 * eax = rounds, r10 = rounds*16, r9 = original len (for the stolen tail),
 * rdx = len rounded down to whole blocks. */
1512 movups (%rcx),%xmm0
1513 movq %rcx,%r11
1514 movl %r10d,%eax
1515 shll $4,%r10d
1516 movq %rdx,%r9
1517 andq $-16,%rdx
1518 
1519 movups 16(%rcx,%r10,1),%xmm1
1520 
/* Derive six consecutive tweaks (xmm10..xmm14 pre-whitened with round key 0,
 * next tweak in xmm15) by repeated GF(2^128) doubling; L$xts_magic presumably
 * holds the x^128 reduction constant (0x87) — confirm at its definition.
 * The psrad/pand/pxor triple propagates the shifted-out top bit. */
1521 movdqa L$xts_magic(%rip),%xmm8
1522 movdqa %xmm2,%xmm15
1523 pshufd $95,%xmm2,%xmm9
1524 pxor %xmm0,%xmm1
1525 movdqa %xmm9,%xmm14
1526 paddd %xmm9,%xmm9
1527 movdqa %xmm15,%xmm10
1528 psrad $31,%xmm14
1529 paddq %xmm15,%xmm15
1530 pand %xmm8,%xmm14
1531 pxor %xmm0,%xmm10
1532 pxor %xmm14,%xmm15
1533 movdqa %xmm9,%xmm14
1534 paddd %xmm9,%xmm9
1535 movdqa %xmm15,%xmm11
1536 psrad $31,%xmm14
1537 paddq %xmm15,%xmm15
1538 pand %xmm8,%xmm14
1539 pxor %xmm0,%xmm11
1540 pxor %xmm14,%xmm15
1541 movdqa %xmm9,%xmm14
1542 paddd %xmm9,%xmm9
1543 movdqa %xmm15,%xmm12
1544 psrad $31,%xmm14
1545 paddq %xmm15,%xmm15
1546 pand %xmm8,%xmm14
1547 pxor %xmm0,%xmm12
1548 pxor %xmm14,%xmm15
1549 movdqa %xmm9,%xmm14
1550 paddd %xmm9,%xmm9
1551 movdqa %xmm15,%xmm13
1552 psrad $31,%xmm14
1553 paddq %xmm15,%xmm15
1554 pand %xmm8,%xmm14
1555 pxor %xmm0,%xmm13
1556 pxor %xmm14,%xmm15
1557 movdqa %xmm15,%xmm14
1558 psrad $31,%xmm9
1559 paddq %xmm15,%xmm15
1560 pand %xmm8,%xmm9
1561 pxor %xmm0,%xmm14
1562 pxor %xmm9,%xmm15
/* Stash (last round key ^ round key 0) at 96(%rsp) for the aesenclast trick. */
1563 movaps %xmm1,96(%rsp)
1564 
/* Fewer than 6 whole blocks (96 bytes)?  Take the short path. */
1565 subq $96,%rdx
1566 jc L$xts_enc_short
1567 
1568 movl $16+96,%eax
1569 leaq 32(%r11,%r10,1),%rcx
1570 subq %r10,%rax
1571 movups 16(%r11),%xmm1
1572 movq %rax,%r10
1573 leaq L$xts_magic(%rip),%r8
1574 jmp L$xts_enc_grandloop
1575 
1576 .p2align 5
/* Main loop: load and tweak-whiten 6 plaintext blocks, run the interleaved
 * AES rounds, and compute the next 6 tweaks in parallel with the tail rounds.
 * The per-block (tweak ^ key material) operands are parked at 0..80(%rsp). */
1577 L$xts_enc_grandloop:
1578 movdqu 0(%rdi),%xmm2
1579 movdqa %xmm0,%xmm8
1580 movdqu 16(%rdi),%xmm3
1581 pxor %xmm10,%xmm2
1582 movdqu 32(%rdi),%xmm4
1583 pxor %xmm11,%xmm3
1584 .byte 102,15,56,220,209
1585 movdqu 48(%rdi),%xmm5
1586 pxor %xmm12,%xmm4
1587 .byte 102,15,56,220,217
1588 movdqu 64(%rdi),%xmm6
1589 pxor %xmm13,%xmm5
1590 .byte 102,15,56,220,225
1591 movdqu 80(%rdi),%xmm7
1592 pxor %xmm15,%xmm8
1593 movdqa 96(%rsp),%xmm9
1594 pxor %xmm14,%xmm6
1595 .byte 102,15,56,220,233
1596 movups 32(%r11),%xmm0
1597 leaq 96(%rdi),%rdi
1598 pxor %xmm8,%xmm7
1599 
1600 pxor %xmm9,%xmm10
1601 .byte 102,15,56,220,241
1602 pxor %xmm9,%xmm11
1603 movdqa %xmm10,0(%rsp)
1604 .byte 102,15,56,220,249
1605 movups 48(%r11),%xmm1
1606 pxor %xmm9,%xmm12
1607 
1608 .byte 102,15,56,220,208
1609 pxor %xmm9,%xmm13
1610 movdqa %xmm11,16(%rsp)
1611 .byte 102,15,56,220,216
1612 pxor %xmm9,%xmm14
1613 movdqa %xmm12,32(%rsp)
1614 .byte 102,15,56,220,224
1615 .byte 102,15,56,220,232
1616 pxor %xmm9,%xmm8
1617 movdqa %xmm14,64(%rsp)
1618 .byte 102,15,56,220,240
1619 .byte 102,15,56,220,248
1620 movups 64(%r11),%xmm0
1621 movdqa %xmm8,80(%rsp)
1622 pshufd $95,%xmm15,%xmm9
1623 jmp L$xts_enc_loop6
1624 .p2align 5
/* Middle AES rounds over the remaining key schedule; rax counts up to 0. */
1625 L$xts_enc_loop6:
1626 .byte 102,15,56,220,209
1627 .byte 102,15,56,220,217
1628 .byte 102,15,56,220,225
1629 .byte 102,15,56,220,233
1630 .byte 102,15,56,220,241
1631 .byte 102,15,56,220,249
1632 movups -64(%rcx,%rax,1),%xmm1
1633 addq $32,%rax
1634 
1635 .byte 102,15,56,220,208
1636 .byte 102,15,56,220,216
1637 .byte 102,15,56,220,224
1638 .byte 102,15,56,220,232
1639 .byte 102,15,56,220,240
1640 .byte 102,15,56,220,248
1641 movups -80(%rcx,%rax,1),%xmm0
1642 jnz L$xts_enc_loop6
1643 
/* Last four rounds, interleaved with GF doubling of the next 6 tweaks
 * (future tweaks migrate through xmm10..xmm14 as their registers free up). */
1644 movdqa (%r8),%xmm8
1645 movdqa %xmm9,%xmm14
1646 paddd %xmm9,%xmm9
1647 .byte 102,15,56,220,209
1648 paddq %xmm15,%xmm15
1649 psrad $31,%xmm14
1650 .byte 102,15,56,220,217
1651 pand %xmm8,%xmm14
1652 movups (%r11),%xmm10
1653 .byte 102,15,56,220,225
1654 .byte 102,15,56,220,233
1655 .byte 102,15,56,220,241
1656 pxor %xmm14,%xmm15
1657 movaps %xmm10,%xmm11
1658 .byte 102,15,56,220,249
1659 movups -64(%rcx),%xmm1
1660 
1661 movdqa %xmm9,%xmm14
1662 .byte 102,15,56,220,208
1663 paddd %xmm9,%xmm9
1664 pxor %xmm15,%xmm10
1665 .byte 102,15,56,220,216
1666 psrad $31,%xmm14
1667 paddq %xmm15,%xmm15
1668 .byte 102,15,56,220,224
1669 .byte 102,15,56,220,232
1670 pand %xmm8,%xmm14
1671 movaps %xmm11,%xmm12
1672 .byte 102,15,56,220,240
1673 pxor %xmm14,%xmm15
1674 movdqa %xmm9,%xmm14
1675 .byte 102,15,56,220,248
1676 movups -48(%rcx),%xmm0
1677 
1678 paddd %xmm9,%xmm9
1679 .byte 102,15,56,220,209
1680 pxor %xmm15,%xmm11
1681 psrad $31,%xmm14
1682 .byte 102,15,56,220,217
1683 paddq %xmm15,%xmm15
1684 pand %xmm8,%xmm14
1685 .byte 102,15,56,220,225
1686 .byte 102,15,56,220,233
1687 movdqa %xmm13,48(%rsp)
1688 pxor %xmm14,%xmm15
1689 .byte 102,15,56,220,241
1690 movaps %xmm12,%xmm13
1691 movdqa %xmm9,%xmm14
1692 .byte 102,15,56,220,249
1693 movups -32(%rcx),%xmm1
1694 
1695 paddd %xmm9,%xmm9
1696 .byte 102,15,56,220,208
1697 pxor %xmm15,%xmm12
1698 psrad $31,%xmm14
1699 .byte 102,15,56,220,216
1700 paddq %xmm15,%xmm15
1701 pand %xmm8,%xmm14
1702 .byte 102,15,56,220,224
1703 .byte 102,15,56,220,232
1704 .byte 102,15,56,220,240
1705 pxor %xmm14,%xmm15
1706 movaps %xmm13,%xmm14
1707 .byte 102,15,56,220,248
1708 
1709 movdqa %xmm9,%xmm0
1710 paddd %xmm9,%xmm9
1711 .byte 102,15,56,220,209
1712 pxor %xmm15,%xmm13
1713 psrad $31,%xmm0
1714 .byte 102,15,56,220,217
1715 paddq %xmm15,%xmm15
1716 pand %xmm8,%xmm0
1717 .byte 102,15,56,220,225
1718 .byte 102,15,56,220,233
1719 pxor %xmm0,%xmm15
1720 movups (%r11),%xmm0
1721 .byte 102,15,56,220,241
1722 .byte 102,15,56,220,249
1723 movups 16(%r11),%xmm1
1724 
/* Final rounds: aesenclast with memory operands 0..80(%rsp) — the per-block
 * tweak-masked key material saved above, so output = ciphertext ^ tweak
 * falls out without extra xors. */
1725 pxor %xmm15,%xmm14
1726 .byte 102,15,56,221,84,36,0
1727 psrad $31,%xmm9
1728 paddq %xmm15,%xmm15
1729 .byte 102,15,56,221,92,36,16
1730 .byte 102,15,56,221,100,36,32
1731 pand %xmm8,%xmm9
1732 movq %r10,%rax
1733 .byte 102,15,56,221,108,36,48
1734 .byte 102,15,56,221,116,36,64
1735 .byte 102,15,56,221,124,36,80
1736 pxor %xmm9,%xmm15
1737 
1738 leaq 96(%rsi),%rsi
1739 movups %xmm2,-96(%rsi)
1740 movups %xmm3,-80(%rsi)
1741 movups %xmm4,-64(%rsi)
1742 movups %xmm5,-48(%rsi)
1743 movups %xmm6,-32(%rsi)
1744 movups %xmm7,-16(%rsi)
1745 subq $96,%rdx
1746 jnc L$xts_enc_grandloop
1747 
1748 movl $16+96,%eax
1749 subl %r10d,%eax
1750 movq %r11,%rcx
1751 shrl $4,%eax
1752 
/* 1..5 remaining full blocks: un-whiten the needed tweaks and dispatch. */
1753 L$xts_enc_short:
1754 movl %eax,%r10d
1755 pxor %xmm0,%xmm10
1756 addq $96,%rdx
1757 jz L$xts_enc_done
1758 
1759 pxor %xmm0,%xmm11
1760 cmpq $32,%rdx
1761 jb L$xts_enc_one
1762 pxor %xmm0,%xmm12
1763 je L$xts_enc_two
1764 
1765 pxor %xmm0,%xmm13
1766 cmpq $64,%rdx
1767 jb L$xts_enc_three
1768 pxor %xmm0,%xmm14
1769 je L$xts_enc_four
1770 
/* Five blocks via the shared 6-wide helper. */
1771 movdqu (%rdi),%xmm2
1772 movdqu 16(%rdi),%xmm3
1773 movdqu 32(%rdi),%xmm4
1774 pxor %xmm10,%xmm2
1775 movdqu 48(%rdi),%xmm5
1776 pxor %xmm11,%xmm3
1777 movdqu 64(%rdi),%xmm6
1778 leaq 80(%rdi),%rdi
1779 pxor %xmm12,%xmm4
1780 pxor %xmm13,%xmm5
1781 pxor %xmm14,%xmm6
1782 
1783 call _aesni_encrypt6
1784 
1785 xorps %xmm10,%xmm2
1786 movdqa %xmm15,%xmm10
1787 xorps %xmm11,%xmm3
1788 xorps %xmm12,%xmm4
1789 movdqu %xmm2,(%rsi)
1790 xorps %xmm13,%xmm5
1791 movdqu %xmm3,16(%rsi)
1792 xorps %xmm14,%xmm6
1793 movdqu %xmm4,32(%rsi)
1794 movdqu %xmm5,48(%rsi)
1795 movdqu %xmm6,64(%rsi)
1796 leaq 80(%rsi),%rsi
1797 jmp L$xts_enc_done
1798 
1799 .p2align 4
/* One block, inline round loop; xmm10 advances to the next tweak for steal. */
1800 L$xts_enc_one:
1801 movups (%rdi),%xmm2
1802 leaq 16(%rdi),%rdi
1803 xorps %xmm10,%xmm2
1804 movups (%rcx),%xmm0
1805 movups 16(%rcx),%xmm1
1806 leaq 32(%rcx),%rcx
1807 xorps %xmm0,%xmm2
1808 L$oop_enc1_9:
1809 .byte 102,15,56,220,209
1810 decl %eax
1811 movups (%rcx),%xmm1
1812 leaq 16(%rcx),%rcx
1813 jnz L$oop_enc1_9
1814 .byte 102,15,56,221,209
1815 xorps %xmm10,%xmm2
1816 movdqa %xmm11,%xmm10
1817 movups %xmm2,(%rsi)
1818 leaq 16(%rsi),%rsi
1819 jmp L$xts_enc_done
1820 
1821 .p2align 4
1822 L$xts_enc_two:
1823 movups (%rdi),%xmm2
1824 movups 16(%rdi),%xmm3
1825 leaq 32(%rdi),%rdi
1826 xorps %xmm10,%xmm2
1827 xorps %xmm11,%xmm3
1828 
1829 call _aesni_encrypt2
1830 
1831 xorps %xmm10,%xmm2
1832 movdqa %xmm12,%xmm10
1833 xorps %xmm11,%xmm3
1834 movups %xmm2,(%rsi)
1835 movups %xmm3,16(%rsi)
1836 leaq 32(%rsi),%rsi
1837 jmp L$xts_enc_done
1838 
1839 .p2align 4
1840 L$xts_enc_three:
1841 movups (%rdi),%xmm2
1842 movups 16(%rdi),%xmm3
1843 movups 32(%rdi),%xmm4
1844 leaq 48(%rdi),%rdi
1845 xorps %xmm10,%xmm2
1846 xorps %xmm11,%xmm3
1847 xorps %xmm12,%xmm4
1848 
1849 call _aesni_encrypt3
1850 
1851 xorps %xmm10,%xmm2
1852 movdqa %xmm13,%xmm10
1853 xorps %xmm11,%xmm3
1854 xorps %xmm12,%xmm4
1855 movups %xmm2,(%rsi)
1856 movups %xmm3,16(%rsi)
1857 movups %xmm4,32(%rsi)
1858 leaq 48(%rsi),%rsi
1859 jmp L$xts_enc_done
1860 
1861 .p2align 4
1862 L$xts_enc_four:
1863 movups (%rdi),%xmm2
1864 movups 16(%rdi),%xmm3
1865 movups 32(%rdi),%xmm4
1866 xorps %xmm10,%xmm2
1867 movups 48(%rdi),%xmm5
1868 leaq 64(%rdi),%rdi
1869 xorps %xmm11,%xmm3
1870 xorps %xmm12,%xmm4
1871 xorps %xmm13,%xmm5
1872 
1873 call _aesni_encrypt4
1874 
1875 pxor %xmm10,%xmm2
1876 movdqa %xmm14,%xmm10
1877 pxor %xmm11,%xmm3
1878 pxor %xmm12,%xmm4
1879 movdqu %xmm2,(%rsi)
1880 pxor %xmm13,%xmm5
1881 movdqu %xmm3,16(%rsi)
1882 movdqu %xmm4,32(%rsi)
1883 movdqu %xmm5,48(%rsi)
1884 leaq 64(%rsi),%rsi
1885 jmp L$xts_enc_done
1886 
1887 .p2align 4
/* len not a multiple of 16 (low bits of original len in r9)?  Do standard
 * XTS ciphertext stealing on the final partial block. */
1888 L$xts_enc_done:
1889 andq $15,%r9
1890 jz L$xts_enc_ret
1891 movq %r9,%rdx
1892 
/* Copy tail plaintext bytes over the front of the last ciphertext block,
 * moving the displaced ciphertext bytes into the short output tail. */
1893 L$xts_enc_steal:
1894 movzbl (%rdi),%eax
1895 movzbl -16(%rsi),%ecx
1896 leaq 1(%rdi),%rdi
1897 movb %al,-16(%rsi)
1898 movb %cl,0(%rsi)
1899 leaq 1(%rsi),%rsi
1900 subq $1,%rdx
1901 jnz L$xts_enc_steal
1902 
1903 subq %r9,%rsi
1904 movq %r11,%rcx
1905 movl %r10d,%eax
1906 
/* Re-encrypt the stitched block with the next tweak (xmm10). */
1907 movups -16(%rsi),%xmm2
1908 xorps %xmm10,%xmm2
1909 movups (%rcx),%xmm0
1910 movups 16(%rcx),%xmm1
1911 leaq 32(%rcx),%rcx
1912 xorps %xmm0,%xmm2
1913 L$oop_enc1_10:
1914 .byte 102,15,56,220,209
1915 decl %eax
1916 movups (%rcx),%xmm1
1917 leaq 16(%rcx),%rcx
1918 jnz L$oop_enc1_10
1919 .byte 102,15,56,221,209
1920 xorps %xmm10,%xmm2
1921 movups %xmm2,-16(%rsi)
1922 
/* Restore caller's stack and return. */
1923 L$xts_enc_ret:
1924 leaq (%rbp),%rsp
1925 popq %rbp
1926 L$xts_enc_epilogue:
1927 .byte 0xf3,0xc3
1928
/*
 * _aesni_xts_decrypt — AES-NI XTS-mode decryption (machine-generated perlasm
 * output; mirror of _aesni_xts_encrypt, do not hand-edit instructions).
 *
 * Presumed C signature (confirm against the BoringSSL header):
 *   void aesni_xts_decrypt(const uint8_t *inp, uint8_t *out, size_t len,
 *                          const AES_KEY *key1, const AES_KEY *key2,
 *                          const uint8_t iv[16]);
 * SysV AMD64 ABI: rdi=inp, rsi=out, rdx=len, rcx=key1 (data key, decrypt
 * schedule), r8=key2 (tweak key, encrypt schedule), r9=iv.
 *
 * Raw .byte opcode sequences:
 *   102,15,56,220/221,x = aesenc/aesenclast (tweak generation)
 *   102,15,56,222/223,x = aesdec/aesdeclast (data path)
 */
1929 .globl _aesni_xts_decrypt
1930 
1931 .p2align 4
1932 _aesni_xts_decrypt:
/* Aligned 112-byte scratch frame; rbp remembers the caller frame. */
1933 leaq (%rsp),%rax
1934 pushq %rbp
1935 subq $112,%rsp
1936 andq $-16,%rsp
1937 leaq -8(%rax),%rbp
/* The tweak is always produced by ENcrypting the IV with key2. */
1938 movups (%r9),%xmm2
1939 movl 240(%r8),%eax
1940 movl 240(%rcx),%r10d
1941 movups (%r8),%xmm0
1942 movups 16(%r8),%xmm1
1943 leaq 32(%r8),%r8
1944 xorps %xmm0,%xmm2
1945 L$oop_enc1_11:
1946 .byte 102,15,56,220,209
1947 decl %eax
1948 movups (%r8),%xmm1
1949 leaq 16(%r8),%r8
1950 jnz L$oop_enc1_11
1951 .byte 102,15,56,221,209
/* If len is not block-aligned, hold back one extra full block so the
 * ciphertext-stealing tail can be decrypted with the right tweak order. */
1952 xorl %eax,%eax
1953 testq $15,%rdx
1954 setnz %al
1955 shlq $4,%rax
1956 subq %rax,%rdx
1957 
/* Switch to the data key: r11=key1, eax=rounds, r10=rounds*16,
 * r9=adjusted len (tail bookkeeping), rdx=whole-block byte count. */
1958 movups (%rcx),%xmm0
1959 movq %rcx,%r11
1960 movl %r10d,%eax
1961 shll $4,%r10d
1962 movq %rdx,%r9
1963 andq $-16,%rdx
1964 
1965 movups 16(%rcx,%r10,1),%xmm1
1966 
/* Derive six tweaks (xmm10..xmm14 whitened with round key 0, next in xmm15)
 * by GF(2^128) doubling; L$xts_magic presumably holds the 0x87 reduction
 * constant — confirm at its definition. */
1967 movdqa L$xts_magic(%rip),%xmm8
1968 movdqa %xmm2,%xmm15
1969 pshufd $95,%xmm2,%xmm9
1970 pxor %xmm0,%xmm1
1971 movdqa %xmm9,%xmm14
1972 paddd %xmm9,%xmm9
1973 movdqa %xmm15,%xmm10
1974 psrad $31,%xmm14
1975 paddq %xmm15,%xmm15
1976 pand %xmm8,%xmm14
1977 pxor %xmm0,%xmm10
1978 pxor %xmm14,%xmm15
1979 movdqa %xmm9,%xmm14
1980 paddd %xmm9,%xmm9
1981 movdqa %xmm15,%xmm11
1982 psrad $31,%xmm14
1983 paddq %xmm15,%xmm15
1984 pand %xmm8,%xmm14
1985 pxor %xmm0,%xmm11
1986 pxor %xmm14,%xmm15
1987 movdqa %xmm9,%xmm14
1988 paddd %xmm9,%xmm9
1989 movdqa %xmm15,%xmm12
1990 psrad $31,%xmm14
1991 paddq %xmm15,%xmm15
1992 pand %xmm8,%xmm14
1993 pxor %xmm0,%xmm12
1994 pxor %xmm14,%xmm15
1995 movdqa %xmm9,%xmm14
1996 paddd %xmm9,%xmm9
1997 movdqa %xmm15,%xmm13
1998 psrad $31,%xmm14
1999 paddq %xmm15,%xmm15
2000 pand %xmm8,%xmm14
2001 pxor %xmm0,%xmm13
2002 pxor %xmm14,%xmm15
2003 movdqa %xmm15,%xmm14
2004 psrad $31,%xmm9
2005 paddq %xmm15,%xmm15
2006 pand %xmm8,%xmm9
2007 pxor %xmm0,%xmm14
2008 pxor %xmm9,%xmm15
/* Stash (last round key ^ round key 0) for the aesdeclast memory trick. */
2009 movaps %xmm1,96(%rsp)
2010 
/* Fewer than 6 whole blocks?  Short path. */
2011 subq $96,%rdx
2012 jc L$xts_dec_short
2013 
2014 movl $16+96,%eax
2015 leaq 32(%r11,%r10,1),%rcx
2016 subq %r10,%rax
2017 movups 16(%r11),%xmm1
2018 movq %rax,%r10
2019 leaq L$xts_magic(%rip),%r8
2020 jmp L$xts_dec_grandloop
2021 
2022 .p2align 5
/* Main loop: 6 ciphertext blocks at a time, interleaved aesdec rounds with
 * next-tweak generation; per-block final operands parked at 0..80(%rsp). */
2023 L$xts_dec_grandloop:
2024 movdqu 0(%rdi),%xmm2
2025 movdqa %xmm0,%xmm8
2026 movdqu 16(%rdi),%xmm3
2027 pxor %xmm10,%xmm2
2028 movdqu 32(%rdi),%xmm4
2029 pxor %xmm11,%xmm3
2030 .byte 102,15,56,222,209
2031 movdqu 48(%rdi),%xmm5
2032 pxor %xmm12,%xmm4
2033 .byte 102,15,56,222,217
2034 movdqu 64(%rdi),%xmm6
2035 pxor %xmm13,%xmm5
2036 .byte 102,15,56,222,225
2037 movdqu 80(%rdi),%xmm7
2038 pxor %xmm15,%xmm8
2039 movdqa 96(%rsp),%xmm9
2040 pxor %xmm14,%xmm6
2041 .byte 102,15,56,222,233
2042 movups 32(%r11),%xmm0
2043 leaq 96(%rdi),%rdi
2044 pxor %xmm8,%xmm7
2045 
2046 pxor %xmm9,%xmm10
2047 .byte 102,15,56,222,241
2048 pxor %xmm9,%xmm11
2049 movdqa %xmm10,0(%rsp)
2050 .byte 102,15,56,222,249
2051 movups 48(%r11),%xmm1
2052 pxor %xmm9,%xmm12
2053 
2054 .byte 102,15,56,222,208
2055 pxor %xmm9,%xmm13
2056 movdqa %xmm11,16(%rsp)
2057 .byte 102,15,56,222,216
2058 pxor %xmm9,%xmm14
2059 movdqa %xmm12,32(%rsp)
2060 .byte 102,15,56,222,224
2061 .byte 102,15,56,222,232
2062 pxor %xmm9,%xmm8
2063 movdqa %xmm14,64(%rsp)
2064 .byte 102,15,56,222,240
2065 .byte 102,15,56,222,248
2066 movups 64(%r11),%xmm0
2067 movdqa %xmm8,80(%rsp)
2068 pshufd $95,%xmm15,%xmm9
2069 jmp L$xts_dec_loop6
2070 .p2align 5
/* Middle aesdec rounds over the remaining key schedule; rax counts to 0. */
2071 L$xts_dec_loop6:
2072 .byte 102,15,56,222,209
2073 .byte 102,15,56,222,217
2074 .byte 102,15,56,222,225
2075 .byte 102,15,56,222,233
2076 .byte 102,15,56,222,241
2077 .byte 102,15,56,222,249
2078 movups -64(%rcx,%rax,1),%xmm1
2079 addq $32,%rax
2080 
2081 .byte 102,15,56,222,208
2082 .byte 102,15,56,222,216
2083 .byte 102,15,56,222,224
2084 .byte 102,15,56,222,232
2085 .byte 102,15,56,222,240
2086 .byte 102,15,56,222,248
2087 movups -80(%rcx,%rax,1),%xmm0
2088 jnz L$xts_dec_loop6
2089 
/* Last four rounds, interleaved with GF doubling of the next 6 tweaks. */
2090 movdqa (%r8),%xmm8
2091 movdqa %xmm9,%xmm14
2092 paddd %xmm9,%xmm9
2093 .byte 102,15,56,222,209
2094 paddq %xmm15,%xmm15
2095 psrad $31,%xmm14
2096 .byte 102,15,56,222,217
2097 pand %xmm8,%xmm14
2098 movups (%r11),%xmm10
2099 .byte 102,15,56,222,225
2100 .byte 102,15,56,222,233
2101 .byte 102,15,56,222,241
2102 pxor %xmm14,%xmm15
2103 movaps %xmm10,%xmm11
2104 .byte 102,15,56,222,249
2105 movups -64(%rcx),%xmm1
2106 
2107 movdqa %xmm9,%xmm14
2108 .byte 102,15,56,222,208
2109 paddd %xmm9,%xmm9
2110 pxor %xmm15,%xmm10
2111 .byte 102,15,56,222,216
2112 psrad $31,%xmm14
2113 paddq %xmm15,%xmm15
2114 .byte 102,15,56,222,224
2115 .byte 102,15,56,222,232
2116 pand %xmm8,%xmm14
2117 movaps %xmm11,%xmm12
2118 .byte 102,15,56,222,240
2119 pxor %xmm14,%xmm15
2120 movdqa %xmm9,%xmm14
2121 .byte 102,15,56,222,248
2122 movups -48(%rcx),%xmm0
2123 
2124 paddd %xmm9,%xmm9
2125 .byte 102,15,56,222,209
2126 pxor %xmm15,%xmm11
2127 psrad $31,%xmm14
2128 .byte 102,15,56,222,217
2129 paddq %xmm15,%xmm15
2130 pand %xmm8,%xmm14
2131 .byte 102,15,56,222,225
2132 .byte 102,15,56,222,233
2133 movdqa %xmm13,48(%rsp)
2134 pxor %xmm14,%xmm15
2135 .byte 102,15,56,222,241
2136 movaps %xmm12,%xmm13
2137 movdqa %xmm9,%xmm14
2138 .byte 102,15,56,222,249
2139 movups -32(%rcx),%xmm1
2140 
2141 paddd %xmm9,%xmm9
2142 .byte 102,15,56,222,208
2143 pxor %xmm15,%xmm12
2144 psrad $31,%xmm14
2145 .byte 102,15,56,222,216
2146 paddq %xmm15,%xmm15
2147 pand %xmm8,%xmm14
2148 .byte 102,15,56,222,224
2149 .byte 102,15,56,222,232
2150 .byte 102,15,56,222,240
2151 pxor %xmm14,%xmm15
2152 movaps %xmm13,%xmm14
2153 .byte 102,15,56,222,248
2154 
2155 movdqa %xmm9,%xmm0
2156 paddd %xmm9,%xmm9
2157 .byte 102,15,56,222,209
2158 pxor %xmm15,%xmm13
2159 psrad $31,%xmm0
2160 .byte 102,15,56,222,217
2161 paddq %xmm15,%xmm15
2162 pand %xmm8,%xmm0
2163 .byte 102,15,56,222,225
2164 .byte 102,15,56,222,233
2165 pxor %xmm0,%xmm15
2166 movups (%r11),%xmm0
2167 .byte 102,15,56,222,241
2168 .byte 102,15,56,222,249
2169 movups 16(%r11),%xmm1
2170 
/* Final rounds: aesdeclast with memory operands 0..80(%rsp) (tweak-masked
 * key material saved above), yielding plaintext ^ tweak directly. */
2171 pxor %xmm15,%xmm14
2172 .byte 102,15,56,223,84,36,0
2173 psrad $31,%xmm9
2174 paddq %xmm15,%xmm15
2175 .byte 102,15,56,223,92,36,16
2176 .byte 102,15,56,223,100,36,32
2177 pand %xmm8,%xmm9
2178 movq %r10,%rax
2179 .byte 102,15,56,223,108,36,48
2180 .byte 102,15,56,223,116,36,64
2181 .byte 102,15,56,223,124,36,80
2182 pxor %xmm9,%xmm15
2183 
2184 leaq 96(%rsi),%rsi
2185 movups %xmm2,-96(%rsi)
2186 movups %xmm3,-80(%rsi)
2187 movups %xmm4,-64(%rsi)
2188 movups %xmm5,-48(%rsi)
2189 movups %xmm6,-32(%rsi)
2190 movups %xmm7,-16(%rsi)
2191 subq $96,%rdx
2192 jnc L$xts_dec_grandloop
2193 
2194 movl $16+96,%eax
2195 subl %r10d,%eax
2196 movq %r11,%rcx
2197 shrl $4,%eax
2198 
/* 1..5 remaining full blocks: un-whiten the needed tweaks and dispatch. */
2199 L$xts_dec_short:
2200 movl %eax,%r10d
2201 pxor %xmm0,%xmm10
2202 pxor %xmm0,%xmm11
2203 addq $96,%rdx
2204 jz L$xts_dec_done
2205 
2206 pxor %xmm0,%xmm12
2207 cmpq $32,%rdx
2208 jb L$xts_dec_one
2209 pxor %xmm0,%xmm13
2210 je L$xts_dec_two
2211 
2212 pxor %xmm0,%xmm14
2213 cmpq $64,%rdx
2214 jb L$xts_dec_three
2215 je L$xts_dec_four
2216 
/* Five blocks via the shared 6-wide helper, then advance the tweak pair
 * (xmm10/xmm11) for a possible ciphertext-stealing tail. */
2217 movdqu (%rdi),%xmm2
2218 movdqu 16(%rdi),%xmm3
2219 movdqu 32(%rdi),%xmm4
2220 pxor %xmm10,%xmm2
2221 movdqu 48(%rdi),%xmm5
2222 pxor %xmm11,%xmm3
2223 movdqu 64(%rdi),%xmm6
2224 leaq 80(%rdi),%rdi
2225 pxor %xmm12,%xmm4
2226 pxor %xmm13,%xmm5
2227 pxor %xmm14,%xmm6
2228 
2229 call _aesni_decrypt6
2230 
2231 xorps %xmm10,%xmm2
2232 xorps %xmm11,%xmm3
2233 xorps %xmm12,%xmm4
2234 movdqu %xmm2,(%rsi)
2235 xorps %xmm13,%xmm5
2236 movdqu %xmm3,16(%rsi)
2237 xorps %xmm14,%xmm6
2238 movdqu %xmm4,32(%rsi)
2239 pxor %xmm14,%xmm14
2240 movdqu %xmm5,48(%rsi)
2241 pcmpgtd %xmm15,%xmm14
2242 movdqu %xmm6,64(%rsi)
2243 leaq 80(%rsi),%rsi
2244 pshufd $19,%xmm14,%xmm11
2245 andq $15,%r9
2246 jz L$xts_dec_ret
2247 
/* Compute tweak n (xmm10) and tweak n+1 (xmm11) for the stolen tail. */
2248 movdqa %xmm15,%xmm10
2249 paddq %xmm15,%xmm15
2250 pand %xmm8,%xmm11
2251 pxor %xmm15,%xmm11
2252 jmp L$xts_dec_done2
2253 
2254 .p2align 4
/* One block, inline round loop; xmm10/xmm11 advance for the tail. */
2255 L$xts_dec_one:
2256 movups (%rdi),%xmm2
2257 leaq 16(%rdi),%rdi
2258 xorps %xmm10,%xmm2
2259 movups (%rcx),%xmm0
2260 movups 16(%rcx),%xmm1
2261 leaq 32(%rcx),%rcx
2262 xorps %xmm0,%xmm2
2263 L$oop_dec1_12:
2264 .byte 102,15,56,222,209
2265 decl %eax
2266 movups (%rcx),%xmm1
2267 leaq 16(%rcx),%rcx
2268 jnz L$oop_dec1_12
2269 .byte 102,15,56,223,209
2270 xorps %xmm10,%xmm2
2271 movdqa %xmm11,%xmm10
2272 movups %xmm2,(%rsi)
2273 movdqa %xmm12,%xmm11
2274 leaq 16(%rsi),%rsi
2275 jmp L$xts_dec_done
2276 
2277 .p2align 4
2278 L$xts_dec_two:
2279 movups (%rdi),%xmm2
2280 movups 16(%rdi),%xmm3
2281 leaq 32(%rdi),%rdi
2282 xorps %xmm10,%xmm2
2283 xorps %xmm11,%xmm3
2284 
2285 call _aesni_decrypt2
2286 
2287 xorps %xmm10,%xmm2
2288 movdqa %xmm12,%xmm10
2289 xorps %xmm11,%xmm3
2290 movdqa %xmm13,%xmm11
2291 movups %xmm2,(%rsi)
2292 movups %xmm3,16(%rsi)
2293 leaq 32(%rsi),%rsi
2294 jmp L$xts_dec_done
2295 
2296 .p2align 4
2297 L$xts_dec_three:
2298 movups (%rdi),%xmm2
2299 movups 16(%rdi),%xmm3
2300 movups 32(%rdi),%xmm4
2301 leaq 48(%rdi),%rdi
2302 xorps %xmm10,%xmm2
2303 xorps %xmm11,%xmm3
2304 xorps %xmm12,%xmm4
2305 
2306 call _aesni_decrypt3
2307 
2308 xorps %xmm10,%xmm2
2309 movdqa %xmm13,%xmm10
2310 xorps %xmm11,%xmm3
2311 movdqa %xmm14,%xmm11
2312 xorps %xmm12,%xmm4
2313 movups %xmm2,(%rsi)
2314 movups %xmm3,16(%rsi)
2315 movups %xmm4,32(%rsi)
2316 leaq 48(%rsi),%rsi
2317 jmp L$xts_dec_done
2318 
2319 .p2align 4
2320 L$xts_dec_four:
2321 movups (%rdi),%xmm2
2322 movups 16(%rdi),%xmm3
2323 movups 32(%rdi),%xmm4
2324 xorps %xmm10,%xmm2
2325 movups 48(%rdi),%xmm5
2326 leaq 64(%rdi),%rdi
2327 xorps %xmm11,%xmm3
2328 xorps %xmm12,%xmm4
2329 xorps %xmm13,%xmm5
2330 
2331 call _aesni_decrypt4
2332 
2333 pxor %xmm10,%xmm2
2334 movdqa %xmm14,%xmm10
2335 pxor %xmm11,%xmm3
2336 movdqa %xmm15,%xmm11
2337 pxor %xmm12,%xmm4
2338 movdqu %xmm2,(%rsi)
2339 pxor %xmm13,%xmm5
2340 movdqu %xmm3,16(%rsi)
2341 movdqu %xmm4,32(%rsi)
2342 movdqu %xmm5,48(%rsi)
2343 leaq 64(%rsi),%rsi
2344 jmp L$xts_dec_done
2345 
2346 .p2align 4
/* Ciphertext stealing for decrypt: first decrypt the held-back full block
 * with the LATER tweak (xmm11), then stitch and redo with tweak xmm10. */
2347 L$xts_dec_done:
2348 andq $15,%r9
2349 jz L$xts_dec_ret
2350 L$xts_dec_done2:
2351 movq %r9,%rdx
2352 movq %r11,%rcx
2353 movl %r10d,%eax
2354 
2355 movups (%rdi),%xmm2
2356 xorps %xmm11,%xmm2
2357 movups (%rcx),%xmm0
2358 movups 16(%rcx),%xmm1
2359 leaq 32(%rcx),%rcx
2360 xorps %xmm0,%xmm2
2361 L$oop_dec1_13:
2362 .byte 102,15,56,222,209
2363 decl %eax
2364 movups (%rcx),%xmm1
2365 leaq 16(%rcx),%rcx
2366 jnz L$oop_dec1_13
2367 .byte 102,15,56,223,209
2368 xorps %xmm11,%xmm2
2369 movups %xmm2,(%rsi)
2370 
/* Swap tail ciphertext bytes with the leading bytes of the block just
 * written, producing the short plaintext tail in place. */
2371 L$xts_dec_steal:
2372 movzbl 16(%rdi),%eax
2373 movzbl (%rsi),%ecx
2374 leaq 1(%rdi),%rdi
2375 movb %al,(%rsi)
2376 movb %cl,16(%rsi)
2377 leaq 1(%rsi),%rsi
2378 subq $1,%rdx
2379 jnz L$xts_dec_steal
2380 
2381 subq %r9,%rsi
2382 movq %r11,%rcx
2383 movl %r10d,%eax
2384 
/* Decrypt the stitched block with tweak xmm10 to finish. */
2385 movups (%rsi),%xmm2
2386 xorps %xmm10,%xmm2
2387 movups (%rcx),%xmm0
2388 movups 16(%rcx),%xmm1
2389 leaq 32(%rcx),%rcx
2390 xorps %xmm0,%xmm2
2391 L$oop_dec1_14:
2392 .byte 102,15,56,222,209
2393 decl %eax
2394 movups (%rcx),%xmm1
2395 leaq 16(%rcx),%rcx
2396 jnz L$oop_dec1_14
2397 .byte 102,15,56,223,209
2398 xorps %xmm10,%xmm2
2399 movups %xmm2,(%rsi)
2400 
/* Restore caller's stack and return. */
2401 L$xts_dec_ret:
2402 leaq (%rbp),%rsp
2403 popq %rbp
2404 L$xts_dec_epilogue:
2405 .byte 0xf3,0xc3
2406
2407 .globl _aesni_cbc_encrypt
2408
2409 .p2align 4
2410 _aesni_cbc_encrypt:
2411 testq %rdx,%rdx
2412 jz L$cbc_ret
2413
2414 movl 240(%rcx),%r10d
2415 movq %rcx,%r11
2416 testl %r9d,%r9d
2417 jz L$cbc_decrypt
2418
2419 movups (%r8),%xmm2
2420 movl %r10d,%eax
2421 cmpq $16,%rdx
2422 jb L$cbc_enc_tail
2423 subq $16,%rdx
2424 jmp L$cbc_enc_loop
2425 .p2align 4
2426 L$cbc_enc_loop:
2427 movups (%rdi),%xmm3
2428 leaq 16(%rdi),%rdi
2429
2430 movups (%rcx),%xmm0
2431 movups 16(%rcx),%xmm1
2432 xorps %xmm0,%xmm3
2433 leaq 32(%rcx),%rcx
2434 xorps %xmm3,%xmm2
2435 L$oop_enc1_15:
2436 .byte 102,15,56,220,209
2437 decl %eax
2438 movups (%rcx),%xmm1
2439 leaq 16(%rcx),%rcx
2440 jnz L$oop_enc1_15
2441 .byte 102,15,56,221,209
2442 movl %r10d,%eax
2443 movq %r11,%rcx
2444 movups %xmm2,0(%rsi)
2445 leaq 16(%rsi),%rsi
2446 subq $16,%rdx
2447 jnc L$cbc_enc_loop
2448 addq $16,%rdx
2449 jnz L$cbc_enc_tail
2450 movups %xmm2,(%r8)
2451 jmp L$cbc_ret
2452
2453 L$cbc_enc_tail:
2454 movq %rdx,%rcx
2455 xchgq %rdi,%rsi
2456 .long 0x9066A4F3
2457 movl $16,%ecx
2458 subq %rdx,%rcx
2459 xorl %eax,%eax
2460 .long 0x9066AAF3
2461 leaq -16(%rdi),%rdi
2462 movl %r10d,%eax
2463 movq %rdi,%rsi
2464 movq %r11,%rcx
2465 xorq %rdx,%rdx
2466 jmp L$cbc_enc_loop
2467
2468 .p2align 4
2469 L$cbc_decrypt:
2470 leaq (%rsp),%rax
2471 pushq %rbp
2472 subq $16,%rsp
2473 andq $-16,%rsp
2474 leaq -8(%rax),%rbp
2475 movups (%r8),%xmm10
2476 movl %r10d,%eax
2477 cmpq $80,%rdx
2478 jbe L$cbc_dec_tail
2479
2480 movups (%rcx),%xmm0
2481 movdqu 0(%rdi),%xmm2
2482 movdqu 16(%rdi),%xmm3
2483 movdqa %xmm2,%xmm11
2484 movdqu 32(%rdi),%xmm4
2485 movdqa %xmm3,%xmm12
2486 movdqu 48(%rdi),%xmm5
2487 movdqa %xmm4,%xmm13
2488 movdqu 64(%rdi),%xmm6
2489 movdqa %xmm5,%xmm14
2490 movdqu 80(%rdi),%xmm7
2491 movdqa %xmm6,%xmm15
2492 movl _OPENSSL_ia32cap_P+4(%rip),%r9d
2493 cmpq $112,%rdx
2494 jbe L$cbc_dec_six_or_seven
2495
2496 andl $71303168,%r9d
2497 subq $80,%rdx
2498 cmpl $4194304,%r9d
2499 je L$cbc_dec_loop6_enter
2500 subq $32,%rdx
2501 leaq 112(%rcx),%rcx
2502 jmp L$cbc_dec_loop8_enter
2503 .p2align 4
2504 L$cbc_dec_loop8:
2505 movups %xmm9,(%rsi)
2506 leaq 16(%rsi),%rsi
2507 L$cbc_dec_loop8_enter:
2508 movdqu 96(%rdi),%xmm8
2509 pxor %xmm0,%xmm2
2510 movdqu 112(%rdi),%xmm9
2511 pxor %xmm0,%xmm3
2512 movups 16-112(%rcx),%xmm1
2513 pxor %xmm0,%xmm4
2514 xorq %r11,%r11
2515 cmpq $112,%rdx
2516 pxor %xmm0,%xmm5
2517 pxor %xmm0,%xmm6
2518 pxor %xmm0,%xmm7
2519 pxor %xmm0,%xmm8
2520
2521 .byte 102,15,56,222,209
2522 pxor %xmm0,%xmm9
2523 movups 32-112(%rcx),%xmm0
2524 .byte 102,15,56,222,217
2525 .byte 102,15,56,222,225
2526 .byte 102,15,56,222,233
2527 .byte 102,15,56,222,241
2528 .byte 102,15,56,222,249
2529 .byte 102,68,15,56,222,193
2530 setnc %r11b
2531 shlq $7,%r11
2532 .byte 102,68,15,56,222,201
2533 addq %rdi,%r11
2534 movups 48-112(%rcx),%xmm1
2535 .byte 102,15,56,222,208
2536 .byte 102,15,56,222,216
2537 .byte 102,15,56,222,224
2538 .byte 102,15,56,222,232
2539 .byte 102,15,56,222,240
2540 .byte 102,15,56,222,248
2541 .byte 102,68,15,56,222,192
2542 .byte 102,68,15,56,222,200
2543 movups 64-112(%rcx),%xmm0
2544 nop
2545 .byte 102,15,56,222,209
2546 .byte 102,15,56,222,217
2547 .byte 102,15,56,222,225
2548 .byte 102,15,56,222,233
2549 .byte 102,15,56,222,241
2550 .byte 102,15,56,222,249
2551 .byte 102,68,15,56,222,193
2552 .byte 102,68,15,56,222,201
2553 movups 80-112(%rcx),%xmm1
2554 nop
2555 .byte 102,15,56,222,208
2556 .byte 102,15,56,222,216
2557 .byte 102,15,56,222,224
2558 .byte 102,15,56,222,232
2559 .byte 102,15,56,222,240
2560 .byte 102,15,56,222,248
2561 .byte 102,68,15,56,222,192
2562 .byte 102,68,15,56,222,200
2563 movups 96-112(%rcx),%xmm0
2564 nop
2565 .byte 102,15,56,222,209
2566 .byte 102,15,56,222,217
2567 .byte 102,15,56,222,225
2568 .byte 102,15,56,222,233
2569 .byte 102,15,56,222,241
2570 .byte 102,15,56,222,249
2571 .byte 102,68,15,56,222,193
2572 .byte 102,68,15,56,222,201
2573 movups 112-112(%rcx),%xmm1
2574 nop
2575 .byte 102,15,56,222,208
2576 .byte 102,15,56,222,216
2577 .byte 102,15,56,222,224
2578 .byte 102,15,56,222,232
2579 .byte 102,15,56,222,240
2580 .byte 102,15,56,222,248
2581 .byte 102,68,15,56,222,192
2582 .byte 102,68,15,56,222,200
2583 movups 128-112(%rcx),%xmm0
2584 nop
2585 .byte 102,15,56,222,209
2586 .byte 102,15,56,222,217
2587 .byte 102,15,56,222,225
2588 .byte 102,15,56,222,233
2589 .byte 102,15,56,222,241
2590 .byte 102,15,56,222,249
2591 .byte 102,68,15,56,222,193
2592 .byte 102,68,15,56,222,201
2593 movups 144-112(%rcx),%xmm1
2594 cmpl $11,%eax
2595 .byte 102,15,56,222,208
2596 .byte 102,15,56,222,216
2597 .byte 102,15,56,222,224
2598 .byte 102,15,56,222,232
2599 .byte 102,15,56,222,240
2600 .byte 102,15,56,222,248
2601 .byte 102,68,15,56,222,192
2602 .byte 102,68,15,56,222,200
2603 movups 160-112(%rcx),%xmm0
2604 jb L$cbc_dec_done
2605 .byte 102,15,56,222,209
2606 .byte 102,15,56,222,217
2607 .byte 102,15,56,222,225
2608 .byte 102,15,56,222,233
2609 .byte 102,15,56,222,241
2610 .byte 102,15,56,222,249
2611 .byte 102,68,15,56,222,193
2612 .byte 102,68,15,56,222,201
2613 movups 176-112(%rcx),%xmm1
2614 nop
2615 .byte 102,15,56,222,208
2616 .byte 102,15,56,222,216
2617 .byte 102,15,56,222,224
2618 .byte 102,15,56,222,232
2619 .byte 102,15,56,222,240
2620 .byte 102,15,56,222,248
2621 .byte 102,68,15,56,222,192
2622 .byte 102,68,15,56,222,200
2623 movups 192-112(%rcx),%xmm0
2624 je L$cbc_dec_done
2625 .byte 102,15,56,222,209
2626 .byte 102,15,56,222,217
2627 .byte 102,15,56,222,225
2628 .byte 102,15,56,222,233
2629 .byte 102,15,56,222,241
2630 .byte 102,15,56,222,249
2631 .byte 102,68,15,56,222,193
2632 .byte 102,68,15,56,222,201
2633 movups 208-112(%rcx),%xmm1
2634 nop
2635 .byte 102,15,56,222,208
2636 .byte 102,15,56,222,216
2637 .byte 102,15,56,222,224
2638 .byte 102,15,56,222,232
2639 .byte 102,15,56,222,240
2640 .byte 102,15,56,222,248
2641 .byte 102,68,15,56,222,192
2642 .byte 102,68,15,56,222,200
2643 movups 224-112(%rcx),%xmm0
2644 jmp L$cbc_dec_done
2645 .p2align 4
2646 L$cbc_dec_done:
2647 .byte 102,15,56,222,209
2648 .byte 102,15,56,222,217
2649 pxor %xmm0,%xmm10
2650 pxor %xmm0,%xmm11
2651 .byte 102,15,56,222,225
2652 .byte 102,15,56,222,233
2653 pxor %xmm0,%xmm12
2654 pxor %xmm0,%xmm13
2655 .byte 102,15,56,222,241
2656 .byte 102,15,56,222,249
2657 pxor %xmm0,%xmm14
2658 pxor %xmm0,%xmm15
2659 .byte 102,68,15,56,222,193
2660 .byte 102,68,15,56,222,201
2661 movdqu 80(%rdi),%xmm1
2662
2663 .byte 102,65,15,56,223,210
2664 movdqu 96(%rdi),%xmm10
2665 pxor %xmm0,%xmm1
2666 .byte 102,65,15,56,223,219
2667 pxor %xmm0,%xmm10
2668 movdqu 112(%rdi),%xmm0
2669 .byte 102,65,15,56,223,228
2670 leaq 128(%rdi),%rdi
2671 movdqu 0(%r11),%xmm11
2672 .byte 102,65,15,56,223,237
2673 .byte 102,65,15,56,223,246
2674 movdqu 16(%r11),%xmm12
2675 movdqu 32(%r11),%xmm13
2676 .byte 102,65,15,56,223,255
2677 .byte 102,68,15,56,223,193
2678 movdqu 48(%r11),%xmm14
2679 movdqu 64(%r11),%xmm15
2680 .byte 102,69,15,56,223,202
2681 movdqa %xmm0,%xmm10
2682 movdqu 80(%r11),%xmm1
2683 movups -112(%rcx),%xmm0
2684
2685 movups %xmm2,(%rsi)
2686 movdqa %xmm11,%xmm2
2687 movups %xmm3,16(%rsi)
2688 movdqa %xmm12,%xmm3
2689 movups %xmm4,32(%rsi)
2690 movdqa %xmm13,%xmm4
2691 movups %xmm5,48(%rsi)
2692 movdqa %xmm14,%xmm5
2693 movups %xmm6,64(%rsi)
2694 movdqa %xmm15,%xmm6
2695 movups %xmm7,80(%rsi)
2696 movdqa %xmm1,%xmm7
2697 movups %xmm8,96(%rsi)
2698 leaq 112(%rsi),%rsi
2699
2700 subq $128,%rdx
2701 ja L$cbc_dec_loop8
2702
2703 movaps %xmm9,%xmm2
2704 leaq -112(%rcx),%rcx
2705 addq $112,%rdx
2706 jle L$cbc_dec_tail_collected
2707 movups %xmm9,(%rsi)
2708 leaq 16(%rsi),%rsi
2709 cmpq $80,%rdx
2710 jbe L$cbc_dec_tail
2711
2712 movaps %xmm11,%xmm2
2713 L$cbc_dec_six_or_seven:
2714 cmpq $96,%rdx
2715 ja L$cbc_dec_seven
2716
2717 movaps %xmm7,%xmm8
2718 call _aesni_decrypt6
2719 pxor %xmm10,%xmm2
2720 movaps %xmm8,%xmm10
2721 pxor %xmm11,%xmm3
2722 movdqu %xmm2,(%rsi)
2723 pxor %xmm12,%xmm4
2724 movdqu %xmm3,16(%rsi)
2725 pxor %xmm13,%xmm5
2726 movdqu %xmm4,32(%rsi)
2727 pxor %xmm14,%xmm6
2728 movdqu %xmm5,48(%rsi)
2729 pxor %xmm15,%xmm7
2730 movdqu %xmm6,64(%rsi)
2731 leaq 80(%rsi),%rsi
2732 movdqa %xmm7,%xmm2
2733 jmp L$cbc_dec_tail_collected
2734
2735 .p2align 4
2736 L$cbc_dec_seven:
2737 movups 96(%rdi),%xmm8
2738 xorps %xmm9,%xmm9
2739 call _aesni_decrypt8
2740 movups 80(%rdi),%xmm9
2741 pxor %xmm10,%xmm2
2742 movups 96(%rdi),%xmm10
2743 pxor %xmm11,%xmm3
2744 movdqu %xmm2,(%rsi)
2745 pxor %xmm12,%xmm4
2746 movdqu %xmm3,16(%rsi)
2747 pxor %xmm13,%xmm5
2748 movdqu %xmm4,32(%rsi)
2749 pxor %xmm14,%xmm6
2750 movdqu %xmm5,48(%rsi)
2751 pxor %xmm15,%xmm7
2752 movdqu %xmm6,64(%rsi)
2753 pxor %xmm9,%xmm8
2754 movdqu %xmm7,80(%rsi)
2755 leaq 96(%rsi),%rsi
2756 movdqa %xmm8,%xmm2
2757 jmp L$cbc_dec_tail_collected
2758
2759 .p2align 4
2760 L$cbc_dec_loop6:
2761 movups %xmm7,(%rsi)
2762 leaq 16(%rsi),%rsi
2763 movdqu 0(%rdi),%xmm2
2764 movdqu 16(%rdi),%xmm3
2765 movdqa %xmm2,%xmm11
2766 movdqu 32(%rdi),%xmm4
2767 movdqa %xmm3,%xmm12
2768 movdqu 48(%rdi),%xmm5
2769 movdqa %xmm4,%xmm13
2770 movdqu 64(%rdi),%xmm6
2771 movdqa %xmm5,%xmm14
2772 movdqu 80(%rdi),%xmm7
2773 movdqa %xmm6,%xmm15
2774 L$cbc_dec_loop6_enter:
2775 leaq 96(%rdi),%rdi
2776 movdqa %xmm7,%xmm8
2777
2778 call _aesni_decrypt6
2779
2780 pxor %xmm10,%xmm2
2781 movdqa %xmm8,%xmm10
2782 pxor %xmm11,%xmm3
2783 movdqu %xmm2,(%rsi)
2784 pxor %xmm12,%xmm4
2785 movdqu %xmm3,16(%rsi)
2786 pxor %xmm13,%xmm5
2787 movdqu %xmm4,32(%rsi)
2788 pxor %xmm14,%xmm6
2789 movq %r11,%rcx
2790 movdqu %xmm5,48(%rsi)
2791 pxor %xmm15,%xmm7
2792 movl %r10d,%eax
2793 movdqu %xmm6,64(%rsi)
2794 leaq 80(%rsi),%rsi
2795 subq $96,%rdx
2796 ja L$cbc_dec_loop6
2797
2798 movdqa %xmm7,%xmm2
2799 addq $80,%rdx
2800 jle L$cbc_dec_tail_collected
2801 movups %xmm7,(%rsi)
2802 leaq 16(%rsi),%rsi
2803
2804 L$cbc_dec_tail:
2805 movups (%rdi),%xmm2
2806 subq $16,%rdx
2807 jbe L$cbc_dec_one
2808
2809 movups 16(%rdi),%xmm3
2810 movaps %xmm2,%xmm11
2811 subq $16,%rdx
2812 jbe L$cbc_dec_two
2813
2814 movups 32(%rdi),%xmm4
2815 movaps %xmm3,%xmm12
2816 subq $16,%rdx
2817 jbe L$cbc_dec_three
2818
2819 movups 48(%rdi),%xmm5
2820 movaps %xmm4,%xmm13
2821 subq $16,%rdx
2822 jbe L$cbc_dec_four
2823
2824 movups 64(%rdi),%xmm6
2825 movaps %xmm5,%xmm14
2826 movaps %xmm6,%xmm15
2827 xorps %xmm7,%xmm7
2828 call _aesni_decrypt6
2829 pxor %xmm10,%xmm2
2830 movaps %xmm15,%xmm10
2831 pxor %xmm11,%xmm3
2832 movdqu %xmm2,(%rsi)
2833 pxor %xmm12,%xmm4
2834 movdqu %xmm3,16(%rsi)
2835 pxor %xmm13,%xmm5
2836 movdqu %xmm4,32(%rsi)
2837 pxor %xmm14,%xmm6
2838 movdqu %xmm5,48(%rsi)
2839 leaq 64(%rsi),%rsi
2840 movdqa %xmm6,%xmm2
2841 subq $16,%rdx
2842 jmp L$cbc_dec_tail_collected
2843
2844 .p2align 4
2845 L$cbc_dec_one:
2846 movaps %xmm2,%xmm11
2847 movups (%rcx),%xmm0
2848 movups 16(%rcx),%xmm1
2849 leaq 32(%rcx),%rcx
2850 xorps %xmm0,%xmm2
2851 L$oop_dec1_16:
2852 .byte 102,15,56,222,209
2853 decl %eax
2854 movups (%rcx),%xmm1
2855 leaq 16(%rcx),%rcx
2856 jnz L$oop_dec1_16
2857 .byte 102,15,56,223,209
2858 xorps %xmm10,%xmm2
2859 movaps %xmm11,%xmm10
2860 jmp L$cbc_dec_tail_collected
2861 .p2align 4
2862 L$cbc_dec_two:
2863 movaps %xmm3,%xmm12
2864 call _aesni_decrypt2
2865 pxor %xmm10,%xmm2
2866 movaps %xmm12,%xmm10
2867 pxor %xmm11,%xmm3
2868 movdqu %xmm2,(%rsi)
2869 movdqa %xmm3,%xmm2
2870 leaq 16(%rsi),%rsi
2871 jmp L$cbc_dec_tail_collected
2872 .p2align 4
2873 L$cbc_dec_three:
2874 movaps %xmm4,%xmm13
2875 call _aesni_decrypt3
2876 pxor %xmm10,%xmm2
2877 movaps %xmm13,%xmm10
2878 pxor %xmm11,%xmm3
2879 movdqu %xmm2,(%rsi)
2880 pxor %xmm12,%xmm4
2881 movdqu %xmm3,16(%rsi)
2882 movdqa %xmm4,%xmm2
2883 leaq 32(%rsi),%rsi
2884 jmp L$cbc_dec_tail_collected
2885 .p2align 4
2886 L$cbc_dec_four:
2887 movaps %xmm5,%xmm14
2888 call _aesni_decrypt4
2889 pxor %xmm10,%xmm2
2890 movaps %xmm14,%xmm10
2891 pxor %xmm11,%xmm3
2892 movdqu %xmm2,(%rsi)
2893 pxor %xmm12,%xmm4
2894 movdqu %xmm3,16(%rsi)
2895 pxor %xmm13,%xmm5
2896 movdqu %xmm4,32(%rsi)
2897 movdqa %xmm5,%xmm2
2898 leaq 48(%rsi),%rsi
2899 jmp L$cbc_dec_tail_collected
2900
2901 .p2align 4
2902 L$cbc_dec_tail_collected:
2903 movups %xmm10,(%r8)
2904 andq $15,%rdx
2905 jnz L$cbc_dec_tail_partial
2906 movups %xmm2,(%rsi)
2907 jmp L$cbc_dec_ret
2908 .p2align 4
2909 L$cbc_dec_tail_partial:
2910 movaps %xmm2,(%rsp)
2911 movq $16,%rcx
2912 movq %rsi,%rdi
2913 subq %rdx,%rcx
2914 leaq (%rsp),%rsi
2915 .long 0x9066A4F3
2916
2917 L$cbc_dec_ret:
2918 leaq (%rbp),%rsp
2919 popq %rbp
2920 L$cbc_ret:
2921 .byte 0xf3,0xc3
2922
2923 .globl _aesni_set_decrypt_key
2924 
/*
 * int aesni_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
 * SysV AMD64: rdi = userKey, esi = bits, rdx = key; returns status in eax
 * (0 on success, nonzero error propagated from __aesni_set_encrypt_key).
 *
 * Builds the encryption schedule first, then converts it in place into a
 * decryption schedule: the round keys are reversed end-for-end and every
 * inner round key is passed through AESIMC (InvMixColumns), as required by
 * the AES-NI "Equivalent Inverse Cipher" aesdec/aesdeclast instructions.
 * Raw .byte sequences encode AES-NI opcodes for old assemblers.
 */
2925 .p2align 4
2926 _aesni_set_decrypt_key:
2927 .byte 0x48,0x83,0xEC,0x08	/* subq $8,%rsp (keeps rsp 16-aligned for the call below) */
2928 call __aesni_set_encrypt_key
2929 shll $4,%esi	/* esi = rounds*16 = byte offset of the last round key */
2930 testl %eax,%eax
2931 jnz L$dec_key_ret	/* bail out if key setup failed */
2932 leaq 16(%rdx,%rsi,1),%rdi	/* rdi -> last round key; rdx -> first */
2933 
/* Swap the outermost pair of round keys (no AESIMC on the first/last key). */
2934 movups (%rdx),%xmm0
2935 movups (%rdi),%xmm1
2936 movups %xmm0,(%rdi)
2937 movups %xmm1,(%rdx)
2938 leaq 16(%rdx),%rdx
2939 leaq -16(%rdi),%rdi
2940 
/* Walk the two pointers toward the middle, swapping round keys and applying
 * AESIMC to each as it is moved. */
2941 L$dec_key_inverse:
2942 movups (%rdx),%xmm0
2943 movups (%rdi),%xmm1
2944 .byte 102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
2945 .byte 102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
2946 leaq 16(%rdx),%rdx
2947 leaq -16(%rdi),%rdi
2948 movups %xmm0,16(%rdi)
2949 movups %xmm1,-16(%rdx)
2950 cmpq %rdx,%rdi
2951 ja L$dec_key_inverse
2952 
/* Middle round key stays in place but still needs AESIMC. */
2953 movups (%rdx),%xmm0
2954 .byte 102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
2955 movups %xmm0,(%rdi)
2956 L$dec_key_ret:
2957 addq $8,%rsp
2958 .byte 0xf3,0xc3	/* repz ret */
2959 L$SEH_end_set_decrypt_key:
2960
2961 .globl _aesni_set_encrypt_key
2962 
/*
 * int aesni_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
 * SysV AMD64: rdi = userKey, esi = bits (128/192/256), rdx = key schedule out.
 * Returns eax = 0 on success, -1 if userKey or key is NULL, -2 on bad bits.
 *
 * Expands the user key with AESKEYGENASSIST (emitted as raw .byte opcodes;
 * each immediate is the round constant rcon) plus the shufps-based expansion
 * helpers below.  The round count is stored after the last round key
 * (AES_KEY.rounds, at offset 240 from the schedule base — cf. the
 * movl 240(%rdx),%eax loads in the encrypt/decrypt routines): 9/11/13 here
 * is the aesenc loop count, i.e. 10/12/14 total rounds with aesenclast.
 * __aesni_set_encrypt_key is the internal alias used by set_decrypt_key.
 */
2963 .p2align 4
2964 _aesni_set_encrypt_key:
2965 __aesni_set_encrypt_key:
2966 .byte 0x48,0x83,0xEC,0x08	/* subq $8,%rsp */
2967 movq $-1,%rax	/* default: -1 for NULL-pointer arguments */
2968 testq %rdi,%rdi
2969 jz L$enc_key_ret
2970 testq %rdx,%rdx
2971 jz L$enc_key_ret
2972 
2973 movups (%rdi),%xmm0	/* xmm0 = first 16 bytes of user key */
2974 xorps %xmm4,%xmm4	/* scratch zero used by the expansion helpers */
2975 leaq 16(%rdx),%rax	/* rax = output cursor (second round-key slot) */
2976 cmpl $256,%esi
2977 je L$14rounds
2978 cmpl $192,%esi
2979 je L$12rounds
2980 cmpl $128,%esi
2981 jne L$bad_keybits
2982 
/* AES-128: 10 expansion steps, rcon = 1,2,4,...,0x80,0x1b,0x36. */
2983 L$10rounds:
2984 movl $9,%esi	/* loop count for aesenc (10 rounds incl. aesenclast) */
2985 movups %xmm0,(%rdx)	/* round key 0 = raw user key */
2986 .byte 102,15,58,223,200,1	/* aeskeygenassist $0x1,%xmm0,%xmm1 */
2987 call L$key_expansion_128_cold
2988 .byte 102,15,58,223,200,2	/* aeskeygenassist $0x2,%xmm0,%xmm1 */
2989 call L$key_expansion_128
2990 .byte 102,15,58,223,200,4	/* aeskeygenassist $0x4,%xmm0,%xmm1 */
2991 call L$key_expansion_128
2992 .byte 102,15,58,223,200,8	/* aeskeygenassist $0x8,%xmm0,%xmm1 */
2993 call L$key_expansion_128
2994 .byte 102,15,58,223,200,16	/* aeskeygenassist $0x10,%xmm0,%xmm1 */
2995 call L$key_expansion_128
2996 .byte 102,15,58,223,200,32	/* aeskeygenassist $0x20,%xmm0,%xmm1 */
2997 call L$key_expansion_128
2998 .byte 102,15,58,223,200,64	/* aeskeygenassist $0x40,%xmm0,%xmm1 */
2999 call L$key_expansion_128
3000 .byte 102,15,58,223,200,128	/* aeskeygenassist $0x80,%xmm0,%xmm1 */
3001 call L$key_expansion_128
3002 .byte 102,15,58,223,200,27	/* aeskeygenassist $0x1b,%xmm0,%xmm1 */
3003 call L$key_expansion_128
3004 .byte 102,15,58,223,200,54	/* aeskeygenassist $0x36,%xmm0,%xmm1 */
3005 call L$key_expansion_128
3006 movups %xmm0,(%rax)	/* store final round key */
3007 movl %esi,80(%rax)	/* rounds field (offset 240 from schedule base) */
3008 xorl %eax,%eax	/* success */
3009 jmp L$enc_key_ret
3010 
/* AES-192: 8 expansion steps alternating the 192a/192b helpers. */
3011 .p2align 4
3012 L$12rounds:
3013 movq 16(%rdi),%xmm2	/* low 8 bytes: key bytes 16..23 */
3014 movl $11,%esi	/* 12 rounds */
3015 movups %xmm0,(%rdx)
3016 .byte 102,15,58,223,202,1	/* aeskeygenassist $0x1,%xmm2,%xmm1 */
3017 call L$key_expansion_192a_cold
3018 .byte 102,15,58,223,202,2	/* aeskeygenassist $0x2,%xmm2,%xmm1 */
3019 call L$key_expansion_192b
3020 .byte 102,15,58,223,202,4	/* aeskeygenassist $0x4,%xmm2,%xmm1 */
3021 call L$key_expansion_192a
3022 .byte 102,15,58,223,202,8	/* aeskeygenassist $0x8,%xmm2,%xmm1 */
3023 call L$key_expansion_192b
3024 .byte 102,15,58,223,202,16	/* aeskeygenassist $0x10,%xmm2,%xmm1 */
3025 call L$key_expansion_192a
3026 .byte 102,15,58,223,202,32	/* aeskeygenassist $0x20,%xmm2,%xmm1 */
3027 call L$key_expansion_192b
3028 .byte 102,15,58,223,202,64	/* aeskeygenassist $0x40,%xmm2,%xmm1 */
3029 call L$key_expansion_192a
3030 .byte 102,15,58,223,202,128	/* aeskeygenassist $0x80,%xmm2,%xmm1 */
3031 call L$key_expansion_192b
3032 movups %xmm0,(%rax)
3033 movl %esi,48(%rax)	/* rounds field (offset 240 from schedule base) */
3034 xorq %rax,%rax	/* success */
3035 jmp L$enc_key_ret
3036 
/* AES-256: 13 expansion steps alternating the 256a/256b helpers. */
3037 .p2align 4
3038 L$14rounds:
3039 movups 16(%rdi),%xmm2	/* key bytes 16..31 */
3040 movl $13,%esi	/* 14 rounds */
3041 leaq 16(%rax),%rax	/* first two round keys are stored directly */
3042 movups %xmm0,(%rdx)
3043 movups %xmm2,16(%rdx)
3044 .byte 102,15,58,223,202,1	/* aeskeygenassist $0x1,%xmm2,%xmm1 */
3045 call L$key_expansion_256a_cold
3046 .byte 102,15,58,223,200,1	/* aeskeygenassist $0x1,%xmm0,%xmm1 */
3047 call L$key_expansion_256b
3048 .byte 102,15,58,223,202,2	/* aeskeygenassist $0x2,%xmm2,%xmm1 */
3049 call L$key_expansion_256a
3050 .byte 102,15,58,223,200,2	/* aeskeygenassist $0x2,%xmm0,%xmm1 */
3051 call L$key_expansion_256b
3052 .byte 102,15,58,223,202,4	/* aeskeygenassist $0x4,%xmm2,%xmm1 */
3053 call L$key_expansion_256a
3054 .byte 102,15,58,223,200,4	/* aeskeygenassist $0x4,%xmm0,%xmm1 */
3055 call L$key_expansion_256b
3056 .byte 102,15,58,223,202,8	/* aeskeygenassist $0x8,%xmm2,%xmm1 */
3057 call L$key_expansion_256a
3058 .byte 102,15,58,223,200,8	/* aeskeygenassist $0x8,%xmm0,%xmm1 */
3059 call L$key_expansion_256b
3060 .byte 102,15,58,223,202,16	/* aeskeygenassist $0x10,%xmm2,%xmm1 */
3061 call L$key_expansion_256a
3062 .byte 102,15,58,223,200,16	/* aeskeygenassist $0x10,%xmm0,%xmm1 */
3063 call L$key_expansion_256b
3064 .byte 102,15,58,223,202,32	/* aeskeygenassist $0x20,%xmm2,%xmm1 */
3065 call L$key_expansion_256a
3066 .byte 102,15,58,223,200,32	/* aeskeygenassist $0x20,%xmm0,%xmm1 */
3067 call L$key_expansion_256b
3068 .byte 102,15,58,223,202,64	/* aeskeygenassist $0x40,%xmm2,%xmm1 */
3069 call L$key_expansion_256a
3070 movups %xmm0,(%rax)
3071 movl %esi,16(%rax)	/* rounds field (offset 240 from schedule base) */
3072 xorq %rax,%rax	/* success */
3073 jmp L$enc_key_ret
3074 
3075 .p2align 4
3076 L$bad_keybits:
3077 movq $-2,%rax	/* -2: unsupported key length */
3078 L$enc_key_ret:
3079 addq $8,%rsp
3080 .byte 0xf3,0xc3	/* repz ret */
3081 L$SEH_end_set_encrypt_key:
3082
/*
 * AES-128 key-expansion step.  In: xmm0 = previous round key,
 * xmm1 = aeskeygenassist output, xmm4 = scratch (zeroed by caller),
 * rax = output cursor.  Out: xmm0 = next round key (also stored at the
 * normal entry), rax advanced by 16.  The _cold entry skips the store
 * because round key 0 was already written by the caller.
 */
3083 .p2align 4
3084 L$key_expansion_128:
3085 movups %xmm0,(%rax)
3086 leaq 16(%rax),%rax
3087 L$key_expansion_128_cold:
/* shufps pair accumulates prev<<32, prev<<64, prev<<96 XORs into xmm0. */
3088 shufps $16,%xmm0,%xmm4
3089 xorps %xmm4,%xmm0
3090 shufps $140,%xmm0,%xmm4
3091 xorps %xmm4,%xmm0
3092 shufps $255,%xmm1,%xmm1	/* broadcast RotWord/SubWord+rcon lane */
3093 xorps %xmm1,%xmm0
3094 .byte 0xf3,0xc3	/* repz ret */
3095
/*
 * AES-192 key-expansion step, variant "a".  In: xmm0/xmm2 = 192-bit key
 * state, xmm1 = aeskeygenassist output, xmm4 = scratch, rax = output
 * cursor.  Stores the full xmm0 at the normal entry (the _cold entry
 * skips it) then falls through to the shared warm path, which updates
 * both xmm0 and the 64-bit tail in xmm2.  xmm5 snapshots xmm2 for the
 * interleaved store performed later by L$key_expansion_192b.
 */
3096 .p2align 4
3097 L$key_expansion_192a:
3098 movups %xmm0,(%rax)
3099 leaq 16(%rax),%rax
3100 L$key_expansion_192a_cold:
3101 movaps %xmm2,%xmm5	/* save tail for 192b's combined store */
3102 L$key_expansion_192b_warm:
3103 shufps $16,%xmm0,%xmm4
3104 movdqa %xmm2,%xmm3
3105 xorps %xmm4,%xmm0
3106 shufps $140,%xmm0,%xmm4
3107 pslldq $4,%xmm3
3108 xorps %xmm4,%xmm0
3109 pshufd $85,%xmm1,%xmm1	/* broadcast keygenassist lane 1 */
3110 pxor %xmm3,%xmm2
3111 pxor %xmm1,%xmm0
3112 pshufd $255,%xmm0,%xmm3	/* broadcast new last word of xmm0 */
3113 pxor %xmm3,%xmm2	/* update 64-bit tail */
3114 .byte 0xf3,0xc3	/* repz ret */
3115
/*
 * AES-192 key-expansion step, variant "b".  Packs the saved tail (xmm5)
 * with the current state into two 16-byte round keys, stores them, then
 * jumps into the shared warm path to compute the next state.
 */
3116 .p2align 4
3117 L$key_expansion_192b:
3118 movaps %xmm0,%xmm3
3119 shufps $68,%xmm0,%xmm5	/* round key = old tail | low half of xmm0 */
3120 movups %xmm5,(%rax)
3121 shufps $78,%xmm2,%xmm3	/* next key = high half of xmm0 | tail */
3122 movups %xmm3,16(%rax)
3123 leaq 32(%rax),%rax
3124 jmp L$key_expansion_192b_warm
3125
/*
 * AES-256 key-expansion step, even half: derives the next xmm0 from
 * aeskeygenassist(xmm2).  Stores xmm2 at the normal entry (the _cold
 * entry skips it because the caller already wrote the first two round
 * keys).  Same shufps accumulation as the 128-bit step.
 */
3126 .p2align 4
3127 L$key_expansion_256a:
3128 movups %xmm2,(%rax)
3129 leaq 16(%rax),%rax
3130 L$key_expansion_256a_cold:
3131 shufps $16,%xmm0,%xmm4
3132 xorps %xmm4,%xmm0
3133 shufps $140,%xmm0,%xmm4
3134 xorps %xmm4,%xmm0
3135 shufps $255,%xmm1,%xmm1	/* broadcast RotWord/SubWord+rcon lane */
3136 xorps %xmm1,%xmm0
3137 .byte 0xf3,0xc3	/* repz ret */
3138
/*
 * AES-256 key-expansion step, odd half: stores xmm0 and derives the next
 * xmm2 from aeskeygenassist(xmm0).  Note shufps $170 (lane 2, SubWord
 * without rotate/rcon), as the odd half of the AES-256 schedule requires.
 */
3139 .p2align 4
3140 L$key_expansion_256b:
3141 movups %xmm0,(%rax)
3142 leaq 16(%rax),%rax
3143 
3144 shufps $16,%xmm2,%xmm4
3145 xorps %xmm4,%xmm2
3146 shufps $140,%xmm2,%xmm4
3147 xorps %xmm4,%xmm2
3148 shufps $170,%xmm1,%xmm1	/* broadcast SubWord lane (no rcon) */
3149 xorps %xmm1,%xmm2
3150 .byte 0xf3,0xc3	/* repz ret */
3151
3152
/* 64-byte-aligned constant pool shared by the AES-NI mode implementations. */
3153 .p2align 6
3154 L$bswap_mask:
/* pshufb mask reversing all 16 bytes (endianness swap of a 128-bit block). */
3155 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
3156 L$increment32:
/* NOTE(review): presumably a CTR-mode counter increment of 6 per lane group
 * (this file processes blocks 6/8 at a time) — usage is outside this view. */
3157 .long 6,6,6,0
3158 L$increment64:
/* 128-bit little-endian constant 1 (low qword = 1). */
3159 .long 1,0,0,0
3160 L$xts_magic:
/* 0x87: the GF(2^128) feedback constant used for XTS tweak doubling. */
3161 .long 0x87,0,1,0
3162 L$increment1:
/* Big-endian 128-bit constant 1, for byte-swapped counter arithmetic. */
3163 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3164 
/* ASCII banner: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>". */
3165 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32 ,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101 ,110,115,115,108,46,111,114,103,62,0
3166 .p2align 6
3167 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86_64/crypto/aes/aes-x86_64.S ('k') | third_party/boringssl/mac-x86_64/crypto/aes/bsaes-x86_64.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698