Chromium Code Reviews

Side by Side Diff: third_party/boringssl/mac-x86_64/crypto/aes/aesni-x86_64.S

Issue 2829743002: Roll src/third_party/boringssl/src bc6a76b0e..777fdd644 (Closed)
Patch Set: Created 3 years, 8 months ago
(Old file: empty; this file is newly added by the patch.)
1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
2 .text
3
4 .globl _aesni_encrypt
5 .private_extern _aesni_encrypt
6
7 .p2align 4
8 _aesni_encrypt:
9 movups (%rdi),%xmm2
10 movl 240(%rdx),%eax
11 movups (%rdx),%xmm0
12 movups 16(%rdx),%xmm1
13 leaq 32(%rdx),%rdx
14 xorps %xmm0,%xmm2
15 L$oop_enc1_1:
16 .byte 102,15,56,220,209
17 decl %eax
18 movups (%rdx),%xmm1
19 leaq 16(%rdx),%rdx
20 jnz L$oop_enc1_1
21 .byte 102,15,56,221,209
22 pxor %xmm0,%xmm0
23 pxor %xmm1,%xmm1
24 movups %xmm2,(%rsi)
25 pxor %xmm2,%xmm2
26 .byte 0xf3,0xc3
27
28
29 .globl _aesni_decrypt
30 .private_extern _aesni_decrypt
31
32 .p2align 4
33 _aesni_decrypt:
34 movups (%rdi),%xmm2
35 movl 240(%rdx),%eax
36 movups (%rdx),%xmm0
37 movups 16(%rdx),%xmm1
38 leaq 32(%rdx),%rdx
39 xorps %xmm0,%xmm2
40 L$oop_dec1_2:
41 .byte 102,15,56,222,209
42 decl %eax
43 movups (%rdx),%xmm1
44 leaq 16(%rdx),%rdx
45 jnz L$oop_dec1_2
46 .byte 102,15,56,223,209
47 pxor %xmm0,%xmm0
48 pxor %xmm1,%xmm1
49 movups %xmm2,(%rsi)
50 pxor %xmm2,%xmm2
51 .byte 0xf3,0xc3
52
53
54 .p2align 4
55 _aesni_encrypt2:
56 movups (%rcx),%xmm0
57 shll $4,%eax
58 movups 16(%rcx),%xmm1
59 xorps %xmm0,%xmm2
60 xorps %xmm0,%xmm3
61 movups 32(%rcx),%xmm0
62 leaq 32(%rcx,%rax,1),%rcx
63 negq %rax
64 addq $16,%rax
65
66 L$enc_loop2:
67 .byte 102,15,56,220,209
68 .byte 102,15,56,220,217
69 movups (%rcx,%rax,1),%xmm1
70 addq $32,%rax
71 .byte 102,15,56,220,208
72 .byte 102,15,56,220,216
73 movups -16(%rcx,%rax,1),%xmm0
74 jnz L$enc_loop2
75
76 .byte 102,15,56,220,209
77 .byte 102,15,56,220,217
78 .byte 102,15,56,221,208
79 .byte 102,15,56,221,216
80 .byte 0xf3,0xc3
81
82
83 .p2align 4
84 _aesni_decrypt2:
85 movups (%rcx),%xmm0
86 shll $4,%eax
87 movups 16(%rcx),%xmm1
88 xorps %xmm0,%xmm2
89 xorps %xmm0,%xmm3
90 movups 32(%rcx),%xmm0
91 leaq 32(%rcx,%rax,1),%rcx
92 negq %rax
93 addq $16,%rax
94
95 L$dec_loop2:
96 .byte 102,15,56,222,209
97 .byte 102,15,56,222,217
98 movups (%rcx,%rax,1),%xmm1
99 addq $32,%rax
100 .byte 102,15,56,222,208
101 .byte 102,15,56,222,216
102 movups -16(%rcx,%rax,1),%xmm0
103 jnz L$dec_loop2
104
105 .byte 102,15,56,222,209
106 .byte 102,15,56,222,217
107 .byte 102,15,56,223,208
108 .byte 102,15,56,223,216
109 .byte 0xf3,0xc3
110
111
112 .p2align 4
113 _aesni_encrypt3:
114 movups (%rcx),%xmm0
115 shll $4,%eax
116 movups 16(%rcx),%xmm1
117 xorps %xmm0,%xmm2
118 xorps %xmm0,%xmm3
119 xorps %xmm0,%xmm4
120 movups 32(%rcx),%xmm0
121 leaq 32(%rcx,%rax,1),%rcx
122 negq %rax
123 addq $16,%rax
124
125 L$enc_loop3:
126 .byte 102,15,56,220,209
127 .byte 102,15,56,220,217
128 .byte 102,15,56,220,225
129 movups (%rcx,%rax,1),%xmm1
130 addq $32,%rax
131 .byte 102,15,56,220,208
132 .byte 102,15,56,220,216
133 .byte 102,15,56,220,224
134 movups -16(%rcx,%rax,1),%xmm0
135 jnz L$enc_loop3
136
137 .byte 102,15,56,220,209
138 .byte 102,15,56,220,217
139 .byte 102,15,56,220,225
140 .byte 102,15,56,221,208
141 .byte 102,15,56,221,216
142 .byte 102,15,56,221,224
143 .byte 0xf3,0xc3
144
145
146 .p2align 4
147 _aesni_decrypt3:
148 movups (%rcx),%xmm0
149 shll $4,%eax
150 movups 16(%rcx),%xmm1
151 xorps %xmm0,%xmm2
152 xorps %xmm0,%xmm3
153 xorps %xmm0,%xmm4
154 movups 32(%rcx),%xmm0
155 leaq 32(%rcx,%rax,1),%rcx
156 negq %rax
157 addq $16,%rax
158
159 L$dec_loop3:
160 .byte 102,15,56,222,209
161 .byte 102,15,56,222,217
162 .byte 102,15,56,222,225
163 movups (%rcx,%rax,1),%xmm1
164 addq $32,%rax
165 .byte 102,15,56,222,208
166 .byte 102,15,56,222,216
167 .byte 102,15,56,222,224
168 movups -16(%rcx,%rax,1),%xmm0
169 jnz L$dec_loop3
170
171 .byte 102,15,56,222,209
172 .byte 102,15,56,222,217
173 .byte 102,15,56,222,225
174 .byte 102,15,56,223,208
175 .byte 102,15,56,223,216
176 .byte 102,15,56,223,224
177 .byte 0xf3,0xc3
178
179
180 .p2align 4
181 _aesni_encrypt4:
182 movups (%rcx),%xmm0
183 shll $4,%eax
184 movups 16(%rcx),%xmm1
185 xorps %xmm0,%xmm2
186 xorps %xmm0,%xmm3
187 xorps %xmm0,%xmm4
188 xorps %xmm0,%xmm5
189 movups 32(%rcx),%xmm0
190 leaq 32(%rcx,%rax,1),%rcx
191 negq %rax
192 .byte 0x0f,0x1f,0x00
193 addq $16,%rax
194
195 L$enc_loop4:
196 .byte 102,15,56,220,209
197 .byte 102,15,56,220,217
198 .byte 102,15,56,220,225
199 .byte 102,15,56,220,233
200 movups (%rcx,%rax,1),%xmm1
201 addq $32,%rax
202 .byte 102,15,56,220,208
203 .byte 102,15,56,220,216
204 .byte 102,15,56,220,224
205 .byte 102,15,56,220,232
206 movups -16(%rcx,%rax,1),%xmm0
207 jnz L$enc_loop4
208
209 .byte 102,15,56,220,209
210 .byte 102,15,56,220,217
211 .byte 102,15,56,220,225
212 .byte 102,15,56,220,233
213 .byte 102,15,56,221,208
214 .byte 102,15,56,221,216
215 .byte 102,15,56,221,224
216 .byte 102,15,56,221,232
217 .byte 0xf3,0xc3
218
219
220 .p2align 4
221 _aesni_decrypt4:
222 movups (%rcx),%xmm0
223 shll $4,%eax
224 movups 16(%rcx),%xmm1
225 xorps %xmm0,%xmm2
226 xorps %xmm0,%xmm3
227 xorps %xmm0,%xmm4
228 xorps %xmm0,%xmm5
229 movups 32(%rcx),%xmm0
230 leaq 32(%rcx,%rax,1),%rcx
231 negq %rax
232 .byte 0x0f,0x1f,0x00
233 addq $16,%rax
234
235 L$dec_loop4:
236 .byte 102,15,56,222,209
237 .byte 102,15,56,222,217
238 .byte 102,15,56,222,225
239 .byte 102,15,56,222,233
240 movups (%rcx,%rax,1),%xmm1
241 addq $32,%rax
242 .byte 102,15,56,222,208
243 .byte 102,15,56,222,216
244 .byte 102,15,56,222,224
245 .byte 102,15,56,222,232
246 movups -16(%rcx,%rax,1),%xmm0
247 jnz L$dec_loop4
248
249 .byte 102,15,56,222,209
250 .byte 102,15,56,222,217
251 .byte 102,15,56,222,225
252 .byte 102,15,56,222,233
253 .byte 102,15,56,223,208
254 .byte 102,15,56,223,216
255 .byte 102,15,56,223,224
256 .byte 102,15,56,223,232
257 .byte 0xf3,0xc3
258
259
260 .p2align 4
261 _aesni_encrypt6:
262 movups (%rcx),%xmm0
263 shll $4,%eax
264 movups 16(%rcx),%xmm1
265 xorps %xmm0,%xmm2
266 pxor %xmm0,%xmm3
267 pxor %xmm0,%xmm4
268 .byte 102,15,56,220,209
269 leaq 32(%rcx,%rax,1),%rcx
270 negq %rax
271 .byte 102,15,56,220,217
272 pxor %xmm0,%xmm5
273 pxor %xmm0,%xmm6
274 .byte 102,15,56,220,225
275 pxor %xmm0,%xmm7
276 movups (%rcx,%rax,1),%xmm0
277 addq $16,%rax
278 jmp L$enc_loop6_enter
279 .p2align 4
280 L$enc_loop6:
281 .byte 102,15,56,220,209
282 .byte 102,15,56,220,217
283 .byte 102,15,56,220,225
284 L$enc_loop6_enter:
285 .byte 102,15,56,220,233
286 .byte 102,15,56,220,241
287 .byte 102,15,56,220,249
288 movups (%rcx,%rax,1),%xmm1
289 addq $32,%rax
290 .byte 102,15,56,220,208
291 .byte 102,15,56,220,216
292 .byte 102,15,56,220,224
293 .byte 102,15,56,220,232
294 .byte 102,15,56,220,240
295 .byte 102,15,56,220,248
296 movups -16(%rcx,%rax,1),%xmm0
297 jnz L$enc_loop6
298
299 .byte 102,15,56,220,209
300 .byte 102,15,56,220,217
301 .byte 102,15,56,220,225
302 .byte 102,15,56,220,233
303 .byte 102,15,56,220,241
304 .byte 102,15,56,220,249
305 .byte 102,15,56,221,208
306 .byte 102,15,56,221,216
307 .byte 102,15,56,221,224
308 .byte 102,15,56,221,232
309 .byte 102,15,56,221,240
310 .byte 102,15,56,221,248
311 .byte 0xf3,0xc3
312
313
314 .p2align 4
315 _aesni_decrypt6:
316 movups (%rcx),%xmm0
317 shll $4,%eax
318 movups 16(%rcx),%xmm1
319 xorps %xmm0,%xmm2
320 pxor %xmm0,%xmm3
321 pxor %xmm0,%xmm4
322 .byte 102,15,56,222,209
323 leaq 32(%rcx,%rax,1),%rcx
324 negq %rax
325 .byte 102,15,56,222,217
326 pxor %xmm0,%xmm5
327 pxor %xmm0,%xmm6
328 .byte 102,15,56,222,225
329 pxor %xmm0,%xmm7
330 movups (%rcx,%rax,1),%xmm0
331 addq $16,%rax
332 jmp L$dec_loop6_enter
333 .p2align 4
334 L$dec_loop6:
335 .byte 102,15,56,222,209
336 .byte 102,15,56,222,217
337 .byte 102,15,56,222,225
338 L$dec_loop6_enter:
339 .byte 102,15,56,222,233
340 .byte 102,15,56,222,241
341 .byte 102,15,56,222,249
342 movups (%rcx,%rax,1),%xmm1
343 addq $32,%rax
344 .byte 102,15,56,222,208
345 .byte 102,15,56,222,216
346 .byte 102,15,56,222,224
347 .byte 102,15,56,222,232
348 .byte 102,15,56,222,240
349 .byte 102,15,56,222,248
350 movups -16(%rcx,%rax,1),%xmm0
351 jnz L$dec_loop6
352
353 .byte 102,15,56,222,209
354 .byte 102,15,56,222,217
355 .byte 102,15,56,222,225
356 .byte 102,15,56,222,233
357 .byte 102,15,56,222,241
358 .byte 102,15,56,222,249
359 .byte 102,15,56,223,208
360 .byte 102,15,56,223,216
361 .byte 102,15,56,223,224
362 .byte 102,15,56,223,232
363 .byte 102,15,56,223,240
364 .byte 102,15,56,223,248
365 .byte 0xf3,0xc3
366
367
368 .p2align 4
369 _aesni_encrypt8:
370 movups (%rcx),%xmm0
371 shll $4,%eax
372 movups 16(%rcx),%xmm1
373 xorps %xmm0,%xmm2
374 xorps %xmm0,%xmm3
375 pxor %xmm0,%xmm4
376 pxor %xmm0,%xmm5
377 pxor %xmm0,%xmm6
378 leaq 32(%rcx,%rax,1),%rcx
379 negq %rax
380 .byte 102,15,56,220,209
381 pxor %xmm0,%xmm7
382 pxor %xmm0,%xmm8
383 .byte 102,15,56,220,217
384 pxor %xmm0,%xmm9
385 movups (%rcx,%rax,1),%xmm0
386 addq $16,%rax
387 jmp L$enc_loop8_inner
388 .p2align 4
389 L$enc_loop8:
390 .byte 102,15,56,220,209
391 .byte 102,15,56,220,217
392 L$enc_loop8_inner:
393 .byte 102,15,56,220,225
394 .byte 102,15,56,220,233
395 .byte 102,15,56,220,241
396 .byte 102,15,56,220,249
397 .byte 102,68,15,56,220,193
398 .byte 102,68,15,56,220,201
399 L$enc_loop8_enter:
400 movups (%rcx,%rax,1),%xmm1
401 addq $32,%rax
402 .byte 102,15,56,220,208
403 .byte 102,15,56,220,216
404 .byte 102,15,56,220,224
405 .byte 102,15,56,220,232
406 .byte 102,15,56,220,240
407 .byte 102,15,56,220,248
408 .byte 102,68,15,56,220,192
409 .byte 102,68,15,56,220,200
410 movups -16(%rcx,%rax,1),%xmm0
411 jnz L$enc_loop8
412
413 .byte 102,15,56,220,209
414 .byte 102,15,56,220,217
415 .byte 102,15,56,220,225
416 .byte 102,15,56,220,233
417 .byte 102,15,56,220,241
418 .byte 102,15,56,220,249
419 .byte 102,68,15,56,220,193
420 .byte 102,68,15,56,220,201
421 .byte 102,15,56,221,208
422 .byte 102,15,56,221,216
423 .byte 102,15,56,221,224
424 .byte 102,15,56,221,232
425 .byte 102,15,56,221,240
426 .byte 102,15,56,221,248
427 .byte 102,68,15,56,221,192
428 .byte 102,68,15,56,221,200
429 .byte 0xf3,0xc3
430
431
432 .p2align 4
433 _aesni_decrypt8:
434 movups (%rcx),%xmm0
435 shll $4,%eax
436 movups 16(%rcx),%xmm1
437 xorps %xmm0,%xmm2
438 xorps %xmm0,%xmm3
439 pxor %xmm0,%xmm4
440 pxor %xmm0,%xmm5
441 pxor %xmm0,%xmm6
442 leaq 32(%rcx,%rax,1),%rcx
443 negq %rax
444 .byte 102,15,56,222,209
445 pxor %xmm0,%xmm7
446 pxor %xmm0,%xmm8
447 .byte 102,15,56,222,217
448 pxor %xmm0,%xmm9
449 movups (%rcx,%rax,1),%xmm0
450 addq $16,%rax
451 jmp L$dec_loop8_inner
452 .p2align 4
453 L$dec_loop8:
454 .byte 102,15,56,222,209
455 .byte 102,15,56,222,217
456 L$dec_loop8_inner:
457 .byte 102,15,56,222,225
458 .byte 102,15,56,222,233
459 .byte 102,15,56,222,241
460 .byte 102,15,56,222,249
461 .byte 102,68,15,56,222,193
462 .byte 102,68,15,56,222,201
463 L$dec_loop8_enter:
464 movups (%rcx,%rax,1),%xmm1
465 addq $32,%rax
466 .byte 102,15,56,222,208
467 .byte 102,15,56,222,216
468 .byte 102,15,56,222,224
469 .byte 102,15,56,222,232
470 .byte 102,15,56,222,240
471 .byte 102,15,56,222,248
472 .byte 102,68,15,56,222,192
473 .byte 102,68,15,56,222,200
474 movups -16(%rcx,%rax,1),%xmm0
475 jnz L$dec_loop8
476
477 .byte 102,15,56,222,209
478 .byte 102,15,56,222,217
479 .byte 102,15,56,222,225
480 .byte 102,15,56,222,233
481 .byte 102,15,56,222,241
482 .byte 102,15,56,222,249
483 .byte 102,68,15,56,222,193
484 .byte 102,68,15,56,222,201
485 .byte 102,15,56,223,208
486 .byte 102,15,56,223,216
487 .byte 102,15,56,223,224
488 .byte 102,15,56,223,232
489 .byte 102,15,56,223,240
490 .byte 102,15,56,223,248
491 .byte 102,68,15,56,223,192
492 .byte 102,68,15,56,223,200
493 .byte 0xf3,0xc3
494
495 .globl _aesni_ecb_encrypt
496 .private_extern _aesni_ecb_encrypt
497
498 .p2align 4
499 _aesni_ecb_encrypt:
500 andq $-16,%rdx
501 jz L$ecb_ret
502
503 movl 240(%rcx),%eax
504 movups (%rcx),%xmm0
505 movq %rcx,%r11
506 movl %eax,%r10d
507 testl %r8d,%r8d
508 jz L$ecb_decrypt
509
510 cmpq $0x80,%rdx
511 jb L$ecb_enc_tail
512
513 movdqu (%rdi),%xmm2
514 movdqu 16(%rdi),%xmm3
515 movdqu 32(%rdi),%xmm4
516 movdqu 48(%rdi),%xmm5
517 movdqu 64(%rdi),%xmm6
518 movdqu 80(%rdi),%xmm7
519 movdqu 96(%rdi),%xmm8
520 movdqu 112(%rdi),%xmm9
521 leaq 128(%rdi),%rdi
522 subq $0x80,%rdx
523 jmp L$ecb_enc_loop8_enter
524 .p2align 4
525 L$ecb_enc_loop8:
526 movups %xmm2,(%rsi)
527 movq %r11,%rcx
528 movdqu (%rdi),%xmm2
529 movl %r10d,%eax
530 movups %xmm3,16(%rsi)
531 movdqu 16(%rdi),%xmm3
532 movups %xmm4,32(%rsi)
533 movdqu 32(%rdi),%xmm4
534 movups %xmm5,48(%rsi)
535 movdqu 48(%rdi),%xmm5
536 movups %xmm6,64(%rsi)
537 movdqu 64(%rdi),%xmm6
538 movups %xmm7,80(%rsi)
539 movdqu 80(%rdi),%xmm7
540 movups %xmm8,96(%rsi)
541 movdqu 96(%rdi),%xmm8
542 movups %xmm9,112(%rsi)
543 leaq 128(%rsi),%rsi
544 movdqu 112(%rdi),%xmm9
545 leaq 128(%rdi),%rdi
546 L$ecb_enc_loop8_enter:
547
548 call _aesni_encrypt8
549
550 subq $0x80,%rdx
551 jnc L$ecb_enc_loop8
552
553 movups %xmm2,(%rsi)
554 movq %r11,%rcx
555 movups %xmm3,16(%rsi)
556 movl %r10d,%eax
557 movups %xmm4,32(%rsi)
558 movups %xmm5,48(%rsi)
559 movups %xmm6,64(%rsi)
560 movups %xmm7,80(%rsi)
561 movups %xmm8,96(%rsi)
562 movups %xmm9,112(%rsi)
563 leaq 128(%rsi),%rsi
564 addq $0x80,%rdx
565 jz L$ecb_ret
566
567 L$ecb_enc_tail:
568 movups (%rdi),%xmm2
569 cmpq $0x20,%rdx
570 jb L$ecb_enc_one
571 movups 16(%rdi),%xmm3
572 je L$ecb_enc_two
573 movups 32(%rdi),%xmm4
574 cmpq $0x40,%rdx
575 jb L$ecb_enc_three
576 movups 48(%rdi),%xmm5
577 je L$ecb_enc_four
578 movups 64(%rdi),%xmm6
579 cmpq $0x60,%rdx
580 jb L$ecb_enc_five
581 movups 80(%rdi),%xmm7
582 je L$ecb_enc_six
583 movdqu 96(%rdi),%xmm8
584 xorps %xmm9,%xmm9
585 call _aesni_encrypt8
586 movups %xmm2,(%rsi)
587 movups %xmm3,16(%rsi)
588 movups %xmm4,32(%rsi)
589 movups %xmm5,48(%rsi)
590 movups %xmm6,64(%rsi)
591 movups %xmm7,80(%rsi)
592 movups %xmm8,96(%rsi)
593 jmp L$ecb_ret
594 .p2align 4
595 L$ecb_enc_one:
596 movups (%rcx),%xmm0
597 movups 16(%rcx),%xmm1
598 leaq 32(%rcx),%rcx
599 xorps %xmm0,%xmm2
600 L$oop_enc1_3:
601 .byte 102,15,56,220,209
602 decl %eax
603 movups (%rcx),%xmm1
604 leaq 16(%rcx),%rcx
605 jnz L$oop_enc1_3
606 .byte 102,15,56,221,209
607 movups %xmm2,(%rsi)
608 jmp L$ecb_ret
609 .p2align 4
610 L$ecb_enc_two:
611 call _aesni_encrypt2
612 movups %xmm2,(%rsi)
613 movups %xmm3,16(%rsi)
614 jmp L$ecb_ret
615 .p2align 4
616 L$ecb_enc_three:
617 call _aesni_encrypt3
618 movups %xmm2,(%rsi)
619 movups %xmm3,16(%rsi)
620 movups %xmm4,32(%rsi)
621 jmp L$ecb_ret
622 .p2align 4
623 L$ecb_enc_four:
624 call _aesni_encrypt4
625 movups %xmm2,(%rsi)
626 movups %xmm3,16(%rsi)
627 movups %xmm4,32(%rsi)
628 movups %xmm5,48(%rsi)
629 jmp L$ecb_ret
630 .p2align 4
631 L$ecb_enc_five:
632 xorps %xmm7,%xmm7
633 call _aesni_encrypt6
634 movups %xmm2,(%rsi)
635 movups %xmm3,16(%rsi)
636 movups %xmm4,32(%rsi)
637 movups %xmm5,48(%rsi)
638 movups %xmm6,64(%rsi)
639 jmp L$ecb_ret
640 .p2align 4
641 L$ecb_enc_six:
642 call _aesni_encrypt6
643 movups %xmm2,(%rsi)
644 movups %xmm3,16(%rsi)
645 movups %xmm4,32(%rsi)
646 movups %xmm5,48(%rsi)
647 movups %xmm6,64(%rsi)
648 movups %xmm7,80(%rsi)
649 jmp L$ecb_ret
650
651 .p2align 4
652 L$ecb_decrypt:
653 cmpq $0x80,%rdx
654 jb L$ecb_dec_tail
655
656 movdqu (%rdi),%xmm2
657 movdqu 16(%rdi),%xmm3
658 movdqu 32(%rdi),%xmm4
659 movdqu 48(%rdi),%xmm5
660 movdqu 64(%rdi),%xmm6
661 movdqu 80(%rdi),%xmm7
662 movdqu 96(%rdi),%xmm8
663 movdqu 112(%rdi),%xmm9
664 leaq 128(%rdi),%rdi
665 subq $0x80,%rdx
666 jmp L$ecb_dec_loop8_enter
667 .p2align 4
668 L$ecb_dec_loop8:
669 movups %xmm2,(%rsi)
670 movq %r11,%rcx
671 movdqu (%rdi),%xmm2
672 movl %r10d,%eax
673 movups %xmm3,16(%rsi)
674 movdqu 16(%rdi),%xmm3
675 movups %xmm4,32(%rsi)
676 movdqu 32(%rdi),%xmm4
677 movups %xmm5,48(%rsi)
678 movdqu 48(%rdi),%xmm5
679 movups %xmm6,64(%rsi)
680 movdqu 64(%rdi),%xmm6
681 movups %xmm7,80(%rsi)
682 movdqu 80(%rdi),%xmm7
683 movups %xmm8,96(%rsi)
684 movdqu 96(%rdi),%xmm8
685 movups %xmm9,112(%rsi)
686 leaq 128(%rsi),%rsi
687 movdqu 112(%rdi),%xmm9
688 leaq 128(%rdi),%rdi
689 L$ecb_dec_loop8_enter:
690
691 call _aesni_decrypt8
692
693 movups (%r11),%xmm0
694 subq $0x80,%rdx
695 jnc L$ecb_dec_loop8
696
697 movups %xmm2,(%rsi)
698 pxor %xmm2,%xmm2
699 movq %r11,%rcx
700 movups %xmm3,16(%rsi)
701 pxor %xmm3,%xmm3
702 movl %r10d,%eax
703 movups %xmm4,32(%rsi)
704 pxor %xmm4,%xmm4
705 movups %xmm5,48(%rsi)
706 pxor %xmm5,%xmm5
707 movups %xmm6,64(%rsi)
708 pxor %xmm6,%xmm6
709 movups %xmm7,80(%rsi)
710 pxor %xmm7,%xmm7
711 movups %xmm8,96(%rsi)
712 pxor %xmm8,%xmm8
713 movups %xmm9,112(%rsi)
714 pxor %xmm9,%xmm9
715 leaq 128(%rsi),%rsi
716 addq $0x80,%rdx
717 jz L$ecb_ret
718
719 L$ecb_dec_tail:
720 movups (%rdi),%xmm2
721 cmpq $0x20,%rdx
722 jb L$ecb_dec_one
723 movups 16(%rdi),%xmm3
724 je L$ecb_dec_two
725 movups 32(%rdi),%xmm4
726 cmpq $0x40,%rdx
727 jb L$ecb_dec_three
728 movups 48(%rdi),%xmm5
729 je L$ecb_dec_four
730 movups 64(%rdi),%xmm6
731 cmpq $0x60,%rdx
732 jb L$ecb_dec_five
733 movups 80(%rdi),%xmm7
734 je L$ecb_dec_six
735 movups 96(%rdi),%xmm8
736 movups (%rcx),%xmm0
737 xorps %xmm9,%xmm9
738 call _aesni_decrypt8
739 movups %xmm2,(%rsi)
740 pxor %xmm2,%xmm2
741 movups %xmm3,16(%rsi)
742 pxor %xmm3,%xmm3
743 movups %xmm4,32(%rsi)
744 pxor %xmm4,%xmm4
745 movups %xmm5,48(%rsi)
746 pxor %xmm5,%xmm5
747 movups %xmm6,64(%rsi)
748 pxor %xmm6,%xmm6
749 movups %xmm7,80(%rsi)
750 pxor %xmm7,%xmm7
751 movups %xmm8,96(%rsi)
752 pxor %xmm8,%xmm8
753 pxor %xmm9,%xmm9
754 jmp L$ecb_ret
755 .p2align 4
756 L$ecb_dec_one:
757 movups (%rcx),%xmm0
758 movups 16(%rcx),%xmm1
759 leaq 32(%rcx),%rcx
760 xorps %xmm0,%xmm2
761 L$oop_dec1_4:
762 .byte 102,15,56,222,209
763 decl %eax
764 movups (%rcx),%xmm1
765 leaq 16(%rcx),%rcx
766 jnz L$oop_dec1_4
767 .byte 102,15,56,223,209
768 movups %xmm2,(%rsi)
769 pxor %xmm2,%xmm2
770 jmp L$ecb_ret
771 .p2align 4
772 L$ecb_dec_two:
773 call _aesni_decrypt2
774 movups %xmm2,(%rsi)
775 pxor %xmm2,%xmm2
776 movups %xmm3,16(%rsi)
777 pxor %xmm3,%xmm3
778 jmp L$ecb_ret
779 .p2align 4
780 L$ecb_dec_three:
781 call _aesni_decrypt3
782 movups %xmm2,(%rsi)
783 pxor %xmm2,%xmm2
784 movups %xmm3,16(%rsi)
785 pxor %xmm3,%xmm3
786 movups %xmm4,32(%rsi)
787 pxor %xmm4,%xmm4
788 jmp L$ecb_ret
789 .p2align 4
790 L$ecb_dec_four:
791 call _aesni_decrypt4
792 movups %xmm2,(%rsi)
793 pxor %xmm2,%xmm2
794 movups %xmm3,16(%rsi)
795 pxor %xmm3,%xmm3
796 movups %xmm4,32(%rsi)
797 pxor %xmm4,%xmm4
798 movups %xmm5,48(%rsi)
799 pxor %xmm5,%xmm5
800 jmp L$ecb_ret
801 .p2align 4
802 L$ecb_dec_five:
803 xorps %xmm7,%xmm7
804 call _aesni_decrypt6
805 movups %xmm2,(%rsi)
806 pxor %xmm2,%xmm2
807 movups %xmm3,16(%rsi)
808 pxor %xmm3,%xmm3
809 movups %xmm4,32(%rsi)
810 pxor %xmm4,%xmm4
811 movups %xmm5,48(%rsi)
812 pxor %xmm5,%xmm5
813 movups %xmm6,64(%rsi)
814 pxor %xmm6,%xmm6
815 pxor %xmm7,%xmm7
816 jmp L$ecb_ret
817 .p2align 4
818 L$ecb_dec_six:
819 call _aesni_decrypt6
820 movups %xmm2,(%rsi)
821 pxor %xmm2,%xmm2
822 movups %xmm3,16(%rsi)
823 pxor %xmm3,%xmm3
824 movups %xmm4,32(%rsi)
825 pxor %xmm4,%xmm4
826 movups %xmm5,48(%rsi)
827 pxor %xmm5,%xmm5
828 movups %xmm6,64(%rsi)
829 pxor %xmm6,%xmm6
830 movups %xmm7,80(%rsi)
831 pxor %xmm7,%xmm7
832
833 L$ecb_ret:
834 xorps %xmm0,%xmm0
835 pxor %xmm1,%xmm1
836 .byte 0xf3,0xc3
837
838 .globl _aesni_ccm64_encrypt_blocks
839 .private_extern _aesni_ccm64_encrypt_blocks
840
841 .p2align 4
842 _aesni_ccm64_encrypt_blocks:
843 movl 240(%rcx),%eax
844 movdqu (%r8),%xmm6
845 movdqa L$increment64(%rip),%xmm9
846 movdqa L$bswap_mask(%rip),%xmm7
847
848 shll $4,%eax
849 movl $16,%r10d
850 leaq 0(%rcx),%r11
851 movdqu (%r9),%xmm3
852 movdqa %xmm6,%xmm2
853 leaq 32(%rcx,%rax,1),%rcx
854 .byte 102,15,56,0,247
855 subq %rax,%r10
856 jmp L$ccm64_enc_outer
857 .p2align 4
858 L$ccm64_enc_outer:
859 movups (%r11),%xmm0
860 movq %r10,%rax
861 movups (%rdi),%xmm8
862
863 xorps %xmm0,%xmm2
864 movups 16(%r11),%xmm1
865 xorps %xmm8,%xmm0
866 xorps %xmm0,%xmm3
867 movups 32(%r11),%xmm0
868
869 L$ccm64_enc2_loop:
870 .byte 102,15,56,220,209
871 .byte 102,15,56,220,217
872 movups (%rcx,%rax,1),%xmm1
873 addq $32,%rax
874 .byte 102,15,56,220,208
875 .byte 102,15,56,220,216
876 movups -16(%rcx,%rax,1),%xmm0
877 jnz L$ccm64_enc2_loop
878 .byte 102,15,56,220,209
879 .byte 102,15,56,220,217
880 paddq %xmm9,%xmm6
881 decq %rdx
882 .byte 102,15,56,221,208
883 .byte 102,15,56,221,216
884
885 leaq 16(%rdi),%rdi
886 xorps %xmm2,%xmm8
887 movdqa %xmm6,%xmm2
888 movups %xmm8,(%rsi)
889 .byte 102,15,56,0,215
890 leaq 16(%rsi),%rsi
891 jnz L$ccm64_enc_outer
892
893 pxor %xmm0,%xmm0
894 pxor %xmm1,%xmm1
895 pxor %xmm2,%xmm2
896 movups %xmm3,(%r9)
897 pxor %xmm3,%xmm3
898 pxor %xmm8,%xmm8
899 pxor %xmm6,%xmm6
900 .byte 0xf3,0xc3
901
902 .globl _aesni_ccm64_decrypt_blocks
903 .private_extern _aesni_ccm64_decrypt_blocks
904
905 .p2align 4
906 _aesni_ccm64_decrypt_blocks:
907 movl 240(%rcx),%eax
908 movups (%r8),%xmm6
909 movdqu (%r9),%xmm3
910 movdqa L$increment64(%rip),%xmm9
911 movdqa L$bswap_mask(%rip),%xmm7
912
913 movaps %xmm6,%xmm2
914 movl %eax,%r10d
915 movq %rcx,%r11
916 .byte 102,15,56,0,247
917 movups (%rcx),%xmm0
918 movups 16(%rcx),%xmm1
919 leaq 32(%rcx),%rcx
920 xorps %xmm0,%xmm2
921 L$oop_enc1_5:
922 .byte 102,15,56,220,209
923 decl %eax
924 movups (%rcx),%xmm1
925 leaq 16(%rcx),%rcx
926 jnz L$oop_enc1_5
927 .byte 102,15,56,221,209
928 shll $4,%r10d
929 movl $16,%eax
930 movups (%rdi),%xmm8
931 paddq %xmm9,%xmm6
932 leaq 16(%rdi),%rdi
933 subq %r10,%rax
934 leaq 32(%r11,%r10,1),%rcx
935 movq %rax,%r10
936 jmp L$ccm64_dec_outer
937 .p2align 4
938 L$ccm64_dec_outer:
939 xorps %xmm2,%xmm8
940 movdqa %xmm6,%xmm2
941 movups %xmm8,(%rsi)
942 leaq 16(%rsi),%rsi
943 .byte 102,15,56,0,215
944
945 subq $1,%rdx
946 jz L$ccm64_dec_break
947
948 movups (%r11),%xmm0
949 movq %r10,%rax
950 movups 16(%r11),%xmm1
951 xorps %xmm0,%xmm8
952 xorps %xmm0,%xmm2
953 xorps %xmm8,%xmm3
954 movups 32(%r11),%xmm0
955 jmp L$ccm64_dec2_loop
956 .p2align 4
957 L$ccm64_dec2_loop:
958 .byte 102,15,56,220,209
959 .byte 102,15,56,220,217
960 movups (%rcx,%rax,1),%xmm1
961 addq $32,%rax
962 .byte 102,15,56,220,208
963 .byte 102,15,56,220,216
964 movups -16(%rcx,%rax,1),%xmm0
965 jnz L$ccm64_dec2_loop
966 movups (%rdi),%xmm8
967 paddq %xmm9,%xmm6
968 .byte 102,15,56,220,209
969 .byte 102,15,56,220,217
970 .byte 102,15,56,221,208
971 .byte 102,15,56,221,216
972 leaq 16(%rdi),%rdi
973 jmp L$ccm64_dec_outer
974
975 .p2align 4
976 L$ccm64_dec_break:
977
978 movl 240(%r11),%eax
979 movups (%r11),%xmm0
980 movups 16(%r11),%xmm1
981 xorps %xmm0,%xmm8
982 leaq 32(%r11),%r11
983 xorps %xmm8,%xmm3
984 L$oop_enc1_6:
985 .byte 102,15,56,220,217
986 decl %eax
987 movups (%r11),%xmm1
988 leaq 16(%r11),%r11
989 jnz L$oop_enc1_6
990 .byte 102,15,56,221,217
991 pxor %xmm0,%xmm0
992 pxor %xmm1,%xmm1
993 pxor %xmm2,%xmm2
994 movups %xmm3,(%r9)
995 pxor %xmm3,%xmm3
996 pxor %xmm8,%xmm8
997 pxor %xmm6,%xmm6
998 .byte 0xf3,0xc3
999
1000 .globl _aesni_ctr32_encrypt_blocks
1001 .private_extern _aesni_ctr32_encrypt_blocks
1002
1003 .p2align 4
1004 _aesni_ctr32_encrypt_blocks:
1005 cmpq $1,%rdx
1006 jne L$ctr32_bulk
1007
1008
1009
1010 movups (%r8),%xmm2
1011 movups (%rdi),%xmm3
1012 movl 240(%rcx),%edx
1013 movups (%rcx),%xmm0
1014 movups 16(%rcx),%xmm1
1015 leaq 32(%rcx),%rcx
1016 xorps %xmm0,%xmm2
1017 L$oop_enc1_7:
1018 .byte 102,15,56,220,209
1019 decl %edx
1020 movups (%rcx),%xmm1
1021 leaq 16(%rcx),%rcx
1022 jnz L$oop_enc1_7
1023 .byte 102,15,56,221,209
1024 pxor %xmm0,%xmm0
1025 pxor %xmm1,%xmm1
1026 xorps %xmm3,%xmm2
1027 pxor %xmm3,%xmm3
1028 movups %xmm2,(%rsi)
1029 xorps %xmm2,%xmm2
1030 jmp L$ctr32_epilogue
1031
1032 .p2align 4
1033 L$ctr32_bulk:
1034 leaq (%rsp),%r11
1035 pushq %rbp
1036 subq $128,%rsp
1037 andq $-16,%rsp
1038
1039
1040
1041
1042 movdqu (%r8),%xmm2
1043 movdqu (%rcx),%xmm0
1044 movl 12(%r8),%r8d
1045 pxor %xmm0,%xmm2
1046 movl 12(%rcx),%ebp
1047 movdqa %xmm2,0(%rsp)
1048 bswapl %r8d
1049 movdqa %xmm2,%xmm3
1050 movdqa %xmm2,%xmm4
1051 movdqa %xmm2,%xmm5
1052 movdqa %xmm2,64(%rsp)
1053 movdqa %xmm2,80(%rsp)
1054 movdqa %xmm2,96(%rsp)
1055 movq %rdx,%r10
1056 movdqa %xmm2,112(%rsp)
1057
1058 leaq 1(%r8),%rax
1059 leaq 2(%r8),%rdx
1060 bswapl %eax
1061 bswapl %edx
1062 xorl %ebp,%eax
1063 xorl %ebp,%edx
1064 .byte 102,15,58,34,216,3
1065 leaq 3(%r8),%rax
1066 movdqa %xmm3,16(%rsp)
1067 .byte 102,15,58,34,226,3
1068 bswapl %eax
1069 movq %r10,%rdx
1070 leaq 4(%r8),%r10
1071 movdqa %xmm4,32(%rsp)
1072 xorl %ebp,%eax
1073 bswapl %r10d
1074 .byte 102,15,58,34,232,3
1075 xorl %ebp,%r10d
1076 movdqa %xmm5,48(%rsp)
1077 leaq 5(%r8),%r9
1078 movl %r10d,64+12(%rsp)
1079 bswapl %r9d
1080 leaq 6(%r8),%r10
1081 movl 240(%rcx),%eax
1082 xorl %ebp,%r9d
1083 bswapl %r10d
1084 movl %r9d,80+12(%rsp)
1085 xorl %ebp,%r10d
1086 leaq 7(%r8),%r9
1087 movl %r10d,96+12(%rsp)
1088 bswapl %r9d
1089 movl _OPENSSL_ia32cap_P+4(%rip),%r10d
1090 xorl %ebp,%r9d
1091 andl $71303168,%r10d
1092 movl %r9d,112+12(%rsp)
1093
1094 movups 16(%rcx),%xmm1
1095
1096 movdqa 64(%rsp),%xmm6
1097 movdqa 80(%rsp),%xmm7
1098
1099 cmpq $8,%rdx
1100 jb L$ctr32_tail
1101
1102 subq $6,%rdx
1103 cmpl $4194304,%r10d
1104 je L$ctr32_6x
1105
1106 leaq 128(%rcx),%rcx
1107 subq $2,%rdx
1108 jmp L$ctr32_loop8
1109
1110 .p2align 4
1111 L$ctr32_6x:
1112 shll $4,%eax
1113 movl $48,%r10d
1114 bswapl %ebp
1115 leaq 32(%rcx,%rax,1),%rcx
1116 subq %rax,%r10
1117 jmp L$ctr32_loop6
1118
1119 .p2align 4
1120 L$ctr32_loop6:
1121 addl $6,%r8d
1122 movups -48(%rcx,%r10,1),%xmm0
1123 .byte 102,15,56,220,209
1124 movl %r8d,%eax
1125 xorl %ebp,%eax
1126 .byte 102,15,56,220,217
1127 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1128 leal 1(%r8),%eax
1129 .byte 102,15,56,220,225
1130 xorl %ebp,%eax
1131 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1132 .byte 102,15,56,220,233
1133 leal 2(%r8),%eax
1134 xorl %ebp,%eax
1135 .byte 102,15,56,220,241
1136 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1137 leal 3(%r8),%eax
1138 .byte 102,15,56,220,249
1139 movups -32(%rcx,%r10,1),%xmm1
1140 xorl %ebp,%eax
1141
1142 .byte 102,15,56,220,208
1143 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1144 leal 4(%r8),%eax
1145 .byte 102,15,56,220,216
1146 xorl %ebp,%eax
1147 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1148 .byte 102,15,56,220,224
1149 leal 5(%r8),%eax
1150 xorl %ebp,%eax
1151 .byte 102,15,56,220,232
1152 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1153 movq %r10,%rax
1154 .byte 102,15,56,220,240
1155 .byte 102,15,56,220,248
1156 movups -16(%rcx,%r10,1),%xmm0
1157
1158 call L$enc_loop6
1159
1160 movdqu (%rdi),%xmm8
1161 movdqu 16(%rdi),%xmm9
1162 movdqu 32(%rdi),%xmm10
1163 movdqu 48(%rdi),%xmm11
1164 movdqu 64(%rdi),%xmm12
1165 movdqu 80(%rdi),%xmm13
1166 leaq 96(%rdi),%rdi
1167 movups -64(%rcx,%r10,1),%xmm1
1168 pxor %xmm2,%xmm8
1169 movaps 0(%rsp),%xmm2
1170 pxor %xmm3,%xmm9
1171 movaps 16(%rsp),%xmm3
1172 pxor %xmm4,%xmm10
1173 movaps 32(%rsp),%xmm4
1174 pxor %xmm5,%xmm11
1175 movaps 48(%rsp),%xmm5
1176 pxor %xmm6,%xmm12
1177 movaps 64(%rsp),%xmm6
1178 pxor %xmm7,%xmm13
1179 movaps 80(%rsp),%xmm7
1180 movdqu %xmm8,(%rsi)
1181 movdqu %xmm9,16(%rsi)
1182 movdqu %xmm10,32(%rsi)
1183 movdqu %xmm11,48(%rsi)
1184 movdqu %xmm12,64(%rsi)
1185 movdqu %xmm13,80(%rsi)
1186 leaq 96(%rsi),%rsi
1187
1188 subq $6,%rdx
1189 jnc L$ctr32_loop6
1190
1191 addq $6,%rdx
1192 jz L$ctr32_done
1193
1194 leal -48(%r10),%eax
1195 leaq -80(%rcx,%r10,1),%rcx
1196 negl %eax
1197 shrl $4,%eax
1198 jmp L$ctr32_tail
1199
1200 .p2align 5
1201 L$ctr32_loop8:
1202 addl $8,%r8d
1203 movdqa 96(%rsp),%xmm8
1204 .byte 102,15,56,220,209
1205 movl %r8d,%r9d
1206 movdqa 112(%rsp),%xmm9
1207 .byte 102,15,56,220,217
1208 bswapl %r9d
1209 movups 32-128(%rcx),%xmm0
1210 .byte 102,15,56,220,225
1211 xorl %ebp,%r9d
1212 nop
1213 .byte 102,15,56,220,233
1214 movl %r9d,0+12(%rsp)
1215 leaq 1(%r8),%r9
1216 .byte 102,15,56,220,241
1217 .byte 102,15,56,220,249
1218 .byte 102,68,15,56,220,193
1219 .byte 102,68,15,56,220,201
1220 movups 48-128(%rcx),%xmm1
1221 bswapl %r9d
1222 .byte 102,15,56,220,208
1223 .byte 102,15,56,220,216
1224 xorl %ebp,%r9d
1225 .byte 0x66,0x90
1226 .byte 102,15,56,220,224
1227 .byte 102,15,56,220,232
1228 movl %r9d,16+12(%rsp)
1229 leaq 2(%r8),%r9
1230 .byte 102,15,56,220,240
1231 .byte 102,15,56,220,248
1232 .byte 102,68,15,56,220,192
1233 .byte 102,68,15,56,220,200
1234 movups 64-128(%rcx),%xmm0
1235 bswapl %r9d
1236 .byte 102,15,56,220,209
1237 .byte 102,15,56,220,217
1238 xorl %ebp,%r9d
1239 .byte 0x66,0x90
1240 .byte 102,15,56,220,225
1241 .byte 102,15,56,220,233
1242 movl %r9d,32+12(%rsp)
1243 leaq 3(%r8),%r9
1244 .byte 102,15,56,220,241
1245 .byte 102,15,56,220,249
1246 .byte 102,68,15,56,220,193
1247 .byte 102,68,15,56,220,201
1248 movups 80-128(%rcx),%xmm1
1249 bswapl %r9d
1250 .byte 102,15,56,220,208
1251 .byte 102,15,56,220,216
1252 xorl %ebp,%r9d
1253 .byte 0x66,0x90
1254 .byte 102,15,56,220,224
1255 .byte 102,15,56,220,232
1256 movl %r9d,48+12(%rsp)
1257 leaq 4(%r8),%r9
1258 .byte 102,15,56,220,240
1259 .byte 102,15,56,220,248
1260 .byte 102,68,15,56,220,192
1261 .byte 102,68,15,56,220,200
1262 movups 96-128(%rcx),%xmm0
1263 bswapl %r9d
1264 .byte 102,15,56,220,209
1265 .byte 102,15,56,220,217
1266 xorl %ebp,%r9d
1267 .byte 0x66,0x90
1268 .byte 102,15,56,220,225
1269 .byte 102,15,56,220,233
1270 movl %r9d,64+12(%rsp)
1271 leaq 5(%r8),%r9
1272 .byte 102,15,56,220,241
1273 .byte 102,15,56,220,249
1274 .byte 102,68,15,56,220,193
1275 .byte 102,68,15,56,220,201
1276 movups 112-128(%rcx),%xmm1
1277 bswapl %r9d
1278 .byte 102,15,56,220,208
1279 .byte 102,15,56,220,216
1280 xorl %ebp,%r9d
1281 .byte 0x66,0x90
1282 .byte 102,15,56,220,224
1283 .byte 102,15,56,220,232
1284 movl %r9d,80+12(%rsp)
1285 leaq 6(%r8),%r9
1286 .byte 102,15,56,220,240
1287 .byte 102,15,56,220,248
1288 .byte 102,68,15,56,220,192
1289 .byte 102,68,15,56,220,200
1290 movups 128-128(%rcx),%xmm0
1291 bswapl %r9d
1292 .byte 102,15,56,220,209
1293 .byte 102,15,56,220,217
1294 xorl %ebp,%r9d
1295 .byte 0x66,0x90
1296 .byte 102,15,56,220,225
1297 .byte 102,15,56,220,233
1298 movl %r9d,96+12(%rsp)
1299 leaq 7(%r8),%r9
1300 .byte 102,15,56,220,241
1301 .byte 102,15,56,220,249
1302 .byte 102,68,15,56,220,193
1303 .byte 102,68,15,56,220,201
1304 movups 144-128(%rcx),%xmm1
1305 bswapl %r9d
1306 .byte 102,15,56,220,208
1307 .byte 102,15,56,220,216
1308 .byte 102,15,56,220,224
1309 xorl %ebp,%r9d
1310 movdqu 0(%rdi),%xmm10
1311 .byte 102,15,56,220,232
1312 movl %r9d,112+12(%rsp)
1313 cmpl $11,%eax
1314 .byte 102,15,56,220,240
1315 .byte 102,15,56,220,248
1316 .byte 102,68,15,56,220,192
1317 .byte 102,68,15,56,220,200
1318 movups 160-128(%rcx),%xmm0
1319
1320 jb L$ctr32_enc_done
1321
1322 .byte 102,15,56,220,209
1323 .byte 102,15,56,220,217
1324 .byte 102,15,56,220,225
1325 .byte 102,15,56,220,233
1326 .byte 102,15,56,220,241
1327 .byte 102,15,56,220,249
1328 .byte 102,68,15,56,220,193
1329 .byte 102,68,15,56,220,201
1330 movups 176-128(%rcx),%xmm1
1331
1332 .byte 102,15,56,220,208
1333 .byte 102,15,56,220,216
1334 .byte 102,15,56,220,224
1335 .byte 102,15,56,220,232
1336 .byte 102,15,56,220,240
1337 .byte 102,15,56,220,248
1338 .byte 102,68,15,56,220,192
1339 .byte 102,68,15,56,220,200
1340 movups 192-128(%rcx),%xmm0
1341 je L$ctr32_enc_done
1342
1343 .byte 102,15,56,220,209
1344 .byte 102,15,56,220,217
1345 .byte 102,15,56,220,225
1346 .byte 102,15,56,220,233
1347 .byte 102,15,56,220,241
1348 .byte 102,15,56,220,249
1349 .byte 102,68,15,56,220,193
1350 .byte 102,68,15,56,220,201
1351 movups 208-128(%rcx),%xmm1
1352
1353 .byte 102,15,56,220,208
1354 .byte 102,15,56,220,216
1355 .byte 102,15,56,220,224
1356 .byte 102,15,56,220,232
1357 .byte 102,15,56,220,240
1358 .byte 102,15,56,220,248
1359 .byte 102,68,15,56,220,192
1360 .byte 102,68,15,56,220,200
1361 movups 224-128(%rcx),%xmm0
1362 jmp L$ctr32_enc_done
1363
1364 .p2align 4
1365 L$ctr32_enc_done:
1366 movdqu 16(%rdi),%xmm11
1367 pxor %xmm0,%xmm10
1368 movdqu 32(%rdi),%xmm12
1369 pxor %xmm0,%xmm11
1370 movdqu 48(%rdi),%xmm13
1371 pxor %xmm0,%xmm12
1372 movdqu 64(%rdi),%xmm14
1373 pxor %xmm0,%xmm13
1374 movdqu 80(%rdi),%xmm15
1375 pxor %xmm0,%xmm14
1376 pxor %xmm0,%xmm15
1377 .byte 102,15,56,220,209
1378 .byte 102,15,56,220,217
1379 .byte 102,15,56,220,225
1380 .byte 102,15,56,220,233
1381 .byte 102,15,56,220,241
1382 .byte 102,15,56,220,249
1383 .byte 102,68,15,56,220,193
1384 .byte 102,68,15,56,220,201
1385 movdqu 96(%rdi),%xmm1
1386 leaq 128(%rdi),%rdi
1387
1388 .byte 102,65,15,56,221,210
1389 pxor %xmm0,%xmm1
1390 movdqu 112-128(%rdi),%xmm10
1391 .byte 102,65,15,56,221,219
1392 pxor %xmm0,%xmm10
1393 movdqa 0(%rsp),%xmm11
1394 .byte 102,65,15,56,221,228
1395 .byte 102,65,15,56,221,237
1396 movdqa 16(%rsp),%xmm12
1397 movdqa 32(%rsp),%xmm13
1398 .byte 102,65,15,56,221,246
1399 .byte 102,65,15,56,221,255
1400 movdqa 48(%rsp),%xmm14
1401 movdqa 64(%rsp),%xmm15
1402 .byte 102,68,15,56,221,193
1403 movdqa 80(%rsp),%xmm0
1404 movups 16-128(%rcx),%xmm1
1405 .byte 102,69,15,56,221,202
1406
1407 movups %xmm2,(%rsi)
1408 movdqa %xmm11,%xmm2
1409 movups %xmm3,16(%rsi)
1410 movdqa %xmm12,%xmm3
1411 movups %xmm4,32(%rsi)
1412 movdqa %xmm13,%xmm4
1413 movups %xmm5,48(%rsi)
1414 movdqa %xmm14,%xmm5
1415 movups %xmm6,64(%rsi)
1416 movdqa %xmm15,%xmm6
1417 movups %xmm7,80(%rsi)
1418 movdqa %xmm0,%xmm7
1419 movups %xmm8,96(%rsi)
1420 movups %xmm9,112(%rsi)
1421 leaq 128(%rsi),%rsi
1422
1423 subq $8,%rdx
1424 jnc L$ctr32_loop8
1425
1426 addq $8,%rdx
1427 jz L$ctr32_done
1428 leaq -128(%rcx),%rcx
1429
1430 L$ctr32_tail:
1431
1432
1433 leaq 16(%rcx),%rcx
1434 cmpq $4,%rdx
1435 jb L$ctr32_loop3
1436 je L$ctr32_loop4
1437
1438
1439 shll $4,%eax
1440 movdqa 96(%rsp),%xmm8
1441 pxor %xmm9,%xmm9
1442
1443 movups 16(%rcx),%xmm0
1444 .byte 102,15,56,220,209
1445 .byte 102,15,56,220,217
1446 leaq 32-16(%rcx,%rax,1),%rcx
1447 negq %rax
1448 .byte 102,15,56,220,225
1449 addq $16,%rax
1450 movups (%rdi),%xmm10
1451 .byte 102,15,56,220,233
1452 .byte 102,15,56,220,241
1453 movups 16(%rdi),%xmm11
1454 movups 32(%rdi),%xmm12
1455 .byte 102,15,56,220,249
1456 .byte 102,68,15,56,220,193
1457
1458 call L$enc_loop8_enter
1459
1460 movdqu 48(%rdi),%xmm13
1461 pxor %xmm10,%xmm2
1462 movdqu 64(%rdi),%xmm10
1463 pxor %xmm11,%xmm3
1464 movdqu %xmm2,(%rsi)
1465 pxor %xmm12,%xmm4
1466 movdqu %xmm3,16(%rsi)
1467 pxor %xmm13,%xmm5
1468 movdqu %xmm4,32(%rsi)
1469 pxor %xmm10,%xmm6
1470 movdqu %xmm5,48(%rsi)
1471 movdqu %xmm6,64(%rsi)
1472 cmpq $6,%rdx
1473 jb L$ctr32_done
1474
1475 movups 80(%rdi),%xmm11
1476 xorps %xmm11,%xmm7
1477 movups %xmm7,80(%rsi)
1478 je L$ctr32_done
1479
1480 movups 96(%rdi),%xmm12
1481 xorps %xmm12,%xmm8
1482 movups %xmm8,96(%rsi)
1483 jmp L$ctr32_done
1484
1485 .p2align 5
1486 L$ctr32_loop4:
1487 .byte 102,15,56,220,209
1488 leaq 16(%rcx),%rcx
1489 decl %eax
1490 .byte 102,15,56,220,217
1491 .byte 102,15,56,220,225
1492 .byte 102,15,56,220,233
1493 movups (%rcx),%xmm1
1494 jnz L$ctr32_loop4
1495 .byte 102,15,56,221,209
1496 .byte 102,15,56,221,217
1497 movups (%rdi),%xmm10
1498 movups 16(%rdi),%xmm11
1499 .byte 102,15,56,221,225
1500 .byte 102,15,56,221,233
1501 movups 32(%rdi),%xmm12
1502 movups 48(%rdi),%xmm13
1503
1504 xorps %xmm10,%xmm2
1505 movups %xmm2,(%rsi)
1506 xorps %xmm11,%xmm3
1507 movups %xmm3,16(%rsi)
1508 pxor %xmm12,%xmm4
1509 movdqu %xmm4,32(%rsi)
1510 pxor %xmm13,%xmm5
1511 movdqu %xmm5,48(%rsi)
1512 jmp L$ctr32_done
1513
1514 .p2align 5
1515 L$ctr32_loop3:
1516 .byte 102,15,56,220,209
1517 leaq 16(%rcx),%rcx
1518 decl %eax
1519 .byte 102,15,56,220,217
1520 .byte 102,15,56,220,225
1521 movups (%rcx),%xmm1
1522 jnz L$ctr32_loop3
1523 .byte 102,15,56,221,209
1524 .byte 102,15,56,221,217
1525 .byte 102,15,56,221,225
1526
1527 movups (%rdi),%xmm10
1528 xorps %xmm10,%xmm2
1529 movups %xmm2,(%rsi)
1530 cmpq $2,%rdx
1531 jb L$ctr32_done
1532
1533 movups 16(%rdi),%xmm11
1534 xorps %xmm11,%xmm3
1535 movups %xmm3,16(%rsi)
1536 je L$ctr32_done
1537
1538 movups 32(%rdi),%xmm12
1539 xorps %xmm12,%xmm4
1540 movups %xmm4,32(%rsi)
1541
1542 L$ctr32_done:
1543 xorps %xmm0,%xmm0
1544 xorl %ebp,%ebp
1545 pxor %xmm1,%xmm1
1546 pxor %xmm2,%xmm2
1547 pxor %xmm3,%xmm3
1548 pxor %xmm4,%xmm4
1549 pxor %xmm5,%xmm5
1550 pxor %xmm6,%xmm6
1551 pxor %xmm7,%xmm7
1552 movaps %xmm0,0(%rsp)
1553 pxor %xmm8,%xmm8
1554 movaps %xmm0,16(%rsp)
1555 pxor %xmm9,%xmm9
1556 movaps %xmm0,32(%rsp)
1557 pxor %xmm10,%xmm10
1558 movaps %xmm0,48(%rsp)
1559 pxor %xmm11,%xmm11
1560 movaps %xmm0,64(%rsp)
1561 pxor %xmm12,%xmm12
1562 movaps %xmm0,80(%rsp)
1563 pxor %xmm13,%xmm13
1564 movaps %xmm0,96(%rsp)
1565 pxor %xmm14,%xmm14
1566 movaps %xmm0,112(%rsp)
1567 pxor %xmm15,%xmm15
1568 movq -8(%r11),%rbp
1569 leaq (%r11),%rsp
1570 L$ctr32_epilogue:
1571 .byte 0xf3,0xc3
1572
1573 .globl _aesni_xts_encrypt
1574 .private_extern _aesni_xts_encrypt
1575
1576 .p2align 4
1577 _aesni_xts_encrypt:
1578 leaq (%rsp),%r11
1579 pushq %rbp
1580 subq $112,%rsp
1581 andq $-16,%rsp
1582 movups (%r9),%xmm2
1583 movl 240(%r8),%eax
1584 movl 240(%rcx),%r10d
1585 movups (%r8),%xmm0
1586 movups 16(%r8),%xmm1
1587 leaq 32(%r8),%r8
1588 xorps %xmm0,%xmm2
1589 L$oop_enc1_8:
1590 .byte 102,15,56,220,209
1591 decl %eax
1592 movups (%r8),%xmm1
1593 leaq 16(%r8),%r8
1594 jnz L$oop_enc1_8
1595 .byte 102,15,56,221,209
1596 movups (%rcx),%xmm0
1597 movq %rcx,%rbp
1598 movl %r10d,%eax
1599 shll $4,%r10d
1600 movq %rdx,%r9
1601 andq $-16,%rdx
1602
1603 movups 16(%rcx,%r10,1),%xmm1
1604
1605 movdqa L$xts_magic(%rip),%xmm8
1606 movdqa %xmm2,%xmm15
1607 pshufd $0x5f,%xmm2,%xmm9
1608 pxor %xmm0,%xmm1
1609 movdqa %xmm9,%xmm14
1610 paddd %xmm9,%xmm9
1611 movdqa %xmm15,%xmm10
1612 psrad $31,%xmm14
1613 paddq %xmm15,%xmm15
1614 pand %xmm8,%xmm14
1615 pxor %xmm0,%xmm10
1616 pxor %xmm14,%xmm15
1617 movdqa %xmm9,%xmm14
1618 paddd %xmm9,%xmm9
1619 movdqa %xmm15,%xmm11
1620 psrad $31,%xmm14
1621 paddq %xmm15,%xmm15
1622 pand %xmm8,%xmm14
1623 pxor %xmm0,%xmm11
1624 pxor %xmm14,%xmm15
1625 movdqa %xmm9,%xmm14
1626 paddd %xmm9,%xmm9
1627 movdqa %xmm15,%xmm12
1628 psrad $31,%xmm14
1629 paddq %xmm15,%xmm15
1630 pand %xmm8,%xmm14
1631 pxor %xmm0,%xmm12
1632 pxor %xmm14,%xmm15
1633 movdqa %xmm9,%xmm14
1634 paddd %xmm9,%xmm9
1635 movdqa %xmm15,%xmm13
1636 psrad $31,%xmm14
1637 paddq %xmm15,%xmm15
1638 pand %xmm8,%xmm14
1639 pxor %xmm0,%xmm13
1640 pxor %xmm14,%xmm15
1641 movdqa %xmm15,%xmm14
1642 psrad $31,%xmm9
1643 paddq %xmm15,%xmm15
1644 pand %xmm8,%xmm9
1645 pxor %xmm0,%xmm14
1646 pxor %xmm9,%xmm15
1647 movaps %xmm1,96(%rsp)
1648
1649 subq $96,%rdx
1650 jc L$xts_enc_short
1651
1652 movl $16+96,%eax
1653 leaq 32(%rbp,%r10,1),%rcx
1654 subq %r10,%rax
1655 movups 16(%rbp),%xmm1
1656 movq %rax,%r10
1657 leaq L$xts_magic(%rip),%r8
1658 jmp L$xts_enc_grandloop
1659
1660 .p2align 5
1661 L$xts_enc_grandloop:
1662 movdqu 0(%rdi),%xmm2
1663 movdqa %xmm0,%xmm8
1664 movdqu 16(%rdi),%xmm3
1665 pxor %xmm10,%xmm2
1666 movdqu 32(%rdi),%xmm4
1667 pxor %xmm11,%xmm3
1668 .byte 102,15,56,220,209
1669 movdqu 48(%rdi),%xmm5
1670 pxor %xmm12,%xmm4
1671 .byte 102,15,56,220,217
1672 movdqu 64(%rdi),%xmm6
1673 pxor %xmm13,%xmm5
1674 .byte 102,15,56,220,225
1675 movdqu 80(%rdi),%xmm7
1676 pxor %xmm15,%xmm8
1677 movdqa 96(%rsp),%xmm9
1678 pxor %xmm14,%xmm6
1679 .byte 102,15,56,220,233
1680 movups 32(%rbp),%xmm0
1681 leaq 96(%rdi),%rdi
1682 pxor %xmm8,%xmm7
1683
1684 pxor %xmm9,%xmm10
1685 .byte 102,15,56,220,241
1686 pxor %xmm9,%xmm11
1687 movdqa %xmm10,0(%rsp)
1688 .byte 102,15,56,220,249
1689 movups 48(%rbp),%xmm1
1690 pxor %xmm9,%xmm12
1691
1692 .byte 102,15,56,220,208
1693 pxor %xmm9,%xmm13
1694 movdqa %xmm11,16(%rsp)
1695 .byte 102,15,56,220,216
1696 pxor %xmm9,%xmm14
1697 movdqa %xmm12,32(%rsp)
1698 .byte 102,15,56,220,224
1699 .byte 102,15,56,220,232
1700 pxor %xmm9,%xmm8
1701 movdqa %xmm14,64(%rsp)
1702 .byte 102,15,56,220,240
1703 .byte 102,15,56,220,248
1704 movups 64(%rbp),%xmm0
1705 movdqa %xmm8,80(%rsp)
1706 pshufd $0x5f,%xmm15,%xmm9
1707 jmp L$xts_enc_loop6
1708 .p2align 5
1709 L$xts_enc_loop6:
1710 .byte 102,15,56,220,209
1711 .byte 102,15,56,220,217
1712 .byte 102,15,56,220,225
1713 .byte 102,15,56,220,233
1714 .byte 102,15,56,220,241
1715 .byte 102,15,56,220,249
1716 movups -64(%rcx,%rax,1),%xmm1
1717 addq $32,%rax
1718
1719 .byte 102,15,56,220,208
1720 .byte 102,15,56,220,216
1721 .byte 102,15,56,220,224
1722 .byte 102,15,56,220,232
1723 .byte 102,15,56,220,240
1724 .byte 102,15,56,220,248
1725 movups -80(%rcx,%rax,1),%xmm0
1726 jnz L$xts_enc_loop6
1727
1728 movdqa (%r8),%xmm8
1729 movdqa %xmm9,%xmm14
1730 paddd %xmm9,%xmm9
1731 .byte 102,15,56,220,209
1732 paddq %xmm15,%xmm15
1733 psrad $31,%xmm14
1734 .byte 102,15,56,220,217
1735 pand %xmm8,%xmm14
1736 movups (%rbp),%xmm10
1737 .byte 102,15,56,220,225
1738 .byte 102,15,56,220,233
1739 .byte 102,15,56,220,241
1740 pxor %xmm14,%xmm15
1741 movaps %xmm10,%xmm11
1742 .byte 102,15,56,220,249
1743 movups -64(%rcx),%xmm1
1744
1745 movdqa %xmm9,%xmm14
1746 .byte 102,15,56,220,208
1747 paddd %xmm9,%xmm9
1748 pxor %xmm15,%xmm10
1749 .byte 102,15,56,220,216
1750 psrad $31,%xmm14
1751 paddq %xmm15,%xmm15
1752 .byte 102,15,56,220,224
1753 .byte 102,15,56,220,232
1754 pand %xmm8,%xmm14
1755 movaps %xmm11,%xmm12
1756 .byte 102,15,56,220,240
1757 pxor %xmm14,%xmm15
1758 movdqa %xmm9,%xmm14
1759 .byte 102,15,56,220,248
1760 movups -48(%rcx),%xmm0
1761
1762 paddd %xmm9,%xmm9
1763 .byte 102,15,56,220,209
1764 pxor %xmm15,%xmm11
1765 psrad $31,%xmm14
1766 .byte 102,15,56,220,217
1767 paddq %xmm15,%xmm15
1768 pand %xmm8,%xmm14
1769 .byte 102,15,56,220,225
1770 .byte 102,15,56,220,233
1771 movdqa %xmm13,48(%rsp)
1772 pxor %xmm14,%xmm15
1773 .byte 102,15,56,220,241
1774 movaps %xmm12,%xmm13
1775 movdqa %xmm9,%xmm14
1776 .byte 102,15,56,220,249
1777 movups -32(%rcx),%xmm1
1778
1779 paddd %xmm9,%xmm9
1780 .byte 102,15,56,220,208
1781 pxor %xmm15,%xmm12
1782 psrad $31,%xmm14
1783 .byte 102,15,56,220,216
1784 paddq %xmm15,%xmm15
1785 pand %xmm8,%xmm14
1786 .byte 102,15,56,220,224
1787 .byte 102,15,56,220,232
1788 .byte 102,15,56,220,240
1789 pxor %xmm14,%xmm15
1790 movaps %xmm13,%xmm14
1791 .byte 102,15,56,220,248
1792
1793 movdqa %xmm9,%xmm0
1794 paddd %xmm9,%xmm9
1795 .byte 102,15,56,220,209
1796 pxor %xmm15,%xmm13
1797 psrad $31,%xmm0
1798 .byte 102,15,56,220,217
1799 paddq %xmm15,%xmm15
1800 pand %xmm8,%xmm0
1801 .byte 102,15,56,220,225
1802 .byte 102,15,56,220,233
1803 pxor %xmm0,%xmm15
1804 movups (%rbp),%xmm0
1805 .byte 102,15,56,220,241
1806 .byte 102,15,56,220,249
1807 movups 16(%rbp),%xmm1
1808
1809 pxor %xmm15,%xmm14
1810 .byte 102,15,56,221,84,36,0
1811 psrad $31,%xmm9
1812 paddq %xmm15,%xmm15
1813 .byte 102,15,56,221,92,36,16
1814 .byte 102,15,56,221,100,36,32
1815 pand %xmm8,%xmm9
1816 movq %r10,%rax
1817 .byte 102,15,56,221,108,36,48
1818 .byte 102,15,56,221,116,36,64
1819 .byte 102,15,56,221,124,36,80
1820 pxor %xmm9,%xmm15
1821
1822 leaq 96(%rsi),%rsi
1823 movups %xmm2,-96(%rsi)
1824 movups %xmm3,-80(%rsi)
1825 movups %xmm4,-64(%rsi)
1826 movups %xmm5,-48(%rsi)
1827 movups %xmm6,-32(%rsi)
1828 movups %xmm7,-16(%rsi)
1829 subq $96,%rdx
1830 jnc L$xts_enc_grandloop
1831
1832 movl $16+96,%eax
1833 subl %r10d,%eax
1834 movq %rbp,%rcx
1835 shrl $4,%eax
1836
1837 L$xts_enc_short:
1838
1839 movl %eax,%r10d
1840 pxor %xmm0,%xmm10
1841 addq $96,%rdx
1842 jz L$xts_enc_done
1843
1844 pxor %xmm0,%xmm11
1845 cmpq $0x20,%rdx
1846 jb L$xts_enc_one
1847 pxor %xmm0,%xmm12
1848 je L$xts_enc_two
1849
1850 pxor %xmm0,%xmm13
1851 cmpq $0x40,%rdx
1852 jb L$xts_enc_three
1853 pxor %xmm0,%xmm14
1854 je L$xts_enc_four
1855
1856 movdqu (%rdi),%xmm2
1857 movdqu 16(%rdi),%xmm3
1858 movdqu 32(%rdi),%xmm4
1859 pxor %xmm10,%xmm2
1860 movdqu 48(%rdi),%xmm5
1861 pxor %xmm11,%xmm3
1862 movdqu 64(%rdi),%xmm6
1863 leaq 80(%rdi),%rdi
1864 pxor %xmm12,%xmm4
1865 pxor %xmm13,%xmm5
1866 pxor %xmm14,%xmm6
1867 pxor %xmm7,%xmm7
1868
1869 call _aesni_encrypt6
1870
1871 xorps %xmm10,%xmm2
1872 movdqa %xmm15,%xmm10
1873 xorps %xmm11,%xmm3
1874 xorps %xmm12,%xmm4
1875 movdqu %xmm2,(%rsi)
1876 xorps %xmm13,%xmm5
1877 movdqu %xmm3,16(%rsi)
1878 xorps %xmm14,%xmm6
1879 movdqu %xmm4,32(%rsi)
1880 movdqu %xmm5,48(%rsi)
1881 movdqu %xmm6,64(%rsi)
1882 leaq 80(%rsi),%rsi
1883 jmp L$xts_enc_done
1884
1885 .p2align 4
1886 L$xts_enc_one:
1887 movups (%rdi),%xmm2
1888 leaq 16(%rdi),%rdi
1889 xorps %xmm10,%xmm2
1890 movups (%rcx),%xmm0
1891 movups 16(%rcx),%xmm1
1892 leaq 32(%rcx),%rcx
1893 xorps %xmm0,%xmm2
1894 L$oop_enc1_9:
1895 .byte 102,15,56,220,209
1896 decl %eax
1897 movups (%rcx),%xmm1
1898 leaq 16(%rcx),%rcx
1899 jnz L$oop_enc1_9
1900 .byte 102,15,56,221,209
1901 xorps %xmm10,%xmm2
1902 movdqa %xmm11,%xmm10
1903 movups %xmm2,(%rsi)
1904 leaq 16(%rsi),%rsi
1905 jmp L$xts_enc_done
1906
1907 .p2align 4
1908 L$xts_enc_two:
1909 movups (%rdi),%xmm2
1910 movups 16(%rdi),%xmm3
1911 leaq 32(%rdi),%rdi
1912 xorps %xmm10,%xmm2
1913 xorps %xmm11,%xmm3
1914
1915 call _aesni_encrypt2
1916
1917 xorps %xmm10,%xmm2
1918 movdqa %xmm12,%xmm10
1919 xorps %xmm11,%xmm3
1920 movups %xmm2,(%rsi)
1921 movups %xmm3,16(%rsi)
1922 leaq 32(%rsi),%rsi
1923 jmp L$xts_enc_done
1924
1925 .p2align 4
1926 L$xts_enc_three:
1927 movups (%rdi),%xmm2
1928 movups 16(%rdi),%xmm3
1929 movups 32(%rdi),%xmm4
1930 leaq 48(%rdi),%rdi
1931 xorps %xmm10,%xmm2
1932 xorps %xmm11,%xmm3
1933 xorps %xmm12,%xmm4
1934
1935 call _aesni_encrypt3
1936
1937 xorps %xmm10,%xmm2
1938 movdqa %xmm13,%xmm10
1939 xorps %xmm11,%xmm3
1940 xorps %xmm12,%xmm4
1941 movups %xmm2,(%rsi)
1942 movups %xmm3,16(%rsi)
1943 movups %xmm4,32(%rsi)
1944 leaq 48(%rsi),%rsi
1945 jmp L$xts_enc_done
1946
1947 .p2align 4
1948 L$xts_enc_four:
1949 movups (%rdi),%xmm2
1950 movups 16(%rdi),%xmm3
1951 movups 32(%rdi),%xmm4
1952 xorps %xmm10,%xmm2
1953 movups 48(%rdi),%xmm5
1954 leaq 64(%rdi),%rdi
1955 xorps %xmm11,%xmm3
1956 xorps %xmm12,%xmm4
1957 xorps %xmm13,%xmm5
1958
1959 call _aesni_encrypt4
1960
1961 pxor %xmm10,%xmm2
1962 movdqa %xmm14,%xmm10
1963 pxor %xmm11,%xmm3
1964 pxor %xmm12,%xmm4
1965 movdqu %xmm2,(%rsi)
1966 pxor %xmm13,%xmm5
1967 movdqu %xmm3,16(%rsi)
1968 movdqu %xmm4,32(%rsi)
1969 movdqu %xmm5,48(%rsi)
1970 leaq 64(%rsi),%rsi
1971 jmp L$xts_enc_done
1972
1973 .p2align 4
1974 L$xts_enc_done:
1975 andq $15,%r9
1976 jz L$xts_enc_ret
1977 movq %r9,%rdx
1978
1979 L$xts_enc_steal:
1980 movzbl (%rdi),%eax
1981 movzbl -16(%rsi),%ecx
1982 leaq 1(%rdi),%rdi
1983 movb %al,-16(%rsi)
1984 movb %cl,0(%rsi)
1985 leaq 1(%rsi),%rsi
1986 subq $1,%rdx
1987 jnz L$xts_enc_steal
1988
1989 subq %r9,%rsi
1990 movq %rbp,%rcx
1991 movl %r10d,%eax
1992
1993 movups -16(%rsi),%xmm2
1994 xorps %xmm10,%xmm2
1995 movups (%rcx),%xmm0
1996 movups 16(%rcx),%xmm1
1997 leaq 32(%rcx),%rcx
1998 xorps %xmm0,%xmm2
1999 L$oop_enc1_10:
2000 .byte 102,15,56,220,209
2001 decl %eax
2002 movups (%rcx),%xmm1
2003 leaq 16(%rcx),%rcx
2004 jnz L$oop_enc1_10
2005 .byte 102,15,56,221,209
2006 xorps %xmm10,%xmm2
2007 movups %xmm2,-16(%rsi)
2008
2009 L$xts_enc_ret:
2010 xorps %xmm0,%xmm0
2011 pxor %xmm1,%xmm1
2012 pxor %xmm2,%xmm2
2013 pxor %xmm3,%xmm3
2014 pxor %xmm4,%xmm4
2015 pxor %xmm5,%xmm5
2016 pxor %xmm6,%xmm6
2017 pxor %xmm7,%xmm7
2018 movaps %xmm0,0(%rsp)
2019 pxor %xmm8,%xmm8
2020 movaps %xmm0,16(%rsp)
2021 pxor %xmm9,%xmm9
2022 movaps %xmm0,32(%rsp)
2023 pxor %xmm10,%xmm10
2024 movaps %xmm0,48(%rsp)
2025 pxor %xmm11,%xmm11
2026 movaps %xmm0,64(%rsp)
2027 pxor %xmm12,%xmm12
2028 movaps %xmm0,80(%rsp)
2029 pxor %xmm13,%xmm13
2030 movaps %xmm0,96(%rsp)
2031 pxor %xmm14,%xmm14
2032 pxor %xmm15,%xmm15
2033 movq -8(%r11),%rbp
2034 leaq (%r11),%rsp
2035 L$xts_enc_epilogue:
2036 .byte 0xf3,0xc3
2037
2038 .globl _aesni_xts_decrypt
2039 .private_extern _aesni_xts_decrypt
2040
2041 .p2align 4
2042 _aesni_xts_decrypt:
2043 leaq (%rsp),%r11
2044 pushq %rbp
2045 subq $112,%rsp
2046 andq $-16,%rsp
2047 movups (%r9),%xmm2
2048 movl 240(%r8),%eax
2049 movl 240(%rcx),%r10d
2050 movups (%r8),%xmm0
2051 movups 16(%r8),%xmm1
2052 leaq 32(%r8),%r8
2053 xorps %xmm0,%xmm2
2054 L$oop_enc1_11:
2055 .byte 102,15,56,220,209
2056 decl %eax
2057 movups (%r8),%xmm1
2058 leaq 16(%r8),%r8
2059 jnz L$oop_enc1_11
2060 .byte 102,15,56,221,209
2061 xorl %eax,%eax
2062 testq $15,%rdx
2063 setnz %al
2064 shlq $4,%rax
2065 subq %rax,%rdx
2066
2067 movups (%rcx),%xmm0
2068 movq %rcx,%rbp
2069 movl %r10d,%eax
2070 shll $4,%r10d
2071 movq %rdx,%r9
2072 andq $-16,%rdx
2073
2074 movups 16(%rcx,%r10,1),%xmm1
2075
2076 movdqa L$xts_magic(%rip),%xmm8
2077 movdqa %xmm2,%xmm15
2078 pshufd $0x5f,%xmm2,%xmm9
2079 pxor %xmm0,%xmm1
2080 movdqa %xmm9,%xmm14
2081 paddd %xmm9,%xmm9
2082 movdqa %xmm15,%xmm10
2083 psrad $31,%xmm14
2084 paddq %xmm15,%xmm15
2085 pand %xmm8,%xmm14
2086 pxor %xmm0,%xmm10
2087 pxor %xmm14,%xmm15
2088 movdqa %xmm9,%xmm14
2089 paddd %xmm9,%xmm9
2090 movdqa %xmm15,%xmm11
2091 psrad $31,%xmm14
2092 paddq %xmm15,%xmm15
2093 pand %xmm8,%xmm14
2094 pxor %xmm0,%xmm11
2095 pxor %xmm14,%xmm15
2096 movdqa %xmm9,%xmm14
2097 paddd %xmm9,%xmm9
2098 movdqa %xmm15,%xmm12
2099 psrad $31,%xmm14
2100 paddq %xmm15,%xmm15
2101 pand %xmm8,%xmm14
2102 pxor %xmm0,%xmm12
2103 pxor %xmm14,%xmm15
2104 movdqa %xmm9,%xmm14
2105 paddd %xmm9,%xmm9
2106 movdqa %xmm15,%xmm13
2107 psrad $31,%xmm14
2108 paddq %xmm15,%xmm15
2109 pand %xmm8,%xmm14
2110 pxor %xmm0,%xmm13
2111 pxor %xmm14,%xmm15
2112 movdqa %xmm15,%xmm14
2113 psrad $31,%xmm9
2114 paddq %xmm15,%xmm15
2115 pand %xmm8,%xmm9
2116 pxor %xmm0,%xmm14
2117 pxor %xmm9,%xmm15
2118 movaps %xmm1,96(%rsp)
2119
2120 subq $96,%rdx
2121 jc L$xts_dec_short
2122
2123 movl $16+96,%eax
2124 leaq 32(%rbp,%r10,1),%rcx
2125 subq %r10,%rax
2126 movups 16(%rbp),%xmm1
2127 movq %rax,%r10
2128 leaq L$xts_magic(%rip),%r8
2129 jmp L$xts_dec_grandloop
2130
2131 .p2align 5
2132 L$xts_dec_grandloop:
2133 movdqu 0(%rdi),%xmm2
2134 movdqa %xmm0,%xmm8
2135 movdqu 16(%rdi),%xmm3
2136 pxor %xmm10,%xmm2
2137 movdqu 32(%rdi),%xmm4
2138 pxor %xmm11,%xmm3
2139 .byte 102,15,56,222,209
2140 movdqu 48(%rdi),%xmm5
2141 pxor %xmm12,%xmm4
2142 .byte 102,15,56,222,217
2143 movdqu 64(%rdi),%xmm6
2144 pxor %xmm13,%xmm5
2145 .byte 102,15,56,222,225
2146 movdqu 80(%rdi),%xmm7
2147 pxor %xmm15,%xmm8
2148 movdqa 96(%rsp),%xmm9
2149 pxor %xmm14,%xmm6
2150 .byte 102,15,56,222,233
2151 movups 32(%rbp),%xmm0
2152 leaq 96(%rdi),%rdi
2153 pxor %xmm8,%xmm7
2154
2155 pxor %xmm9,%xmm10
2156 .byte 102,15,56,222,241
2157 pxor %xmm9,%xmm11
2158 movdqa %xmm10,0(%rsp)
2159 .byte 102,15,56,222,249
2160 movups 48(%rbp),%xmm1
2161 pxor %xmm9,%xmm12
2162
2163 .byte 102,15,56,222,208
2164 pxor %xmm9,%xmm13
2165 movdqa %xmm11,16(%rsp)
2166 .byte 102,15,56,222,216
2167 pxor %xmm9,%xmm14
2168 movdqa %xmm12,32(%rsp)
2169 .byte 102,15,56,222,224
2170 .byte 102,15,56,222,232
2171 pxor %xmm9,%xmm8
2172 movdqa %xmm14,64(%rsp)
2173 .byte 102,15,56,222,240
2174 .byte 102,15,56,222,248
2175 movups 64(%rbp),%xmm0
2176 movdqa %xmm8,80(%rsp)
2177 pshufd $0x5f,%xmm15,%xmm9
2178 jmp L$xts_dec_loop6
2179 .p2align 5
2180 L$xts_dec_loop6:
2181 .byte 102,15,56,222,209
2182 .byte 102,15,56,222,217
2183 .byte 102,15,56,222,225
2184 .byte 102,15,56,222,233
2185 .byte 102,15,56,222,241
2186 .byte 102,15,56,222,249
2187 movups -64(%rcx,%rax,1),%xmm1
2188 addq $32,%rax
2189
2190 .byte 102,15,56,222,208
2191 .byte 102,15,56,222,216
2192 .byte 102,15,56,222,224
2193 .byte 102,15,56,222,232
2194 .byte 102,15,56,222,240
2195 .byte 102,15,56,222,248
2196 movups -80(%rcx,%rax,1),%xmm0
2197 jnz L$xts_dec_loop6
2198
2199 movdqa (%r8),%xmm8
2200 movdqa %xmm9,%xmm14
2201 paddd %xmm9,%xmm9
2202 .byte 102,15,56,222,209
2203 paddq %xmm15,%xmm15
2204 psrad $31,%xmm14
2205 .byte 102,15,56,222,217
2206 pand %xmm8,%xmm14
2207 movups (%rbp),%xmm10
2208 .byte 102,15,56,222,225
2209 .byte 102,15,56,222,233
2210 .byte 102,15,56,222,241
2211 pxor %xmm14,%xmm15
2212 movaps %xmm10,%xmm11
2213 .byte 102,15,56,222,249
2214 movups -64(%rcx),%xmm1
2215
2216 movdqa %xmm9,%xmm14
2217 .byte 102,15,56,222,208
2218 paddd %xmm9,%xmm9
2219 pxor %xmm15,%xmm10
2220 .byte 102,15,56,222,216
2221 psrad $31,%xmm14
2222 paddq %xmm15,%xmm15
2223 .byte 102,15,56,222,224
2224 .byte 102,15,56,222,232
2225 pand %xmm8,%xmm14
2226 movaps %xmm11,%xmm12
2227 .byte 102,15,56,222,240
2228 pxor %xmm14,%xmm15
2229 movdqa %xmm9,%xmm14
2230 .byte 102,15,56,222,248
2231 movups -48(%rcx),%xmm0
2232
2233 paddd %xmm9,%xmm9
2234 .byte 102,15,56,222,209
2235 pxor %xmm15,%xmm11
2236 psrad $31,%xmm14
2237 .byte 102,15,56,222,217
2238 paddq %xmm15,%xmm15
2239 pand %xmm8,%xmm14
2240 .byte 102,15,56,222,225
2241 .byte 102,15,56,222,233
2242 movdqa %xmm13,48(%rsp)
2243 pxor %xmm14,%xmm15
2244 .byte 102,15,56,222,241
2245 movaps %xmm12,%xmm13
2246 movdqa %xmm9,%xmm14
2247 .byte 102,15,56,222,249
2248 movups -32(%rcx),%xmm1
2249
2250 paddd %xmm9,%xmm9
2251 .byte 102,15,56,222,208
2252 pxor %xmm15,%xmm12
2253 psrad $31,%xmm14
2254 .byte 102,15,56,222,216
2255 paddq %xmm15,%xmm15
2256 pand %xmm8,%xmm14
2257 .byte 102,15,56,222,224
2258 .byte 102,15,56,222,232
2259 .byte 102,15,56,222,240
2260 pxor %xmm14,%xmm15
2261 movaps %xmm13,%xmm14
2262 .byte 102,15,56,222,248
2263
2264 movdqa %xmm9,%xmm0
2265 paddd %xmm9,%xmm9
2266 .byte 102,15,56,222,209
2267 pxor %xmm15,%xmm13
2268 psrad $31,%xmm0
2269 .byte 102,15,56,222,217
2270 paddq %xmm15,%xmm15
2271 pand %xmm8,%xmm0
2272 .byte 102,15,56,222,225
2273 .byte 102,15,56,222,233
2274 pxor %xmm0,%xmm15
2275 movups (%rbp),%xmm0
2276 .byte 102,15,56,222,241
2277 .byte 102,15,56,222,249
2278 movups 16(%rbp),%xmm1
2279
2280 pxor %xmm15,%xmm14
2281 .byte 102,15,56,223,84,36,0
2282 psrad $31,%xmm9
2283 paddq %xmm15,%xmm15
2284 .byte 102,15,56,223,92,36,16
2285 .byte 102,15,56,223,100,36,32
2286 pand %xmm8,%xmm9
2287 movq %r10,%rax
2288 .byte 102,15,56,223,108,36,48
2289 .byte 102,15,56,223,116,36,64
2290 .byte 102,15,56,223,124,36,80
2291 pxor %xmm9,%xmm15
2292
2293 leaq 96(%rsi),%rsi
2294 movups %xmm2,-96(%rsi)
2295 movups %xmm3,-80(%rsi)
2296 movups %xmm4,-64(%rsi)
2297 movups %xmm5,-48(%rsi)
2298 movups %xmm6,-32(%rsi)
2299 movups %xmm7,-16(%rsi)
2300 subq $96,%rdx
2301 jnc L$xts_dec_grandloop
2302
2303 movl $16+96,%eax
2304 subl %r10d,%eax
2305 movq %rbp,%rcx
2306 shrl $4,%eax
2307
2308 L$xts_dec_short:
2309
2310 movl %eax,%r10d
2311 pxor %xmm0,%xmm10
2312 pxor %xmm0,%xmm11
2313 addq $96,%rdx
2314 jz L$xts_dec_done
2315
2316 pxor %xmm0,%xmm12
2317 cmpq $0x20,%rdx
2318 jb L$xts_dec_one
2319 pxor %xmm0,%xmm13
2320 je L$xts_dec_two
2321
2322 pxor %xmm0,%xmm14
2323 cmpq $0x40,%rdx
2324 jb L$xts_dec_three
2325 je L$xts_dec_four
2326
2327 movdqu (%rdi),%xmm2
2328 movdqu 16(%rdi),%xmm3
2329 movdqu 32(%rdi),%xmm4
2330 pxor %xmm10,%xmm2
2331 movdqu 48(%rdi),%xmm5
2332 pxor %xmm11,%xmm3
2333 movdqu 64(%rdi),%xmm6
2334 leaq 80(%rdi),%rdi
2335 pxor %xmm12,%xmm4
2336 pxor %xmm13,%xmm5
2337 pxor %xmm14,%xmm6
2338
2339 call _aesni_decrypt6
2340
2341 xorps %xmm10,%xmm2
2342 xorps %xmm11,%xmm3
2343 xorps %xmm12,%xmm4
2344 movdqu %xmm2,(%rsi)
2345 xorps %xmm13,%xmm5
2346 movdqu %xmm3,16(%rsi)
2347 xorps %xmm14,%xmm6
2348 movdqu %xmm4,32(%rsi)
2349 pxor %xmm14,%xmm14
2350 movdqu %xmm5,48(%rsi)
2351 pcmpgtd %xmm15,%xmm14
2352 movdqu %xmm6,64(%rsi)
2353 leaq 80(%rsi),%rsi
2354 pshufd $0x13,%xmm14,%xmm11
2355 andq $15,%r9
2356 jz L$xts_dec_ret
2357
2358 movdqa %xmm15,%xmm10
2359 paddq %xmm15,%xmm15
2360 pand %xmm8,%xmm11
2361 pxor %xmm15,%xmm11
2362 jmp L$xts_dec_done2
2363
2364 .p2align 4
2365 L$xts_dec_one:
2366 movups (%rdi),%xmm2
2367 leaq 16(%rdi),%rdi
2368 xorps %xmm10,%xmm2
2369 movups (%rcx),%xmm0
2370 movups 16(%rcx),%xmm1
2371 leaq 32(%rcx),%rcx
2372 xorps %xmm0,%xmm2
2373 L$oop_dec1_12:
2374 .byte 102,15,56,222,209
2375 decl %eax
2376 movups (%rcx),%xmm1
2377 leaq 16(%rcx),%rcx
2378 jnz L$oop_dec1_12
2379 .byte 102,15,56,223,209
2380 xorps %xmm10,%xmm2
2381 movdqa %xmm11,%xmm10
2382 movups %xmm2,(%rsi)
2383 movdqa %xmm12,%xmm11
2384 leaq 16(%rsi),%rsi
2385 jmp L$xts_dec_done
2386
2387 .p2align 4
2388 L$xts_dec_two:
2389 movups (%rdi),%xmm2
2390 movups 16(%rdi),%xmm3
2391 leaq 32(%rdi),%rdi
2392 xorps %xmm10,%xmm2
2393 xorps %xmm11,%xmm3
2394
2395 call _aesni_decrypt2
2396
2397 xorps %xmm10,%xmm2
2398 movdqa %xmm12,%xmm10
2399 xorps %xmm11,%xmm3
2400 movdqa %xmm13,%xmm11
2401 movups %xmm2,(%rsi)
2402 movups %xmm3,16(%rsi)
2403 leaq 32(%rsi),%rsi
2404 jmp L$xts_dec_done
2405
2406 .p2align 4
2407 L$xts_dec_three:
2408 movups (%rdi),%xmm2
2409 movups 16(%rdi),%xmm3
2410 movups 32(%rdi),%xmm4
2411 leaq 48(%rdi),%rdi
2412 xorps %xmm10,%xmm2
2413 xorps %xmm11,%xmm3
2414 xorps %xmm12,%xmm4
2415
2416 call _aesni_decrypt3
2417
2418 xorps %xmm10,%xmm2
2419 movdqa %xmm13,%xmm10
2420 xorps %xmm11,%xmm3
2421 movdqa %xmm14,%xmm11
2422 xorps %xmm12,%xmm4
2423 movups %xmm2,(%rsi)
2424 movups %xmm3,16(%rsi)
2425 movups %xmm4,32(%rsi)
2426 leaq 48(%rsi),%rsi
2427 jmp L$xts_dec_done
2428
2429 .p2align 4
2430 L$xts_dec_four:
2431 movups (%rdi),%xmm2
2432 movups 16(%rdi),%xmm3
2433 movups 32(%rdi),%xmm4
2434 xorps %xmm10,%xmm2
2435 movups 48(%rdi),%xmm5
2436 leaq 64(%rdi),%rdi
2437 xorps %xmm11,%xmm3
2438 xorps %xmm12,%xmm4
2439 xorps %xmm13,%xmm5
2440
2441 call _aesni_decrypt4
2442
2443 pxor %xmm10,%xmm2
2444 movdqa %xmm14,%xmm10
2445 pxor %xmm11,%xmm3
2446 movdqa %xmm15,%xmm11
2447 pxor %xmm12,%xmm4
2448 movdqu %xmm2,(%rsi)
2449 pxor %xmm13,%xmm5
2450 movdqu %xmm3,16(%rsi)
2451 movdqu %xmm4,32(%rsi)
2452 movdqu %xmm5,48(%rsi)
2453 leaq 64(%rsi),%rsi
2454 jmp L$xts_dec_done
2455
2456 .p2align 4
2457 L$xts_dec_done:
2458 andq $15,%r9
2459 jz L$xts_dec_ret
2460 L$xts_dec_done2:
2461 movq %r9,%rdx
2462 movq %rbp,%rcx
2463 movl %r10d,%eax
2464
2465 movups (%rdi),%xmm2
2466 xorps %xmm11,%xmm2
2467 movups (%rcx),%xmm0
2468 movups 16(%rcx),%xmm1
2469 leaq 32(%rcx),%rcx
2470 xorps %xmm0,%xmm2
2471 L$oop_dec1_13:
2472 .byte 102,15,56,222,209
2473 decl %eax
2474 movups (%rcx),%xmm1
2475 leaq 16(%rcx),%rcx
2476 jnz L$oop_dec1_13
2477 .byte 102,15,56,223,209
2478 xorps %xmm11,%xmm2
2479 movups %xmm2,(%rsi)
2480
2481 L$xts_dec_steal:
2482 movzbl 16(%rdi),%eax
2483 movzbl (%rsi),%ecx
2484 leaq 1(%rdi),%rdi
2485 movb %al,(%rsi)
2486 movb %cl,16(%rsi)
2487 leaq 1(%rsi),%rsi
2488 subq $1,%rdx
2489 jnz L$xts_dec_steal
2490
2491 subq %r9,%rsi
2492 movq %rbp,%rcx
2493 movl %r10d,%eax
2494
2495 movups (%rsi),%xmm2
2496 xorps %xmm10,%xmm2
2497 movups (%rcx),%xmm0
2498 movups 16(%rcx),%xmm1
2499 leaq 32(%rcx),%rcx
2500 xorps %xmm0,%xmm2
2501 L$oop_dec1_14:
2502 .byte 102,15,56,222,209
2503 decl %eax
2504 movups (%rcx),%xmm1
2505 leaq 16(%rcx),%rcx
2506 jnz L$oop_dec1_14
2507 .byte 102,15,56,223,209
2508 xorps %xmm10,%xmm2
2509 movups %xmm2,(%rsi)
2510
2511 L$xts_dec_ret:
2512 xorps %xmm0,%xmm0
2513 pxor %xmm1,%xmm1
2514 pxor %xmm2,%xmm2
2515 pxor %xmm3,%xmm3
2516 pxor %xmm4,%xmm4
2517 pxor %xmm5,%xmm5
2518 pxor %xmm6,%xmm6
2519 pxor %xmm7,%xmm7
2520 movaps %xmm0,0(%rsp)
2521 pxor %xmm8,%xmm8
2522 movaps %xmm0,16(%rsp)
2523 pxor %xmm9,%xmm9
2524 movaps %xmm0,32(%rsp)
2525 pxor %xmm10,%xmm10
2526 movaps %xmm0,48(%rsp)
2527 pxor %xmm11,%xmm11
2528 movaps %xmm0,64(%rsp)
2529 pxor %xmm12,%xmm12
2530 movaps %xmm0,80(%rsp)
2531 pxor %xmm13,%xmm13
2532 movaps %xmm0,96(%rsp)
2533 pxor %xmm14,%xmm14
2534 pxor %xmm15,%xmm15
2535 movq -8(%r11),%rbp
2536 leaq (%r11),%rsp
2537 L$xts_dec_epilogue:
2538 .byte 0xf3,0xc3
2539
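// _aesni_ocb_encrypt: AES-NI OCB-mode bulk encryption. Assuming the usual
// CRYPTOGAMS/OpenSSL prototype (not stated in this generated listing), the
// SysV arguments are rdi=in, rsi=out, rdx=block count, rcx=AES_KEY,
// r8=starting block number and r9=offset_i[16]; the L_[][16] table and the
// checksum[16] pointer arrive on the stack and are loaded into rbx and rbp
// below. The .byte 102,15,56,220/221 sequences throughout are hand-encoded
// aesenc/aesenclast instructions, and .byte 0xf3,0xc3 is a plain "rep ret".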
2540 .globl _aesni_ocb_encrypt
2541 .private_extern _aesni_ocb_encrypt
2542
2543 .p2align 5
2544 _aesni_ocb_encrypt:
2545 leaq (%rsp),%rax
2546 pushq %rbx
2547 pushq %rbp
2548 pushq %r12
2549 pushq %r13
2550 pushq %r14
2551 movq 8(%rax),%rbx
2552 movq 8+8(%rax),%rbp
2553
2554 movl 240(%rcx),%r10d
2555 movq %rcx,%r11
2556 shll $4,%r10d
2557 movups (%rcx),%xmm9
2558 movups 16(%rcx,%r10,1),%xmm1
2559
2560 movdqu (%r9),%xmm15
2561 pxor %xmm1,%xmm9
2562 pxor %xmm1,%xmm15
2563
2564 movl $16+32,%eax
2565 leaq 32(%r11,%r10,1),%rcx
2566 movups 16(%r11),%xmm1
2567 subq %r10,%rax
2568 movq %rax,%r10
2569
2570 movdqu (%rbx),%xmm10
2571 movdqu (%rbp),%xmm8
2572
2573 testq $1,%r8
2574 jnz L$ocb_enc_odd
2575
2576 bsfq %r8,%r12
2577 addq $1,%r8
2578 shlq $4,%r12
2579 movdqu (%rbx,%r12,1),%xmm7
2580 movdqu (%rdi),%xmm2
2581 leaq 16(%rdi),%rdi
2582
2583 call __ocb_encrypt1
2584
2585 movdqa %xmm7,%xmm15
2586 movups %xmm2,(%rsi)
2587 leaq 16(%rsi),%rsi
2588 subq $1,%rdx
2589 jz L$ocb_enc_done
2590
2591 L$ocb_enc_odd:
2592 leaq 1(%r8),%r12
2593 leaq 3(%r8),%r13
2594 leaq 5(%r8),%r14
2595 leaq 6(%r8),%r8
2596 bsfq %r12,%r12
2597 bsfq %r13,%r13
2598 bsfq %r14,%r14
2599 shlq $4,%r12
2600 shlq $4,%r13
2601 shlq $4,%r14
2602
2603 subq $6,%rdx
2604 jc L$ocb_enc_short
2605 jmp L$ocb_enc_grandloop
2606
2607 .p2align 5
2608 L$ocb_enc_grandloop:
2609 movdqu 0(%rdi),%xmm2
2610 movdqu 16(%rdi),%xmm3
2611 movdqu 32(%rdi),%xmm4
2612 movdqu 48(%rdi),%xmm5
2613 movdqu 64(%rdi),%xmm6
2614 movdqu 80(%rdi),%xmm7
2615 leaq 96(%rdi),%rdi
2616
2617 call __ocb_encrypt6
2618
2619 movups %xmm2,0(%rsi)
2620 movups %xmm3,16(%rsi)
2621 movups %xmm4,32(%rsi)
2622 movups %xmm5,48(%rsi)
2623 movups %xmm6,64(%rsi)
2624 movups %xmm7,80(%rsi)
2625 leaq 96(%rsi),%rsi
2626 subq $6,%rdx
2627 jnc L$ocb_enc_grandloop
2628
2629 L$ocb_enc_short:
2630 addq $6,%rdx
2631 jz L$ocb_enc_done
2632
2633 movdqu 0(%rdi),%xmm2
2634 cmpq $2,%rdx
2635 jb L$ocb_enc_one
2636 movdqu 16(%rdi),%xmm3
2637 je L$ocb_enc_two
2638
2639 movdqu 32(%rdi),%xmm4
2640 cmpq $4,%rdx
2641 jb L$ocb_enc_three
2642 movdqu 48(%rdi),%xmm5
2643 je L$ocb_enc_four
2644
2645 movdqu 64(%rdi),%xmm6
2646 pxor %xmm7,%xmm7
2647
2648 call __ocb_encrypt6
2649
2650 movdqa %xmm14,%xmm15
2651 movups %xmm2,0(%rsi)
2652 movups %xmm3,16(%rsi)
2653 movups %xmm4,32(%rsi)
2654 movups %xmm5,48(%rsi)
2655 movups %xmm6,64(%rsi)
2656
2657 jmp L$ocb_enc_done
2658
2659 .p2align 4
2660 L$ocb_enc_one:
2661 movdqa %xmm10,%xmm7
2662
2663 call __ocb_encrypt1
2664
2665 movdqa %xmm7,%xmm15
2666 movups %xmm2,0(%rsi)
2667 jmp L$ocb_enc_done
2668
2669 .p2align 4
2670 L$ocb_enc_two:
2671 pxor %xmm4,%xmm4
2672 pxor %xmm5,%xmm5
2673
2674 call __ocb_encrypt4
2675
2676 movdqa %xmm11,%xmm15
2677 movups %xmm2,0(%rsi)
2678 movups %xmm3,16(%rsi)
2679
2680 jmp L$ocb_enc_done
2681
2682 .p2align 4
2683 L$ocb_enc_three:
2684 pxor %xmm5,%xmm5
2685
2686 call __ocb_encrypt4
2687
2688 movdqa %xmm12,%xmm15
2689 movups %xmm2,0(%rsi)
2690 movups %xmm3,16(%rsi)
2691 movups %xmm4,32(%rsi)
2692
2693 jmp L$ocb_enc_done
2694
2695 .p2align 4
2696 L$ocb_enc_four:
2697 call __ocb_encrypt4
2698
2699 movdqa %xmm13,%xmm15
2700 movups %xmm2,0(%rsi)
2701 movups %xmm3,16(%rsi)
2702 movups %xmm4,32(%rsi)
2703 movups %xmm5,48(%rsi)
2704
2705 L$ocb_enc_done:
2706 pxor %xmm0,%xmm15
2707 movdqu %xmm8,(%rbp)
2708 movdqu %xmm15,(%r9)
2709
2710 xorps %xmm0,%xmm0
2711 pxor %xmm1,%xmm1
2712 pxor %xmm2,%xmm2
2713 pxor %xmm3,%xmm3
2714 pxor %xmm4,%xmm4
2715 pxor %xmm5,%xmm5
2716 pxor %xmm6,%xmm6
2717 pxor %xmm7,%xmm7
2718 pxor %xmm8,%xmm8
2719 pxor %xmm9,%xmm9
2720 pxor %xmm10,%xmm10
2721 pxor %xmm11,%xmm11
2722 pxor %xmm12,%xmm12
2723 pxor %xmm13,%xmm13
2724 pxor %xmm14,%xmm14
2725 pxor %xmm15,%xmm15
2726 leaq 40(%rsp),%rax
2727 movq -40(%rax),%r14
2728 movq -32(%rax),%r13
2729 movq -24(%rax),%r12
2730 movq -16(%rax),%rbp
2731 movq -8(%rax),%rbx
2732 leaq (%rax),%rsp
2733 L$ocb_enc_epilogue:
2734 .byte 0xf3,0xc3
2735
2736
2737
2738 .p2align 5
2739 __ocb_encrypt6:
2740 pxor %xmm9,%xmm15
2741 movdqu (%rbx,%r12,1),%xmm11
2742 movdqa %xmm10,%xmm12
2743 movdqu (%rbx,%r13,1),%xmm13
2744 movdqa %xmm10,%xmm14
2745 pxor %xmm15,%xmm10
2746 movdqu (%rbx,%r14,1),%xmm15
2747 pxor %xmm10,%xmm11
2748 pxor %xmm2,%xmm8
2749 pxor %xmm10,%xmm2
2750 pxor %xmm11,%xmm12
2751 pxor %xmm3,%xmm8
2752 pxor %xmm11,%xmm3
2753 pxor %xmm12,%xmm13
2754 pxor %xmm4,%xmm8
2755 pxor %xmm12,%xmm4
2756 pxor %xmm13,%xmm14
2757 pxor %xmm5,%xmm8
2758 pxor %xmm13,%xmm5
2759 pxor %xmm14,%xmm15
2760 pxor %xmm6,%xmm8
2761 pxor %xmm14,%xmm6
2762 pxor %xmm7,%xmm8
2763 pxor %xmm15,%xmm7
2764 movups 32(%r11),%xmm0
2765
2766 leaq 1(%r8),%r12
2767 leaq 3(%r8),%r13
2768 leaq 5(%r8),%r14
2769 addq $6,%r8
2770 pxor %xmm9,%xmm10
2771 bsfq %r12,%r12
2772 bsfq %r13,%r13
2773 bsfq %r14,%r14
2774
2775 .byte 102,15,56,220,209
2776 .byte 102,15,56,220,217
2777 .byte 102,15,56,220,225
2778 .byte 102,15,56,220,233
2779 pxor %xmm9,%xmm11
2780 pxor %xmm9,%xmm12
2781 .byte 102,15,56,220,241
2782 pxor %xmm9,%xmm13
2783 pxor %xmm9,%xmm14
2784 .byte 102,15,56,220,249
2785 movups 48(%r11),%xmm1
2786 pxor %xmm9,%xmm15
2787
2788 .byte 102,15,56,220,208
2789 .byte 102,15,56,220,216
2790 .byte 102,15,56,220,224
2791 .byte 102,15,56,220,232
2792 .byte 102,15,56,220,240
2793 .byte 102,15,56,220,248
2794 movups 64(%r11),%xmm0
2795 shlq $4,%r12
2796 shlq $4,%r13
2797 jmp L$ocb_enc_loop6
2798
2799 .p2align 5
2800 L$ocb_enc_loop6:
2801 .byte 102,15,56,220,209
2802 .byte 102,15,56,220,217
2803 .byte 102,15,56,220,225
2804 .byte 102,15,56,220,233
2805 .byte 102,15,56,220,241
2806 .byte 102,15,56,220,249
2807 movups (%rcx,%rax,1),%xmm1
2808 addq $32,%rax
2809
2810 .byte 102,15,56,220,208
2811 .byte 102,15,56,220,216
2812 .byte 102,15,56,220,224
2813 .byte 102,15,56,220,232
2814 .byte 102,15,56,220,240
2815 .byte 102,15,56,220,248
2816 movups -16(%rcx,%rax,1),%xmm0
2817 jnz L$ocb_enc_loop6
2818
2819 .byte 102,15,56,220,209
2820 .byte 102,15,56,220,217
2821 .byte 102,15,56,220,225
2822 .byte 102,15,56,220,233
2823 .byte 102,15,56,220,241
2824 .byte 102,15,56,220,249
2825 movups 16(%r11),%xmm1
2826 shlq $4,%r14
2827
2828 .byte 102,65,15,56,221,210
2829 movdqu (%rbx),%xmm10
2830 movq %r10,%rax
2831 .byte 102,65,15,56,221,219
2832 .byte 102,65,15,56,221,228
2833 .byte 102,65,15,56,221,237
2834 .byte 102,65,15,56,221,246
2835 .byte 102,65,15,56,221,255
2836 .byte 0xf3,0xc3
2837
2838
2839
2840 .p2align 5
2841 __ocb_encrypt4:
2842 pxor %xmm9,%xmm15
2843 movdqu (%rbx,%r12,1),%xmm11
2844 movdqa %xmm10,%xmm12
2845 movdqu (%rbx,%r13,1),%xmm13
2846 pxor %xmm15,%xmm10
2847 pxor %xmm10,%xmm11
2848 pxor %xmm2,%xmm8
2849 pxor %xmm10,%xmm2
2850 pxor %xmm11,%xmm12
2851 pxor %xmm3,%xmm8
2852 pxor %xmm11,%xmm3
2853 pxor %xmm12,%xmm13
2854 pxor %xmm4,%xmm8
2855 pxor %xmm12,%xmm4
2856 pxor %xmm5,%xmm8
2857 pxor %xmm13,%xmm5
2858 movups 32(%r11),%xmm0
2859
2860 pxor %xmm9,%xmm10
2861 pxor %xmm9,%xmm11
2862 pxor %xmm9,%xmm12
2863 pxor %xmm9,%xmm13
2864
2865 .byte 102,15,56,220,209
2866 .byte 102,15,56,220,217
2867 .byte 102,15,56,220,225
2868 .byte 102,15,56,220,233
2869 movups 48(%r11),%xmm1
2870
2871 .byte 102,15,56,220,208
2872 .byte 102,15,56,220,216
2873 .byte 102,15,56,220,224
2874 .byte 102,15,56,220,232
2875 movups 64(%r11),%xmm0
2876 jmp L$ocb_enc_loop4
2877
2878 .p2align 5
2879 L$ocb_enc_loop4:
2880 .byte 102,15,56,220,209
2881 .byte 102,15,56,220,217
2882 .byte 102,15,56,220,225
2883 .byte 102,15,56,220,233
2884 movups (%rcx,%rax,1),%xmm1
2885 addq $32,%rax
2886
2887 .byte 102,15,56,220,208
2888 .byte 102,15,56,220,216
2889 .byte 102,15,56,220,224
2890 .byte 102,15,56,220,232
2891 movups -16(%rcx,%rax,1),%xmm0
2892 jnz L$ocb_enc_loop4
2893
2894 .byte 102,15,56,220,209
2895 .byte 102,15,56,220,217
2896 .byte 102,15,56,220,225
2897 .byte 102,15,56,220,233
2898 movups 16(%r11),%xmm1
2899 movq %r10,%rax
2900
2901 .byte 102,65,15,56,221,210
2902 .byte 102,65,15,56,221,219
2903 .byte 102,65,15,56,221,228
2904 .byte 102,65,15,56,221,237
2905 .byte 0xf3,0xc3
2906
2907
2908
2909 .p2align 5
2910 __ocb_encrypt1:
2911 pxor %xmm15,%xmm7
2912 pxor %xmm9,%xmm7
2913 pxor %xmm2,%xmm8
2914 pxor %xmm7,%xmm2
2915 movups 32(%r11),%xmm0
2916
2917 .byte 102,15,56,220,209
2918 movups 48(%r11),%xmm1
2919 pxor %xmm9,%xmm7
2920
2921 .byte 102,15,56,220,208
2922 movups 64(%r11),%xmm0
2923 jmp L$ocb_enc_loop1
2924
2925 .p2align 5
2926 L$ocb_enc_loop1:
2927 .byte 102,15,56,220,209
2928 movups (%rcx,%rax,1),%xmm1
2929 addq $32,%rax
2930
2931 .byte 102,15,56,220,208
2932 movups -16(%rcx,%rax,1),%xmm0
2933 jnz L$ocb_enc_loop1
2934
2935 .byte 102,15,56,220,209
2936 movups 16(%r11),%xmm1
2937 movq %r10,%rax
2938
2939 .byte 102,15,56,221,215
2940 .byte 0xf3,0xc3
2941
2942
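// _aesni_ocb_decrypt: OCB-mode bulk decryption, mirroring _aesni_ocb_encrypt
// above with the same assumed argument layout. The main structural
// difference is that the running checksum in xmm8 is accumulated over the
// decrypted plaintext blocks as they are written out, rather than over the
// inputs before encryption.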
2943 .globl _aesni_ocb_decrypt
2944 .private_extern _aesni_ocb_decrypt
2945
2946 .p2align 5
2947 _aesni_ocb_decrypt:
2948 leaq (%rsp),%rax
2949 pushq %rbx
2950 pushq %rbp
2951 pushq %r12
2952 pushq %r13
2953 pushq %r14
2954 movq 8(%rax),%rbx
2955 movq 8+8(%rax),%rbp
2956
2957 movl 240(%rcx),%r10d
2958 movq %rcx,%r11
2959 shll $4,%r10d
2960 movups (%rcx),%xmm9
2961 movups 16(%rcx,%r10,1),%xmm1
2962
2963 movdqu (%r9),%xmm15
2964 pxor %xmm1,%xmm9
2965 pxor %xmm1,%xmm15
2966
2967 movl $16+32,%eax
2968 leaq 32(%r11,%r10,1),%rcx
2969 movups 16(%r11),%xmm1
2970 subq %r10,%rax
2971 movq %rax,%r10
2972
2973 movdqu (%rbx),%xmm10
2974 movdqu (%rbp),%xmm8
2975
2976 testq $1,%r8
2977 jnz L$ocb_dec_odd
2978
2979 bsfq %r8,%r12
2980 addq $1,%r8
2981 shlq $4,%r12
2982 movdqu (%rbx,%r12,1),%xmm7
2983 movdqu (%rdi),%xmm2
2984 leaq 16(%rdi),%rdi
2985
2986 call __ocb_decrypt1
2987
2988 movdqa %xmm7,%xmm15
2989 movups %xmm2,(%rsi)
2990 xorps %xmm2,%xmm8
2991 leaq 16(%rsi),%rsi
2992 subq $1,%rdx
2993 jz L$ocb_dec_done
2994
2995 L$ocb_dec_odd:
2996 leaq 1(%r8),%r12
2997 leaq 3(%r8),%r13
2998 leaq 5(%r8),%r14
2999 leaq 6(%r8),%r8
3000 bsfq %r12,%r12
3001 bsfq %r13,%r13
3002 bsfq %r14,%r14
3003 shlq $4,%r12
3004 shlq $4,%r13
3005 shlq $4,%r14
3006
3007 subq $6,%rdx
3008 jc L$ocb_dec_short
3009 jmp L$ocb_dec_grandloop
3010
3011 .p2align 5
3012 L$ocb_dec_grandloop:
3013 movdqu 0(%rdi),%xmm2
3014 movdqu 16(%rdi),%xmm3
3015 movdqu 32(%rdi),%xmm4
3016 movdqu 48(%rdi),%xmm5
3017 movdqu 64(%rdi),%xmm6
3018 movdqu 80(%rdi),%xmm7
3019 leaq 96(%rdi),%rdi
3020
3021 call __ocb_decrypt6
3022
3023 movups %xmm2,0(%rsi)
3024 pxor %xmm2,%xmm8
3025 movups %xmm3,16(%rsi)
3026 pxor %xmm3,%xmm8
3027 movups %xmm4,32(%rsi)
3028 pxor %xmm4,%xmm8
3029 movups %xmm5,48(%rsi)
3030 pxor %xmm5,%xmm8
3031 movups %xmm6,64(%rsi)
3032 pxor %xmm6,%xmm8
3033 movups %xmm7,80(%rsi)
3034 pxor %xmm7,%xmm8
3035 leaq 96(%rsi),%rsi
3036 subq $6,%rdx
3037 jnc L$ocb_dec_grandloop
3038
3039 L$ocb_dec_short:
3040 addq $6,%rdx
3041 jz L$ocb_dec_done
3042
3043 movdqu 0(%rdi),%xmm2
3044 cmpq $2,%rdx
3045 jb L$ocb_dec_one
3046 movdqu 16(%rdi),%xmm3
3047 je L$ocb_dec_two
3048
3049 movdqu 32(%rdi),%xmm4
3050 cmpq $4,%rdx
3051 jb L$ocb_dec_three
3052 movdqu 48(%rdi),%xmm5
3053 je L$ocb_dec_four
3054
3055 movdqu 64(%rdi),%xmm6
3056 pxor %xmm7,%xmm7
3057
3058 call __ocb_decrypt6
3059
3060 movdqa %xmm14,%xmm15
3061 movups %xmm2,0(%rsi)
3062 pxor %xmm2,%xmm8
3063 movups %xmm3,16(%rsi)
3064 pxor %xmm3,%xmm8
3065 movups %xmm4,32(%rsi)
3066 pxor %xmm4,%xmm8
3067 movups %xmm5,48(%rsi)
3068 pxor %xmm5,%xmm8
3069 movups %xmm6,64(%rsi)
3070 pxor %xmm6,%xmm8
3071
3072 jmp L$ocb_dec_done
3073
3074 .p2align 4
3075 L$ocb_dec_one:
3076 movdqa %xmm10,%xmm7
3077
3078 call __ocb_decrypt1
3079
3080 movdqa %xmm7,%xmm15
3081 movups %xmm2,0(%rsi)
3082 xorps %xmm2,%xmm8
3083 jmp L$ocb_dec_done
3084
3085 .p2align 4
3086 L$ocb_dec_two:
3087 pxor %xmm4,%xmm4
3088 pxor %xmm5,%xmm5
3089
3090 call __ocb_decrypt4
3091
3092 movdqa %xmm11,%xmm15
3093 movups %xmm2,0(%rsi)
3094 xorps %xmm2,%xmm8
3095 movups %xmm3,16(%rsi)
3096 xorps %xmm3,%xmm8
3097
3098 jmp L$ocb_dec_done
3099
3100 .p2align 4
3101 L$ocb_dec_three:
3102 pxor %xmm5,%xmm5
3103
3104 call __ocb_decrypt4
3105
3106 movdqa %xmm12,%xmm15
3107 movups %xmm2,0(%rsi)
3108 xorps %xmm2,%xmm8
3109 movups %xmm3,16(%rsi)
3110 xorps %xmm3,%xmm8
3111 movups %xmm4,32(%rsi)
3112 xorps %xmm4,%xmm8
3113
3114 jmp L$ocb_dec_done
3115
3116 .p2align 4
3117 L$ocb_dec_four:
3118 call __ocb_decrypt4
3119
3120 movdqa %xmm13,%xmm15
3121 movups %xmm2,0(%rsi)
3122 pxor %xmm2,%xmm8
3123 movups %xmm3,16(%rsi)
3124 pxor %xmm3,%xmm8
3125 movups %xmm4,32(%rsi)
3126 pxor %xmm4,%xmm8
3127 movups %xmm5,48(%rsi)
3128 pxor %xmm5,%xmm8
3129
3130 L$ocb_dec_done:
3131 pxor %xmm0,%xmm15
3132 movdqu %xmm8,(%rbp)
3133 movdqu %xmm15,(%r9)
3134
3135 xorps %xmm0,%xmm0
3136 pxor %xmm1,%xmm1
3137 pxor %xmm2,%xmm2
3138 pxor %xmm3,%xmm3
3139 pxor %xmm4,%xmm4
3140 pxor %xmm5,%xmm5
3141 pxor %xmm6,%xmm6
3142 pxor %xmm7,%xmm7
3143 pxor %xmm8,%xmm8
3144 pxor %xmm9,%xmm9
3145 pxor %xmm10,%xmm10
3146 pxor %xmm11,%xmm11
3147 pxor %xmm12,%xmm12
3148 pxor %xmm13,%xmm13
3149 pxor %xmm14,%xmm14
3150 pxor %xmm15,%xmm15
3151 leaq 40(%rsp),%rax
3152 movq -40(%rax),%r14
3153 movq -32(%rax),%r13
3154 movq -24(%rax),%r12
3155 movq -16(%rax),%rbp
3156 movq -8(%rax),%rbx
3157 leaq (%rax),%rsp
3158 L$ocb_dec_epilogue:
3159 .byte 0xf3,0xc3
3160
3161
3162
3163 .p2align 5
3164 __ocb_decrypt6:
3165 pxor %xmm9,%xmm15
3166 movdqu (%rbx,%r12,1),%xmm11
3167 movdqa %xmm10,%xmm12
3168 movdqu (%rbx,%r13,1),%xmm13
3169 movdqa %xmm10,%xmm14
3170 pxor %xmm15,%xmm10
3171 movdqu (%rbx,%r14,1),%xmm15
3172 pxor %xmm10,%xmm11
3173 pxor %xmm10,%xmm2
3174 pxor %xmm11,%xmm12
3175 pxor %xmm11,%xmm3
3176 pxor %xmm12,%xmm13
3177 pxor %xmm12,%xmm4
3178 pxor %xmm13,%xmm14
3179 pxor %xmm13,%xmm5
3180 pxor %xmm14,%xmm15
3181 pxor %xmm14,%xmm6
3182 pxor %xmm15,%xmm7
3183 movups 32(%r11),%xmm0
3184
3185 leaq 1(%r8),%r12
3186 leaq 3(%r8),%r13
3187 leaq 5(%r8),%r14
3188 addq $6,%r8
3189 pxor %xmm9,%xmm10
3190 bsfq %r12,%r12
3191 bsfq %r13,%r13
3192 bsfq %r14,%r14
3193
3194 .byte 102,15,56,222,209
3195 .byte 102,15,56,222,217
3196 .byte 102,15,56,222,225
3197 .byte 102,15,56,222,233
3198 pxor %xmm9,%xmm11
3199 pxor %xmm9,%xmm12
3200 .byte 102,15,56,222,241
3201 pxor %xmm9,%xmm13
3202 pxor %xmm9,%xmm14
3203 .byte 102,15,56,222,249
3204 movups 48(%r11),%xmm1
3205 pxor %xmm9,%xmm15
3206
3207 .byte 102,15,56,222,208
3208 .byte 102,15,56,222,216
3209 .byte 102,15,56,222,224
3210 .byte 102,15,56,222,232
3211 .byte 102,15,56,222,240
3212 .byte 102,15,56,222,248
3213 movups 64(%r11),%xmm0
3214 shlq $4,%r12
3215 shlq $4,%r13
3216 jmp L$ocb_dec_loop6
3217
3218 .p2align 5
3219 L$ocb_dec_loop6:
3220 .byte 102,15,56,222,209
3221 .byte 102,15,56,222,217
3222 .byte 102,15,56,222,225
3223 .byte 102,15,56,222,233
3224 .byte 102,15,56,222,241
3225 .byte 102,15,56,222,249
3226 movups (%rcx,%rax,1),%xmm1
3227 addq $32,%rax
3228
3229 .byte 102,15,56,222,208
3230 .byte 102,15,56,222,216
3231 .byte 102,15,56,222,224
3232 .byte 102,15,56,222,232
3233 .byte 102,15,56,222,240
3234 .byte 102,15,56,222,248
3235 movups -16(%rcx,%rax,1),%xmm0
3236 jnz L$ocb_dec_loop6
3237
3238 .byte 102,15,56,222,209
3239 .byte 102,15,56,222,217
3240 .byte 102,15,56,222,225
3241 .byte 102,15,56,222,233
3242 .byte 102,15,56,222,241
3243 .byte 102,15,56,222,249
3244 movups 16(%r11),%xmm1
3245 shlq $4,%r14
3246
3247 .byte 102,65,15,56,223,210
3248 movdqu (%rbx),%xmm10
3249 movq %r10,%rax
3250 .byte 102,65,15,56,223,219
3251 .byte 102,65,15,56,223,228
3252 .byte 102,65,15,56,223,237
3253 .byte 102,65,15,56,223,246
3254 .byte 102,65,15,56,223,255
3255 .byte 0xf3,0xc3
3256
3257
3258
3259 .p2align 5
3260 __ocb_decrypt4:
3261 pxor %xmm9,%xmm15
3262 movdqu (%rbx,%r12,1),%xmm11
3263 movdqa %xmm10,%xmm12
3264 movdqu (%rbx,%r13,1),%xmm13
3265 pxor %xmm15,%xmm10
3266 pxor %xmm10,%xmm11
3267 pxor %xmm10,%xmm2
3268 pxor %xmm11,%xmm12
3269 pxor %xmm11,%xmm3
3270 pxor %xmm12,%xmm13
3271 pxor %xmm12,%xmm4
3272 pxor %xmm13,%xmm5
3273 movups 32(%r11),%xmm0
3274
3275 pxor %xmm9,%xmm10
3276 pxor %xmm9,%xmm11
3277 pxor %xmm9,%xmm12
3278 pxor %xmm9,%xmm13
3279
3280 .byte 102,15,56,222,209
3281 .byte 102,15,56,222,217
3282 .byte 102,15,56,222,225
3283 .byte 102,15,56,222,233
3284 movups 48(%r11),%xmm1
3285
3286 .byte 102,15,56,222,208
3287 .byte 102,15,56,222,216
3288 .byte 102,15,56,222,224
3289 .byte 102,15,56,222,232
3290 movups 64(%r11),%xmm0
3291 jmp L$ocb_dec_loop4
3292
3293 .p2align 5
3294 L$ocb_dec_loop4:
3295 .byte 102,15,56,222,209
3296 .byte 102,15,56,222,217
3297 .byte 102,15,56,222,225
3298 .byte 102,15,56,222,233
3299 movups (%rcx,%rax,1),%xmm1
3300 addq $32,%rax
3301
3302 .byte 102,15,56,222,208
3303 .byte 102,15,56,222,216
3304 .byte 102,15,56,222,224
3305 .byte 102,15,56,222,232
3306 movups -16(%rcx,%rax,1),%xmm0
3307 jnz L$ocb_dec_loop4
3308
3309 .byte 102,15,56,222,209
3310 .byte 102,15,56,222,217
3311 .byte 102,15,56,222,225
3312 .byte 102,15,56,222,233
3313 movups 16(%r11),%xmm1
3314 movq %r10,%rax
3315
3316 .byte 102,65,15,56,223,210
3317 .byte 102,65,15,56,223,219
3318 .byte 102,65,15,56,223,228
3319 .byte 102,65,15,56,223,237
3320 .byte 0xf3,0xc3
3321
3322
3323
3324 .p2align 5
3325 __ocb_decrypt1:
3326 pxor %xmm15,%xmm7
3327 pxor %xmm9,%xmm7
3328 pxor %xmm7,%xmm2
3329 movups 32(%r11),%xmm0
3330
3331 .byte 102,15,56,222,209
3332 movups 48(%r11),%xmm1
3333 pxor %xmm9,%xmm7
3334
3335 .byte 102,15,56,222,208
3336 movups 64(%r11),%xmm0
3337 jmp L$ocb_dec_loop1
3338
3339 .p2align 5
3340 L$ocb_dec_loop1:
3341 .byte 102,15,56,222,209
3342 movups (%rcx,%rax,1),%xmm1
3343 addq $32,%rax
3344
3345 .byte 102,15,56,222,208
3346 movups -16(%rcx,%rax,1),%xmm0
3347 jnz L$ocb_dec_loop1
3348
3349 .byte 102,15,56,222,209
3350 movups 16(%r11),%xmm1
3351 movq %r10,%rax
3352
3353 .byte 102,15,56,223,215
3354 .byte 0xf3,0xc3
3355
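// _aesni_cbc_encrypt: CBC-mode encrypt/decrypt. Assuming the standard
// OpenSSL prototype (in, out, length, key, ivec, enc), rdi=in, rsi=out,
// rdx=length in bytes, rcx=AES_KEY, r8=ivec and r9d=enc (non-zero selects
// encryption). Encryption is inherently serial and runs one block per
// iteration; decryption has a one-block fast path and a bulk path further
// down that keeps up to eight blocks in flight.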
3356 .globl _aesni_cbc_encrypt
3357 .private_extern _aesni_cbc_encrypt
3358
3359 .p2align 4
3360 _aesni_cbc_encrypt:
3361 testq %rdx,%rdx
3362 jz L$cbc_ret
3363
3364 movl 240(%rcx),%r10d
3365 movq %rcx,%r11
3366 testl %r9d,%r9d
3367 jz L$cbc_decrypt
3368
3369 movups (%r8),%xmm2
3370 movl %r10d,%eax
3371 cmpq $16,%rdx
3372 jb L$cbc_enc_tail
3373 subq $16,%rdx
3374 jmp L$cbc_enc_loop
3375 .p2align 4
3376 L$cbc_enc_loop:
3377 movups (%rdi),%xmm3
3378 leaq 16(%rdi),%rdi
3379
3380 movups (%rcx),%xmm0
3381 movups 16(%rcx),%xmm1
3382 xorps %xmm0,%xmm3
3383 leaq 32(%rcx),%rcx
3384 xorps %xmm3,%xmm2
3385 L$oop_enc1_15:
3386 .byte 102,15,56,220,209
3387 decl %eax
3388 movups (%rcx),%xmm1
3389 leaq 16(%rcx),%rcx
3390 jnz L$oop_enc1_15
3391 .byte 102,15,56,221,209
3392 movl %r10d,%eax
3393 movq %r11,%rcx
3394 movups %xmm2,0(%rsi)
3395 leaq 16(%rsi),%rsi
3396 subq $16,%rdx
3397 jnc L$cbc_enc_loop
3398 addq $16,%rdx
3399 jnz L$cbc_enc_tail
3400 pxor %xmm0,%xmm0
3401 pxor %xmm1,%xmm1
3402 movups %xmm2,(%r8)
3403 pxor %xmm2,%xmm2
3404 pxor %xmm3,%xmm3
3405 jmp L$cbc_ret
3406
3407 L$cbc_enc_tail:
3408 movq %rdx,%rcx
3409 xchgq %rdi,%rsi
3410 .long 0x9066A4F3
3411 movl $16,%ecx
3412 subq %rdx,%rcx
3413 xorl %eax,%eax
3414 .long 0x9066AAF3
3415 leaq -16(%rdi),%rdi
3416 movl %r10d,%eax
3417 movq %rdi,%rsi
3418 movq %r11,%rcx
3419 xorq %rdx,%rdx
3420 jmp L$cbc_enc_loop
3421
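// CBC decryption: an input of exactly 16 bytes takes the short path below;
// the bulk path keeps copies of the previous ciphertext blocks in
// xmm10-xmm15 so they can be XORed into the next decryption results,
// running eight blocks per iteration of L$cbc_dec_loop8 (or six, depending
// on an _OPENSSL_ia32cap_P capability check).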
3422 .p2align 4
3423 L$cbc_decrypt:
3424 cmpq $16,%rdx
3425 jne L$cbc_decrypt_bulk
3426
3427
3428
3429 movdqu (%rdi),%xmm2
3430 movdqu (%r8),%xmm3
3431 movdqa %xmm2,%xmm4
3432 movups (%rcx),%xmm0
3433 movups 16(%rcx),%xmm1
3434 leaq 32(%rcx),%rcx
3435 xorps %xmm0,%xmm2
3436 L$oop_dec1_16:
3437 .byte 102,15,56,222,209
3438 decl %r10d
3439 movups (%rcx),%xmm1
3440 leaq 16(%rcx),%rcx
3441 jnz L$oop_dec1_16
3442 .byte 102,15,56,223,209
3443 pxor %xmm0,%xmm0
3444 pxor %xmm1,%xmm1
3445 movdqu %xmm4,(%r8)
3446 xorps %xmm3,%xmm2
3447 pxor %xmm3,%xmm3
3448 movups %xmm2,(%rsi)
3449 pxor %xmm2,%xmm2
3450 jmp L$cbc_ret
3451 .p2align 4
3452 L$cbc_decrypt_bulk:
3453 leaq (%rsp),%r11
3454 pushq %rbp
3455 subq $16,%rsp
3456 andq $-16,%rsp
3457 movq %rcx,%rbp
3458 movups (%r8),%xmm10
3459 movl %r10d,%eax
3460 cmpq $0x50,%rdx
3461 jbe L$cbc_dec_tail
3462
3463 movups (%rcx),%xmm0
3464 movdqu 0(%rdi),%xmm2
3465 movdqu 16(%rdi),%xmm3
3466 movdqa %xmm2,%xmm11
3467 movdqu 32(%rdi),%xmm4
3468 movdqa %xmm3,%xmm12
3469 movdqu 48(%rdi),%xmm5
3470 movdqa %xmm4,%xmm13
3471 movdqu 64(%rdi),%xmm6
3472 movdqa %xmm5,%xmm14
3473 movdqu 80(%rdi),%xmm7
3474 movdqa %xmm6,%xmm15
3475 movl _OPENSSL_ia32cap_P+4(%rip),%r9d
3476 cmpq $0x70,%rdx
3477 jbe L$cbc_dec_six_or_seven
3478
3479 andl $71303168,%r9d
3480 subq $0x50,%rdx
3481 cmpl $4194304,%r9d
3482 je L$cbc_dec_loop6_enter
3483 subq $0x20,%rdx
3484 leaq 112(%rcx),%rcx
3485 jmp L$cbc_dec_loop8_enter
3486 .p2align 4
3487 L$cbc_dec_loop8:
3488 movups %xmm9,(%rsi)
3489 leaq 16(%rsi),%rsi
3490 L$cbc_dec_loop8_enter:
3491 movdqu 96(%rdi),%xmm8
3492 pxor %xmm0,%xmm2
3493 movdqu 112(%rdi),%xmm9
3494 pxor %xmm0,%xmm3
3495 movups 16-112(%rcx),%xmm1
3496 pxor %xmm0,%xmm4
3497 movq $-1,%rbp
3498 cmpq $0x70,%rdx
3499 pxor %xmm0,%xmm5
3500 pxor %xmm0,%xmm6
3501 pxor %xmm0,%xmm7
3502 pxor %xmm0,%xmm8
3503
3504 .byte 102,15,56,222,209
3505 pxor %xmm0,%xmm9
3506 movups 32-112(%rcx),%xmm0
3507 .byte 102,15,56,222,217
3508 .byte 102,15,56,222,225
3509 .byte 102,15,56,222,233
3510 .byte 102,15,56,222,241
3511 .byte 102,15,56,222,249
3512 .byte 102,68,15,56,222,193
3513 adcq $0,%rbp
3514 andq $128,%rbp
3515 .byte 102,68,15,56,222,201
3516 addq %rdi,%rbp
3517 movups 48-112(%rcx),%xmm1
3518 .byte 102,15,56,222,208
3519 .byte 102,15,56,222,216
3520 .byte 102,15,56,222,224
3521 .byte 102,15,56,222,232
3522 .byte 102,15,56,222,240
3523 .byte 102,15,56,222,248
3524 .byte 102,68,15,56,222,192
3525 .byte 102,68,15,56,222,200
3526 movups 64-112(%rcx),%xmm0
3527 nop
3528 .byte 102,15,56,222,209
3529 .byte 102,15,56,222,217
3530 .byte 102,15,56,222,225
3531 .byte 102,15,56,222,233
3532 .byte 102,15,56,222,241
3533 .byte 102,15,56,222,249
3534 .byte 102,68,15,56,222,193
3535 .byte 102,68,15,56,222,201
3536 movups 80-112(%rcx),%xmm1
3537 nop
3538 .byte 102,15,56,222,208
3539 .byte 102,15,56,222,216
3540 .byte 102,15,56,222,224
3541 .byte 102,15,56,222,232
3542 .byte 102,15,56,222,240
3543 .byte 102,15,56,222,248
3544 .byte 102,68,15,56,222,192
3545 .byte 102,68,15,56,222,200
3546 movups 96-112(%rcx),%xmm0
3547 nop
3548 .byte 102,15,56,222,209
3549 .byte 102,15,56,222,217
3550 .byte 102,15,56,222,225
3551 .byte 102,15,56,222,233
3552 .byte 102,15,56,222,241
3553 .byte 102,15,56,222,249
3554 .byte 102,68,15,56,222,193
3555 .byte 102,68,15,56,222,201
3556 movups 112-112(%rcx),%xmm1
3557 nop
3558 .byte 102,15,56,222,208
3559 .byte 102,15,56,222,216
3560 .byte 102,15,56,222,224
3561 .byte 102,15,56,222,232
3562 .byte 102,15,56,222,240
3563 .byte 102,15,56,222,248
3564 .byte 102,68,15,56,222,192
3565 .byte 102,68,15,56,222,200
3566 movups 128-112(%rcx),%xmm0
3567 nop
3568 .byte 102,15,56,222,209
3569 .byte 102,15,56,222,217
3570 .byte 102,15,56,222,225
3571 .byte 102,15,56,222,233
3572 .byte 102,15,56,222,241
3573 .byte 102,15,56,222,249
3574 .byte 102,68,15,56,222,193
3575 .byte 102,68,15,56,222,201
3576 movups 144-112(%rcx),%xmm1
3577 cmpl $11,%eax
3578 .byte 102,15,56,222,208
3579 .byte 102,15,56,222,216
3580 .byte 102,15,56,222,224
3581 .byte 102,15,56,222,232
3582 .byte 102,15,56,222,240
3583 .byte 102,15,56,222,248
3584 .byte 102,68,15,56,222,192
3585 .byte 102,68,15,56,222,200
3586 movups 160-112(%rcx),%xmm0
3587 jb L$cbc_dec_done
3588 .byte 102,15,56,222,209
3589 .byte 102,15,56,222,217
3590 .byte 102,15,56,222,225
3591 .byte 102,15,56,222,233
3592 .byte 102,15,56,222,241
3593 .byte 102,15,56,222,249
3594 .byte 102,68,15,56,222,193
3595 .byte 102,68,15,56,222,201
3596 movups 176-112(%rcx),%xmm1
3597 nop
3598 .byte 102,15,56,222,208
3599 .byte 102,15,56,222,216
3600 .byte 102,15,56,222,224
3601 .byte 102,15,56,222,232
3602 .byte 102,15,56,222,240
3603 .byte 102,15,56,222,248
3604 .byte 102,68,15,56,222,192
3605 .byte 102,68,15,56,222,200
3606 movups 192-112(%rcx),%xmm0
3607 je L$cbc_dec_done
3608 .byte 102,15,56,222,209
3609 .byte 102,15,56,222,217
3610 .byte 102,15,56,222,225
3611 .byte 102,15,56,222,233
3612 .byte 102,15,56,222,241
3613 .byte 102,15,56,222,249
3614 .byte 102,68,15,56,222,193
3615 .byte 102,68,15,56,222,201
3616 movups 208-112(%rcx),%xmm1
3617 nop
3618 .byte 102,15,56,222,208
3619 .byte 102,15,56,222,216
3620 .byte 102,15,56,222,224
3621 .byte 102,15,56,222,232
3622 .byte 102,15,56,222,240
3623 .byte 102,15,56,222,248
3624 .byte 102,68,15,56,222,192
3625 .byte 102,68,15,56,222,200
3626 movups 224-112(%rcx),%xmm0
3627 jmp L$cbc_dec_done
3628 .p2align 4
3629 L$cbc_dec_done:
3630 .byte 102,15,56,222,209
3631 .byte 102,15,56,222,217
3632 pxor %xmm0,%xmm10
3633 pxor %xmm0,%xmm11
3634 .byte 102,15,56,222,225
3635 .byte 102,15,56,222,233
3636 pxor %xmm0,%xmm12
3637 pxor %xmm0,%xmm13
3638 .byte 102,15,56,222,241
3639 .byte 102,15,56,222,249
3640 pxor %xmm0,%xmm14
3641 pxor %xmm0,%xmm15
3642 .byte 102,68,15,56,222,193
3643 .byte 102,68,15,56,222,201
3644 movdqu 80(%rdi),%xmm1
3645
3646 .byte 102,65,15,56,223,210
3647 movdqu 96(%rdi),%xmm10
3648 pxor %xmm0,%xmm1
3649 .byte 102,65,15,56,223,219
3650 pxor %xmm0,%xmm10
3651 movdqu 112(%rdi),%xmm0
3652 .byte 102,65,15,56,223,228
3653 leaq 128(%rdi),%rdi
3654 movdqu 0(%rbp),%xmm11
3655 .byte 102,65,15,56,223,237
3656 .byte 102,65,15,56,223,246
3657 movdqu 16(%rbp),%xmm12
3658 movdqu 32(%rbp),%xmm13
3659 .byte 102,65,15,56,223,255
3660 .byte 102,68,15,56,223,193
3661 movdqu 48(%rbp),%xmm14
3662 movdqu 64(%rbp),%xmm15
3663 .byte 102,69,15,56,223,202
3664 movdqa %xmm0,%xmm10
3665 movdqu 80(%rbp),%xmm1
3666 movups -112(%rcx),%xmm0
3667
3668 movups %xmm2,(%rsi)
3669 movdqa %xmm11,%xmm2
3670 movups %xmm3,16(%rsi)
3671 movdqa %xmm12,%xmm3
3672 movups %xmm4,32(%rsi)
3673 movdqa %xmm13,%xmm4
3674 movups %xmm5,48(%rsi)
3675 movdqa %xmm14,%xmm5
3676 movups %xmm6,64(%rsi)
3677 movdqa %xmm15,%xmm6
3678 movups %xmm7,80(%rsi)
3679 movdqa %xmm1,%xmm7
3680 movups %xmm8,96(%rsi)
3681 leaq 112(%rsi),%rsi
3682
3683 subq $0x80,%rdx
3684 ja L$cbc_dec_loop8
3685
3686 movaps %xmm9,%xmm2
3687 leaq -112(%rcx),%rcx
3688 addq $0x70,%rdx
3689 jle L$cbc_dec_clear_tail_collected
3690 movups %xmm9,(%rsi)
3691 leaq 16(%rsi),%rsi
3692 cmpq $0x50,%rdx
3693 jbe L$cbc_dec_tail
3694
3695 movaps %xmm11,%xmm2
3696 L$cbc_dec_six_or_seven:
3697 cmpq $0x60,%rdx
3698 ja L$cbc_dec_seven
3699
3700 movaps %xmm7,%xmm8
3701 call _aesni_decrypt6
3702 pxor %xmm10,%xmm2
3703 movaps %xmm8,%xmm10
3704 pxor %xmm11,%xmm3
3705 movdqu %xmm2,(%rsi)
3706 pxor %xmm12,%xmm4
3707 movdqu %xmm3,16(%rsi)
3708 pxor %xmm3,%xmm3
3709 pxor %xmm13,%xmm5
3710 movdqu %xmm4,32(%rsi)
3711 pxor %xmm4,%xmm4
3712 pxor %xmm14,%xmm6
3713 movdqu %xmm5,48(%rsi)
3714 pxor %xmm5,%xmm5
3715 pxor %xmm15,%xmm7
3716 movdqu %xmm6,64(%rsi)
3717 pxor %xmm6,%xmm6
3718 leaq 80(%rsi),%rsi
3719 movdqa %xmm7,%xmm2
3720 pxor %xmm7,%xmm7
3721 jmp L$cbc_dec_tail_collected
3722
3723 .p2align 4
3724 L$cbc_dec_seven:
3725 movups 96(%rdi),%xmm8
3726 xorps %xmm9,%xmm9
3727 call _aesni_decrypt8
3728 movups 80(%rdi),%xmm9
3729 pxor %xmm10,%xmm2
3730 movups 96(%rdi),%xmm10
3731 pxor %xmm11,%xmm3
3732 movdqu %xmm2,(%rsi)
3733 pxor %xmm12,%xmm4
3734 movdqu %xmm3,16(%rsi)
3735 pxor %xmm3,%xmm3
3736 pxor %xmm13,%xmm5
3737 movdqu %xmm4,32(%rsi)
3738 pxor %xmm4,%xmm4
3739 pxor %xmm14,%xmm6
3740 movdqu %xmm5,48(%rsi)
3741 pxor %xmm5,%xmm5
3742 pxor %xmm15,%xmm7
3743 movdqu %xmm6,64(%rsi)
3744 pxor %xmm6,%xmm6
3745 pxor %xmm9,%xmm8
3746 movdqu %xmm7,80(%rsi)
3747 pxor %xmm7,%xmm7
3748 leaq 96(%rsi),%rsi
3749 movdqa %xmm8,%xmm2
3750 pxor %xmm8,%xmm8
3751 pxor %xmm9,%xmm9
3752 jmp L$cbc_dec_tail_collected
3753
3754 .p2align 4
3755 L$cbc_dec_loop6:
3756 movups %xmm7,(%rsi)
3757 leaq 16(%rsi),%rsi
3758 movdqu 0(%rdi),%xmm2
3759 movdqu 16(%rdi),%xmm3
3760 movdqa %xmm2,%xmm11
3761 movdqu 32(%rdi),%xmm4
3762 movdqa %xmm3,%xmm12
3763 movdqu 48(%rdi),%xmm5
3764 movdqa %xmm4,%xmm13
3765 movdqu 64(%rdi),%xmm6
3766 movdqa %xmm5,%xmm14
3767 movdqu 80(%rdi),%xmm7
3768 movdqa %xmm6,%xmm15
3769 L$cbc_dec_loop6_enter:
3770 leaq 96(%rdi),%rdi
3771 movdqa %xmm7,%xmm8
3772
3773 call _aesni_decrypt6
3774
3775 pxor %xmm10,%xmm2
3776 movdqa %xmm8,%xmm10
3777 pxor %xmm11,%xmm3
3778 movdqu %xmm2,(%rsi)
3779 pxor %xmm12,%xmm4
3780 movdqu %xmm3,16(%rsi)
3781 pxor %xmm13,%xmm5
3782 movdqu %xmm4,32(%rsi)
3783 pxor %xmm14,%xmm6
3784 movq %rbp,%rcx
3785 movdqu %xmm5,48(%rsi)
3786 pxor %xmm15,%xmm7
3787 movl %r10d,%eax
3788 movdqu %xmm6,64(%rsi)
3789 leaq 80(%rsi),%rsi
3790 subq $0x60,%rdx
3791 ja L$cbc_dec_loop6
3792
3793 movdqa %xmm7,%xmm2
3794 addq $0x50,%rdx
3795 jle L$cbc_dec_clear_tail_collected
3796 movups %xmm7,(%rsi)
3797 leaq 16(%rsi),%rsi
3798
3799 L$cbc_dec_tail:
3800 movups (%rdi),%xmm2
3801 subq $0x10,%rdx
3802 jbe L$cbc_dec_one
3803
3804 movups 16(%rdi),%xmm3
3805 movaps %xmm2,%xmm11
3806 subq $0x10,%rdx
3807 jbe L$cbc_dec_two
3808
3809 movups 32(%rdi),%xmm4
3810 movaps %xmm3,%xmm12
3811 subq $0x10,%rdx
3812 jbe L$cbc_dec_three
3813
3814 movups 48(%rdi),%xmm5
3815 movaps %xmm4,%xmm13
3816 subq $0x10,%rdx
3817 jbe L$cbc_dec_four
3818
3819 movups 64(%rdi),%xmm6
3820 movaps %xmm5,%xmm14
3821 movaps %xmm6,%xmm15
3822 xorps %xmm7,%xmm7
3823 call _aesni_decrypt6
3824 pxor %xmm10,%xmm2
3825 movaps %xmm15,%xmm10
3826 pxor %xmm11,%xmm3
3827 movdqu %xmm2,(%rsi)
3828 pxor %xmm12,%xmm4
3829 movdqu %xmm3,16(%rsi)
3830 pxor %xmm3,%xmm3
3831 pxor %xmm13,%xmm5
3832 movdqu %xmm4,32(%rsi)
3833 pxor %xmm4,%xmm4
3834 pxor %xmm14,%xmm6
3835 movdqu %xmm5,48(%rsi)
3836 pxor %xmm5,%xmm5
3837 leaq 64(%rsi),%rsi
3838 movdqa %xmm6,%xmm2
3839 pxor %xmm6,%xmm6
3840 pxor %xmm7,%xmm7
3841 subq $0x10,%rdx
3842 jmp L$cbc_dec_tail_collected
3843
3844 .p2align 4
3845 L$cbc_dec_one:
3846 movaps %xmm2,%xmm11
3847 movups (%rcx),%xmm0
3848 movups 16(%rcx),%xmm1
3849 leaq 32(%rcx),%rcx
3850 xorps %xmm0,%xmm2
3851 L$oop_dec1_17:
3852 .byte 102,15,56,222,209
3853 decl %eax
3854 movups (%rcx),%xmm1
3855 leaq 16(%rcx),%rcx
3856 jnz L$oop_dec1_17
3857 .byte 102,15,56,223,209
3858 xorps %xmm10,%xmm2
3859 movaps %xmm11,%xmm10
3860 jmp L$cbc_dec_tail_collected
3861 .p2align 4
3862 L$cbc_dec_two:
3863 movaps %xmm3,%xmm12
3864 call _aesni_decrypt2
3865 pxor %xmm10,%xmm2
3866 movaps %xmm12,%xmm10
3867 pxor %xmm11,%xmm3
3868 movdqu %xmm2,(%rsi)
3869 movdqa %xmm3,%xmm2
3870 pxor %xmm3,%xmm3
3871 leaq 16(%rsi),%rsi
3872 jmp L$cbc_dec_tail_collected
3873 .p2align 4
3874 L$cbc_dec_three:
3875 movaps %xmm4,%xmm13
3876 call _aesni_decrypt3
3877 pxor %xmm10,%xmm2
3878 movaps %xmm13,%xmm10
3879 pxor %xmm11,%xmm3
3880 movdqu %xmm2,(%rsi)
3881 pxor %xmm12,%xmm4
3882 movdqu %xmm3,16(%rsi)
3883 pxor %xmm3,%xmm3
3884 movdqa %xmm4,%xmm2
3885 pxor %xmm4,%xmm4
3886 leaq 32(%rsi),%rsi
3887 jmp L$cbc_dec_tail_collected
3888 .p2align 4
3889 L$cbc_dec_four:
3890 movaps %xmm5,%xmm14
3891 call _aesni_decrypt4
3892 pxor %xmm10,%xmm2
3893 movaps %xmm14,%xmm10
3894 pxor %xmm11,%xmm3
3895 movdqu %xmm2,(%rsi)
3896 pxor %xmm12,%xmm4
3897 movdqu %xmm3,16(%rsi)
3898 pxor %xmm3,%xmm3
3899 pxor %xmm13,%xmm5
3900 movdqu %xmm4,32(%rsi)
3901 pxor %xmm4,%xmm4
3902 movdqa %xmm5,%xmm2
3903 pxor %xmm5,%xmm5
3904 leaq 48(%rsi),%rsi
3905 jmp L$cbc_dec_tail_collected
3906
3907 .p2align 4
3908 L$cbc_dec_clear_tail_collected:
3909 pxor %xmm3,%xmm3
3910 pxor %xmm4,%xmm4
3911 pxor %xmm5,%xmm5
3912 pxor %xmm6,%xmm6
3913 pxor %xmm7,%xmm7
3914 pxor %xmm8,%xmm8
3915 pxor %xmm9,%xmm9
3916 L$cbc_dec_tail_collected:
3917 movups %xmm10,(%r8)
3918 andq $15,%rdx
3919 jnz L$cbc_dec_tail_partial
3920 movups %xmm2,(%rsi)
3921 pxor %xmm2,%xmm2
3922 jmp L$cbc_dec_ret
3923 .p2align 4
3924 L$cbc_dec_tail_partial:
3925 movaps %xmm2,(%rsp)
3926 pxor %xmm2,%xmm2
3927 movq $16,%rcx
3928 movq %rsi,%rdi
3929 subq %rdx,%rcx
3930 leaq (%rsp),%rsi
3931 .long 0x9066A4F3
3932 movdqa %xmm2,(%rsp)
3933
3934 L$cbc_dec_ret:
3935 xorps %xmm0,%xmm0
3936 pxor %xmm1,%xmm1
3937 movq -8(%r11),%rbp
3938 leaq (%r11),%rsp
3939 L$cbc_ret:
3940 .byte 0xf3,0xc3
3941
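// _aesni_set_decrypt_key: derives the decryption schedule by calling
// __aesni_set_encrypt_key, swapping the first and last round keys, and then
// applying aesimc (the .byte 102,15,56,219 sequences) to the inner round
// keys while reversing their order in place.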
3942 .globl _aesni_set_decrypt_key
3943 .private_extern _aesni_set_decrypt_key
3944
3945 .p2align 4
3946 _aesni_set_decrypt_key:
3947 .byte 0x48,0x83,0xEC,0x08
3948 call __aesni_set_encrypt_key
3949 shll $4,%esi
3950 testl %eax,%eax
3951 jnz L$dec_key_ret
3952 leaq 16(%rdx,%rsi,1),%rdi
3953
3954 movups (%rdx),%xmm0
3955 movups (%rdi),%xmm1
3956 movups %xmm0,(%rdi)
3957 movups %xmm1,(%rdx)
3958 leaq 16(%rdx),%rdx
3959 leaq -16(%rdi),%rdi
3960
3961 L$dec_key_inverse:
3962 movups (%rdx),%xmm0
3963 movups (%rdi),%xmm1
3964 .byte 102,15,56,219,192
3965 .byte 102,15,56,219,201
3966 leaq 16(%rdx),%rdx
3967 leaq -16(%rdi),%rdi
3968 movups %xmm0,16(%rdi)
3969 movups %xmm1,-16(%rdx)
3970 cmpq %rdx,%rdi
3971 ja L$dec_key_inverse
3972
3973 movups (%rdx),%xmm0
3974 .byte 102,15,56,219,192
3975 pxor %xmm1,%xmm1
3976 movups %xmm0,(%rdi)
3977 pxor %xmm0,%xmm0
3978 L$dec_key_ret:
3979 addq $8,%rsp
3980 .byte 0xf3,0xc3
3981 L$SEH_end_set_decrypt_key:
3982
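// _aesni_set_encrypt_key: expands the user key at rdi (esi bits) into the
// AES_KEY at rdx. It returns 0 on success, -1 for a NULL pointer and -2 for
// an unsupported key size. A check against _OPENSSL_ia32cap_P selects either
// the aeskeygenassist-based expansion (the .byte 102,15,58,223 sequences) or
// the "_alt" paths built from aesenclast (102,15,56,221) and pshufb
// (102,15,56,0).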
3983 .globl _aesni_set_encrypt_key
3984 .private_extern _aesni_set_encrypt_key
3985
3986 .p2align 4
3987 _aesni_set_encrypt_key:
3988 __aesni_set_encrypt_key:
3989 .byte 0x48,0x83,0xEC,0x08
3990 movq $-1,%rax
3991 testq %rdi,%rdi
3992 jz L$enc_key_ret
3993 testq %rdx,%rdx
3994 jz L$enc_key_ret
3995
3996 movl $268437504,%r10d
3997 movups (%rdi),%xmm0
3998 xorps %xmm4,%xmm4
3999 andl _OPENSSL_ia32cap_P+4(%rip),%r10d
4000 leaq 16(%rdx),%rax
4001 cmpl $256,%esi
4002 je L$14rounds
4003 cmpl $192,%esi
4004 je L$12rounds
4005 cmpl $128,%esi
4006 jne L$bad_keybits
4007
4008 L$10rounds:
4009 movl $9,%esi
4010 cmpl $268435456,%r10d
4011 je L$10rounds_alt
4012
4013 movups %xmm0,(%rdx)
4014 .byte 102,15,58,223,200,1
4015 call L$key_expansion_128_cold
4016 .byte 102,15,58,223,200,2
4017 call L$key_expansion_128
4018 .byte 102,15,58,223,200,4
4019 call L$key_expansion_128
4020 .byte 102,15,58,223,200,8
4021 call L$key_expansion_128
4022 .byte 102,15,58,223,200,16
4023 call L$key_expansion_128
4024 .byte 102,15,58,223,200,32
4025 call L$key_expansion_128
4026 .byte 102,15,58,223,200,64
4027 call L$key_expansion_128
4028 .byte 102,15,58,223,200,128
4029 call L$key_expansion_128
4030 .byte 102,15,58,223,200,27
4031 call L$key_expansion_128
4032 .byte 102,15,58,223,200,54
4033 call L$key_expansion_128
4034 movups %xmm0,(%rax)
4035 movl %esi,80(%rax)
4036 xorl %eax,%eax
4037 jmp L$enc_key_ret
4038
4039 .p2align 4
4040 L$10rounds_alt:
4041 movdqa L$key_rotate(%rip),%xmm5
4042 movl $8,%r10d
4043 movdqa L$key_rcon1(%rip),%xmm4
4044 movdqa %xmm0,%xmm2
4045 movdqu %xmm0,(%rdx)
4046 jmp L$oop_key128
4047
4048 .p2align 4
4049 L$oop_key128:
4050 .byte 102,15,56,0,197
4051 .byte 102,15,56,221,196
4052 pslld $1,%xmm4
4053 leaq 16(%rax),%rax
4054
4055 movdqa %xmm2,%xmm3
4056 pslldq $4,%xmm2
4057 pxor %xmm2,%xmm3
4058 pslldq $4,%xmm2
4059 pxor %xmm2,%xmm3
4060 pslldq $4,%xmm2
4061 pxor %xmm3,%xmm2
4062
4063 pxor %xmm2,%xmm0
4064 movdqu %xmm0,-16(%rax)
4065 movdqa %xmm0,%xmm2
4066
4067 decl %r10d
4068 jnz L$oop_key128
4069
4070 movdqa L$key_rcon1b(%rip),%xmm4
4071
4072 .byte 102,15,56,0,197
4073 .byte 102,15,56,221,196
4074 pslld $1,%xmm4
4075
4076 movdqa %xmm2,%xmm3
4077 pslldq $4,%xmm2
4078 pxor %xmm2,%xmm3
4079 pslldq $4,%xmm2
4080 pxor %xmm2,%xmm3
4081 pslldq $4,%xmm2
4082 pxor %xmm3,%xmm2
4083
4084 pxor %xmm2,%xmm0
4085 movdqu %xmm0,(%rax)
4086
4087 movdqa %xmm0,%xmm2
4088 .byte 102,15,56,0,197
4089 .byte 102,15,56,221,196
4090
4091 movdqa %xmm2,%xmm3
4092 pslldq $4,%xmm2
4093 pxor %xmm2,%xmm3
4094 pslldq $4,%xmm2
4095 pxor %xmm2,%xmm3
4096 pslldq $4,%xmm2
4097 pxor %xmm3,%xmm2
4098
4099 pxor %xmm2,%xmm0
4100 movdqu %xmm0,16(%rax)
4101
4102 movl %esi,96(%rax)
4103 xorl %eax,%eax
4104 jmp L$enc_key_ret
4105
4106 .p2align 4
4107 L$12rounds:
4108 movq 16(%rdi),%xmm2
4109 movl $11,%esi
4110 cmpl $268435456,%r10d
4111 je L$12rounds_alt
4112
4113 movups %xmm0,(%rdx)
4114 .byte 102,15,58,223,202,1
4115 call L$key_expansion_192a_cold
4116 .byte 102,15,58,223,202,2
4117 call L$key_expansion_192b
4118 .byte 102,15,58,223,202,4
4119 call L$key_expansion_192a
4120 .byte 102,15,58,223,202,8
4121 call L$key_expansion_192b
4122 .byte 102,15,58,223,202,16
4123 call L$key_expansion_192a
4124 .byte 102,15,58,223,202,32
4125 call L$key_expansion_192b
4126 .byte 102,15,58,223,202,64
4127 call L$key_expansion_192a
4128 .byte 102,15,58,223,202,128
4129 call L$key_expansion_192b
4130 movups %xmm0,(%rax)
4131 movl %esi,48(%rax)
4132 xorq %rax,%rax
4133 jmp L$enc_key_ret
4134
4135 .p2align 4
4136 L$12rounds_alt:
4137 movdqa L$key_rotate192(%rip),%xmm5
4138 movdqa L$key_rcon1(%rip),%xmm4
4139 movl $8,%r10d
4140 movdqu %xmm0,(%rdx)
4141 jmp L$oop_key192
4142
4143 .p2align 4
4144 L$oop_key192:
4145 movq %xmm2,0(%rax)
4146 movdqa %xmm2,%xmm1
4147 .byte 102,15,56,0,213
4148 .byte 102,15,56,221,212
4149 pslld $1,%xmm4
4150 leaq 24(%rax),%rax
4151
4152 movdqa %xmm0,%xmm3
4153 pslldq $4,%xmm0
4154 pxor %xmm0,%xmm3
4155 pslldq $4,%xmm0
4156 pxor %xmm0,%xmm3
4157 pslldq $4,%xmm0
4158 pxor %xmm3,%xmm0
4159
4160 pshufd $0xff,%xmm0,%xmm3
4161 pxor %xmm1,%xmm3
4162 pslldq $4,%xmm1
4163 pxor %xmm1,%xmm3
4164
4165 pxor %xmm2,%xmm0
4166 pxor %xmm3,%xmm2
4167 movdqu %xmm0,-16(%rax)
4168
4169 decl %r10d
4170 jnz L$oop_key192
4171
4172 movl %esi,32(%rax)
4173 xorl %eax,%eax
4174 jmp L$enc_key_ret
4175
4176 .p2align 4
4177 L$14rounds:
4178 movups 16(%rdi),%xmm2
4179 movl $13,%esi
4180 leaq 16(%rax),%rax
4181 cmpl $268435456,%r10d
4182 je L$14rounds_alt
4183
4184 movups %xmm0,(%rdx)
4185 movups %xmm2,16(%rdx)
4186 .byte 102,15,58,223,202,1
4187 call L$key_expansion_256a_cold
4188 .byte 102,15,58,223,200,1
4189 call L$key_expansion_256b
4190 .byte 102,15,58,223,202,2
4191 call L$key_expansion_256a
4192 .byte 102,15,58,223,200,2
4193 call L$key_expansion_256b
4194 .byte 102,15,58,223,202,4
4195 call L$key_expansion_256a
4196 .byte 102,15,58,223,200,4
4197 call L$key_expansion_256b
4198 .byte 102,15,58,223,202,8
4199 call L$key_expansion_256a
4200 .byte 102,15,58,223,200,8
4201 call L$key_expansion_256b
4202 .byte 102,15,58,223,202,16
4203 call L$key_expansion_256a
4204 .byte 102,15,58,223,200,16
4205 call L$key_expansion_256b
4206 .byte 102,15,58,223,202,32
4207 call L$key_expansion_256a
4208 .byte 102,15,58,223,200,32
4209 call L$key_expansion_256b
4210 .byte 102,15,58,223,202,64
4211 call L$key_expansion_256a
4212 movups %xmm0,(%rax)
4213 movl %esi,16(%rax)
4214 xorq %rax,%rax
4215 jmp L$enc_key_ret
4216
4217 .p2align 4
4218 L$14rounds_alt:
4219 movdqa L$key_rotate(%rip),%xmm5
4220 movdqa L$key_rcon1(%rip),%xmm4
4221 movl $7,%r10d
4222 movdqu %xmm0,0(%rdx)
4223 movdqa %xmm2,%xmm1
4224 movdqu %xmm2,16(%rdx)
4225 jmp L$oop_key256
4226
4227 .p2align 4
4228 L$oop_key256:
4229 .byte 102,15,56,0,213
4230 .byte 102,15,56,221,212
4231
4232 movdqa %xmm0,%xmm3
4233 pslldq $4,%xmm0
4234 pxor %xmm0,%xmm3
4235 pslldq $4,%xmm0
4236 pxor %xmm0,%xmm3
4237 pslldq $4,%xmm0
4238 pxor %xmm3,%xmm0
4239 pslld $1,%xmm4
4240
4241 pxor %xmm2,%xmm0
4242 movdqu %xmm0,(%rax)
4243
4244 decl %r10d
4245 jz L$done_key256
4246
4247 pshufd $0xff,%xmm0,%xmm2
4248 pxor %xmm3,%xmm3
4249 .byte 102,15,56,221,211
4250
4251 movdqa %xmm1,%xmm3
4252 pslldq $4,%xmm1
4253 pxor %xmm1,%xmm3
4254 pslldq $4,%xmm1
4255 pxor %xmm1,%xmm3
4256 pslldq $4,%xmm1
4257 pxor %xmm3,%xmm1
4258
4259 pxor %xmm1,%xmm2
4260 movdqu %xmm2,16(%rax)
4261 leaq 32(%rax),%rax
4262 movdqa %xmm2,%xmm1
4263
4264 jmp L$oop_key256
4265
4266 L$done_key256:
4267 movl %esi,16(%rax)
4268 xorl %eax,%eax
4269 jmp L$enc_key_ret
4270
4271 .p2align 4
4272 L$bad_keybits:
4273 movq $-2,%rax
4274 L$enc_key_ret:
4275 pxor %xmm0,%xmm0
4276 pxor %xmm1,%xmm1
4277 pxor %xmm2,%xmm2
4278 pxor %xmm3,%xmm3
4279 pxor %xmm4,%xmm4
4280 pxor %xmm5,%xmm5
4281 addq $8,%rsp
4282 .byte 0xf3,0xc3
4283 L$SEH_end_set_encrypt_key:
4284
4285 .p2align 4
4286 L$key_expansion_128:
4287 movups %xmm0,(%rax)
4288 leaq 16(%rax),%rax
4289 L$key_expansion_128_cold:
4290 shufps $16,%xmm0,%xmm4
4291 xorps %xmm4,%xmm0
4292 shufps $140,%xmm0,%xmm4
4293 xorps %xmm4,%xmm0
4294 shufps $255,%xmm1,%xmm1
4295 xorps %xmm1,%xmm0
4296 .byte 0xf3,0xc3
4297
4298 .p2align 4
4299 L$key_expansion_192a:
4300 movups %xmm0,(%rax)
4301 leaq 16(%rax),%rax
4302 L$key_expansion_192a_cold:
4303 movaps %xmm2,%xmm5
4304 L$key_expansion_192b_warm:
4305 shufps $16,%xmm0,%xmm4
4306 movdqa %xmm2,%xmm3
4307 xorps %xmm4,%xmm0
4308 shufps $140,%xmm0,%xmm4
4309 pslldq $4,%xmm3
4310 xorps %xmm4,%xmm0
4311 pshufd $85,%xmm1,%xmm1
4312 pxor %xmm3,%xmm2
4313 pxor %xmm1,%xmm0
4314 pshufd $255,%xmm0,%xmm3
4315 pxor %xmm3,%xmm2
4316 .byte 0xf3,0xc3
4317
4318 .p2align 4
4319 L$key_expansion_192b:
4320 movaps %xmm0,%xmm3
4321 shufps $68,%xmm0,%xmm5
4322 movups %xmm5,(%rax)
4323 shufps $78,%xmm2,%xmm3
4324 movups %xmm3,16(%rax)
4325 leaq 32(%rax),%rax
4326 jmp L$key_expansion_192b_warm
4327
4328 .p2align 4
4329 L$key_expansion_256a:
4330 movups %xmm2,(%rax)
4331 leaq 16(%rax),%rax
4332 L$key_expansion_256a_cold:
4333 shufps $16,%xmm0,%xmm4
4334 xorps %xmm4,%xmm0
4335 shufps $140,%xmm0,%xmm4
4336 xorps %xmm4,%xmm0
4337 shufps $255,%xmm1,%xmm1
4338 xorps %xmm1,%xmm0
4339 .byte 0xf3,0xc3
4340
4341 .p2align 4
4342 L$key_expansion_256b:
4343 movups %xmm0,(%rax)
4344 leaq 16(%rax),%rax
4345
4346 shufps $16,%xmm2,%xmm4
4347 xorps %xmm4,%xmm2
4348 shufps $140,%xmm2,%xmm4
4349 xorps %xmm4,%xmm2
4350 shufps $170,%xmm1,%xmm1
4351 xorps %xmm1,%xmm2
4352 .byte 0xf3,0xc3
4353
4354
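// Constant pool: byte-swap mask and block-counter increment vectors, the
// XTS tweak-reduction constant (0x87, from x^128 + x^7 + x^2 + x + 1 over
// GF(2)), and the rotate/rcon vectors used by the "_alt" key-schedule paths
// above.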
4355 .p2align 6
4356 L$bswap_mask:
4357 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4358 L$increment32:
4359 .long 6,6,6,0
4360 L$increment64:
4361 .long 1,0,0,0
4362 L$xts_magic:
4363 .long 0x87,0,1,0
4364 L$increment1:
4365 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4366 L$key_rotate:
4367 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4368 L$key_rotate192:
4369 .long 0x04070605,0x04070605,0x04070605,0x04070605
4370 L$key_rcon1:
4371 .long 1,1,1,1
4372 L$key_rcon1b:
4373 .long 0x1b,0x1b,0x1b,0x1b
4374
4375 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4376 .p2align 6
4377 #endif