Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(463)

Side by Side Diff: third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S

Issue 2354623003: Pull boringssl generated source from boringssl_gen (Closed)
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #if defined(__i386__)
2 .file "src/crypto/aes/asm/aesni-x86.S"
3 .text
# Single-block AES encryption (cdecl):
#   4(%esp)  = in   (16-byte block, read unaligned)
#   8(%esp)  = out  (16-byte block, written unaligned)
#   12(%esp) = key  (expanded AES key schedule; round count at offset 240)
# The .byte sequences are hand-encoded AES-NI opcodes for old assemblers:
#   102,15,56,220 = aesenc; 102,15,56,221 = aesenclast.
# Key-derived xmm registers are zeroed before return to avoid leaking
# round-key material.
4 .globl _aesni_encrypt
5 .private_extern _aesni_encrypt
6 .align 4
7 _aesni_encrypt:
8 L_aesni_encrypt_begin:
9 movl 4(%esp),%eax
10 movl 12(%esp),%edx
11 movups (%eax),%xmm2
12 movl 240(%edx),%ecx
13 movl 8(%esp),%eax
14 movups (%edx),%xmm0
15 movups 16(%edx),%xmm1
16 leal 32(%edx),%edx
# xmm2 = plaintext ^ round key 0; loop applies the middle rounds,
# ecx counts remaining rounds.
17 xorps %xmm0,%xmm2
18 L000enc1_loop_1:
19 .byte 102,15,56,220,209
20 decl %ecx
21 movups (%edx),%xmm1
22 leal 16(%edx),%edx
23 jnz L000enc1_loop_1
# Final round (aesenclast), then scrub key state and store ciphertext.
24 .byte 102,15,56,221,209
25 pxor %xmm0,%xmm0
26 pxor %xmm1,%xmm1
27 movups %xmm2,(%eax)
28 pxor %xmm2,%xmm2
29 ret
# Single-block AES decryption (cdecl); mirror of _aesni_encrypt:
#   4(%esp)  = in, 8(%esp) = out, 12(%esp) = key (rounds at offset 240).
# Hand-encoded opcodes: 102,15,56,222 = aesdec; 102,15,56,223 = aesdeclast.
30 .globl _aesni_decrypt
31 .private_extern _aesni_decrypt
32 .align 4
33 _aesni_decrypt:
34 L_aesni_decrypt_begin:
35 movl 4(%esp),%eax
36 movl 12(%esp),%edx
37 movups (%eax),%xmm2
38 movl 240(%edx),%ecx
39 movl 8(%esp),%eax
40 movups (%edx),%xmm0
41 movups 16(%edx),%xmm1
42 leal 32(%edx),%edx
43 xorps %xmm0,%xmm2
44 L001dec1_loop_2:
45 .byte 102,15,56,222,209
46 decl %ecx
47 movups (%edx),%xmm1
48 leal 16(%edx),%edx
49 jnz L001dec1_loop_2
# aesdeclast, then clear key-derived registers and store plaintext.
50 .byte 102,15,56,223,209
51 pxor %xmm0,%xmm0
52 pxor %xmm1,%xmm1
53 movups %xmm2,(%eax)
54 pxor %xmm2,%xmm2
55 ret
# Internal helper: encrypt 2 blocks (xmm2,xmm3) in parallel.
# In: edx = key schedule, ecx = round count; both are clobbered.
# ecx is scaled to a byte offset (shll $4) and negated so the loop can
# walk the schedule with a single positive-stepping index.
56 .private_extern __aesni_encrypt2
57 .align 4
58 __aesni_encrypt2:
59 movups (%edx),%xmm0
60 shll $4,%ecx
61 movups 16(%edx),%xmm1
62 xorps %xmm0,%xmm2
63 pxor %xmm0,%xmm3
64 movups 32(%edx),%xmm0
65 leal 32(%edx,%ecx,1),%edx
66 negl %ecx
67 addl $16,%ecx
68 L002enc2_loop:
# Two interleaved aesenc streams per round (registers 209/217 then 208/216).
69 .byte 102,15,56,220,209
70 .byte 102,15,56,220,217
71 movups (%edx,%ecx,1),%xmm1
72 addl $32,%ecx
73 .byte 102,15,56,220,208
74 .byte 102,15,56,220,216
75 movups -16(%edx,%ecx,1),%xmm0
76 jnz L002enc2_loop
77 .byte 102,15,56,220,209
78 .byte 102,15,56,220,217
79 .byte 102,15,56,221,208
80 .byte 102,15,56,221,216
81 ret
# Internal helper: decrypt 2 blocks (xmm2,xmm3) in parallel.
# In: edx = key schedule, ecx = round count; same indexing scheme as
# __aesni_encrypt2 but with aesdec/aesdeclast (222/223).
82 .private_extern __aesni_decrypt2
83 .align 4
84 __aesni_decrypt2:
85 movups (%edx),%xmm0
86 shll $4,%ecx
87 movups 16(%edx),%xmm1
88 xorps %xmm0,%xmm2
89 pxor %xmm0,%xmm3
90 movups 32(%edx),%xmm0
91 leal 32(%edx,%ecx,1),%edx
92 negl %ecx
93 addl $16,%ecx
94 L003dec2_loop:
95 .byte 102,15,56,222,209
96 .byte 102,15,56,222,217
97 movups (%edx,%ecx,1),%xmm1
98 addl $32,%ecx
99 .byte 102,15,56,222,208
100 .byte 102,15,56,222,216
101 movups -16(%edx,%ecx,1),%xmm0
102 jnz L003dec2_loop
103 .byte 102,15,56,222,209
104 .byte 102,15,56,222,217
105 .byte 102,15,56,223,208
106 .byte 102,15,56,223,216
107 ret
# Internal helper: encrypt 3 blocks (xmm2..xmm4) in parallel.
# In: edx = key schedule, ecx = round count; both clobbered.
108 .private_extern __aesni_encrypt3
109 .align 4
110 __aesni_encrypt3:
111 movups (%edx),%xmm0
112 shll $4,%ecx
113 movups 16(%edx),%xmm1
114 xorps %xmm0,%xmm2
115 pxor %xmm0,%xmm3
116 pxor %xmm0,%xmm4
117 movups 32(%edx),%xmm0
118 leal 32(%edx,%ecx,1),%edx
119 negl %ecx
120 addl $16,%ecx
121 L004enc3_loop:
122 .byte 102,15,56,220,209
123 .byte 102,15,56,220,217
124 .byte 102,15,56,220,225
125 movups (%edx,%ecx,1),%xmm1
126 addl $32,%ecx
127 .byte 102,15,56,220,208
128 .byte 102,15,56,220,216
129 .byte 102,15,56,220,224
130 movups -16(%edx,%ecx,1),%xmm0
131 jnz L004enc3_loop
132 .byte 102,15,56,220,209
133 .byte 102,15,56,220,217
134 .byte 102,15,56,220,225
135 .byte 102,15,56,221,208
136 .byte 102,15,56,221,216
137 .byte 102,15,56,221,224
138 ret
# Internal helper: decrypt 3 blocks (xmm2..xmm4) in parallel.
# In: edx = key schedule, ecx = round count; both clobbered.
139 .private_extern __aesni_decrypt3
140 .align 4
141 __aesni_decrypt3:
142 movups (%edx),%xmm0
143 shll $4,%ecx
144 movups 16(%edx),%xmm1
145 xorps %xmm0,%xmm2
146 pxor %xmm0,%xmm3
147 pxor %xmm0,%xmm4
148 movups 32(%edx),%xmm0
149 leal 32(%edx,%ecx,1),%edx
150 negl %ecx
151 addl $16,%ecx
152 L005dec3_loop:
153 .byte 102,15,56,222,209
154 .byte 102,15,56,222,217
155 .byte 102,15,56,222,225
156 movups (%edx,%ecx,1),%xmm1
157 addl $32,%ecx
158 .byte 102,15,56,222,208
159 .byte 102,15,56,222,216
160 .byte 102,15,56,222,224
161 movups -16(%edx,%ecx,1),%xmm0
162 jnz L005dec3_loop
163 .byte 102,15,56,222,209
164 .byte 102,15,56,222,217
165 .byte 102,15,56,222,225
166 .byte 102,15,56,223,208
167 .byte 102,15,56,223,216
168 .byte 102,15,56,223,224
169 ret
# Internal helper: encrypt 4 blocks (xmm2..xmm5) in parallel.
# In: edx = key schedule, ecx = round count; both clobbered.
# ".byte 15,31,64,0" is an encoded nopl 0(%eax) — alignment/scheduling
# padding emitted by the perlasm generator.
170 .private_extern __aesni_encrypt4
171 .align 4
172 __aesni_encrypt4:
173 movups (%edx),%xmm0
174 movups 16(%edx),%xmm1
175 shll $4,%ecx
176 xorps %xmm0,%xmm2
177 pxor %xmm0,%xmm3
178 pxor %xmm0,%xmm4
179 pxor %xmm0,%xmm5
180 movups 32(%edx),%xmm0
181 leal 32(%edx,%ecx,1),%edx
182 negl %ecx
183 .byte 15,31,64,0
184 addl $16,%ecx
185 L006enc4_loop:
186 .byte 102,15,56,220,209
187 .byte 102,15,56,220,217
188 .byte 102,15,56,220,225
189 .byte 102,15,56,220,233
190 movups (%edx,%ecx,1),%xmm1
191 addl $32,%ecx
192 .byte 102,15,56,220,208
193 .byte 102,15,56,220,216
194 .byte 102,15,56,220,224
195 .byte 102,15,56,220,232
196 movups -16(%edx,%ecx,1),%xmm0
197 jnz L006enc4_loop
198 .byte 102,15,56,220,209
199 .byte 102,15,56,220,217
200 .byte 102,15,56,220,225
201 .byte 102,15,56,220,233
202 .byte 102,15,56,221,208
203 .byte 102,15,56,221,216
204 .byte 102,15,56,221,224
205 .byte 102,15,56,221,232
206 ret
# Internal helper: decrypt 4 blocks (xmm2..xmm5) in parallel.
# In: edx = key schedule, ecx = round count; both clobbered.
207 .private_extern __aesni_decrypt4
208 .align 4
209 __aesni_decrypt4:
210 movups (%edx),%xmm0
211 movups 16(%edx),%xmm1
212 shll $4,%ecx
213 xorps %xmm0,%xmm2
214 pxor %xmm0,%xmm3
215 pxor %xmm0,%xmm4
216 pxor %xmm0,%xmm5
217 movups 32(%edx),%xmm0
218 leal 32(%edx,%ecx,1),%edx
219 negl %ecx
# Encoded nopl 0(%eax) padding.
220 .byte 15,31,64,0
221 addl $16,%ecx
222 L007dec4_loop:
223 .byte 102,15,56,222,209
224 .byte 102,15,56,222,217
225 .byte 102,15,56,222,225
226 .byte 102,15,56,222,233
227 movups (%edx,%ecx,1),%xmm1
228 addl $32,%ecx
229 .byte 102,15,56,222,208
230 .byte 102,15,56,222,216
231 .byte 102,15,56,222,224
232 .byte 102,15,56,222,232
233 movups -16(%edx,%ecx,1),%xmm0
234 jnz L007dec4_loop
235 .byte 102,15,56,222,209
236 .byte 102,15,56,222,217
237 .byte 102,15,56,222,225
238 .byte 102,15,56,222,233
239 .byte 102,15,56,223,208
240 .byte 102,15,56,223,216
241 .byte 102,15,56,223,224
242 .byte 102,15,56,223,232
243 ret
# Internal helper: encrypt 6 blocks (xmm2..xmm7) in parallel.
# In: edx = key schedule, ecx = round count; both clobbered.
# The first round for xmm2..xmm4 is started early (interleaved with the
# whitening of xmm5..xmm7), so the loop is entered at a mid-round label.
# L_aesni_encrypt6_enter is also called directly by the CTR/XTS code
# below, which pre-issues the first round itself.
244 .private_extern __aesni_encrypt6
245 .align 4
246 __aesni_encrypt6:
247 movups (%edx),%xmm0
248 shll $4,%ecx
249 movups 16(%edx),%xmm1
250 xorps %xmm0,%xmm2
251 pxor %xmm0,%xmm3
252 pxor %xmm0,%xmm4
253 .byte 102,15,56,220,209
254 pxor %xmm0,%xmm5
255 pxor %xmm0,%xmm6
256 .byte 102,15,56,220,217
257 leal 32(%edx,%ecx,1),%edx
258 negl %ecx
259 .byte 102,15,56,220,225
260 pxor %xmm0,%xmm7
261 movups (%edx,%ecx,1),%xmm0
262 addl $16,%ecx
263 jmp L008_aesni_encrypt6_inner
264 .align 4,0x90
265 L009enc6_loop:
266 .byte 102,15,56,220,209
267 .byte 102,15,56,220,217
268 .byte 102,15,56,220,225
269 L008_aesni_encrypt6_inner:
270 .byte 102,15,56,220,233
271 .byte 102,15,56,220,241
272 .byte 102,15,56,220,249
273 L_aesni_encrypt6_enter:
274 movups (%edx,%ecx,1),%xmm1
275 addl $32,%ecx
276 .byte 102,15,56,220,208
277 .byte 102,15,56,220,216
278 .byte 102,15,56,220,224
279 .byte 102,15,56,220,232
280 .byte 102,15,56,220,240
281 .byte 102,15,56,220,248
282 movups -16(%edx,%ecx,1),%xmm0
283 jnz L009enc6_loop
284 .byte 102,15,56,220,209
285 .byte 102,15,56,220,217
286 .byte 102,15,56,220,225
287 .byte 102,15,56,220,233
288 .byte 102,15,56,220,241
289 .byte 102,15,56,220,249
290 .byte 102,15,56,221,208
291 .byte 102,15,56,221,216
292 .byte 102,15,56,221,224
293 .byte 102,15,56,221,232
294 .byte 102,15,56,221,240
295 .byte 102,15,56,221,248
296 ret
# Internal helper: decrypt 6 blocks (xmm2..xmm7) in parallel.
# Mirror of __aesni_encrypt6 using aesdec/aesdeclast (222/223);
# L_aesni_decrypt6_enter is the direct-call entry for callers that
# pre-issue the first round themselves.
297 .private_extern __aesni_decrypt6
298 .align 4
299 __aesni_decrypt6:
300 movups (%edx),%xmm0
301 shll $4,%ecx
302 movups 16(%edx),%xmm1
303 xorps %xmm0,%xmm2
304 pxor %xmm0,%xmm3
305 pxor %xmm0,%xmm4
306 .byte 102,15,56,222,209
307 pxor %xmm0,%xmm5
308 pxor %xmm0,%xmm6
309 .byte 102,15,56,222,217
310 leal 32(%edx,%ecx,1),%edx
311 negl %ecx
312 .byte 102,15,56,222,225
313 pxor %xmm0,%xmm7
314 movups (%edx,%ecx,1),%xmm0
315 addl $16,%ecx
316 jmp L010_aesni_decrypt6_inner
317 .align 4,0x90
318 L011dec6_loop:
319 .byte 102,15,56,222,209
320 .byte 102,15,56,222,217
321 .byte 102,15,56,222,225
322 L010_aesni_decrypt6_inner:
323 .byte 102,15,56,222,233
324 .byte 102,15,56,222,241
325 .byte 102,15,56,222,249
326 L_aesni_decrypt6_enter:
327 movups (%edx,%ecx,1),%xmm1
328 addl $32,%ecx
329 .byte 102,15,56,222,208
330 .byte 102,15,56,222,216
331 .byte 102,15,56,222,224
332 .byte 102,15,56,222,232
333 .byte 102,15,56,222,240
334 .byte 102,15,56,222,248
335 movups -16(%edx,%ecx,1),%xmm0
336 jnz L011dec6_loop
337 .byte 102,15,56,222,209
338 .byte 102,15,56,222,217
339 .byte 102,15,56,222,225
340 .byte 102,15,56,222,233
341 .byte 102,15,56,222,241
342 .byte 102,15,56,222,249
343 .byte 102,15,56,223,208
344 .byte 102,15,56,223,216
345 .byte 102,15,56,223,224
346 .byte 102,15,56,223,232
347 .byte 102,15,56,223,240
348 .byte 102,15,56,223,248
349 ret
# AES-ECB bulk encrypt/decrypt (cdecl). After the four pushes the args
# sit at:
#   20(%esp) = in (esi), 24(%esp) = out (edi), 28(%esp) = length (eax),
#   32(%esp) = key (edx), 36(%esp) = enc flag (ebx; non-zero => encrypt).
# Length is rounded down to a multiple of 16. The main loop processes
# 6 blocks per iteration via __aesni_{en,de}crypt6; tails of 1..5 blocks
# are dispatched to the 1/2/3/4/6-block paths. ebp/ebx cache key/rounds
# across helper calls (the helpers clobber edx/ecx).
350 .globl _aesni_ecb_encrypt
351 .private_extern _aesni_ecb_encrypt
352 .align 4
353 _aesni_ecb_encrypt:
354 L_aesni_ecb_encrypt_begin:
355 pushl %ebp
356 pushl %ebx
357 pushl %esi
358 pushl %edi
359 movl 20(%esp),%esi
360 movl 24(%esp),%edi
361 movl 28(%esp),%eax
362 movl 32(%esp),%edx
363 movl 36(%esp),%ebx
364 andl $-16,%eax
365 jz L012ecb_ret
366 movl 240(%edx),%ecx
367 testl %ebx,%ebx
368 jz L013ecb_decrypt
# ---- encrypt path: 6 blocks per iteration ----
369 movl %edx,%ebp
370 movl %ecx,%ebx
371 cmpl $96,%eax
372 jb L014ecb_enc_tail
373 movdqu (%esi),%xmm2
374 movdqu 16(%esi),%xmm3
375 movdqu 32(%esi),%xmm4
376 movdqu 48(%esi),%xmm5
377 movdqu 64(%esi),%xmm6
378 movdqu 80(%esi),%xmm7
379 leal 96(%esi),%esi
380 subl $96,%eax
381 jmp L015ecb_enc_loop6_enter
382 .align 4,0x90
# Steady-state: store previous 6 results while loading the next 6 inputs.
383 L016ecb_enc_loop6:
384 movups %xmm2,(%edi)
385 movdqu (%esi),%xmm2
386 movups %xmm3,16(%edi)
387 movdqu 16(%esi),%xmm3
388 movups %xmm4,32(%edi)
389 movdqu 32(%esi),%xmm4
390 movups %xmm5,48(%edi)
391 movdqu 48(%esi),%xmm5
392 movups %xmm6,64(%edi)
393 movdqu 64(%esi),%xmm6
394 movups %xmm7,80(%edi)
395 leal 96(%edi),%edi
396 movdqu 80(%esi),%xmm7
397 leal 96(%esi),%esi
398 L015ecb_enc_loop6_enter:
399 call __aesni_encrypt6
# Restore key/rounds clobbered by the helper.
400 movl %ebp,%edx
401 movl %ebx,%ecx
402 subl $96,%eax
403 jnc L016ecb_enc_loop6
404 movups %xmm2,(%edi)
405 movups %xmm3,16(%edi)
406 movups %xmm4,32(%edi)
407 movups %xmm5,48(%edi)
408 movups %xmm6,64(%edi)
409 movups %xmm7,80(%edi)
410 leal 96(%edi),%edi
411 addl $96,%eax
412 jz L012ecb_ret
# ---- encrypt tail: 1..5 remaining blocks ----
413 L014ecb_enc_tail:
414 movups (%esi),%xmm2
415 cmpl $32,%eax
416 jb L017ecb_enc_one
417 movups 16(%esi),%xmm3
418 je L018ecb_enc_two
419 movups 32(%esi),%xmm4
420 cmpl $64,%eax
421 jb L019ecb_enc_three
422 movups 48(%esi),%xmm5
423 je L020ecb_enc_four
# 5 blocks: run the 6-wide helper with a zeroed sixth lane.
424 movups 64(%esi),%xmm6
425 xorps %xmm7,%xmm7
426 call __aesni_encrypt6
427 movups %xmm2,(%edi)
428 movups %xmm3,16(%edi)
429 movups %xmm4,32(%edi)
430 movups %xmm5,48(%edi)
431 movups %xmm6,64(%edi)
432 jmp L012ecb_ret
433 .align 4,0x90
# 1-block tail: inline single-block encrypt loop.
434 L017ecb_enc_one:
435 movups (%edx),%xmm0
436 movups 16(%edx),%xmm1
437 leal 32(%edx),%edx
438 xorps %xmm0,%xmm2
439 L021enc1_loop_3:
440 .byte 102,15,56,220,209
441 decl %ecx
442 movups (%edx),%xmm1
443 leal 16(%edx),%edx
444 jnz L021enc1_loop_3
445 .byte 102,15,56,221,209
446 movups %xmm2,(%edi)
447 jmp L012ecb_ret
448 .align 4,0x90
449 L018ecb_enc_two:
450 call __aesni_encrypt2
451 movups %xmm2,(%edi)
452 movups %xmm3,16(%edi)
453 jmp L012ecb_ret
454 .align 4,0x90
455 L019ecb_enc_three:
456 call __aesni_encrypt3
457 movups %xmm2,(%edi)
458 movups %xmm3,16(%edi)
459 movups %xmm4,32(%edi)
460 jmp L012ecb_ret
461 .align 4,0x90
462 L020ecb_enc_four:
463 call __aesni_encrypt4
464 movups %xmm2,(%edi)
465 movups %xmm3,16(%edi)
466 movups %xmm4,32(%edi)
467 movups %xmm5,48(%edi)
468 jmp L012ecb_ret
469 .align 4,0x90
# ---- decrypt path: same structure with the decrypt helpers ----
470 L013ecb_decrypt:
471 movl %edx,%ebp
472 movl %ecx,%ebx
473 cmpl $96,%eax
474 jb L022ecb_dec_tail
475 movdqu (%esi),%xmm2
476 movdqu 16(%esi),%xmm3
477 movdqu 32(%esi),%xmm4
478 movdqu 48(%esi),%xmm5
479 movdqu 64(%esi),%xmm6
480 movdqu 80(%esi),%xmm7
481 leal 96(%esi),%esi
482 subl $96,%eax
483 jmp L023ecb_dec_loop6_enter
484 .align 4,0x90
485 L024ecb_dec_loop6:
486 movups %xmm2,(%edi)
487 movdqu (%esi),%xmm2
488 movups %xmm3,16(%edi)
489 movdqu 16(%esi),%xmm3
490 movups %xmm4,32(%edi)
491 movdqu 32(%esi),%xmm4
492 movups %xmm5,48(%edi)
493 movdqu 48(%esi),%xmm5
494 movups %xmm6,64(%edi)
495 movdqu 64(%esi),%xmm6
496 movups %xmm7,80(%edi)
497 leal 96(%edi),%edi
498 movdqu 80(%esi),%xmm7
499 leal 96(%esi),%esi
500 L023ecb_dec_loop6_enter:
501 call __aesni_decrypt6
502 movl %ebp,%edx
503 movl %ebx,%ecx
504 subl $96,%eax
505 jnc L024ecb_dec_loop6
506 movups %xmm2,(%edi)
507 movups %xmm3,16(%edi)
508 movups %xmm4,32(%edi)
509 movups %xmm5,48(%edi)
510 movups %xmm6,64(%edi)
511 movups %xmm7,80(%edi)
512 leal 96(%edi),%edi
513 addl $96,%eax
514 jz L012ecb_ret
515 L022ecb_dec_tail:
516 movups (%esi),%xmm2
517 cmpl $32,%eax
518 jb L025ecb_dec_one
519 movups 16(%esi),%xmm3
520 je L026ecb_dec_two
521 movups 32(%esi),%xmm4
522 cmpl $64,%eax
523 jb L027ecb_dec_three
524 movups 48(%esi),%xmm5
525 je L028ecb_dec_four
526 movups 64(%esi),%xmm6
527 xorps %xmm7,%xmm7
528 call __aesni_decrypt6
529 movups %xmm2,(%edi)
530 movups %xmm3,16(%edi)
531 movups %xmm4,32(%edi)
532 movups %xmm5,48(%edi)
533 movups %xmm6,64(%edi)
534 jmp L012ecb_ret
535 .align 4,0x90
536 L025ecb_dec_one:
537 movups (%edx),%xmm0
538 movups 16(%edx),%xmm1
539 leal 32(%edx),%edx
540 xorps %xmm0,%xmm2
541 L029dec1_loop_4:
542 .byte 102,15,56,222,209
543 decl %ecx
544 movups (%edx),%xmm1
545 leal 16(%edx),%edx
546 jnz L029dec1_loop_4
547 .byte 102,15,56,223,209
548 movups %xmm2,(%edi)
549 jmp L012ecb_ret
550 .align 4,0x90
551 L026ecb_dec_two:
552 call __aesni_decrypt2
553 movups %xmm2,(%edi)
554 movups %xmm3,16(%edi)
555 jmp L012ecb_ret
556 .align 4,0x90
557 L027ecb_dec_three:
558 call __aesni_decrypt3
559 movups %xmm2,(%edi)
560 movups %xmm3,16(%edi)
561 movups %xmm4,32(%edi)
562 jmp L012ecb_ret
563 .align 4,0x90
564 L028ecb_dec_four:
565 call __aesni_decrypt4
566 movups %xmm2,(%edi)
567 movups %xmm3,16(%edi)
568 movups %xmm4,32(%edi)
569 movups %xmm5,48(%edi)
# Common exit: scrub all xmm state before restoring registers.
570 L012ecb_ret:
571 pxor %xmm0,%xmm0
572 pxor %xmm1,%xmm1
573 pxor %xmm2,%xmm2
574 pxor %xmm3,%xmm3
575 pxor %xmm4,%xmm4
576 pxor %xmm5,%xmm5
577 pxor %xmm6,%xmm6
578 pxor %xmm7,%xmm7
579 popl %edi
580 popl %esi
581 popl %ebx
582 popl %ebp
583 ret
# AES-CCM (64-bit nonce variant) bulk encrypt (cdecl). After the pushes:
#   20(%esp)=in, 24(%esp)=out, 28(%esp)=block count (eax),
#   32(%esp)=key, 36(%esp)=ivec/counter block (ebx), 40(%esp)=cmac (ecx).
# A 16-byte-aligned scratch frame is carved out of the stack; the first
# 16 bytes hold a big-endian byte-swap mask (0x0c0d0e0f 0x08090a0b
# 0x04050607 0x00010203) used with pshufb (.byte 102,15,56,0), and
# 16(%esp) holds the {1,0,0,0} counter increment used with paddq.
# Per block: xmm2 = E(counter) is computed while xmm3 (the CMAC) absorbs
# the plaintext; ciphertext = plaintext ^ E(counter). The final CMAC is
# written back through the pointer at 40(%esp).
584 .globl _aesni_ccm64_encrypt_blocks
585 .private_extern _aesni_ccm64_encrypt_blocks
586 .align 4
587 _aesni_ccm64_encrypt_blocks:
588 L_aesni_ccm64_encrypt_blocks_begin:
589 pushl %ebp
590 pushl %ebx
591 pushl %esi
592 pushl %edi
593 movl 20(%esp),%esi
594 movl 24(%esp),%edi
595 movl 28(%esp),%eax
596 movl 32(%esp),%edx
597 movl 36(%esp),%ebx
598 movl 40(%esp),%ecx
# Save the unaligned esp at 48(%esp) so it can be restored on exit.
599 movl %esp,%ebp
600 subl $60,%esp
601 andl $-16,%esp
602 movl %ebp,48(%esp)
603 movdqu (%ebx),%xmm7
604 movdqu (%ecx),%xmm3
605 movl 240(%edx),%ecx
# Byte-swap mask for pshufb (counter is kept big-endian in memory).
606 movl $202182159,(%esp)
607 movl $134810123,4(%esp)
608 movl $67438087,8(%esp)
609 movl $66051,12(%esp)
# Counter increment vector {1,0,0,0} at 16(%esp).
610 movl $1,%ebx
611 xorl %ebp,%ebp
612 movl %ebx,16(%esp)
613 movl %ebp,20(%esp)
614 movl %ebp,24(%esp)
615 movl %ebp,28(%esp)
616 shll $4,%ecx
617 movl $16,%ebx
618 leal (%edx),%ebp
619 movdqa (%esp),%xmm5
620 movdqa %xmm7,%xmm2
620 leal 32(%edx,%ecx,1),%edx
622 subl %ecx,%ebx
# pshufb %xmm5,%xmm7 — byte-swap the counter into register order.
623 .byte 102,15,56,0,253
624 L030ccm64_enc_outer:
625 movups (%ebp),%xmm0
626 movl %ebx,%ecx
627 movups (%esi),%xmm6
628 xorps %xmm0,%xmm2
629 movups 16(%ebp),%xmm1
630 xorps %xmm6,%xmm0
# CMAC absorbs the whitened plaintext; both lanes are encrypted together.
631 xorps %xmm0,%xmm3
632 movups 32(%ebp),%xmm0
633 L031ccm64_enc2_loop:
634 .byte 102,15,56,220,209
635 .byte 102,15,56,220,217
636 movups (%edx,%ecx,1),%xmm1
637 addl $32,%ecx
638 .byte 102,15,56,220,208
639 .byte 102,15,56,220,216
640 movups -16(%edx,%ecx,1),%xmm0
641 jnz L031ccm64_enc2_loop
642 .byte 102,15,56,220,209
643 .byte 102,15,56,220,217
# Bump the counter while the last rounds finish.
644 paddq 16(%esp),%xmm7
645 decl %eax
646 .byte 102,15,56,221,208
647 .byte 102,15,56,221,216
648 leal 16(%esi),%esi
649 xorps %xmm2,%xmm6
650 movdqa %xmm7,%xmm2
651 movups %xmm6,(%edi)
# pshufb %xmm5,%xmm2 — re-swap new counter for the next block.
652 .byte 102,15,56,0,213
653 leal 16(%edi),%edi
654 jnz L030ccm64_enc_outer
# Restore stack, store final CMAC, scrub xmm state.
655 movl 48(%esp),%esp
656 movl 40(%esp),%edi
657 movups %xmm3,(%edi)
658 pxor %xmm0,%xmm0
659 pxor %xmm1,%xmm1
660 pxor %xmm2,%xmm2
661 pxor %xmm3,%xmm3
662 pxor %xmm4,%xmm4
663 pxor %xmm5,%xmm5
664 pxor %xmm6,%xmm6
665 pxor %xmm7,%xmm7
666 popl %edi
667 popl %esi
668 popl %ebx
669 popl %ebp
670 ret
# AES-CCM (64-bit nonce variant) bulk decrypt (cdecl); same argument
# layout and stack frame as _aesni_ccm64_encrypt_blocks. The first
# counter block is encrypted up front; each iteration then decrypts one
# ciphertext block (xor with E(counter)) and feeds the recovered
# plaintext into the CMAC (xmm3), encrypting CMAC and next counter in
# parallel. The final CMAC pass runs after the loop at L034.
671 .globl _aesni_ccm64_decrypt_blocks
672 .private_extern _aesni_ccm64_decrypt_blocks
673 .align 4
674 _aesni_ccm64_decrypt_blocks:
675 L_aesni_ccm64_decrypt_blocks_begin:
676 pushl %ebp
677 pushl %ebx
678 pushl %esi
679 pushl %edi
680 movl 20(%esp),%esi
681 movl 24(%esp),%edi
682 movl 28(%esp),%eax
683 movl 32(%esp),%edx
684 movl 36(%esp),%ebx
685 movl 40(%esp),%ecx
686 movl %esp,%ebp
687 subl $60,%esp
688 andl $-16,%esp
689 movl %ebp,48(%esp)
690 movdqu (%ebx),%xmm7
691 movdqu (%ecx),%xmm3
692 movl 240(%edx),%ecx
# Byte-swap mask and {1,0,0,0} counter increment, as in the encrypt path.
693 movl $202182159,(%esp)
694 movl $134810123,4(%esp)
695 movl $67438087,8(%esp)
696 movl $66051,12(%esp)
697 movl $1,%ebx
698 xorl %ebp,%ebp
699 movl %ebx,16(%esp)
700 movl %ebp,20(%esp)
701 movl %ebp,24(%esp)
702 movl %ebp,28(%esp)
703 movdqa (%esp),%xmm5
704 movdqa %xmm7,%xmm2
705 movl %edx,%ebp
706 movl %ecx,%ebx
# pshufb %xmm5,%xmm7
707 .byte 102,15,56,0,253
# Encrypt the first counter block inline (single-stream loop).
708 movups (%edx),%xmm0
709 movups 16(%edx),%xmm1
710 leal 32(%edx),%edx
711 xorps %xmm0,%xmm2
712 L032enc1_loop_5:
713 .byte 102,15,56,220,209
714 decl %ecx
715 movups (%edx),%xmm1
716 leal 16(%edx),%edx
717 jnz L032enc1_loop_5
718 .byte 102,15,56,221,209
719 shll $4,%ebx
720 movl $16,%ecx
721 movups (%esi),%xmm6
722 paddq 16(%esp),%xmm7
723 leal 16(%esi),%esi
724 subl %ebx,%ecx
725 leal 32(%ebp,%ebx,1),%edx
726 movl %ecx,%ebx
727 jmp L033ccm64_dec_outer
728 .align 4,0x90
729 L033ccm64_dec_outer:
# plaintext = ciphertext ^ E(counter); write it out, swap in next counter.
730 xorps %xmm2,%xmm6
731 movdqa %xmm7,%xmm2
732 movups %xmm6,(%edi)
733 leal 16(%edi),%edi
# pshufb %xmm5,%xmm2
734 .byte 102,15,56,0,213
735 subl $1,%eax
736 jz L034ccm64_dec_break
737 movups (%ebp),%xmm0
738 movl %ebx,%ecx
739 movups 16(%ebp),%xmm1
740 xorps %xmm0,%xmm6
741 xorps %xmm0,%xmm2
# CMAC absorbs the recovered plaintext.
742 xorps %xmm6,%xmm3
743 movups 32(%ebp),%xmm0
744 L035ccm64_dec2_loop:
745 .byte 102,15,56,220,209
746 .byte 102,15,56,220,217
747 movups (%edx,%ecx,1),%xmm1
748 addl $32,%ecx
749 .byte 102,15,56,220,208
750 .byte 102,15,56,220,216
751 movups -16(%edx,%ecx,1),%xmm0
752 jnz L035ccm64_dec2_loop
753 movups (%esi),%xmm6
754 paddq 16(%esp),%xmm7
755 .byte 102,15,56,220,209
756 .byte 102,15,56,220,217
757 .byte 102,15,56,221,208
758 .byte 102,15,56,221,216
759 leal 16(%esi),%esi
760 jmp L033ccm64_dec_outer
761 .align 4,0x90
# Final: fold the last plaintext block into the CMAC and encrypt it once.
762 L034ccm64_dec_break:
763 movl 240(%ebp),%ecx
764 movl %ebp,%edx
765 movups (%edx),%xmm0
766 movups 16(%edx),%xmm1
767 xorps %xmm0,%xmm6
768 leal 32(%edx),%edx
769 xorps %xmm6,%xmm3
770 L036enc1_loop_6:
771 .byte 102,15,56,220,217
772 decl %ecx
773 movups (%edx),%xmm1
774 leal 16(%edx),%edx
775 jnz L036enc1_loop_6
776 .byte 102,15,56,221,217
# Restore stack, store final CMAC, scrub xmm state.
777 movl 48(%esp),%esp
778 movl 40(%esp),%edi
779 movups %xmm3,(%edi)
780 pxor %xmm0,%xmm0
781 pxor %xmm1,%xmm1
782 pxor %xmm2,%xmm2
783 pxor %xmm3,%xmm3
784 pxor %xmm4,%xmm4
785 pxor %xmm5,%xmm5
786 pxor %xmm6,%xmm6
787 pxor %xmm7,%xmm7
788 popl %edi
789 popl %esi
790 popl %ebx
791 popl %ebp
792 ret
# AES-CTR with 32-bit big-endian counter (cdecl). After the pushes:
#   20(%esp)=in, 24(%esp)=out, 28(%esp)=block count (eax),
#   32(%esp)=key, 36(%esp)=ivec (ebx).
# Builds six counter values per iteration: the low counter dword is
# extracted/reinserted with pextrd/pinsrd (.byte 102,15,58,22 /
# 102,15,58,34), three incremented copies are packed into xmm0/xmm1
# (kept at 48/64(%esp)), and pshufd spreads them into the six lanes.
# Scratch frame: (%esp)=byte-swap mask, 16(%esp)={6,6,6,0} counter
# increment, 32(%esp)=first round key ^ ivec, 80(%esp)=saved esp.
# Encryption of the six lanes goes through L_aesni_encrypt6_enter after
# the first two rounds are issued inline.
793 .globl _aesni_ctr32_encrypt_blocks
794 .private_extern _aesni_ctr32_encrypt_blocks
795 .align 4
796 _aesni_ctr32_encrypt_blocks:
797 L_aesni_ctr32_encrypt_blocks_begin:
798 pushl %ebp
799 pushl %ebx
800 pushl %esi
801 pushl %edi
802 movl 20(%esp),%esi
803 movl 24(%esp),%edi
804 movl 28(%esp),%eax
805 movl 32(%esp),%edx
806 movl 36(%esp),%ebx
807 movl %esp,%ebp
808 subl $88,%esp
809 andl $-16,%esp
810 movl %ebp,80(%esp)
811 cmpl $1,%eax
812 je L037ctr32_one_shortcut
813 movdqu (%ebx),%xmm7
# Byte-swap mask at (%esp); {6,6,6,0} paddd increment at 16(%esp).
814 movl $202182159,(%esp)
815 movl $134810123,4(%esp)
816 movl $67438087,8(%esp)
817 movl $66051,12(%esp)
818 movl $6,%ecx
819 xorl %ebp,%ebp
820 movl %ecx,16(%esp)
821 movl %ecx,20(%esp)
822 movl %ecx,24(%esp)
823 movl %ebp,28(%esp)
# pextrd $3,%xmm7,%ebx / pinsrd $3,%ebp,%xmm7 — pull out / patch the
# low (big-endian last) counter dword.
824 .byte 102,15,58,22,251,3
825 .byte 102,15,58,34,253,3
826 movl 240(%edx),%ecx
827 bswap %ebx
828 pxor %xmm0,%xmm0
829 pxor %xmm1,%xmm1
830 movdqa (%esp),%xmm2
# pinsrd: pack counters n,n+1,n+2 into xmm0 and n+3,n+4,n+5 into xmm1.
831 .byte 102,15,58,34,195,0
832 leal 3(%ebx),%ebp
833 .byte 102,15,58,34,205,0
834 incl %ebx
835 .byte 102,15,58,34,195,1
836 incl %ebp
837 .byte 102,15,58,34,205,1
838 incl %ebx
839 .byte 102,15,58,34,195,2
840 incl %ebp
841 .byte 102,15,58,34,205,2
842 movdqa %xmm0,48(%esp)
# pshufb — swap packed counters to big-endian.
843 .byte 102,15,56,0,194
844 movdqu (%edx),%xmm6
845 movdqa %xmm1,64(%esp)
846 .byte 102,15,56,0,202
847 pshufd $192,%xmm0,%xmm2
848 pshufd $128,%xmm0,%xmm3
849 cmpl $6,%eax
850 jb L038ctr32_tail
# Pre-whiten: cache (round key 0 ^ ivec) at 32(%esp) for the loop.
851 pxor %xmm6,%xmm7
852 shll $4,%ecx
853 movl $16,%ebx
854 movdqa %xmm7,32(%esp)
855 movl %edx,%ebp
856 subl %ecx,%ebx
857 leal 32(%edx,%ecx,1),%edx
858 subl $6,%eax
859 jmp L039ctr32_loop6
860 .align 4,0x90
861 L039ctr32_loop6:
# Spread the six counter lanes, whiten them, and issue the first rounds
# inline before tail-calling into the shared 6-wide round loop.
862 pshufd $64,%xmm0,%xmm4
863 movdqa 32(%esp),%xmm0
864 pshufd $192,%xmm1,%xmm5
865 pxor %xmm0,%xmm2
866 pshufd $128,%xmm1,%xmm6
867 pxor %xmm0,%xmm3
868 pshufd $64,%xmm1,%xmm7
869 movups 16(%ebp),%xmm1
870 pxor %xmm0,%xmm4
871 pxor %xmm0,%xmm5
872 .byte 102,15,56,220,209
873 pxor %xmm0,%xmm6
874 pxor %xmm0,%xmm7
875 .byte 102,15,56,220,217
876 movups 32(%ebp),%xmm0
877 movl %ebx,%ecx
878 .byte 102,15,56,220,225
879 .byte 102,15,56,220,233
880 .byte 102,15,56,220,241
881 .byte 102,15,56,220,249
882 call L_aesni_encrypt6_enter
# XOR keystream with input, store output, and advance the counters.
883 movups (%esi),%xmm1
884 movups 16(%esi),%xmm0
885 xorps %xmm1,%xmm2
886 movups 32(%esi),%xmm1
887 xorps %xmm0,%xmm3
888 movups %xmm2,(%edi)
889 movdqa 16(%esp),%xmm0
890 xorps %xmm1,%xmm4
891 movdqa 64(%esp),%xmm1
892 movups %xmm3,16(%edi)
893 movups %xmm4,32(%edi)
894 paddd %xmm0,%xmm1
895 paddd 48(%esp),%xmm0
896 movdqa (%esp),%xmm2
897 movups 48(%esi),%xmm3
898 movups 64(%esi),%xmm4
899 xorps %xmm3,%xmm5
900 movups 80(%esi),%xmm3
901 leal 96(%esi),%esi
902 movdqa %xmm0,48(%esp)
# pshufb — re-swap updated counters.
903 .byte 102,15,56,0,194
904 xorps %xmm4,%xmm6
905 movups %xmm5,48(%edi)
906 xorps %xmm3,%xmm7
907 movdqa %xmm1,64(%esp)
908 .byte 102,15,56,0,202
909 movups %xmm6,64(%edi)
910 pshufd $192,%xmm0,%xmm2
911 movups %xmm7,80(%edi)
912 leal 96(%edi),%edi
913 pshufd $128,%xmm0,%xmm3
914 subl $6,%eax
915 jnc L039ctr32_loop6
916 addl $6,%eax
917 jz L040ctr32_ret
918 movdqu (%ebp),%xmm7
919 movl %ebp,%edx
920 pxor 32(%esp),%xmm7
921 movl 240(%ebp),%ecx
# ---- tail: 1..5 remaining blocks ----
922 L038ctr32_tail:
923 por %xmm7,%xmm2
924 cmpl $2,%eax
925 jb L041ctr32_one
926 pshufd $64,%xmm0,%xmm4
927 por %xmm7,%xmm3
928 je L042ctr32_two
929 pshufd $192,%xmm1,%xmm5
930 por %xmm7,%xmm4
931 cmpl $4,%eax
932 jb L043ctr32_three
933 pshufd $128,%xmm1,%xmm6
934 por %xmm7,%xmm5
935 je L044ctr32_four
936 por %xmm7,%xmm6
937 call __aesni_encrypt6
938 movups (%esi),%xmm1
939 movups 16(%esi),%xmm0
940 xorps %xmm1,%xmm2
941 movups 32(%esi),%xmm1
942 xorps %xmm0,%xmm3
943 movups 48(%esi),%xmm0
944 xorps %xmm1,%xmm4
945 movups 64(%esi),%xmm1
946 xorps %xmm0,%xmm5
947 movups %xmm2,(%edi)
948 xorps %xmm1,%xmm6
949 movups %xmm3,16(%edi)
950 movups %xmm4,32(%edi)
951 movups %xmm5,48(%edi)
952 movups %xmm6,64(%edi)
953 jmp L040ctr32_ret
954 .align 4,0x90
# Single-block fast path: skip all counter-vector setup.
955 L037ctr32_one_shortcut:
956 movups (%ebx),%xmm2
957 movl 240(%edx),%ecx
958 L041ctr32_one:
959 movups (%edx),%xmm0
960 movups 16(%edx),%xmm1
961 leal 32(%edx),%edx
962 xorps %xmm0,%xmm2
963 L045enc1_loop_7:
964 .byte 102,15,56,220,209
965 decl %ecx
966 movups (%edx),%xmm1
967 leal 16(%edx),%edx
968 jnz L045enc1_loop_7
969 .byte 102,15,56,221,209
970 movups (%esi),%xmm6
971 xorps %xmm2,%xmm6
972 movups %xmm6,(%edi)
973 jmp L040ctr32_ret
974 .align 4,0x90
975 L042ctr32_two:
976 call __aesni_encrypt2
977 movups (%esi),%xmm5
978 movups 16(%esi),%xmm6
979 xorps %xmm5,%xmm2
980 xorps %xmm6,%xmm3
981 movups %xmm2,(%edi)
982 movups %xmm3,16(%edi)
983 jmp L040ctr32_ret
984 .align 4,0x90
985 L043ctr32_three:
986 call __aesni_encrypt3
987 movups (%esi),%xmm5
988 movups 16(%esi),%xmm6
989 xorps %xmm5,%xmm2
990 movups 32(%esi),%xmm7
991 xorps %xmm6,%xmm3
992 movups %xmm2,(%edi)
993 xorps %xmm7,%xmm4
994 movups %xmm3,16(%edi)
995 movups %xmm4,32(%edi)
996 jmp L040ctr32_ret
997 .align 4,0x90
998 L044ctr32_four:
999 call __aesni_encrypt4
1000 movups (%esi),%xmm6
1001 movups 16(%esi),%xmm7
1002 movups 32(%esi),%xmm1
1003 xorps %xmm6,%xmm2
1004 movups 48(%esi),%xmm0
1005 xorps %xmm7,%xmm3
1006 movups %xmm2,(%edi)
1007 xorps %xmm1,%xmm4
1008 movups %xmm3,16(%edi)
1009 xorps %xmm0,%xmm5
1010 movups %xmm4,32(%edi)
1011 movups %xmm5,48(%edi)
# Exit: scrub registers AND the key-bearing stack slots, restore esp.
1012 L040ctr32_ret:
1013 pxor %xmm0,%xmm0
1014 pxor %xmm1,%xmm1
1015 pxor %xmm2,%xmm2
1016 pxor %xmm3,%xmm3
1017 pxor %xmm4,%xmm4
1018 movdqa %xmm0,32(%esp)
1019 pxor %xmm5,%xmm5
1020 movdqa %xmm0,48(%esp)
1021 pxor %xmm6,%xmm6
1022 movdqa %xmm0,64(%esp)
1023 pxor %xmm7,%xmm7
1024 movl 80(%esp),%esp
1025 popl %edi
1026 popl %esi
1027 popl %ebx
1028 popl %ebp
1029 ret
# AES-XTS encrypt (cdecl). After the pushes the args are read twice:
# first 36/40(%esp) = key2/tweak-iv to compute the initial tweak
# T = E_key2(iv), then 20..32(%esp) = in, out, length, key1.
# Scratch frame (aligned): (%esp)..80(%esp) = six per-block tweaks,
# 96(%esp) = {0x87,0,1,0} — the GF(2^128) reduction constant for the
# "multiply tweak by x" step (pshufd/pand/pxor with the pcmpgtd carry),
# 112(%esp) = residual byte count for ciphertext stealing,
# 116(%esp) = saved unaligned esp.
# Main loop: 6 blocks per iteration through L_aesni_encrypt6_enter;
# tails of 1..5 blocks use the smaller helpers; a non-multiple-of-16
# length is finished with ciphertext stealing at L057.
1030 .globl _aesni_xts_encrypt
1031 .private_extern _aesni_xts_encrypt
1032 .align 4
1033 _aesni_xts_encrypt:
1034 L_aesni_xts_encrypt_begin:
1035 pushl %ebp
1036 pushl %ebx
1037 pushl %esi
1038 pushl %edi
# Compute the initial tweak: encrypt the IV with key2 (single-stream).
1039 movl 36(%esp),%edx
1040 movl 40(%esp),%esi
1041 movl 240(%edx),%ecx
1042 movups (%esi),%xmm2
1043 movups (%edx),%xmm0
1044 movups 16(%edx),%xmm1
1045 leal 32(%edx),%edx
1046 xorps %xmm0,%xmm2
1047 L046enc1_loop_8:
1048 .byte 102,15,56,220,209
1049 decl %ecx
1050 movups (%edx),%xmm1
1051 leal 16(%edx),%edx
1052 jnz L046enc1_loop_8
1053 .byte 102,15,56,221,209
1054 movl 20(%esp),%esi
1055 movl 24(%esp),%edi
1056 movl 28(%esp),%eax
1057 movl 32(%esp),%edx
1058 movl %esp,%ebp
1059 subl $120,%esp
1060 movl 240(%edx),%ecx
1061 andl $-16,%esp
# GF(2^128) reduction constant 0x87 at 96(%esp).
1062 movl $135,96(%esp)
1063 movl $0,100(%esp)
1064 movl $1,104(%esp)
1065 movl $0,108(%esp)
1066 movl %eax,112(%esp)
1067 movl %ebp,116(%esp)
1068 movdqa %xmm2,%xmm1
1069 pxor %xmm0,%xmm0
1070 movdqa 96(%esp),%xmm3
1071 pcmpgtd %xmm1,%xmm0
1072 andl $-16,%eax
1073 movl %edx,%ebp
1074 movl %ecx,%ebx
1075 subl $96,%eax
1076 jc L047xts_enc_short
1077 shll $4,%ecx
1078 movl $16,%ebx
1079 subl %ecx,%ebx
1080 leal 32(%edx,%ecx,1),%edx
1081 jmp L048xts_enc_loop6
1082 .align 4,0x90
1083 L048xts_enc_loop6:
# Generate six consecutive tweaks; each step is tweak *= x in
# GF(2^128): pcmpgtd extracts the carry, pshufd $19 broadcasts it,
# pand masks it with 0x87, paddq doubles, pxor reduces.
1084 pshufd $19,%xmm0,%xmm2
1085 pxor %xmm0,%xmm0
1086 movdqa %xmm1,(%esp)
1087 paddq %xmm1,%xmm1
1088 pand %xmm3,%xmm2
1089 pcmpgtd %xmm1,%xmm0
1090 pxor %xmm2,%xmm1
1091 pshufd $19,%xmm0,%xmm2
1092 pxor %xmm0,%xmm0
1093 movdqa %xmm1,16(%esp)
1094 paddq %xmm1,%xmm1
1095 pand %xmm3,%xmm2
1096 pcmpgtd %xmm1,%xmm0
1097 pxor %xmm2,%xmm1
1098 pshufd $19,%xmm0,%xmm2
1099 pxor %xmm0,%xmm0
1100 movdqa %xmm1,32(%esp)
1101 paddq %xmm1,%xmm1
1102 pand %xmm3,%xmm2
1103 pcmpgtd %xmm1,%xmm0
1104 pxor %xmm2,%xmm1
1105 pshufd $19,%xmm0,%xmm2
1106 pxor %xmm0,%xmm0
1107 movdqa %xmm1,48(%esp)
1108 paddq %xmm1,%xmm1
1109 pand %xmm3,%xmm2
1110 pcmpgtd %xmm1,%xmm0
1111 pxor %xmm2,%xmm1
1112 pshufd $19,%xmm0,%xmm7
1113 movdqa %xmm1,64(%esp)
1114 paddq %xmm1,%xmm1
1115 movups (%ebp),%xmm0
1116 pand %xmm3,%xmm7
# Load six input blocks, xor with round key 0 and the six tweaks,
# issue the first rounds inline, then enter the shared 6-wide loop.
1117 movups (%esi),%xmm2
1118 pxor %xmm1,%xmm7
1119 movl %ebx,%ecx
1120 movdqu 16(%esi),%xmm3
1121 xorps %xmm0,%xmm2
1122 movdqu 32(%esi),%xmm4
1123 pxor %xmm0,%xmm3
1124 movdqu 48(%esi),%xmm5
1125 pxor %xmm0,%xmm4
1126 movdqu 64(%esi),%xmm6
1127 pxor %xmm0,%xmm5
1128 movdqu 80(%esi),%xmm1
1129 pxor %xmm0,%xmm6
1130 leal 96(%esi),%esi
1131 pxor (%esp),%xmm2
1132 movdqa %xmm7,80(%esp)
1133 pxor %xmm1,%xmm7
1134 movups 16(%ebp),%xmm1
1135 pxor 16(%esp),%xmm3
1136 pxor 32(%esp),%xmm4
1137 .byte 102,15,56,220,209
1138 pxor 48(%esp),%xmm5
1139 pxor 64(%esp),%xmm6
1140 .byte 102,15,56,220,217
1141 pxor %xmm0,%xmm7
1142 movups 32(%ebp),%xmm0
1143 .byte 102,15,56,220,225
1144 .byte 102,15,56,220,233
1145 .byte 102,15,56,220,241
1146 .byte 102,15,56,220,249
1147 call L_aesni_encrypt6_enter
# Post-whiten with the saved tweaks, store, and advance the tweak chain.
1148 movdqa 80(%esp),%xmm1
1149 pxor %xmm0,%xmm0
1150 xorps (%esp),%xmm2
1151 pcmpgtd %xmm1,%xmm0
1152 xorps 16(%esp),%xmm3
1153 movups %xmm2,(%edi)
1154 xorps 32(%esp),%xmm4
1155 movups %xmm3,16(%edi)
1156 xorps 48(%esp),%xmm5
1157 movups %xmm4,32(%edi)
1158 xorps 64(%esp),%xmm6
1159 movups %xmm5,48(%edi)
1160 xorps %xmm1,%xmm7
1161 movups %xmm6,64(%edi)
1162 pshufd $19,%xmm0,%xmm2
1163 movups %xmm7,80(%edi)
1164 leal 96(%edi),%edi
1165 movdqa 96(%esp),%xmm3
1166 pxor %xmm0,%xmm0
1167 paddq %xmm1,%xmm1
1168 pand %xmm3,%xmm2
1169 pcmpgtd %xmm1,%xmm0
1170 pxor %xmm2,%xmm1
1171 subl $96,%eax
1172 jnc L048xts_enc_loop6
1173 movl 240(%ebp),%ecx
1174 movl %ebp,%edx
1175 movl %ecx,%ebx
# ---- short input / tail: 1..5 whole blocks remain ----
1176 L047xts_enc_short:
1177 addl $96,%eax
1178 jz L049xts_enc_done6x
1179 movdqa %xmm1,%xmm5
1180 cmpl $32,%eax
1181 jb L050xts_enc_one
1182 pshufd $19,%xmm0,%xmm2
1183 pxor %xmm0,%xmm0
1184 paddq %xmm1,%xmm1
1185 pand %xmm3,%xmm2
1186 pcmpgtd %xmm1,%xmm0
1187 pxor %xmm2,%xmm1
1188 je L051xts_enc_two
1189 pshufd $19,%xmm0,%xmm2
1190 pxor %xmm0,%xmm0
1191 movdqa %xmm1,%xmm6
1192 paddq %xmm1,%xmm1
1193 pand %xmm3,%xmm2
1194 pcmpgtd %xmm1,%xmm0
1195 pxor %xmm2,%xmm1
1196 cmpl $64,%eax
1197 jb L052xts_enc_three
1198 pshufd $19,%xmm0,%xmm2
1199 pxor %xmm0,%xmm0
1200 movdqa %xmm1,%xmm7
1201 paddq %xmm1,%xmm1
1202 pand %xmm3,%xmm2
1203 pcmpgtd %xmm1,%xmm0
1204 pxor %xmm2,%xmm1
1205 movdqa %xmm5,(%esp)
1206 movdqa %xmm6,16(%esp)
1207 je L053xts_enc_four
# 5-block tail via the 6-wide helper.
1208 movdqa %xmm7,32(%esp)
1209 pshufd $19,%xmm0,%xmm7
1210 movdqa %xmm1,48(%esp)
1211 paddq %xmm1,%xmm1
1212 pand %xmm3,%xmm7
1213 pxor %xmm1,%xmm7
1214 movdqu (%esi),%xmm2
1215 movdqu 16(%esi),%xmm3
1216 movdqu 32(%esi),%xmm4
1217 pxor (%esp),%xmm2
1218 movdqu 48(%esi),%xmm5
1219 pxor 16(%esp),%xmm3
1220 movdqu 64(%esi),%xmm6
1221 pxor 32(%esp),%xmm4
1222 leal 80(%esi),%esi
1223 pxor 48(%esp),%xmm5
1224 movdqa %xmm7,64(%esp)
1225 pxor %xmm7,%xmm6
1226 call __aesni_encrypt6
1227 movaps 64(%esp),%xmm1
1228 xorps (%esp),%xmm2
1229 xorps 16(%esp),%xmm3
1230 xorps 32(%esp),%xmm4
1231 movups %xmm2,(%edi)
1232 xorps 48(%esp),%xmm5
1233 movups %xmm3,16(%edi)
1234 xorps %xmm1,%xmm6
1235 movups %xmm4,32(%edi)
1236 movups %xmm5,48(%edi)
1237 movups %xmm6,64(%edi)
1238 leal 80(%edi),%edi
1239 jmp L054xts_enc_done
1240 .align 4,0x90
# 1-block tail: tweak in xmm5, inline single-block encrypt.
1241 L050xts_enc_one:
1242 movups (%esi),%xmm2
1243 leal 16(%esi),%esi
1244 xorps %xmm5,%xmm2
1245 movups (%edx),%xmm0
1246 movups 16(%edx),%xmm1
1247 leal 32(%edx),%edx
1248 xorps %xmm0,%xmm2
1249 L055enc1_loop_9:
1250 .byte 102,15,56,220,209
1251 decl %ecx
1252 movups (%edx),%xmm1
1253 leal 16(%edx),%edx
1254 jnz L055enc1_loop_9
1255 .byte 102,15,56,221,209
1256 xorps %xmm5,%xmm2
1257 movups %xmm2,(%edi)
1258 leal 16(%edi),%edi
1259 movdqa %xmm5,%xmm1
1260 jmp L054xts_enc_done
1261 .align 4,0x90
1262 L051xts_enc_two:
1263 movaps %xmm1,%xmm6
1264 movups (%esi),%xmm2
1265 movups 16(%esi),%xmm3
1266 leal 32(%esi),%esi
1267 xorps %xmm5,%xmm2
1268 xorps %xmm6,%xmm3
1269 call __aesni_encrypt2
1270 xorps %xmm5,%xmm2
1271 xorps %xmm6,%xmm3
1272 movups %xmm2,(%edi)
1273 movups %xmm3,16(%edi)
1274 leal 32(%edi),%edi
1275 movdqa %xmm6,%xmm1
1276 jmp L054xts_enc_done
1277 .align 4,0x90
1278 L052xts_enc_three:
1279 movaps %xmm1,%xmm7
1280 movups (%esi),%xmm2
1281 movups 16(%esi),%xmm3
1282 movups 32(%esi),%xmm4
1283 leal 48(%esi),%esi
1284 xorps %xmm5,%xmm2
1285 xorps %xmm6,%xmm3
1286 xorps %xmm7,%xmm4
1287 call __aesni_encrypt3
1288 xorps %xmm5,%xmm2
1289 xorps %xmm6,%xmm3
1290 xorps %xmm7,%xmm4
1291 movups %xmm2,(%edi)
1292 movups %xmm3,16(%edi)
1293 movups %xmm4,32(%edi)
1294 leal 48(%edi),%edi
1295 movdqa %xmm7,%xmm1
1296 jmp L054xts_enc_done
1297 .align 4,0x90
1298 L053xts_enc_four:
1299 movaps %xmm1,%xmm6
1300 movups (%esi),%xmm2
1301 movups 16(%esi),%xmm3
1302 movups 32(%esi),%xmm4
1303 xorps (%esp),%xmm2
1304 movups 48(%esi),%xmm5
1305 leal 64(%esi),%esi
1306 xorps 16(%esp),%xmm3
1307 xorps %xmm7,%xmm4
1308 xorps %xmm6,%xmm5
1309 call __aesni_encrypt4
1310 xorps (%esp),%xmm2
1311 xorps 16(%esp),%xmm3
1312 xorps %xmm7,%xmm4
1313 movups %xmm2,(%edi)
1314 xorps %xmm6,%xmm5
1315 movups %xmm3,16(%edi)
1316 movups %xmm4,32(%edi)
1317 movups %xmm5,48(%edi)
1318 leal 64(%edi),%edi
1319 movdqa %xmm6,%xmm1
1320 jmp L054xts_enc_done
1321 .align 4,0x90
# Exact multiple of 96 bytes processed: only a sub-block residue may remain.
1322 L049xts_enc_done6x:
1323 movl 112(%esp),%eax
1324 andl $15,%eax
1325 jz L056xts_enc_ret
1326 movdqa %xmm1,%xmm5
1327 movl %eax,112(%esp)
1328 jmp L057xts_enc_steal
1329 .align 4,0x90
1330 L054xts_enc_done:
1331 movl 112(%esp),%eax
1332 pxor %xmm0,%xmm0
1333 andl $15,%eax
1334 jz L056xts_enc_ret
# Advance the tweak once more for the stolen block.
1335 pcmpgtd %xmm1,%xmm0
1336 movl %eax,112(%esp)
1337 pshufd $19,%xmm0,%xmm5
1338 paddq %xmm1,%xmm1
1339 pand 96(%esp),%xmm5
1340 pxor %xmm1,%xmm5
# Ciphertext stealing: swap the residual input bytes with the tail of
# the previous ciphertext block, then re-encrypt that block.
1341 L057xts_enc_steal:
1342 movzbl (%esi),%ecx
1343 movzbl -16(%edi),%edx
1344 leal 1(%esi),%esi
1345 movb %cl,-16(%edi)
1346 movb %dl,(%edi)
1347 leal 1(%edi),%edi
1348 subl $1,%eax
1349 jnz L057xts_enc_steal
1350 subl 112(%esp),%edi
1351 movl %ebp,%edx
1352 movl %ebx,%ecx
1353 movups -16(%edi),%xmm2
1354 xorps %xmm5,%xmm2
1355 movups (%edx),%xmm0
1356 movups 16(%edx),%xmm1
1357 leal 32(%edx),%edx
1358 xorps %xmm0,%xmm2
1359 L058enc1_loop_10:
1360 .byte 102,15,56,220,209
1361 decl %ecx
1362 movups (%edx),%xmm1
1363 leal 16(%edx),%edx
1364 jnz L058enc1_loop_10
1365 .byte 102,15,56,221,209
1366 xorps %xmm5,%xmm2
1367 movups %xmm2,-16(%edi)
# Exit: scrub registers and all tweak-bearing stack slots, restore esp.
1368 L056xts_enc_ret:
1369 pxor %xmm0,%xmm0
1370 pxor %xmm1,%xmm1
1371 pxor %xmm2,%xmm2
1372 movdqa %xmm0,(%esp)
1373 pxor %xmm3,%xmm3
1374 movdqa %xmm0,16(%esp)
1375 pxor %xmm4,%xmm4
1376 movdqa %xmm0,32(%esp)
1377 pxor %xmm5,%xmm5
1378 movdqa %xmm0,48(%esp)
1379 pxor %xmm6,%xmm6
1380 movdqa %xmm0,64(%esp)
1381 pxor %xmm7,%xmm7
1382 movdqa %xmm0,80(%esp)
1383 movl 116(%esp),%esp
1384 popl %edi
1385 popl %esi
1386 popl %ebx
1387 popl %ebp
1388 ret
1389 .globl _aesni_xts_decrypt
1390 .private_extern _aesni_xts_decrypt
1391 .align 4
/*
 * AES-XTS decryption (IEEE P1619 style), i386 cdecl.
 * Stack args after the four pushes below:
 *   20(%esp) = input ptr, 24(%esp) = output ptr, 28(%esp) = length,
 *   32(%esp) = data key (AES_KEY*), 36(%esp) = tweak key (AES_KEY*),
 *   40(%esp) = 16-byte IV / initial tweak.
 * (Argument names inferred from the stack reads below — the code first
 *  encrypts *40(%esp) with the key at 36(%esp), then streams through
 *  20/24/28/32 — confirm against the C prototype in the header.)
 * Scratch frame, 16-byte aligned: (%esp)..80(%esp) = per-block tweaks,
 * 96(%esp) = GF(2^128) reduction constant 0x87, 112(%esp) = tail length,
 * 116(%esp) = caller's %esp.
 */
1392 _aesni_xts_decrypt:
1393 L_aesni_xts_decrypt_begin:
1394 pushl %ebp
1395 pushl %ebx
1396 pushl %esi
1397 pushl %edi
1398 movl 36(%esp),%edx
1399 movl 40(%esp),%esi
1400 movl 240(%edx),%ecx            /* round count lives at offset 240 of AES_KEY */
1401 movups (%esi),%xmm2
1402 movups (%edx),%xmm0
1403 movups 16(%edx),%xmm1
1404 leal 32(%edx),%edx
1405 xorps %xmm0,%xmm2
/* Encrypt the IV with the tweak key: xmm2 becomes the initial tweak. */
1406 L059enc1_loop_11:
1407 .byte 102,15,56,220,209        /* aesenc %xmm1,%xmm2 */
1408 decl %ecx
1409 movups (%edx),%xmm1
1410 leal 16(%edx),%edx
1411 jnz L059enc1_loop_11
1412 .byte 102,15,56,221,209        /* aesenclast %xmm1,%xmm2 */
1413 movl 20(%esp),%esi
1414 movl 24(%esp),%edi
1415 movl 28(%esp),%eax
1416 movl 32(%esp),%edx
1417 movl %esp,%ebp
1418 subl $120,%esp
1419 andl $-16,%esp                 /* 16-align scratch frame for movdqa stores */
1420 xorl %ebx,%ebx
1421 testl $15,%eax
1422 setnz %bl
1423 shll $4,%ebx
1424 subl %ebx,%eax                 /* ragged tail? hold back one extra block for ciphertext stealing */
1425 movl $135,96(%esp)             /* 0x87: x^128 reduction polynomial constant */
1426 movl $0,100(%esp)
1427 movl $1,104(%esp)
1428 movl $0,108(%esp)
1429 movl %eax,112(%esp)
1430 movl %ebp,116(%esp)            /* saved %esp for epilogue */
1431 movl 240(%edx),%ecx
1432 movl %edx,%ebp                 /* ebp = data key, ebx = rounds, kept live across the loop */
1433 movl %ecx,%ebx
1434 movdqa %xmm2,%xmm1             /* xmm1 = running tweak */
1435 pxor %xmm0,%xmm0
1436 movdqa 96(%esp),%xmm3          /* xmm3 = 0x87 reduction mask */
1437 pcmpgtd %xmm1,%xmm0
1438 andl $-16,%eax
1439 subl $96,%eax
1440 jc L060xts_dec_short           /* fewer than 6 full blocks */
1441 shll $4,%ecx
1442 movl $16,%ebx
1443 subl %ecx,%ebx
1444 leal 32(%edx,%ecx,1),%edx      /* point edx past the schedule; ebx biases the round counter */
1445 jmp L061xts_dec_loop6
1446 .align 4,0x90
/*
 * Main loop: 6 blocks per iteration. Each tweak is derived from the last by
 * doubling in GF(2^128): pcmpgtd extracts the sign as a carry, pshufd $19
 * broadcasts it, pand masks with 0x87, paddq shifts left, pxor reduces.
 * Tweaks 1-5 are parked at (%esp)..64(%esp), tweak 6 in xmm7 / 80(%esp).
 */
1447 L061xts_dec_loop6:
1448 pshufd $19,%xmm0,%xmm2
1449 pxor %xmm0,%xmm0
1450 movdqa %xmm1,(%esp)
1451 paddq %xmm1,%xmm1
1452 pand %xmm3,%xmm2
1453 pcmpgtd %xmm1,%xmm0
1454 pxor %xmm2,%xmm1
1455 pshufd $19,%xmm0,%xmm2
1456 pxor %xmm0,%xmm0
1457 movdqa %xmm1,16(%esp)
1458 paddq %xmm1,%xmm1
1459 pand %xmm3,%xmm2
1460 pcmpgtd %xmm1,%xmm0
1461 pxor %xmm2,%xmm1
1462 pshufd $19,%xmm0,%xmm2
1463 pxor %xmm0,%xmm0
1464 movdqa %xmm1,32(%esp)
1465 paddq %xmm1,%xmm1
1466 pand %xmm3,%xmm2
1467 pcmpgtd %xmm1,%xmm0
1468 pxor %xmm2,%xmm1
1469 pshufd $19,%xmm0,%xmm2
1470 pxor %xmm0,%xmm0
1471 movdqa %xmm1,48(%esp)
1472 paddq %xmm1,%xmm1
1473 pand %xmm3,%xmm2
1474 pcmpgtd %xmm1,%xmm0
1475 pxor %xmm2,%xmm1
1476 pshufd $19,%xmm0,%xmm7
1477 movdqa %xmm1,64(%esp)
1478 paddq %xmm1,%xmm1
1479 movups (%ebp),%xmm0            /* round key 0 */
1480 pand %xmm3,%xmm7
1481 movups (%esi),%xmm2
1482 pxor %xmm1,%xmm7
/* Load 6 ciphertext blocks, XOR in round key 0 and the tweaks, then run
 * the shared 6-way decrypt body (first two rounds inline, rest via call). */
1483 movl %ebx,%ecx
1484 movdqu 16(%esi),%xmm3
1485 xorps %xmm0,%xmm2
1486 movdqu 32(%esi),%xmm4
1487 pxor %xmm0,%xmm3
1488 movdqu 48(%esi),%xmm5
1489 pxor %xmm0,%xmm4
1490 movdqu 64(%esi),%xmm6
1491 pxor %xmm0,%xmm5
1492 movdqu 80(%esi),%xmm1
1493 pxor %xmm0,%xmm6
1494 leal 96(%esi),%esi
1495 pxor (%esp),%xmm2
1496 movdqa %xmm7,80(%esp)
1497 pxor %xmm1,%xmm7
1498 movups 16(%ebp),%xmm1
1499 pxor 16(%esp),%xmm3
1500 pxor 32(%esp),%xmm4
1501 .byte 102,15,56,222,209        /* aesdec %xmm1,%xmm2 */
1502 pxor 48(%esp),%xmm5
1503 pxor 64(%esp),%xmm6
1504 .byte 102,15,56,222,217        /* aesdec %xmm1,%xmm3 */
1505 pxor %xmm0,%xmm7
1506 movups 32(%ebp),%xmm0
1507 .byte 102,15,56,222,225        /* aesdec %xmm1,%xmm4 */
1508 .byte 102,15,56,222,233        /* aesdec %xmm1,%xmm5 */
1509 .byte 102,15,56,222,241        /* aesdec %xmm1,%xmm6 */
1510 .byte 102,15,56,222,249        /* aesdec %xmm1,%xmm7 */
1511 call L_aesni_decrypt6_enter
/* Un-tweak the 6 plaintext blocks, store them, and double the tweak once
 * more for the next iteration. */
1512 movdqa 80(%esp),%xmm1
1513 pxor %xmm0,%xmm0
1514 xorps (%esp),%xmm2
1515 pcmpgtd %xmm1,%xmm0
1516 xorps 16(%esp),%xmm3
1517 movups %xmm2,(%edi)
1518 xorps 32(%esp),%xmm4
1519 movups %xmm3,16(%edi)
1520 xorps 48(%esp),%xmm5
1521 movups %xmm4,32(%edi)
1522 xorps 64(%esp),%xmm6
1523 movups %xmm5,48(%edi)
1524 xorps %xmm1,%xmm7
1525 movups %xmm6,64(%edi)
1526 pshufd $19,%xmm0,%xmm2
1527 movups %xmm7,80(%edi)
1528 leal 96(%edi),%edi
1529 movdqa 96(%esp),%xmm3
1530 pxor %xmm0,%xmm0
1531 paddq %xmm1,%xmm1
1532 pand %xmm3,%xmm2
1533 pcmpgtd %xmm1,%xmm0
1534 pxor %xmm2,%xmm1
1535 subl $96,%eax
1536 jnc L061xts_dec_loop6
1537 movl 240(%ebp),%ecx
1538 movl %ebp,%edx
1539 movl %ecx,%ebx
/* Short path: 0..5 remaining full blocks; dispatch on count, computing one
 * fresh tweak per extra block along the way. */
1540 L060xts_dec_short:
1541 addl $96,%eax
1542 jz L062xts_dec_done6x
1543 movdqa %xmm1,%xmm5
1544 cmpl $32,%eax
1545 jb L063xts_dec_one
1546 pshufd $19,%xmm0,%xmm2
1547 pxor %xmm0,%xmm0
1548 paddq %xmm1,%xmm1
1549 pand %xmm3,%xmm2
1550 pcmpgtd %xmm1,%xmm0
1551 pxor %xmm2,%xmm1
1552 je L064xts_dec_two
1553 pshufd $19,%xmm0,%xmm2
1554 pxor %xmm0,%xmm0
1555 movdqa %xmm1,%xmm6
1556 paddq %xmm1,%xmm1
1557 pand %xmm3,%xmm2
1558 pcmpgtd %xmm1,%xmm0
1559 pxor %xmm2,%xmm1
1560 cmpl $64,%eax
1561 jb L065xts_dec_three
1562 pshufd $19,%xmm0,%xmm2
1563 pxor %xmm0,%xmm0
1564 movdqa %xmm1,%xmm7
1565 paddq %xmm1,%xmm1
1566 pand %xmm3,%xmm2
1567 pcmpgtd %xmm1,%xmm0
1568 pxor %xmm2,%xmm1
1569 movdqa %xmm5,(%esp)
1570 movdqa %xmm6,16(%esp)
1571 je L066xts_dec_four
/* Five blocks. */
1572 movdqa %xmm7,32(%esp)
1573 pshufd $19,%xmm0,%xmm7
1574 movdqa %xmm1,48(%esp)
1575 paddq %xmm1,%xmm1
1576 pand %xmm3,%xmm7
1577 pxor %xmm1,%xmm7
1578 movdqu (%esi),%xmm2
1579 movdqu 16(%esi),%xmm3
1580 movdqu 32(%esi),%xmm4
1581 pxor (%esp),%xmm2
1582 movdqu 48(%esi),%xmm5
1583 pxor 16(%esp),%xmm3
1584 movdqu 64(%esi),%xmm6
1585 pxor 32(%esp),%xmm4
1586 leal 80(%esi),%esi
1587 pxor 48(%esp),%xmm5
1588 movdqa %xmm7,64(%esp)
1589 pxor %xmm7,%xmm6
1590 call __aesni_decrypt6
1591 movaps 64(%esp),%xmm1
1592 xorps (%esp),%xmm2
1593 xorps 16(%esp),%xmm3
1594 xorps 32(%esp),%xmm4
1595 movups %xmm2,(%edi)
1596 xorps 48(%esp),%xmm5
1597 movups %xmm3,16(%edi)
1598 xorps %xmm1,%xmm6
1599 movups %xmm4,32(%edi)
1600 movups %xmm5,48(%edi)
1601 movups %xmm6,64(%edi)
1602 leal 80(%edi),%edi
1603 jmp L067xts_dec_done
1604 .align 4,0x90
/* One block: scalar decrypt with tweak in xmm5. */
1605 L063xts_dec_one:
1606 movups (%esi),%xmm2
1607 leal 16(%esi),%esi
1608 xorps %xmm5,%xmm2
1609 movups (%edx),%xmm0
1610 movups 16(%edx),%xmm1
1611 leal 32(%edx),%edx
1612 xorps %xmm0,%xmm2
1613 L068dec1_loop_12:
1614 .byte 102,15,56,222,209        /* aesdec %xmm1,%xmm2 */
1615 decl %ecx
1616 movups (%edx),%xmm1
1617 leal 16(%edx),%edx
1618 jnz L068dec1_loop_12
1619 .byte 102,15,56,223,209        /* aesdeclast %xmm1,%xmm2 */
1620 xorps %xmm5,%xmm2
1621 movups %xmm2,(%edi)
1622 leal 16(%edi),%edi
1623 movdqa %xmm5,%xmm1
1624 jmp L067xts_dec_done
1625 .align 4,0x90
/* Two blocks. */
1626 L064xts_dec_two:
1627 movaps %xmm1,%xmm6
1628 movups (%esi),%xmm2
1629 movups 16(%esi),%xmm3
1630 leal 32(%esi),%esi
1631 xorps %xmm5,%xmm2
1632 xorps %xmm6,%xmm3
1633 call __aesni_decrypt2
1634 xorps %xmm5,%xmm2
1635 xorps %xmm6,%xmm3
1636 movups %xmm2,(%edi)
1637 movups %xmm3,16(%edi)
1638 leal 32(%edi),%edi
1639 movdqa %xmm6,%xmm1
1640 jmp L067xts_dec_done
1641 .align 4,0x90
/* Three blocks. */
1642 L065xts_dec_three:
1643 movaps %xmm1,%xmm7
1644 movups (%esi),%xmm2
1645 movups 16(%esi),%xmm3
1646 movups 32(%esi),%xmm4
1647 leal 48(%esi),%esi
1648 xorps %xmm5,%xmm2
1649 xorps %xmm6,%xmm3
1650 xorps %xmm7,%xmm4
1651 call __aesni_decrypt3
1652 xorps %xmm5,%xmm2
1653 xorps %xmm6,%xmm3
1654 xorps %xmm7,%xmm4
1655 movups %xmm2,(%edi)
1656 movups %xmm3,16(%edi)
1657 movups %xmm4,32(%edi)
1658 leal 48(%edi),%edi
1659 movdqa %xmm7,%xmm1
1660 jmp L067xts_dec_done
1661 .align 4,0x90
/* Four blocks; first two tweaks were spilled to (%esp)/16(%esp) above. */
1662 L066xts_dec_four:
1663 movaps %xmm1,%xmm6
1664 movups (%esi),%xmm2
1665 movups 16(%esi),%xmm3
1666 movups 32(%esi),%xmm4
1667 xorps (%esp),%xmm2
1668 movups 48(%esi),%xmm5
1669 leal 64(%esi),%esi
1670 xorps 16(%esp),%xmm3
1671 xorps %xmm7,%xmm4
1672 xorps %xmm6,%xmm5
1673 call __aesni_decrypt4
1674 xorps (%esp),%xmm2
1675 xorps 16(%esp),%xmm3
1676 xorps %xmm7,%xmm4
1677 movups %xmm2,(%edi)
1678 xorps %xmm6,%xmm5
1679 movups %xmm3,16(%edi)
1680 movups %xmm4,32(%edi)
1681 movups %xmm5,48(%edi)
1682 leal 64(%edi),%edi
1683 movdqa %xmm6,%xmm1
1684 jmp L067xts_dec_done
1685 .align 4,0x90
1686 L062xts_dec_done6x:
1687 movl 112(%esp),%eax
1688 andl $15,%eax
1689 jz L069xts_dec_ret
1690 movl %eax,112(%esp)
1691 jmp L070xts_dec_only_one_more
1692 .align 4,0x90
1693 L067xts_dec_done:
1694 movl 112(%esp),%eax
1695 pxor %xmm0,%xmm0
1696 andl $15,%eax
1697 jz L069xts_dec_ret
1698 pcmpgtd %xmm1,%xmm0
1699 movl %eax,112(%esp)
1700 pshufd $19,%xmm0,%xmm2
1701 pxor %xmm0,%xmm0
1702 movdqa 96(%esp),%xmm3
1703 paddq %xmm1,%xmm1
1704 pand %xmm3,%xmm2
1705 pcmpgtd %xmm1,%xmm0
1706 pxor %xmm2,%xmm1
/* Ciphertext stealing: decrypt the penultimate block with the *next* tweak
 * (xmm5), swap tail bytes with the final partial block, then decrypt the
 * reassembled block with the held-back tweak (xmm6). */
1707 L070xts_dec_only_one_more:
1708 pshufd $19,%xmm0,%xmm5
1709 movdqa %xmm1,%xmm6
1710 paddq %xmm1,%xmm1
1711 pand %xmm3,%xmm5
1712 pxor %xmm1,%xmm5
1713 movl %ebp,%edx
1714 movl %ebx,%ecx
1715 movups (%esi),%xmm2
1716 xorps %xmm5,%xmm2
1717 movups (%edx),%xmm0
1718 movups 16(%edx),%xmm1
1719 leal 32(%edx),%edx
1720 xorps %xmm0,%xmm2
1721 L071dec1_loop_13:
1722 .byte 102,15,56,222,209        /* aesdec %xmm1,%xmm2 */
1723 decl %ecx
1724 movups (%edx),%xmm1
1725 leal 16(%edx),%edx
1726 jnz L071dec1_loop_13
1727 .byte 102,15,56,223,209        /* aesdeclast %xmm1,%xmm2 */
1728 xorps %xmm5,%xmm2
1729 movups %xmm2,(%edi)
1730 L072xts_dec_steal:
1731 movzbl 16(%esi),%ecx
1732 movzbl (%edi),%edx
1733 leal 1(%esi),%esi
1734 movb %cl,(%edi)
1735 movb %dl,16(%edi)
1736 leal 1(%edi),%edi
1737 subl $1,%eax
1738 jnz L072xts_dec_steal
1739 subl 112(%esp),%edi
1740 movl %ebp,%edx
1741 movl %ebx,%ecx
1742 movups (%edi),%xmm2
1743 xorps %xmm6,%xmm2
1744 movups (%edx),%xmm0
1745 movups 16(%edx),%xmm1
1746 leal 32(%edx),%edx
1747 xorps %xmm0,%xmm2
1748 L073dec1_loop_14:
1749 .byte 102,15,56,222,209        /* aesdec %xmm1,%xmm2 */
1750 decl %ecx
1751 movups (%edx),%xmm1
1752 leal 16(%edx),%edx
1753 jnz L073dec1_loop_14
1754 .byte 102,15,56,223,209        /* aesdeclast %xmm1,%xmm2 */
1755 xorps %xmm6,%xmm2
1756 movups %xmm2,(%edi)
/* Epilogue: scrub XMM registers and the tweak scratch area so no key- or
 * tweak-derived material is left behind, then restore %esp. */
1757 L069xts_dec_ret:
1758 pxor %xmm0,%xmm0
1759 pxor %xmm1,%xmm1
1760 pxor %xmm2,%xmm2
1761 movdqa %xmm0,(%esp)
1762 pxor %xmm3,%xmm3
1763 movdqa %xmm0,16(%esp)
1764 pxor %xmm4,%xmm4
1765 movdqa %xmm0,32(%esp)
1766 pxor %xmm5,%xmm5
1767 movdqa %xmm0,48(%esp)
1768 pxor %xmm6,%xmm6
1769 movdqa %xmm0,64(%esp)
1770 pxor %xmm7,%xmm7
1771 movdqa %xmm0,80(%esp)
1772 movl 116(%esp),%esp
1773 popl %edi
1774 popl %esi
1775 popl %ebx
1776 popl %ebp
1777 ret
1778 .globl _aesni_cbc_encrypt
1779 .private_extern _aesni_cbc_encrypt
1780 .align 4
/*
 * AES-CBC encrypt/decrypt, i386 cdecl.
 * Stack args after the four pushes below:
 *   20(%esp) = input ptr, 24(%esp) = output ptr, 28(%esp) = length,
 *   32(%esp) = AES_KEY*, 36(%esp) = 16-byte IV buffer (updated on return),
 *   40(%esp) = nonzero for encrypt, zero for decrypt.
 * A small aligned scratch frame is carved below the caller frame;
 * 16(%esp) in the new frame holds the caller's %esp.
 */
1781 _aesni_cbc_encrypt:
1782 L_aesni_cbc_encrypt_begin:
1783 pushl %ebp
1784 pushl %ebx
1785 pushl %esi
1786 pushl %edi
1787 movl 20(%esp),%esi
1788 movl %esp,%ebx
1789 movl 24(%esp),%edi
1790 subl $24,%ebx
1791 movl 28(%esp),%eax
1792 andl $-16,%ebx                 /* 16-align the scratch frame */
1793 movl 32(%esp),%edx
1794 movl 36(%esp),%ebp
1795 testl %eax,%eax
1796 jz L074cbc_abort               /* zero length: nothing to do */
1797 cmpl $0,40(%esp)
1798 xchgl %esp,%ebx
1799 movups (%ebp),%xmm7            /* xmm7 = IV */
1800 movl 240(%edx),%ecx            /* rounds at offset 240 of AES_KEY */
1801 movl %edx,%ebp
1802 movl %ebx,16(%esp)             /* stash caller's %esp */
1803 movl %ecx,%ebx
1804 je L075cbc_decrypt
/* ---- Encrypt path: inherently serial, one block at a time. ---- */
1805 movaps %xmm7,%xmm2
1806 cmpl $16,%eax
1807 jb L076cbc_enc_tail
1808 subl $16,%eax
1809 jmp L077cbc_enc_loop
1810 .align 4,0x90
1811 L077cbc_enc_loop:
1812 movups (%esi),%xmm7            /* xmm2 = E(key, plaintext ^ chain) */
1813 leal 16(%esi),%esi
1814 movups (%edx),%xmm0
1815 movups 16(%edx),%xmm1
1816 xorps %xmm0,%xmm7
1817 leal 32(%edx),%edx
1818 xorps %xmm7,%xmm2
1819 L078enc1_loop_15:
1820 .byte 102,15,56,220,209        /* aesenc %xmm1,%xmm2 */
1821 decl %ecx
1822 movups (%edx),%xmm1
1823 leal 16(%edx),%edx
1824 jnz L078enc1_loop_15
1825 .byte 102,15,56,221,209        /* aesenclast %xmm1,%xmm2 */
1826 movl %ebx,%ecx
1827 movl %ebp,%edx
1828 movups %xmm2,(%edi)
1829 leal 16(%edi),%edi
1830 subl $16,%eax
1831 jnc L077cbc_enc_loop
1832 addl $16,%eax
1833 jnz L076cbc_enc_tail
1834 movaps %xmm2,%xmm7             /* last ciphertext becomes the output IV */
1835 pxor %xmm2,%xmm2
1836 jmp L079cbc_ret
/* Partial final block: copy the tail in place, zero-pad to 16 bytes, and
 * loop back to encrypt it as a full block. */
1837 L076cbc_enc_tail:
1838 movl %eax,%ecx
1839 .long 2767451785               /* mov %esi,%esi; rep movsb  (copy tail bytes) */
1840 movl $16,%ecx
1841 subl %eax,%ecx
1842 xorl %eax,%eax
1843 .long 2868115081               /* mov %esi,%esi; rep stosb  (zero padding) */
1844 leal -16(%edi),%edi
1845 movl %ebx,%ecx
1846 movl %edi,%esi
1847 movl %ebp,%edx
1848 jmp L077cbc_enc_loop
1849 .align 4,0x90
/* ---- Decrypt path: parallelizable, 6 blocks per iteration. ---- */
1850 L075cbc_decrypt:
1851 cmpl $80,%eax
1852 jbe L080cbc_dec_tail
1853 movaps %xmm7,(%esp)            /* spill IV/chain block */
1854 subl $80,%eax
1855 jmp L081cbc_dec_loop6_enter
1856 .align 4,0x90
/* Loop writes 5 blocks per pass plus the held-over 6th (xmm7) at loop top,
 * so the chain value stays live across iterations. */
1857 L082cbc_dec_loop6:
1858 movaps %xmm0,(%esp)
1859 movups %xmm7,(%edi)
1860 leal 16(%edi),%edi
1861 L081cbc_dec_loop6_enter:
1862 movdqu (%esi),%xmm2
1863 movdqu 16(%esi),%xmm3
1864 movdqu 32(%esi),%xmm4
1865 movdqu 48(%esi),%xmm5
1866 movdqu 64(%esi),%xmm6
1867 movdqu 80(%esi),%xmm7
1868 call __aesni_decrypt6
/* XOR each decrypted block with the previous ciphertext block. */
1869 movups (%esi),%xmm1
1870 movups 16(%esi),%xmm0
1871 xorps (%esp),%xmm2
1872 xorps %xmm1,%xmm3
1873 movups 32(%esi),%xmm1
1874 xorps %xmm0,%xmm4
1875 movups 48(%esi),%xmm0
1876 xorps %xmm1,%xmm5
1877 movups 64(%esi),%xmm1
1878 xorps %xmm0,%xmm6
1879 movups 80(%esi),%xmm0
1880 xorps %xmm1,%xmm7
1881 movups %xmm2,(%edi)
1882 movups %xmm3,16(%edi)
1883 leal 96(%esi),%esi
1884 movups %xmm4,32(%edi)
1885 movl %ebx,%ecx
1886 movups %xmm5,48(%edi)
1887 movl %ebp,%edx
1888 movups %xmm6,64(%edi)
1889 leal 80(%edi),%edi
1890 subl $96,%eax
1891 ja L082cbc_dec_loop6
1892 movaps %xmm7,%xmm2
1893 movaps %xmm0,%xmm7             /* xmm7 = last ciphertext = next IV */
1894 addl $80,%eax
1895 jle L083cbc_dec_clear_tail_collected
1896 movups %xmm2,(%edi)
1897 leal 16(%edi),%edi
/* Tail: 1..5 remaining blocks (possibly with a partial final block). */
1898 L080cbc_dec_tail:
1899 movups (%esi),%xmm2
1900 movaps %xmm2,%xmm6
1901 cmpl $16,%eax
1902 jbe L084cbc_dec_one
1903 movups 16(%esi),%xmm3
1904 movaps %xmm3,%xmm5
1905 cmpl $32,%eax
1906 jbe L085cbc_dec_two
1907 movups 32(%esi),%xmm4
1908 cmpl $48,%eax
1909 jbe L086cbc_dec_three
1910 movups 48(%esi),%xmm5
1911 cmpl $64,%eax
1912 jbe L087cbc_dec_four
1913 movups 64(%esi),%xmm6
1914 movaps %xmm7,(%esp)
1915 movups (%esi),%xmm2
1916 xorps %xmm7,%xmm7
1917 call __aesni_decrypt6
1918 movups (%esi),%xmm1
1919 movups 16(%esi),%xmm0
1920 xorps (%esp),%xmm2
1921 xorps %xmm1,%xmm3
1922 movups 32(%esi),%xmm1
1923 xorps %xmm0,%xmm4
1924 movups 48(%esi),%xmm0
1925 xorps %xmm1,%xmm5
1926 movups 64(%esi),%xmm7
1927 xorps %xmm0,%xmm6
1928 movups %xmm2,(%edi)
1929 movups %xmm3,16(%edi)
1930 pxor %xmm3,%xmm3
1931 movups %xmm4,32(%edi)
1932 pxor %xmm4,%xmm4
1933 movups %xmm5,48(%edi)
1934 pxor %xmm5,%xmm5
1935 leal 64(%edi),%edi
1936 movaps %xmm6,%xmm2
1937 pxor %xmm6,%xmm6
1938 subl $80,%eax
1939 jmp L088cbc_dec_tail_collected
1940 .align 4,0x90
1941 L084cbc_dec_one:
1942 movups (%edx),%xmm0
1943 movups 16(%edx),%xmm1
1944 leal 32(%edx),%edx
1945 xorps %xmm0,%xmm2
1946 L089dec1_loop_16:
1947 .byte 102,15,56,222,209        /* aesdec %xmm1,%xmm2 */
1948 decl %ecx
1949 movups (%edx),%xmm1
1950 leal 16(%edx),%edx
1951 jnz L089dec1_loop_16
1952 .byte 102,15,56,223,209        /* aesdeclast %xmm1,%xmm2 */
1953 xorps %xmm7,%xmm2
1954 movaps %xmm6,%xmm7
1955 subl $16,%eax
1956 jmp L088cbc_dec_tail_collected
1957 .align 4,0x90
1958 L085cbc_dec_two:
1959 call __aesni_decrypt2
1960 xorps %xmm7,%xmm2
1961 xorps %xmm6,%xmm3
1962 movups %xmm2,(%edi)
1963 movaps %xmm3,%xmm2
1964 pxor %xmm3,%xmm3
1965 leal 16(%edi),%edi
1966 movaps %xmm5,%xmm7
1967 subl $32,%eax
1968 jmp L088cbc_dec_tail_collected
1969 .align 4,0x90
1970 L086cbc_dec_three:
1971 call __aesni_decrypt3
1972 xorps %xmm7,%xmm2
1973 xorps %xmm6,%xmm3
1974 xorps %xmm5,%xmm4
1975 movups %xmm2,(%edi)
1976 movaps %xmm4,%xmm2
1977 pxor %xmm4,%xmm4
1978 movups %xmm3,16(%edi)
1979 pxor %xmm3,%xmm3
1980 leal 32(%edi),%edi
1981 movups 32(%esi),%xmm7
1982 subl $48,%eax
1983 jmp L088cbc_dec_tail_collected
1984 .align 4,0x90
1985 L087cbc_dec_four:
1986 call __aesni_decrypt4
1987 movups 16(%esi),%xmm1
1988 movups 32(%esi),%xmm0
1989 xorps %xmm7,%xmm2
1990 movups 48(%esi),%xmm7
1991 xorps %xmm6,%xmm3
1992 movups %xmm2,(%edi)
1993 xorps %xmm1,%xmm4
1994 movups %xmm3,16(%edi)
1995 pxor %xmm3,%xmm3
1996 xorps %xmm0,%xmm5
1997 movups %xmm4,32(%edi)
1998 pxor %xmm4,%xmm4
1999 leal 48(%edi),%edi
2000 movaps %xmm5,%xmm2
2001 pxor %xmm5,%xmm5
2002 subl $64,%eax
2003 jmp L088cbc_dec_tail_collected
2004 .align 4,0x90
2005 L083cbc_dec_clear_tail_collected:
2006 pxor %xmm3,%xmm3
2007 pxor %xmm4,%xmm4
2008 pxor %xmm5,%xmm5
2009 pxor %xmm6,%xmm6
2010 L088cbc_dec_tail_collected:
2011 andl $15,%eax
2012 jnz L090cbc_dec_tail_partial
2013 movups %xmm2,(%edi)
2014 pxor %xmm0,%xmm0
2015 jmp L079cbc_ret
2016 .align 4,0x90
/* Partial final block: write to scratch, copy only %eax bytes out. */
2017 L090cbc_dec_tail_partial:
2018 movaps %xmm2,(%esp)
2019 pxor %xmm0,%xmm0
2020 movl $16,%ecx
2021 movl %esp,%esi
2022 subl %eax,%ecx
2023 .long 2767451785               /* mov %esi,%esi; rep movsb */
2024 movdqa %xmm2,(%esp)
/* Common exit: restore %esp, write the updated IV back, scrub registers. */
2025 L079cbc_ret:
2026 movl 16(%esp),%esp
2027 movl 36(%esp),%ebp
2028 pxor %xmm2,%xmm2
2029 pxor %xmm1,%xmm1
2030 movups %xmm7,(%ebp)            /* store chaining value back into IV buffer */
2031 pxor %xmm7,%xmm7
2032 L074cbc_abort:
2033 popl %edi
2034 popl %esi
2035 popl %ebx
2036 popl %ebp
2037 ret
2038 .private_extern __aesni_set_encrypt_key
2039 .align 4
/*
 * Internal key-expansion core (register ABI, not cdecl):
 *   %eax = user key ptr, %ecx = bits (128/192/256), %edx = AES_KEY out.
 * Returns in %eax: 0 = ok, -1 = NULL pointer, -2 = bad key length.
 * The round count is stored at offset 240 of the AES_KEY.
 * Two schedules per key size: the classic aeskeygenassist-based one, and an
 * "_alt" pshufb-based one selected from OPENSSL_ia32cap_P (the mask below is
 * 0x10000800, i.e. bits 28 and 11 of the second capability word; the alt
 * path is taken when only bit 28 is set — presumably AVX-without-XOP, per
 * the upstream perlasm — confirm against crypto/aes/asm/aesni-x86.pl).
 */
2040 __aesni_set_encrypt_key:
2041 pushl %ebp
2042 pushl %ebx
2043 testl %eax,%eax
2044 jz L091bad_pointer
2045 testl %edx,%edx
2046 jz L091bad_pointer
/* call/pop: classic i386 PIC idiom to get a base address in %ebx. */
2047 call L092pic
2048 L092pic:
2049 popl %ebx
2050 leal Lkey_const-L092pic(%ebx),%ebx
2051 movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
2052 movups (%eax),%xmm0            /* first 16 key bytes */
2053 xorps %xmm4,%xmm4
2054 movl 4(%ebp),%ebp              /* second dword of OPENSSL_ia32cap_P */
2055 leal 16(%edx),%edx
2056 andl $268437504,%ebp           /* 0x10000800 */
2057 cmpl $256,%ecx
2058 je L09314rounds
2059 cmpl $192,%ecx
2060 je L09412rounds
2061 cmpl $128,%ecx
2062 jne L095bad_keybits
2063 .align 4,0x90
/* ---- AES-128: 10 rounds. ---- */
2064 L09610rounds:
2065 cmpl $268435456,%ebp           /* 0x10000000 */
2066 je L09710rounds_alt
2067 movl $9,%ecx
2068 movups %xmm0,-16(%edx)
/* .byte 102,15,58,223,200,imm = aeskeygenassist $imm,%xmm0,%xmm1;
 * the immediates are the round constants 1,2,4,...,0x1b,0x36. */
2069 .byte 102,15,58,223,200,1
2070 call L098key_128_cold
2071 .byte 102,15,58,223,200,2
2072 call L099key_128
2073 .byte 102,15,58,223,200,4
2074 call L099key_128
2075 .byte 102,15,58,223,200,8
2076 call L099key_128
2077 .byte 102,15,58,223,200,16
2078 call L099key_128
2079 .byte 102,15,58,223,200,32
2080 call L099key_128
2081 .byte 102,15,58,223,200,64
2082 call L099key_128
2083 .byte 102,15,58,223,200,128
2084 call L099key_128
2085 .byte 102,15,58,223,200,27
2086 call L099key_128
2087 .byte 102,15,58,223,200,54
2088 call L099key_128
2089 movups %xmm0,(%edx)
2090 movl %ecx,80(%edx)             /* rounds (=10, %ecx decr. semantics aside) at offset 240 of the key */
2091 jmp L100good_key
2092 .align 4,0x90
/* Fold one 128-bit round key: xmm0 ^= slid copies of itself ^ rotated
 * keygenassist word. */
2093 L099key_128:
2094 movups %xmm0,(%edx)
2095 leal 16(%edx),%edx
2096 L098key_128_cold:
2097 shufps $16,%xmm0,%xmm4
2098 xorps %xmm4,%xmm0
2099 shufps $140,%xmm0,%xmm4
2100 xorps %xmm4,%xmm0
2101 shufps $255,%xmm1,%xmm1
2102 xorps %xmm1,%xmm0
2103 ret
2104 .align 4,0x90
/* Alternative AES-128 schedule: pshufb/aesenclast with constants from
 * Lkey_const; avoids aeskeygenassist. */
2105 L09710rounds_alt:
2106 movdqa (%ebx),%xmm5            /* byte-rotation mask */
2107 movl $8,%ecx
2108 movdqa 32(%ebx),%xmm4          /* round-constant vector, doubled each pass */
2109 movdqa %xmm0,%xmm2
2110 movdqu %xmm0,-16(%edx)
2111 L101loop_key128:
2112 .byte 102,15,56,0,197          /* pshufb %xmm5,%xmm0 */
2113 .byte 102,15,56,221,196        /* aesenclast %xmm4,%xmm0 */
2114 pslld $1,%xmm4
2115 leal 16(%edx),%edx
2116 movdqa %xmm2,%xmm3
2117 pslldq $4,%xmm2
2118 pxor %xmm2,%xmm3
2119 pslldq $4,%xmm2
2120 pxor %xmm2,%xmm3
2121 pslldq $4,%xmm2
2122 pxor %xmm3,%xmm2
2123 pxor %xmm2,%xmm0
2124 movdqu %xmm0,-16(%edx)
2125 movdqa %xmm0,%xmm2
2126 decl %ecx
2127 jnz L101loop_key128
2128 movdqa 48(%ebx),%xmm4          /* rcon 0x1b for round 9 */
2129 .byte 102,15,56,0,197          /* pshufb %xmm5,%xmm0 */
2130 .byte 102,15,56,221,196        /* aesenclast %xmm4,%xmm0 */
2131 pslld $1,%xmm4
2132 movdqa %xmm2,%xmm3
2133 pslldq $4,%xmm2
2134 pxor %xmm2,%xmm3
2135 pslldq $4,%xmm2
2136 pxor %xmm2,%xmm3
2137 pslldq $4,%xmm2
2138 pxor %xmm3,%xmm2
2139 pxor %xmm2,%xmm0
2140 movdqu %xmm0,(%edx)
2141 movdqa %xmm0,%xmm2
2142 .byte 102,15,56,0,197
2143 .byte 102,15,56,221,196
2144 movdqa %xmm2,%xmm3
2145 pslldq $4,%xmm2
2146 pxor %xmm2,%xmm3
2147 pslldq $4,%xmm2
2148 pxor %xmm2,%xmm3
2149 pslldq $4,%xmm2
2150 pxor %xmm3,%xmm2
2151 pxor %xmm2,%xmm0
2152 movdqu %xmm0,16(%edx)
2153 movl $9,%ecx
2154 movl %ecx,96(%edx)
2155 jmp L100good_key
2156 .align 4,0x90
/* ---- AES-192: 12 rounds. ---- */
2157 L09412rounds:
2158 movq 16(%eax),%xmm2            /* remaining 8 key bytes */
2159 cmpl $268435456,%ebp
2160 je L10212rounds_alt
2161 movl $11,%ecx
2162 movups %xmm0,-16(%edx)
/* .byte 102,15,58,223,202,imm = aeskeygenassist $imm,%xmm2,%xmm1 */
2163 .byte 102,15,58,223,202,1
2164 call L103key_192a_cold
2165 .byte 102,15,58,223,202,2
2166 call L104key_192b
2167 .byte 102,15,58,223,202,4
2168 call L105key_192a
2169 .byte 102,15,58,223,202,8
2170 call L104key_192b
2171 .byte 102,15,58,223,202,16
2172 call L105key_192a
2173 .byte 102,15,58,223,202,32
2174 call L104key_192b
2175 .byte 102,15,58,223,202,64
2176 call L105key_192a
2177 .byte 102,15,58,223,202,128
2178 call L104key_192b
2179 movups %xmm0,(%edx)
2180 movl %ecx,48(%edx)
2181 jmp L100good_key
2182 .align 4,0x90
2183 L105key_192a:
2184 movups %xmm0,(%edx)
2185 leal 16(%edx),%edx
2186 .align 4,0x90
2187 L103key_192a_cold:
2188 movaps %xmm2,%xmm5
2189 L106key_192b_warm:
2190 shufps $16,%xmm0,%xmm4
2191 movdqa %xmm2,%xmm3
2192 xorps %xmm4,%xmm0
2193 shufps $140,%xmm0,%xmm4
2194 pslldq $4,%xmm3
2195 xorps %xmm4,%xmm0
2196 pshufd $85,%xmm1,%xmm1
2197 pxor %xmm3,%xmm2
2198 pxor %xmm1,%xmm0
2199 pshufd $255,%xmm0,%xmm3
2200 pxor %xmm3,%xmm2
2201 ret
2202 .align 4,0x90
2203 L104key_192b:
2204 movaps %xmm0,%xmm3
2205 shufps $68,%xmm0,%xmm5
2206 movups %xmm5,(%edx)
2207 shufps $78,%xmm2,%xmm3
2208 movups %xmm3,16(%edx)
2209 leal 32(%edx),%edx
2210 jmp L106key_192b_warm
2211 .align 4,0x90
/* Alternative AES-192 schedule (pshufb-based). */
2212 L10212rounds_alt:
2213 movdqa 16(%ebx),%xmm5
2214 movdqa 32(%ebx),%xmm4
2215 movl $8,%ecx
2216 movdqu %xmm0,-16(%edx)
2217 L107loop_key192:
2218 movq %xmm2,(%edx)
2219 movdqa %xmm2,%xmm1
2220 .byte 102,15,56,0,213          /* pshufb %xmm5,%xmm2 */
2221 .byte 102,15,56,221,212        /* aesenclast %xmm4,%xmm2 */
2222 pslld $1,%xmm4
2223 leal 24(%edx),%edx
2224 movdqa %xmm0,%xmm3
2225 pslldq $4,%xmm0
2226 pxor %xmm0,%xmm3
2227 pslldq $4,%xmm0
2228 pxor %xmm0,%xmm3
2229 pslldq $4,%xmm0
2230 pxor %xmm3,%xmm0
2231 pshufd $255,%xmm0,%xmm3
2232 pxor %xmm1,%xmm3
2233 pslldq $4,%xmm1
2234 pxor %xmm1,%xmm3
2235 pxor %xmm2,%xmm0
2236 pxor %xmm3,%xmm2
2237 movdqu %xmm0,-16(%edx)
2238 decl %ecx
2239 jnz L107loop_key192
2240 movl $11,%ecx
2241 movl %ecx,32(%edx)
2242 jmp L100good_key
2243 .align 4,0x90
/* ---- AES-256: 14 rounds. ---- */
2244 L09314rounds:
2245 movups 16(%eax),%xmm2          /* second 16 key bytes */
2246 leal 16(%edx),%edx
2247 cmpl $268435456,%ebp
2248 je L10814rounds_alt
2249 movl $13,%ecx
2250 movups %xmm0,-32(%edx)
2251 movups %xmm2,-16(%edx)
/* 202,imm = aeskeygenassist on %xmm2; 200,imm = on %xmm0. */
2252 .byte 102,15,58,223,202,1
2253 call L109key_256a_cold
2254 .byte 102,15,58,223,200,1
2255 call L110key_256b
2256 .byte 102,15,58,223,202,2
2257 call L111key_256a
2258 .byte 102,15,58,223,200,2
2259 call L110key_256b
2260 .byte 102,15,58,223,202,4
2261 call L111key_256a
2262 .byte 102,15,58,223,200,4
2263 call L110key_256b
2264 .byte 102,15,58,223,202,8
2265 call L111key_256a
2266 .byte 102,15,58,223,200,8
2267 call L110key_256b
2268 .byte 102,15,58,223,202,16
2269 call L111key_256a
2270 .byte 102,15,58,223,200,16
2271 call L110key_256b
2272 .byte 102,15,58,223,202,32
2273 call L111key_256a
2274 .byte 102,15,58,223,200,32
2275 call L110key_256b
2276 .byte 102,15,58,223,202,64
2277 call L111key_256a
2278 movups %xmm0,(%edx)
2279 movl %ecx,16(%edx)
2280 xorl %eax,%eax
2281 jmp L100good_key
2282 .align 4,0x90
2283 L111key_256a:
2284 movups %xmm2,(%edx)
2285 leal 16(%edx),%edx
2286 L109key_256a_cold:
2287 shufps $16,%xmm0,%xmm4
2288 xorps %xmm4,%xmm0
2289 shufps $140,%xmm0,%xmm4
2290 xorps %xmm4,%xmm0
2291 shufps $255,%xmm1,%xmm1
2292 xorps %xmm1,%xmm0
2293 ret
2294 .align 4,0x90
2295 L110key_256b:
2296 movups %xmm0,(%edx)
2297 leal 16(%edx),%edx
2298 shufps $16,%xmm2,%xmm4
2299 xorps %xmm4,%xmm2
2300 shufps $140,%xmm2,%xmm4
2301 xorps %xmm4,%xmm2
2302 shufps $170,%xmm1,%xmm1
2303 xorps %xmm1,%xmm2
2304 ret
2305 .align 4,0x90
/* Alternative AES-256 schedule (pshufb-based). */
2306 L10814rounds_alt:
2307 movdqa (%ebx),%xmm5
2308 movdqa 32(%ebx),%xmm4
2309 movl $7,%ecx
2310 movdqu %xmm0,-32(%edx)
2311 movdqa %xmm2,%xmm1
2312 movdqu %xmm2,-16(%edx)
2313 L112loop_key256:
2314 .byte 102,15,56,0,213          /* pshufb %xmm5,%xmm2 */
2315 .byte 102,15,56,221,212        /* aesenclast %xmm4,%xmm2 */
2316 movdqa %xmm0,%xmm3
2317 pslldq $4,%xmm0
2318 pxor %xmm0,%xmm3
2319 pslldq $4,%xmm0
2320 pxor %xmm0,%xmm3
2321 pslldq $4,%xmm0
2322 pxor %xmm3,%xmm0
2323 pslld $1,%xmm4
2324 pxor %xmm2,%xmm0
2325 movdqu %xmm0,(%edx)
2326 decl %ecx
2327 jz L113done_key256
2328 pshufd $255,%xmm0,%xmm2
2329 pxor %xmm3,%xmm3
2330 .byte 102,15,56,221,211        /* aesenclast %xmm3,%xmm2 */
2331 movdqa %xmm1,%xmm3
2332 pslldq $4,%xmm1
2333 pxor %xmm1,%xmm3
2334 pslldq $4,%xmm1
2335 pxor %xmm1,%xmm3
2336 pslldq $4,%xmm1
2337 pxor %xmm3,%xmm1
2338 pxor %xmm1,%xmm2
2339 movdqu %xmm2,16(%edx)
2340 leal 32(%edx),%edx
2341 movdqa %xmm2,%xmm1
2342 jmp L112loop_key256
2343 L113done_key256:
2344 movl $13,%ecx
2345 movl %ecx,16(%edx)
/* Success: scrub key material from XMM registers, return 0. */
2346 L100good_key:
2347 pxor %xmm0,%xmm0
2348 pxor %xmm1,%xmm1
2349 pxor %xmm2,%xmm2
2350 pxor %xmm3,%xmm3
2351 pxor %xmm4,%xmm4
2352 pxor %xmm5,%xmm5
2353 xorl %eax,%eax
2354 popl %ebx
2355 popl %ebp
2356 ret
2357 .align 2,0x90
2358 L091bad_pointer:
2359 movl $-1,%eax                  /* NULL user key or output key */
2360 popl %ebx
2361 popl %ebp
2362 ret
2363 .align 2,0x90
2364 L095bad_keybits:
2365 pxor %xmm0,%xmm0
2366 movl $-2,%eax                  /* bits not 128/192/256 */
2367 popl %ebx
2368 popl %ebp
2369 ret
2370 .globl _aesni_set_encrypt_key
2371 .private_extern _aesni_set_encrypt_key
2372 .align 4
/*
 * cdecl wrapper: int aesni_set_encrypt_key(const uint8_t *userKey, int bits,
 * AES_KEY *key). Marshals the stack args into the %eax/%ecx/%edx register
 * ABI of __aesni_set_encrypt_key; return code passes through in %eax.
 */
2373 _aesni_set_encrypt_key:
2374 L_aesni_set_encrypt_key_begin:
2375 movl 4(%esp),%eax              /* userKey */
2376 movl 8(%esp),%ecx              /* bits */
2377 movl 12(%esp),%edx             /* key */
2378 call __aesni_set_encrypt_key
2379 ret
2380 .globl _aesni_set_decrypt_key
2381 .private_extern _aesni_set_decrypt_key
2382 .align 4
/*
 * cdecl: int aesni_set_decrypt_key(const uint8_t *userKey, int bits,
 * AES_KEY *key). Builds the encryption schedule, then converts it in place:
 * the first and last round keys are swapped, and every interior round key is
 * run through AESIMC (InvMixColumns) while the schedule is reversed, as the
 * equivalent-inverse-cipher form used by AESDEC requires. Returns the
 * expansion's status code (0 / -1 / -2) unchanged.
 */
2383 _aesni_set_decrypt_key:
2384 L_aesni_set_decrypt_key_begin:
2385 movl 4(%esp),%eax
2386 movl 8(%esp),%ecx
2387 movl 12(%esp),%edx
2388 call __aesni_set_encrypt_key
2389 movl 12(%esp),%edx
2390 shll $4,%ecx                   /* rounds * 16 = byte offset of last round key */
2391 testl %eax,%eax
2392 jnz L114dec_key_ret            /* propagate expansion failure */
2393 leal 16(%edx,%ecx,1),%eax      /* eax = last round key, edx = first */
2394 movups (%edx),%xmm0
2395 movups (%eax),%xmm1
2396 movups %xmm0,(%eax)            /* swap end round keys untransformed */
2397 movups %xmm1,(%edx)
2398 leal 16(%edx),%edx
2399 leal -16(%eax),%eax
/* Walk inward from both ends, applying AESIMC and swapping. */
2400 L115dec_key_inverse:
2401 movups (%edx),%xmm0
2402 movups (%eax),%xmm1
2403 .byte 102,15,56,219,192        /* aesimc %xmm0,%xmm0 */
2404 .byte 102,15,56,219,201        /* aesimc %xmm1,%xmm1 */
2405 leal 16(%edx),%edx
2406 leal -16(%eax),%eax
2407 movups %xmm0,16(%eax)
2408 movups %xmm1,-16(%edx)
2409 cmpl %edx,%eax
2410 ja L115dec_key_inverse
2411 movups (%edx),%xmm0            /* middle round key: transform in place */
2412 .byte 102,15,56,219,192        /* aesimc %xmm0,%xmm0 */
2413 movups %xmm0,(%edx)
2414 pxor %xmm0,%xmm0               /* scrub key material */
2415 pxor %xmm1,%xmm1
2416 xorl %eax,%eax
2417 L114dec_key_ret:
2418 ret
2419 .align 6,0x90
/*
 * Constants for the pshufb-based "_alt" key schedules:
 *   +0  : 0x0c0f0e0d x4 — byte-shuffle mask (RotWord for AES-128/256)
 *   +16 : 0x04070605 x4 — byte-shuffle mask (AES-192 variant)
 *   +32 : 1 x4          — initial round-constant vector (doubled per round)
 *   +48 : 27 x4         — rcon 0x1b for the late AES-128 rounds
 * followed by the upstream CRYPTOGAMS attribution string (ASCII bytes).
 */
2420 Lkey_const:
2421 .long 202313229,202313229,202313229,202313229
2422 .long 67569157,67569157,67569157,67569157
2423 .long 1,1,1,1
2424 .long 27,27,27,27
2425 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2426 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2427 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2428 .byte 115,108,46,111,114,103,62,0
/* Mach-O non-lazy pointer so PIC code can locate the CPU capability vector. */
2429 .section __IMPORT,__pointers,non_lazy_symbol_pointers
2430 L_OPENSSL_ia32cap_P$non_lazy_ptr:
2431 .indirect_symbol _OPENSSL_ia32cap_P
2432 .long 0
2433 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/mac-x86/crypto/aes/aes-586.S ('k') | third_party/boringssl/mac-x86/crypto/aes/vpaes-x86.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698