#if defined(__i386__)
.file	"src/crypto/aes/asm/aesni-x86.S"
.text
# void aesni_encrypt(const uint8_t *in, const uint8_t *out_unused_order, const AES_KEY *key)
# cdecl stack args: 4(%esp)=in, 8(%esp)=out, 12(%esp)=key schedule.
# Encrypts one 16-byte block in place of the usual C fallback.
# %ecx = loop count loaded from offset 240 of the key schedule.
# .byte 102,15,56,220,209 = aesenc %xmm1,%xmm2 ; 102,15,56,221,209 = aesenclast %xmm1,%xmm2
# xmm0/xmm1/xmm2 are cleared on exit so no key/plaintext material leaks in registers.
.globl	_aesni_encrypt
.private_extern	_aesni_encrypt
.align	4
_aesni_encrypt:
L_aesni_encrypt_begin:
	movl	4(%esp),%eax		# eax = in
	movl	12(%esp),%edx		# edx = key schedule
	movups	(%eax),%xmm2		# xmm2 = plaintext block (unaligned load)
	movl	240(%edx),%ecx		# ecx = round count field of the schedule
	movl	8(%esp),%eax		# eax = out
	movups	(%edx),%xmm0		# round key 0
	movups	16(%edx),%xmm1		# round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whitening: state ^= rk[0]
L000enc1_loop_1:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1		# next round key
	leal	16(%edx),%edx
	jnz	L000enc1_loop_1
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub key material
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)		# store ciphertext
	pxor	%xmm2,%xmm2		# scrub state
	ret
# void aesni_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
# Mirror of _aesni_encrypt using the decrypt data path.
# .byte 102,15,56,222,209 = aesdec %xmm1,%xmm2 ; 102,15,56,223,209 = aesdeclast %xmm1,%xmm2
.globl	_aesni_decrypt
.private_extern	_aesni_decrypt
.align	4
_aesni_decrypt:
L_aesni_decrypt_begin:
	movl	4(%esp),%eax		# eax = in
	movl	12(%esp),%edx		# edx = key schedule
	movups	(%eax),%xmm2		# xmm2 = ciphertext block
	movl	240(%edx),%ecx		# ecx = round count field
	movl	8(%esp),%eax		# eax = out
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whitening
L001dec1_loop_2:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L001dec1_loop_2
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub sensitive registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
# Internal: encrypt 2 blocks (xmm2,xmm3) in parallel.
# In: edx = key schedule, ecx = round count; blocks in xmm2/xmm3.
# Clobbers eax-free; uses ecx as a negative byte offset that counts up to 0,
# so the key pointer itself stays loop-invariant after the lea.
# 102,15,56,220,* = aesenc ; 102,15,56,221,* = aesenclast (last byte = ModRM).
.private_extern	__aesni_encrypt2
.align	4
__aesni_encrypt2:
	movups	(%edx),%xmm0
	shll	$4,%ecx			# rounds -> byte offset (x16)
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		# whiten both lanes with rk[0]
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx	# edx = end of schedule
	negl	%ecx
	addl	$16,%ecx		# ecx walks from -(len-16) up to 0
L002enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L002enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	ret
# Internal: decrypt 2 blocks (xmm2,xmm3) in parallel.
# Same register contract and counting scheme as __aesni_encrypt2,
# with aesdec (102,15,56,222,*) / aesdeclast (102,15,56,223,*).
.private_extern	__aesni_decrypt2
.align	4
__aesni_decrypt2:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L003dec2_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L003dec2_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
	ret
# Internal: encrypt 3 blocks (xmm2..xmm4) in parallel.
# In: edx = key schedule, ecx = round count.
.private_extern	__aesni_encrypt3
.align	4
__aesni_encrypt3:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		# whiten all three lanes
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L004enc3_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L004enc3_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208		# aesenclast x3
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret
# Internal: decrypt 3 blocks (xmm2..xmm4) in parallel; mirror of __aesni_encrypt3.
.private_extern	__aesni_decrypt3
.align	4
__aesni_decrypt3:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
L005dec3_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L005dec3_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208		# aesdeclast x3
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	ret
# Internal: encrypt 4 blocks (xmm2..xmm5) in parallel.
# 15,31,64,0 = nopl 0(%eax): padding nop kept for decoder alignment of the loop.
.private_extern	__aesni_encrypt4
.align	4
__aesni_encrypt4:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2		# whiten all four lanes
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			# nopl 0(%eax)
	addl	$16,%ecx
L006enc4_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2..%xmm5
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2..%xmm5
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L006enc4_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208		# aesenclast x4
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret
# Internal: decrypt 4 blocks (xmm2..xmm5) in parallel; mirror of __aesni_encrypt4.
.private_extern	__aesni_decrypt4
.align	4
__aesni_decrypt4:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			# nopl 0(%eax) — alignment padding
	addl	$16,%ecx
L007dec4_loop:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2..%xmm5
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2..%xmm5
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L007dec4_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208		# aesdeclast x4
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	ret
# Internal: encrypt 6 blocks (xmm2..xmm7) in parallel.
# L_aesni_encrypt6_enter is a secondary entry point used by callers (CTR/XTS)
# that have already applied the first round themselves — keep the label exported.
.private_extern	__aesni_encrypt6
.align	4
__aesni_encrypt6:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		# whiten lanes, interleaved with first aesenc
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	L008_aesni_encrypt6_inner
.align	4,0x90
L009enc6_loop:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2..%xmm7
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L009enc6_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208		# aesenclast x6
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret
# Internal: decrypt 6 blocks (xmm2..xmm7) in parallel; mirror of __aesni_encrypt6.
# L_aesni_decrypt6_enter is the secondary entry point for pre-whitened callers.
.private_extern	__aesni_decrypt6
.align	4
__aesni_decrypt6:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	L010_aesni_decrypt6_inner
.align	4,0x90
L011dec6_loop:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2..%xmm7
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L011dec6_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208		# aesdeclast x6
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	ret
# void aesni_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t len,
#                        const AES_KEY *key, int enc)
# Stack args after 4 pushes: 20=in(esi) 24=out(edi) 28=len(eax)
#                            32=key(edx) 36=enc flag(ebx).
# len is masked to a multiple of 16; processes 6 blocks per iteration via the
# __aesni_{en,de}crypt6 helpers, then a 1..5-block tail.
# ebp preserves the key pointer and ebx the round count across helper calls
# (the helpers consume edx/ecx). All xmm regs are scrubbed before return.
.globl	_aesni_ecb_encrypt
.private_extern	_aesni_ecb_encrypt
.align	4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# len
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# enc flag
	andl	$-16,%eax		# round len down to whole blocks
	jz	L012ecb_ret
	movl	240(%edx),%ecx		# round count
	testl	%ebx,%ebx
	jz	L013ecb_decrypt		# enc==0 -> decrypt path
	movl	%edx,%ebp		# save key / rounds across helper calls
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L014ecb_enc_tail
	movdqu	(%esi),%xmm2		# preload first 6 blocks
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L015ecb_enc_loop6_enter
.align	4,0x90
L016ecb_enc_loop6:
	movups	%xmm2,(%edi)		# store previous batch, load next (interleaved)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L015ecb_enc_loop6_enter:
	call	__aesni_encrypt6
	movl	%ebp,%edx		# restore key / rounds clobbered by helper
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L016ecb_enc_loop6
	movups	%xmm2,(%edi)		# flush last full batch
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax		# eax = remaining bytes (0..80)
	jz	L012ecb_ret
L014ecb_enc_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	L020ecb_enc_four
	movups	64(%esi),%xmm6		# 5 blocks: run the 6-wide helper with a dummy lane
	xorps	%xmm7,%xmm7
	call	__aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L017ecb_enc_one:
	movups	(%edx),%xmm0		# single-block inline encrypt
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L021enc1_loop_3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L021enc1_loop_3
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L018ecb_enc_two:
	call	__aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L019ecb_enc_three:
	call	__aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L020ecb_enc_four:
	call	__aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L013ecb_decrypt:
	movl	%edx,%ebp		# decrypt side mirrors the encrypt structure
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L022ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L023ecb_dec_loop6_enter
.align	4,0x90
L024ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L023ecb_dec_loop6_enter:
	call	__aesni_decrypt6
	movl	%ebp,%edx
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L024ecb_dec_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	L012ecb_ret
L022ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	L028ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	__aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L025ecb_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L029dec1_loop_4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L029dec1_loop_4
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L026ecb_dec_two:
	call	__aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L027ecb_dec_three:
	call	__aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L028ecb_dec_four:
	call	__aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L012ecb_ret:
	pxor	%xmm0,%xmm0		# scrub all xmm state before returning
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# void aesni_ccm64_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t blocks,
#                                 const AES_KEY *key, const uint8_t ivec[16],
#                                 uint8_t cmac[16])
# CCM with 64-bit counter field: each iteration encrypts the counter (xmm2)
# and folds the plaintext into the CMAC (xmm3) using the 2-wide pipeline.
# A 16-byte scratch frame is built on an aligned stack:
#   0(%esp)  = byte-swap mask (0x0c0f0e0d... stored as 4 dwords)
#   16(%esp) = 64-bit increment constant {1,0}
#   48(%esp) = saved original %esp
# 102,15,56,0,* = pshufb (byte-swap counter to big-endian and back).
.globl	_aesni_ccm64_encrypt_blocks
.private_extern	_aesni_ccm64_encrypt_blocks
.align	4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# block count
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# ivec
	movl	40(%esp),%ecx		# cmac
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		# align scratch for movdqa
	movl	%ebp,48(%esp)		# stash original esp
	movdqu	(%ebx),%xmm7		# counter block
	movdqu	(%ecx),%xmm3		# running CMAC
	movl	240(%edx),%ecx		# round count
	movl	$202182159,(%esp)	# pshufb byte-swap mask
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			# 64-bit increment {1,0}
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx
	movl	$16,%ebx
	leal	(%edx),%ebp		# ebp = key base
	movdqa	(%esp),%xmm5		# xmm5 = swap mask
	movdqa	%xmm7,%xmm2		# xmm2 = counter copy
	leal	32(%edx,%ecx,1),%edx	# edx = end of key schedule
	subl	%ecx,%ebx		# ebx = negative round offset
.byte	102,15,56,0,253			# pshufb %xmm5,%xmm7 (counter -> host order)
L030ccm64_enc_outer:
	movups	(%ebp),%xmm0		# rk[0]
	movl	%ebx,%ecx
	movups	(%esi),%xmm6		# plaintext block
	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%ebp),%xmm1
	xorps	%xmm6,%xmm0
	xorps	%xmm0,%xmm3		# cmac ^= plaintext ^ rk[0] (pre-whitened)
	movups	32(%ebp),%xmm0
L031ccm64_enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2 (counter)
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3 (cmac)
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L031ccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	16(%esp),%xmm7		# bump 64-bit counter
	decl	%eax
.byte	102,15,56,221,208		# aesenclast x2
.byte	102,15,56,221,216
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6		# ciphertext = pt ^ E(counter)
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2 (re-swap counter)
	leal	16(%edi),%edi
	jnz	L030ccm64_enc_outer
	movl	48(%esp),%esp		# restore caller stack
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# write back final CMAC
	pxor	%xmm0,%xmm0		# scrub
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# void aesni_ccm64_decrypt_blocks(const uint8_t *in, uint8_t *out, size_t blocks,
#                                 const AES_KEY *key, const uint8_t ivec[16],
#                                 uint8_t cmac[16])
# CCM decrypt: the first counter block is encrypted up front; each loop
# iteration XORs a ciphertext block against it, then runs counter-encrypt and
# CMAC-absorb (of the recovered plaintext) in the 2-wide pipeline.
# Same stack frame layout as the encrypt variant (mask at 0, increment at 16,
# saved esp at 48).
.globl	_aesni_ccm64_decrypt_blocks
.private_extern	_aesni_ccm64_decrypt_blocks
.align	4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# block count
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# ivec
	movl	40(%esp),%ecx		# cmac
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp
	movl	%ebp,48(%esp)
	movdqu	(%ebx),%xmm7		# counter block
	movdqu	(%ecx),%xmm3		# running CMAC
	movl	240(%edx),%ecx		# round count
	movl	$202182159,(%esp)	# pshufb byte-swap mask
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			# 64-bit increment {1,0}
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5		# swap mask
	movdqa	%xmm7,%xmm2
	movl	%edx,%ebp		# preserve key / rounds
	movl	%ecx,%ebx
.byte	102,15,56,0,253			# pshufb %xmm5,%xmm7
	movups	(%edx),%xmm0		# encrypt first counter block inline
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L032enc1_loop_5:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L032enc1_loop_5
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	shll	$4,%ebx
	movl	$16,%ecx
	movups	(%esi),%xmm6		# first ciphertext block
	paddq	16(%esp),%xmm7		# bump counter
	leal	16(%esi),%esi
	subl	%ebx,%ecx		# negative round offset
	leal	32(%ebp,%ebx,1),%edx	# edx = end of key schedule
	movl	%ecx,%ebx
	jmp	L033ccm64_dec_outer
.align	4,0x90
L033ccm64_dec_outer:
	xorps	%xmm2,%xmm6		# plaintext = ct ^ E(counter)
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
	subl	$1,%eax
	jz	L034ccm64_dec_break
	movups	(%ebp),%xmm0
	movl	%ebx,%ecx
	movups	16(%ebp),%xmm1
	xorps	%xmm0,%xmm6
	xorps	%xmm0,%xmm2		# whiten counter
	xorps	%xmm6,%xmm3		# absorb plaintext into CMAC
	movups	32(%ebp),%xmm0
L035ccm64_dec2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2 / %xmm3
.byte	102,15,56,220,217
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L035ccm64_dec2_loop
	movups	(%esi),%xmm6		# preload next ciphertext block
	paddq	16(%esp),%xmm7
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		# aesenclast x2
.byte	102,15,56,221,216
	leal	16(%esi),%esi
	jmp	L033ccm64_dec_outer
.align	4,0x90
L034ccm64_dec_break:
	movl	240(%ebp),%ecx		# final CMAC round: absorb last plaintext
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3
L036enc1_loop_6:
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L036enc1_loop_6
.byte	102,15,56,221,217		# aesenclast %xmm1,%xmm3
	movl	48(%esp),%esp		# restore caller stack
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# write back final CMAC
	pxor	%xmm0,%xmm0		# scrub
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# void aesni_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t blocks,
#                                 const AES_KEY *key, const uint8_t ivec[16])
# CTR mode with a 32-bit big-endian counter in the last word of ivec.
# Six counter blocks are kept in flight; the low counter words are maintained
# host-endian in xmm0/xmm1 (pinsrd: 102,15,58,34 / pextrd: 102,15,58,22) and
# byte-swapped via pshufb (102,15,56,0) against the mask built at 0(%esp).
# Scratch frame (16-aligned): 0 = swap mask, 16 = {6,6,6,0} increment,
# 32 = iv^rk[0], 48/64 = counter lanes, 80 = saved %esp.
.globl	_aesni_ctr32_encrypt_blocks
.private_extern	_aesni_ctr32_encrypt_blocks
.align	4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# in
	movl	24(%esp),%edi		# out
	movl	28(%esp),%eax		# block count
	movl	32(%esp),%edx		# key
	movl	36(%esp),%ebx		# ivec
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp
	movl	%ebp,80(%esp)		# stash original esp
	cmpl	$1,%eax
	je	L037ctr32_one_shortcut	# single block: skip all counter setup
	movdqu	(%ebx),%xmm7		# iv block
	movl	$202182159,(%esp)	# pshufb byte-swap mask
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$6,%ecx			# counter increment of 6 per outer iteration
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3		# pextrd $3,%xmm7,%ebx  (big-endian counter word)
.byte	102,15,58,34,253,3		# pinsrd $3,%ebp,%xmm7
	movl	240(%edx),%ecx		# round count
	bswap	%ebx			# counter to host order
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqa	(%esp),%xmm2		# swap mask
.byte	102,15,58,34,195,0		# pinsrd $0,%ebx,%xmm0 — build ctr,ctr+1,ctr+2
	leal	3(%ebx),%ebp
.byte	102,15,58,34,205,0		# pinsrd $0,%ebp,%xmm1 — and ctr+3..ctr+5
	incl	%ebx
.byte	102,15,58,34,195,1
	incl	%ebp
.byte	102,15,58,34,205,1
	incl	%ebx
.byte	102,15,58,34,195,2
	incl	%ebp
.byte	102,15,58,34,205,2
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194			# pshufb %xmm2,%xmm0 (to big-endian)
	movdqu	(%edx),%xmm6		# rk[0]
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202			# pshufb %xmm2,%xmm1
	pshufd	$192,%xmm0,%xmm2	# broadcast counter words into block lanes
	pshufd	$128,%xmm0,%xmm3
	cmpl	$6,%eax
	jb	L038ctr32_tail
	pxor	%xmm6,%xmm7		# pre-whiten iv body with rk[0]
	shll	$4,%ecx
	movl	$16,%ebx
	movdqa	%xmm7,32(%esp)
	movl	%edx,%ebp		# preserve key base
	subl	%ecx,%ebx		# negative round offset
	leal	32(%edx,%ecx,1),%edx
	subl	$6,%eax
	jmp	L039ctr32_loop6
.align	4,0x90
L039ctr32_loop6:
	pshufd	$64,%xmm0,%xmm4		# assemble 6 counter blocks and whiten
	movdqa	32(%esp),%xmm0
	pshufd	$192,%xmm1,%xmm5
	pxor	%xmm0,%xmm2
	pshufd	$128,%xmm1,%xmm6
	pxor	%xmm0,%xmm3
	pshufd	$64,%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,209		# aesenc round 1, interleaved with whitening
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,217
	movups	32(%ebp),%xmm0
	movl	%ebx,%ecx
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	call	L_aesni_encrypt6_enter	# finish remaining rounds in the helper
	movups	(%esi),%xmm1		# XOR keystream with 6 input blocks
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0		# advance both counter lanes by 6
	xorps	%xmm1,%xmm4
	movdqa	64(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1
	paddd	48(%esp),%xmm0
	movdqa	(%esp),%xmm2
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194			# pshufb: counters back to big-endian
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	pshufd	$128,%xmm0,%xmm3
	subl	$6,%eax
	jnc	L039ctr32_loop6
	addl	$6,%eax			# eax = leftover blocks (0..5)
	jz	L040ctr32_ret
	movdqu	(%ebp),%xmm7		# rebuild un-whitened iv body for the tail
	movl	%ebp,%edx
	pxor	32(%esp),%xmm7
	movl	240(%ebp),%ecx
L038ctr32_tail:
	por	%xmm7,%xmm2		# merge iv body into each counter block
	cmpl	$2,%eax
	jb	L041ctr32_one
	pshufd	$64,%xmm0,%xmm4
	por	%xmm7,%xmm3
	je	L042ctr32_two
	pshufd	$192,%xmm1,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	L043ctr32_three
	pshufd	$128,%xmm1,%xmm6
	por	%xmm7,%xmm5
	je	L044ctr32_four
	por	%xmm7,%xmm6		# 5 blocks via the 6-wide helper
	call	__aesni_encrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L037ctr32_one_shortcut:
	movups	(%ebx),%xmm2		# single block: iv used directly
	movl	240(%edx),%ecx
L041ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L045enc1_loop_7:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L045enc1_loop_7
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L042ctr32_two:
	call	__aesni_encrypt2
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L043ctr32_three:
	call	__aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L044ctr32_four:
	call	__aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L040ctr32_ret:
	pxor	%xmm0,%xmm0		# scrub registers AND the key-bearing stack slots
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)		# wipe iv^rk[0]
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)		# wipe counter lanes
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movl	80(%esp),%esp		# restore caller stack
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
1030 .globl _aesni_xts_encrypt | |
1031 .private_extern _aesni_xts_encrypt | |
1032 .align 4 | |
1033 _aesni_xts_encrypt: | |
1034 L_aesni_xts_encrypt_begin: | |
1035 pushl %ebp | |
1036 pushl %ebx | |
1037 pushl %esi | |
1038 pushl %edi | |
1039 movl 36(%esp),%edx | |
1040 movl 40(%esp),%esi | |
1041 movl 240(%edx),%ecx | |
1042 movups (%esi),%xmm2 | |
1043 movups (%edx),%xmm0 | |
1044 movups 16(%edx),%xmm1 | |
1045 leal 32(%edx),%edx | |
1046 xorps %xmm0,%xmm2 | |
1047 L046enc1_loop_8: | |
1048 .byte 102,15,56,220,209 | |
1049 decl %ecx | |
1050 movups (%edx),%xmm1 | |
1051 leal 16(%edx),%edx | |
1052 jnz L046enc1_loop_8 | |
1053 .byte 102,15,56,221,209 | |
1054 movl 20(%esp),%esi | |
1055 movl 24(%esp),%edi | |
1056 movl 28(%esp),%eax | |
1057 movl 32(%esp),%edx | |
1058 movl %esp,%ebp | |
1059 subl $120,%esp | |
1060 movl 240(%edx),%ecx | |
1061 andl $-16,%esp | |
1062 movl $135,96(%esp) | |
1063 movl $0,100(%esp) | |
1064 movl $1,104(%esp) | |
1065 movl $0,108(%esp) | |
1066 movl %eax,112(%esp) | |
1067 movl %ebp,116(%esp) | |
1068 movdqa %xmm2,%xmm1 | |
1069 pxor %xmm0,%xmm0 | |
1070 movdqa 96(%esp),%xmm3 | |
1071 pcmpgtd %xmm1,%xmm0 | |
1072 andl $-16,%eax | |
1073 movl %edx,%ebp | |
1074 movl %ecx,%ebx | |
1075 subl $96,%eax | |
1076 jc L047xts_enc_short | |
1077 shll $4,%ecx | |
1078 movl $16,%ebx | |
1079 subl %ecx,%ebx | |
1080 leal 32(%edx,%ecx,1),%edx | |
1081 jmp L048xts_enc_loop6 | |
1082 .align 4,0x90 | |
1083 L048xts_enc_loop6: | |
1084 pshufd $19,%xmm0,%xmm2 | |
1085 pxor %xmm0,%xmm0 | |
1086 movdqa %xmm1,(%esp) | |
1087 paddq %xmm1,%xmm1 | |
1088 pand %xmm3,%xmm2 | |
1089 pcmpgtd %xmm1,%xmm0 | |
1090 pxor %xmm2,%xmm1 | |
1091 pshufd $19,%xmm0,%xmm2 | |
1092 pxor %xmm0,%xmm0 | |
1093 movdqa %xmm1,16(%esp) | |
1094 paddq %xmm1,%xmm1 | |
1095 pand %xmm3,%xmm2 | |
1096 pcmpgtd %xmm1,%xmm0 | |
1097 pxor %xmm2,%xmm1 | |
1098 pshufd $19,%xmm0,%xmm2 | |
1099 pxor %xmm0,%xmm0 | |
1100 movdqa %xmm1,32(%esp) | |
1101 paddq %xmm1,%xmm1 | |
1102 pand %xmm3,%xmm2 | |
1103 pcmpgtd %xmm1,%xmm0 | |
1104 pxor %xmm2,%xmm1 | |
1105 pshufd $19,%xmm0,%xmm2 | |
1106 pxor %xmm0,%xmm0 | |
1107 movdqa %xmm1,48(%esp) | |
1108 paddq %xmm1,%xmm1 | |
1109 pand %xmm3,%xmm2 | |
1110 pcmpgtd %xmm1,%xmm0 | |
1111 pxor %xmm2,%xmm1 | |
1112 pshufd $19,%xmm0,%xmm7 | |
1113 movdqa %xmm1,64(%esp) | |
1114 paddq %xmm1,%xmm1 | |
1115 movups (%ebp),%xmm0 | |
1116 pand %xmm3,%xmm7 | |
1117 movups (%esi),%xmm2 | |
1118 pxor %xmm1,%xmm7 | |
1119 movl %ebx,%ecx | |
1120 movdqu 16(%esi),%xmm3 | |
1121 xorps %xmm0,%xmm2 | |
1122 movdqu 32(%esi),%xmm4 | |
1123 pxor %xmm0,%xmm3 | |
1124 movdqu 48(%esi),%xmm5 | |
1125 pxor %xmm0,%xmm4 | |
1126 movdqu 64(%esi),%xmm6 | |
1127 pxor %xmm0,%xmm5 | |
1128 movdqu 80(%esi),%xmm1 | |
1129 pxor %xmm0,%xmm6 | |
1130 leal 96(%esi),%esi | |
1131 pxor (%esp),%xmm2 | |
1132 movdqa %xmm7,80(%esp) | |
1133 pxor %xmm1,%xmm7 | |
1134 movups 16(%ebp),%xmm1 | |
1135 pxor 16(%esp),%xmm3 | |
1136 pxor 32(%esp),%xmm4 | |
1137 .byte 102,15,56,220,209 | |
1138 pxor 48(%esp),%xmm5 | |
1139 pxor 64(%esp),%xmm6 | |
1140 .byte 102,15,56,220,217 | |
1141 pxor %xmm0,%xmm7 | |
1142 movups 32(%ebp),%xmm0 | |
1143 .byte 102,15,56,220,225 | |
1144 .byte 102,15,56,220,233 | |
1145 .byte 102,15,56,220,241 | |
1146 .byte 102,15,56,220,249 | |
1147 call L_aesni_encrypt6_enter | |
1148 movdqa 80(%esp),%xmm1 | |
1149 pxor %xmm0,%xmm0 | |
1150 xorps (%esp),%xmm2 | |
1151 pcmpgtd %xmm1,%xmm0 | |
1152 xorps 16(%esp),%xmm3 | |
1153 movups %xmm2,(%edi) | |
1154 xorps 32(%esp),%xmm4 | |
1155 movups %xmm3,16(%edi) | |
1156 xorps 48(%esp),%xmm5 | |
1157 movups %xmm4,32(%edi) | |
1158 xorps 64(%esp),%xmm6 | |
1159 movups %xmm5,48(%edi) | |
1160 xorps %xmm1,%xmm7 | |
1161 movups %xmm6,64(%edi) | |
1162 pshufd $19,%xmm0,%xmm2 | |
1163 movups %xmm7,80(%edi) | |
1164 leal 96(%edi),%edi | |
1165 movdqa 96(%esp),%xmm3 | |
1166 pxor %xmm0,%xmm0 | |
1167 paddq %xmm1,%xmm1 | |
1168 pand %xmm3,%xmm2 | |
1169 pcmpgtd %xmm1,%xmm0 | |
1170 pxor %xmm2,%xmm1 | |
1171 subl $96,%eax | |
1172 jnc L048xts_enc_loop6 | |
1173 movl 240(%ebp),%ecx | |
1174 movl %ebp,%edx | |
1175 movl %ecx,%ebx | |
1176 L047xts_enc_short: | |
1177 addl $96,%eax | |
1178 jz L049xts_enc_done6x | |
1179 movdqa %xmm1,%xmm5 | |
1180 cmpl $32,%eax | |
1181 jb L050xts_enc_one | |
1182 pshufd $19,%xmm0,%xmm2 | |
1183 pxor %xmm0,%xmm0 | |
1184 paddq %xmm1,%xmm1 | |
1185 pand %xmm3,%xmm2 | |
1186 pcmpgtd %xmm1,%xmm0 | |
1187 pxor %xmm2,%xmm1 | |
1188 je L051xts_enc_two | |
1189 pshufd $19,%xmm0,%xmm2 | |
1190 pxor %xmm0,%xmm0 | |
1191 movdqa %xmm1,%xmm6 | |
1192 paddq %xmm1,%xmm1 | |
1193 pand %xmm3,%xmm2 | |
1194 pcmpgtd %xmm1,%xmm0 | |
1195 pxor %xmm2,%xmm1 | |
1196 cmpl $64,%eax | |
1197 jb L052xts_enc_three | |
1198 pshufd $19,%xmm0,%xmm2 | |
1199 pxor %xmm0,%xmm0 | |
1200 movdqa %xmm1,%xmm7 | |
1201 paddq %xmm1,%xmm1 | |
1202 pand %xmm3,%xmm2 | |
1203 pcmpgtd %xmm1,%xmm0 | |
1204 pxor %xmm2,%xmm1 | |
1205 movdqa %xmm5,(%esp) | |
1206 movdqa %xmm6,16(%esp) | |
1207 je L053xts_enc_four | |
1208 movdqa %xmm7,32(%esp) | |
1209 pshufd $19,%xmm0,%xmm7 | |
1210 movdqa %xmm1,48(%esp) | |
1211 paddq %xmm1,%xmm1 | |
1212 pand %xmm3,%xmm7 | |
1213 pxor %xmm1,%xmm7 | |
1214 movdqu (%esi),%xmm2 | |
1215 movdqu 16(%esi),%xmm3 | |
1216 movdqu 32(%esi),%xmm4 | |
1217 pxor (%esp),%xmm2 | |
1218 movdqu 48(%esi),%xmm5 | |
1219 pxor 16(%esp),%xmm3 | |
1220 movdqu 64(%esi),%xmm6 | |
1221 pxor 32(%esp),%xmm4 | |
1222 leal 80(%esi),%esi | |
1223 pxor 48(%esp),%xmm5 | |
1224 movdqa %xmm7,64(%esp) | |
1225 pxor %xmm7,%xmm6 | |
1226 call __aesni_encrypt6 | |
1227 movaps 64(%esp),%xmm1 | |
1228 xorps (%esp),%xmm2 | |
1229 xorps 16(%esp),%xmm3 | |
1230 xorps 32(%esp),%xmm4 | |
1231 movups %xmm2,(%edi) | |
1232 xorps 48(%esp),%xmm5 | |
1233 movups %xmm3,16(%edi) | |
1234 xorps %xmm1,%xmm6 | |
1235 movups %xmm4,32(%edi) | |
1236 movups %xmm5,48(%edi) | |
1237 movups %xmm6,64(%edi) | |
1238 leal 80(%edi),%edi | |
1239 jmp L054xts_enc_done | |
1240 .align 4,0x90 | |
1241 L050xts_enc_one: | |
1242 movups (%esi),%xmm2 | |
1243 leal 16(%esi),%esi | |
1244 xorps %xmm5,%xmm2 | |
1245 movups (%edx),%xmm0 | |
1246 movups 16(%edx),%xmm1 | |
1247 leal 32(%edx),%edx | |
1248 xorps %xmm0,%xmm2 | |
1249 L055enc1_loop_9: | |
1250 .byte 102,15,56,220,209 | |
1251 decl %ecx | |
1252 movups (%edx),%xmm1 | |
1253 leal 16(%edx),%edx | |
1254 jnz L055enc1_loop_9 | |
1255 .byte 102,15,56,221,209 | |
1256 xorps %xmm5,%xmm2 | |
1257 movups %xmm2,(%edi) | |
1258 leal 16(%edi),%edi | |
1259 movdqa %xmm5,%xmm1 | |
1260 jmp L054xts_enc_done | |
1261 .align 4,0x90 | |
1262 L051xts_enc_two: | |
1263 movaps %xmm1,%xmm6 | |
1264 movups (%esi),%xmm2 | |
1265 movups 16(%esi),%xmm3 | |
1266 leal 32(%esi),%esi | |
1267 xorps %xmm5,%xmm2 | |
1268 xorps %xmm6,%xmm3 | |
1269 call __aesni_encrypt2 | |
1270 xorps %xmm5,%xmm2 | |
1271 xorps %xmm6,%xmm3 | |
1272 movups %xmm2,(%edi) | |
1273 movups %xmm3,16(%edi) | |
1274 leal 32(%edi),%edi | |
1275 movdqa %xmm6,%xmm1 | |
1276 jmp L054xts_enc_done | |
1277 .align 4,0x90 | |
1278 L052xts_enc_three: | |
1279 movaps %xmm1,%xmm7 | |
1280 movups (%esi),%xmm2 | |
1281 movups 16(%esi),%xmm3 | |
1282 movups 32(%esi),%xmm4 | |
1283 leal 48(%esi),%esi | |
1284 xorps %xmm5,%xmm2 | |
1285 xorps %xmm6,%xmm3 | |
1286 xorps %xmm7,%xmm4 | |
1287 call __aesni_encrypt3 | |
1288 xorps %xmm5,%xmm2 | |
1289 xorps %xmm6,%xmm3 | |
1290 xorps %xmm7,%xmm4 | |
1291 movups %xmm2,(%edi) | |
1292 movups %xmm3,16(%edi) | |
1293 movups %xmm4,32(%edi) | |
1294 leal 48(%edi),%edi | |
1295 movdqa %xmm7,%xmm1 | |
1296 jmp L054xts_enc_done | |
1297 .align 4,0x90 | |
1298 L053xts_enc_four: | |
1299 movaps %xmm1,%xmm6 | |
1300 movups (%esi),%xmm2 | |
1301 movups 16(%esi),%xmm3 | |
1302 movups 32(%esi),%xmm4 | |
1303 xorps (%esp),%xmm2 | |
1304 movups 48(%esi),%xmm5 | |
1305 leal 64(%esi),%esi | |
1306 xorps 16(%esp),%xmm3 | |
1307 xorps %xmm7,%xmm4 | |
1308 xorps %xmm6,%xmm5 | |
1309 call __aesni_encrypt4 | |
1310 xorps (%esp),%xmm2 | |
1311 xorps 16(%esp),%xmm3 | |
1312 xorps %xmm7,%xmm4 | |
1313 movups %xmm2,(%edi) | |
1314 xorps %xmm6,%xmm5 | |
1315 movups %xmm3,16(%edi) | |
1316 movups %xmm4,32(%edi) | |
1317 movups %xmm5,48(%edi) | |
1318 leal 64(%edi),%edi | |
1319 movdqa %xmm6,%xmm1 | |
1320 jmp L054xts_enc_done | |
1321 .align 4,0x90 | |
1322 L049xts_enc_done6x: | |
1323 movl 112(%esp),%eax | |
1324 andl $15,%eax | |
1325 jz L056xts_enc_ret | |
1326 movdqa %xmm1,%xmm5 | |
1327 movl %eax,112(%esp) | |
1328 jmp L057xts_enc_steal | |
1329 .align 4,0x90 | |
1330 L054xts_enc_done: | |
1331 movl 112(%esp),%eax | |
1332 pxor %xmm0,%xmm0 | |
1333 andl $15,%eax | |
1334 jz L056xts_enc_ret | |
1335 pcmpgtd %xmm1,%xmm0 | |
1336 movl %eax,112(%esp) | |
1337 pshufd $19,%xmm0,%xmm5 | |
1338 paddq %xmm1,%xmm1 | |
1339 pand 96(%esp),%xmm5 | |
1340 pxor %xmm1,%xmm5 | |
1341 L057xts_enc_steal: | |
1342 movzbl (%esi),%ecx | |
1343 movzbl -16(%edi),%edx | |
1344 leal 1(%esi),%esi | |
1345 movb %cl,-16(%edi) | |
1346 movb %dl,(%edi) | |
1347 leal 1(%edi),%edi | |
1348 subl $1,%eax | |
1349 jnz L057xts_enc_steal | |
1350 subl 112(%esp),%edi | |
1351 movl %ebp,%edx | |
1352 movl %ebx,%ecx | |
1353 movups -16(%edi),%xmm2 | |
1354 xorps %xmm5,%xmm2 | |
1355 movups (%edx),%xmm0 | |
1356 movups 16(%edx),%xmm1 | |
1357 leal 32(%edx),%edx | |
1358 xorps %xmm0,%xmm2 | |
1359 L058enc1_loop_10: | |
1360 .byte 102,15,56,220,209 | |
1361 decl %ecx | |
1362 movups (%edx),%xmm1 | |
1363 leal 16(%edx),%edx | |
1364 jnz L058enc1_loop_10 | |
1365 .byte 102,15,56,221,209 | |
1366 xorps %xmm5,%xmm2 | |
1367 movups %xmm2,-16(%edi) | |
1368 L056xts_enc_ret: | |
1369 pxor %xmm0,%xmm0 | |
1370 pxor %xmm1,%xmm1 | |
1371 pxor %xmm2,%xmm2 | |
1372 movdqa %xmm0,(%esp) | |
1373 pxor %xmm3,%xmm3 | |
1374 movdqa %xmm0,16(%esp) | |
1375 pxor %xmm4,%xmm4 | |
1376 movdqa %xmm0,32(%esp) | |
1377 pxor %xmm5,%xmm5 | |
1378 movdqa %xmm0,48(%esp) | |
1379 pxor %xmm6,%xmm6 | |
1380 movdqa %xmm0,64(%esp) | |
1381 pxor %xmm7,%xmm7 | |
1382 movdqa %xmm0,80(%esp) | |
1383 movl 116(%esp),%esp | |
1384 popl %edi | |
1385 popl %esi | |
1386 popl %ebx | |
1387 popl %ebp | |
1388 ret | |
# void aesni_xts_decrypt(const char *inp, char *out, size_t len,
#                        const AES_KEY *key1, const AES_KEY *key2,
#                        const uint8_t iv[16])
# AES-XTS decryption (IEEE P1619), i386 cdecl. After the four register pushes
# the arguments sit at 20(%esp)=inp, 24(%esp)=out, 28(%esp)=len,
# 32(%esp)=key1 (data key), 36(%esp)=key2 (tweak key), 40(%esp)=iv.
# NOTE(review): argument meaning inferred from usage below (iv is encrypted
# with key2 to form the initial tweak) — matches the standard aesni-x86.pl
# prototype; confirm against the perlasm source.
# The .byte sequences are hand-encoded AES-NI ops the old assembler lacked:
#   102,15,56,222,XX = aesdec, 102,15,56,223,XX = aesdeclast.
.globl _aesni_xts_decrypt
.private_extern _aesni_xts_decrypt
.align 4
_aesni_xts_decrypt:
L_aesni_xts_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 36(%esp),%edx                      # %edx = key2 (tweak key)
movl 40(%esp),%esi                      # %esi = iv
movl 240(%edx),%ecx                     # %ecx = key2->rounds
movups (%esi),%xmm2                     # load iv
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2                       # tweak = Encrypt_key2(iv): whitening
L059enc1_loop_11:                       # single-block encryption round loop
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L059enc1_loop_11
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2 -> initial tweak
movl 20(%esp),%esi                      # %esi = inp
movl 24(%esp),%edi                      # %edi = out
movl 28(%esp),%eax                      # %eax = len
movl 32(%esp),%edx                      # %edx = key1 (data key)
movl %esp,%ebp                          # save caller %esp before realigning
subl $120,%esp                          # local frame: 6 tweak slots + consts
andl $-16,%esp                          # 16-byte align for movdqa spills
xorl %ebx,%ebx
testl $15,%eax                          # partial final block? (ciphertext
setnz %bl                               #  stealing needs one block held back)
shll $4,%ebx
subl %ebx,%eax                          # reserve 16 bytes if len%16 != 0
movl $135,96(%esp)                      # 0x87 = XTS GF(2^128) reduction poly
movl $0,100(%esp)
movl $1,104(%esp)
movl $0,108(%esp)                       # 96(%esp) = {0x87,0,1,0} mask vector
movl %eax,112(%esp)                     # 112(%esp) = remaining length
movl %ebp,116(%esp)                     # 116(%esp) = saved %esp
movl 240(%edx),%ecx                     # %ecx = key1->rounds
movl %edx,%ebp                          # %ebp = key1, %ebx = rounds (persist)
movl %ecx,%ebx
movdqa %xmm2,%xmm1                      # %xmm1 = current tweak
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3                   # %xmm3 = poly mask
pcmpgtd %xmm1,%xmm0                     # broadcast tweak sign for doubling
andl $-16,%eax
subl $96,%eax
jc L060xts_dec_short                    # fewer than 6 blocks remain
shll $4,%ecx
movl $16,%ebx
subl %ecx,%ebx                          # %ebx = 16-16*rounds (negative key
leal 32(%edx,%ecx,1),%edx               #  index used by _aesni_decrypt6)
jmp L061xts_dec_loop6
.align 4,0x90
L061xts_dec_loop6:                      # main 6-blocks-at-a-time loop
# Each pshufd/pand/paddq/pxor group multiplies the tweak by x in GF(2^128)
# (left shift of the 128-bit value with conditional 0x87 reduction), spilling
# the six consecutive tweaks to (%esp)..80(%esp).
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)                     # tweak[0]
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,16(%esp)                   # tweak[1]
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,32(%esp)                   # tweak[2]
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,48(%esp)                   # tweak[3]
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,64(%esp)                   # tweak[4]
paddq %xmm1,%xmm1
movups (%ebp),%xmm0                     # round key 0
pand %xmm3,%xmm7
movups (%esi),%xmm2                     # load 6 ciphertext blocks, XOR each
pxor %xmm1,%xmm7                        #  with (tweak ^ round key 0)
movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
pxor %xmm0,%xmm3
movdqu 48(%esi),%xmm5
pxor %xmm0,%xmm4
movdqu 64(%esi),%xmm6
pxor %xmm0,%xmm5
movdqu 80(%esi),%xmm1
pxor %xmm0,%xmm6
leal 96(%esi),%esi
pxor (%esp),%xmm2
movdqa %xmm7,80(%esp)                   # tweak[5]
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
pxor %xmm0,%xmm7
movups 32(%ebp),%xmm0
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
call L_aesni_decrypt6_enter             # finish rounds in shared 6-wide body
movdqa 80(%esp),%xmm1                   # reload tweak[5]
pxor %xmm0,%xmm0
xorps (%esp),%xmm2                      # un-tweak each plaintext block and
pcmpgtd %xmm1,%xmm0                     #  start doubling tweak[5] for next
xorps 16(%esp),%xmm3                    #  iteration in the gaps
movups %xmm2,(%edi)
xorps 32(%esp),%xmm4
movups %xmm3,16(%edi)
xorps 48(%esp),%xmm5
movups %xmm4,32(%edi)
xorps 64(%esp),%xmm6
movups %xmm5,48(%edi)
xorps %xmm1,%xmm7
movups %xmm6,64(%edi)
pshufd $19,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
movdqa 96(%esp),%xmm3                   # reload poly mask
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1                        # %xmm1 = next tweak
subl $96,%eax
jnc L061xts_dec_loop6
movl 240(%ebp),%ecx                     # restore rounds/key for tail code
movl %ebp,%edx
movl %ecx,%ebx
L060xts_dec_short:                      # 0..5 whole blocks remain
addl $96,%eax
jz L062xts_dec_done6x
movdqa %xmm1,%xmm5                      # %xmm5 = tweak for block 0
cmpl $32,%eax
jb L063xts_dec_one
pshufd $19,%xmm0,%xmm2                  # double tweak
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
je L064xts_dec_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6                      # %xmm6 = tweak for block 1
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
jb L065xts_dec_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7                      # %xmm7 = tweak for block 2
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
je L066xts_dec_four
movdqa %xmm7,32(%esp)                   # five-block tail
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm7
pxor %xmm1,%xmm7                        # tweak for block 4
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
pxor (%esp),%xmm2
movdqu 48(%esi),%xmm5
pxor 16(%esp),%xmm3
movdqu 64(%esi),%xmm6
pxor 32(%esp),%xmm4
leal 80(%esi),%esi
pxor 48(%esp),%xmm5
movdqa %xmm7,64(%esp)
pxor %xmm7,%xmm6
call __aesni_decrypt6
movaps 64(%esp),%xmm1
xorps (%esp),%xmm2                      # remove tweaks from plaintext
xorps 16(%esp),%xmm3
xorps 32(%esp),%xmm4
movups %xmm2,(%edi)
xorps 48(%esp),%xmm5
movups %xmm3,16(%edi)
xorps %xmm1,%xmm6
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
jmp L067xts_dec_done
.align 4,0x90
L063xts_dec_one:                        # one-block tail
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2                       # apply tweak
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L068dec1_loop_12:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L068dec1_loop_12
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm5,%xmm2                       # remove tweak
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1                      # carry last tweak into done path
jmp L067xts_dec_done
.align 4,0x90
L064xts_dec_two:                        # two-block tail
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
call __aesni_decrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L065xts_dec_three:                      # three-block tail
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
leal 48(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
call __aesni_decrypt3
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L066xts_dec_four:                       # four-block tail
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
xorps (%esp),%xmm2                      # tweaks 0/1 were spilled earlier
movups 48(%esi),%xmm5
leal 64(%esi),%esi
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
xorps %xmm6,%xmm5
call __aesni_decrypt4
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
xorps %xmm6,%xmm5
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L062xts_dec_done6x:                     # 6x loop consumed everything whole
movl 112(%esp),%eax
andl $15,%eax                           # partial final block left?
jz L069xts_dec_ret
movl %eax,112(%esp)
jmp L070xts_dec_only_one_more
.align 4,0x90
L067xts_dec_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax                           # partial final block left?
jz L069xts_dec_ret
pcmpgtd %xmm1,%xmm0                     # double tweak once more
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
L070xts_dec_only_one_more:
# Ciphertext stealing: decrypt the second-to-last block with tweak n
# (%xmm5 below), swap tail bytes, then redo the last full block with
# tweak n-1 (%xmm6).
pshufd $19,%xmm0,%xmm5
movdqa %xmm1,%xmm6                      # %xmm6 = tweak n-1
paddq %xmm1,%xmm1
pand %xmm3,%xmm5
pxor %xmm1,%xmm5                        # %xmm5 = tweak n
movl %ebp,%edx
movl %ebx,%ecx
movups (%esi),%xmm2
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L071dec1_loop_13:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L071dec1_loop_13
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
L072xts_dec_steal:                      # byte-swap loop for the stolen tail
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
movb %cl,(%edi)
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
jnz L072xts_dec_steal
subl 112(%esp),%edi                     # back up to the reassembled block
movl %ebp,%edx
movl %ebx,%ecx
movups (%edi),%xmm2
xorps %xmm6,%xmm2                       # decrypt it with tweak n-1
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L073dec1_loop_14:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L073dec1_loop_14
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
L069xts_dec_ret:                        # scrub registers and stack tweaks
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movdqa %xmm0,(%esp)
pxor %xmm3,%xmm3
movdqa %xmm0,16(%esp)
pxor %xmm4,%xmm4
movdqa %xmm0,32(%esp)
pxor %xmm5,%xmm5
movdqa %xmm0,48(%esp)
pxor %xmm6,%xmm6
movdqa %xmm0,64(%esp)
pxor %xmm7,%xmm7
movdqa %xmm0,80(%esp)
movl 116(%esp),%esp                     # restore caller stack pointer
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# void aesni_cbc_encrypt(const char *in, char *out, size_t len,
#                        const AES_KEY *key, uint8_t *ivp, int enc)
# AES-CBC encrypt or decrypt, i386 cdecl. After the four pushes the args are
# 20(%esp)=in, 24(%esp)=out, 28(%esp)=len, 32(%esp)=key, 36(%esp)=ivp,
# 40(%esp)=enc (0 selects the decrypt path).
# NOTE(review): argument roles inferred from usage below (ivp is both read
# for the initial IV and written back with the final IV); matches the
# standard aesni-x86.pl prototype — confirm against the perlasm source.
# .byte 102,15,56,220/221 = aesenc/aesenclast; 222/223 = aesdec/aesdeclast.
# .long 2767451785 (0xA4F3F689) encodes "mov %esi,%esi; rep movsb";
# .long 2868115081 (0xAAF3F689) encodes "mov %esi,%esi; rep stosb".
.globl _aesni_cbc_encrypt
.private_extern _aesni_cbc_encrypt
.align 4
_aesni_cbc_encrypt:
L_aesni_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi                      # %esi = in
movl %esp,%ebx
movl 24(%esp),%edi                      # %edi = out
subl $24,%ebx                           # carve aligned 16-byte scratch slot
movl 28(%esp),%eax                      # %eax = len
andl $-16,%ebx
movl 32(%esp),%edx                      # %edx = key
movl 36(%esp),%ebp                      # %ebp = ivp
testl %eax,%eax
jz L074cbc_abort                        # len == 0: nothing to do
cmpl $0,40(%esp)                        # enc flag
xchgl %esp,%ebx                         # switch to aligned scratch frame
movups (%ebp),%xmm7                     # %xmm7 = IV
movl 240(%edx),%ecx                     # rounds
movl %edx,%ebp                          # %ebp = key, %ebx = rounds (persist)
movl %ebx,16(%esp)                      # 16(%esp) = caller %esp
movl %ecx,%ebx
je L075cbc_decrypt
# ---- CBC encrypt: inherently serial, one block at a time ----
movaps %xmm7,%xmm2                      # running chaining value
cmpl $16,%eax
jb L076cbc_enc_tail                     # short input: pad first
subl $16,%eax
jmp L077cbc_enc_loop
.align 4,0x90
L077cbc_enc_loop:
movups (%esi),%xmm7                     # next plaintext block
leal 16(%esi),%esi
movups (%edx),%xmm0
movups 16(%edx),%xmm1
xorps %xmm0,%xmm7                       # fold round key 0 into plaintext
leal 32(%edx),%edx
xorps %xmm7,%xmm2                       # chain: block ^= previous ciphertext
L078enc1_loop_15:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L078enc1_loop_15
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
movl %ebx,%ecx                          # reset rounds/key for next block
movl %ebp,%edx
movups %xmm2,(%edi)
leal 16(%edi),%edi
subl $16,%eax
jnc L077cbc_enc_loop
addl $16,%eax
jnz L076cbc_enc_tail
movaps %xmm2,%xmm7                      # final ciphertext becomes new IV
pxor %xmm2,%xmm2
jmp L079cbc_ret
L076cbc_enc_tail:                       # partial last block: copy + zero-pad
movl %eax,%ecx
.long 2767451785                        # rep movsb: copy tail to out buffer
movl $16,%ecx
subl %eax,%ecx
xorl %eax,%eax
.long 2868115081                        # rep stosb: zero-fill to 16 bytes
leal -16(%edi),%edi
movl %ebx,%ecx
movl %edi,%esi                          # encrypt the padded block in place
movl %ebp,%edx
jmp L077cbc_enc_loop
.align 4,0x90
# ---- CBC decrypt: parallel, 6 blocks at a time ----
L075cbc_decrypt:
cmpl $80,%eax
jbe L080cbc_dec_tail
movaps %xmm7,(%esp)                     # (%esp) = IV / previous ciphertext
subl $80,%eax
jmp L081cbc_dec_loop6_enter
.align 4,0x90
L082cbc_dec_loop6:
movaps %xmm0,(%esp)                     # carry last ciphertext of prev batch
movups %xmm7,(%edi)                     # flush deferred 6th output block
leal 16(%edi),%edi
L081cbc_dec_loop6_enter:
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
call __aesni_decrypt6
movups (%esi),%xmm1                     # re-load ciphertexts for chaining XOR
movups 16(%esi),%xmm0
xorps (%esp),%xmm2                      # block0 ^= IV/prev ciphertext
xorps %xmm1,%xmm3
movups 32(%esi),%xmm1
xorps %xmm0,%xmm4
movups 48(%esi),%xmm0
xorps %xmm1,%xmm5
movups 64(%esi),%xmm1
xorps %xmm0,%xmm6
movups 80(%esi),%xmm0
xorps %xmm1,%xmm7
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 96(%esi),%esi
movups %xmm4,32(%edi)
movl %ebx,%ecx                          # reset rounds/key
movups %xmm5,48(%edi)
movl %ebp,%edx
movups %xmm6,64(%edi)
leal 80(%edi),%edi                      # 6th block store deferred to loop top
subl $96,%eax
ja L082cbc_dec_loop6
movaps %xmm7,%xmm2                      # pending 6th plaintext block
movaps %xmm0,%xmm7                      # last ciphertext = next IV
addl $80,%eax
jle L083cbc_dec_clear_tail_collected
movups %xmm2,(%edi)
leal 16(%edi),%edi
L080cbc_dec_tail:                       # 1..5 blocks (possibly partial) left
movups (%esi),%xmm2
movaps %xmm2,%xmm6                      # keep ciphertexts for chaining
cmpl $16,%eax
jbe L084cbc_dec_one
movups 16(%esi),%xmm3
movaps %xmm3,%xmm5
cmpl $32,%eax
jbe L085cbc_dec_two
movups 32(%esi),%xmm4
cmpl $48,%eax
jbe L086cbc_dec_three
movups 48(%esi),%xmm5
cmpl $64,%eax
jbe L087cbc_dec_four
movups 64(%esi),%xmm6                   # five blocks: use 6-wide with dummy
movaps %xmm7,(%esp)
movups (%esi),%xmm2
xorps %xmm7,%xmm7                       # 6th input = zero (result discarded)
call __aesni_decrypt6
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps (%esp),%xmm2
xorps %xmm1,%xmm3
movups 32(%esi),%xmm1
xorps %xmm0,%xmm4
movups 48(%esi),%xmm0
xorps %xmm1,%xmm5
movups 64(%esi),%xmm7                   # next IV = last full ciphertext
xorps %xmm0,%xmm6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3                        # scrub as we go
movups %xmm4,32(%edi)
pxor %xmm4,%xmm4
movups %xmm5,48(%edi)
pxor %xmm5,%xmm5
leal 64(%edi),%edi
movaps %xmm6,%xmm2                      # 5th block held for tail handling
pxor %xmm6,%xmm6
subl $80,%eax
jmp L088cbc_dec_tail_collected
.align 4,0x90
L084cbc_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L089dec1_loop_16:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L089dec1_loop_16
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm7,%xmm2                       # ^= IV
movaps %xmm6,%xmm7                      # next IV = this ciphertext
subl $16,%eax
jmp L088cbc_dec_tail_collected
.align 4,0x90
L085cbc_dec_two:
call __aesni_decrypt2
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movaps %xmm3,%xmm2                      # last block held for tail handling
pxor %xmm3,%xmm3
leal 16(%edi),%edi
movaps %xmm5,%xmm7                      # next IV
subl $32,%eax
jmp L088cbc_dec_tail_collected
.align 4,0x90
L086cbc_dec_three:
call __aesni_decrypt3
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
xorps %xmm5,%xmm4
movups %xmm2,(%edi)
movaps %xmm4,%xmm2
pxor %xmm4,%xmm4
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
leal 32(%edi),%edi
movups 32(%esi),%xmm7                   # next IV
subl $48,%eax
jmp L088cbc_dec_tail_collected
.align 4,0x90
L087cbc_dec_four:
call __aesni_decrypt4
movups 16(%esi),%xmm1
movups 32(%esi),%xmm0
xorps %xmm7,%xmm2
movups 48(%esi),%xmm7                   # next IV
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
pxor %xmm4,%xmm4
leal 48(%edi),%edi
movaps %xmm5,%xmm2
pxor %xmm5,%xmm5
subl $64,%eax
jmp L088cbc_dec_tail_collected
.align 4,0x90
L083cbc_dec_clear_tail_collected:       # scrub before handling leftover
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
L088cbc_dec_tail_collected:             # %xmm2 = last plaintext block
andl $15,%eax
jnz L090cbc_dec_tail_partial
movups %xmm2,(%edi)                     # whole block: store directly
pxor %xmm0,%xmm0
jmp L079cbc_ret
.align 4,0x90
L090cbc_dec_tail_partial:               # partial: spill, copy eax bytes out
movaps %xmm2,(%esp)
pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx                          # %ecx = 16 - remainder... then
.long 2767451785                        # rep movsb copies to (%edi)
movdqa %xmm2,(%esp)                     # NOTE(review): overwrites spill to
L079cbc_ret:                            #  scrub the stack copy — confirm
movl 16(%esp),%esp                      # restore caller stack pointer
movl 36(%esp),%ebp                      # %ebp = ivp
pxor %xmm2,%xmm2
pxor %xmm1,%xmm1
movups %xmm7,(%ebp)                     # write back final IV
pxor %xmm7,%xmm7
L074cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# int __aesni_set_encrypt_key(user_key=%eax, bits=%ecx, AES_KEY *out=%edx)
# Internal AES key-schedule expansion helper with a register-argument
# convention (the cdecl _aesni_set_encrypt_key wrapper loads %eax/%ecx/%edx
# from the stack before calling here). Expands a 128/192/256-bit user key
# into the round-key schedule at (%edx), storing the round count at byte
# offset 240. Returns in %eax: 0 on success, -1 for a NULL pointer,
# -2 for unsupported key bits.
# Two code paths per key size: the classic one built on AESKEYGENASSIST
# (.byte 102,15,58,223 = aeskeygenassist), and an "_alt" path built on
# PSHUFB+AESENCLAST (.byte 102,15,56,0 = pshufb, 102,15,56,221 = aesenclast)
# selected from OPENSSL_ia32cap_P capability bits.
.private_extern __aesni_set_encrypt_key
.align 4
__aesni_set_encrypt_key:
pushl %ebp
pushl %ebx
testl %eax,%eax                         # NULL user key?
jz L091bad_pointer
testl %edx,%edx                         # NULL output schedule?
jz L091bad_pointer
call L092pic                            # classic call/pop PIC trick:
L092pic:
popl %ebx                               # %ebx = address of L092pic
leal Lkey_const-L092pic(%ebx),%ebx      # %ebx -> Lkey_const table (defined
                                        #  later in this file; not in view)
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
movups (%eax),%xmm0                     # first 16 key bytes
xorps %xmm4,%xmm4                       # %xmm4 = 0 scratch for shufps mixing
movl 4(%ebp),%ebp                       # 2nd word of OPENSSL_ia32cap_P
leal 16(%edx),%edx
andl $268437504,%ebp                    # mask 0x10000800: bits 28 and 11
cmpl $256,%ecx
je L09314rounds
cmpl $192,%ecx
je L09412rounds
cmpl $128,%ecx
jne L095bad_keybits
.align 4,0x90
# ---------------- AES-128: 10 rounds ----------------
L09610rounds:
cmpl $268435456,%ebp                    # only bit 28 set -> use alt path
je L09710rounds_alt
movl $9,%ecx                            # NOTE(review): rounds stored as 9
                                        #  here, then incremented... no —
                                        #  value 9+? stored below as-is plus
                                        #  the +1 from keygen flow; confirm
                                        #  against perlasm (dec path relies
                                        #  on 240(key) round count)
movups %xmm0,-16(%edx)                  # round key 0
.byte 102,15,58,223,200,1               # aeskeygenassist $1,%xmm0,%xmm1
call L098key_128_cold
.byte 102,15,58,223,200,2               # rcon = 2
call L099key_128
.byte 102,15,58,223,200,4               # rcon = 4
call L099key_128
.byte 102,15,58,223,200,8               # rcon = 8
call L099key_128
.byte 102,15,58,223,200,16              # rcon = 16
call L099key_128
.byte 102,15,58,223,200,32              # rcon = 32
call L099key_128
.byte 102,15,58,223,200,64              # rcon = 64
call L099key_128
.byte 102,15,58,223,200,128             # rcon = 128
call L099key_128
.byte 102,15,58,223,200,27              # rcon = 0x1b (after x^8 reduction)
call L099key_128
.byte 102,15,58,223,200,54              # rcon = 0x36
call L099key_128
movups %xmm0,(%edx)                     # round key 10
movl %ecx,80(%edx)                      # rounds -> offset 240 from base
jmp L100good_key
.align 4,0x90
L099key_128:                            # store previous round key, advance
movups %xmm0,(%edx)
leal 16(%edx),%edx
L098key_128_cold:                       # fold keygenassist result into key:
shufps $16,%xmm0,%xmm4                  #  builds key ^ (key<<32) ^ (key<<64)
xorps %xmm4,%xmm0                       #  ^ (key<<96) via two shufps/xorps
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1                 # broadcast RotWord/SubWord lane
xorps %xmm1,%xmm0
ret
.align 4,0x90
L09710rounds_alt:                       # AESKEYGENASSIST-free variant
movdqa (%ebx),%xmm5                     # byte-rotation mask from Lkey_const
movl $8,%ecx
movdqa 32(%ebx),%xmm4                   # round-constant seed
movdqa %xmm0,%xmm2
movdqu %xmm0,-16(%edx)                  # round key 0
L101loop_key128:
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0 (RotWord lanes)
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0 (SubWord+rcon)
pslld $1,%xmm4                          # next rcon = rcon*2
leal 16(%edx),%edx
movdqa %xmm2,%xmm3                      # key ^ (key<<32) ^ (key<<64) ^
pslldq $4,%xmm2                         #  (key<<96), built by shifts
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,-16(%edx)
movdqa %xmm0,%xmm2
decl %ecx
jnz L101loop_key128
movdqa 48(%ebx),%xmm4                   # rcon tail: 0x1b,0x36 handled apart
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0
pslld $1,%xmm4
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)                     # round key 9
movdqa %xmm0,%xmm2
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,16(%edx)                   # round key 10
movl $9,%ecx
movl %ecx,96(%edx)                      # rounds -> offset 240 from base
jmp L100good_key
.align 4,0x90
# ---------------- AES-192: 12 rounds ----------------
L09412rounds:
movq 16(%eax),%xmm2                     # key bytes 16..23 (low qword)
cmpl $268435456,%ebp
je L10212rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)                  # round key 0
.byte 102,15,58,223,202,1               # aeskeygenassist $1,%xmm2,%xmm1
call L103key_192a_cold
.byte 102,15,58,223,202,2               # rcon = 2
call L104key_192b
.byte 102,15,58,223,202,4               # rcon = 4
call L105key_192a
.byte 102,15,58,223,202,8               # rcon = 8
call L104key_192b
.byte 102,15,58,223,202,16              # rcon = 16
call L105key_192a
.byte 102,15,58,223,202,32              # rcon = 32
call L104key_192b
.byte 102,15,58,223,202,64              # rcon = 64
call L105key_192a
.byte 102,15,58,223,202,128             # rcon = 128
call L104key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)                      # rounds -> offset 240 from base
jmp L100good_key
.align 4,0x90
L105key_192a:                           # "a" step: store and advance first
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 4,0x90
L103key_192a_cold:
movaps %xmm2,%xmm5                      # stash high 64 bits of 192-bit key
L106key_192b_warm:                      # shared schedule-mixing core
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1                  # broadcast keygenassist lane 1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
ret
.align 4,0x90
L104key_192b:                           # "b" step: repack 192-bit halves
movaps %xmm0,%xmm3                      #  into two 128-bit round keys
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
jmp L106key_192b_warm
.align 4,0x90
L10212rounds_alt:                       # AESKEYGENASSIST-free variant
movdqa 16(%ebx),%xmm5                   # 192-bit rotation mask
movdqa 32(%ebx),%xmm4                   # rcon seed
movl $8,%ecx
movdqu %xmm0,-16(%edx)
L107loop_key192:
movq %xmm2,(%edx)                       # emit 64-bit half of schedule
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213                   # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                 # aesenclast %xmm4,%xmm2
pslld $1,%xmm4                          # next rcon
leal 24(%edx),%edx                      # 192-bit stride
movdqa %xmm0,%xmm3                      # low-128 mixing by shifts
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm1,%xmm3                        # mix in high 64 bits
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%edx)
decl %ecx
jnz L107loop_key192
movl $11,%ecx
movl %ecx,32(%edx)                      # rounds -> offset 240 from base
jmp L100good_key
.align 4,0x90
# ---------------- AES-256: 14 rounds ----------------
L09314rounds:
movups 16(%eax),%xmm2                   # key bytes 16..31
leal 16(%edx),%edx
cmpl $268435456,%ebp
je L10814rounds_alt
movl $13,%ecx
movups %xmm0,-32(%edx)                  # round key 0
movups %xmm2,-16(%edx)                  # round key 1
.byte 102,15,58,223,202,1               # aeskeygenassist $1,%xmm2,%xmm1
call L109key_256a_cold
.byte 102,15,58,223,200,1               # aeskeygenassist $1,%xmm0,%xmm1
call L110key_256b
.byte 102,15,58,223,202,2               # pairs alternate a/b steps; the
call L111key_256a                       #  "a" immediates are the rcons
.byte 102,15,58,223,200,2
call L110key_256b
.byte 102,15,58,223,202,4
call L111key_256a
.byte 102,15,58,223,200,4
call L110key_256b
.byte 102,15,58,223,202,8
call L111key_256a
.byte 102,15,58,223,200,8
call L110key_256b
.byte 102,15,58,223,202,16
call L111key_256a
.byte 102,15,58,223,200,16
call L110key_256b
.byte 102,15,58,223,202,32
call L111key_256a
.byte 102,15,58,223,200,32
call L110key_256b
.byte 102,15,58,223,202,64
call L111key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)                      # rounds -> offset 240 from base
xorl %eax,%eax
jmp L100good_key
.align 4,0x90
L111key_256a:                           # even round keys (from %xmm2)
movups %xmm2,(%edx)
leal 16(%edx),%edx
L109key_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
ret
.align 4,0x90
L110key_256b:                           # odd round keys (from %xmm0)
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1                 # broadcast lane 2 (no RotWord step)
xorps %xmm1,%xmm2
ret
.align 4,0x90
L10814rounds_alt:                       # AESKEYGENASSIST-free variant
movdqa (%ebx),%xmm5
movdqa 32(%ebx),%xmm4
movl $7,%ecx
movdqu %xmm0,-32(%edx)
movdqa %xmm2,%xmm1
movdqu %xmm2,-16(%edx)
L112loop_key256:
.byte 102,15,56,0,213                   # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                 # aesenclast %xmm4,%xmm2
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4                          # next rcon
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)
decl %ecx
jz L113done_key256
pshufd $255,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211                 # aesenclast %xmm3,%xmm2 (SubWord
movdqa %xmm1,%xmm3                      #  only: rcon operand is zero)
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1
pxor %xmm1,%xmm2
movdqu %xmm2,16(%edx)
leal 32(%edx),%edx
movdqa %xmm2,%xmm1
jmp L112loop_key256
L113done_key256:
movl $13,%ecx
movl %ecx,16(%edx)                      # rounds -> offset 240 from base
L100good_key:                           # success: scrub key material
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorl %eax,%eax                          # return 0
popl %ebx
popl %ebp
ret
.align 2,0x90
L091bad_pointer:
movl $-1,%eax                           # return -1: NULL key/schedule ptr
popl %ebx
popl %ebp
ret
.align 2,0x90
L095bad_keybits:
pxor %xmm0,%xmm0                        # scrub loaded key before failing
movl $-2,%eax                           # return -2: unsupported key size
popl %ebx
popl %ebp
ret
.globl _aesni_set_encrypt_key
.private_extern _aesni_set_encrypt_key
.align 4
/*
 * int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key)                      -- cdecl, i386
 *
 * Thin public wrapper: loads the three stack arguments into the register
 * convention used by the internal key-schedule routine and forwards to it.
 * The status code (0 on success, -1 for a NULL pointer, -2 for unsupported
 * key length -- see the bad_pointer/bad_keybits paths of the internal
 * routine) is returned in %eax.
 */
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
call __aesni_set_encrypt_key
ret
.globl _aesni_set_decrypt_key
.private_extern _aesni_set_decrypt_key
.align 4
/*
 * int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key)                      -- cdecl, i386
 *
 * First expands the ENCRYPTION key schedule via __aesni_set_encrypt_key,
 * then converts it in place into the equivalent-inverse-cipher DECRYPTION
 * schedule required by AESDEC:
 *   - the first and last round keys are swapped unchanged;
 *   - every interior round key is passed through AESIMC (InvMixColumns)
 *     while the key order is reversed, working from both ends toward the
 *     middle;
 *   - the middle round key is AESIMC'd in place.
 * The .byte sequences 102,15,56,219,192 / 102,15,56,219,201 encode
 * "aesimc %xmm0,%xmm0" / "aesimc %xmm1,%xmm1" for assemblers that lack
 * AES-NI mnemonics.  Returns 0 in %eax on success, or the error code
 * propagated from the key-schedule routine.
 */
_aesni_set_decrypt_key:
L_aesni_set_decrypt_key_begin:
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
call __aesni_set_encrypt_key
movl 12(%esp),%edx
shll $4,%ecx
testl %eax,%eax
jnz L114dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
movups %xmm0,(%eax)
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
L115dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leal 16(%edx),%edx
leal -16(%eax),%eax
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
ja L115dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
xorl %eax,%eax
L114dec_key_ret:
ret
.align 6,0x90
/*
 * Constant pool for the "_alt" (AESENCLAST-based) key-schedule paths,
 * addressed via %ebx (e.g. "movdqa (%ebx),%xmm5" / "movdqa 32(%ebx),%xmm4"
 * in L10814rounds_alt above):
 *   +0  : 0x0c0f0e0d x4 -- PSHUFB mask (presumably RotWord/byte-rotate of
 *         the last key word; TODO confirm against aesni-x86.pl)
 *   +16 : 0x04070605 x4 -- second PSHUFB mask
 *   +32 : round-constant seed 1, doubled each round via "pslld $1,%xmm4"
 *   +48 : 27 (0x1b) x4  -- AES rcon reduction polynomial constant
 */
Lkey_const:
.long 202313229,202313229,202313229,202313229
.long 67569157,67569157,67569157,67569157
.long 1,1,1,1
.long 27,27,27,27
/* ASCII banner: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
/*
 * Mach-O non-lazy symbol pointer: an indirect slot through which this
 * file reaches the externally defined _OPENSSL_ia32cap_P CPU-capability
 * vector; the dynamic linker fills in the address at load time.
 */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif
OLD | NEW |