OLD | NEW (empty) |
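/*
 * AES-NI (hardware AES) routines for x86-64 in Mach-O assembler syntax:
 * note the leading-underscore .globl names, the .private_extern directives
 * and the L$ local labels.  The layout matches machine-generated perlasm
 * output, so the hand-encoded .byte sequences below (aesenc, aesenclast,
 * aesdec, aesdeclast, pshufb, pinsrd, movbe, and 0xf3,0xc3 for "rep ret")
 * are intentional.
 */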
1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) | |
2 .text | |
3 | |
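/*
 * _aesni_encrypt(in %rdi, out %rsi, key %rdx): encrypt one 16-byte block.
 * 240(%rdx) holds the round count and the loop walks the key schedule 16
 * bytes per round.  .byte 102,15,56,220,209 is aesenc %xmm1,%xmm2,
 * .byte 102,15,56,221,209 is aesenclast %xmm1,%xmm2, and .byte 0xf3,0xc3
 * is "rep ret".  The key and data registers are zeroed before returning.
 */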
4 .globl _aesni_encrypt | |
5 .private_extern _aesni_encrypt | |
6 | |
7 .p2align 4 | |
8 _aesni_encrypt: | |
9 movups (%rdi),%xmm2 | |
10 movl 240(%rdx),%eax | |
11 movups (%rdx),%xmm0 | |
12 movups 16(%rdx),%xmm1 | |
13 leaq 32(%rdx),%rdx | |
14 xorps %xmm0,%xmm2 | |
15 L$oop_enc1_1: | |
16 .byte 102,15,56,220,209 | |
17 decl %eax | |
18 movups (%rdx),%xmm1 | |
19 leaq 16(%rdx),%rdx | |
20 jnz L$oop_enc1_1 | |
21 .byte 102,15,56,221,209 | |
22 pxor %xmm0,%xmm0 | |
23 pxor %xmm1,%xmm1 | |
24 movups %xmm2,(%rsi) | |
25 pxor %xmm2,%xmm2 | |
26 .byte 0xf3,0xc3 | |
27 | |
28 | |
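/*
 * _aesni_decrypt(in %rdi, out %rsi, key %rdx): decrypt one 16-byte block.
 * Same structure as _aesni_encrypt, with aesdec (.byte 102,15,56,222,209)
 * and aesdeclast (.byte 102,15,56,223,209) in place of the encrypt forms.
 */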
29 .globl _aesni_decrypt | |
30 .private_extern _aesni_decrypt | |
31 | |
32 .p2align 4 | |
33 _aesni_decrypt: | |
34 movups (%rdi),%xmm2 | |
35 movl 240(%rdx),%eax | |
36 movups (%rdx),%xmm0 | |
37 movups 16(%rdx),%xmm1 | |
38 leaq 32(%rdx),%rdx | |
39 xorps %xmm0,%xmm2 | |
40 L$oop_dec1_2: | |
41 .byte 102,15,56,222,209 | |
42 decl %eax | |
43 movups (%rdx),%xmm1 | |
44 leaq 16(%rdx),%rdx | |
45 jnz L$oop_dec1_2 | |
46 .byte 102,15,56,223,209 | |
47 pxor %xmm0,%xmm0 | |
48 pxor %xmm1,%xmm1 | |
49 movups %xmm2,(%rsi) | |
50 pxor %xmm2,%xmm2 | |
51 .byte 0xf3,0xc3 | |
52 | |
53 | |
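/*
 * _aesni_encrypt{2,3,4,6,8} / _aesni_decrypt{2,3,4,6,8}: internal helpers
 * that push 2..8 blocks through the AES rounds in parallel so the
 * independent aesenc/aesdec chains overlap.  On entry %rcx points at the
 * key schedule, %eax holds the round count and the blocks sit in %xmm2 up
 * to %xmm9.  Each helper scales %rax by 16 and repositions %rcx so the
 * round keys are fetched with a negative index that counts up to zero.
 */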
54 .p2align 4 | |
55 _aesni_encrypt2: | |
56 movups (%rcx),%xmm0 | |
57 shll $4,%eax | |
58 movups 16(%rcx),%xmm1 | |
59 xorps %xmm0,%xmm2 | |
60 xorps %xmm0,%xmm3 | |
61 movups 32(%rcx),%xmm0 | |
62 leaq 32(%rcx,%rax,1),%rcx | |
63 negq %rax | |
64 addq $16,%rax | |
65 | |
66 L$enc_loop2: | |
67 .byte 102,15,56,220,209 | |
68 .byte 102,15,56,220,217 | |
69 movups (%rcx,%rax,1),%xmm1 | |
70 addq $32,%rax | |
71 .byte 102,15,56,220,208 | |
72 .byte 102,15,56,220,216 | |
73 movups -16(%rcx,%rax,1),%xmm0 | |
74 jnz L$enc_loop2 | |
75 | |
76 .byte 102,15,56,220,209 | |
77 .byte 102,15,56,220,217 | |
78 .byte 102,15,56,221,208 | |
79 .byte 102,15,56,221,216 | |
80 .byte 0xf3,0xc3 | |
81 | |
82 | |
83 .p2align 4 | |
84 _aesni_decrypt2: | |
85 movups (%rcx),%xmm0 | |
86 shll $4,%eax | |
87 movups 16(%rcx),%xmm1 | |
88 xorps %xmm0,%xmm2 | |
89 xorps %xmm0,%xmm3 | |
90 movups 32(%rcx),%xmm0 | |
91 leaq 32(%rcx,%rax,1),%rcx | |
92 negq %rax | |
93 addq $16,%rax | |
94 | |
95 L$dec_loop2: | |
96 .byte 102,15,56,222,209 | |
97 .byte 102,15,56,222,217 | |
98 movups (%rcx,%rax,1),%xmm1 | |
99 addq $32,%rax | |
100 .byte 102,15,56,222,208 | |
101 .byte 102,15,56,222,216 | |
102 movups -16(%rcx,%rax,1),%xmm0 | |
103 jnz L$dec_loop2 | |
104 | |
105 .byte 102,15,56,222,209 | |
106 .byte 102,15,56,222,217 | |
107 .byte 102,15,56,223,208 | |
108 .byte 102,15,56,223,216 | |
109 .byte 0xf3,0xc3 | |
110 | |
111 | |
112 .p2align 4 | |
113 _aesni_encrypt3: | |
114 movups (%rcx),%xmm0 | |
115 shll $4,%eax | |
116 movups 16(%rcx),%xmm1 | |
117 xorps %xmm0,%xmm2 | |
118 xorps %xmm0,%xmm3 | |
119 xorps %xmm0,%xmm4 | |
120 movups 32(%rcx),%xmm0 | |
121 leaq 32(%rcx,%rax,1),%rcx | |
122 negq %rax | |
123 addq $16,%rax | |
124 | |
125 L$enc_loop3: | |
126 .byte 102,15,56,220,209 | |
127 .byte 102,15,56,220,217 | |
128 .byte 102,15,56,220,225 | |
129 movups (%rcx,%rax,1),%xmm1 | |
130 addq $32,%rax | |
131 .byte 102,15,56,220,208 | |
132 .byte 102,15,56,220,216 | |
133 .byte 102,15,56,220,224 | |
134 movups -16(%rcx,%rax,1),%xmm0 | |
135 jnz L$enc_loop3 | |
136 | |
137 .byte 102,15,56,220,209 | |
138 .byte 102,15,56,220,217 | |
139 .byte 102,15,56,220,225 | |
140 .byte 102,15,56,221,208 | |
141 .byte 102,15,56,221,216 | |
142 .byte 102,15,56,221,224 | |
143 .byte 0xf3,0xc3 | |
144 | |
145 | |
146 .p2align 4 | |
147 _aesni_decrypt3: | |
148 movups (%rcx),%xmm0 | |
149 shll $4,%eax | |
150 movups 16(%rcx),%xmm1 | |
151 xorps %xmm0,%xmm2 | |
152 xorps %xmm0,%xmm3 | |
153 xorps %xmm0,%xmm4 | |
154 movups 32(%rcx),%xmm0 | |
155 leaq 32(%rcx,%rax,1),%rcx | |
156 negq %rax | |
157 addq $16,%rax | |
158 | |
159 L$dec_loop3: | |
160 .byte 102,15,56,222,209 | |
161 .byte 102,15,56,222,217 | |
162 .byte 102,15,56,222,225 | |
163 movups (%rcx,%rax,1),%xmm1 | |
164 addq $32,%rax | |
165 .byte 102,15,56,222,208 | |
166 .byte 102,15,56,222,216 | |
167 .byte 102,15,56,222,224 | |
168 movups -16(%rcx,%rax,1),%xmm0 | |
169 jnz L$dec_loop3 | |
170 | |
171 .byte 102,15,56,222,209 | |
172 .byte 102,15,56,222,217 | |
173 .byte 102,15,56,222,225 | |
174 .byte 102,15,56,223,208 | |
175 .byte 102,15,56,223,216 | |
176 .byte 102,15,56,223,224 | |
177 .byte 0xf3,0xc3 | |
178 | |
179 | |
180 .p2align 4 | |
181 _aesni_encrypt4: | |
182 movups (%rcx),%xmm0 | |
183 shll $4,%eax | |
184 movups 16(%rcx),%xmm1 | |
185 xorps %xmm0,%xmm2 | |
186 xorps %xmm0,%xmm3 | |
187 xorps %xmm0,%xmm4 | |
188 xorps %xmm0,%xmm5 | |
189 movups 32(%rcx),%xmm0 | |
190 leaq 32(%rcx,%rax,1),%rcx | |
191 negq %rax | |
192 .byte 0x0f,0x1f,0x00 | |
193 addq $16,%rax | |
194 | |
195 L$enc_loop4: | |
196 .byte 102,15,56,220,209 | |
197 .byte 102,15,56,220,217 | |
198 .byte 102,15,56,220,225 | |
199 .byte 102,15,56,220,233 | |
200 movups (%rcx,%rax,1),%xmm1 | |
201 addq $32,%rax | |
202 .byte 102,15,56,220,208 | |
203 .byte 102,15,56,220,216 | |
204 .byte 102,15,56,220,224 | |
205 .byte 102,15,56,220,232 | |
206 movups -16(%rcx,%rax,1),%xmm0 | |
207 jnz L$enc_loop4 | |
208 | |
209 .byte 102,15,56,220,209 | |
210 .byte 102,15,56,220,217 | |
211 .byte 102,15,56,220,225 | |
212 .byte 102,15,56,220,233 | |
213 .byte 102,15,56,221,208 | |
214 .byte 102,15,56,221,216 | |
215 .byte 102,15,56,221,224 | |
216 .byte 102,15,56,221,232 | |
217 .byte 0xf3,0xc3 | |
218 | |
219 | |
220 .p2align 4 | |
221 _aesni_decrypt4: | |
222 movups (%rcx),%xmm0 | |
223 shll $4,%eax | |
224 movups 16(%rcx),%xmm1 | |
225 xorps %xmm0,%xmm2 | |
226 xorps %xmm0,%xmm3 | |
227 xorps %xmm0,%xmm4 | |
228 xorps %xmm0,%xmm5 | |
229 movups 32(%rcx),%xmm0 | |
230 leaq 32(%rcx,%rax,1),%rcx | |
231 negq %rax | |
232 .byte 0x0f,0x1f,0x00 | |
233 addq $16,%rax | |
234 | |
235 L$dec_loop4: | |
236 .byte 102,15,56,222,209 | |
237 .byte 102,15,56,222,217 | |
238 .byte 102,15,56,222,225 | |
239 .byte 102,15,56,222,233 | |
240 movups (%rcx,%rax,1),%xmm1 | |
241 addq $32,%rax | |
242 .byte 102,15,56,222,208 | |
243 .byte 102,15,56,222,216 | |
244 .byte 102,15,56,222,224 | |
245 .byte 102,15,56,222,232 | |
246 movups -16(%rcx,%rax,1),%xmm0 | |
247 jnz L$dec_loop4 | |
248 | |
249 .byte 102,15,56,222,209 | |
250 .byte 102,15,56,222,217 | |
251 .byte 102,15,56,222,225 | |
252 .byte 102,15,56,222,233 | |
253 .byte 102,15,56,223,208 | |
254 .byte 102,15,56,223,216 | |
255 .byte 102,15,56,223,224 | |
256 .byte 102,15,56,223,232 | |
257 .byte 0xf3,0xc3 | |
258 | |
259 | |
260 .p2align 4 | |
261 _aesni_encrypt6: | |
262 movups (%rcx),%xmm0 | |
263 shll $4,%eax | |
264 movups 16(%rcx),%xmm1 | |
265 xorps %xmm0,%xmm2 | |
266 pxor %xmm0,%xmm3 | |
267 pxor %xmm0,%xmm4 | |
268 .byte 102,15,56,220,209 | |
269 leaq 32(%rcx,%rax,1),%rcx | |
270 negq %rax | |
271 .byte 102,15,56,220,217 | |
272 pxor %xmm0,%xmm5 | |
273 pxor %xmm0,%xmm6 | |
274 .byte 102,15,56,220,225 | |
275 pxor %xmm0,%xmm7 | |
276 movups (%rcx,%rax,1),%xmm0 | |
277 addq $16,%rax | |
278 jmp L$enc_loop6_enter | |
279 .p2align 4 | |
280 L$enc_loop6: | |
281 .byte 102,15,56,220,209 | |
282 .byte 102,15,56,220,217 | |
283 .byte 102,15,56,220,225 | |
284 L$enc_loop6_enter: | |
285 .byte 102,15,56,220,233 | |
286 .byte 102,15,56,220,241 | |
287 .byte 102,15,56,220,249 | |
288 movups (%rcx,%rax,1),%xmm1 | |
289 addq $32,%rax | |
290 .byte 102,15,56,220,208 | |
291 .byte 102,15,56,220,216 | |
292 .byte 102,15,56,220,224 | |
293 .byte 102,15,56,220,232 | |
294 .byte 102,15,56,220,240 | |
295 .byte 102,15,56,220,248 | |
296 movups -16(%rcx,%rax,1),%xmm0 | |
297 jnz L$enc_loop6 | |
298 | |
299 .byte 102,15,56,220,209 | |
300 .byte 102,15,56,220,217 | |
301 .byte 102,15,56,220,225 | |
302 .byte 102,15,56,220,233 | |
303 .byte 102,15,56,220,241 | |
304 .byte 102,15,56,220,249 | |
305 .byte 102,15,56,221,208 | |
306 .byte 102,15,56,221,216 | |
307 .byte 102,15,56,221,224 | |
308 .byte 102,15,56,221,232 | |
309 .byte 102,15,56,221,240 | |
310 .byte 102,15,56,221,248 | |
311 .byte 0xf3,0xc3 | |
312 | |
313 | |
314 .p2align 4 | |
315 _aesni_decrypt6: | |
316 movups (%rcx),%xmm0 | |
317 shll $4,%eax | |
318 movups 16(%rcx),%xmm1 | |
319 xorps %xmm0,%xmm2 | |
320 pxor %xmm0,%xmm3 | |
321 pxor %xmm0,%xmm4 | |
322 .byte 102,15,56,222,209 | |
323 leaq 32(%rcx,%rax,1),%rcx | |
324 negq %rax | |
325 .byte 102,15,56,222,217 | |
326 pxor %xmm0,%xmm5 | |
327 pxor %xmm0,%xmm6 | |
328 .byte 102,15,56,222,225 | |
329 pxor %xmm0,%xmm7 | |
330 movups (%rcx,%rax,1),%xmm0 | |
331 addq $16,%rax | |
332 jmp L$dec_loop6_enter | |
333 .p2align 4 | |
334 L$dec_loop6: | |
335 .byte 102,15,56,222,209 | |
336 .byte 102,15,56,222,217 | |
337 .byte 102,15,56,222,225 | |
338 L$dec_loop6_enter: | |
339 .byte 102,15,56,222,233 | |
340 .byte 102,15,56,222,241 | |
341 .byte 102,15,56,222,249 | |
342 movups (%rcx,%rax,1),%xmm1 | |
343 addq $32,%rax | |
344 .byte 102,15,56,222,208 | |
345 .byte 102,15,56,222,216 | |
346 .byte 102,15,56,222,224 | |
347 .byte 102,15,56,222,232 | |
348 .byte 102,15,56,222,240 | |
349 .byte 102,15,56,222,248 | |
350 movups -16(%rcx,%rax,1),%xmm0 | |
351 jnz L$dec_loop6 | |
352 | |
353 .byte 102,15,56,222,209 | |
354 .byte 102,15,56,222,217 | |
355 .byte 102,15,56,222,225 | |
356 .byte 102,15,56,222,233 | |
357 .byte 102,15,56,222,241 | |
358 .byte 102,15,56,222,249 | |
359 .byte 102,15,56,223,208 | |
360 .byte 102,15,56,223,216 | |
361 .byte 102,15,56,223,224 | |
362 .byte 102,15,56,223,232 | |
363 .byte 102,15,56,223,240 | |
364 .byte 102,15,56,223,248 | |
365 .byte 0xf3,0xc3 | |
366 | |
367 | |
368 .p2align 4 | |
369 _aesni_encrypt8: | |
370 movups (%rcx),%xmm0 | |
371 shll $4,%eax | |
372 movups 16(%rcx),%xmm1 | |
373 xorps %xmm0,%xmm2 | |
374 xorps %xmm0,%xmm3 | |
375 pxor %xmm0,%xmm4 | |
376 pxor %xmm0,%xmm5 | |
377 pxor %xmm0,%xmm6 | |
378 leaq 32(%rcx,%rax,1),%rcx | |
379 negq %rax | |
380 .byte 102,15,56,220,209 | |
381 pxor %xmm0,%xmm7 | |
382 pxor %xmm0,%xmm8 | |
383 .byte 102,15,56,220,217 | |
384 pxor %xmm0,%xmm9 | |
385 movups (%rcx,%rax,1),%xmm0 | |
386 addq $16,%rax | |
387 jmp L$enc_loop8_inner | |
388 .p2align 4 | |
389 L$enc_loop8: | |
390 .byte 102,15,56,220,209 | |
391 .byte 102,15,56,220,217 | |
392 L$enc_loop8_inner: | |
393 .byte 102,15,56,220,225 | |
394 .byte 102,15,56,220,233 | |
395 .byte 102,15,56,220,241 | |
396 .byte 102,15,56,220,249 | |
397 .byte 102,68,15,56,220,193 | |
398 .byte 102,68,15,56,220,201 | |
399 L$enc_loop8_enter: | |
400 movups (%rcx,%rax,1),%xmm1 | |
401 addq $32,%rax | |
402 .byte 102,15,56,220,208 | |
403 .byte 102,15,56,220,216 | |
404 .byte 102,15,56,220,224 | |
405 .byte 102,15,56,220,232 | |
406 .byte 102,15,56,220,240 | |
407 .byte 102,15,56,220,248 | |
408 .byte 102,68,15,56,220,192 | |
409 .byte 102,68,15,56,220,200 | |
410 movups -16(%rcx,%rax,1),%xmm0 | |
411 jnz L$enc_loop8 | |
412 | |
413 .byte 102,15,56,220,209 | |
414 .byte 102,15,56,220,217 | |
415 .byte 102,15,56,220,225 | |
416 .byte 102,15,56,220,233 | |
417 .byte 102,15,56,220,241 | |
418 .byte 102,15,56,220,249 | |
419 .byte 102,68,15,56,220,193 | |
420 .byte 102,68,15,56,220,201 | |
421 .byte 102,15,56,221,208 | |
422 .byte 102,15,56,221,216 | |
423 .byte 102,15,56,221,224 | |
424 .byte 102,15,56,221,232 | |
425 .byte 102,15,56,221,240 | |
426 .byte 102,15,56,221,248 | |
427 .byte 102,68,15,56,221,192 | |
428 .byte 102,68,15,56,221,200 | |
429 .byte 0xf3,0xc3 | |
430 | |
431 | |
432 .p2align 4 | |
433 _aesni_decrypt8: | |
434 movups (%rcx),%xmm0 | |
435 shll $4,%eax | |
436 movups 16(%rcx),%xmm1 | |
437 xorps %xmm0,%xmm2 | |
438 xorps %xmm0,%xmm3 | |
439 pxor %xmm0,%xmm4 | |
440 pxor %xmm0,%xmm5 | |
441 pxor %xmm0,%xmm6 | |
442 leaq 32(%rcx,%rax,1),%rcx | |
443 negq %rax | |
444 .byte 102,15,56,222,209 | |
445 pxor %xmm0,%xmm7 | |
446 pxor %xmm0,%xmm8 | |
447 .byte 102,15,56,222,217 | |
448 pxor %xmm0,%xmm9 | |
449 movups (%rcx,%rax,1),%xmm0 | |
450 addq $16,%rax | |
451 jmp L$dec_loop8_inner | |
452 .p2align 4 | |
453 L$dec_loop8: | |
454 .byte 102,15,56,222,209 | |
455 .byte 102,15,56,222,217 | |
456 L$dec_loop8_inner: | |
457 .byte 102,15,56,222,225 | |
458 .byte 102,15,56,222,233 | |
459 .byte 102,15,56,222,241 | |
460 .byte 102,15,56,222,249 | |
461 .byte 102,68,15,56,222,193 | |
462 .byte 102,68,15,56,222,201 | |
463 L$dec_loop8_enter: | |
464 movups (%rcx,%rax,1),%xmm1 | |
465 addq $32,%rax | |
466 .byte 102,15,56,222,208 | |
467 .byte 102,15,56,222,216 | |
468 .byte 102,15,56,222,224 | |
469 .byte 102,15,56,222,232 | |
470 .byte 102,15,56,222,240 | |
471 .byte 102,15,56,222,248 | |
472 .byte 102,68,15,56,222,192 | |
473 .byte 102,68,15,56,222,200 | |
474 movups -16(%rcx,%rax,1),%xmm0 | |
475 jnz L$dec_loop8 | |
476 | |
477 .byte 102,15,56,222,209 | |
478 .byte 102,15,56,222,217 | |
479 .byte 102,15,56,222,225 | |
480 .byte 102,15,56,222,233 | |
481 .byte 102,15,56,222,241 | |
482 .byte 102,15,56,222,249 | |
483 .byte 102,68,15,56,222,193 | |
484 .byte 102,68,15,56,222,201 | |
485 .byte 102,15,56,223,208 | |
486 .byte 102,15,56,223,216 | |
487 .byte 102,15,56,223,224 | |
488 .byte 102,15,56,223,232 | |
489 .byte 102,15,56,223,240 | |
490 .byte 102,15,56,223,248 | |
491 .byte 102,68,15,56,223,192 | |
492 .byte 102,68,15,56,223,200 | |
493 .byte 0xf3,0xc3 | |
494 | |
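/*
 * _aesni_ecb_encrypt(in %rdi, out %rsi, length %rdx, key %rcx, enc %r8d):
 * ECB over whole blocks; the byte length is rounded down to a multiple of
 * 16 (andq $-16).  Nonzero %r8d selects encryption, zero takes the
 * L$ecb_decrypt path.  The main loops process eight blocks per iteration
 * through _aesni_encrypt8/_aesni_decrypt8, with 1..7-block tail cases.
 */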
495 .globl _aesni_ecb_encrypt | |
496 .private_extern _aesni_ecb_encrypt | |
497 | |
498 .p2align 4 | |
499 _aesni_ecb_encrypt: | |
500 andq $-16,%rdx | |
501 jz L$ecb_ret | |
502 | |
503 movl 240(%rcx),%eax | |
504 movups (%rcx),%xmm0 | |
505 movq %rcx,%r11 | |
506 movl %eax,%r10d | |
507 testl %r8d,%r8d | |
508 jz L$ecb_decrypt | |
509 | |
510 cmpq $0x80,%rdx | |
511 jb L$ecb_enc_tail | |
512 | |
513 movdqu (%rdi),%xmm2 | |
514 movdqu 16(%rdi),%xmm3 | |
515 movdqu 32(%rdi),%xmm4 | |
516 movdqu 48(%rdi),%xmm5 | |
517 movdqu 64(%rdi),%xmm6 | |
518 movdqu 80(%rdi),%xmm7 | |
519 movdqu 96(%rdi),%xmm8 | |
520 movdqu 112(%rdi),%xmm9 | |
521 leaq 128(%rdi),%rdi | |
522 subq $0x80,%rdx | |
523 jmp L$ecb_enc_loop8_enter | |
524 .p2align 4 | |
525 L$ecb_enc_loop8: | |
526 movups %xmm2,(%rsi) | |
527 movq %r11,%rcx | |
528 movdqu (%rdi),%xmm2 | |
529 movl %r10d,%eax | |
530 movups %xmm3,16(%rsi) | |
531 movdqu 16(%rdi),%xmm3 | |
532 movups %xmm4,32(%rsi) | |
533 movdqu 32(%rdi),%xmm4 | |
534 movups %xmm5,48(%rsi) | |
535 movdqu 48(%rdi),%xmm5 | |
536 movups %xmm6,64(%rsi) | |
537 movdqu 64(%rdi),%xmm6 | |
538 movups %xmm7,80(%rsi) | |
539 movdqu 80(%rdi),%xmm7 | |
540 movups %xmm8,96(%rsi) | |
541 movdqu 96(%rdi),%xmm8 | |
542 movups %xmm9,112(%rsi) | |
543 leaq 128(%rsi),%rsi | |
544 movdqu 112(%rdi),%xmm9 | |
545 leaq 128(%rdi),%rdi | |
546 L$ecb_enc_loop8_enter: | |
547 | |
548 call _aesni_encrypt8 | |
549 | |
550 subq $0x80,%rdx | |
551 jnc L$ecb_enc_loop8 | |
552 | |
553 movups %xmm2,(%rsi) | |
554 movq %r11,%rcx | |
555 movups %xmm3,16(%rsi) | |
556 movl %r10d,%eax | |
557 movups %xmm4,32(%rsi) | |
558 movups %xmm5,48(%rsi) | |
559 movups %xmm6,64(%rsi) | |
560 movups %xmm7,80(%rsi) | |
561 movups %xmm8,96(%rsi) | |
562 movups %xmm9,112(%rsi) | |
563 leaq 128(%rsi),%rsi | |
564 addq $0x80,%rdx | |
565 jz L$ecb_ret | |
566 | |
567 L$ecb_enc_tail: | |
568 movups (%rdi),%xmm2 | |
569 cmpq $0x20,%rdx | |
570 jb L$ecb_enc_one | |
571 movups 16(%rdi),%xmm3 | |
572 je L$ecb_enc_two | |
573 movups 32(%rdi),%xmm4 | |
574 cmpq $0x40,%rdx | |
575 jb L$ecb_enc_three | |
576 movups 48(%rdi),%xmm5 | |
577 je L$ecb_enc_four | |
578 movups 64(%rdi),%xmm6 | |
579 cmpq $0x60,%rdx | |
580 jb L$ecb_enc_five | |
581 movups 80(%rdi),%xmm7 | |
582 je L$ecb_enc_six | |
583 movdqu 96(%rdi),%xmm8 | |
584 xorps %xmm9,%xmm9 | |
585 call _aesni_encrypt8 | |
586 movups %xmm2,(%rsi) | |
587 movups %xmm3,16(%rsi) | |
588 movups %xmm4,32(%rsi) | |
589 movups %xmm5,48(%rsi) | |
590 movups %xmm6,64(%rsi) | |
591 movups %xmm7,80(%rsi) | |
592 movups %xmm8,96(%rsi) | |
593 jmp L$ecb_ret | |
594 .p2align 4 | |
595 L$ecb_enc_one: | |
596 movups (%rcx),%xmm0 | |
597 movups 16(%rcx),%xmm1 | |
598 leaq 32(%rcx),%rcx | |
599 xorps %xmm0,%xmm2 | |
600 L$oop_enc1_3: | |
601 .byte 102,15,56,220,209 | |
602 decl %eax | |
603 movups (%rcx),%xmm1 | |
604 leaq 16(%rcx),%rcx | |
605 jnz L$oop_enc1_3 | |
606 .byte 102,15,56,221,209 | |
607 movups %xmm2,(%rsi) | |
608 jmp L$ecb_ret | |
609 .p2align 4 | |
610 L$ecb_enc_two: | |
611 call _aesni_encrypt2 | |
612 movups %xmm2,(%rsi) | |
613 movups %xmm3,16(%rsi) | |
614 jmp L$ecb_ret | |
615 .p2align 4 | |
616 L$ecb_enc_three: | |
617 call _aesni_encrypt3 | |
618 movups %xmm2,(%rsi) | |
619 movups %xmm3,16(%rsi) | |
620 movups %xmm4,32(%rsi) | |
621 jmp L$ecb_ret | |
622 .p2align 4 | |
623 L$ecb_enc_four: | |
624 call _aesni_encrypt4 | |
625 movups %xmm2,(%rsi) | |
626 movups %xmm3,16(%rsi) | |
627 movups %xmm4,32(%rsi) | |
628 movups %xmm5,48(%rsi) | |
629 jmp L$ecb_ret | |
630 .p2align 4 | |
631 L$ecb_enc_five: | |
632 xorps %xmm7,%xmm7 | |
633 call _aesni_encrypt6 | |
634 movups %xmm2,(%rsi) | |
635 movups %xmm3,16(%rsi) | |
636 movups %xmm4,32(%rsi) | |
637 movups %xmm5,48(%rsi) | |
638 movups %xmm6,64(%rsi) | |
639 jmp L$ecb_ret | |
640 .p2align 4 | |
641 L$ecb_enc_six: | |
642 call _aesni_encrypt6 | |
643 movups %xmm2,(%rsi) | |
644 movups %xmm3,16(%rsi) | |
645 movups %xmm4,32(%rsi) | |
646 movups %xmm5,48(%rsi) | |
647 movups %xmm6,64(%rsi) | |
648 movups %xmm7,80(%rsi) | |
649 jmp L$ecb_ret | |
650 | |
651 .p2align 4 | |
652 L$ecb_decrypt: | |
653 cmpq $0x80,%rdx | |
654 jb L$ecb_dec_tail | |
655 | |
656 movdqu (%rdi),%xmm2 | |
657 movdqu 16(%rdi),%xmm3 | |
658 movdqu 32(%rdi),%xmm4 | |
659 movdqu 48(%rdi),%xmm5 | |
660 movdqu 64(%rdi),%xmm6 | |
661 movdqu 80(%rdi),%xmm7 | |
662 movdqu 96(%rdi),%xmm8 | |
663 movdqu 112(%rdi),%xmm9 | |
664 leaq 128(%rdi),%rdi | |
665 subq $0x80,%rdx | |
666 jmp L$ecb_dec_loop8_enter | |
667 .p2align 4 | |
668 L$ecb_dec_loop8: | |
669 movups %xmm2,(%rsi) | |
670 movq %r11,%rcx | |
671 movdqu (%rdi),%xmm2 | |
672 movl %r10d,%eax | |
673 movups %xmm3,16(%rsi) | |
674 movdqu 16(%rdi),%xmm3 | |
675 movups %xmm4,32(%rsi) | |
676 movdqu 32(%rdi),%xmm4 | |
677 movups %xmm5,48(%rsi) | |
678 movdqu 48(%rdi),%xmm5 | |
679 movups %xmm6,64(%rsi) | |
680 movdqu 64(%rdi),%xmm6 | |
681 movups %xmm7,80(%rsi) | |
682 movdqu 80(%rdi),%xmm7 | |
683 movups %xmm8,96(%rsi) | |
684 movdqu 96(%rdi),%xmm8 | |
685 movups %xmm9,112(%rsi) | |
686 leaq 128(%rsi),%rsi | |
687 movdqu 112(%rdi),%xmm9 | |
688 leaq 128(%rdi),%rdi | |
689 L$ecb_dec_loop8_enter: | |
690 | |
691 call _aesni_decrypt8 | |
692 | |
693 movups (%r11),%xmm0 | |
694 subq $0x80,%rdx | |
695 jnc L$ecb_dec_loop8 | |
696 | |
697 movups %xmm2,(%rsi) | |
698 pxor %xmm2,%xmm2 | |
699 movq %r11,%rcx | |
700 movups %xmm3,16(%rsi) | |
701 pxor %xmm3,%xmm3 | |
702 movl %r10d,%eax | |
703 movups %xmm4,32(%rsi) | |
704 pxor %xmm4,%xmm4 | |
705 movups %xmm5,48(%rsi) | |
706 pxor %xmm5,%xmm5 | |
707 movups %xmm6,64(%rsi) | |
708 pxor %xmm6,%xmm6 | |
709 movups %xmm7,80(%rsi) | |
710 pxor %xmm7,%xmm7 | |
711 movups %xmm8,96(%rsi) | |
712 pxor %xmm8,%xmm8 | |
713 movups %xmm9,112(%rsi) | |
714 pxor %xmm9,%xmm9 | |
715 leaq 128(%rsi),%rsi | |
716 addq $0x80,%rdx | |
717 jz L$ecb_ret | |
718 | |
719 L$ecb_dec_tail: | |
720 movups (%rdi),%xmm2 | |
721 cmpq $0x20,%rdx | |
722 jb L$ecb_dec_one | |
723 movups 16(%rdi),%xmm3 | |
724 je L$ecb_dec_two | |
725 movups 32(%rdi),%xmm4 | |
726 cmpq $0x40,%rdx | |
727 jb L$ecb_dec_three | |
728 movups 48(%rdi),%xmm5 | |
729 je L$ecb_dec_four | |
730 movups 64(%rdi),%xmm6 | |
731 cmpq $0x60,%rdx | |
732 jb L$ecb_dec_five | |
733 movups 80(%rdi),%xmm7 | |
734 je L$ecb_dec_six | |
735 movups 96(%rdi),%xmm8 | |
736 movups (%rcx),%xmm0 | |
737 xorps %xmm9,%xmm9 | |
738 call _aesni_decrypt8 | |
739 movups %xmm2,(%rsi) | |
740 pxor %xmm2,%xmm2 | |
741 movups %xmm3,16(%rsi) | |
742 pxor %xmm3,%xmm3 | |
743 movups %xmm4,32(%rsi) | |
744 pxor %xmm4,%xmm4 | |
745 movups %xmm5,48(%rsi) | |
746 pxor %xmm5,%xmm5 | |
747 movups %xmm6,64(%rsi) | |
748 pxor %xmm6,%xmm6 | |
749 movups %xmm7,80(%rsi) | |
750 pxor %xmm7,%xmm7 | |
751 movups %xmm8,96(%rsi) | |
752 pxor %xmm8,%xmm8 | |
753 pxor %xmm9,%xmm9 | |
754 jmp L$ecb_ret | |
755 .p2align 4 | |
756 L$ecb_dec_one: | |
757 movups (%rcx),%xmm0 | |
758 movups 16(%rcx),%xmm1 | |
759 leaq 32(%rcx),%rcx | |
760 xorps %xmm0,%xmm2 | |
761 L$oop_dec1_4: | |
762 .byte 102,15,56,222,209 | |
763 decl %eax | |
764 movups (%rcx),%xmm1 | |
765 leaq 16(%rcx),%rcx | |
766 jnz L$oop_dec1_4 | |
767 .byte 102,15,56,223,209 | |
768 movups %xmm2,(%rsi) | |
769 pxor %xmm2,%xmm2 | |
770 jmp L$ecb_ret | |
771 .p2align 4 | |
772 L$ecb_dec_two: | |
773 call _aesni_decrypt2 | |
774 movups %xmm2,(%rsi) | |
775 pxor %xmm2,%xmm2 | |
776 movups %xmm3,16(%rsi) | |
777 pxor %xmm3,%xmm3 | |
778 jmp L$ecb_ret | |
779 .p2align 4 | |
780 L$ecb_dec_three: | |
781 call _aesni_decrypt3 | |
782 movups %xmm2,(%rsi) | |
783 pxor %xmm2,%xmm2 | |
784 movups %xmm3,16(%rsi) | |
785 pxor %xmm3,%xmm3 | |
786 movups %xmm4,32(%rsi) | |
787 pxor %xmm4,%xmm4 | |
788 jmp L$ecb_ret | |
789 .p2align 4 | |
790 L$ecb_dec_four: | |
791 call _aesni_decrypt4 | |
792 movups %xmm2,(%rsi) | |
793 pxor %xmm2,%xmm2 | |
794 movups %xmm3,16(%rsi) | |
795 pxor %xmm3,%xmm3 | |
796 movups %xmm4,32(%rsi) | |
797 pxor %xmm4,%xmm4 | |
798 movups %xmm5,48(%rsi) | |
799 pxor %xmm5,%xmm5 | |
800 jmp L$ecb_ret | |
801 .p2align 4 | |
802 L$ecb_dec_five: | |
803 xorps %xmm7,%xmm7 | |
804 call _aesni_decrypt6 | |
805 movups %xmm2,(%rsi) | |
806 pxor %xmm2,%xmm2 | |
807 movups %xmm3,16(%rsi) | |
808 pxor %xmm3,%xmm3 | |
809 movups %xmm4,32(%rsi) | |
810 pxor %xmm4,%xmm4 | |
811 movups %xmm5,48(%rsi) | |
812 pxor %xmm5,%xmm5 | |
813 movups %xmm6,64(%rsi) | |
814 pxor %xmm6,%xmm6 | |
815 pxor %xmm7,%xmm7 | |
816 jmp L$ecb_ret | |
817 .p2align 4 | |
818 L$ecb_dec_six: | |
819 call _aesni_decrypt6 | |
820 movups %xmm2,(%rsi) | |
821 pxor %xmm2,%xmm2 | |
822 movups %xmm3,16(%rsi) | |
823 pxor %xmm3,%xmm3 | |
824 movups %xmm4,32(%rsi) | |
825 pxor %xmm4,%xmm4 | |
826 movups %xmm5,48(%rsi) | |
827 pxor %xmm5,%xmm5 | |
828 movups %xmm6,64(%rsi) | |
829 pxor %xmm6,%xmm6 | |
830 movups %xmm7,80(%rsi) | |
831 pxor %xmm7,%xmm7 | |
832 | |
833 L$ecb_ret: | |
834 xorps %xmm0,%xmm0 | |
835 pxor %xmm1,%xmm1 | |
836 .byte 0xf3,0xc3 | |
837 | |
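/*
 * _aesni_ccm64_encrypt_blocks(in %rdi, out %rsi, blocks %rdx, key %rcx,
 * counter %r8, mac %r9): CCM with a 64-bit counter.  %xmm6 carries the
 * counter block (byte-swapped via L$bswap_mask, bumped by L$increment64
 * per block) and %xmm3 the running CBC-MAC, written back to (%r9) at the
 * end.  Each iteration encrypts the counter and the MAC state in parallel
 * through the same round keys.
 */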
838 .globl _aesni_ccm64_encrypt_blocks | |
839 .private_extern _aesni_ccm64_encrypt_blocks | |
840 | |
841 .p2align 4 | |
842 _aesni_ccm64_encrypt_blocks: | |
843 movl 240(%rcx),%eax | |
844 movdqu (%r8),%xmm6 | |
845 movdqa L$increment64(%rip),%xmm9 | |
846 movdqa L$bswap_mask(%rip),%xmm7 | |
847 | |
848 shll $4,%eax | |
849 movl $16,%r10d | |
850 leaq 0(%rcx),%r11 | |
851 movdqu (%r9),%xmm3 | |
852 movdqa %xmm6,%xmm2 | |
853 leaq 32(%rcx,%rax,1),%rcx | |
854 .byte 102,15,56,0,247 | |
855 subq %rax,%r10 | |
856 jmp L$ccm64_enc_outer | |
857 .p2align 4 | |
858 L$ccm64_enc_outer: | |
859 movups (%r11),%xmm0 | |
860 movq %r10,%rax | |
861 movups (%rdi),%xmm8 | |
862 | |
863 xorps %xmm0,%xmm2 | |
864 movups 16(%r11),%xmm1 | |
865 xorps %xmm8,%xmm0 | |
866 xorps %xmm0,%xmm3 | |
867 movups 32(%r11),%xmm0 | |
868 | |
869 L$ccm64_enc2_loop: | |
870 .byte 102,15,56,220,209 | |
871 .byte 102,15,56,220,217 | |
872 movups (%rcx,%rax,1),%xmm1 | |
873 addq $32,%rax | |
874 .byte 102,15,56,220,208 | |
875 .byte 102,15,56,220,216 | |
876 movups -16(%rcx,%rax,1),%xmm0 | |
877 jnz L$ccm64_enc2_loop | |
878 .byte 102,15,56,220,209 | |
879 .byte 102,15,56,220,217 | |
880 paddq %xmm9,%xmm6 | |
881 decq %rdx | |
882 .byte 102,15,56,221,208 | |
883 .byte 102,15,56,221,216 | |
884 | |
885 leaq 16(%rdi),%rdi | |
886 xorps %xmm2,%xmm8 | |
887 movdqa %xmm6,%xmm2 | |
888 movups %xmm8,(%rsi) | |
889 .byte 102,15,56,0,215 | |
890 leaq 16(%rsi),%rsi | |
891 jnz L$ccm64_enc_outer | |
892 | |
893 pxor %xmm0,%xmm0 | |
894 pxor %xmm1,%xmm1 | |
895 pxor %xmm2,%xmm2 | |
896 movups %xmm3,(%r9) | |
897 pxor %xmm3,%xmm3 | |
898 pxor %xmm8,%xmm8 | |
899 pxor %xmm6,%xmm6 | |
900 .byte 0xf3,0xc3 | |
901 | |
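/*
 * _aesni_ccm64_decrypt_blocks: same register interface as the encrypt
 * variant.  The first counter block is encrypted up front, each ciphertext
 * block is XORed with that keystream, and the recovered plaintext is folded
 * into the CBC-MAC in %xmm3 while the next counter block is encrypted.
 */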
902 .globl _aesni_ccm64_decrypt_blocks | |
903 .private_extern _aesni_ccm64_decrypt_blocks | |
904 | |
905 .p2align 4 | |
906 _aesni_ccm64_decrypt_blocks: | |
907 movl 240(%rcx),%eax | |
908 movups (%r8),%xmm6 | |
909 movdqu (%r9),%xmm3 | |
910 movdqa L$increment64(%rip),%xmm9 | |
911 movdqa L$bswap_mask(%rip),%xmm7 | |
912 | |
913 movaps %xmm6,%xmm2 | |
914 movl %eax,%r10d | |
915 movq %rcx,%r11 | |
916 .byte 102,15,56,0,247 | |
917 movups (%rcx),%xmm0 | |
918 movups 16(%rcx),%xmm1 | |
919 leaq 32(%rcx),%rcx | |
920 xorps %xmm0,%xmm2 | |
921 L$oop_enc1_5: | |
922 .byte 102,15,56,220,209 | |
923 decl %eax | |
924 movups (%rcx),%xmm1 | |
925 leaq 16(%rcx),%rcx | |
926 jnz L$oop_enc1_5 | |
927 .byte 102,15,56,221,209 | |
928 shll $4,%r10d | |
929 movl $16,%eax | |
930 movups (%rdi),%xmm8 | |
931 paddq %xmm9,%xmm6 | |
932 leaq 16(%rdi),%rdi | |
933 subq %r10,%rax | |
934 leaq 32(%r11,%r10,1),%rcx | |
935 movq %rax,%r10 | |
936 jmp L$ccm64_dec_outer | |
937 .p2align 4 | |
938 L$ccm64_dec_outer: | |
939 xorps %xmm2,%xmm8 | |
940 movdqa %xmm6,%xmm2 | |
941 movups %xmm8,(%rsi) | |
942 leaq 16(%rsi),%rsi | |
943 .byte 102,15,56,0,215 | |
944 | |
945 subq $1,%rdx | |
946 jz L$ccm64_dec_break | |
947 | |
948 movups (%r11),%xmm0 | |
949 movq %r10,%rax | |
950 movups 16(%r11),%xmm1 | |
951 xorps %xmm0,%xmm8 | |
952 xorps %xmm0,%xmm2 | |
953 xorps %xmm8,%xmm3 | |
954 movups 32(%r11),%xmm0 | |
955 jmp L$ccm64_dec2_loop | |
956 .p2align 4 | |
957 L$ccm64_dec2_loop: | |
958 .byte 102,15,56,220,209 | |
959 .byte 102,15,56,220,217 | |
960 movups (%rcx,%rax,1),%xmm1 | |
961 addq $32,%rax | |
962 .byte 102,15,56,220,208 | |
963 .byte 102,15,56,220,216 | |
964 movups -16(%rcx,%rax,1),%xmm0 | |
965 jnz L$ccm64_dec2_loop | |
966 movups (%rdi),%xmm8 | |
967 paddq %xmm9,%xmm6 | |
968 .byte 102,15,56,220,209 | |
969 .byte 102,15,56,220,217 | |
970 .byte 102,15,56,221,208 | |
971 .byte 102,15,56,221,216 | |
972 leaq 16(%rdi),%rdi | |
973 jmp L$ccm64_dec_outer | |
974 | |
975 .p2align 4 | |
976 L$ccm64_dec_break: | |
977 | |
978 movl 240(%r11),%eax | |
979 movups (%r11),%xmm0 | |
980 movups 16(%r11),%xmm1 | |
981 xorps %xmm0,%xmm8 | |
982 leaq 32(%r11),%r11 | |
983 xorps %xmm8,%xmm3 | |
984 L$oop_enc1_6: | |
985 .byte 102,15,56,220,217 | |
986 decl %eax | |
987 movups (%r11),%xmm1 | |
988 leaq 16(%r11),%r11 | |
989 jnz L$oop_enc1_6 | |
990 .byte 102,15,56,221,217 | |
991 pxor %xmm0,%xmm0 | |
992 pxor %xmm1,%xmm1 | |
993 pxor %xmm2,%xmm2 | |
994 movups %xmm3,(%r9) | |
995 pxor %xmm3,%xmm3 | |
996 pxor %xmm8,%xmm8 | |
997 pxor %xmm6,%xmm6 | |
998 .byte 0xf3,0xc3 | |
999 | |
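/*
 * _aesni_ctr32_encrypt_blocks(in %rdi, out %rsi, blocks %rdx, key %rcx,
 * ivec %r8): CTR mode that increments only the low 32 bits of the counter
 * (the big-endian word at offset 12 of the IV block).  A single block is
 * special-cased up front; the bulk path keeps eight counter blocks on the
 * stack and uses the _OPENSSL_ia32cap_P bits to choose between an
 * eight-block main loop and a six-block loop built around movbe stores.
 */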
1000 .globl _aesni_ctr32_encrypt_blocks | |
1001 .private_extern _aesni_ctr32_encrypt_blocks | |
1002 | |
1003 .p2align 4 | |
1004 _aesni_ctr32_encrypt_blocks: | |
1005 cmpq $1,%rdx | |
1006 jne L$ctr32_bulk | |
1007 | |
1008 | |
1009 | |
1010 movups (%r8),%xmm2 | |
1011 movups (%rdi),%xmm3 | |
1012 movl 240(%rcx),%edx | |
1013 movups (%rcx),%xmm0 | |
1014 movups 16(%rcx),%xmm1 | |
1015 leaq 32(%rcx),%rcx | |
1016 xorps %xmm0,%xmm2 | |
1017 L$oop_enc1_7: | |
1018 .byte 102,15,56,220,209 | |
1019 decl %edx | |
1020 movups (%rcx),%xmm1 | |
1021 leaq 16(%rcx),%rcx | |
1022 jnz L$oop_enc1_7 | |
1023 .byte 102,15,56,221,209 | |
1024 pxor %xmm0,%xmm0 | |
1025 pxor %xmm1,%xmm1 | |
1026 xorps %xmm3,%xmm2 | |
1027 pxor %xmm3,%xmm3 | |
1028 movups %xmm2,(%rsi) | |
1029 xorps %xmm2,%xmm2 | |
1030 jmp L$ctr32_epilogue | |
1031 | |
1032 .p2align 4 | |
1033 L$ctr32_bulk: | |
1034 leaq (%rsp),%r11 | |
1035 pushq %rbp | |
1036 subq $128,%rsp | |
1037 andq $-16,%rsp | |
1038 | |
1039 | |
1040 | |
1041 | |
1042 movdqu (%r8),%xmm2 | |
1043 movdqu (%rcx),%xmm0 | |
1044 movl 12(%r8),%r8d | |
1045 pxor %xmm0,%xmm2 | |
1046 movl 12(%rcx),%ebp | |
1047 movdqa %xmm2,0(%rsp) | |
1048 bswapl %r8d | |
1049 movdqa %xmm2,%xmm3 | |
1050 movdqa %xmm2,%xmm4 | |
1051 movdqa %xmm2,%xmm5 | |
1052 movdqa %xmm2,64(%rsp) | |
1053 movdqa %xmm2,80(%rsp) | |
1054 movdqa %xmm2,96(%rsp) | |
1055 movq %rdx,%r10 | |
1056 movdqa %xmm2,112(%rsp) | |
1057 | |
1058 leaq 1(%r8),%rax | |
1059 leaq 2(%r8),%rdx | |
1060 bswapl %eax | |
1061 bswapl %edx | |
1062 xorl %ebp,%eax | |
1063 xorl %ebp,%edx | |
1064 .byte 102,15,58,34,216,3 | |
1065 leaq 3(%r8),%rax | |
1066 movdqa %xmm3,16(%rsp) | |
1067 .byte 102,15,58,34,226,3 | |
1068 bswapl %eax | |
1069 movq %r10,%rdx | |
1070 leaq 4(%r8),%r10 | |
1071 movdqa %xmm4,32(%rsp) | |
1072 xorl %ebp,%eax | |
1073 bswapl %r10d | |
1074 .byte 102,15,58,34,232,3 | |
1075 xorl %ebp,%r10d | |
1076 movdqa %xmm5,48(%rsp) | |
1077 leaq 5(%r8),%r9 | |
1078 movl %r10d,64+12(%rsp) | |
1079 bswapl %r9d | |
1080 leaq 6(%r8),%r10 | |
1081 movl 240(%rcx),%eax | |
1082 xorl %ebp,%r9d | |
1083 bswapl %r10d | |
1084 movl %r9d,80+12(%rsp) | |
1085 xorl %ebp,%r10d | |
1086 leaq 7(%r8),%r9 | |
1087 movl %r10d,96+12(%rsp) | |
1088 bswapl %r9d | |
1089 movl _OPENSSL_ia32cap_P+4(%rip),%r10d | |
1090 xorl %ebp,%r9d | |
1091 andl $71303168,%r10d | |
1092 movl %r9d,112+12(%rsp) | |
1093 | |
1094 movups 16(%rcx),%xmm1 | |
1095 | |
1096 movdqa 64(%rsp),%xmm6 | |
1097 movdqa 80(%rsp),%xmm7 | |
1098 | |
1099 cmpq $8,%rdx | |
1100 jb L$ctr32_tail | |
1101 | |
1102 subq $6,%rdx | |
1103 cmpl $4194304,%r10d | |
1104 je L$ctr32_6x | |
1105 | |
1106 leaq 128(%rcx),%rcx | |
1107 subq $2,%rdx | |
1108 jmp L$ctr32_loop8 | |
1109 | |
1110 .p2align 4 | |
1111 L$ctr32_6x: | |
1112 shll $4,%eax | |
1113 movl $48,%r10d | |
1114 bswapl %ebp | |
1115 leaq 32(%rcx,%rax,1),%rcx | |
1116 subq %rax,%r10 | |
1117 jmp L$ctr32_loop6 | |
1118 | |
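/*
 * L$ctr32_loop6: the .byte 0x0f,0x38,0xf1,0x44,0x24,NN sequences are
 * movbe %eax,NN(%rsp), byte-swapping stores that refresh the counters for
 * the next six blocks while the current six are still in the AES rounds.
 */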
1119 .p2align 4 | |
1120 L$ctr32_loop6: | |
1121 addl $6,%r8d | |
1122 movups -48(%rcx,%r10,1),%xmm0 | |
1123 .byte 102,15,56,220,209 | |
1124 movl %r8d,%eax | |
1125 xorl %ebp,%eax | |
1126 .byte 102,15,56,220,217 | |
1127 .byte 0x0f,0x38,0xf1,0x44,0x24,12 | |
1128 leal 1(%r8),%eax | |
1129 .byte 102,15,56,220,225 | |
1130 xorl %ebp,%eax | |
1131 .byte 0x0f,0x38,0xf1,0x44,0x24,28 | |
1132 .byte 102,15,56,220,233 | |
1133 leal 2(%r8),%eax | |
1134 xorl %ebp,%eax | |
1135 .byte 102,15,56,220,241 | |
1136 .byte 0x0f,0x38,0xf1,0x44,0x24,44 | |
1137 leal 3(%r8),%eax | |
1138 .byte 102,15,56,220,249 | |
1139 movups -32(%rcx,%r10,1),%xmm1 | |
1140 xorl %ebp,%eax | |
1141 | |
1142 .byte 102,15,56,220,208 | |
1143 .byte 0x0f,0x38,0xf1,0x44,0x24,60 | |
1144 leal 4(%r8),%eax | |
1145 .byte 102,15,56,220,216 | |
1146 xorl %ebp,%eax | |
1147 .byte 0x0f,0x38,0xf1,0x44,0x24,76 | |
1148 .byte 102,15,56,220,224 | |
1149 leal 5(%r8),%eax | |
1150 xorl %ebp,%eax | |
1151 .byte 102,15,56,220,232 | |
1152 .byte 0x0f,0x38,0xf1,0x44,0x24,92 | |
1153 movq %r10,%rax | |
1154 .byte 102,15,56,220,240 | |
1155 .byte 102,15,56,220,248 | |
1156 movups -16(%rcx,%r10,1),%xmm0 | |
1157 | |
1158 call L$enc_loop6 | |
1159 | |
1160 movdqu (%rdi),%xmm8 | |
1161 movdqu 16(%rdi),%xmm9 | |
1162 movdqu 32(%rdi),%xmm10 | |
1163 movdqu 48(%rdi),%xmm11 | |
1164 movdqu 64(%rdi),%xmm12 | |
1165 movdqu 80(%rdi),%xmm13 | |
1166 leaq 96(%rdi),%rdi | |
1167 movups -64(%rcx,%r10,1),%xmm1 | |
1168 pxor %xmm2,%xmm8 | |
1169 movaps 0(%rsp),%xmm2 | |
1170 pxor %xmm3,%xmm9 | |
1171 movaps 16(%rsp),%xmm3 | |
1172 pxor %xmm4,%xmm10 | |
1173 movaps 32(%rsp),%xmm4 | |
1174 pxor %xmm5,%xmm11 | |
1175 movaps 48(%rsp),%xmm5 | |
1176 pxor %xmm6,%xmm12 | |
1177 movaps 64(%rsp),%xmm6 | |
1178 pxor %xmm7,%xmm13 | |
1179 movaps 80(%rsp),%xmm7 | |
1180 movdqu %xmm8,(%rsi) | |
1181 movdqu %xmm9,16(%rsi) | |
1182 movdqu %xmm10,32(%rsi) | |
1183 movdqu %xmm11,48(%rsi) | |
1184 movdqu %xmm12,64(%rsi) | |
1185 movdqu %xmm13,80(%rsi) | |
1186 leaq 96(%rsi),%rsi | |
1187 | |
1188 subq $6,%rdx | |
1189 jnc L$ctr32_loop6 | |
1190 | |
1191 addq $6,%rdx | |
1192 jz L$ctr32_done | |
1193 | |
1194 leal -48(%r10),%eax | |
1195 leaq -80(%rcx,%r10,1),%rcx | |
1196 negl %eax | |
1197 shrl $4,%eax | |
1198 jmp L$ctr32_tail | |
1199 | |
1200 .p2align 5 | |
1201 L$ctr32_loop8: | |
1202 addl $8,%r8d | |
1203 movdqa 96(%rsp),%xmm8 | |
1204 .byte 102,15,56,220,209 | |
1205 movl %r8d,%r9d | |
1206 movdqa 112(%rsp),%xmm9 | |
1207 .byte 102,15,56,220,217 | |
1208 bswapl %r9d | |
1209 movups 32-128(%rcx),%xmm0 | |
1210 .byte 102,15,56,220,225 | |
1211 xorl %ebp,%r9d | |
1212 nop | |
1213 .byte 102,15,56,220,233 | |
1214 movl %r9d,0+12(%rsp) | |
1215 leaq 1(%r8),%r9 | |
1216 .byte 102,15,56,220,241 | |
1217 .byte 102,15,56,220,249 | |
1218 .byte 102,68,15,56,220,193 | |
1219 .byte 102,68,15,56,220,201 | |
1220 movups 48-128(%rcx),%xmm1 | |
1221 bswapl %r9d | |
1222 .byte 102,15,56,220,208 | |
1223 .byte 102,15,56,220,216 | |
1224 xorl %ebp,%r9d | |
1225 .byte 0x66,0x90 | |
1226 .byte 102,15,56,220,224 | |
1227 .byte 102,15,56,220,232 | |
1228 movl %r9d,16+12(%rsp) | |
1229 leaq 2(%r8),%r9 | |
1230 .byte 102,15,56,220,240 | |
1231 .byte 102,15,56,220,248 | |
1232 .byte 102,68,15,56,220,192 | |
1233 .byte 102,68,15,56,220,200 | |
1234 movups 64-128(%rcx),%xmm0 | |
1235 bswapl %r9d | |
1236 .byte 102,15,56,220,209 | |
1237 .byte 102,15,56,220,217 | |
1238 xorl %ebp,%r9d | |
1239 .byte 0x66,0x90 | |
1240 .byte 102,15,56,220,225 | |
1241 .byte 102,15,56,220,233 | |
1242 movl %r9d,32+12(%rsp) | |
1243 leaq 3(%r8),%r9 | |
1244 .byte 102,15,56,220,241 | |
1245 .byte 102,15,56,220,249 | |
1246 .byte 102,68,15,56,220,193 | |
1247 .byte 102,68,15,56,220,201 | |
1248 movups 80-128(%rcx),%xmm1 | |
1249 bswapl %r9d | |
1250 .byte 102,15,56,220,208 | |
1251 .byte 102,15,56,220,216 | |
1252 xorl %ebp,%r9d | |
1253 .byte 0x66,0x90 | |
1254 .byte 102,15,56,220,224 | |
1255 .byte 102,15,56,220,232 | |
1256 movl %r9d,48+12(%rsp) | |
1257 leaq 4(%r8),%r9 | |
1258 .byte 102,15,56,220,240 | |
1259 .byte 102,15,56,220,248 | |
1260 .byte 102,68,15,56,220,192 | |
1261 .byte 102,68,15,56,220,200 | |
1262 movups 96-128(%rcx),%xmm0 | |
1263 bswapl %r9d | |
1264 .byte 102,15,56,220,209 | |
1265 .byte 102,15,56,220,217 | |
1266 xorl %ebp,%r9d | |
1267 .byte 0x66,0x90 | |
1268 .byte 102,15,56,220,225 | |
1269 .byte 102,15,56,220,233 | |
1270 movl %r9d,64+12(%rsp) | |
1271 leaq 5(%r8),%r9 | |
1272 .byte 102,15,56,220,241 | |
1273 .byte 102,15,56,220,249 | |
1274 .byte 102,68,15,56,220,193 | |
1275 .byte 102,68,15,56,220,201 | |
1276 movups 112-128(%rcx),%xmm1 | |
1277 bswapl %r9d | |
1278 .byte 102,15,56,220,208 | |
1279 .byte 102,15,56,220,216 | |
1280 xorl %ebp,%r9d | |
1281 .byte 0x66,0x90 | |
1282 .byte 102,15,56,220,224 | |
1283 .byte 102,15,56,220,232 | |
1284 movl %r9d,80+12(%rsp) | |
1285 leaq 6(%r8),%r9 | |
1286 .byte 102,15,56,220,240 | |
1287 .byte 102,15,56,220,248 | |
1288 .byte 102,68,15,56,220,192 | |
1289 .byte 102,68,15,56,220,200 | |
1290 movups 128-128(%rcx),%xmm0 | |
1291 bswapl %r9d | |
1292 .byte 102,15,56,220,209 | |
1293 .byte 102,15,56,220,217 | |
1294 xorl %ebp,%r9d | |
1295 .byte 0x66,0x90 | |
1296 .byte 102,15,56,220,225 | |
1297 .byte 102,15,56,220,233 | |
1298 movl %r9d,96+12(%rsp) | |
1299 leaq 7(%r8),%r9 | |
1300 .byte 102,15,56,220,241 | |
1301 .byte 102,15,56,220,249 | |
1302 .byte 102,68,15,56,220,193 | |
1303 .byte 102,68,15,56,220,201 | |
1304 movups 144-128(%rcx),%xmm1 | |
1305 bswapl %r9d | |
1306 .byte 102,15,56,220,208 | |
1307 .byte 102,15,56,220,216 | |
1308 .byte 102,15,56,220,224 | |
1309 xorl %ebp,%r9d | |
1310 movdqu 0(%rdi),%xmm10 | |
1311 .byte 102,15,56,220,232 | |
1312 movl %r9d,112+12(%rsp) | |
1313 cmpl $11,%eax | |
1314 .byte 102,15,56,220,240 | |
1315 .byte 102,15,56,220,248 | |
1316 .byte 102,68,15,56,220,192 | |
1317 .byte 102,68,15,56,220,200 | |
1318 movups 160-128(%rcx),%xmm0 | |
1319 | |
1320 jb L$ctr32_enc_done | |
1321 | |
1322 .byte 102,15,56,220,209 | |
1323 .byte 102,15,56,220,217 | |
1324 .byte 102,15,56,220,225 | |
1325 .byte 102,15,56,220,233 | |
1326 .byte 102,15,56,220,241 | |
1327 .byte 102,15,56,220,249 | |
1328 .byte 102,68,15,56,220,193 | |
1329 .byte 102,68,15,56,220,201 | |
1330 movups 176-128(%rcx),%xmm1 | |
1331 | |
1332 .byte 102,15,56,220,208 | |
1333 .byte 102,15,56,220,216 | |
1334 .byte 102,15,56,220,224 | |
1335 .byte 102,15,56,220,232 | |
1336 .byte 102,15,56,220,240 | |
1337 .byte 102,15,56,220,248 | |
1338 .byte 102,68,15,56,220,192 | |
1339 .byte 102,68,15,56,220,200 | |
1340 movups 192-128(%rcx),%xmm0 | |
1341 je L$ctr32_enc_done | |
1342 | |
1343 .byte 102,15,56,220,209 | |
1344 .byte 102,15,56,220,217 | |
1345 .byte 102,15,56,220,225 | |
1346 .byte 102,15,56,220,233 | |
1347 .byte 102,15,56,220,241 | |
1348 .byte 102,15,56,220,249 | |
1349 .byte 102,68,15,56,220,193 | |
1350 .byte 102,68,15,56,220,201 | |
1351 movups 208-128(%rcx),%xmm1 | |
1352 | |
1353 .byte 102,15,56,220,208 | |
1354 .byte 102,15,56,220,216 | |
1355 .byte 102,15,56,220,224 | |
1356 .byte 102,15,56,220,232 | |
1357 .byte 102,15,56,220,240 | |
1358 .byte 102,15,56,220,248 | |
1359 .byte 102,68,15,56,220,192 | |
1360 .byte 102,68,15,56,220,200 | |
1361 movups 224-128(%rcx),%xmm0 | |
1362 jmp L$ctr32_enc_done | |
1363 | |
1364 .p2align 4 | |
1365 L$ctr32_enc_done: | |
1366 movdqu 16(%rdi),%xmm11 | |
1367 pxor %xmm0,%xmm10 | |
1368 movdqu 32(%rdi),%xmm12 | |
1369 pxor %xmm0,%xmm11 | |
1370 movdqu 48(%rdi),%xmm13 | |
1371 pxor %xmm0,%xmm12 | |
1372 movdqu 64(%rdi),%xmm14 | |
1373 pxor %xmm0,%xmm13 | |
1374 movdqu 80(%rdi),%xmm15 | |
1375 pxor %xmm0,%xmm14 | |
1376 pxor %xmm0,%xmm15 | |
1377 .byte 102,15,56,220,209 | |
1378 .byte 102,15,56,220,217 | |
1379 .byte 102,15,56,220,225 | |
1380 .byte 102,15,56,220,233 | |
1381 .byte 102,15,56,220,241 | |
1382 .byte 102,15,56,220,249 | |
1383 .byte 102,68,15,56,220,193 | |
1384 .byte 102,68,15,56,220,201 | |
1385 movdqu 96(%rdi),%xmm1 | |
1386 leaq 128(%rdi),%rdi | |
1387 | |
1388 .byte 102,65,15,56,221,210 | |
1389 pxor %xmm0,%xmm1 | |
1390 movdqu 112-128(%rdi),%xmm10 | |
1391 .byte 102,65,15,56,221,219 | |
1392 pxor %xmm0,%xmm10 | |
1393 movdqa 0(%rsp),%xmm11 | |
1394 .byte 102,65,15,56,221,228 | |
1395 .byte 102,65,15,56,221,237 | |
1396 movdqa 16(%rsp),%xmm12 | |
1397 movdqa 32(%rsp),%xmm13 | |
1398 .byte 102,65,15,56,221,246 | |
1399 .byte 102,65,15,56,221,255 | |
1400 movdqa 48(%rsp),%xmm14 | |
1401 movdqa 64(%rsp),%xmm15 | |
1402 .byte 102,68,15,56,221,193 | |
1403 movdqa 80(%rsp),%xmm0 | |
1404 movups 16-128(%rcx),%xmm1 | |
1405 .byte 102,69,15,56,221,202 | |
1406 | |
1407 movups %xmm2,(%rsi) | |
1408 movdqa %xmm11,%xmm2 | |
1409 movups %xmm3,16(%rsi) | |
1410 movdqa %xmm12,%xmm3 | |
1411 movups %xmm4,32(%rsi) | |
1412 movdqa %xmm13,%xmm4 | |
1413 movups %xmm5,48(%rsi) | |
1414 movdqa %xmm14,%xmm5 | |
1415 movups %xmm6,64(%rsi) | |
1416 movdqa %xmm15,%xmm6 | |
1417 movups %xmm7,80(%rsi) | |
1418 movdqa %xmm0,%xmm7 | |
1419 movups %xmm8,96(%rsi) | |
1420 movups %xmm9,112(%rsi) | |
1421 leaq 128(%rsi),%rsi | |
1422 | |
1423 subq $8,%rdx | |
1424 jnc L$ctr32_loop8 | |
1425 | |
1426 addq $8,%rdx | |
1427 jz L$ctr32_done | |
1428 leaq -128(%rcx),%rcx | |
1429 | |
1430 L$ctr32_tail: | |
1431 | |
1432 | |
1433 leaq 16(%rcx),%rcx | |
1434 cmpq $4,%rdx | |
1435 jb L$ctr32_loop3 | |
1436 je L$ctr32_loop4 | |
1437 | |
1438 | |
1439 shll $4,%eax | |
1440 movdqa 96(%rsp),%xmm8 | |
1441 pxor %xmm9,%xmm9 | |
1442 | |
1443 movups 16(%rcx),%xmm0 | |
1444 .byte 102,15,56,220,209 | |
1445 .byte 102,15,56,220,217 | |
1446 leaq 32-16(%rcx,%rax,1),%rcx | |
1447 negq %rax | |
1448 .byte 102,15,56,220,225 | |
1449 addq $16,%rax | |
1450 movups (%rdi),%xmm10 | |
1451 .byte 102,15,56,220,233 | |
1452 .byte 102,15,56,220,241 | |
1453 movups 16(%rdi),%xmm11 | |
1454 movups 32(%rdi),%xmm12 | |
1455 .byte 102,15,56,220,249 | |
1456 .byte 102,68,15,56,220,193 | |
1457 | |
1458 call L$enc_loop8_enter | |
1459 | |
1460 movdqu 48(%rdi),%xmm13 | |
1461 pxor %xmm10,%xmm2 | |
1462 movdqu 64(%rdi),%xmm10 | |
1463 pxor %xmm11,%xmm3 | |
1464 movdqu %xmm2,(%rsi) | |
1465 pxor %xmm12,%xmm4 | |
1466 movdqu %xmm3,16(%rsi) | |
1467 pxor %xmm13,%xmm5 | |
1468 movdqu %xmm4,32(%rsi) | |
1469 pxor %xmm10,%xmm6 | |
1470 movdqu %xmm5,48(%rsi) | |
1471 movdqu %xmm6,64(%rsi) | |
1472 cmpq $6,%rdx | |
1473 jb L$ctr32_done | |
1474 | |
1475 movups 80(%rdi),%xmm11 | |
1476 xorps %xmm11,%xmm7 | |
1477 movups %xmm7,80(%rsi) | |
1478 je L$ctr32_done | |
1479 | |
1480 movups 96(%rdi),%xmm12 | |
1481 xorps %xmm12,%xmm8 | |
1482 movups %xmm8,96(%rsi) | |
1483 jmp L$ctr32_done | |
1484 | |
1485 .p2align 5 | |
1486 L$ctr32_loop4: | |
1487 .byte 102,15,56,220,209 | |
1488 leaq 16(%rcx),%rcx | |
1489 decl %eax | |
1490 .byte 102,15,56,220,217 | |
1491 .byte 102,15,56,220,225 | |
1492 .byte 102,15,56,220,233 | |
1493 movups (%rcx),%xmm1 | |
1494 jnz L$ctr32_loop4 | |
1495 .byte 102,15,56,221,209 | |
1496 .byte 102,15,56,221,217 | |
1497 movups (%rdi),%xmm10 | |
1498 movups 16(%rdi),%xmm11 | |
1499 .byte 102,15,56,221,225 | |
1500 .byte 102,15,56,221,233 | |
1501 movups 32(%rdi),%xmm12 | |
1502 movups 48(%rdi),%xmm13 | |
1503 | |
1504 xorps %xmm10,%xmm2 | |
1505 movups %xmm2,(%rsi) | |
1506 xorps %xmm11,%xmm3 | |
1507 movups %xmm3,16(%rsi) | |
1508 pxor %xmm12,%xmm4 | |
1509 movdqu %xmm4,32(%rsi) | |
1510 pxor %xmm13,%xmm5 | |
1511 movdqu %xmm5,48(%rsi) | |
1512 jmp L$ctr32_done | |
1513 | |
1514 .p2align 5 | |
1515 L$ctr32_loop3: | |
1516 .byte 102,15,56,220,209 | |
1517 leaq 16(%rcx),%rcx | |
1518 decl %eax | |
1519 .byte 102,15,56,220,217 | |
1520 .byte 102,15,56,220,225 | |
1521 movups (%rcx),%xmm1 | |
1522 jnz L$ctr32_loop3 | |
1523 .byte 102,15,56,221,209 | |
1524 .byte 102,15,56,221,217 | |
1525 .byte 102,15,56,221,225 | |
1526 | |
1527 movups (%rdi),%xmm10 | |
1528 xorps %xmm10,%xmm2 | |
1529 movups %xmm2,(%rsi) | |
1530 cmpq $2,%rdx | |
1531 jb L$ctr32_done | |
1532 | |
1533 movups 16(%rdi),%xmm11 | |
1534 xorps %xmm11,%xmm3 | |
1535 movups %xmm3,16(%rsi) | |
1536 je L$ctr32_done | |
1537 | |
1538 movups 32(%rdi),%xmm12 | |
1539 xorps %xmm12,%xmm4 | |
1540 movups %xmm4,32(%rsi) | |
1541 | |
1542 L$ctr32_done: | |
1543 xorps %xmm0,%xmm0 | |
1544 xorl %ebp,%ebp | |
1545 pxor %xmm1,%xmm1 | |
1546 pxor %xmm2,%xmm2 | |
1547 pxor %xmm3,%xmm3 | |
1548 pxor %xmm4,%xmm4 | |
1549 pxor %xmm5,%xmm5 | |
1550 pxor %xmm6,%xmm6 | |
1551 pxor %xmm7,%xmm7 | |
1552 movaps %xmm0,0(%rsp) | |
1553 pxor %xmm8,%xmm8 | |
1554 movaps %xmm0,16(%rsp) | |
1555 pxor %xmm9,%xmm9 | |
1556 movaps %xmm0,32(%rsp) | |
1557 pxor %xmm10,%xmm10 | |
1558 movaps %xmm0,48(%rsp) | |
1559 pxor %xmm11,%xmm11 | |
1560 movaps %xmm0,64(%rsp) | |
1561 pxor %xmm12,%xmm12 | |
1562 movaps %xmm0,80(%rsp) | |
1563 pxor %xmm13,%xmm13 | |
1564 movaps %xmm0,96(%rsp) | |
1565 pxor %xmm14,%xmm14 | |
1566 movaps %xmm0,112(%rsp) | |
1567 pxor %xmm15,%xmm15 | |
1568 movq -8(%r11),%rbp | |
1569 leaq (%r11),%rsp | |
1570 L$ctr32_epilogue: | |
1571 .byte 0xf3,0xc3 | |
1572 | |
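/*
 * _aesni_xts_encrypt(in %rdi, out %rsi, length %rdx, key1 %rcx, key2 %r8,
 * iv %r9): XTS mode.  The 16-byte tweak at (%r9) is first encrypted with
 * key2, then the data is processed with key1 while the tweak is multiplied
 * by x in GF(2^128) between blocks (the L$xts_magic mask together with the
 * psrad/pand/paddq carry sequence).  The grand loop covers six blocks per
 * iteration; L$xts_enc_steal handles the ciphertext-stealing tail when the
 * length is not a multiple of 16.
 */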
1573 .globl _aesni_xts_encrypt | |
1574 .private_extern _aesni_xts_encrypt | |
1575 | |
1576 .p2align 4 | |
1577 _aesni_xts_encrypt: | |
1578 leaq (%rsp),%r11 | |
1579 pushq %rbp | |
1580 subq $112,%rsp | |
1581 andq $-16,%rsp | |
1582 movups (%r9),%xmm2 | |
1583 movl 240(%r8),%eax | |
1584 movl 240(%rcx),%r10d | |
1585 movups (%r8),%xmm0 | |
1586 movups 16(%r8),%xmm1 | |
1587 leaq 32(%r8),%r8 | |
1588 xorps %xmm0,%xmm2 | |
1589 L$oop_enc1_8: | |
1590 .byte 102,15,56,220,209 | |
1591 decl %eax | |
1592 movups (%r8),%xmm1 | |
1593 leaq 16(%r8),%r8 | |
1594 jnz L$oop_enc1_8 | |
1595 .byte 102,15,56,221,209 | |
1596 movups (%rcx),%xmm0 | |
1597 movq %rcx,%rbp | |
1598 movl %r10d,%eax | |
1599 shll $4,%r10d | |
1600 movq %rdx,%r9 | |
1601 andq $-16,%rdx | |
1602 | |
1603 movups 16(%rcx,%r10,1),%xmm1 | |
1604 | |
1605 movdqa L$xts_magic(%rip),%xmm8 | |
1606 movdqa %xmm2,%xmm15 | |
1607 pshufd $0x5f,%xmm2,%xmm9 | |
1608 pxor %xmm0,%xmm1 | |
1609 movdqa %xmm9,%xmm14 | |
1610 paddd %xmm9,%xmm9 | |
1611 movdqa %xmm15,%xmm10 | |
1612 psrad $31,%xmm14 | |
1613 paddq %xmm15,%xmm15 | |
1614 pand %xmm8,%xmm14 | |
1615 pxor %xmm0,%xmm10 | |
1616 pxor %xmm14,%xmm15 | |
1617 movdqa %xmm9,%xmm14 | |
1618 paddd %xmm9,%xmm9 | |
1619 movdqa %xmm15,%xmm11 | |
1620 psrad $31,%xmm14 | |
1621 paddq %xmm15,%xmm15 | |
1622 pand %xmm8,%xmm14 | |
1623 pxor %xmm0,%xmm11 | |
1624 pxor %xmm14,%xmm15 | |
1625 movdqa %xmm9,%xmm14 | |
1626 paddd %xmm9,%xmm9 | |
1627 movdqa %xmm15,%xmm12 | |
1628 psrad $31,%xmm14 | |
1629 paddq %xmm15,%xmm15 | |
1630 pand %xmm8,%xmm14 | |
1631 pxor %xmm0,%xmm12 | |
1632 pxor %xmm14,%xmm15 | |
1633 movdqa %xmm9,%xmm14 | |
1634 paddd %xmm9,%xmm9 | |
1635 movdqa %xmm15,%xmm13 | |
1636 psrad $31,%xmm14 | |
1637 paddq %xmm15,%xmm15 | |
1638 pand %xmm8,%xmm14 | |
1639 pxor %xmm0,%xmm13 | |
1640 pxor %xmm14,%xmm15 | |
1641 movdqa %xmm15,%xmm14 | |
1642 psrad $31,%xmm9 | |
1643 paddq %xmm15,%xmm15 | |
1644 pand %xmm8,%xmm9 | |
1645 pxor %xmm0,%xmm14 | |
1646 pxor %xmm9,%xmm15 | |
1647 movaps %xmm1,96(%rsp) | |
1648 | |
1649 subq $96,%rdx | |
1650 jc L$xts_enc_short | |
1651 | |
1652 movl $16+96,%eax | |
1653 leaq 32(%rbp,%r10,1),%rcx | |
1654 subq %r10,%rax | |
1655 movups 16(%rbp),%xmm1 | |
1656 movq %rax,%r10 | |
1657 leaq L$xts_magic(%rip),%r8 | |
1658 jmp L$xts_enc_grandloop | |
1659 | |
1660 .p2align 5 | |
1661 L$xts_enc_grandloop: | |
1662 movdqu 0(%rdi),%xmm2 | |
1663 movdqa %xmm0,%xmm8 | |
1664 movdqu 16(%rdi),%xmm3 | |
1665 pxor %xmm10,%xmm2 | |
1666 movdqu 32(%rdi),%xmm4 | |
1667 pxor %xmm11,%xmm3 | |
1668 .byte 102,15,56,220,209 | |
1669 movdqu 48(%rdi),%xmm5 | |
1670 pxor %xmm12,%xmm4 | |
1671 .byte 102,15,56,220,217 | |
1672 movdqu 64(%rdi),%xmm6 | |
1673 pxor %xmm13,%xmm5 | |
1674 .byte 102,15,56,220,225 | |
1675 movdqu 80(%rdi),%xmm7 | |
1676 pxor %xmm15,%xmm8 | |
1677 movdqa 96(%rsp),%xmm9 | |
1678 pxor %xmm14,%xmm6 | |
1679 .byte 102,15,56,220,233 | |
1680 movups 32(%rbp),%xmm0 | |
1681 leaq 96(%rdi),%rdi | |
1682 pxor %xmm8,%xmm7 | |
1683 | |
1684 pxor %xmm9,%xmm10 | |
1685 .byte 102,15,56,220,241 | |
1686 pxor %xmm9,%xmm11 | |
1687 movdqa %xmm10,0(%rsp) | |
1688 .byte 102,15,56,220,249 | |
1689 movups 48(%rbp),%xmm1 | |
1690 pxor %xmm9,%xmm12 | |
1691 | |
1692 .byte 102,15,56,220,208 | |
1693 pxor %xmm9,%xmm13 | |
1694 movdqa %xmm11,16(%rsp) | |
1695 .byte 102,15,56,220,216 | |
1696 pxor %xmm9,%xmm14 | |
1697 movdqa %xmm12,32(%rsp) | |
1698 .byte 102,15,56,220,224 | |
1699 .byte 102,15,56,220,232 | |
1700 pxor %xmm9,%xmm8 | |
1701 movdqa %xmm14,64(%rsp) | |
1702 .byte 102,15,56,220,240 | |
1703 .byte 102,15,56,220,248 | |
1704 movups 64(%rbp),%xmm0 | |
1705 movdqa %xmm8,80(%rsp) | |
1706 pshufd $0x5f,%xmm15,%xmm9 | |
1707 jmp L$xts_enc_loop6 | |
1708 .p2align 5 | |
1709 L$xts_enc_loop6: | |
1710 .byte 102,15,56,220,209 | |
1711 .byte 102,15,56,220,217 | |
1712 .byte 102,15,56,220,225 | |
1713 .byte 102,15,56,220,233 | |
1714 .byte 102,15,56,220,241 | |
1715 .byte 102,15,56,220,249 | |
1716 movups -64(%rcx,%rax,1),%xmm1 | |
1717 addq $32,%rax | |
1718 | |
1719 .byte 102,15,56,220,208 | |
1720 .byte 102,15,56,220,216 | |
1721 .byte 102,15,56,220,224 | |
1722 .byte 102,15,56,220,232 | |
1723 .byte 102,15,56,220,240 | |
1724 .byte 102,15,56,220,248 | |
1725 movups -80(%rcx,%rax,1),%xmm0 | |
1726 jnz L$xts_enc_loop6 | |
1727 | |
1728 movdqa (%r8),%xmm8 | |
1729 movdqa %xmm9,%xmm14 | |
1730 paddd %xmm9,%xmm9 | |
1731 .byte 102,15,56,220,209 | |
1732 paddq %xmm15,%xmm15 | |
1733 psrad $31,%xmm14 | |
1734 .byte 102,15,56,220,217 | |
1735 pand %xmm8,%xmm14 | |
1736 movups (%rbp),%xmm10 | |
1737 .byte 102,15,56,220,225 | |
1738 .byte 102,15,56,220,233 | |
1739 .byte 102,15,56,220,241 | |
1740 pxor %xmm14,%xmm15 | |
1741 movaps %xmm10,%xmm11 | |
1742 .byte 102,15,56,220,249 | |
1743 movups -64(%rcx),%xmm1 | |
1744 | |
1745 movdqa %xmm9,%xmm14 | |
1746 .byte 102,15,56,220,208 | |
1747 paddd %xmm9,%xmm9 | |
1748 pxor %xmm15,%xmm10 | |
1749 .byte 102,15,56,220,216 | |
1750 psrad $31,%xmm14 | |
1751 paddq %xmm15,%xmm15 | |
1752 .byte 102,15,56,220,224 | |
1753 .byte 102,15,56,220,232 | |
1754 pand %xmm8,%xmm14 | |
1755 movaps %xmm11,%xmm12 | |
1756 .byte 102,15,56,220,240 | |
1757 pxor %xmm14,%xmm15 | |
1758 movdqa %xmm9,%xmm14 | |
1759 .byte 102,15,56,220,248 | |
1760 movups -48(%rcx),%xmm0 | |
1761 | |
1762 paddd %xmm9,%xmm9 | |
1763 .byte 102,15,56,220,209 | |
1764 pxor %xmm15,%xmm11 | |
1765 psrad $31,%xmm14 | |
1766 .byte 102,15,56,220,217 | |
1767 paddq %xmm15,%xmm15 | |
1768 pand %xmm8,%xmm14 | |
1769 .byte 102,15,56,220,225 | |
1770 .byte 102,15,56,220,233 | |
1771 movdqa %xmm13,48(%rsp) | |
1772 pxor %xmm14,%xmm15 | |
1773 .byte 102,15,56,220,241 | |
1774 movaps %xmm12,%xmm13 | |
1775 movdqa %xmm9,%xmm14 | |
1776 .byte 102,15,56,220,249 | |
1777 movups -32(%rcx),%xmm1 | |
1778 | |
1779 paddd %xmm9,%xmm9 | |
1780 .byte 102,15,56,220,208 | |
1781 pxor %xmm15,%xmm12 | |
1782 psrad $31,%xmm14 | |
1783 .byte 102,15,56,220,216 | |
1784 paddq %xmm15,%xmm15 | |
1785 pand %xmm8,%xmm14 | |
1786 .byte 102,15,56,220,224 | |
1787 .byte 102,15,56,220,232 | |
1788 .byte 102,15,56,220,240 | |
1789 pxor %xmm14,%xmm15 | |
1790 movaps %xmm13,%xmm14 | |
1791 .byte 102,15,56,220,248 | |
1792 | |
1793 movdqa %xmm9,%xmm0 | |
1794 paddd %xmm9,%xmm9 | |
1795 .byte 102,15,56,220,209 | |
1796 pxor %xmm15,%xmm13 | |
1797 psrad $31,%xmm0 | |
1798 .byte 102,15,56,220,217 | |
1799 paddq %xmm15,%xmm15 | |
1800 pand %xmm8,%xmm0 | |
1801 .byte 102,15,56,220,225 | |
1802 .byte 102,15,56,220,233 | |
1803 pxor %xmm0,%xmm15 | |
1804 movups (%rbp),%xmm0 | |
1805 .byte 102,15,56,220,241 | |
1806 .byte 102,15,56,220,249 | |
1807 movups 16(%rbp),%xmm1 | |
1808 | |
1809 pxor %xmm15,%xmm14 | |
1810 .byte 102,15,56,221,84,36,0 | |
1811 psrad $31,%xmm9 | |
1812 paddq %xmm15,%xmm15 | |
1813 .byte 102,15,56,221,92,36,16 | |
1814 .byte 102,15,56,221,100,36,32 | |
1815 pand %xmm8,%xmm9 | |
1816 movq %r10,%rax | |
1817 .byte 102,15,56,221,108,36,48 | |
1818 .byte 102,15,56,221,116,36,64 | |
1819 .byte 102,15,56,221,124,36,80 | |
1820 pxor %xmm9,%xmm15 | |
1821 | |
1822 leaq 96(%rsi),%rsi | |
1823 movups %xmm2,-96(%rsi) | |
1824 movups %xmm3,-80(%rsi) | |
1825 movups %xmm4,-64(%rsi) | |
1826 movups %xmm5,-48(%rsi) | |
1827 movups %xmm6,-32(%rsi) | |
1828 movups %xmm7,-16(%rsi) | |
1829 subq $96,%rdx | |
1830 jnc L$xts_enc_grandloop | |
1831 | |
1832 movl $16+96,%eax | |
1833 subl %r10d,%eax | |
1834 movq %rbp,%rcx | |
1835 shrl $4,%eax | |
1836 | |
1837 L$xts_enc_short: | |
1838 | |
1839 movl %eax,%r10d | |
1840 pxor %xmm0,%xmm10 | |
1841 addq $96,%rdx | |
1842 jz L$xts_enc_done | |
1843 | |
1844 pxor %xmm0,%xmm11 | |
1845 cmpq $0x20,%rdx | |
1846 jb L$xts_enc_one | |
1847 pxor %xmm0,%xmm12 | |
1848 je L$xts_enc_two | |
1849 | |
1850 pxor %xmm0,%xmm13 | |
1851 cmpq $0x40,%rdx | |
1852 jb L$xts_enc_three | |
1853 pxor %xmm0,%xmm14 | |
1854 je L$xts_enc_four | |
1855 | |
1856 movdqu (%rdi),%xmm2 | |
1857 movdqu 16(%rdi),%xmm3 | |
1858 movdqu 32(%rdi),%xmm4 | |
1859 pxor %xmm10,%xmm2 | |
1860 movdqu 48(%rdi),%xmm5 | |
1861 pxor %xmm11,%xmm3 | |
1862 movdqu 64(%rdi),%xmm6 | |
1863 leaq 80(%rdi),%rdi | |
1864 pxor %xmm12,%xmm4 | |
1865 pxor %xmm13,%xmm5 | |
1866 pxor %xmm14,%xmm6 | |
1867 pxor %xmm7,%xmm7 | |
1868 | |
1869 call _aesni_encrypt6 | |
1870 | |
1871 xorps %xmm10,%xmm2 | |
1872 movdqa %xmm15,%xmm10 | |
1873 xorps %xmm11,%xmm3 | |
1874 xorps %xmm12,%xmm4 | |
1875 movdqu %xmm2,(%rsi) | |
1876 xorps %xmm13,%xmm5 | |
1877 movdqu %xmm3,16(%rsi) | |
1878 xorps %xmm14,%xmm6 | |
1879 movdqu %xmm4,32(%rsi) | |
1880 movdqu %xmm5,48(%rsi) | |
1881 movdqu %xmm6,64(%rsi) | |
1882 leaq 80(%rsi),%rsi | |
1883 jmp L$xts_enc_done | |
1884 | |
1885 .p2align 4 | |
1886 L$xts_enc_one: | |
1887 movups (%rdi),%xmm2 | |
1888 leaq 16(%rdi),%rdi | |
1889 xorps %xmm10,%xmm2 | |
1890 movups (%rcx),%xmm0 | |
1891 movups 16(%rcx),%xmm1 | |
1892 leaq 32(%rcx),%rcx | |
1893 xorps %xmm0,%xmm2 | |
1894 L$oop_enc1_9: | |
1895 .byte 102,15,56,220,209 | |
1896 decl %eax | |
1897 movups (%rcx),%xmm1 | |
1898 leaq 16(%rcx),%rcx | |
1899 jnz L$oop_enc1_9 | |
1900 .byte 102,15,56,221,209 | |
1901 xorps %xmm10,%xmm2 | |
1902 movdqa %xmm11,%xmm10 | |
1903 movups %xmm2,(%rsi) | |
1904 leaq 16(%rsi),%rsi | |
1905 jmp L$xts_enc_done | |
1906 | |
1907 .p2align 4 | |
1908 L$xts_enc_two: | |
1909 movups (%rdi),%xmm2 | |
1910 movups 16(%rdi),%xmm3 | |
1911 leaq 32(%rdi),%rdi | |
1912 xorps %xmm10,%xmm2 | |
1913 xorps %xmm11,%xmm3 | |
1914 | |
1915 call _aesni_encrypt2 | |
1916 | |
1917 xorps %xmm10,%xmm2 | |
1918 movdqa %xmm12,%xmm10 | |
1919 xorps %xmm11,%xmm3 | |
1920 movups %xmm2,(%rsi) | |
1921 movups %xmm3,16(%rsi) | |
1922 leaq 32(%rsi),%rsi | |
1923 jmp L$xts_enc_done | |
1924 | |
1925 .p2align 4 | |
1926 L$xts_enc_three: | |
1927 movups (%rdi),%xmm2 | |
1928 movups 16(%rdi),%xmm3 | |
1929 movups 32(%rdi),%xmm4 | |
1930 leaq 48(%rdi),%rdi | |
1931 xorps %xmm10,%xmm2 | |
1932 xorps %xmm11,%xmm3 | |
1933 xorps %xmm12,%xmm4 | |
1934 | |
1935 call _aesni_encrypt3 | |
1936 | |
1937 xorps %xmm10,%xmm2 | |
1938 movdqa %xmm13,%xmm10 | |
1939 xorps %xmm11,%xmm3 | |
1940 xorps %xmm12,%xmm4 | |
1941 movups %xmm2,(%rsi) | |
1942 movups %xmm3,16(%rsi) | |
1943 movups %xmm4,32(%rsi) | |
1944 leaq 48(%rsi),%rsi | |
1945 jmp L$xts_enc_done | |
1946 | |
1947 .p2align 4 | |
1948 L$xts_enc_four: | |
1949 movups (%rdi),%xmm2 | |
1950 movups 16(%rdi),%xmm3 | |
1951 movups 32(%rdi),%xmm4 | |
1952 xorps %xmm10,%xmm2 | |
1953 movups 48(%rdi),%xmm5 | |
1954 leaq 64(%rdi),%rdi | |
1955 xorps %xmm11,%xmm3 | |
1956 xorps %xmm12,%xmm4 | |
1957 xorps %xmm13,%xmm5 | |
1958 | |
1959 call _aesni_encrypt4 | |
1960 | |
1961 pxor %xmm10,%xmm2 | |
1962 movdqa %xmm14,%xmm10 | |
1963 pxor %xmm11,%xmm3 | |
1964 pxor %xmm12,%xmm4 | |
1965 movdqu %xmm2,(%rsi) | |
1966 pxor %xmm13,%xmm5 | |
1967 movdqu %xmm3,16(%rsi) | |
1968 movdqu %xmm4,32(%rsi) | |
1969 movdqu %xmm5,48(%rsi) | |
1970 leaq 64(%rsi),%rsi | |
1971 jmp L$xts_enc_done | |
1972 | |
1973 .p2align 4 | |
1974 L$xts_enc_done: | |
1975 andq $15,%r9 | |
1976 jz L$xts_enc_ret | |
1977 movq %r9,%rdx | |
1978 | |
1979 L$xts_enc_steal: | |
1980 movzbl (%rdi),%eax | |
1981 movzbl -16(%rsi),%ecx | |
1982 leaq 1(%rdi),%rdi | |
1983 movb %al,-16(%rsi) | |
1984 movb %cl,0(%rsi) | |
1985 leaq 1(%rsi),%rsi | |
1986 subq $1,%rdx | |
1987 jnz L$xts_enc_steal | |
1988 | |
1989 subq %r9,%rsi | |
1990 movq %rbp,%rcx | |
1991 movl %r10d,%eax | |
1992 | |
1993 movups -16(%rsi),%xmm2 | |
1994 xorps %xmm10,%xmm2 | |
1995 movups (%rcx),%xmm0 | |
1996 movups 16(%rcx),%xmm1 | |
1997 leaq 32(%rcx),%rcx | |
1998 xorps %xmm0,%xmm2 | |
1999 L$oop_enc1_10: | |
2000 .byte 102,15,56,220,209 | |
2001 decl %eax | |
2002 movups (%rcx),%xmm1 | |
2003 leaq 16(%rcx),%rcx | |
2004 jnz L$oop_enc1_10 | |
2005 .byte 102,15,56,221,209 | |
2006 xorps %xmm10,%xmm2 | |
2007 movups %xmm2,-16(%rsi) | |
2008 | |
2009 L$xts_enc_ret: | |
2010 xorps %xmm0,%xmm0 | |
2011 pxor %xmm1,%xmm1 | |
2012 pxor %xmm2,%xmm2 | |
2013 pxor %xmm3,%xmm3 | |
2014 pxor %xmm4,%xmm4 | |
2015 pxor %xmm5,%xmm5 | |
2016 pxor %xmm6,%xmm6 | |
2017 pxor %xmm7,%xmm7 | |
2018 movaps %xmm0,0(%rsp) | |
2019 pxor %xmm8,%xmm8 | |
2020 movaps %xmm0,16(%rsp) | |
2021 pxor %xmm9,%xmm9 | |
2022 movaps %xmm0,32(%rsp) | |
2023 pxor %xmm10,%xmm10 | |
2024 movaps %xmm0,48(%rsp) | |
2025 pxor %xmm11,%xmm11 | |
2026 movaps %xmm0,64(%rsp) | |
2027 pxor %xmm12,%xmm12 | |
2028 movaps %xmm0,80(%rsp) | |
2029 pxor %xmm13,%xmm13 | |
2030 movaps %xmm0,96(%rsp) | |
2031 pxor %xmm14,%xmm14 | |
2032 pxor %xmm15,%xmm15 | |
2033 movq -8(%r11),%rbp | |
2034 leaq (%r11),%rsp | |
2035 L$xts_enc_epilogue: | |
2036 .byte 0xf3,0xc3 | |
2037 | |
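/*
 * _aesni_xts_decrypt: same register interface and tweak schedule as the
 * encrypt routine (the tweak is always encrypted with key2, hence the
 * aesenc loop in the prologue), with aesdec/aesdeclast in the data path.
 * When the byte length is not a multiple of 16, one full block is held
 * back up front so the ciphertext-stealing tail can be processed last.
 */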
2038 .globl _aesni_xts_decrypt | |
2039 .private_extern _aesni_xts_decrypt | |
2040 | |
2041 .p2align 4 | |
2042 _aesni_xts_decrypt: | |
2043 leaq (%rsp),%r11 | |
2044 pushq %rbp | |
2045 subq $112,%rsp | |
2046 andq $-16,%rsp | |
2047 movups (%r9),%xmm2 | |
2048 movl 240(%r8),%eax | |
2049 movl 240(%rcx),%r10d | |
2050 movups (%r8),%xmm0 | |
2051 movups 16(%r8),%xmm1 | |
2052 leaq 32(%r8),%r8 | |
2053 xorps %xmm0,%xmm2 | |
2054 L$oop_enc1_11: | |
2055 .byte 102,15,56,220,209 | |
2056 decl %eax | |
2057 movups (%r8),%xmm1 | |
2058 leaq 16(%r8),%r8 | |
2059 jnz L$oop_enc1_11 | |
2060 .byte 102,15,56,221,209 | |
2061 xorl %eax,%eax | |
2062 testq $15,%rdx | |
2063 setnz %al | |
2064 shlq $4,%rax | |
2065 subq %rax,%rdx | |
2066 | |
2067 movups (%rcx),%xmm0 | |
2068 movq %rcx,%rbp | |
2069 movl %r10d,%eax | |
2070 shll $4,%r10d | |
2071 movq %rdx,%r9 | |
2072 andq $-16,%rdx | |
2073 | |
2074 movups 16(%rcx,%r10,1),%xmm1 | |
2075 | |
2076 movdqa L$xts_magic(%rip),%xmm8 | |
2077 movdqa %xmm2,%xmm15 | |
2078 pshufd $0x5f,%xmm2,%xmm9 | |
2079 pxor %xmm0,%xmm1 | |
2080 movdqa %xmm9,%xmm14 | |
2081 paddd %xmm9,%xmm9 | |
2082 movdqa %xmm15,%xmm10 | |
2083 psrad $31,%xmm14 | |
2084 paddq %xmm15,%xmm15 | |
2085 pand %xmm8,%xmm14 | |
2086 pxor %xmm0,%xmm10 | |
2087 pxor %xmm14,%xmm15 | |
2088 movdqa %xmm9,%xmm14 | |
2089 paddd %xmm9,%xmm9 | |
2090 movdqa %xmm15,%xmm11 | |
2091 psrad $31,%xmm14 | |
2092 paddq %xmm15,%xmm15 | |
2093 pand %xmm8,%xmm14 | |
2094 pxor %xmm0,%xmm11 | |
2095 pxor %xmm14,%xmm15 | |
2096 movdqa %xmm9,%xmm14 | |
2097 paddd %xmm9,%xmm9 | |
2098 movdqa %xmm15,%xmm12 | |
2099 psrad $31,%xmm14 | |
2100 paddq %xmm15,%xmm15 | |
2101 pand %xmm8,%xmm14 | |
2102 pxor %xmm0,%xmm12 | |
2103 pxor %xmm14,%xmm15 | |
2104 movdqa %xmm9,%xmm14 | |
2105 paddd %xmm9,%xmm9 | |
2106 movdqa %xmm15,%xmm13 | |
2107 psrad $31,%xmm14 | |
2108 paddq %xmm15,%xmm15 | |
2109 pand %xmm8,%xmm14 | |
2110 pxor %xmm0,%xmm13 | |
2111 pxor %xmm14,%xmm15 | |
2112 movdqa %xmm15,%xmm14 | |
2113 psrad $31,%xmm9 | |
2114 paddq %xmm15,%xmm15 | |
2115 pand %xmm8,%xmm9 | |
2116 pxor %xmm0,%xmm14 | |
2117 pxor %xmm9,%xmm15 | |
2118 movaps %xmm1,96(%rsp) | |
2119 | |
2120 subq $96,%rdx | |
2121 jc L$xts_dec_short | |
2122 | |
2123 movl $16+96,%eax | |
2124 leaq 32(%rbp,%r10,1),%rcx | |
2125 subq %r10,%rax | |
2126 movups 16(%rbp),%xmm1 | |
2127 movq %rax,%r10 | |
2128 leaq L$xts_magic(%rip),%r8 | |
2129 jmp L$xts_dec_grandloop | |
2130 | |
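# Main decrypt loop: six blocks (96 bytes) per iteration.  The ciphertext is
# xored with the six tweaks up front, the tweaks (pre-combined with round-key
# material) are parked on the stack where the final aesdeclast picks them up
# as memory operands, and the next batch of tweaks is computed while the
# aesdec rounds are in flight.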
2131 .p2align 5 | |
2132 L$xts_dec_grandloop: | |
2133 movdqu 0(%rdi),%xmm2 | |
2134 movdqa %xmm0,%xmm8 | |
2135 movdqu 16(%rdi),%xmm3 | |
2136 pxor %xmm10,%xmm2 | |
2137 movdqu 32(%rdi),%xmm4 | |
2138 pxor %xmm11,%xmm3 | |
2139 .byte 102,15,56,222,209 | |
2140 movdqu 48(%rdi),%xmm5 | |
2141 pxor %xmm12,%xmm4 | |
2142 .byte 102,15,56,222,217 | |
2143 movdqu 64(%rdi),%xmm6 | |
2144 pxor %xmm13,%xmm5 | |
2145 .byte 102,15,56,222,225 | |
2146 movdqu 80(%rdi),%xmm7 | |
2147 pxor %xmm15,%xmm8 | |
2148 movdqa 96(%rsp),%xmm9 | |
2149 pxor %xmm14,%xmm6 | |
2150 .byte 102,15,56,222,233 | |
2151 movups 32(%rbp),%xmm0 | |
2152 leaq 96(%rdi),%rdi | |
2153 pxor %xmm8,%xmm7 | |
2154 | |
2155 pxor %xmm9,%xmm10 | |
2156 .byte 102,15,56,222,241 | |
2157 pxor %xmm9,%xmm11 | |
2158 movdqa %xmm10,0(%rsp) | |
2159 .byte 102,15,56,222,249 | |
2160 movups 48(%rbp),%xmm1 | |
2161 pxor %xmm9,%xmm12 | |
2162 | |
2163 .byte 102,15,56,222,208 | |
2164 pxor %xmm9,%xmm13 | |
2165 movdqa %xmm11,16(%rsp) | |
2166 .byte 102,15,56,222,216 | |
2167 pxor %xmm9,%xmm14 | |
2168 movdqa %xmm12,32(%rsp) | |
2169 .byte 102,15,56,222,224 | |
2170 .byte 102,15,56,222,232 | |
2171 pxor %xmm9,%xmm8 | |
2172 movdqa %xmm14,64(%rsp) | |
2173 .byte 102,15,56,222,240 | |
2174 .byte 102,15,56,222,248 | |
2175 movups 64(%rbp),%xmm0 | |
2176 movdqa %xmm8,80(%rsp) | |
2177 pshufd $0x5f,%xmm15,%xmm9 | |
2178 jmp L$xts_dec_loop6 | |
2179 .p2align 5 | |
2180 L$xts_dec_loop6: | |
2181 .byte 102,15,56,222,209 | |
2182 .byte 102,15,56,222,217 | |
2183 .byte 102,15,56,222,225 | |
2184 .byte 102,15,56,222,233 | |
2185 .byte 102,15,56,222,241 | |
2186 .byte 102,15,56,222,249 | |
2187 movups -64(%rcx,%rax,1),%xmm1 | |
2188 addq $32,%rax | |
2189 | |
2190 .byte 102,15,56,222,208 | |
2191 .byte 102,15,56,222,216 | |
2192 .byte 102,15,56,222,224 | |
2193 .byte 102,15,56,222,232 | |
2194 .byte 102,15,56,222,240 | |
2195 .byte 102,15,56,222,248 | |
2196 movups -80(%rcx,%rax,1),%xmm0 | |
2197 jnz L$xts_dec_loop6 | |
2198 | |
2199 movdqa (%r8),%xmm8 | |
2200 movdqa %xmm9,%xmm14 | |
2201 paddd %xmm9,%xmm9 | |
2202 .byte 102,15,56,222,209 | |
2203 paddq %xmm15,%xmm15 | |
2204 psrad $31,%xmm14 | |
2205 .byte 102,15,56,222,217 | |
2206 pand %xmm8,%xmm14 | |
2207 movups (%rbp),%xmm10 | |
2208 .byte 102,15,56,222,225 | |
2209 .byte 102,15,56,222,233 | |
2210 .byte 102,15,56,222,241 | |
2211 pxor %xmm14,%xmm15 | |
2212 movaps %xmm10,%xmm11 | |
2213 .byte 102,15,56,222,249 | |
2214 movups -64(%rcx),%xmm1 | |
2215 | |
2216 movdqa %xmm9,%xmm14 | |
2217 .byte 102,15,56,222,208 | |
2218 paddd %xmm9,%xmm9 | |
2219 pxor %xmm15,%xmm10 | |
2220 .byte 102,15,56,222,216 | |
2221 psrad $31,%xmm14 | |
2222 paddq %xmm15,%xmm15 | |
2223 .byte 102,15,56,222,224 | |
2224 .byte 102,15,56,222,232 | |
2225 pand %xmm8,%xmm14 | |
2226 movaps %xmm11,%xmm12 | |
2227 .byte 102,15,56,222,240 | |
2228 pxor %xmm14,%xmm15 | |
2229 movdqa %xmm9,%xmm14 | |
2230 .byte 102,15,56,222,248 | |
2231 movups -48(%rcx),%xmm0 | |
2232 | |
2233 paddd %xmm9,%xmm9 | |
2234 .byte 102,15,56,222,209 | |
2235 pxor %xmm15,%xmm11 | |
2236 psrad $31,%xmm14 | |
2237 .byte 102,15,56,222,217 | |
2238 paddq %xmm15,%xmm15 | |
2239 pand %xmm8,%xmm14 | |
2240 .byte 102,15,56,222,225 | |
2241 .byte 102,15,56,222,233 | |
2242 movdqa %xmm13,48(%rsp) | |
2243 pxor %xmm14,%xmm15 | |
2244 .byte 102,15,56,222,241 | |
2245 movaps %xmm12,%xmm13 | |
2246 movdqa %xmm9,%xmm14 | |
2247 .byte 102,15,56,222,249 | |
2248 movups -32(%rcx),%xmm1 | |
2249 | |
2250 paddd %xmm9,%xmm9 | |
2251 .byte 102,15,56,222,208 | |
2252 pxor %xmm15,%xmm12 | |
2253 psrad $31,%xmm14 | |
2254 .byte 102,15,56,222,216 | |
2255 paddq %xmm15,%xmm15 | |
2256 pand %xmm8,%xmm14 | |
2257 .byte 102,15,56,222,224 | |
2258 .byte 102,15,56,222,232 | |
2259 .byte 102,15,56,222,240 | |
2260 pxor %xmm14,%xmm15 | |
2261 movaps %xmm13,%xmm14 | |
2262 .byte 102,15,56,222,248 | |
2263 | |
2264 movdqa %xmm9,%xmm0 | |
2265 paddd %xmm9,%xmm9 | |
2266 .byte 102,15,56,222,209 | |
2267 pxor %xmm15,%xmm13 | |
2268 psrad $31,%xmm0 | |
2269 .byte 102,15,56,222,217 | |
2270 paddq %xmm15,%xmm15 | |
2271 pand %xmm8,%xmm0 | |
2272 .byte 102,15,56,222,225 | |
2273 .byte 102,15,56,222,233 | |
2274 pxor %xmm0,%xmm15 | |
2275 movups (%rbp),%xmm0 | |
2276 .byte 102,15,56,222,241 | |
2277 .byte 102,15,56,222,249 | |
2278 movups 16(%rbp),%xmm1 | |
2279 | |
2280 pxor %xmm15,%xmm14 | |
2281 .byte 102,15,56,223,84,36,0 | |
2282 psrad $31,%xmm9 | |
2283 paddq %xmm15,%xmm15 | |
2284 .byte 102,15,56,223,92,36,16 | |
2285 .byte 102,15,56,223,100,36,32 | |
2286 pand %xmm8,%xmm9 | |
2287 movq %r10,%rax | |
2288 .byte 102,15,56,223,108,36,48 | |
2289 .byte 102,15,56,223,116,36,64 | |
2290 .byte 102,15,56,223,124,36,80 | |
2291 pxor %xmm9,%xmm15 | |
2292 | |
2293 leaq 96(%rsi),%rsi | |
2294 movups %xmm2,-96(%rsi) | |
2295 movups %xmm3,-80(%rsi) | |
2296 movups %xmm4,-64(%rsi) | |
2297 movups %xmm5,-48(%rsi) | |
2298 movups %xmm6,-32(%rsi) | |
2299 movups %xmm7,-16(%rsi) | |
2300 subq $96,%rdx | |
2301 jnc L$xts_dec_grandloop | |
2302 | |
2303 movl $16+96,%eax | |
2304 subl %r10d,%eax | |
2305 movq %rbp,%rcx | |
2306 shrl $4,%eax | |
2307 | |
2308 L$xts_dec_short: | |
2309 | |
2310 movl %eax,%r10d | |
2311 pxor %xmm0,%xmm10 | |
2312 pxor %xmm0,%xmm11 | |
2313 addq $96,%rdx | |
2314 jz L$xts_dec_done | |
2315 | |
2316 pxor %xmm0,%xmm12 | |
2317 cmpq $0x20,%rdx | |
2318 jb L$xts_dec_one | |
2319 pxor %xmm0,%xmm13 | |
2320 je L$xts_dec_two | |
2321 | |
2322 pxor %xmm0,%xmm14 | |
2323 cmpq $0x40,%rdx | |
2324 jb L$xts_dec_three | |
2325 je L$xts_dec_four | |
2326 | |
2327 movdqu (%rdi),%xmm2 | |
2328 movdqu 16(%rdi),%xmm3 | |
2329 movdqu 32(%rdi),%xmm4 | |
2330 pxor %xmm10,%xmm2 | |
2331 movdqu 48(%rdi),%xmm5 | |
2332 pxor %xmm11,%xmm3 | |
2333 movdqu 64(%rdi),%xmm6 | |
2334 leaq 80(%rdi),%rdi | |
2335 pxor %xmm12,%xmm4 | |
2336 pxor %xmm13,%xmm5 | |
2337 pxor %xmm14,%xmm6 | |
2338 | |
2339 call _aesni_decrypt6 | |
2340 | |
2341 xorps %xmm10,%xmm2 | |
2342 xorps %xmm11,%xmm3 | |
2343 xorps %xmm12,%xmm4 | |
2344 movdqu %xmm2,(%rsi) | |
2345 xorps %xmm13,%xmm5 | |
2346 movdqu %xmm3,16(%rsi) | |
2347 xorps %xmm14,%xmm6 | |
2348 movdqu %xmm4,32(%rsi) | |
2349 pxor %xmm14,%xmm14 | |
2350 movdqu %xmm5,48(%rsi) | |
2351 pcmpgtd %xmm15,%xmm14 | |
2352 movdqu %xmm6,64(%rsi) | |
2353 leaq 80(%rsi),%rsi | |
2354 pshufd $0x13,%xmm14,%xmm11 | |
2355 andq $15,%r9 | |
2356 jz L$xts_dec_ret | |
2357 | |
2358 movdqa %xmm15,%xmm10 | |
2359 paddq %xmm15,%xmm15 | |
2360 pand %xmm8,%xmm11 | |
2361 pxor %xmm15,%xmm11 | |
2362 jmp L$xts_dec_done2 | |
2363 | |
2364 .p2align 4 | |
2365 L$xts_dec_one: | |
2366 movups (%rdi),%xmm2 | |
2367 leaq 16(%rdi),%rdi | |
2368 xorps %xmm10,%xmm2 | |
2369 movups (%rcx),%xmm0 | |
2370 movups 16(%rcx),%xmm1 | |
2371 leaq 32(%rcx),%rcx | |
2372 xorps %xmm0,%xmm2 | |
2373 L$oop_dec1_12: | |
2374 .byte 102,15,56,222,209 | |
2375 decl %eax | |
2376 movups (%rcx),%xmm1 | |
2377 leaq 16(%rcx),%rcx | |
2378 jnz L$oop_dec1_12 | |
2379 .byte 102,15,56,223,209 | |
2380 xorps %xmm10,%xmm2 | |
2381 movdqa %xmm11,%xmm10 | |
2382 movups %xmm2,(%rsi) | |
2383 movdqa %xmm12,%xmm11 | |
2384 leaq 16(%rsi),%rsi | |
2385 jmp L$xts_dec_done | |
2386 | |
2387 .p2align 4 | |
2388 L$xts_dec_two: | |
2389 movups (%rdi),%xmm2 | |
2390 movups 16(%rdi),%xmm3 | |
2391 leaq 32(%rdi),%rdi | |
2392 xorps %xmm10,%xmm2 | |
2393 xorps %xmm11,%xmm3 | |
2394 | |
2395 call _aesni_decrypt2 | |
2396 | |
2397 xorps %xmm10,%xmm2 | |
2398 movdqa %xmm12,%xmm10 | |
2399 xorps %xmm11,%xmm3 | |
2400 movdqa %xmm13,%xmm11 | |
2401 movups %xmm2,(%rsi) | |
2402 movups %xmm3,16(%rsi) | |
2403 leaq 32(%rsi),%rsi | |
2404 jmp L$xts_dec_done | |
2405 | |
2406 .p2align 4 | |
2407 L$xts_dec_three: | |
2408 movups (%rdi),%xmm2 | |
2409 movups 16(%rdi),%xmm3 | |
2410 movups 32(%rdi),%xmm4 | |
2411 leaq 48(%rdi),%rdi | |
2412 xorps %xmm10,%xmm2 | |
2413 xorps %xmm11,%xmm3 | |
2414 xorps %xmm12,%xmm4 | |
2415 | |
2416 call _aesni_decrypt3 | |
2417 | |
2418 xorps %xmm10,%xmm2 | |
2419 movdqa %xmm13,%xmm10 | |
2420 xorps %xmm11,%xmm3 | |
2421 movdqa %xmm14,%xmm11 | |
2422 xorps %xmm12,%xmm4 | |
2423 movups %xmm2,(%rsi) | |
2424 movups %xmm3,16(%rsi) | |
2425 movups %xmm4,32(%rsi) | |
2426 leaq 48(%rsi),%rsi | |
2427 jmp L$xts_dec_done | |
2428 | |
2429 .p2align 4 | |
2430 L$xts_dec_four: | |
2431 movups (%rdi),%xmm2 | |
2432 movups 16(%rdi),%xmm3 | |
2433 movups 32(%rdi),%xmm4 | |
2434 xorps %xmm10,%xmm2 | |
2435 movups 48(%rdi),%xmm5 | |
2436 leaq 64(%rdi),%rdi | |
2437 xorps %xmm11,%xmm3 | |
2438 xorps %xmm12,%xmm4 | |
2439 xorps %xmm13,%xmm5 | |
2440 | |
2441 call _aesni_decrypt4 | |
2442 | |
2443 pxor %xmm10,%xmm2 | |
2444 movdqa %xmm14,%xmm10 | |
2445 pxor %xmm11,%xmm3 | |
2446 movdqa %xmm15,%xmm11 | |
2447 pxor %xmm12,%xmm4 | |
2448 movdqu %xmm2,(%rsi) | |
2449 pxor %xmm13,%xmm5 | |
2450 movdqu %xmm3,16(%rsi) | |
2451 movdqu %xmm4,32(%rsi) | |
2452 movdqu %xmm5,48(%rsi) | |
2453 leaq 64(%rsi),%rsi | |
2454 jmp L$xts_dec_done | |
2455 | |
2456 .p2align 4 | |
2457 L$xts_dec_done: | |
2458 andq $15,%r9 | |
2459 jz L$xts_dec_ret | |
2460 L$xts_dec_done2: | |
2461 movq %r9,%rdx | |
2462 movq %rbp,%rcx | |
2463 movl %r10d,%eax | |
2464 | |
2465 movups (%rdi),%xmm2 | |
2466 xorps %xmm11,%xmm2 | |
2467 movups (%rcx),%xmm0 | |
2468 movups 16(%rcx),%xmm1 | |
2469 leaq 32(%rcx),%rcx | |
2470 xorps %xmm0,%xmm2 | |
2471 L$oop_dec1_13: | |
2472 .byte 102,15,56,222,209 | |
2473 decl %eax | |
2474 movups (%rcx),%xmm1 | |
2475 leaq 16(%rcx),%rcx | |
2476 jnz L$oop_dec1_13 | |
2477 .byte 102,15,56,223,209 | |
2478 xorps %xmm11,%xmm2 | |
2479 movups %xmm2,(%rsi) | |
2480 | |
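# Ciphertext stealing for a trailing partial block: the block just decrypted
# above used the following tweak; swap bytes with the tail here, then decrypt
# the reassembled block with the held-back tweak in %xmm10.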
2481 L$xts_dec_steal: | |
2482 movzbl 16(%rdi),%eax | |
2483 movzbl (%rsi),%ecx | |
2484 leaq 1(%rdi),%rdi | |
2485 movb %al,(%rsi) | |
2486 movb %cl,16(%rsi) | |
2487 leaq 1(%rsi),%rsi | |
2488 subq $1,%rdx | |
2489 jnz L$xts_dec_steal | |
2490 | |
2491 subq %r9,%rsi | |
2492 movq %rbp,%rcx | |
2493 movl %r10d,%eax | |
2494 | |
2495 movups (%rsi),%xmm2 | |
2496 xorps %xmm10,%xmm2 | |
2497 movups (%rcx),%xmm0 | |
2498 movups 16(%rcx),%xmm1 | |
2499 leaq 32(%rcx),%rcx | |
2500 xorps %xmm0,%xmm2 | |
2501 L$oop_dec1_14: | |
2502 .byte 102,15,56,222,209 | |
2503 decl %eax | |
2504 movups (%rcx),%xmm1 | |
2505 leaq 16(%rcx),%rcx | |
2506 jnz L$oop_dec1_14 | |
2507 .byte 102,15,56,223,209 | |
2508 xorps %xmm10,%xmm2 | |
2509 movups %xmm2,(%rsi) | |
2510 | |
2511 L$xts_dec_ret: | |
2512 xorps %xmm0,%xmm0 | |
2513 pxor %xmm1,%xmm1 | |
2514 pxor %xmm2,%xmm2 | |
2515 pxor %xmm3,%xmm3 | |
2516 pxor %xmm4,%xmm4 | |
2517 pxor %xmm5,%xmm5 | |
2518 pxor %xmm6,%xmm6 | |
2519 pxor %xmm7,%xmm7 | |
2520 movaps %xmm0,0(%rsp) | |
2521 pxor %xmm8,%xmm8 | |
2522 movaps %xmm0,16(%rsp) | |
2523 pxor %xmm9,%xmm9 | |
2524 movaps %xmm0,32(%rsp) | |
2525 pxor %xmm10,%xmm10 | |
2526 movaps %xmm0,48(%rsp) | |
2527 pxor %xmm11,%xmm11 | |
2528 movaps %xmm0,64(%rsp) | |
2529 pxor %xmm12,%xmm12 | |
2530 movaps %xmm0,80(%rsp) | |
2531 pxor %xmm13,%xmm13 | |
2532 movaps %xmm0,96(%rsp) | |
2533 pxor %xmm14,%xmm14 | |
2534 pxor %xmm15,%xmm15 | |
2535 movq -8(%r11),%rbp | |
2536 leaq (%r11),%rsp | |
2537 L$xts_dec_epilogue: | |
2538 .byte 0xf3,0xc3 | |
2539 | |
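# OCB encryption.  Register arguments: input %rdi, output %rsi, block count
# %rdx, key %rcx, starting block number %r8, current offset %r9.  Two further
# stack arguments supply what the code treats as the precomputed L_i offset
# table (loaded into %rbx) and the running checksum (%rbp).  Offsets advance
# as Offset_i = Offset_{i-1} xor L_{ntz(i)}, with ntz computed by bsf.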
2540 .globl _aesni_ocb_encrypt | |
2541 .private_extern _aesni_ocb_encrypt | |
2542 | |
2543 .p2align 5 | |
2544 _aesni_ocb_encrypt: | |
2545 leaq (%rsp),%rax | |
2546 pushq %rbx | |
2547 pushq %rbp | |
2548 pushq %r12 | |
2549 pushq %r13 | |
2550 pushq %r14 | |
2551 movq 8(%rax),%rbx | |
2552 movq 8+8(%rax),%rbp | |
2553 | |
2554 movl 240(%rcx),%r10d | |
2555 movq %rcx,%r11 | |
2556 shll $4,%r10d | |
2557 movups (%rcx),%xmm9 | |
2558 movups 16(%rcx,%r10,1),%xmm1 | |
2559 | |
2560 movdqu (%r9),%xmm15 | |
2561 pxor %xmm1,%xmm9 | |
2562 pxor %xmm1,%xmm15 | |
2563 | |
2564 movl $16+32,%eax | |
2565 leaq 32(%r11,%r10,1),%rcx | |
2566 movups 16(%r11),%xmm1 | |
2567 subq %r10,%rax | |
2568 movq %rax,%r10 | |
2569 | |
2570 movdqu (%rbx),%xmm10 | |
2571 movdqu (%rbp),%xmm8 | |
2572 | |
2573 testq $1,%r8 | |
2574 jnz L$ocb_enc_odd | |
2575 | |
2576 bsfq %r8,%r12 | |
2577 addq $1,%r8 | |
2578 shlq $4,%r12 | |
2579 movdqu (%rbx,%r12,1),%xmm7 | |
2580 movdqu (%rdi),%xmm2 | |
2581 leaq 16(%rdi),%rdi | |
2582 | |
2583 call __ocb_encrypt1 | |
2584 | |
2585 movdqa %xmm7,%xmm15 | |
2586 movups %xmm2,(%rsi) | |
2587 leaq 16(%rsi),%rsi | |
2588 subq $1,%rdx | |
2589 jz L$ocb_enc_done | |
2590 | |
2591 L$ocb_enc_odd: | |
2592 leaq 1(%r8),%r12 | |
2593 leaq 3(%r8),%r13 | |
2594 leaq 5(%r8),%r14 | |
2595 leaq 6(%r8),%r8 | |
2596 bsfq %r12,%r12 | |
2597 bsfq %r13,%r13 | |
2598 bsfq %r14,%r14 | |
2599 shlq $4,%r12 | |
2600 shlq $4,%r13 | |
2601 shlq $4,%r14 | |
2602 | |
2603 subq $6,%rdx | |
2604 jc L$ocb_enc_short | |
2605 jmp L$ocb_enc_grandloop | |
2606 | |
2607 .p2align 5 | |
2608 L$ocb_enc_grandloop: | |
2609 movdqu 0(%rdi),%xmm2 | |
2610 movdqu 16(%rdi),%xmm3 | |
2611 movdqu 32(%rdi),%xmm4 | |
2612 movdqu 48(%rdi),%xmm5 | |
2613 movdqu 64(%rdi),%xmm6 | |
2614 movdqu 80(%rdi),%xmm7 | |
2615 leaq 96(%rdi),%rdi | |
2616 | |
2617 call __ocb_encrypt6 | |
2618 | |
2619 movups %xmm2,0(%rsi) | |
2620 movups %xmm3,16(%rsi) | |
2621 movups %xmm4,32(%rsi) | |
2622 movups %xmm5,48(%rsi) | |
2623 movups %xmm6,64(%rsi) | |
2624 movups %xmm7,80(%rsi) | |
2625 leaq 96(%rsi),%rsi | |
2626 subq $6,%rdx | |
2627 jnc L$ocb_enc_grandloop | |
2628 | |
2629 L$ocb_enc_short: | |
2630 addq $6,%rdx | |
2631 jz L$ocb_enc_done | |
2632 | |
2633 movdqu 0(%rdi),%xmm2 | |
2634 cmpq $2,%rdx | |
2635 jb L$ocb_enc_one | |
2636 movdqu 16(%rdi),%xmm3 | |
2637 je L$ocb_enc_two | |
2638 | |
2639 movdqu 32(%rdi),%xmm4 | |
2640 cmpq $4,%rdx | |
2641 jb L$ocb_enc_three | |
2642 movdqu 48(%rdi),%xmm5 | |
2643 je L$ocb_enc_four | |
2644 | |
2645 movdqu 64(%rdi),%xmm6 | |
2646 pxor %xmm7,%xmm7 | |
2647 | |
2648 call __ocb_encrypt6 | |
2649 | |
2650 movdqa %xmm14,%xmm15 | |
2651 movups %xmm2,0(%rsi) | |
2652 movups %xmm3,16(%rsi) | |
2653 movups %xmm4,32(%rsi) | |
2654 movups %xmm5,48(%rsi) | |
2655 movups %xmm6,64(%rsi) | |
2656 | |
2657 jmp L$ocb_enc_done | |
2658 | |
2659 .p2align 4 | |
2660 L$ocb_enc_one: | |
2661 movdqa %xmm10,%xmm7 | |
2662 | |
2663 call __ocb_encrypt1 | |
2664 | |
2665 movdqa %xmm7,%xmm15 | |
2666 movups %xmm2,0(%rsi) | |
2667 jmp L$ocb_enc_done | |
2668 | |
2669 .p2align 4 | |
2670 L$ocb_enc_two: | |
2671 pxor %xmm4,%xmm4 | |
2672 pxor %xmm5,%xmm5 | |
2673 | |
2674 call __ocb_encrypt4 | |
2675 | |
2676 movdqa %xmm11,%xmm15 | |
2677 movups %xmm2,0(%rsi) | |
2678 movups %xmm3,16(%rsi) | |
2679 | |
2680 jmp L$ocb_enc_done | |
2681 | |
2682 .p2align 4 | |
2683 L$ocb_enc_three: | |
2684 pxor %xmm5,%xmm5 | |
2685 | |
2686 call __ocb_encrypt4 | |
2687 | |
2688 movdqa %xmm12,%xmm15 | |
2689 movups %xmm2,0(%rsi) | |
2690 movups %xmm3,16(%rsi) | |
2691 movups %xmm4,32(%rsi) | |
2692 | |
2693 jmp L$ocb_enc_done | |
2694 | |
2695 .p2align 4 | |
2696 L$ocb_enc_four: | |
2697 call __ocb_encrypt4 | |
2698 | |
2699 movdqa %xmm13,%xmm15 | |
2700 movups %xmm2,0(%rsi) | |
2701 movups %xmm3,16(%rsi) | |
2702 movups %xmm4,32(%rsi) | |
2703 movups %xmm5,48(%rsi) | |
2704 | |
2705 L$ocb_enc_done: | |
2706 pxor %xmm0,%xmm15 | |
2707 movdqu %xmm8,(%rbp) | |
2708 movdqu %xmm15,(%r9) | |
2709 | |
2710 xorps %xmm0,%xmm0 | |
2711 pxor %xmm1,%xmm1 | |
2712 pxor %xmm2,%xmm2 | |
2713 pxor %xmm3,%xmm3 | |
2714 pxor %xmm4,%xmm4 | |
2715 pxor %xmm5,%xmm5 | |
2716 pxor %xmm6,%xmm6 | |
2717 pxor %xmm7,%xmm7 | |
2718 pxor %xmm8,%xmm8 | |
2719 pxor %xmm9,%xmm9 | |
2720 pxor %xmm10,%xmm10 | |
2721 pxor %xmm11,%xmm11 | |
2722 pxor %xmm12,%xmm12 | |
2723 pxor %xmm13,%xmm13 | |
2724 pxor %xmm14,%xmm14 | |
2725 pxor %xmm15,%xmm15 | |
2726 leaq 40(%rsp),%rax | |
2727 movq -40(%rax),%r14 | |
2728 movq -32(%rax),%r13 | |
2729 movq -24(%rax),%r12 | |
2730 movq -16(%rax),%rbp | |
2731 movq -8(%rax),%rbx | |
2732 leaq (%rax),%rsp | |
2733 L$ocb_enc_epilogue: | |
2734 .byte 0xf3,0xc3 | |
2735 | |
2736 | |
2737 | |
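# __ocb_encrypt6: encrypt six blocks.  Derives the next six offsets from the
# L table, folds each plaintext block into the checksum in %xmm8, xors the
# offsets into the data, then runs the AES rounds on all six blocks in
# parallel.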
2738 .p2align 5 | |
2739 __ocb_encrypt6: | |
2740 pxor %xmm9,%xmm15 | |
2741 movdqu (%rbx,%r12,1),%xmm11 | |
2742 movdqa %xmm10,%xmm12 | |
2743 movdqu (%rbx,%r13,1),%xmm13 | |
2744 movdqa %xmm10,%xmm14 | |
2745 pxor %xmm15,%xmm10 | |
2746 movdqu (%rbx,%r14,1),%xmm15 | |
2747 pxor %xmm10,%xmm11 | |
2748 pxor %xmm2,%xmm8 | |
2749 pxor %xmm10,%xmm2 | |
2750 pxor %xmm11,%xmm12 | |
2751 pxor %xmm3,%xmm8 | |
2752 pxor %xmm11,%xmm3 | |
2753 pxor %xmm12,%xmm13 | |
2754 pxor %xmm4,%xmm8 | |
2755 pxor %xmm12,%xmm4 | |
2756 pxor %xmm13,%xmm14 | |
2757 pxor %xmm5,%xmm8 | |
2758 pxor %xmm13,%xmm5 | |
2759 pxor %xmm14,%xmm15 | |
2760 pxor %xmm6,%xmm8 | |
2761 pxor %xmm14,%xmm6 | |
2762 pxor %xmm7,%xmm8 | |
2763 pxor %xmm15,%xmm7 | |
2764 movups 32(%r11),%xmm0 | |
2765 | |
2766 leaq 1(%r8),%r12 | |
2767 leaq 3(%r8),%r13 | |
2768 leaq 5(%r8),%r14 | |
2769 addq $6,%r8 | |
2770 pxor %xmm9,%xmm10 | |
2771 bsfq %r12,%r12 | |
2772 bsfq %r13,%r13 | |
2773 bsfq %r14,%r14 | |
2774 | |
2775 .byte 102,15,56,220,209 | |
2776 .byte 102,15,56,220,217 | |
2777 .byte 102,15,56,220,225 | |
2778 .byte 102,15,56,220,233 | |
2779 pxor %xmm9,%xmm11 | |
2780 pxor %xmm9,%xmm12 | |
2781 .byte 102,15,56,220,241 | |
2782 pxor %xmm9,%xmm13 | |
2783 pxor %xmm9,%xmm14 | |
2784 .byte 102,15,56,220,249 | |
2785 movups 48(%r11),%xmm1 | |
2786 pxor %xmm9,%xmm15 | |
2787 | |
2788 .byte 102,15,56,220,208 | |
2789 .byte 102,15,56,220,216 | |
2790 .byte 102,15,56,220,224 | |
2791 .byte 102,15,56,220,232 | |
2792 .byte 102,15,56,220,240 | |
2793 .byte 102,15,56,220,248 | |
2794 movups 64(%r11),%xmm0 | |
2795 shlq $4,%r12 | |
2796 shlq $4,%r13 | |
2797 jmp L$ocb_enc_loop6 | |
2798 | |
2799 .p2align 5 | |
2800 L$ocb_enc_loop6: | |
2801 .byte 102,15,56,220,209 | |
2802 .byte 102,15,56,220,217 | |
2803 .byte 102,15,56,220,225 | |
2804 .byte 102,15,56,220,233 | |
2805 .byte 102,15,56,220,241 | |
2806 .byte 102,15,56,220,249 | |
2807 movups (%rcx,%rax,1),%xmm1 | |
2808 addq $32,%rax | |
2809 | |
2810 .byte 102,15,56,220,208 | |
2811 .byte 102,15,56,220,216 | |
2812 .byte 102,15,56,220,224 | |
2813 .byte 102,15,56,220,232 | |
2814 .byte 102,15,56,220,240 | |
2815 .byte 102,15,56,220,248 | |
2816 movups -16(%rcx,%rax,1),%xmm0 | |
2817 jnz L$ocb_enc_loop6 | |
2818 | |
2819 .byte 102,15,56,220,209 | |
2820 .byte 102,15,56,220,217 | |
2821 .byte 102,15,56,220,225 | |
2822 .byte 102,15,56,220,233 | |
2823 .byte 102,15,56,220,241 | |
2824 .byte 102,15,56,220,249 | |
2825 movups 16(%r11),%xmm1 | |
2826 shlq $4,%r14 | |
2827 | |
2828 .byte 102,65,15,56,221,210 | |
2829 movdqu (%rbx),%xmm10 | |
2830 movq %r10,%rax | |
2831 .byte 102,65,15,56,221,219 | |
2832 .byte 102,65,15,56,221,228 | |
2833 .byte 102,65,15,56,221,237 | |
2834 .byte 102,65,15,56,221,246 | |
2835 .byte 102,65,15,56,221,255 | |
2836 .byte 0xf3,0xc3 | |
2837 | |
2838 | |
2839 | |
2840 .p2align 5 | |
2841 __ocb_encrypt4: | |
2842 pxor %xmm9,%xmm15 | |
2843 movdqu (%rbx,%r12,1),%xmm11 | |
2844 movdqa %xmm10,%xmm12 | |
2845 movdqu (%rbx,%r13,1),%xmm13 | |
2846 pxor %xmm15,%xmm10 | |
2847 pxor %xmm10,%xmm11 | |
2848 pxor %xmm2,%xmm8 | |
2849 pxor %xmm10,%xmm2 | |
2850 pxor %xmm11,%xmm12 | |
2851 pxor %xmm3,%xmm8 | |
2852 pxor %xmm11,%xmm3 | |
2853 pxor %xmm12,%xmm13 | |
2854 pxor %xmm4,%xmm8 | |
2855 pxor %xmm12,%xmm4 | |
2856 pxor %xmm5,%xmm8 | |
2857 pxor %xmm13,%xmm5 | |
2858 movups 32(%r11),%xmm0 | |
2859 | |
2860 pxor %xmm9,%xmm10 | |
2861 pxor %xmm9,%xmm11 | |
2862 pxor %xmm9,%xmm12 | |
2863 pxor %xmm9,%xmm13 | |
2864 | |
2865 .byte 102,15,56,220,209 | |
2866 .byte 102,15,56,220,217 | |
2867 .byte 102,15,56,220,225 | |
2868 .byte 102,15,56,220,233 | |
2869 movups 48(%r11),%xmm1 | |
2870 | |
2871 .byte 102,15,56,220,208 | |
2872 .byte 102,15,56,220,216 | |
2873 .byte 102,15,56,220,224 | |
2874 .byte 102,15,56,220,232 | |
2875 movups 64(%r11),%xmm0 | |
2876 jmp L$ocb_enc_loop4 | |
2877 | |
2878 .p2align 5 | |
2879 L$ocb_enc_loop4: | |
2880 .byte 102,15,56,220,209 | |
2881 .byte 102,15,56,220,217 | |
2882 .byte 102,15,56,220,225 | |
2883 .byte 102,15,56,220,233 | |
2884 movups (%rcx,%rax,1),%xmm1 | |
2885 addq $32,%rax | |
2886 | |
2887 .byte 102,15,56,220,208 | |
2888 .byte 102,15,56,220,216 | |
2889 .byte 102,15,56,220,224 | |
2890 .byte 102,15,56,220,232 | |
2891 movups -16(%rcx,%rax,1),%xmm0 | |
2892 jnz L$ocb_enc_loop4 | |
2893 | |
2894 .byte 102,15,56,220,209 | |
2895 .byte 102,15,56,220,217 | |
2896 .byte 102,15,56,220,225 | |
2897 .byte 102,15,56,220,233 | |
2898 movups 16(%r11),%xmm1 | |
2899 movq %r10,%rax | |
2900 | |
2901 .byte 102,65,15,56,221,210 | |
2902 .byte 102,65,15,56,221,219 | |
2903 .byte 102,65,15,56,221,228 | |
2904 .byte 102,65,15,56,221,237 | |
2905 .byte 0xf3,0xc3 | |
2906 | |
2907 | |
2908 | |
2909 .p2align 5 | |
2910 __ocb_encrypt1: | |
2911 pxor %xmm15,%xmm7 | |
2912 pxor %xmm9,%xmm7 | |
2913 pxor %xmm2,%xmm8 | |
2914 pxor %xmm7,%xmm2 | |
2915 movups 32(%r11),%xmm0 | |
2916 | |
2917 .byte 102,15,56,220,209 | |
2918 movups 48(%r11),%xmm1 | |
2919 pxor %xmm9,%xmm7 | |
2920 | |
2921 .byte 102,15,56,220,208 | |
2922 movups 64(%r11),%xmm0 | |
2923 jmp L$ocb_enc_loop1 | |
2924 | |
2925 .p2align 5 | |
2926 L$ocb_enc_loop1: | |
2927 .byte 102,15,56,220,209 | |
2928 movups (%rcx,%rax,1),%xmm1 | |
2929 addq $32,%rax | |
2930 | |
2931 .byte 102,15,56,220,208 | |
2932 movups -16(%rcx,%rax,1),%xmm0 | |
2933 jnz L$ocb_enc_loop1 | |
2934 | |
2935 .byte 102,15,56,220,209 | |
2936 movups 16(%r11),%xmm1 | |
2937 movq %r10,%rax | |
2938 | |
2939 .byte 102,15,56,221,215 | |
2940 .byte 0xf3,0xc3 | |
2941 | |
2942 | |
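# OCB decryption: same argument layout as _aesni_ocb_encrypt above, but the
# checksum in %xmm8 is accumulated over the recovered plaintext after the
# aesdec rounds.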
2943 .globl _aesni_ocb_decrypt | |
2944 .private_extern _aesni_ocb_decrypt | |
2945 | |
2946 .p2align 5 | |
2947 _aesni_ocb_decrypt: | |
2948 leaq (%rsp),%rax | |
2949 pushq %rbx | |
2950 pushq %rbp | |
2951 pushq %r12 | |
2952 pushq %r13 | |
2953 pushq %r14 | |
2954 movq 8(%rax),%rbx | |
2955 movq 8+8(%rax),%rbp | |
2956 | |
2957 movl 240(%rcx),%r10d | |
2958 movq %rcx,%r11 | |
2959 shll $4,%r10d | |
2960 movups (%rcx),%xmm9 | |
2961 movups 16(%rcx,%r10,1),%xmm1 | |
2962 | |
2963 movdqu (%r9),%xmm15 | |
2964 pxor %xmm1,%xmm9 | |
2965 pxor %xmm1,%xmm15 | |
2966 | |
2967 movl $16+32,%eax | |
2968 leaq 32(%r11,%r10,1),%rcx | |
2969 movups 16(%r11),%xmm1 | |
2970 subq %r10,%rax | |
2971 movq %rax,%r10 | |
2972 | |
2973 movdqu (%rbx),%xmm10 | |
2974 movdqu (%rbp),%xmm8 | |
2975 | |
2976 testq $1,%r8 | |
2977 jnz L$ocb_dec_odd | |
2978 | |
2979 bsfq %r8,%r12 | |
2980 addq $1,%r8 | |
2981 shlq $4,%r12 | |
2982 movdqu (%rbx,%r12,1),%xmm7 | |
2983 movdqu (%rdi),%xmm2 | |
2984 leaq 16(%rdi),%rdi | |
2985 | |
2986 call __ocb_decrypt1 | |
2987 | |
2988 movdqa %xmm7,%xmm15 | |
2989 movups %xmm2,(%rsi) | |
2990 xorps %xmm2,%xmm8 | |
2991 leaq 16(%rsi),%rsi | |
2992 subq $1,%rdx | |
2993 jz L$ocb_dec_done | |
2994 | |
2995 L$ocb_dec_odd: | |
2996 leaq 1(%r8),%r12 | |
2997 leaq 3(%r8),%r13 | |
2998 leaq 5(%r8),%r14 | |
2999 leaq 6(%r8),%r8 | |
3000 bsfq %r12,%r12 | |
3001 bsfq %r13,%r13 | |
3002 bsfq %r14,%r14 | |
3003 shlq $4,%r12 | |
3004 shlq $4,%r13 | |
3005 shlq $4,%r14 | |
3006 | |
3007 subq $6,%rdx | |
3008 jc L$ocb_dec_short | |
3009 jmp L$ocb_dec_grandloop | |
3010 | |
3011 .p2align 5 | |
3012 L$ocb_dec_grandloop: | |
3013 movdqu 0(%rdi),%xmm2 | |
3014 movdqu 16(%rdi),%xmm3 | |
3015 movdqu 32(%rdi),%xmm4 | |
3016 movdqu 48(%rdi),%xmm5 | |
3017 movdqu 64(%rdi),%xmm6 | |
3018 movdqu 80(%rdi),%xmm7 | |
3019 leaq 96(%rdi),%rdi | |
3020 | |
3021 call __ocb_decrypt6 | |
3022 | |
3023 movups %xmm2,0(%rsi) | |
3024 pxor %xmm2,%xmm8 | |
3025 movups %xmm3,16(%rsi) | |
3026 pxor %xmm3,%xmm8 | |
3027 movups %xmm4,32(%rsi) | |
3028 pxor %xmm4,%xmm8 | |
3029 movups %xmm5,48(%rsi) | |
3030 pxor %xmm5,%xmm8 | |
3031 movups %xmm6,64(%rsi) | |
3032 pxor %xmm6,%xmm8 | |
3033 movups %xmm7,80(%rsi) | |
3034 pxor %xmm7,%xmm8 | |
3035 leaq 96(%rsi),%rsi | |
3036 subq $6,%rdx | |
3037 jnc L$ocb_dec_grandloop | |
3038 | |
3039 L$ocb_dec_short: | |
3040 addq $6,%rdx | |
3041 jz L$ocb_dec_done | |
3042 | |
3043 movdqu 0(%rdi),%xmm2 | |
3044 cmpq $2,%rdx | |
3045 jb L$ocb_dec_one | |
3046 movdqu 16(%rdi),%xmm3 | |
3047 je L$ocb_dec_two | |
3048 | |
3049 movdqu 32(%rdi),%xmm4 | |
3050 cmpq $4,%rdx | |
3051 jb L$ocb_dec_three | |
3052 movdqu 48(%rdi),%xmm5 | |
3053 je L$ocb_dec_four | |
3054 | |
3055 movdqu 64(%rdi),%xmm6 | |
3056 pxor %xmm7,%xmm7 | |
3057 | |
3058 call __ocb_decrypt6 | |
3059 | |
3060 movdqa %xmm14,%xmm15 | |
3061 movups %xmm2,0(%rsi) | |
3062 pxor %xmm2,%xmm8 | |
3063 movups %xmm3,16(%rsi) | |
3064 pxor %xmm3,%xmm8 | |
3065 movups %xmm4,32(%rsi) | |
3066 pxor %xmm4,%xmm8 | |
3067 movups %xmm5,48(%rsi) | |
3068 pxor %xmm5,%xmm8 | |
3069 movups %xmm6,64(%rsi) | |
3070 pxor %xmm6,%xmm8 | |
3071 | |
3072 jmp L$ocb_dec_done | |
3073 | |
3074 .p2align 4 | |
3075 L$ocb_dec_one: | |
3076 movdqa %xmm10,%xmm7 | |
3077 | |
3078 call __ocb_decrypt1 | |
3079 | |
3080 movdqa %xmm7,%xmm15 | |
3081 movups %xmm2,0(%rsi) | |
3082 xorps %xmm2,%xmm8 | |
3083 jmp L$ocb_dec_done | |
3084 | |
3085 .p2align 4 | |
3086 L$ocb_dec_two: | |
3087 pxor %xmm4,%xmm4 | |
3088 pxor %xmm5,%xmm5 | |
3089 | |
3090 call __ocb_decrypt4 | |
3091 | |
3092 movdqa %xmm11,%xmm15 | |
3093 movups %xmm2,0(%rsi) | |
3094 xorps %xmm2,%xmm8 | |
3095 movups %xmm3,16(%rsi) | |
3096 xorps %xmm3,%xmm8 | |
3097 | |
3098 jmp L$ocb_dec_done | |
3099 | |
3100 .p2align 4 | |
3101 L$ocb_dec_three: | |
3102 pxor %xmm5,%xmm5 | |
3103 | |
3104 call __ocb_decrypt4 | |
3105 | |
3106 movdqa %xmm12,%xmm15 | |
3107 movups %xmm2,0(%rsi) | |
3108 xorps %xmm2,%xmm8 | |
3109 movups %xmm3,16(%rsi) | |
3110 xorps %xmm3,%xmm8 | |
3111 movups %xmm4,32(%rsi) | |
3112 xorps %xmm4,%xmm8 | |
3113 | |
3114 jmp L$ocb_dec_done | |
3115 | |
3116 .p2align 4 | |
3117 L$ocb_dec_four: | |
3118 call __ocb_decrypt4 | |
3119 | |
3120 movdqa %xmm13,%xmm15 | |
3121 movups %xmm2,0(%rsi) | |
3122 pxor %xmm2,%xmm8 | |
3123 movups %xmm3,16(%rsi) | |
3124 pxor %xmm3,%xmm8 | |
3125 movups %xmm4,32(%rsi) | |
3126 pxor %xmm4,%xmm8 | |
3127 movups %xmm5,48(%rsi) | |
3128 pxor %xmm5,%xmm8 | |
3129 | |
3130 L$ocb_dec_done: | |
3131 pxor %xmm0,%xmm15 | |
3132 movdqu %xmm8,(%rbp) | |
3133 movdqu %xmm15,(%r9) | |
3134 | |
3135 xorps %xmm0,%xmm0 | |
3136 pxor %xmm1,%xmm1 | |
3137 pxor %xmm2,%xmm2 | |
3138 pxor %xmm3,%xmm3 | |
3139 pxor %xmm4,%xmm4 | |
3140 pxor %xmm5,%xmm5 | |
3141 pxor %xmm6,%xmm6 | |
3142 pxor %xmm7,%xmm7 | |
3143 pxor %xmm8,%xmm8 | |
3144 pxor %xmm9,%xmm9 | |
3145 pxor %xmm10,%xmm10 | |
3146 pxor %xmm11,%xmm11 | |
3147 pxor %xmm12,%xmm12 | |
3148 pxor %xmm13,%xmm13 | |
3149 pxor %xmm14,%xmm14 | |
3150 pxor %xmm15,%xmm15 | |
3151 leaq 40(%rsp),%rax | |
3152 movq -40(%rax),%r14 | |
3153 movq -32(%rax),%r13 | |
3154 movq -24(%rax),%r12 | |
3155 movq -16(%rax),%rbp | |
3156 movq -8(%rax),%rbx | |
3157 leaq (%rax),%rsp | |
3158 L$ocb_dec_epilogue: | |
3159 .byte 0xf3,0xc3 | |
3160 | |
3161 | |
3162 | |
3163 .p2align 5 | |
3164 __ocb_decrypt6: | |
3165 pxor %xmm9,%xmm15 | |
3166 movdqu (%rbx,%r12,1),%xmm11 | |
3167 movdqa %xmm10,%xmm12 | |
3168 movdqu (%rbx,%r13,1),%xmm13 | |
3169 movdqa %xmm10,%xmm14 | |
3170 pxor %xmm15,%xmm10 | |
3171 movdqu (%rbx,%r14,1),%xmm15 | |
3172 pxor %xmm10,%xmm11 | |
3173 pxor %xmm10,%xmm2 | |
3174 pxor %xmm11,%xmm12 | |
3175 pxor %xmm11,%xmm3 | |
3176 pxor %xmm12,%xmm13 | |
3177 pxor %xmm12,%xmm4 | |
3178 pxor %xmm13,%xmm14 | |
3179 pxor %xmm13,%xmm5 | |
3180 pxor %xmm14,%xmm15 | |
3181 pxor %xmm14,%xmm6 | |
3182 pxor %xmm15,%xmm7 | |
3183 movups 32(%r11),%xmm0 | |
3184 | |
3185 leaq 1(%r8),%r12 | |
3186 leaq 3(%r8),%r13 | |
3187 leaq 5(%r8),%r14 | |
3188 addq $6,%r8 | |
3189 pxor %xmm9,%xmm10 | |
3190 bsfq %r12,%r12 | |
3191 bsfq %r13,%r13 | |
3192 bsfq %r14,%r14 | |
3193 | |
3194 .byte 102,15,56,222,209 | |
3195 .byte 102,15,56,222,217 | |
3196 .byte 102,15,56,222,225 | |
3197 .byte 102,15,56,222,233 | |
3198 pxor %xmm9,%xmm11 | |
3199 pxor %xmm9,%xmm12 | |
3200 .byte 102,15,56,222,241 | |
3201 pxor %xmm9,%xmm13 | |
3202 pxor %xmm9,%xmm14 | |
3203 .byte 102,15,56,222,249 | |
3204 movups 48(%r11),%xmm1 | |
3205 pxor %xmm9,%xmm15 | |
3206 | |
3207 .byte 102,15,56,222,208 | |
3208 .byte 102,15,56,222,216 | |
3209 .byte 102,15,56,222,224 | |
3210 .byte 102,15,56,222,232 | |
3211 .byte 102,15,56,222,240 | |
3212 .byte 102,15,56,222,248 | |
3213 movups 64(%r11),%xmm0 | |
3214 shlq $4,%r12 | |
3215 shlq $4,%r13 | |
3216 jmp L$ocb_dec_loop6 | |
3217 | |
3218 .p2align 5 | |
3219 L$ocb_dec_loop6: | |
3220 .byte 102,15,56,222,209 | |
3221 .byte 102,15,56,222,217 | |
3222 .byte 102,15,56,222,225 | |
3223 .byte 102,15,56,222,233 | |
3224 .byte 102,15,56,222,241 | |
3225 .byte 102,15,56,222,249 | |
3226 movups (%rcx,%rax,1),%xmm1 | |
3227 addq $32,%rax | |
3228 | |
3229 .byte 102,15,56,222,208 | |
3230 .byte 102,15,56,222,216 | |
3231 .byte 102,15,56,222,224 | |
3232 .byte 102,15,56,222,232 | |
3233 .byte 102,15,56,222,240 | |
3234 .byte 102,15,56,222,248 | |
3235 movups -16(%rcx,%rax,1),%xmm0 | |
3236 jnz L$ocb_dec_loop6 | |
3237 | |
3238 .byte 102,15,56,222,209 | |
3239 .byte 102,15,56,222,217 | |
3240 .byte 102,15,56,222,225 | |
3241 .byte 102,15,56,222,233 | |
3242 .byte 102,15,56,222,241 | |
3243 .byte 102,15,56,222,249 | |
3244 movups 16(%r11),%xmm1 | |
3245 shlq $4,%r14 | |
3246 | |
3247 .byte 102,65,15,56,223,210 | |
3248 movdqu (%rbx),%xmm10 | |
3249 movq %r10,%rax | |
3250 .byte 102,65,15,56,223,219 | |
3251 .byte 102,65,15,56,223,228 | |
3252 .byte 102,65,15,56,223,237 | |
3253 .byte 102,65,15,56,223,246 | |
3254 .byte 102,65,15,56,223,255 | |
3255 .byte 0xf3,0xc3 | |
3256 | |
3257 | |
3258 | |
3259 .p2align 5 | |
3260 __ocb_decrypt4: | |
3261 pxor %xmm9,%xmm15 | |
3262 movdqu (%rbx,%r12,1),%xmm11 | |
3263 movdqa %xmm10,%xmm12 | |
3264 movdqu (%rbx,%r13,1),%xmm13 | |
3265 pxor %xmm15,%xmm10 | |
3266 pxor %xmm10,%xmm11 | |
3267 pxor %xmm10,%xmm2 | |
3268 pxor %xmm11,%xmm12 | |
3269 pxor %xmm11,%xmm3 | |
3270 pxor %xmm12,%xmm13 | |
3271 pxor %xmm12,%xmm4 | |
3272 pxor %xmm13,%xmm5 | |
3273 movups 32(%r11),%xmm0 | |
3274 | |
3275 pxor %xmm9,%xmm10 | |
3276 pxor %xmm9,%xmm11 | |
3277 pxor %xmm9,%xmm12 | |
3278 pxor %xmm9,%xmm13 | |
3279 | |
3280 .byte 102,15,56,222,209 | |
3281 .byte 102,15,56,222,217 | |
3282 .byte 102,15,56,222,225 | |
3283 .byte 102,15,56,222,233 | |
3284 movups 48(%r11),%xmm1 | |
3285 | |
3286 .byte 102,15,56,222,208 | |
3287 .byte 102,15,56,222,216 | |
3288 .byte 102,15,56,222,224 | |
3289 .byte 102,15,56,222,232 | |
3290 movups 64(%r11),%xmm0 | |
3291 jmp L$ocb_dec_loop4 | |
3292 | |
3293 .p2align 5 | |
3294 L$ocb_dec_loop4: | |
3295 .byte 102,15,56,222,209 | |
3296 .byte 102,15,56,222,217 | |
3297 .byte 102,15,56,222,225 | |
3298 .byte 102,15,56,222,233 | |
3299 movups (%rcx,%rax,1),%xmm1 | |
3300 addq $32,%rax | |
3301 | |
3302 .byte 102,15,56,222,208 | |
3303 .byte 102,15,56,222,216 | |
3304 .byte 102,15,56,222,224 | |
3305 .byte 102,15,56,222,232 | |
3306 movups -16(%rcx,%rax,1),%xmm0 | |
3307 jnz L$ocb_dec_loop4 | |
3308 | |
3309 .byte 102,15,56,222,209 | |
3310 .byte 102,15,56,222,217 | |
3311 .byte 102,15,56,222,225 | |
3312 .byte 102,15,56,222,233 | |
3313 movups 16(%r11),%xmm1 | |
3314 movq %r10,%rax | |
3315 | |
3316 .byte 102,65,15,56,223,210 | |
3317 .byte 102,65,15,56,223,219 | |
3318 .byte 102,65,15,56,223,228 | |
3319 .byte 102,65,15,56,223,237 | |
3320 .byte 0xf3,0xc3 | |
3321 | |
3322 | |
3323 | |
3324 .p2align 5 | |
3325 __ocb_decrypt1: | |
3326 pxor %xmm15,%xmm7 | |
3327 pxor %xmm9,%xmm7 | |
3328 pxor %xmm7,%xmm2 | |
3329 movups 32(%r11),%xmm0 | |
3330 | |
3331 .byte 102,15,56,222,209 | |
3332 movups 48(%r11),%xmm1 | |
3333 pxor %xmm9,%xmm7 | |
3334 | |
3335 .byte 102,15,56,222,208 | |
3336 movups 64(%r11),%xmm0 | |
3337 jmp L$ocb_dec_loop1 | |
3338 | |
3339 .p2align 5 | |
3340 L$ocb_dec_loop1: | |
3341 .byte 102,15,56,222,209 | |
3342 movups (%rcx,%rax,1),%xmm1 | |
3343 addq $32,%rax | |
3344 | |
3345 .byte 102,15,56,222,208 | |
3346 movups -16(%rcx,%rax,1),%xmm0 | |
3347 jnz L$ocb_dec_loop1 | |
3348 | |
3349 .byte 102,15,56,222,209 | |
3350 movups 16(%r11),%xmm1 | |
3351 movq %r10,%rax | |
3352 | |
3353 .byte 102,15,56,223,215 | |
3354 .byte 0xf3,0xc3 | |
3355 | |
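# CBC entry point: input %rdi, output %rsi, byte length %rdx, key %rcx,
# 16-byte IV %r8, direction flag %r9 (nonzero = encrypt).  Encryption is
# inherently serial (C_i = E_K(P_i xor C_{i-1})); the decryption path below
# is parallelized over up to eight blocks.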
3356 .globl _aesni_cbc_encrypt | |
3357 .private_extern _aesni_cbc_encrypt | |
3358 | |
3359 .p2align 4 | |
3360 _aesni_cbc_encrypt: | |
3361 testq %rdx,%rdx | |
3362 jz L$cbc_ret | |
3363 | |
3364 movl 240(%rcx),%r10d | |
3365 movq %rcx,%r11 | |
3366 testl %r9d,%r9d | |
3367 jz L$cbc_decrypt | |
3368 | |
3369 movups (%r8),%xmm2 | |
3370 movl %r10d,%eax | |
3371 cmpq $16,%rdx | |
3372 jb L$cbc_enc_tail | |
3373 subq $16,%rdx | |
3374 jmp L$cbc_enc_loop | |
3375 .p2align 4 | |
3376 L$cbc_enc_loop: | |
3377 movups (%rdi),%xmm3 | |
3378 leaq 16(%rdi),%rdi | |
3379 | |
3380 movups (%rcx),%xmm0 | |
3381 movups 16(%rcx),%xmm1 | |
3382 xorps %xmm0,%xmm3 | |
3383 leaq 32(%rcx),%rcx | |
3384 xorps %xmm3,%xmm2 | |
3385 L$oop_enc1_15: | |
3386 .byte 102,15,56,220,209 | |
3387 decl %eax | |
3388 movups (%rcx),%xmm1 | |
3389 leaq 16(%rcx),%rcx | |
3390 jnz L$oop_enc1_15 | |
3391 .byte 102,15,56,221,209 | |
3392 movl %r10d,%eax | |
3393 movq %r11,%rcx | |
3394 movups %xmm2,0(%rsi) | |
3395 leaq 16(%rsi),%rsi | |
3396 subq $16,%rdx | |
3397 jnc L$cbc_enc_loop | |
3398 addq $16,%rdx | |
3399 jnz L$cbc_enc_tail | |
3400 pxor %xmm0,%xmm0 | |
3401 pxor %xmm1,%xmm1 | |
3402 movups %xmm2,(%r8) | |
3403 pxor %xmm2,%xmm2 | |
3404 pxor %xmm3,%xmm3 | |
3405 jmp L$cbc_ret | |
3406 | |
3407 L$cbc_enc_tail: | |
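# Partial final block: copy the remaining %rdx input bytes to the output
# buffer (.long 0x9066A4F3 encodes rep movsb plus a two-byte nop), zero-pad
# to 16 bytes (.long 0x9066AAF3 encodes rep stosb), then encrypt that block
# in place by falling back into L$cbc_enc_loop.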
3408 movq %rdx,%rcx | |
3409 xchgq %rdi,%rsi | |
3410 .long 0x9066A4F3 | |
3411 movl $16,%ecx | |
3412 subq %rdx,%rcx | |
3413 xorl %eax,%eax | |
3414 .long 0x9066AAF3 | |
3415 leaq -16(%rdi),%rdi | |
3416 movl %r10d,%eax | |
3417 movq %rdi,%rsi | |
3418 movq %r11,%rcx | |
3419 xorq %rdx,%rdx | |
3420 jmp L$cbc_enc_loop | |
3421 | |
3422 .p2align 4 | |
3423 L$cbc_decrypt: | |
3424 cmpq $16,%rdx | |
3425 jne L$cbc_decrypt_bulk | |
3426 | |
3427 | |
3428 | |
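# Fast path for exactly one 16-byte block: the ciphertext is saved in %xmm4
# so the IV slot can be updated correctly even when decrypting in place.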
3429 movdqu (%rdi),%xmm2 | |
3430 movdqu (%r8),%xmm3 | |
3431 movdqa %xmm2,%xmm4 | |
3432 movups (%rcx),%xmm0 | |
3433 movups 16(%rcx),%xmm1 | |
3434 leaq 32(%rcx),%rcx | |
3435 xorps %xmm0,%xmm2 | |
3436 L$oop_dec1_16: | |
3437 .byte 102,15,56,222,209 | |
3438 decl %r10d | |
3439 movups (%rcx),%xmm1 | |
3440 leaq 16(%rcx),%rcx | |
3441 jnz L$oop_dec1_16 | |
3442 .byte 102,15,56,223,209 | |
3443 pxor %xmm0,%xmm0 | |
3444 pxor %xmm1,%xmm1 | |
3445 movdqu %xmm4,(%r8) | |
3446 xorps %xmm3,%xmm2 | |
3447 pxor %xmm3,%xmm3 | |
3448 movups %xmm2,(%rsi) | |
3449 pxor %xmm2,%xmm2 | |
3450 jmp L$cbc_ret | |
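# Bulk CBC decryption: keep up to eight ciphertext blocks in flight, decrypt
# them in parallel, then xor each result with the preceding ciphertext block
# (P_i = D_K(C_i) xor C_{i-1}).  The OPENSSL_ia32cap_P test below appears to
# single out cores reporting MOVBE without XSAVE (e.g. Atom Silvermont) and
# routes them to a six-block inner loop instead of the eight-block one.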
3451 .p2align 4 | |
3452 L$cbc_decrypt_bulk: | |
3453 leaq (%rsp),%r11 | |
3454 pushq %rbp | |
3455 subq $16,%rsp | |
3456 andq $-16,%rsp | |
3457 movq %rcx,%rbp | |
3458 movups (%r8),%xmm10 | |
3459 movl %r10d,%eax | |
3460 cmpq $0x50,%rdx | |
3461 jbe L$cbc_dec_tail | |
3462 | |
3463 movups (%rcx),%xmm0 | |
3464 movdqu 0(%rdi),%xmm2 | |
3465 movdqu 16(%rdi),%xmm3 | |
3466 movdqa %xmm2,%xmm11 | |
3467 movdqu 32(%rdi),%xmm4 | |
3468 movdqa %xmm3,%xmm12 | |
3469 movdqu 48(%rdi),%xmm5 | |
3470 movdqa %xmm4,%xmm13 | |
3471 movdqu 64(%rdi),%xmm6 | |
3472 movdqa %xmm5,%xmm14 | |
3473 movdqu 80(%rdi),%xmm7 | |
3474 movdqa %xmm6,%xmm15 | |
3475 movl _OPENSSL_ia32cap_P+4(%rip),%r9d | |
3476 cmpq $0x70,%rdx | |
3477 jbe L$cbc_dec_six_or_seven | |
3478 | |
3479 andl $71303168,%r9d | |
3480 subq $0x50,%rdx | |
3481 cmpl $4194304,%r9d | |
3482 je L$cbc_dec_loop6_enter | |
3483 subq $0x20,%rdx | |
3484 leaq 112(%rcx),%rcx | |
3485 jmp L$cbc_dec_loop8_enter | |
3486 .p2align 4 | |
3487 L$cbc_dec_loop8: | |
3488 movups %xmm9,(%rsi) | |
3489 leaq 16(%rsi),%rsi | |
3490 L$cbc_dec_loop8_enter: | |
3491 movdqu 96(%rdi),%xmm8 | |
3492 pxor %xmm0,%xmm2 | |
3493 movdqu 112(%rdi),%xmm9 | |
3494 pxor %xmm0,%xmm3 | |
3495 movups 16-112(%rcx),%xmm1 | |
3496 pxor %xmm0,%xmm4 | |
3497 movq $-1,%rbp | |
3498 cmpq $0x70,%rdx | |
3499 pxor %xmm0,%xmm5 | |
3500 pxor %xmm0,%xmm6 | |
3501 pxor %xmm0,%xmm7 | |
3502 pxor %xmm0,%xmm8 | |
3503 | |
3504 .byte 102,15,56,222,209 | |
3505 pxor %xmm0,%xmm9 | |
3506 movups 32-112(%rcx),%xmm0 | |
3507 .byte 102,15,56,222,217 | |
3508 .byte 102,15,56,222,225 | |
3509 .byte 102,15,56,222,233 | |
3510 .byte 102,15,56,222,241 | |
3511 .byte 102,15,56,222,249 | |
3512 .byte 102,68,15,56,222,193 | |
3513 adcq $0,%rbp | |
3514 andq $128,%rbp | |
3515 .byte 102,68,15,56,222,201 | |
3516 addq %rdi,%rbp | |
3517 movups 48-112(%rcx),%xmm1 | |
3518 .byte 102,15,56,222,208 | |
3519 .byte 102,15,56,222,216 | |
3520 .byte 102,15,56,222,224 | |
3521 .byte 102,15,56,222,232 | |
3522 .byte 102,15,56,222,240 | |
3523 .byte 102,15,56,222,248 | |
3524 .byte 102,68,15,56,222,192 | |
3525 .byte 102,68,15,56,222,200 | |
3526 movups 64-112(%rcx),%xmm0 | |
3527 nop | |
3528 .byte 102,15,56,222,209 | |
3529 .byte 102,15,56,222,217 | |
3530 .byte 102,15,56,222,225 | |
3531 .byte 102,15,56,222,233 | |
3532 .byte 102,15,56,222,241 | |
3533 .byte 102,15,56,222,249 | |
3534 .byte 102,68,15,56,222,193 | |
3535 .byte 102,68,15,56,222,201 | |
3536 movups 80-112(%rcx),%xmm1 | |
3537 nop | |
3538 .byte 102,15,56,222,208 | |
3539 .byte 102,15,56,222,216 | |
3540 .byte 102,15,56,222,224 | |
3541 .byte 102,15,56,222,232 | |
3542 .byte 102,15,56,222,240 | |
3543 .byte 102,15,56,222,248 | |
3544 .byte 102,68,15,56,222,192 | |
3545 .byte 102,68,15,56,222,200 | |
3546 movups 96-112(%rcx),%xmm0 | |
3547 nop | |
3548 .byte 102,15,56,222,209 | |
3549 .byte 102,15,56,222,217 | |
3550 .byte 102,15,56,222,225 | |
3551 .byte 102,15,56,222,233 | |
3552 .byte 102,15,56,222,241 | |
3553 .byte 102,15,56,222,249 | |
3554 .byte 102,68,15,56,222,193 | |
3555 .byte 102,68,15,56,222,201 | |
3556 movups 112-112(%rcx),%xmm1 | |
3557 nop | |
3558 .byte 102,15,56,222,208 | |
3559 .byte 102,15,56,222,216 | |
3560 .byte 102,15,56,222,224 | |
3561 .byte 102,15,56,222,232 | |
3562 .byte 102,15,56,222,240 | |
3563 .byte 102,15,56,222,248 | |
3564 .byte 102,68,15,56,222,192 | |
3565 .byte 102,68,15,56,222,200 | |
3566 movups 128-112(%rcx),%xmm0 | |
3567 nop | |
3568 .byte 102,15,56,222,209 | |
3569 .byte 102,15,56,222,217 | |
3570 .byte 102,15,56,222,225 | |
3571 .byte 102,15,56,222,233 | |
3572 .byte 102,15,56,222,241 | |
3573 .byte 102,15,56,222,249 | |
3574 .byte 102,68,15,56,222,193 | |
3575 .byte 102,68,15,56,222,201 | |
3576 movups 144-112(%rcx),%xmm1 | |
3577 cmpl $11,%eax | |
3578 .byte 102,15,56,222,208 | |
3579 .byte 102,15,56,222,216 | |
3580 .byte 102,15,56,222,224 | |
3581 .byte 102,15,56,222,232 | |
3582 .byte 102,15,56,222,240 | |
3583 .byte 102,15,56,222,248 | |
3584 .byte 102,68,15,56,222,192 | |
3585 .byte 102,68,15,56,222,200 | |
3586 movups 160-112(%rcx),%xmm0 | |
3587 jb L$cbc_dec_done | |
3588 .byte 102,15,56,222,209 | |
3589 .byte 102,15,56,222,217 | |
3590 .byte 102,15,56,222,225 | |
3591 .byte 102,15,56,222,233 | |
3592 .byte 102,15,56,222,241 | |
3593 .byte 102,15,56,222,249 | |
3594 .byte 102,68,15,56,222,193 | |
3595 .byte 102,68,15,56,222,201 | |
3596 movups 176-112(%rcx),%xmm1 | |
3597 nop | |
3598 .byte 102,15,56,222,208 | |
3599 .byte 102,15,56,222,216 | |
3600 .byte 102,15,56,222,224 | |
3601 .byte 102,15,56,222,232 | |
3602 .byte 102,15,56,222,240 | |
3603 .byte 102,15,56,222,248 | |
3604 .byte 102,68,15,56,222,192 | |
3605 .byte 102,68,15,56,222,200 | |
3606 movups 192-112(%rcx),%xmm0 | |
3607 je L$cbc_dec_done | |
3608 .byte 102,15,56,222,209 | |
3609 .byte 102,15,56,222,217 | |
3610 .byte 102,15,56,222,225 | |
3611 .byte 102,15,56,222,233 | |
3612 .byte 102,15,56,222,241 | |
3613 .byte 102,15,56,222,249 | |
3614 .byte 102,68,15,56,222,193 | |
3615 .byte 102,68,15,56,222,201 | |
3616 movups 208-112(%rcx),%xmm1 | |
3617 nop | |
3618 .byte 102,15,56,222,208 | |
3619 .byte 102,15,56,222,216 | |
3620 .byte 102,15,56,222,224 | |
3621 .byte 102,15,56,222,232 | |
3622 .byte 102,15,56,222,240 | |
3623 .byte 102,15,56,222,248 | |
3624 .byte 102,68,15,56,222,192 | |
3625 .byte 102,68,15,56,222,200 | |
3626 movups 224-112(%rcx),%xmm0 | |
3627 jmp L$cbc_dec_done | |
3628 .p2align 4 | |
3629 L$cbc_dec_done: | |
3630 .byte 102,15,56,222,209 | |
3631 .byte 102,15,56,222,217 | |
3632 pxor %xmm0,%xmm10 | |
3633 pxor %xmm0,%xmm11 | |
3634 .byte 102,15,56,222,225 | |
3635 .byte 102,15,56,222,233 | |
3636 pxor %xmm0,%xmm12 | |
3637 pxor %xmm0,%xmm13 | |
3638 .byte 102,15,56,222,241 | |
3639 .byte 102,15,56,222,249 | |
3640 pxor %xmm0,%xmm14 | |
3641 pxor %xmm0,%xmm15 | |
3642 .byte 102,68,15,56,222,193 | |
3643 .byte 102,68,15,56,222,201 | |
3644 movdqu 80(%rdi),%xmm1 | |
3645 | |
3646 .byte 102,65,15,56,223,210 | |
3647 movdqu 96(%rdi),%xmm10 | |
3648 pxor %xmm0,%xmm1 | |
3649 .byte 102,65,15,56,223,219 | |
3650 pxor %xmm0,%xmm10 | |
3651 movdqu 112(%rdi),%xmm0 | |
3652 .byte 102,65,15,56,223,228 | |
3653 leaq 128(%rdi),%rdi | |
3654 movdqu 0(%rbp),%xmm11 | |
3655 .byte 102,65,15,56,223,237 | |
3656 .byte 102,65,15,56,223,246 | |
3657 movdqu 16(%rbp),%xmm12 | |
3658 movdqu 32(%rbp),%xmm13 | |
3659 .byte 102,65,15,56,223,255 | |
3660 .byte 102,68,15,56,223,193 | |
3661 movdqu 48(%rbp),%xmm14 | |
3662 movdqu 64(%rbp),%xmm15 | |
3663 .byte 102,69,15,56,223,202 | |
3664 movdqa %xmm0,%xmm10 | |
3665 movdqu 80(%rbp),%xmm1 | |
3666 movups -112(%rcx),%xmm0 | |
3667 | |
3668 movups %xmm2,(%rsi) | |
3669 movdqa %xmm11,%xmm2 | |
3670 movups %xmm3,16(%rsi) | |
3671 movdqa %xmm12,%xmm3 | |
3672 movups %xmm4,32(%rsi) | |
3673 movdqa %xmm13,%xmm4 | |
3674 movups %xmm5,48(%rsi) | |
3675 movdqa %xmm14,%xmm5 | |
3676 movups %xmm6,64(%rsi) | |
3677 movdqa %xmm15,%xmm6 | |
3678 movups %xmm7,80(%rsi) | |
3679 movdqa %xmm1,%xmm7 | |
3680 movups %xmm8,96(%rsi) | |
3681 leaq 112(%rsi),%rsi | |
3682 | |
3683 subq $0x80,%rdx | |
3684 ja L$cbc_dec_loop8 | |
3685 | |
3686 movaps %xmm9,%xmm2 | |
3687 leaq -112(%rcx),%rcx | |
3688 addq $0x70,%rdx | |
3689 jle L$cbc_dec_clear_tail_collected | |
3690 movups %xmm9,(%rsi) | |
3691 leaq 16(%rsi),%rsi | |
3692 cmpq $0x50,%rdx | |
3693 jbe L$cbc_dec_tail | |
3694 | |
3695 movaps %xmm11,%xmm2 | |
3696 L$cbc_dec_six_or_seven: | |
3697 cmpq $0x60,%rdx | |
3698 ja L$cbc_dec_seven | |
3699 | |
3700 movaps %xmm7,%xmm8 | |
3701 call _aesni_decrypt6 | |
3702 pxor %xmm10,%xmm2 | |
3703 movaps %xmm8,%xmm10 | |
3704 pxor %xmm11,%xmm3 | |
3705 movdqu %xmm2,(%rsi) | |
3706 pxor %xmm12,%xmm4 | |
3707 movdqu %xmm3,16(%rsi) | |
3708 pxor %xmm3,%xmm3 | |
3709 pxor %xmm13,%xmm5 | |
3710 movdqu %xmm4,32(%rsi) | |
3711 pxor %xmm4,%xmm4 | |
3712 pxor %xmm14,%xmm6 | |
3713 movdqu %xmm5,48(%rsi) | |
3714 pxor %xmm5,%xmm5 | |
3715 pxor %xmm15,%xmm7 | |
3716 movdqu %xmm6,64(%rsi) | |
3717 pxor %xmm6,%xmm6 | |
3718 leaq 80(%rsi),%rsi | |
3719 movdqa %xmm7,%xmm2 | |
3720 pxor %xmm7,%xmm7 | |
3721 jmp L$cbc_dec_tail_collected | |
3722 | |
3723 .p2align 4 | |
3724 L$cbc_dec_seven: | |
3725 movups 96(%rdi),%xmm8 | |
3726 xorps %xmm9,%xmm9 | |
3727 call _aesni_decrypt8 | |
3728 movups 80(%rdi),%xmm9 | |
3729 pxor %xmm10,%xmm2 | |
3730 movups 96(%rdi),%xmm10 | |
3731 pxor %xmm11,%xmm3 | |
3732 movdqu %xmm2,(%rsi) | |
3733 pxor %xmm12,%xmm4 | |
3734 movdqu %xmm3,16(%rsi) | |
3735 pxor %xmm3,%xmm3 | |
3736 pxor %xmm13,%xmm5 | |
3737 movdqu %xmm4,32(%rsi) | |
3738 pxor %xmm4,%xmm4 | |
3739 pxor %xmm14,%xmm6 | |
3740 movdqu %xmm5,48(%rsi) | |
3741 pxor %xmm5,%xmm5 | |
3742 pxor %xmm15,%xmm7 | |
3743 movdqu %xmm6,64(%rsi) | |
3744 pxor %xmm6,%xmm6 | |
3745 pxor %xmm9,%xmm8 | |
3746 movdqu %xmm7,80(%rsi) | |
3747 pxor %xmm7,%xmm7 | |
3748 leaq 96(%rsi),%rsi | |
3749 movdqa %xmm8,%xmm2 | |
3750 pxor %xmm8,%xmm8 | |
3751 pxor %xmm9,%xmm9 | |
3752 jmp L$cbc_dec_tail_collected | |
3753 | |
3754 .p2align 4 | |
3755 L$cbc_dec_loop6: | |
3756 movups %xmm7,(%rsi) | |
3757 leaq 16(%rsi),%rsi | |
3758 movdqu 0(%rdi),%xmm2 | |
3759 movdqu 16(%rdi),%xmm3 | |
3760 movdqa %xmm2,%xmm11 | |
3761 movdqu 32(%rdi),%xmm4 | |
3762 movdqa %xmm3,%xmm12 | |
3763 movdqu 48(%rdi),%xmm5 | |
3764 movdqa %xmm4,%xmm13 | |
3765 movdqu 64(%rdi),%xmm6 | |
3766 movdqa %xmm5,%xmm14 | |
3767 movdqu 80(%rdi),%xmm7 | |
3768 movdqa %xmm6,%xmm15 | |
3769 L$cbc_dec_loop6_enter: | |
3770 leaq 96(%rdi),%rdi | |
3771 movdqa %xmm7,%xmm8 | |
3772 | |
3773 call _aesni_decrypt6 | |
3774 | |
3775 pxor %xmm10,%xmm2 | |
3776 movdqa %xmm8,%xmm10 | |
3777 pxor %xmm11,%xmm3 | |
3778 movdqu %xmm2,(%rsi) | |
3779 pxor %xmm12,%xmm4 | |
3780 movdqu %xmm3,16(%rsi) | |
3781 pxor %xmm13,%xmm5 | |
3782 movdqu %xmm4,32(%rsi) | |
3783 pxor %xmm14,%xmm6 | |
3784 movq %rbp,%rcx | |
3785 movdqu %xmm5,48(%rsi) | |
3786 pxor %xmm15,%xmm7 | |
3787 movl %r10d,%eax | |
3788 movdqu %xmm6,64(%rsi) | |
3789 leaq 80(%rsi),%rsi | |
3790 subq $0x60,%rdx | |
3791 ja L$cbc_dec_loop6 | |
3792 | |
3793 movdqa %xmm7,%xmm2 | |
3794 addq $0x50,%rdx | |
3795 jle L$cbc_dec_clear_tail_collected | |
3796 movups %xmm7,(%rsi) | |
3797 leaq 16(%rsi),%rsi | |
3798 | |
3799 L$cbc_dec_tail: | |
3800 movups (%rdi),%xmm2 | |
3801 subq $0x10,%rdx | |
3802 jbe L$cbc_dec_one | |
3803 | |
3804 movups 16(%rdi),%xmm3 | |
3805 movaps %xmm2,%xmm11 | |
3806 subq $0x10,%rdx | |
3807 jbe L$cbc_dec_two | |
3808 | |
3809 movups 32(%rdi),%xmm4 | |
3810 movaps %xmm3,%xmm12 | |
3811 subq $0x10,%rdx | |
3812 jbe L$cbc_dec_three | |
3813 | |
3814 movups 48(%rdi),%xmm5 | |
3815 movaps %xmm4,%xmm13 | |
3816 subq $0x10,%rdx | |
3817 jbe L$cbc_dec_four | |
3818 | |
3819 movups 64(%rdi),%xmm6 | |
3820 movaps %xmm5,%xmm14 | |
3821 movaps %xmm6,%xmm15 | |
3822 xorps %xmm7,%xmm7 | |
3823 call _aesni_decrypt6 | |
3824 pxor %xmm10,%xmm2 | |
3825 movaps %xmm15,%xmm10 | |
3826 pxor %xmm11,%xmm3 | |
3827 movdqu %xmm2,(%rsi) | |
3828 pxor %xmm12,%xmm4 | |
3829 movdqu %xmm3,16(%rsi) | |
3830 pxor %xmm3,%xmm3 | |
3831 pxor %xmm13,%xmm5 | |
3832 movdqu %xmm4,32(%rsi) | |
3833 pxor %xmm4,%xmm4 | |
3834 pxor %xmm14,%xmm6 | |
3835 movdqu %xmm5,48(%rsi) | |
3836 pxor %xmm5,%xmm5 | |
3837 leaq 64(%rsi),%rsi | |
3838 movdqa %xmm6,%xmm2 | |
3839 pxor %xmm6,%xmm6 | |
3840 pxor %xmm7,%xmm7 | |
3841 subq $0x10,%rdx | |
3842 jmp L$cbc_dec_tail_collected | |
3843 | |
3844 .p2align 4 | |
3845 L$cbc_dec_one: | |
3846 movaps %xmm2,%xmm11 | |
3847 movups (%rcx),%xmm0 | |
3848 movups 16(%rcx),%xmm1 | |
3849 leaq 32(%rcx),%rcx | |
3850 xorps %xmm0,%xmm2 | |
3851 L$oop_dec1_17: | |
3852 .byte 102,15,56,222,209 | |
3853 decl %eax | |
3854 movups (%rcx),%xmm1 | |
3855 leaq 16(%rcx),%rcx | |
3856 jnz L$oop_dec1_17 | |
3857 .byte 102,15,56,223,209 | |
3858 xorps %xmm10,%xmm2 | |
3859 movaps %xmm11,%xmm10 | |
3860 jmp L$cbc_dec_tail_collected | |
3861 .p2align 4 | |
3862 L$cbc_dec_two: | |
3863 movaps %xmm3,%xmm12 | |
3864 call _aesni_decrypt2 | |
3865 pxor %xmm10,%xmm2 | |
3866 movaps %xmm12,%xmm10 | |
3867 pxor %xmm11,%xmm3 | |
3868 movdqu %xmm2,(%rsi) | |
3869 movdqa %xmm3,%xmm2 | |
3870 pxor %xmm3,%xmm3 | |
3871 leaq 16(%rsi),%rsi | |
3872 jmp L$cbc_dec_tail_collected | |
3873 .p2align 4 | |
3874 L$cbc_dec_three: | |
3875 movaps %xmm4,%xmm13 | |
3876 call _aesni_decrypt3 | |
3877 pxor %xmm10,%xmm2 | |
3878 movaps %xmm13,%xmm10 | |
3879 pxor %xmm11,%xmm3 | |
3880 movdqu %xmm2,(%rsi) | |
3881 pxor %xmm12,%xmm4 | |
3882 movdqu %xmm3,16(%rsi) | |
3883 pxor %xmm3,%xmm3 | |
3884 movdqa %xmm4,%xmm2 | |
3885 pxor %xmm4,%xmm4 | |
3886 leaq 32(%rsi),%rsi | |
3887 jmp L$cbc_dec_tail_collected | |
3888 .p2align 4 | |
3889 L$cbc_dec_four: | |
3890 movaps %xmm5,%xmm14 | |
3891 call _aesni_decrypt4 | |
3892 pxor %xmm10,%xmm2 | |
3893 movaps %xmm14,%xmm10 | |
3894 pxor %xmm11,%xmm3 | |
3895 movdqu %xmm2,(%rsi) | |
3896 pxor %xmm12,%xmm4 | |
3897 movdqu %xmm3,16(%rsi) | |
3898 pxor %xmm3,%xmm3 | |
3899 pxor %xmm13,%xmm5 | |
3900 movdqu %xmm4,32(%rsi) | |
3901 pxor %xmm4,%xmm4 | |
3902 movdqa %xmm5,%xmm2 | |
3903 pxor %xmm5,%xmm5 | |
3904 leaq 48(%rsi),%rsi | |
3905 jmp L$cbc_dec_tail_collected | |
3906 | |
3907 .p2align 4 | |
3908 L$cbc_dec_clear_tail_collected: | |
3909 pxor %xmm3,%xmm3 | |
3910 pxor %xmm4,%xmm4 | |
3911 pxor %xmm5,%xmm5 | |
3912 pxor %xmm6,%xmm6 | |
3913 pxor %xmm7,%xmm7 | |
3914 pxor %xmm8,%xmm8 | |
3915 pxor %xmm9,%xmm9 | |
3916 L$cbc_dec_tail_collected: | |
3917 movups %xmm10,(%r8) | |
3918 andq $15,%rdx | |
3919 jnz L$cbc_dec_tail_partial | |
3920 movups %xmm2,(%rsi) | |
3921 pxor %xmm2,%xmm2 | |
3922 jmp L$cbc_dec_ret | |
3923 .p2align 4 | |
3924 L$cbc_dec_tail_partial: | |
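# Tail not block-aligned: park the held plaintext block on the stack, copy
# the tail out with rep movsb (.long 0x9066A4F3 again), then wipe the stack
# copy.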
3925 movaps %xmm2,(%rsp) | |
3926 pxor %xmm2,%xmm2 | |
3927 movq $16,%rcx | |
3928 movq %rsi,%rdi | |
3929 subq %rdx,%rcx | |
3930 leaq (%rsp),%rsi | |
3931 .long 0x9066A4F3 | |
3932 movdqa %xmm2,(%rsp) | |
3933 | |
3934 L$cbc_dec_ret: | |
3935 xorps %xmm0,%xmm0 | |
3936 pxor %xmm1,%xmm1 | |
3937 movq -8(%r11),%rbp | |
3938 leaq (%r11),%rsp | |
3939 L$cbc_ret: | |
3940 .byte 0xf3,0xc3 | |
3941 | |
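# Builds a decryption key schedule: run the encryption schedule first, then
# swap the round keys end-for-end and pass the inner ones through aesimc
# (.byte 102,15,56,219) for use with aesdec's equivalent-inverse-cipher
# convention.  The leading .byte 0x48,0x83,0xEC,0x08 is sub $8,%rsp.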
3942 .globl _aesni_set_decrypt_key | |
3943 .private_extern _aesni_set_decrypt_key | |
3944 | |
3945 .p2align 4 | |
3946 _aesni_set_decrypt_key: | |
3947 .byte 0x48,0x83,0xEC,0x08 | |
3948 call __aesni_set_encrypt_key | |
3949 shll $4,%esi | |
3950 testl %eax,%eax | |
3951 jnz L$dec_key_ret | |
3952 leaq 16(%rdx,%rsi,1),%rdi | |
3953 | |
3954 movups (%rdx),%xmm0 | |
3955 movups (%rdi),%xmm1 | |
3956 movups %xmm0,(%rdi) | |
3957 movups %xmm1,(%rdx) | |
3958 leaq 16(%rdx),%rdx | |
3959 leaq -16(%rdi),%rdi | |
3960 | |
3961 L$dec_key_inverse: | |
3962 movups (%rdx),%xmm0 | |
3963 movups (%rdi),%xmm1 | |
3964 .byte 102,15,56,219,192 | |
3965 .byte 102,15,56,219,201 | |
3966 leaq 16(%rdx),%rdx | |
3967 leaq -16(%rdi),%rdi | |
3968 movups %xmm0,16(%rdi) | |
3969 movups %xmm1,-16(%rdx) | |
3970 cmpq %rdx,%rdi | |
3971 ja L$dec_key_inverse | |
3972 | |
3973 movups (%rdx),%xmm0 | |
3974 .byte 102,15,56,219,192 | |
3975 pxor %xmm1,%xmm1 | |
3976 movups %xmm0,(%rdi) | |
3977 pxor %xmm0,%xmm0 | |
3978 L$dec_key_ret: | |
3979 addq $8,%rsp | |
3980 .byte 0xf3,0xc3 | |
3981 L$SEH_end_set_decrypt_key: | |
3982 | |
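# Expands the user key at %rdi (%esi bits: 128/192/256) into the schedule at
# %rdx and records the round count.  Returns 0 on success, -1 for a NULL
# pointer, -2 for an unsupported key size.  A capability test on
# OPENSSL_ia32cap_P selects between the classic aeskeygenassist-based
# expansion and the "_alt" paths below built from pshufb and aesenclast.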
3983 .globl _aesni_set_encrypt_key | |
3984 .private_extern _aesni_set_encrypt_key | |
3985 | |
3986 .p2align 4 | |
3987 _aesni_set_encrypt_key: | |
3988 __aesni_set_encrypt_key: | |
3989 .byte 0x48,0x83,0xEC,0x08 | |
3990 movq $-1,%rax | |
3991 testq %rdi,%rdi | |
3992 jz L$enc_key_ret | |
3993 testq %rdx,%rdx | |
3994 jz L$enc_key_ret | |
3995 | |
3996 movl $268437504,%r10d | |
3997 movups (%rdi),%xmm0 | |
3998 xorps %xmm4,%xmm4 | |
3999 andl _OPENSSL_ia32cap_P+4(%rip),%r10d | |
4000 leaq 16(%rdx),%rax | |
4001 cmpl $256,%esi | |
4002 je L$14rounds | |
4003 cmpl $192,%esi | |
4004 je L$12rounds | |
4005 cmpl $128,%esi | |
4006 jne L$bad_keybits | |
4007 | |
4008 L$10rounds: | |
4009 movl $9,%esi | |
4010 cmpl $268435456,%r10d | |
4011 je L$10rounds_alt | |
4012 | |
4013 movups %xmm0,(%rdx) | |
4014 .byte 102,15,58,223,200,1 | |
4015 call L$key_expansion_128_cold | |
4016 .byte 102,15,58,223,200,2 | |
4017 call L$key_expansion_128 | |
4018 .byte 102,15,58,223,200,4 | |
4019 call L$key_expansion_128 | |
4020 .byte 102,15,58,223,200,8 | |
4021 call L$key_expansion_128 | |
4022 .byte 102,15,58,223,200,16 | |
4023 call L$key_expansion_128 | |
4024 .byte 102,15,58,223,200,32 | |
4025 call L$key_expansion_128 | |
4026 .byte 102,15,58,223,200,64 | |
4027 call L$key_expansion_128 | |
4028 .byte 102,15,58,223,200,128 | |
4029 call L$key_expansion_128 | |
4030 .byte 102,15,58,223,200,27 | |
4031 call L$key_expansion_128 | |
4032 .byte 102,15,58,223,200,54 | |
4033 call L$key_expansion_128 | |
4034 movups %xmm0,(%rax) | |
4035 movl %esi,80(%rax) | |
4036 xorl %eax,%eax | |
4037 jmp L$enc_key_ret | |
4038 | |
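# Alternative 128-bit schedule: SubWord/RotWord is done with pshufb
# (L$key_rotate) feeding aesenclast with the round-constant vector in %xmm4,
# and the pslldq/pxor chains compute the running xor of the previous round
# key's words.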
4039 .p2align 4 | |
4040 L$10rounds_alt: | |
4041 movdqa L$key_rotate(%rip),%xmm5 | |
4042 movl $8,%r10d | |
4043 movdqa L$key_rcon1(%rip),%xmm4 | |
4044 movdqa %xmm0,%xmm2 | |
4045 movdqu %xmm0,(%rdx) | |
4046 jmp L$oop_key128 | |
4047 | |
4048 .p2align 4 | |
4049 L$oop_key128: | |
4050 .byte 102,15,56,0,197 | |
4051 .byte 102,15,56,221,196 | |
4052 pslld $1,%xmm4 | |
4053 leaq 16(%rax),%rax | |
4054 | |
4055 movdqa %xmm2,%xmm3 | |
4056 pslldq $4,%xmm2 | |
4057 pxor %xmm2,%xmm3 | |
4058 pslldq $4,%xmm2 | |
4059 pxor %xmm2,%xmm3 | |
4060 pslldq $4,%xmm2 | |
4061 pxor %xmm3,%xmm2 | |
4062 | |
4063 pxor %xmm2,%xmm0 | |
4064 movdqu %xmm0,-16(%rax) | |
4065 movdqa %xmm0,%xmm2 | |
4066 | |
4067 decl %r10d | |
4068 jnz L$oop_key128 | |
4069 | |
4070 movdqa L$key_rcon1b(%rip),%xmm4 | |
4071 | |
4072 .byte 102,15,56,0,197 | |
4073 .byte 102,15,56,221,196 | |
4074 pslld $1,%xmm4 | |
4075 | |
4076 movdqa %xmm2,%xmm3 | |
4077 pslldq $4,%xmm2 | |
4078 pxor %xmm2,%xmm3 | |
4079 pslldq $4,%xmm2 | |
4080 pxor %xmm2,%xmm3 | |
4081 pslldq $4,%xmm2 | |
4082 pxor %xmm3,%xmm2 | |
4083 | |
4084 pxor %xmm2,%xmm0 | |
4085 movdqu %xmm0,(%rax) | |
4086 | |
4087 movdqa %xmm0,%xmm2 | |
4088 .byte 102,15,56,0,197 | |
4089 .byte 102,15,56,221,196 | |
4090 | |
4091 movdqa %xmm2,%xmm3 | |
4092 pslldq $4,%xmm2 | |
4093 pxor %xmm2,%xmm3 | |
4094 pslldq $4,%xmm2 | |
4095 pxor %xmm2,%xmm3 | |
4096 pslldq $4,%xmm2 | |
4097 pxor %xmm3,%xmm2 | |
4098 | |
4099 pxor %xmm2,%xmm0 | |
4100 movdqu %xmm0,16(%rax) | |
4101 | |
4102 movl %esi,96(%rax) | |
4103 xorl %eax,%eax | |
4104 jmp L$enc_key_ret | |
4105 | |
4106 .p2align 4 | |
4107 L$12rounds: | |
4108 movq 16(%rdi),%xmm2 | |
4109 movl $11,%esi | |
4110 cmpl $268435456,%r10d | |
4111 je L$12rounds_alt | |
4112 | |
4113 movups %xmm0,(%rdx) | |
4114 .byte 102,15,58,223,202,1 | |
4115 call L$key_expansion_192a_cold | |
4116 .byte 102,15,58,223,202,2 | |
4117 call L$key_expansion_192b | |
4118 .byte 102,15,58,223,202,4 | |
4119 call L$key_expansion_192a | |
4120 .byte 102,15,58,223,202,8 | |
4121 call L$key_expansion_192b | |
4122 .byte 102,15,58,223,202,16 | |
4123 call L$key_expansion_192a | |
4124 .byte 102,15,58,223,202,32 | |
4125 call L$key_expansion_192b | |
4126 .byte 102,15,58,223,202,64 | |
4127 call L$key_expansion_192a | |
4128 .byte 102,15,58,223,202,128 | |
4129 call L$key_expansion_192b | |
4130 movups %xmm0,(%rax) | |
4131 movl %esi,48(%rax) | |
4132 xorq %rax,%rax | |
4133 jmp L$enc_key_ret | |
4134 | |
4135 .p2align 4 | |
4136 L$12rounds_alt: | |
4137 movdqa L$key_rotate192(%rip),%xmm5 | |
4138 movdqa L$key_rcon1(%rip),%xmm4 | |
4139 movl $8,%r10d | |
4140 movdqu %xmm0,(%rdx) | |
4141 jmp L$oop_key192 | |
4142 | |
4143 .p2align 4 | |
4144 L$oop_key192: | |
4145 movq %xmm2,0(%rax) | |
4146 movdqa %xmm2,%xmm1 | |
4147 .byte 102,15,56,0,213 | |
4148 .byte 102,15,56,221,212 | |
4149 pslld $1,%xmm4 | |
4150 leaq 24(%rax),%rax | |
4151 | |
4152 movdqa %xmm0,%xmm3 | |
4153 pslldq $4,%xmm0 | |
4154 pxor %xmm0,%xmm3 | |
4155 pslldq $4,%xmm0 | |
4156 pxor %xmm0,%xmm3 | |
4157 pslldq $4,%xmm0 | |
4158 pxor %xmm3,%xmm0 | |
4159 | |
4160 pshufd $0xff,%xmm0,%xmm3 | |
4161 pxor %xmm1,%xmm3 | |
4162 pslldq $4,%xmm1 | |
4163 pxor %xmm1,%xmm3 | |
4164 | |
4165 pxor %xmm2,%xmm0 | |
4166 pxor %xmm3,%xmm2 | |
4167 movdqu %xmm0,-16(%rax) | |
4168 | |
4169 decl %r10d | |
4170 jnz L$oop_key192 | |
4171 | |
4172 movl %esi,32(%rax) | |
4173 xorl %eax,%eax | |
4174 jmp L$enc_key_ret | |
4175 | |
4176 .p2align 4 | |
4177 L$14rounds: | |
4178 movups 16(%rdi),%xmm2 | |
4179 movl $13,%esi | |
4180 leaq 16(%rax),%rax | |
4181 cmpl $268435456,%r10d | |
4182 je L$14rounds_alt | |
4183 | |
4184 movups %xmm0,(%rdx) | |
4185 movups %xmm2,16(%rdx) | |
4186 .byte 102,15,58,223,202,1 | |
4187 call L$key_expansion_256a_cold | |
4188 .byte 102,15,58,223,200,1 | |
4189 call L$key_expansion_256b | |
4190 .byte 102,15,58,223,202,2 | |
4191 call L$key_expansion_256a | |
4192 .byte 102,15,58,223,200,2 | |
4193 call L$key_expansion_256b | |
4194 .byte 102,15,58,223,202,4 | |
4195 call L$key_expansion_256a | |
4196 .byte 102,15,58,223,200,4 | |
4197 call L$key_expansion_256b | |
4198 .byte 102,15,58,223,202,8 | |
4199 call L$key_expansion_256a | |
4200 .byte 102,15,58,223,200,8 | |
4201 call L$key_expansion_256b | |
4202 .byte 102,15,58,223,202,16 | |
4203 call L$key_expansion_256a | |
4204 .byte 102,15,58,223,200,16 | |
4205 call L$key_expansion_256b | |
4206 .byte 102,15,58,223,202,32 | |
4207 call L$key_expansion_256a | |
4208 .byte 102,15,58,223,200,32 | |
4209 call L$key_expansion_256b | |
4210 .byte 102,15,58,223,202,64 | |
4211 call L$key_expansion_256a | |
4212 movups %xmm0,(%rax) | |
4213 movl %esi,16(%rax) | |
4214 xorq %rax,%rax | |
4215 jmp L$enc_key_ret | |
4216 | |
4217 .p2align 4 | |
4218 L$14rounds_alt: | |
4219 movdqa L$key_rotate(%rip),%xmm5 | |
4220 movdqa L$key_rcon1(%rip),%xmm4 | |
4221 movl $7,%r10d | |
4222 movdqu %xmm0,0(%rdx) | |
4223 movdqa %xmm2,%xmm1 | |
4224 movdqu %xmm2,16(%rdx) | |
4225 jmp L$oop_key256 | |
4226 | |
4227 .p2align 4 | |
4228 L$oop_key256: | |
4229 .byte 102,15,56,0,213 | |
4230 .byte 102,15,56,221,212 | |
4231 | |
4232 movdqa %xmm0,%xmm3 | |
4233 pslldq $4,%xmm0 | |
4234 pxor %xmm0,%xmm3 | |
4235 pslldq $4,%xmm0 | |
4236 pxor %xmm0,%xmm3 | |
4237 pslldq $4,%xmm0 | |
4238 pxor %xmm3,%xmm0 | |
4239 pslld $1,%xmm4 | |
4240 | |
4241 pxor %xmm2,%xmm0 | |
4242 movdqu %xmm0,(%rax) | |
4243 | |
4244 decl %r10d | |
4245 jz L$done_key256 | |
4246 | |
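/* Odd round keys: pshufd $0xff broadcasts the last word of the key just
   written; with all four words equal, the zero-key aesenclast
   (102,15,56,221,211) degenerates to SubWord, standing in for
   AESKEYGENASSIST without its rotation or round-constant step. */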
4247 pshufd $0xff,%xmm0,%xmm2 | |
4248 pxor %xmm3,%xmm3 | |
4249 .byte 102,15,56,221,211 | |
4250 | |
4251 movdqa %xmm1,%xmm3 | |
4252 pslldq $4,%xmm1 | |
4253 pxor %xmm1,%xmm3 | |
4254 pslldq $4,%xmm1 | |
4255 pxor %xmm1,%xmm3 | |
4256 pslldq $4,%xmm1 | |
4257 pxor %xmm3,%xmm1 | |
4258 | |
4259 pxor %xmm1,%xmm2 | |
4260 movdqu %xmm2,16(%rax) | |
4261 leaq 32(%rax),%rax | |
4262 movdqa %xmm2,%xmm1 | |
4263 | |
4264 jmp L$oop_key256 | |
4265 | |
4266 L$done_key256: | |
4267 movl %esi,16(%rax) | |
4268 xorl %eax,%eax | |
4269 jmp L$enc_key_ret | |
4270 | |
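/* Exit paths: L$bad_keybits returns -2 for an unsupported key length, and
   L$enc_key_ret clears every XMM register that held key material before
   restoring the stack; ".byte 0xf3,0xc3" encodes "rep ret". */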
4271 .p2align 4 | |
4272 L$bad_keybits: | |
4273 movq $-2,%rax | |
4274 L$enc_key_ret: | |
4275 pxor %xmm0,%xmm0 | |
4276 pxor %xmm1,%xmm1 | |
4277 pxor %xmm2,%xmm2 | |
4278 pxor %xmm3,%xmm3 | |
4279 pxor %xmm4,%xmm4 | |
4280 pxor %xmm5,%xmm5 | |
4281 addq $8,%rsp | |
4282 .byte 0xf3,0xc3 | |
4283 L$SEH_end_set_encrypt_key: | |
4284 | |
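/* Key-expansion helpers: each receives the AESKEYGENASSIST result in %xmm1,
   mixes the previous round key's words with shufps/xorps, and xors in the
   shuffled assist value to produce the next round key before returning. */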
4285 .p2align 4 | |
4286 L$key_expansion_128: | |
4287 movups %xmm0,(%rax) | |
4288 leaq 16(%rax),%rax | |
4289 L$key_expansion_128_cold: | |
4290 shufps $16,%xmm0,%xmm4 | |
4291 xorps %xmm4,%xmm0 | |
4292 shufps $140,%xmm0,%xmm4 | |
4293 xorps %xmm4,%xmm0 | |
4294 shufps $255,%xmm1,%xmm1 | |
4295 xorps %xmm1,%xmm0 | |
4296 .byte 0xf3,0xc3 | |
4297 | |
4298 .p2align 4 | |
4299 L$key_expansion_192a: | |
4300 movups %xmm0,(%rax) | |
4301 leaq 16(%rax),%rax | |
4302 L$key_expansion_192a_cold: | |
4303 movaps %xmm2,%xmm5 | |
4304 L$key_expansion_192b_warm: | |
4305 shufps $16,%xmm0,%xmm4 | |
4306 movdqa %xmm2,%xmm3 | |
4307 xorps %xmm4,%xmm0 | |
4308 shufps $140,%xmm0,%xmm4 | |
4309 pslldq $4,%xmm3 | |
4310 xorps %xmm4,%xmm0 | |
4311 pshufd $85,%xmm1,%xmm1 | |
4312 pxor %xmm3,%xmm2 | |
4313 pxor %xmm1,%xmm0 | |
4314 pshufd $255,%xmm0,%xmm3 | |
4315 pxor %xmm3,%xmm2 | |
4316 .byte 0xf3,0xc3 | |
4317 | |
4318 .p2align 4 | |
4319 L$key_expansion_192b: | |
4320 movaps %xmm0,%xmm3 | |
4321 shufps $68,%xmm0,%xmm5 | |
4322 movups %xmm5,(%rax) | |
4323 shufps $78,%xmm2,%xmm3 | |
4324 movups %xmm3,16(%rax) | |
4325 leaq 32(%rax),%rax | |
4326 jmp L$key_expansion_192b_warm | |
4327 | |
4328 .p2align 4 | |
4329 L$key_expansion_256a: | |
4330 movups %xmm2,(%rax) | |
4331 leaq 16(%rax),%rax | |
4332 L$key_expansion_256a_cold: | |
4333 shufps $16,%xmm0,%xmm4 | |
4334 xorps %xmm4,%xmm0 | |
4335 shufps $140,%xmm0,%xmm4 | |
4336 xorps %xmm4,%xmm0 | |
4337 shufps $255,%xmm1,%xmm1 | |
4338 xorps %xmm1,%xmm0 | |
4339 .byte 0xf3,0xc3 | |
4340 | |
4341 .p2align 4 | |
4342 L$key_expansion_256b: | |
4343 movups %xmm0,(%rax) | |
4344 leaq 16(%rax),%rax | |
4345 | |
4346 shufps $16,%xmm2,%xmm4 | |
4347 xorps %xmm4,%xmm2 | |
4348 shufps $140,%xmm2,%xmm4 | |
4349 xorps %xmm4,%xmm2 | |
4350 shufps $170,%xmm1,%xmm1 | |
4351 xorps %xmm1,%xmm2 | |
4352 .byte 0xf3,0xc3 | |
4353 | |
4354 | |
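/* Constant pool: byte-swap mask and block-counter increments for the CTR/XTS
   routines earlier in the file, the XTS tweak constant 0x87, and the rotation
   masks plus round constants used by the AESKEYGENASSIST-free key-schedule
   paths above. */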
4355 .p2align 6 | |
4356 L$bswap_mask: | |
4357 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 | |
4358 L$increment32: | |
4359 .long 6,6,6,0 | |
4360 L$increment64: | |
4361 .long 1,0,0,0 | |
4362 L$xts_magic: | |
4363 .long 0x87,0,1,0 | |
4364 L$increment1: | |
4365 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 | |
4366 L$key_rotate: | |
4367 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d | |
4368 L$key_rotate192: | |
4369 .long 0x04070605,0x04070605,0x04070605,0x04070605 | |
4370 L$key_rcon1: | |
4371 .long 1,1,1,1 | |
4372 L$key_rcon1b: | |
4373 .long 0x1b,0x1b,0x1b,0x1b | |
4374 | |
4375 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | |
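/* The .byte string above is the ASCII credit
   "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>". */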
4376 .p2align 6 | |
4377 #endif | |