OLD | NEW |
| (Empty) |
1 #if defined(__x86_64__) | |
2 .text | |
3 | |
/* Single-block AES encryption.
 * Presumed C signature: aesni_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
 * (rdi = in, rsi = out, rdx = key) -- TODO confirm against the C header.
 * Round count is read from offset 240 of the key schedule; round keys start
 * at offset 0, 16 bytes apart.
 * Hand-encoded opcodes: .byte 102,15,56,220,209 = aesenc %xmm1,%xmm2;
 * .byte 102,15,56,221,209 = aesenclast %xmm1,%xmm2.
 * Key material (xmm0/xmm1) and the block copy (xmm2) are zeroed before
 * return for key hygiene. .byte 0xf3,0xc3 = rep ret. */
4 .globl _aesni_encrypt | |
5 .private_extern _aesni_encrypt | |
6 | |
7 .p2align 4 | |
8 _aesni_encrypt: | |
9 movups (%rdi),%xmm2 | |
10 movl 240(%rdx),%eax | |
11 movups (%rdx),%xmm0 | |
12 movups 16(%rdx),%xmm1 | |
13 leaq 32(%rdx),%rdx | |
14 xorps %xmm0,%xmm2 | |
/* One aesenc per remaining round; eax counts rounds down to 0. */
15 L$oop_enc1_1: | |
16 .byte 102,15,56,220,209 | |
17 decl %eax | |
18 movups (%rdx),%xmm1 | |
19 leaq 16(%rdx),%rdx | |
20 jnz L$oop_enc1_1 | |
21 .byte 102,15,56,221,209 | |
22 pxor %xmm0,%xmm0 | |
23 pxor %xmm1,%xmm1 | |
24 movups %xmm2,(%rsi) | |
25 pxor %xmm2,%xmm2 | |
26 .byte 0xf3,0xc3 | |
27 | |
28 | |
/* Single-block AES decryption; mirror of _aesni_encrypt.
 * Presumed C signature: aesni_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
 * (rdi = in, rsi = out, rdx = key schedule, rounds at offset 240) -- confirm.
 * .byte 102,15,56,222,209 = aesdec %xmm1,%xmm2;
 * .byte 102,15,56,223,209 = aesdeclast %xmm1,%xmm2.
 * xmm0/xmm1/xmm2 are cleared before return. */
29 .globl _aesni_decrypt | |
30 .private_extern _aesni_decrypt | |
31 | |
32 .p2align 4 | |
33 _aesni_decrypt: | |
34 movups (%rdi),%xmm2 | |
35 movl 240(%rdx),%eax | |
36 movups (%rdx),%xmm0 | |
37 movups 16(%rdx),%xmm1 | |
38 leaq 32(%rdx),%rdx | |
39 xorps %xmm0,%xmm2 | |
/* One aesdec per remaining round; eax counts down to 0. */
40 L$oop_dec1_2: | |
41 .byte 102,15,56,222,209 | |
42 decl %eax | |
43 movups (%rdx),%xmm1 | |
44 leaq 16(%rdx),%rdx | |
45 jnz L$oop_dec1_2 | |
46 .byte 102,15,56,223,209 | |
47 pxor %xmm0,%xmm0 | |
48 pxor %xmm1,%xmm1 | |
49 movups %xmm2,(%rsi) | |
50 pxor %xmm2,%xmm2 | |
51 .byte 0xf3,0xc3 | |
52 | |
53 | |
/* Internal helper (non-public custom convention): encrypt two blocks held in
 * xmm2/xmm3 in place, interleaving rounds to hide aesenc latency.
 * In: rcx = key schedule, eax = round count. eax is scaled to a byte offset
 * (shll $4), rcx is advanced past the schedule, and the loop walks the round
 * keys via a negative index in rax; the addq sets ZF for the jnz (movups does
 * not touch flags). .byte ...,220,xx = aesenc; ...,221,xx = aesenclast. */
54 .p2align 4 | |
55 _aesni_encrypt2: | |
56 movups (%rcx),%xmm0 | |
57 shll $4,%eax | |
58 movups 16(%rcx),%xmm1 | |
59 xorps %xmm0,%xmm2 | |
60 xorps %xmm0,%xmm3 | |
61 movups 32(%rcx),%xmm0 | |
62 leaq 32(%rcx,%rax,1),%rcx | |
63 negq %rax | |
64 addq $16,%rax | |
65 | |
66 L$enc_loop2: | |
67 .byte 102,15,56,220,209 | |
68 .byte 102,15,56,220,217 | |
69 movups (%rcx,%rax,1),%xmm1 | |
70 addq $32,%rax | |
71 .byte 102,15,56,220,208 | |
72 .byte 102,15,56,220,216 | |
73 movups -16(%rcx,%rax,1),%xmm0 | |
74 jnz L$enc_loop2 | |
75 | |
76 .byte 102,15,56,220,209 | |
77 .byte 102,15,56,220,217 | |
78 .byte 102,15,56,221,208 | |
79 .byte 102,15,56,221,216 | |
80 .byte 0xf3,0xc3 | |
81 | |
82 | |
/* Internal helper: decrypt two blocks (xmm2/xmm3) in place, rounds
 * interleaved. Same convention as _aesni_encrypt2: rcx = key schedule,
 * eax = rounds (scaled x16 into a negative running offset in rax).
 * .byte ...,222,xx = aesdec; ...,223,xx = aesdeclast. */
83 .p2align 4 | |
84 _aesni_decrypt2: | |
85 movups (%rcx),%xmm0 | |
86 shll $4,%eax | |
87 movups 16(%rcx),%xmm1 | |
88 xorps %xmm0,%xmm2 | |
89 xorps %xmm0,%xmm3 | |
90 movups 32(%rcx),%xmm0 | |
91 leaq 32(%rcx,%rax,1),%rcx | |
92 negq %rax | |
93 addq $16,%rax | |
94 | |
95 L$dec_loop2: | |
96 .byte 102,15,56,222,209 | |
97 .byte 102,15,56,222,217 | |
98 movups (%rcx,%rax,1),%xmm1 | |
99 addq $32,%rax | |
100 .byte 102,15,56,222,208 | |
101 .byte 102,15,56,222,216 | |
102 movups -16(%rcx,%rax,1),%xmm0 | |
103 jnz L$dec_loop2 | |
104 | |
105 .byte 102,15,56,222,209 | |
106 .byte 102,15,56,222,217 | |
107 .byte 102,15,56,223,208 | |
108 .byte 102,15,56,223,216 | |
109 .byte 0xf3,0xc3 | |
110 | |
111 | |
/* Internal helper: encrypt three blocks (xmm2..xmm4) in place, rounds
 * interleaved across the three pipelines.
 * In: rcx = key schedule, eax = rounds (scaled x16, walked via negative
 * offset in rax as in _aesni_encrypt2). */
112 .p2align 4 | |
113 _aesni_encrypt3: | |
114 movups (%rcx),%xmm0 | |
115 shll $4,%eax | |
116 movups 16(%rcx),%xmm1 | |
117 xorps %xmm0,%xmm2 | |
118 xorps %xmm0,%xmm3 | |
119 xorps %xmm0,%xmm4 | |
120 movups 32(%rcx),%xmm0 | |
121 leaq 32(%rcx,%rax,1),%rcx | |
122 negq %rax | |
123 addq $16,%rax | |
124 | |
125 L$enc_loop3: | |
126 .byte 102,15,56,220,209 | |
127 .byte 102,15,56,220,217 | |
128 .byte 102,15,56,220,225 | |
129 movups (%rcx,%rax,1),%xmm1 | |
130 addq $32,%rax | |
131 .byte 102,15,56,220,208 | |
132 .byte 102,15,56,220,216 | |
133 .byte 102,15,56,220,224 | |
134 movups -16(%rcx,%rax,1),%xmm0 | |
135 jnz L$enc_loop3 | |
136 | |
137 .byte 102,15,56,220,209 | |
138 .byte 102,15,56,220,217 | |
139 .byte 102,15,56,220,225 | |
140 .byte 102,15,56,221,208 | |
141 .byte 102,15,56,221,216 | |
142 .byte 102,15,56,221,224 | |
143 .byte 0xf3,0xc3 | |
144 | |
145 | |
/* Internal helper: decrypt three blocks (xmm2..xmm4) in place; mirror of
 * _aesni_encrypt3 using aesdec/aesdeclast (.byte ...,222/223,xx).
 * In: rcx = key schedule, eax = rounds. */
146 .p2align 4 | |
147 _aesni_decrypt3: | |
148 movups (%rcx),%xmm0 | |
149 shll $4,%eax | |
150 movups 16(%rcx),%xmm1 | |
151 xorps %xmm0,%xmm2 | |
152 xorps %xmm0,%xmm3 | |
153 xorps %xmm0,%xmm4 | |
154 movups 32(%rcx),%xmm0 | |
155 leaq 32(%rcx,%rax,1),%rcx | |
156 negq %rax | |
157 addq $16,%rax | |
158 | |
159 L$dec_loop3: | |
160 .byte 102,15,56,222,209 | |
161 .byte 102,15,56,222,217 | |
162 .byte 102,15,56,222,225 | |
163 movups (%rcx,%rax,1),%xmm1 | |
164 addq $32,%rax | |
165 .byte 102,15,56,222,208 | |
166 .byte 102,15,56,222,216 | |
167 .byte 102,15,56,222,224 | |
168 movups -16(%rcx,%rax,1),%xmm0 | |
169 jnz L$dec_loop3 | |
170 | |
171 .byte 102,15,56,222,209 | |
172 .byte 102,15,56,222,217 | |
173 .byte 102,15,56,222,225 | |
174 .byte 102,15,56,223,208 | |
175 .byte 102,15,56,223,216 | |
176 .byte 102,15,56,223,224 | |
177 .byte 0xf3,0xc3 | |
178 | |
179 | |
/* Internal helper: encrypt four blocks (xmm2..xmm5) in place, rounds
 * interleaved. In: rcx = key schedule, eax = rounds.
 * .byte 0x0f,0x1f,0x00 is a 3-byte NOP (nopl (%rax)) -- padding so the hot
 * loop entry lands on a favorable alignment. */
180 .p2align 4 | |
181 _aesni_encrypt4: | |
182 movups (%rcx),%xmm0 | |
183 shll $4,%eax | |
184 movups 16(%rcx),%xmm1 | |
185 xorps %xmm0,%xmm2 | |
186 xorps %xmm0,%xmm3 | |
187 xorps %xmm0,%xmm4 | |
188 xorps %xmm0,%xmm5 | |
189 movups 32(%rcx),%xmm0 | |
190 leaq 32(%rcx,%rax,1),%rcx | |
191 negq %rax | |
192 .byte 0x0f,0x1f,0x00 | |
193 addq $16,%rax | |
194 | |
195 L$enc_loop4: | |
196 .byte 102,15,56,220,209 | |
197 .byte 102,15,56,220,217 | |
198 .byte 102,15,56,220,225 | |
199 .byte 102,15,56,220,233 | |
200 movups (%rcx,%rax,1),%xmm1 | |
201 addq $32,%rax | |
202 .byte 102,15,56,220,208 | |
203 .byte 102,15,56,220,216 | |
204 .byte 102,15,56,220,224 | |
205 .byte 102,15,56,220,232 | |
206 movups -16(%rcx,%rax,1),%xmm0 | |
207 jnz L$enc_loop4 | |
208 | |
209 .byte 102,15,56,220,209 | |
210 .byte 102,15,56,220,217 | |
211 .byte 102,15,56,220,225 | |
212 .byte 102,15,56,220,233 | |
213 .byte 102,15,56,221,208 | |
214 .byte 102,15,56,221,216 | |
215 .byte 102,15,56,221,224 | |
216 .byte 102,15,56,221,232 | |
217 .byte 0xf3,0xc3 | |
218 | |
219 | |
/* Internal helper: decrypt four blocks (xmm2..xmm5) in place; mirror of
 * _aesni_encrypt4 using aesdec/aesdeclast. In: rcx = key schedule,
 * eax = rounds. .byte 0x0f,0x1f,0x00 = 3-byte NOP for loop alignment. */
220 .p2align 4 | |
221 _aesni_decrypt4: | |
222 movups (%rcx),%xmm0 | |
223 shll $4,%eax | |
224 movups 16(%rcx),%xmm1 | |
225 xorps %xmm0,%xmm2 | |
226 xorps %xmm0,%xmm3 | |
227 xorps %xmm0,%xmm4 | |
228 xorps %xmm0,%xmm5 | |
229 movups 32(%rcx),%xmm0 | |
230 leaq 32(%rcx,%rax,1),%rcx | |
231 negq %rax | |
232 .byte 0x0f,0x1f,0x00 | |
233 addq $16,%rax | |
234 | |
235 L$dec_loop4: | |
236 .byte 102,15,56,222,209 | |
237 .byte 102,15,56,222,217 | |
238 .byte 102,15,56,222,225 | |
239 .byte 102,15,56,222,233 | |
240 movups (%rcx,%rax,1),%xmm1 | |
241 addq $32,%rax | |
242 .byte 102,15,56,222,208 | |
243 .byte 102,15,56,222,216 | |
244 .byte 102,15,56,222,224 | |
245 .byte 102,15,56,222,232 | |
246 movups -16(%rcx,%rax,1),%xmm0 | |
247 jnz L$dec_loop4 | |
248 | |
249 .byte 102,15,56,222,209 | |
250 .byte 102,15,56,222,217 | |
251 .byte 102,15,56,222,225 | |
252 .byte 102,15,56,222,233 | |
253 .byte 102,15,56,223,208 | |
254 .byte 102,15,56,223,216 | |
255 .byte 102,15,56,223,224 | |
256 .byte 102,15,56,223,232 | |
257 .byte 0xf3,0xc3 | |
258 | |
259 | |
/* Internal helper: encrypt six blocks (xmm2..xmm7) in place.
 * In: rcx = key schedule, eax = rounds. The first round for xmm2..xmm4 is
 * started early (before the key-whitening of xmm5..xmm7 completes) to keep
 * the AES units busy; the loop is entered at L$enc_loop6_enter so those
 * already-issued rounds are not repeated on the first iteration. */
260 .p2align 4 | |
261 _aesni_encrypt6: | |
262 movups (%rcx),%xmm0 | |
263 shll $4,%eax | |
264 movups 16(%rcx),%xmm1 | |
265 xorps %xmm0,%xmm2 | |
266 pxor %xmm0,%xmm3 | |
267 pxor %xmm0,%xmm4 | |
268 .byte 102,15,56,220,209 | |
269 leaq 32(%rcx,%rax,1),%rcx | |
270 negq %rax | |
271 .byte 102,15,56,220,217 | |
272 pxor %xmm0,%xmm5 | |
273 pxor %xmm0,%xmm6 | |
274 .byte 102,15,56,220,225 | |
275 pxor %xmm0,%xmm7 | |
276 movups (%rcx,%rax,1),%xmm0 | |
277 addq $16,%rax | |
278 jmp L$enc_loop6_enter | |
279 .p2align 4 | |
280 L$enc_loop6: | |
281 .byte 102,15,56,220,209 | |
282 .byte 102,15,56,220,217 | |
283 .byte 102,15,56,220,225 | |
284 L$enc_loop6_enter: | |
285 .byte 102,15,56,220,233 | |
286 .byte 102,15,56,220,241 | |
287 .byte 102,15,56,220,249 | |
288 movups (%rcx,%rax,1),%xmm1 | |
289 addq $32,%rax | |
290 .byte 102,15,56,220,208 | |
291 .byte 102,15,56,220,216 | |
292 .byte 102,15,56,220,224 | |
293 .byte 102,15,56,220,232 | |
294 .byte 102,15,56,220,240 | |
295 .byte 102,15,56,220,248 | |
296 movups -16(%rcx,%rax,1),%xmm0 | |
297 jnz L$enc_loop6 | |
298 | |
299 .byte 102,15,56,220,209 | |
300 .byte 102,15,56,220,217 | |
301 .byte 102,15,56,220,225 | |
302 .byte 102,15,56,220,233 | |
303 .byte 102,15,56,220,241 | |
304 .byte 102,15,56,220,249 | |
305 .byte 102,15,56,221,208 | |
306 .byte 102,15,56,221,216 | |
307 .byte 102,15,56,221,224 | |
308 .byte 102,15,56,221,232 | |
309 .byte 102,15,56,221,240 | |
310 .byte 102,15,56,221,248 | |
311 .byte 0xf3,0xc3 | |
312 | |
313 | |
/* Internal helper: decrypt six blocks (xmm2..xmm7) in place; mirror of
 * _aesni_encrypt6 with aesdec/aesdeclast and the same early-start /
 * mid-loop-entry (L$dec_loop6_enter) scheduling trick.
 * In: rcx = key schedule, eax = rounds. */
314 .p2align 4 | |
315 _aesni_decrypt6: | |
316 movups (%rcx),%xmm0 | |
317 shll $4,%eax | |
318 movups 16(%rcx),%xmm1 | |
319 xorps %xmm0,%xmm2 | |
320 pxor %xmm0,%xmm3 | |
321 pxor %xmm0,%xmm4 | |
322 .byte 102,15,56,222,209 | |
323 leaq 32(%rcx,%rax,1),%rcx | |
324 negq %rax | |
325 .byte 102,15,56,222,217 | |
326 pxor %xmm0,%xmm5 | |
327 pxor %xmm0,%xmm6 | |
328 .byte 102,15,56,222,225 | |
329 pxor %xmm0,%xmm7 | |
330 movups (%rcx,%rax,1),%xmm0 | |
331 addq $16,%rax | |
332 jmp L$dec_loop6_enter | |
333 .p2align 4 | |
334 L$dec_loop6: | |
335 .byte 102,15,56,222,209 | |
336 .byte 102,15,56,222,217 | |
337 .byte 102,15,56,222,225 | |
338 L$dec_loop6_enter: | |
339 .byte 102,15,56,222,233 | |
340 .byte 102,15,56,222,241 | |
341 .byte 102,15,56,222,249 | |
342 movups (%rcx,%rax,1),%xmm1 | |
343 addq $32,%rax | |
344 .byte 102,15,56,222,208 | |
345 .byte 102,15,56,222,216 | |
346 .byte 102,15,56,222,224 | |
347 .byte 102,15,56,222,232 | |
348 .byte 102,15,56,222,240 | |
349 .byte 102,15,56,222,248 | |
350 movups -16(%rcx,%rax,1),%xmm0 | |
351 jnz L$dec_loop6 | |
352 | |
353 .byte 102,15,56,222,209 | |
354 .byte 102,15,56,222,217 | |
355 .byte 102,15,56,222,225 | |
356 .byte 102,15,56,222,233 | |
357 .byte 102,15,56,222,241 | |
358 .byte 102,15,56,222,249 | |
359 .byte 102,15,56,223,208 | |
360 .byte 102,15,56,223,216 | |
361 .byte 102,15,56,223,224 | |
362 .byte 102,15,56,223,232 | |
363 .byte 102,15,56,223,240 | |
364 .byte 102,15,56,223,248 | |
365 .byte 0xf3,0xc3 | |
366 | |
367 | |
/* Internal helper: encrypt eight blocks (xmm2..xmm9) in place.
 * In: rcx = key schedule, eax = rounds. Rounds for xmm8/xmm9 use
 * REX-prefixed hand-encoded forms: .byte 102,68,15,56,220,xx = aesenc with
 * an xmm8-15 destination (221 = aesenclast). Loop is entered at
 * L$enc_loop8_inner to skip the two rounds already issued for xmm2/xmm3. */
368 .p2align 4 | |
369 _aesni_encrypt8: | |
370 movups (%rcx),%xmm0 | |
371 shll $4,%eax | |
372 movups 16(%rcx),%xmm1 | |
373 xorps %xmm0,%xmm2 | |
374 xorps %xmm0,%xmm3 | |
375 pxor %xmm0,%xmm4 | |
376 pxor %xmm0,%xmm5 | |
377 pxor %xmm0,%xmm6 | |
378 leaq 32(%rcx,%rax,1),%rcx | |
379 negq %rax | |
380 .byte 102,15,56,220,209 | |
381 pxor %xmm0,%xmm7 | |
382 pxor %xmm0,%xmm8 | |
383 .byte 102,15,56,220,217 | |
384 pxor %xmm0,%xmm9 | |
385 movups (%rcx,%rax,1),%xmm0 | |
386 addq $16,%rax | |
387 jmp L$enc_loop8_inner | |
388 .p2align 4 | |
389 L$enc_loop8: | |
390 .byte 102,15,56,220,209 | |
391 .byte 102,15,56,220,217 | |
392 L$enc_loop8_inner: | |
393 .byte 102,15,56,220,225 | |
394 .byte 102,15,56,220,233 | |
395 .byte 102,15,56,220,241 | |
396 .byte 102,15,56,220,249 | |
397 .byte 102,68,15,56,220,193 | |
398 .byte 102,68,15,56,220,201 | |
399 L$enc_loop8_enter: | |
400 movups (%rcx,%rax,1),%xmm1 | |
401 addq $32,%rax | |
402 .byte 102,15,56,220,208 | |
403 .byte 102,15,56,220,216 | |
404 .byte 102,15,56,220,224 | |
405 .byte 102,15,56,220,232 | |
406 .byte 102,15,56,220,240 | |
407 .byte 102,15,56,220,248 | |
408 .byte 102,68,15,56,220,192 | |
409 .byte 102,68,15,56,220,200 | |
410 movups -16(%rcx,%rax,1),%xmm0 | |
411 jnz L$enc_loop8 | |
412 | |
413 .byte 102,15,56,220,209 | |
414 .byte 102,15,56,220,217 | |
415 .byte 102,15,56,220,225 | |
416 .byte 102,15,56,220,233 | |
417 .byte 102,15,56,220,241 | |
418 .byte 102,15,56,220,249 | |
419 .byte 102,68,15,56,220,193 | |
420 .byte 102,68,15,56,220,201 | |
421 .byte 102,15,56,221,208 | |
422 .byte 102,15,56,221,216 | |
423 .byte 102,15,56,221,224 | |
424 .byte 102,15,56,221,232 | |
425 .byte 102,15,56,221,240 | |
426 .byte 102,15,56,221,248 | |
427 .byte 102,68,15,56,221,192 | |
428 .byte 102,68,15,56,221,200 | |
429 .byte 0xf3,0xc3 | |
430 | |
431 | |
/* Internal helper: decrypt eight blocks (xmm2..xmm9) in place; mirror of
 * _aesni_encrypt8. .byte 102,68,15,56,222/223,xx = aesdec/aesdeclast with an
 * xmm8-15 destination. In: rcx = key schedule, eax = rounds. */
432 .p2align 4 | |
433 _aesni_decrypt8: | |
434 movups (%rcx),%xmm0 | |
435 shll $4,%eax | |
436 movups 16(%rcx),%xmm1 | |
437 xorps %xmm0,%xmm2 | |
438 xorps %xmm0,%xmm3 | |
439 pxor %xmm0,%xmm4 | |
440 pxor %xmm0,%xmm5 | |
441 pxor %xmm0,%xmm6 | |
442 leaq 32(%rcx,%rax,1),%rcx | |
443 negq %rax | |
444 .byte 102,15,56,222,209 | |
445 pxor %xmm0,%xmm7 | |
446 pxor %xmm0,%xmm8 | |
447 .byte 102,15,56,222,217 | |
448 pxor %xmm0,%xmm9 | |
449 movups (%rcx,%rax,1),%xmm0 | |
450 addq $16,%rax | |
451 jmp L$dec_loop8_inner | |
452 .p2align 4 | |
453 L$dec_loop8: | |
454 .byte 102,15,56,222,209 | |
455 .byte 102,15,56,222,217 | |
456 L$dec_loop8_inner: | |
457 .byte 102,15,56,222,225 | |
458 .byte 102,15,56,222,233 | |
459 .byte 102,15,56,222,241 | |
460 .byte 102,15,56,222,249 | |
461 .byte 102,68,15,56,222,193 | |
462 .byte 102,68,15,56,222,201 | |
463 L$dec_loop8_enter: | |
464 movups (%rcx,%rax,1),%xmm1 | |
465 addq $32,%rax | |
466 .byte 102,15,56,222,208 | |
467 .byte 102,15,56,222,216 | |
468 .byte 102,15,56,222,224 | |
469 .byte 102,15,56,222,232 | |
470 .byte 102,15,56,222,240 | |
471 .byte 102,15,56,222,248 | |
472 .byte 102,68,15,56,222,192 | |
473 .byte 102,68,15,56,222,200 | |
474 movups -16(%rcx,%rax,1),%xmm0 | |
475 jnz L$dec_loop8 | |
476 | |
477 .byte 102,15,56,222,209 | |
478 .byte 102,15,56,222,217 | |
479 .byte 102,15,56,222,225 | |
480 .byte 102,15,56,222,233 | |
481 .byte 102,15,56,222,241 | |
482 .byte 102,15,56,222,249 | |
483 .byte 102,68,15,56,222,193 | |
484 .byte 102,68,15,56,222,201 | |
485 .byte 102,15,56,223,208 | |
486 .byte 102,15,56,223,216 | |
487 .byte 102,15,56,223,224 | |
488 .byte 102,15,56,223,232 | |
489 .byte 102,15,56,223,240 | |
490 .byte 102,15,56,223,248 | |
491 .byte 102,68,15,56,223,192 | |
492 .byte 102,68,15,56,223,200 | |
493 .byte 0xf3,0xc3 | |
494 | |
/* AES-ECB bulk encryption/decryption.
 * Presumed C signature:
 *   aesni_ecb_encrypt(const u8 *in, u8 *out, size_t len,
 *                     const AES_KEY *key, int enc)
 * (rdi = in, rsi = out, rdx = len in bytes, rcx = key, r8d = enc flag:
 * nonzero -> encrypt, zero -> decrypt) -- TODO confirm against the header.
 * len is truncated to a multiple of 16 (andq $-16); len < 16 returns
 * immediately. Key pointer and round count are preserved across the
 * 8-block helper calls in r11/r10d. Processes 8 blocks per iteration via
 * _aesni_encrypt8/_aesni_decrypt8, then dispatches a 1..7-block tail.
 * The decrypt path additionally zeroes all data-bearing xmm registers. */
495 .globl _aesni_ecb_encrypt | |
496 .private_extern _aesni_ecb_encrypt | |
497 | |
498 .p2align 4 | |
499 _aesni_ecb_encrypt: | |
500 andq $-16,%rdx | |
501 jz L$ecb_ret | |
502 | |
503 movl 240(%rcx),%eax | |
504 movups (%rcx),%xmm0 | |
505 movq %rcx,%r11 | |
506 movl %eax,%r10d | |
507 testl %r8d,%r8d | |
508 jz L$ecb_decrypt | |
509 | |
510 cmpq $0x80,%rdx | |
511 jb L$ecb_enc_tail | |
512 | |
/* >= 128 bytes: software-pipelined 8-block encrypt loop (load next batch
 * while storing the previous one). */
513 movdqu (%rdi),%xmm2 | |
514 movdqu 16(%rdi),%xmm3 | |
515 movdqu 32(%rdi),%xmm4 | |
516 movdqu 48(%rdi),%xmm5 | |
517 movdqu 64(%rdi),%xmm6 | |
518 movdqu 80(%rdi),%xmm7 | |
519 movdqu 96(%rdi),%xmm8 | |
520 movdqu 112(%rdi),%xmm9 | |
521 leaq 128(%rdi),%rdi | |
522 subq $0x80,%rdx | |
523 jmp L$ecb_enc_loop8_enter | |
524 .p2align 4 | |
525 L$ecb_enc_loop8: | |
526 movups %xmm2,(%rsi) | |
527 movq %r11,%rcx | |
528 movdqu (%rdi),%xmm2 | |
529 movl %r10d,%eax | |
530 movups %xmm3,16(%rsi) | |
531 movdqu 16(%rdi),%xmm3 | |
532 movups %xmm4,32(%rsi) | |
533 movdqu 32(%rdi),%xmm4 | |
534 movups %xmm5,48(%rsi) | |
535 movdqu 48(%rdi),%xmm5 | |
536 movups %xmm6,64(%rsi) | |
537 movdqu 64(%rdi),%xmm6 | |
538 movups %xmm7,80(%rsi) | |
539 movdqu 80(%rdi),%xmm7 | |
540 movups %xmm8,96(%rsi) | |
541 movdqu 96(%rdi),%xmm8 | |
542 movups %xmm9,112(%rsi) | |
543 leaq 128(%rsi),%rsi | |
544 movdqu 112(%rdi),%xmm9 | |
545 leaq 128(%rdi),%rdi | |
546 L$ecb_enc_loop8_enter: | |
547 | |
548 call _aesni_encrypt8 | |
549 | |
550 subq $0x80,%rdx | |
551 jnc L$ecb_enc_loop8 | |
552 | |
/* Flush the final 8-block batch, then fall into the tail dispatcher. */
553 movups %xmm2,(%rsi) | |
554 movq %r11,%rcx | |
555 movups %xmm3,16(%rsi) | |
556 movl %r10d,%eax | |
557 movups %xmm4,32(%rsi) | |
558 movups %xmm5,48(%rsi) | |
559 movups %xmm6,64(%rsi) | |
560 movups %xmm7,80(%rsi) | |
561 movups %xmm8,96(%rsi) | |
562 movups %xmm9,112(%rsi) | |
563 leaq 128(%rsi),%rsi | |
564 addq $0x80,%rdx | |
565 jz L$ecb_ret | |
566 | |
/* Tail: 1..7 remaining blocks, dispatched by byte count. */
567 L$ecb_enc_tail: | |
568 movups (%rdi),%xmm2 | |
569 cmpq $0x20,%rdx | |
570 jb L$ecb_enc_one | |
571 movups 16(%rdi),%xmm3 | |
572 je L$ecb_enc_two | |
573 movups 32(%rdi),%xmm4 | |
574 cmpq $0x40,%rdx | |
575 jb L$ecb_enc_three | |
576 movups 48(%rdi),%xmm5 | |
577 je L$ecb_enc_four | |
578 movups 64(%rdi),%xmm6 | |
579 cmpq $0x60,%rdx | |
580 jb L$ecb_enc_five | |
581 movups 80(%rdi),%xmm7 | |
582 je L$ecb_enc_six | |
583 movdqu 96(%rdi),%xmm8 | |
584 xorps %xmm9,%xmm9 | |
585 call _aesni_encrypt8 | |
586 movups %xmm2,(%rsi) | |
587 movups %xmm3,16(%rsi) | |
588 movups %xmm4,32(%rsi) | |
589 movups %xmm5,48(%rsi) | |
590 movups %xmm6,64(%rsi) | |
591 movups %xmm7,80(%rsi) | |
592 movups %xmm8,96(%rsi) | |
593 jmp L$ecb_ret | |
594 .p2align 4 | |
/* Single block: inline round loop (aesenc/aesenclast via .byte). */
595 L$ecb_enc_one: | |
596 movups (%rcx),%xmm0 | |
597 movups 16(%rcx),%xmm1 | |
598 leaq 32(%rcx),%rcx | |
599 xorps %xmm0,%xmm2 | |
600 L$oop_enc1_3: | |
601 .byte 102,15,56,220,209 | |
602 decl %eax | |
603 movups (%rcx),%xmm1 | |
604 leaq 16(%rcx),%rcx | |
605 jnz L$oop_enc1_3 | |
606 .byte 102,15,56,221,209 | |
607 movups %xmm2,(%rsi) | |
608 jmp L$ecb_ret | |
609 .p2align 4 | |
610 L$ecb_enc_two: | |
611 call _aesni_encrypt2 | |
612 movups %xmm2,(%rsi) | |
613 movups %xmm3,16(%rsi) | |
614 jmp L$ecb_ret | |
615 .p2align 4 | |
616 L$ecb_enc_three: | |
617 call _aesni_encrypt3 | |
618 movups %xmm2,(%rsi) | |
619 movups %xmm3,16(%rsi) | |
620 movups %xmm4,32(%rsi) | |
621 jmp L$ecb_ret | |
622 .p2align 4 | |
623 L$ecb_enc_four: | |
624 call _aesni_encrypt4 | |
625 movups %xmm2,(%rsi) | |
626 movups %xmm3,16(%rsi) | |
627 movups %xmm4,32(%rsi) | |
628 movups %xmm5,48(%rsi) | |
629 jmp L$ecb_ret | |
630 .p2align 4 | |
/* Five blocks reuse the 6-block helper with a zeroed dummy sixth block. */
631 L$ecb_enc_five: | |
632 xorps %xmm7,%xmm7 | |
633 call _aesni_encrypt6 | |
634 movups %xmm2,(%rsi) | |
635 movups %xmm3,16(%rsi) | |
636 movups %xmm4,32(%rsi) | |
637 movups %xmm5,48(%rsi) | |
638 movups %xmm6,64(%rsi) | |
639 jmp L$ecb_ret | |
640 .p2align 4 | |
641 L$ecb_enc_six: | |
642 call _aesni_encrypt6 | |
643 movups %xmm2,(%rsi) | |
644 movups %xmm3,16(%rsi) | |
645 movups %xmm4,32(%rsi) | |
646 movups %xmm5,48(%rsi) | |
647 movups %xmm6,64(%rsi) | |
648 movups %xmm7,80(%rsi) | |
649 jmp L$ecb_ret | |
650 | |
651 .p2align 4 | |
/* Decrypt side: same structure as the encrypt path, but each stored block
 * register is zeroed immediately after its store (plaintext hygiene). */
652 L$ecb_decrypt: | |
653 cmpq $0x80,%rdx | |
654 jb L$ecb_dec_tail | |
655 | |
656 movdqu (%rdi),%xmm2 | |
657 movdqu 16(%rdi),%xmm3 | |
658 movdqu 32(%rdi),%xmm4 | |
659 movdqu 48(%rdi),%xmm5 | |
660 movdqu 64(%rdi),%xmm6 | |
661 movdqu 80(%rdi),%xmm7 | |
662 movdqu 96(%rdi),%xmm8 | |
663 movdqu 112(%rdi),%xmm9 | |
664 leaq 128(%rdi),%rdi | |
665 subq $0x80,%rdx | |
666 jmp L$ecb_dec_loop8_enter | |
667 .p2align 4 | |
668 L$ecb_dec_loop8: | |
669 movups %xmm2,(%rsi) | |
670 movq %r11,%rcx | |
671 movdqu (%rdi),%xmm2 | |
672 movl %r10d,%eax | |
673 movups %xmm3,16(%rsi) | |
674 movdqu 16(%rdi),%xmm3 | |
675 movups %xmm4,32(%rsi) | |
676 movdqu 32(%rdi),%xmm4 | |
677 movups %xmm5,48(%rsi) | |
678 movdqu 48(%rdi),%xmm5 | |
679 movups %xmm6,64(%rsi) | |
680 movdqu 64(%rdi),%xmm6 | |
681 movups %xmm7,80(%rsi) | |
682 movdqu 80(%rdi),%xmm7 | |
683 movups %xmm8,96(%rsi) | |
684 movdqu 96(%rdi),%xmm8 | |
685 movups %xmm9,112(%rsi) | |
686 leaq 128(%rsi),%rsi | |
687 movdqu 112(%rdi),%xmm9 | |
688 leaq 128(%rdi),%rdi | |
689 L$ecb_dec_loop8_enter: | |
690 | |
691 call _aesni_decrypt8 | |
692 | |
693 movups (%r11),%xmm0 | |
694 subq $0x80,%rdx | |
695 jnc L$ecb_dec_loop8 | |
696 | |
697 movups %xmm2,(%rsi) | |
698 pxor %xmm2,%xmm2 | |
699 movq %r11,%rcx | |
700 movups %xmm3,16(%rsi) | |
701 pxor %xmm3,%xmm3 | |
702 movl %r10d,%eax | |
703 movups %xmm4,32(%rsi) | |
704 pxor %xmm4,%xmm4 | |
705 movups %xmm5,48(%rsi) | |
706 pxor %xmm5,%xmm5 | |
707 movups %xmm6,64(%rsi) | |
708 pxor %xmm6,%xmm6 | |
709 movups %xmm7,80(%rsi) | |
710 pxor %xmm7,%xmm7 | |
711 movups %xmm8,96(%rsi) | |
712 pxor %xmm8,%xmm8 | |
713 movups %xmm9,112(%rsi) | |
714 pxor %xmm9,%xmm9 | |
715 leaq 128(%rsi),%rsi | |
716 addq $0x80,%rdx | |
717 jz L$ecb_ret | |
718 | |
719 L$ecb_dec_tail: | |
720 movups (%rdi),%xmm2 | |
721 cmpq $0x20,%rdx | |
722 jb L$ecb_dec_one | |
723 movups 16(%rdi),%xmm3 | |
724 je L$ecb_dec_two | |
725 movups 32(%rdi),%xmm4 | |
726 cmpq $0x40,%rdx | |
727 jb L$ecb_dec_three | |
728 movups 48(%rdi),%xmm5 | |
729 je L$ecb_dec_four | |
730 movups 64(%rdi),%xmm6 | |
731 cmpq $0x60,%rdx | |
732 jb L$ecb_dec_five | |
733 movups 80(%rdi),%xmm7 | |
734 je L$ecb_dec_six | |
735 movups 96(%rdi),%xmm8 | |
736 movups (%rcx),%xmm0 | |
737 xorps %xmm9,%xmm9 | |
738 call _aesni_decrypt8 | |
739 movups %xmm2,(%rsi) | |
740 pxor %xmm2,%xmm2 | |
741 movups %xmm3,16(%rsi) | |
742 pxor %xmm3,%xmm3 | |
743 movups %xmm4,32(%rsi) | |
744 pxor %xmm4,%xmm4 | |
745 movups %xmm5,48(%rsi) | |
746 pxor %xmm5,%xmm5 | |
747 movups %xmm6,64(%rsi) | |
748 pxor %xmm6,%xmm6 | |
749 movups %xmm7,80(%rsi) | |
750 pxor %xmm7,%xmm7 | |
751 movups %xmm8,96(%rsi) | |
752 pxor %xmm8,%xmm8 | |
753 pxor %xmm9,%xmm9 | |
754 jmp L$ecb_ret | |
755 .p2align 4 | |
756 L$ecb_dec_one: | |
757 movups (%rcx),%xmm0 | |
758 movups 16(%rcx),%xmm1 | |
759 leaq 32(%rcx),%rcx | |
760 xorps %xmm0,%xmm2 | |
761 L$oop_dec1_4: | |
762 .byte 102,15,56,222,209 | |
763 decl %eax | |
764 movups (%rcx),%xmm1 | |
765 leaq 16(%rcx),%rcx | |
766 jnz L$oop_dec1_4 | |
767 .byte 102,15,56,223,209 | |
768 movups %xmm2,(%rsi) | |
769 pxor %xmm2,%xmm2 | |
770 jmp L$ecb_ret | |
771 .p2align 4 | |
772 L$ecb_dec_two: | |
773 call _aesni_decrypt2 | |
774 movups %xmm2,(%rsi) | |
775 pxor %xmm2,%xmm2 | |
776 movups %xmm3,16(%rsi) | |
777 pxor %xmm3,%xmm3 | |
778 jmp L$ecb_ret | |
779 .p2align 4 | |
780 L$ecb_dec_three: | |
781 call _aesni_decrypt3 | |
782 movups %xmm2,(%rsi) | |
783 pxor %xmm2,%xmm2 | |
784 movups %xmm3,16(%rsi) | |
785 pxor %xmm3,%xmm3 | |
786 movups %xmm4,32(%rsi) | |
787 pxor %xmm4,%xmm4 | |
788 jmp L$ecb_ret | |
789 .p2align 4 | |
790 L$ecb_dec_four: | |
791 call _aesni_decrypt4 | |
792 movups %xmm2,(%rsi) | |
793 pxor %xmm2,%xmm2 | |
794 movups %xmm3,16(%rsi) | |
795 pxor %xmm3,%xmm3 | |
796 movups %xmm4,32(%rsi) | |
797 pxor %xmm4,%xmm4 | |
798 movups %xmm5,48(%rsi) | |
799 pxor %xmm5,%xmm5 | |
800 jmp L$ecb_ret | |
801 .p2align 4 | |
802 L$ecb_dec_five: | |
803 xorps %xmm7,%xmm7 | |
804 call _aesni_decrypt6 | |
805 movups %xmm2,(%rsi) | |
806 pxor %xmm2,%xmm2 | |
807 movups %xmm3,16(%rsi) | |
808 pxor %xmm3,%xmm3 | |
809 movups %xmm4,32(%rsi) | |
810 pxor %xmm4,%xmm4 | |
811 movups %xmm5,48(%rsi) | |
812 pxor %xmm5,%xmm5 | |
813 movups %xmm6,64(%rsi) | |
814 pxor %xmm6,%xmm6 | |
815 pxor %xmm7,%xmm7 | |
816 jmp L$ecb_ret | |
817 .p2align 4 | |
818 L$ecb_dec_six: | |
819 call _aesni_decrypt6 | |
820 movups %xmm2,(%rsi) | |
821 pxor %xmm2,%xmm2 | |
822 movups %xmm3,16(%rsi) | |
823 pxor %xmm3,%xmm3 | |
824 movups %xmm4,32(%rsi) | |
825 pxor %xmm4,%xmm4 | |
826 movups %xmm5,48(%rsi) | |
827 pxor %xmm5,%xmm5 | |
828 movups %xmm6,64(%rsi) | |
829 pxor %xmm6,%xmm6 | |
830 movups %xmm7,80(%rsi) | |
831 pxor %xmm7,%xmm7 | |
832 | |
/* Common exit: scrub key-bearing xmm0/xmm1 and return. */
833 L$ecb_ret: | |
834 xorps %xmm0,%xmm0 | |
835 pxor %xmm1,%xmm1 | |
836 .byte 0xf3,0xc3 | |
837 | |
/* CCM (64-bit counter variant) encrypt-and-MAC.
 * Presumed C signature:
 *   aesni_ccm64_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
 *                              const AES_KEY *key, const u8 ivec[16],
 *                              u8 cmac[16])
 * (rdi = in, rsi = out, rdx = block count, rcx = key, r8 = counter/iv,
 * r9 = CMAC in/out) -- TODO confirm against the header.
 * Per block it runs two interleaved AES encryptions: the counter block
 * (xmm2) and the running CMAC (xmm3, absorbing the plaintext).
 * .byte 102,15,56,0,247 / ,215 = pshufb %xmm7 (L$bswap_mask) on the
 * counter; L$increment64 / L$bswap_mask are constants defined elsewhere in
 * this file. Sensitive registers are zeroed before return. */
838 .globl _aesni_ccm64_encrypt_blocks | |
839 .private_extern _aesni_ccm64_encrypt_blocks | |
840 | |
841 .p2align 4 | |
842 _aesni_ccm64_encrypt_blocks: | |
843 movl 240(%rcx),%eax | |
844 movdqu (%r8),%xmm6 | |
845 movdqa L$increment64(%rip),%xmm9 | |
846 movdqa L$bswap_mask(%rip),%xmm7 | |
847 | |
848 shll $4,%eax | |
849 movl $16,%r10d | |
850 leaq 0(%rcx),%r11 | |
851 movdqu (%r9),%xmm3 | |
852 movdqa %xmm6,%xmm2 | |
853 leaq 32(%rcx,%rax,1),%rcx | |
854 .byte 102,15,56,0,247 | |
855 subq %rax,%r10 | |
856 jmp L$ccm64_enc_outer | |
857 .p2align 4 | |
/* One input block per outer iteration: whiten counter and (cmac ^ input),
 * then run the rounds two-wide in L$ccm64_enc2_loop. */
858 L$ccm64_enc_outer: | |
859 movups (%r11),%xmm0 | |
860 movq %r10,%rax | |
861 movups (%rdi),%xmm8 | |
862 | |
863 xorps %xmm0,%xmm2 | |
864 movups 16(%r11),%xmm1 | |
865 xorps %xmm8,%xmm0 | |
866 xorps %xmm0,%xmm3 | |
867 movups 32(%r11),%xmm0 | |
868 | |
869 L$ccm64_enc2_loop: | |
870 .byte 102,15,56,220,209 | |
871 .byte 102,15,56,220,217 | |
872 movups (%rcx,%rax,1),%xmm1 | |
873 addq $32,%rax | |
874 .byte 102,15,56,220,208 | |
875 .byte 102,15,56,220,216 | |
876 movups -16(%rcx,%rax,1),%xmm0 | |
877 jnz L$ccm64_enc2_loop | |
878 .byte 102,15,56,220,209 | |
879 .byte 102,15,56,220,217 | |
880 paddq %xmm9,%xmm6 | |
881 decq %rdx | |
882 .byte 102,15,56,221,208 | |
883 .byte 102,15,56,221,216 | |
884 | |
885 leaq 16(%rdi),%rdi | |
886 xorps %xmm2,%xmm8 | |
887 movdqa %xmm6,%xmm2 | |
888 movups %xmm8,(%rsi) | |
889 .byte 102,15,56,0,215 | |
890 leaq 16(%rsi),%rsi | |
891 jnz L$ccm64_enc_outer | |
892 | |
893 pxor %xmm0,%xmm0 | |
894 pxor %xmm1,%xmm1 | |
895 pxor %xmm2,%xmm2 | |
896 movups %xmm3,(%r9) | |
897 pxor %xmm3,%xmm3 | |
898 pxor %xmm8,%xmm8 | |
899 pxor %xmm6,%xmm6 | |
900 .byte 0xf3,0xc3 | |
901 | |
/* CCM (64-bit counter variant) decrypt-and-MAC; same presumed argument
 * layout as _aesni_ccm64_encrypt_blocks (rdi = in, rsi = out, rdx = block
 * count, rcx = key, r8 = counter/iv, r9 = CMAC in/out) -- confirm.
 * Decryption order differs from the encrypt path: the first counter block
 * is encrypted up front (L$oop_enc1_5), each ciphertext block is XORed
 * with it to recover plaintext, and only then is the plaintext absorbed
 * into the CMAC (and the next counter encrypted) two-wide in
 * L$ccm64_dec2_loop. The final CMAC pass runs alone in L$oop_enc1_6.
 * All AES .byte opcodes here are aesenc/aesenclast -- CCM only ever uses
 * the forward cipher. Sensitive registers are zeroed before return. */
902 .globl _aesni_ccm64_decrypt_blocks | |
903 .private_extern _aesni_ccm64_decrypt_blocks | |
904 | |
905 .p2align 4 | |
906 _aesni_ccm64_decrypt_blocks: | |
907 movl 240(%rcx),%eax | |
908 movups (%r8),%xmm6 | |
909 movdqu (%r9),%xmm3 | |
910 movdqa L$increment64(%rip),%xmm9 | |
911 movdqa L$bswap_mask(%rip),%xmm7 | |
912 | |
913 movaps %xmm6,%xmm2 | |
914 movl %eax,%r10d | |
915 movq %rcx,%r11 | |
916 .byte 102,15,56,0,247 | |
917 movups (%rcx),%xmm0 | |
918 movups 16(%rcx),%xmm1 | |
919 leaq 32(%rcx),%rcx | |
920 xorps %xmm0,%xmm2 | |
/* Encrypt the first counter block (keystream for block 0). */
921 L$oop_enc1_5: | |
922 .byte 102,15,56,220,209 | |
923 decl %eax | |
924 movups (%rcx),%xmm1 | |
925 leaq 16(%rcx),%rcx | |
926 jnz L$oop_enc1_5 | |
927 .byte 102,15,56,221,209 | |
928 shll $4,%r10d | |
929 movl $16,%eax | |
930 movups (%rdi),%xmm8 | |
931 paddq %xmm9,%xmm6 | |
932 leaq 16(%rdi),%rdi | |
933 subq %r10,%rax | |
934 leaq 32(%r11,%r10,1),%rcx | |
935 movq %rax,%r10 | |
936 jmp L$ccm64_dec_outer | |
937 .p2align 4 | |
/* Emit plaintext = ciphertext ^ keystream, then MAC it while encrypting
 * the next counter block. */
938 L$ccm64_dec_outer: | |
939 xorps %xmm2,%xmm8 | |
940 movdqa %xmm6,%xmm2 | |
941 movups %xmm8,(%rsi) | |
942 leaq 16(%rsi),%rsi | |
943 .byte 102,15,56,0,215 | |
944 | |
945 subq $1,%rdx | |
946 jz L$ccm64_dec_break | |
947 | |
948 movups (%r11),%xmm0 | |
949 movq %r10,%rax | |
950 movups 16(%r11),%xmm1 | |
951 xorps %xmm0,%xmm8 | |
952 xorps %xmm0,%xmm2 | |
953 xorps %xmm8,%xmm3 | |
954 movups 32(%r11),%xmm0 | |
955 jmp L$ccm64_dec2_loop | |
956 .p2align 4 | |
957 L$ccm64_dec2_loop: | |
958 .byte 102,15,56,220,209 | |
959 .byte 102,15,56,220,217 | |
960 movups (%rcx,%rax,1),%xmm1 | |
961 addq $32,%rax | |
962 .byte 102,15,56,220,208 | |
963 .byte 102,15,56,220,216 | |
964 movups -16(%rcx,%rax,1),%xmm0 | |
965 jnz L$ccm64_dec2_loop | |
966 movups (%rdi),%xmm8 | |
967 paddq %xmm9,%xmm6 | |
968 .byte 102,15,56,220,209 | |
969 .byte 102,15,56,220,217 | |
970 .byte 102,15,56,221,208 | |
971 .byte 102,15,56,221,216 | |
972 leaq 16(%rdi),%rdi | |
973 jmp L$ccm64_dec_outer | |
974 | |
975 .p2align 4 | |
/* Last block consumed: fold it into the CMAC with a final single-stream
 * AES pass, write the tag state back, and scrub registers. */
976 L$ccm64_dec_break: | |
977 | |
978 movl 240(%r11),%eax | |
979 movups (%r11),%xmm0 | |
980 movups 16(%r11),%xmm1 | |
981 xorps %xmm0,%xmm8 | |
982 leaq 32(%r11),%r11 | |
983 xorps %xmm8,%xmm3 | |
984 L$oop_enc1_6: | |
985 .byte 102,15,56,220,217 | |
986 decl %eax | |
987 movups (%r11),%xmm1 | |
988 leaq 16(%r11),%r11 | |
989 jnz L$oop_enc1_6 | |
990 .byte 102,15,56,221,217 | |
991 pxor %xmm0,%xmm0 | |
992 pxor %xmm1,%xmm1 | |
993 pxor %xmm2,%xmm2 | |
994 movups %xmm3,(%r9) | |
995 pxor %xmm3,%xmm3 | |
996 pxor %xmm8,%xmm8 | |
997 pxor %xmm6,%xmm6 | |
998 .byte 0xf3,0xc3 | |
999 | |
1000 .globl _aesni_ctr32_encrypt_blocks | |
1001 .private_extern _aesni_ctr32_encrypt_blocks | |
1002 | |
1003 .p2align 4 | |
1004 _aesni_ctr32_encrypt_blocks: | |
1005 cmpq $1,%rdx | |
1006 jne L$ctr32_bulk | |
1007 | |
1008 | |
1009 | |
1010 movups (%r8),%xmm2 | |
1011 movups (%rdi),%xmm3 | |
1012 movl 240(%rcx),%edx | |
1013 movups (%rcx),%xmm0 | |
1014 movups 16(%rcx),%xmm1 | |
1015 leaq 32(%rcx),%rcx | |
1016 xorps %xmm0,%xmm2 | |
1017 L$oop_enc1_7: | |
1018 .byte 102,15,56,220,209 | |
1019 decl %edx | |
1020 movups (%rcx),%xmm1 | |
1021 leaq 16(%rcx),%rcx | |
1022 jnz L$oop_enc1_7 | |
1023 .byte 102,15,56,221,209 | |
1024 pxor %xmm0,%xmm0 | |
1025 pxor %xmm1,%xmm1 | |
1026 xorps %xmm3,%xmm2 | |
1027 pxor %xmm3,%xmm3 | |
1028 movups %xmm2,(%rsi) | |
1029 xorps %xmm2,%xmm2 | |
1030 jmp L$ctr32_epilogue | |
1031 | |
1032 .p2align 4 | |
1033 L$ctr32_bulk: | |
1034 leaq (%rsp),%rax | |
1035 pushq %rbp | |
1036 subq $128,%rsp | |
1037 andq $-16,%rsp | |
1038 leaq -8(%rax),%rbp | |
1039 | |
1040 | |
1041 | |
1042 | |
1043 movdqu (%r8),%xmm2 | |
1044 movdqu (%rcx),%xmm0 | |
1045 movl 12(%r8),%r8d | |
1046 pxor %xmm0,%xmm2 | |
1047 movl 12(%rcx),%r11d | |
1048 movdqa %xmm2,0(%rsp) | |
1049 bswapl %r8d | |
1050 movdqa %xmm2,%xmm3 | |
1051 movdqa %xmm2,%xmm4 | |
1052 movdqa %xmm2,%xmm5 | |
1053 movdqa %xmm2,64(%rsp) | |
1054 movdqa %xmm2,80(%rsp) | |
1055 movdqa %xmm2,96(%rsp) | |
1056 movq %rdx,%r10 | |
1057 movdqa %xmm2,112(%rsp) | |
1058 | |
1059 leaq 1(%r8),%rax | |
1060 leaq 2(%r8),%rdx | |
1061 bswapl %eax | |
1062 bswapl %edx | |
1063 xorl %r11d,%eax | |
1064 xorl %r11d,%edx | |
1065 .byte 102,15,58,34,216,3 | |
1066 leaq 3(%r8),%rax | |
1067 movdqa %xmm3,16(%rsp) | |
1068 .byte 102,15,58,34,226,3 | |
1069 bswapl %eax | |
1070 movq %r10,%rdx | |
1071 leaq 4(%r8),%r10 | |
1072 movdqa %xmm4,32(%rsp) | |
1073 xorl %r11d,%eax | |
1074 bswapl %r10d | |
1075 .byte 102,15,58,34,232,3 | |
1076 xorl %r11d,%r10d | |
1077 movdqa %xmm5,48(%rsp) | |
1078 leaq 5(%r8),%r9 | |
1079 movl %r10d,64+12(%rsp) | |
1080 bswapl %r9d | |
1081 leaq 6(%r8),%r10 | |
1082 movl 240(%rcx),%eax | |
1083 xorl %r11d,%r9d | |
1084 bswapl %r10d | |
1085 movl %r9d,80+12(%rsp) | |
1086 xorl %r11d,%r10d | |
1087 leaq 7(%r8),%r9 | |
1088 movl %r10d,96+12(%rsp) | |
1089 bswapl %r9d | |
1090 movl _OPENSSL_ia32cap_P+4(%rip),%r10d | |
1091 xorl %r11d,%r9d | |
1092 andl $71303168,%r10d | |
1093 movl %r9d,112+12(%rsp) | |
1094 | |
1095 movups 16(%rcx),%xmm1 | |
1096 | |
1097 movdqa 64(%rsp),%xmm6 | |
1098 movdqa 80(%rsp),%xmm7 | |
1099 | |
1100 cmpq $8,%rdx | |
1101 jb L$ctr32_tail | |
1102 | |
1103 subq $6,%rdx | |
1104 cmpl $4194304,%r10d | |
1105 je L$ctr32_6x | |
1106 | |
1107 leaq 128(%rcx),%rcx | |
1108 subq $2,%rdx | |
1109 jmp L$ctr32_loop8 | |
1110 | |
1111 .p2align 4 | |
1112 L$ctr32_6x: | |
1113 shll $4,%eax | |
1114 movl $48,%r10d | |
1115 bswapl %r11d | |
1116 leaq 32(%rcx,%rax,1),%rcx | |
1117 subq %rax,%r10 | |
1118 jmp L$ctr32_loop6 | |
1119 | |
1120 .p2align 4 | |
1121 L$ctr32_loop6: | |
1122 addl $6,%r8d | |
1123 movups -48(%rcx,%r10,1),%xmm0 | |
1124 .byte 102,15,56,220,209 | |
1125 movl %r8d,%eax | |
1126 xorl %r11d,%eax | |
1127 .byte 102,15,56,220,217 | |
1128 .byte 0x0f,0x38,0xf1,0x44,0x24,12 | |
1129 leal 1(%r8),%eax | |
1130 .byte 102,15,56,220,225 | |
1131 xorl %r11d,%eax | |
1132 .byte 0x0f,0x38,0xf1,0x44,0x24,28 | |
1133 .byte 102,15,56,220,233 | |
1134 leal 2(%r8),%eax | |
1135 xorl %r11d,%eax | |
1136 .byte 102,15,56,220,241 | |
1137 .byte 0x0f,0x38,0xf1,0x44,0x24,44 | |
1138 leal 3(%r8),%eax | |
1139 .byte 102,15,56,220,249 | |
1140 movups -32(%rcx,%r10,1),%xmm1 | |
1141 xorl %r11d,%eax | |
1142 | |
1143 .byte 102,15,56,220,208 | |
1144 .byte 0x0f,0x38,0xf1,0x44,0x24,60 | |
1145 leal 4(%r8),%eax | |
1146 .byte 102,15,56,220,216 | |
1147 xorl %r11d,%eax | |
1148 .byte 0x0f,0x38,0xf1,0x44,0x24,76 | |
1149 .byte 102,15,56,220,224 | |
1150 leal 5(%r8),%eax | |
1151 xorl %r11d,%eax | |
1152 .byte 102,15,56,220,232 | |
1153 .byte 0x0f,0x38,0xf1,0x44,0x24,92 | |
1154 movq %r10,%rax | |
1155 .byte 102,15,56,220,240 | |
1156 .byte 102,15,56,220,248 | |
1157 movups -16(%rcx,%r10,1),%xmm0 | |
1158 | |
1159 call L$enc_loop6 | |
1160 | |
1161 movdqu (%rdi),%xmm8 | |
1162 movdqu 16(%rdi),%xmm9 | |
1163 movdqu 32(%rdi),%xmm10 | |
1164 movdqu 48(%rdi),%xmm11 | |
1165 movdqu 64(%rdi),%xmm12 | |
1166 movdqu 80(%rdi),%xmm13 | |
1167 leaq 96(%rdi),%rdi | |
1168 movups -64(%rcx,%r10,1),%xmm1 | |
1169 pxor %xmm2,%xmm8 | |
1170 movaps 0(%rsp),%xmm2 | |
1171 pxor %xmm3,%xmm9 | |
1172 movaps 16(%rsp),%xmm3 | |
1173 pxor %xmm4,%xmm10 | |
1174 movaps 32(%rsp),%xmm4 | |
1175 pxor %xmm5,%xmm11 | |
1176 movaps 48(%rsp),%xmm5 | |
1177 pxor %xmm6,%xmm12 | |
1178 movaps 64(%rsp),%xmm6 | |
1179 pxor %xmm7,%xmm13 | |
1180 movaps 80(%rsp),%xmm7 | |
1181 movdqu %xmm8,(%rsi) | |
1182 movdqu %xmm9,16(%rsi) | |
1183 movdqu %xmm10,32(%rsi) | |
1184 movdqu %xmm11,48(%rsi) | |
1185 movdqu %xmm12,64(%rsi) | |
1186 movdqu %xmm13,80(%rsi) | |
1187 leaq 96(%rsi),%rsi | |
1188 | |
1189 subq $6,%rdx | |
1190 jnc L$ctr32_loop6 | |
1191 | |
1192 addq $6,%rdx | |
1193 jz L$ctr32_done | |
1194 | |
1195 leal -48(%r10),%eax | |
1196 leaq -80(%rcx,%r10,1),%rcx | |
1197 negl %eax | |
1198 shrl $4,%eax | |
1199 jmp L$ctr32_tail | |
1200 | |
1201 .p2align 5 | |
1202 L$ctr32_loop8: | |
1203 addl $8,%r8d | |
1204 movdqa 96(%rsp),%xmm8 | |
1205 .byte 102,15,56,220,209 | |
1206 movl %r8d,%r9d | |
1207 movdqa 112(%rsp),%xmm9 | |
1208 .byte 102,15,56,220,217 | |
1209 bswapl %r9d | |
1210 movups 32-128(%rcx),%xmm0 | |
1211 .byte 102,15,56,220,225 | |
1212 xorl %r11d,%r9d | |
1213 nop | |
1214 .byte 102,15,56,220,233 | |
1215 movl %r9d,0+12(%rsp) | |
1216 leaq 1(%r8),%r9 | |
1217 .byte 102,15,56,220,241 | |
1218 .byte 102,15,56,220,249 | |
1219 .byte 102,68,15,56,220,193 | |
1220 .byte 102,68,15,56,220,201 | |
1221 movups 48-128(%rcx),%xmm1 | |
1222 bswapl %r9d | |
1223 .byte 102,15,56,220,208 | |
1224 .byte 102,15,56,220,216 | |
1225 xorl %r11d,%r9d | |
1226 .byte 0x66,0x90 | |
1227 .byte 102,15,56,220,224 | |
1228 .byte 102,15,56,220,232 | |
1229 movl %r9d,16+12(%rsp) | |
1230 leaq 2(%r8),%r9 | |
1231 .byte 102,15,56,220,240 | |
1232 .byte 102,15,56,220,248 | |
1233 .byte 102,68,15,56,220,192 | |
1234 .byte 102,68,15,56,220,200 | |
1235 movups 64-128(%rcx),%xmm0 | |
1236 bswapl %r9d | |
1237 .byte 102,15,56,220,209 | |
1238 .byte 102,15,56,220,217 | |
1239 xorl %r11d,%r9d | |
1240 .byte 0x66,0x90 | |
1241 .byte 102,15,56,220,225 | |
1242 .byte 102,15,56,220,233 | |
1243 movl %r9d,32+12(%rsp) | |
1244 leaq 3(%r8),%r9 | |
1245 .byte 102,15,56,220,241 | |
1246 .byte 102,15,56,220,249 | |
1247 .byte 102,68,15,56,220,193 | |
1248 .byte 102,68,15,56,220,201 | |
1249 movups 80-128(%rcx),%xmm1 | |
1250 bswapl %r9d | |
1251 .byte 102,15,56,220,208 | |
1252 .byte 102,15,56,220,216 | |
1253 xorl %r11d,%r9d | |
1254 .byte 0x66,0x90 | |
1255 .byte 102,15,56,220,224 | |
1256 .byte 102,15,56,220,232 | |
1257 movl %r9d,48+12(%rsp) | |
1258 leaq 4(%r8),%r9 | |
1259 .byte 102,15,56,220,240 | |
1260 .byte 102,15,56,220,248 | |
1261 .byte 102,68,15,56,220,192 | |
1262 .byte 102,68,15,56,220,200 | |
1263 movups 96-128(%rcx),%xmm0 | |
1264 bswapl %r9d | |
1265 .byte 102,15,56,220,209 | |
1266 .byte 102,15,56,220,217 | |
1267 xorl %r11d,%r9d | |
1268 .byte 0x66,0x90 | |
1269 .byte 102,15,56,220,225 | |
1270 .byte 102,15,56,220,233 | |
1271 movl %r9d,64+12(%rsp) | |
1272 leaq 5(%r8),%r9 | |
1273 .byte 102,15,56,220,241 | |
1274 .byte 102,15,56,220,249 | |
1275 .byte 102,68,15,56,220,193 | |
1276 .byte 102,68,15,56,220,201 | |
1277 movups 112-128(%rcx),%xmm1 | |
1278 bswapl %r9d | |
1279 .byte 102,15,56,220,208 | |
1280 .byte 102,15,56,220,216 | |
1281 xorl %r11d,%r9d | |
1282 .byte 0x66,0x90 | |
1283 .byte 102,15,56,220,224 | |
1284 .byte 102,15,56,220,232 | |
1285 movl %r9d,80+12(%rsp) | |
1286 leaq 6(%r8),%r9 | |
1287 .byte 102,15,56,220,240 | |
1288 .byte 102,15,56,220,248 | |
1289 .byte 102,68,15,56,220,192 | |
1290 .byte 102,68,15,56,220,200 | |
1291 movups 128-128(%rcx),%xmm0 | |
1292 bswapl %r9d | |
1293 .byte 102,15,56,220,209 | |
1294 .byte 102,15,56,220,217 | |
1295 xorl %r11d,%r9d | |
1296 .byte 0x66,0x90 | |
1297 .byte 102,15,56,220,225 | |
1298 .byte 102,15,56,220,233 | |
1299 movl %r9d,96+12(%rsp) | |
1300 leaq 7(%r8),%r9 | |
1301 .byte 102,15,56,220,241 | |
1302 .byte 102,15,56,220,249 | |
1303 .byte 102,68,15,56,220,193 | |
1304 .byte 102,68,15,56,220,201 | |
1305 movups 144-128(%rcx),%xmm1 | |
1306 bswapl %r9d | |
1307 .byte 102,15,56,220,208 | |
1308 .byte 102,15,56,220,216 | |
1309 .byte 102,15,56,220,224 | |
1310 xorl %r11d,%r9d | |
1311 movdqu 0(%rdi),%xmm10 | |
1312 .byte 102,15,56,220,232 | |
1313 movl %r9d,112+12(%rsp) | |
1314 cmpl $11,%eax | |
1315 .byte 102,15,56,220,240 | |
1316 .byte 102,15,56,220,248 | |
1317 .byte 102,68,15,56,220,192 | |
1318 .byte 102,68,15,56,220,200 | |
1319 movups 160-128(%rcx),%xmm0 | |
1320 | |
1321 jb L$ctr32_enc_done | |
1322 | |
1323 .byte 102,15,56,220,209 | |
1324 .byte 102,15,56,220,217 | |
1325 .byte 102,15,56,220,225 | |
1326 .byte 102,15,56,220,233 | |
1327 .byte 102,15,56,220,241 | |
1328 .byte 102,15,56,220,249 | |
1329 .byte 102,68,15,56,220,193 | |
1330 .byte 102,68,15,56,220,201 | |
1331 movups 176-128(%rcx),%xmm1 | |
1332 | |
1333 .byte 102,15,56,220,208 | |
1334 .byte 102,15,56,220,216 | |
1335 .byte 102,15,56,220,224 | |
1336 .byte 102,15,56,220,232 | |
1337 .byte 102,15,56,220,240 | |
1338 .byte 102,15,56,220,248 | |
1339 .byte 102,68,15,56,220,192 | |
1340 .byte 102,68,15,56,220,200 | |
1341 movups 192-128(%rcx),%xmm0 | |
1342 je L$ctr32_enc_done | |
1343 | |
1344 .byte 102,15,56,220,209 | |
1345 .byte 102,15,56,220,217 | |
1346 .byte 102,15,56,220,225 | |
1347 .byte 102,15,56,220,233 | |
1348 .byte 102,15,56,220,241 | |
1349 .byte 102,15,56,220,249 | |
1350 .byte 102,68,15,56,220,193 | |
1351 .byte 102,68,15,56,220,201 | |
1352 movups 208-128(%rcx),%xmm1 | |
1353 | |
1354 .byte 102,15,56,220,208 | |
1355 .byte 102,15,56,220,216 | |
1356 .byte 102,15,56,220,224 | |
1357 .byte 102,15,56,220,232 | |
1358 .byte 102,15,56,220,240 | |
1359 .byte 102,15,56,220,248 | |
1360 .byte 102,68,15,56,220,192 | |
1361 .byte 102,68,15,56,220,200 | |
1362 movups 224-128(%rcx),%xmm0 | |
1363 jmp L$ctr32_enc_done | |
1364 | |
1365 .p2align 4 | |
1366 L$ctr32_enc_done: | |
1367 movdqu 16(%rdi),%xmm11 | |
1368 pxor %xmm0,%xmm10 | |
1369 movdqu 32(%rdi),%xmm12 | |
1370 pxor %xmm0,%xmm11 | |
1371 movdqu 48(%rdi),%xmm13 | |
1372 pxor %xmm0,%xmm12 | |
1373 movdqu 64(%rdi),%xmm14 | |
1374 pxor %xmm0,%xmm13 | |
1375 movdqu 80(%rdi),%xmm15 | |
1376 pxor %xmm0,%xmm14 | |
1377 pxor %xmm0,%xmm15 | |
1378 .byte 102,15,56,220,209 | |
1379 .byte 102,15,56,220,217 | |
1380 .byte 102,15,56,220,225 | |
1381 .byte 102,15,56,220,233 | |
1382 .byte 102,15,56,220,241 | |
1383 .byte 102,15,56,220,249 | |
1384 .byte 102,68,15,56,220,193 | |
1385 .byte 102,68,15,56,220,201 | |
1386 movdqu 96(%rdi),%xmm1 | |
1387 leaq 128(%rdi),%rdi | |
1388 | |
1389 .byte 102,65,15,56,221,210 | |
1390 pxor %xmm0,%xmm1 | |
1391 movdqu 112-128(%rdi),%xmm10 | |
1392 .byte 102,65,15,56,221,219 | |
1393 pxor %xmm0,%xmm10 | |
1394 movdqa 0(%rsp),%xmm11 | |
1395 .byte 102,65,15,56,221,228 | |
1396 .byte 102,65,15,56,221,237 | |
1397 movdqa 16(%rsp),%xmm12 | |
1398 movdqa 32(%rsp),%xmm13 | |
1399 .byte 102,65,15,56,221,246 | |
1400 .byte 102,65,15,56,221,255 | |
1401 movdqa 48(%rsp),%xmm14 | |
1402 movdqa 64(%rsp),%xmm15 | |
1403 .byte 102,68,15,56,221,193 | |
1404 movdqa 80(%rsp),%xmm0 | |
1405 movups 16-128(%rcx),%xmm1 | |
1406 .byte 102,69,15,56,221,202 | |
1407 | |
1408 movups %xmm2,(%rsi) | |
1409 movdqa %xmm11,%xmm2 | |
1410 movups %xmm3,16(%rsi) | |
1411 movdqa %xmm12,%xmm3 | |
1412 movups %xmm4,32(%rsi) | |
1413 movdqa %xmm13,%xmm4 | |
1414 movups %xmm5,48(%rsi) | |
1415 movdqa %xmm14,%xmm5 | |
1416 movups %xmm6,64(%rsi) | |
1417 movdqa %xmm15,%xmm6 | |
1418 movups %xmm7,80(%rsi) | |
1419 movdqa %xmm0,%xmm7 | |
1420 movups %xmm8,96(%rsi) | |
1421 movups %xmm9,112(%rsi) | |
1422 leaq 128(%rsi),%rsi | |
1423 | |
1424 subq $8,%rdx | |
1425 jnc L$ctr32_loop8 | |
1426 | |
1427 addq $8,%rdx | |
1428 jz L$ctr32_done | |
1429 leaq -128(%rcx),%rcx | |
1430 | |
1431 L$ctr32_tail: | |
1432 | |
1433 | |
1434 leaq 16(%rcx),%rcx | |
1435 cmpq $4,%rdx | |
1436 jb L$ctr32_loop3 | |
1437 je L$ctr32_loop4 | |
1438 | |
1439 | |
1440 shll $4,%eax | |
1441 movdqa 96(%rsp),%xmm8 | |
1442 pxor %xmm9,%xmm9 | |
1443 | |
1444 movups 16(%rcx),%xmm0 | |
1445 .byte 102,15,56,220,209 | |
1446 .byte 102,15,56,220,217 | |
1447 leaq 32-16(%rcx,%rax,1),%rcx | |
1448 negq %rax | |
1449 .byte 102,15,56,220,225 | |
1450 addq $16,%rax | |
1451 movups (%rdi),%xmm10 | |
1452 .byte 102,15,56,220,233 | |
1453 .byte 102,15,56,220,241 | |
1454 movups 16(%rdi),%xmm11 | |
1455 movups 32(%rdi),%xmm12 | |
1456 .byte 102,15,56,220,249 | |
1457 .byte 102,68,15,56,220,193 | |
1458 | |
1459 call L$enc_loop8_enter | |
1460 | |
1461 movdqu 48(%rdi),%xmm13 | |
1462 pxor %xmm10,%xmm2 | |
1463 movdqu 64(%rdi),%xmm10 | |
1464 pxor %xmm11,%xmm3 | |
1465 movdqu %xmm2,(%rsi) | |
1466 pxor %xmm12,%xmm4 | |
1467 movdqu %xmm3,16(%rsi) | |
1468 pxor %xmm13,%xmm5 | |
1469 movdqu %xmm4,32(%rsi) | |
1470 pxor %xmm10,%xmm6 | |
1471 movdqu %xmm5,48(%rsi) | |
1472 movdqu %xmm6,64(%rsi) | |
1473 cmpq $6,%rdx | |
1474 jb L$ctr32_done | |
1475 | |
1476 movups 80(%rdi),%xmm11 | |
1477 xorps %xmm11,%xmm7 | |
1478 movups %xmm7,80(%rsi) | |
1479 je L$ctr32_done | |
1480 | |
1481 movups 96(%rdi),%xmm12 | |
1482 xorps %xmm12,%xmm8 | |
1483 movups %xmm8,96(%rsi) | |
1484 jmp L$ctr32_done | |
1485 | |
1486 .p2align 5 | |
1487 L$ctr32_loop4: | |
1488 .byte 102,15,56,220,209 | |
1489 leaq 16(%rcx),%rcx | |
1490 decl %eax | |
1491 .byte 102,15,56,220,217 | |
1492 .byte 102,15,56,220,225 | |
1493 .byte 102,15,56,220,233 | |
1494 movups (%rcx),%xmm1 | |
1495 jnz L$ctr32_loop4 | |
1496 .byte 102,15,56,221,209 | |
1497 .byte 102,15,56,221,217 | |
1498 movups (%rdi),%xmm10 | |
1499 movups 16(%rdi),%xmm11 | |
1500 .byte 102,15,56,221,225 | |
1501 .byte 102,15,56,221,233 | |
1502 movups 32(%rdi),%xmm12 | |
1503 movups 48(%rdi),%xmm13 | |
1504 | |
1505 xorps %xmm10,%xmm2 | |
1506 movups %xmm2,(%rsi) | |
1507 xorps %xmm11,%xmm3 | |
1508 movups %xmm3,16(%rsi) | |
1509 pxor %xmm12,%xmm4 | |
1510 movdqu %xmm4,32(%rsi) | |
1511 pxor %xmm13,%xmm5 | |
1512 movdqu %xmm5,48(%rsi) | |
1513 jmp L$ctr32_done | |
1514 | |
1515 .p2align 5 | |
1516 L$ctr32_loop3: | |
1517 .byte 102,15,56,220,209 | |
1518 leaq 16(%rcx),%rcx | |
1519 decl %eax | |
1520 .byte 102,15,56,220,217 | |
1521 .byte 102,15,56,220,225 | |
1522 movups (%rcx),%xmm1 | |
1523 jnz L$ctr32_loop3 | |
1524 .byte 102,15,56,221,209 | |
1525 .byte 102,15,56,221,217 | |
1526 .byte 102,15,56,221,225 | |
1527 | |
1528 movups (%rdi),%xmm10 | |
1529 xorps %xmm10,%xmm2 | |
1530 movups %xmm2,(%rsi) | |
1531 cmpq $2,%rdx | |
1532 jb L$ctr32_done | |
1533 | |
1534 movups 16(%rdi),%xmm11 | |
1535 xorps %xmm11,%xmm3 | |
1536 movups %xmm3,16(%rsi) | |
1537 je L$ctr32_done | |
1538 | |
1539 movups 32(%rdi),%xmm12 | |
1540 xorps %xmm12,%xmm4 | |
1541 movups %xmm4,32(%rsi) | |
1542 | |
1543 L$ctr32_done: | |
1544 xorps %xmm0,%xmm0 | |
1545 xorl %r11d,%r11d | |
1546 pxor %xmm1,%xmm1 | |
1547 pxor %xmm2,%xmm2 | |
1548 pxor %xmm3,%xmm3 | |
1549 pxor %xmm4,%xmm4 | |
1550 pxor %xmm5,%xmm5 | |
1551 pxor %xmm6,%xmm6 | |
1552 pxor %xmm7,%xmm7 | |
1553 movaps %xmm0,0(%rsp) | |
1554 pxor %xmm8,%xmm8 | |
1555 movaps %xmm0,16(%rsp) | |
1556 pxor %xmm9,%xmm9 | |
1557 movaps %xmm0,32(%rsp) | |
1558 pxor %xmm10,%xmm10 | |
1559 movaps %xmm0,48(%rsp) | |
1560 pxor %xmm11,%xmm11 | |
1561 movaps %xmm0,64(%rsp) | |
1562 pxor %xmm12,%xmm12 | |
1563 movaps %xmm0,80(%rsp) | |
1564 pxor %xmm13,%xmm13 | |
1565 movaps %xmm0,96(%rsp) | |
1566 pxor %xmm14,%xmm14 | |
1567 movaps %xmm0,112(%rsp) | |
1568 pxor %xmm15,%xmm15 | |
1569 leaq (%rbp),%rsp | |
1570 popq %rbp | |
1571 L$ctr32_epilogue: | |
1572 .byte 0xf3,0xc3 | |
1573 | |
# _aesni_xts_encrypt — AES-XTS encryption using AES-NI.
#
# System V AMD64 argument registers as used below:
#   %rdi = input (plaintext)                   %rsi = output (ciphertext)
#   %rdx = length in bytes                     %rcx = data key schedule ("key1")
#   %r8  = tweak key schedule ("key2")         %r9  = 16-byte initial tweak/IV
# NOTE(review): argument names inferred from register usage (SysV order);
# confirm against the C prototype at the call sites.
#
# Key-schedule layout (demonstrated by use): round keys at 16-byte stride
# from offset 0; the round count is the 32-bit word at offset 240 (loaded
# into %eax / %r10d and used as the loop counter).
#
# The `.byte 102,15,56,220,xx` / `.byte 102,15,56,221,xx` sequences are
# hand-encoded AESENC / AESENCLAST instructions (emitted this way by the
# generator for pre-AES-NI assemblers).
#
# Clobbers: rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15, flags.
# All xmm registers and the stack scratch area are wiped before return.
.globl _aesni_xts_encrypt
.private_extern _aesni_xts_encrypt

.p2align 4
_aesni_xts_encrypt:
leaq (%rsp),%rax                # remember incoming rsp
pushq %rbp
subq $112,%rsp                  # 7 x 16-byte scratch slots (tweaks at 0..96)
andq $-16,%rsp                  # 16-align for movaps/movdqa to (%rsp)
leaq -8(%rax),%rbp              # rbp = frame anchor used to restore rsp
movups (%r9),%xmm2              # xmm2 = initial tweak block (IV)
movl 240(%r8),%eax              # eax = tweak-key round count
movl 240(%rcx),%r10d            # r10d = data-key round count
# --- Encrypt the IV with the tweak key (key2) to form XTS tweak T ---
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2               # whitening round
L$oop_enc1_8:
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz L$oop_enc1_8
.byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2 -> xmm2 = T
movups (%rcx),%xmm0             # xmm0 = data-key round-0 (whitening) key
movq %rcx,%r11                  # r11 = saved key1 pointer
movl %r10d,%eax                 # eax = data-key rounds (working copy)
shll $4,%r10d                   # r10 = rounds*16 = schedule byte length
movq %rdx,%r9                   # r9 = original length (low 4 bits drive
                                #      ciphertext stealing at the end)
andq $-16,%rdx                  # rdx = length rounded down to whole blocks

movups 16(%rcx,%r10,1),%xmm1    # xmm1 = last round key

# --- Precompute six consecutive tweaks T..T*x^5 (GF(2^128) doubling) ---
# xmm8 = L$xts_magic reduction mask; xmm9 carries the replicated sign word
# (pshufd 0x5f broadcasts the high dword) used to detect carry-out.
# Each tweak is pre-XORed with round-0 key xmm0 so the data XOR and the
# AES whitening step fuse into one pxor later.
movdqa L$xts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15             # xmm15 = running tweak
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1                # last round key ^ round-0 key, stashed below
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9               # shift replicated sign words left by 1
movdqa %xmm15,%xmm10            # tweak #0
psrad $31,%xmm14                # carry mask from old top bit
paddq %xmm15,%xmm15             # tweak <<= 1 (per 64-bit lane)
pand %xmm8,%xmm14               # select reduction polynomial on carry
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15              # fold carry: tweak = tweak*x
movdqa %xmm9,%xmm14             # (same doubling pattern repeats below)
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11            # tweak #1
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12            # tweak #2
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13            # tweak #3
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14            # tweak #4
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15               # xmm15 = tweak #5 (next running tweak)
movaps %xmm1,96(%rsp)           # stash (last^first) round key at rsp+96

subq $96,%rdx
jc L$xts_enc_short              # fewer than 6 whole blocks remain

# --- Set up for the 6-blocks-at-a-time grand loop ---
movl $16+96,%eax
leaq 32(%r11,%r10,1),%rcx       # rcx = end of key schedule + 32
subq %r10,%rax                  # eax/r10 encode the (negative) key offset
movups 16(%r11),%xmm1           # round-1 key
movq %rax,%r10
leaq L$xts_magic(%rip),%r8      # r8 now points at the reduction mask
jmp L$xts_enc_grandloop

.p2align 5
# Main loop: encrypt 6 blocks while simultaneously computing the next 6
# tweaks; AES rounds are interleaved with tweak arithmetic to hide latency.
L$xts_enc_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2               # input ^ (tweak ^ round-0 key) = whitened
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9           # xmm9 = (last^first) round key
pxor %xmm14,%xmm6
.byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
movups 32(%r11),%xmm0
leaq 96(%rdi),%rdi              # consume 6 input blocks
pxor %xmm8,%xmm7

# Convert stashed tweaks to (tweak ^ last-round-key) form and spill them;
# they are consumed by the aesenclast-from-memory instructions below.
pxor %xmm9,%xmm10
.byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
movups 48(%r11),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
.byte 102,15,56,220,248         # aesenc %xmm0,%xmm7
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9       # restart carry tracking for next tweaks
jmp L$xts_enc_loop6
.p2align 5
# Inner rounds loop: two rounds per iteration, keys fetched relative to
# the end of the schedule (rax counts up toward 0).
L$xts_enc_loop6:
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
.byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
.byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
.byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
.byte 102,15,56,220,248         # aesenc %xmm0,%xmm7
movups -80(%rcx,%rax,1),%xmm0
jnz L$xts_enc_loop6

# --- Final rounds, interleaved with computing the next 6 tweaks ---
movdqa (%r8),%xmm8              # reload L$xts_magic mask
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
pand %xmm8,%xmm14
movups (%r11),%xmm10            # round-0 key, seeds next tweak batch
.byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10              # next tweak #0 (pre-XORed with key0)
.byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,220,248         # aesenc %xmm0,%xmm7
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
pxor %xmm15,%xmm11              # next tweak #1
psrad $31,%xmm14
.byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
movdqa %xmm13,48(%rsp)          # spill tweak #3 ^ last-key (delayed)
pxor %xmm14,%xmm15
.byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
pxor %xmm15,%xmm12              # next tweak #2
psrad $31,%xmm14
.byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
.byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,220,248         # aesenc %xmm0,%xmm7

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
pxor %xmm15,%xmm13              # next tweak #3
psrad $31,%xmm0
.byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
pxor %xmm0,%xmm15
movups (%r11),%xmm0
.byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
.byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
movups 16(%r11),%xmm1

pxor %xmm15,%xmm14              # next tweak #4
.byte 102,15,56,221,84,36,0     # aesenclast 0(%rsp),%xmm2  (^ tweak^lastkey)
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,221,92,36,16    # aesenclast 16(%rsp),%xmm3
.byte 102,15,56,221,100,36,32   # aesenclast 32(%rsp),%xmm4
pand %xmm8,%xmm9
movq %r10,%rax                  # rewind key-offset counter for next pass
.byte 102,15,56,221,108,36,48   # aesenclast 48(%rsp),%xmm5
.byte 102,15,56,221,116,36,64   # aesenclast 64(%rsp),%xmm6
.byte 102,15,56,221,124,36,80   # aesenclast 80(%rsp),%xmm7
pxor %xmm9,%xmm15               # next running tweak (#5)

leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)          # store 6 ciphertext blocks
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc L$xts_enc_grandloop

# Recover the plain round count into eax and key1 pointer into rcx.
movl $16+96,%eax
subl %r10d,%eax
movq %r11,%rcx
shrl $4,%eax

# --- Tail: 1..5 remaining whole blocks ---
# On entry the precomputed tweaks are still XORed with round-0 key xmm0;
# undo that lazily (only for the tweaks actually needed) before dispatch.
L$xts_enc_short:

movl %eax,%r10d                 # r10d = round count (reused after stealing)
pxor %xmm0,%xmm10
addq $96,%rdx                   # rdx = remaining whole-block bytes
jz L$xts_enc_done

pxor %xmm0,%xmm11
cmpq $0x20,%rdx
jb L$xts_enc_one                # 1 block
pxor %xmm0,%xmm12
je L$xts_enc_two                # 2 blocks

pxor %xmm0,%xmm13
cmpq $0x40,%rdx
jb L$xts_enc_three              # 3 blocks
pxor %xmm0,%xmm14
je L$xts_enc_four               # 4 blocks

# 5 blocks: load, XOR with tweaks, bulk-encrypt, XOR again, store.
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm7,%xmm7                # 6th lane unused; zero it

call _aesni_encrypt6

xorps %xmm10,%xmm2
movdqa %xmm15,%xmm10            # advance tweak for possible stealing step
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2              # pre-whitening with tweak
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_9:
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_9
.byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2
xorps %xmm10,%xmm2              # post-whitening with tweak
movdqa %xmm11,%xmm10            # next tweak for stealing
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3

call _aesni_encrypt2

xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10            # next tweak for stealing
xorps %xmm11,%xmm3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4

call _aesni_encrypt3

xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10            # next tweak for stealing
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
L$xts_enc_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5

call _aesni_encrypt4

pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10            # next tweak for stealing
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp L$xts_enc_done

.p2align 4
# --- Ciphertext stealing for a non-multiple-of-16 tail ---
# r9 still holds the original length; its low 4 bits give the partial-
# block size.  Swap bytes between the last full ciphertext block and the
# remaining plaintext, then re-encrypt the stitched block in place.
L$xts_enc_done:
andq $15,%r9
jz L$xts_enc_ret
movq %r9,%rdx                   # rdx = partial-block byte count

L$xts_enc_steal:
movzbl (%rdi),%eax              # next plaintext byte
movzbl -16(%rsi),%ecx           # byte of last ciphertext block
leaq 1(%rdi),%rdi
movb %al,-16(%rsi)              # plaintext byte into last block
movb %cl,0(%rsi)                # stolen ciphertext byte appended
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz L$xts_enc_steal

subq %r9,%rsi                   # rewind to the stitched block
movq %r11,%rcx                  # restore key1 pointer
movl %r10d,%eax                 # restore round count

# Re-encrypt the stitched block with tweak xmm10 (one-block loop again).
movups -16(%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_10:
.byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_10
.byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2
xorps %xmm10,%xmm2
movups %xmm2,-16(%rsi)

# --- Scrub all key/tweak material from registers and stack scratch ---
L$xts_enc_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)            # wipe spilled tweak slots
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
leaq (%rbp),%rsp                # undo and-aligned frame
popq %rbp
L$xts_enc_epilogue:
.byte 0xf3,0xc3                 # rep-ret (ret, AMD branch-predictor idiom)
2039 | |
2040 .globl _aesni_xts_decrypt | |
2041 .private_extern _aesni_xts_decrypt | |
2042 | |
2043 .p2align 4 | |
2044 _aesni_xts_decrypt: | |
2045 leaq (%rsp),%rax | |
2046 pushq %rbp | |
2047 subq $112,%rsp | |
2048 andq $-16,%rsp | |
2049 leaq -8(%rax),%rbp | |
2050 movups (%r9),%xmm2 | |
2051 movl 240(%r8),%eax | |
2052 movl 240(%rcx),%r10d | |
2053 movups (%r8),%xmm0 | |
2054 movups 16(%r8),%xmm1 | |
2055 leaq 32(%r8),%r8 | |
2056 xorps %xmm0,%xmm2 | |
2057 L$oop_enc1_11: | |
2058 .byte 102,15,56,220,209 | |
2059 decl %eax | |
2060 movups (%r8),%xmm1 | |
2061 leaq 16(%r8),%r8 | |
2062 jnz L$oop_enc1_11 | |
2063 .byte 102,15,56,221,209 | |
2064 xorl %eax,%eax | |
2065 testq $15,%rdx | |
2066 setnz %al | |
2067 shlq $4,%rax | |
2068 subq %rax,%rdx | |
2069 | |
2070 movups (%rcx),%xmm0 | |
2071 movq %rcx,%r11 | |
2072 movl %r10d,%eax | |
2073 shll $4,%r10d | |
2074 movq %rdx,%r9 | |
2075 andq $-16,%rdx | |
2076 | |
2077 movups 16(%rcx,%r10,1),%xmm1 | |
2078 | |
2079 movdqa L$xts_magic(%rip),%xmm8 | |
2080 movdqa %xmm2,%xmm15 | |
2081 pshufd $0x5f,%xmm2,%xmm9 | |
2082 pxor %xmm0,%xmm1 | |
2083 movdqa %xmm9,%xmm14 | |
2084 paddd %xmm9,%xmm9 | |
2085 movdqa %xmm15,%xmm10 | |
2086 psrad $31,%xmm14 | |
2087 paddq %xmm15,%xmm15 | |
2088 pand %xmm8,%xmm14 | |
2089 pxor %xmm0,%xmm10 | |
2090 pxor %xmm14,%xmm15 | |
2091 movdqa %xmm9,%xmm14 | |
2092 paddd %xmm9,%xmm9 | |
2093 movdqa %xmm15,%xmm11 | |
2094 psrad $31,%xmm14 | |
2095 paddq %xmm15,%xmm15 | |
2096 pand %xmm8,%xmm14 | |
2097 pxor %xmm0,%xmm11 | |
2098 pxor %xmm14,%xmm15 | |
2099 movdqa %xmm9,%xmm14 | |
2100 paddd %xmm9,%xmm9 | |
2101 movdqa %xmm15,%xmm12 | |
2102 psrad $31,%xmm14 | |
2103 paddq %xmm15,%xmm15 | |
2104 pand %xmm8,%xmm14 | |
2105 pxor %xmm0,%xmm12 | |
2106 pxor %xmm14,%xmm15 | |
2107 movdqa %xmm9,%xmm14 | |
2108 paddd %xmm9,%xmm9 | |
2109 movdqa %xmm15,%xmm13 | |
2110 psrad $31,%xmm14 | |
2111 paddq %xmm15,%xmm15 | |
2112 pand %xmm8,%xmm14 | |
2113 pxor %xmm0,%xmm13 | |
2114 pxor %xmm14,%xmm15 | |
2115 movdqa %xmm15,%xmm14 | |
2116 psrad $31,%xmm9 | |
2117 paddq %xmm15,%xmm15 | |
2118 pand %xmm8,%xmm9 | |
2119 pxor %xmm0,%xmm14 | |
2120 pxor %xmm9,%xmm15 | |
2121 movaps %xmm1,96(%rsp) | |
2122 | |
2123 subq $96,%rdx | |
2124 jc L$xts_dec_short | |
2125 | |
2126 movl $16+96,%eax | |
2127 leaq 32(%r11,%r10,1),%rcx | |
2128 subq %r10,%rax | |
2129 movups 16(%r11),%xmm1 | |
2130 movq %rax,%r10 | |
2131 leaq L$xts_magic(%rip),%r8 | |
2132 jmp L$xts_dec_grandloop | |
2133 | |
2134 .p2align 5 | |
2135 L$xts_dec_grandloop: | |
2136 movdqu 0(%rdi),%xmm2 | |
2137 movdqa %xmm0,%xmm8 | |
2138 movdqu 16(%rdi),%xmm3 | |
2139 pxor %xmm10,%xmm2 | |
2140 movdqu 32(%rdi),%xmm4 | |
2141 pxor %xmm11,%xmm3 | |
2142 .byte 102,15,56,222,209 | |
2143 movdqu 48(%rdi),%xmm5 | |
2144 pxor %xmm12,%xmm4 | |
2145 .byte 102,15,56,222,217 | |
2146 movdqu 64(%rdi),%xmm6 | |
2147 pxor %xmm13,%xmm5 | |
2148 .byte 102,15,56,222,225 | |
2149 movdqu 80(%rdi),%xmm7 | |
2150 pxor %xmm15,%xmm8 | |
2151 movdqa 96(%rsp),%xmm9 | |
2152 pxor %xmm14,%xmm6 | |
2153 .byte 102,15,56,222,233 | |
2154 movups 32(%r11),%xmm0 | |
2155 leaq 96(%rdi),%rdi | |
2156 pxor %xmm8,%xmm7 | |
2157 | |
2158 pxor %xmm9,%xmm10 | |
2159 .byte 102,15,56,222,241 | |
2160 pxor %xmm9,%xmm11 | |
2161 movdqa %xmm10,0(%rsp) | |
2162 .byte 102,15,56,222,249 | |
2163 movups 48(%r11),%xmm1 | |
2164 pxor %xmm9,%xmm12 | |
2165 | |
2166 .byte 102,15,56,222,208 | |
2167 pxor %xmm9,%xmm13 | |
2168 movdqa %xmm11,16(%rsp) | |
2169 .byte 102,15,56,222,216 | |
2170 pxor %xmm9,%xmm14 | |
2171 movdqa %xmm12,32(%rsp) | |
2172 .byte 102,15,56,222,224 | |
2173 .byte 102,15,56,222,232 | |
2174 pxor %xmm9,%xmm8 | |
2175 movdqa %xmm14,64(%rsp) | |
2176 .byte 102,15,56,222,240 | |
2177 .byte 102,15,56,222,248 | |
2178 movups 64(%r11),%xmm0 | |
2179 movdqa %xmm8,80(%rsp) | |
2180 pshufd $0x5f,%xmm15,%xmm9 | |
2181 jmp L$xts_dec_loop6 | |
2182 .p2align 5 | |
2183 L$xts_dec_loop6: | |
2184 .byte 102,15,56,222,209 | |
2185 .byte 102,15,56,222,217 | |
2186 .byte 102,15,56,222,225 | |
2187 .byte 102,15,56,222,233 | |
2188 .byte 102,15,56,222,241 | |
2189 .byte 102,15,56,222,249 | |
2190 movups -64(%rcx,%rax,1),%xmm1 | |
2191 addq $32,%rax | |
2192 | |
2193 .byte 102,15,56,222,208 | |
2194 .byte 102,15,56,222,216 | |
2195 .byte 102,15,56,222,224 | |
2196 .byte 102,15,56,222,232 | |
2197 .byte 102,15,56,222,240 | |
2198 .byte 102,15,56,222,248 | |
2199 movups -80(%rcx,%rax,1),%xmm0 | |
2200 jnz L$xts_dec_loop6 | |
2201 | |
2202 movdqa (%r8),%xmm8 | |
2203 movdqa %xmm9,%xmm14 | |
2204 paddd %xmm9,%xmm9 | |
2205 .byte 102,15,56,222,209 | |
2206 paddq %xmm15,%xmm15 | |
2207 psrad $31,%xmm14 | |
2208 .byte 102,15,56,222,217 | |
2209 pand %xmm8,%xmm14 | |
2210 movups (%r11),%xmm10 | |
2211 .byte 102,15,56,222,225 | |
2212 .byte 102,15,56,222,233 | |
2213 .byte 102,15,56,222,241 | |
2214 pxor %xmm14,%xmm15 | |
2215 movaps %xmm10,%xmm11 | |
2216 .byte 102,15,56,222,249 | |
2217 movups -64(%rcx),%xmm1 | |
2218 | |
2219 movdqa %xmm9,%xmm14 | |
2220 .byte 102,15,56,222,208 | |
2221 paddd %xmm9,%xmm9 | |
2222 pxor %xmm15,%xmm10 | |
2223 .byte 102,15,56,222,216 | |
2224 psrad $31,%xmm14 | |
2225 paddq %xmm15,%xmm15 | |
2226 .byte 102,15,56,222,224 | |
2227 .byte 102,15,56,222,232 | |
2228 pand %xmm8,%xmm14 | |
2229 movaps %xmm11,%xmm12 | |
2230 .byte 102,15,56,222,240 | |
2231 pxor %xmm14,%xmm15 | |
2232 movdqa %xmm9,%xmm14 | |
2233 .byte 102,15,56,222,248 | |
2234 movups -48(%rcx),%xmm0 | |
2235 | |
2236 paddd %xmm9,%xmm9 | |
2237 .byte 102,15,56,222,209 | |
2238 pxor %xmm15,%xmm11 | |
2239 psrad $31,%xmm14 | |
2240 .byte 102,15,56,222,217 | |
2241 paddq %xmm15,%xmm15 | |
2242 pand %xmm8,%xmm14 | |
2243 .byte 102,15,56,222,225 | |
2244 .byte 102,15,56,222,233 | |
2245 movdqa %xmm13,48(%rsp) | |
2246 pxor %xmm14,%xmm15 | |
2247 .byte 102,15,56,222,241 | |
2248 movaps %xmm12,%xmm13 | |
2249 movdqa %xmm9,%xmm14 | |
2250 .byte 102,15,56,222,249 | |
2251 movups -32(%rcx),%xmm1 | |
2252 | |
2253 paddd %xmm9,%xmm9 | |
2254 .byte 102,15,56,222,208 | |
2255 pxor %xmm15,%xmm12 | |
2256 psrad $31,%xmm14 | |
2257 .byte 102,15,56,222,216 | |
2258 paddq %xmm15,%xmm15 | |
2259 pand %xmm8,%xmm14 | |
2260 .byte 102,15,56,222,224 | |
2261 .byte 102,15,56,222,232 | |
2262 .byte 102,15,56,222,240 | |
2263 pxor %xmm14,%xmm15 | |
2264 movaps %xmm13,%xmm14 | |
2265 .byte 102,15,56,222,248 | |
2266 | |
2267 movdqa %xmm9,%xmm0 | |
2268 paddd %xmm9,%xmm9 | |
2269 .byte 102,15,56,222,209 | |
2270 pxor %xmm15,%xmm13 | |
2271 psrad $31,%xmm0 | |
2272 .byte 102,15,56,222,217 | |
2273 paddq %xmm15,%xmm15 | |
2274 pand %xmm8,%xmm0 | |
2275 .byte 102,15,56,222,225 | |
2276 .byte 102,15,56,222,233 | |
2277 pxor %xmm0,%xmm15 | |
2278 movups (%r11),%xmm0 | |
2279 .byte 102,15,56,222,241 | |
2280 .byte 102,15,56,222,249 | |
2281 movups 16(%r11),%xmm1 | |
2282 | |
2283 pxor %xmm15,%xmm14 | |
2284 .byte 102,15,56,223,84,36,0 | |
2285 psrad $31,%xmm9 | |
2286 paddq %xmm15,%xmm15 | |
2287 .byte 102,15,56,223,92,36,16 | |
2288 .byte 102,15,56,223,100,36,32 | |
2289 pand %xmm8,%xmm9 | |
2290 movq %r10,%rax | |
2291 .byte 102,15,56,223,108,36,48 | |
2292 .byte 102,15,56,223,116,36,64 | |
2293 .byte 102,15,56,223,124,36,80 | |
2294 pxor %xmm9,%xmm15 | |
2295 | |
2296 leaq 96(%rsi),%rsi | |
2297 movups %xmm2,-96(%rsi) | |
2298 movups %xmm3,-80(%rsi) | |
2299 movups %xmm4,-64(%rsi) | |
2300 movups %xmm5,-48(%rsi) | |
2301 movups %xmm6,-32(%rsi) | |
2302 movups %xmm7,-16(%rsi) | |
2303 subq $96,%rdx | |
2304 jnc L$xts_dec_grandloop | |
2305 | |
2306 movl $16+96,%eax | |
2307 subl %r10d,%eax | |
2308 movq %r11,%rcx | |
2309 shrl $4,%eax | |
2310 | |
2311 L$xts_dec_short: | |
2312 | |
2313 movl %eax,%r10d | |
2314 pxor %xmm0,%xmm10 | |
2315 pxor %xmm0,%xmm11 | |
2316 addq $96,%rdx | |
2317 jz L$xts_dec_done | |
2318 | |
2319 pxor %xmm0,%xmm12 | |
2320 cmpq $0x20,%rdx | |
2321 jb L$xts_dec_one | |
2322 pxor %xmm0,%xmm13 | |
2323 je L$xts_dec_two | |
2324 | |
2325 pxor %xmm0,%xmm14 | |
2326 cmpq $0x40,%rdx | |
2327 jb L$xts_dec_three | |
2328 je L$xts_dec_four | |
2329 | |
2330 movdqu (%rdi),%xmm2 | |
2331 movdqu 16(%rdi),%xmm3 | |
2332 movdqu 32(%rdi),%xmm4 | |
2333 pxor %xmm10,%xmm2 | |
2334 movdqu 48(%rdi),%xmm5 | |
2335 pxor %xmm11,%xmm3 | |
2336 movdqu 64(%rdi),%xmm6 | |
2337 leaq 80(%rdi),%rdi | |
2338 pxor %xmm12,%xmm4 | |
2339 pxor %xmm13,%xmm5 | |
2340 pxor %xmm14,%xmm6 | |
2341 | |
2342 call _aesni_decrypt6 | |
2343 | |
2344 xorps %xmm10,%xmm2 | |
2345 xorps %xmm11,%xmm3 | |
2346 xorps %xmm12,%xmm4 | |
2347 movdqu %xmm2,(%rsi) | |
2348 xorps %xmm13,%xmm5 | |
2349 movdqu %xmm3,16(%rsi) | |
2350 xorps %xmm14,%xmm6 | |
2351 movdqu %xmm4,32(%rsi) | |
2352 pxor %xmm14,%xmm14 | |
2353 movdqu %xmm5,48(%rsi) | |
2354 pcmpgtd %xmm15,%xmm14 | |
2355 movdqu %xmm6,64(%rsi) | |
2356 leaq 80(%rsi),%rsi | |
2357 pshufd $0x13,%xmm14,%xmm11 | |
2358 andq $15,%r9 | |
2359 jz L$xts_dec_ret | |
2360 | |
2361 movdqa %xmm15,%xmm10 | |
2362 paddq %xmm15,%xmm15 | |
2363 pand %xmm8,%xmm11 | |
2364 pxor %xmm15,%xmm11 | |
2365 jmp L$xts_dec_done2 | |
2366 | |
2367 .p2align 4 | |
2368 L$xts_dec_one: | |
2369 movups (%rdi),%xmm2 | |
2370 leaq 16(%rdi),%rdi | |
2371 xorps %xmm10,%xmm2 | |
2372 movups (%rcx),%xmm0 | |
2373 movups 16(%rcx),%xmm1 | |
2374 leaq 32(%rcx),%rcx | |
2375 xorps %xmm0,%xmm2 | |
2376 L$oop_dec1_12: | |
2377 .byte 102,15,56,222,209 | |
2378 decl %eax | |
2379 movups (%rcx),%xmm1 | |
2380 leaq 16(%rcx),%rcx | |
2381 jnz L$oop_dec1_12 | |
2382 .byte 102,15,56,223,209 | |
2383 xorps %xmm10,%xmm2 | |
2384 movdqa %xmm11,%xmm10 | |
2385 movups %xmm2,(%rsi) | |
2386 movdqa %xmm12,%xmm11 | |
2387 leaq 16(%rsi),%rsi | |
2388 jmp L$xts_dec_done | |
2389 | |
2390 .p2align 4 | |
2391 L$xts_dec_two: | |
2392 movups (%rdi),%xmm2 | |
2393 movups 16(%rdi),%xmm3 | |
2394 leaq 32(%rdi),%rdi | |
2395 xorps %xmm10,%xmm2 | |
2396 xorps %xmm11,%xmm3 | |
2397 | |
2398 call _aesni_decrypt2 | |
2399 | |
2400 xorps %xmm10,%xmm2 | |
2401 movdqa %xmm12,%xmm10 | |
2402 xorps %xmm11,%xmm3 | |
2403 movdqa %xmm13,%xmm11 | |
2404 movups %xmm2,(%rsi) | |
2405 movups %xmm3,16(%rsi) | |
2406 leaq 32(%rsi),%rsi | |
2407 jmp L$xts_dec_done | |
2408 | |
2409 .p2align 4 | |
2410 L$xts_dec_three: | |
2411 movups (%rdi),%xmm2 | |
2412 movups 16(%rdi),%xmm3 | |
2413 movups 32(%rdi),%xmm4 | |
2414 leaq 48(%rdi),%rdi | |
2415 xorps %xmm10,%xmm2 | |
2416 xorps %xmm11,%xmm3 | |
2417 xorps %xmm12,%xmm4 | |
2418 | |
2419 call _aesni_decrypt3 | |
2420 | |
2421 xorps %xmm10,%xmm2 | |
2422 movdqa %xmm13,%xmm10 | |
2423 xorps %xmm11,%xmm3 | |
2424 movdqa %xmm14,%xmm11 | |
2425 xorps %xmm12,%xmm4 | |
2426 movups %xmm2,(%rsi) | |
2427 movups %xmm3,16(%rsi) | |
2428 movups %xmm4,32(%rsi) | |
2429 leaq 48(%rsi),%rsi | |
2430 jmp L$xts_dec_done | |
2431 | |
2432 .p2align 4 | |
2433 L$xts_dec_four: | |
2434 movups (%rdi),%xmm2 | |
2435 movups 16(%rdi),%xmm3 | |
2436 movups 32(%rdi),%xmm4 | |
2437 xorps %xmm10,%xmm2 | |
2438 movups 48(%rdi),%xmm5 | |
2439 leaq 64(%rdi),%rdi | |
2440 xorps %xmm11,%xmm3 | |
2441 xorps %xmm12,%xmm4 | |
2442 xorps %xmm13,%xmm5 | |
2443 | |
2444 call _aesni_decrypt4 | |
2445 | |
2446 pxor %xmm10,%xmm2 | |
2447 movdqa %xmm14,%xmm10 | |
2448 pxor %xmm11,%xmm3 | |
2449 movdqa %xmm15,%xmm11 | |
2450 pxor %xmm12,%xmm4 | |
2451 movdqu %xmm2,(%rsi) | |
2452 pxor %xmm13,%xmm5 | |
2453 movdqu %xmm3,16(%rsi) | |
2454 movdqu %xmm4,32(%rsi) | |
2455 movdqu %xmm5,48(%rsi) | |
2456 leaq 64(%rsi),%rsi | |
2457 jmp L$xts_dec_done | |
2458 | |
2459 .p2align 4 | |
2460 L$xts_dec_done: | |
2461 andq $15,%r9 | |
2462 jz L$xts_dec_ret | |
2463 L$xts_dec_done2: | |
2464 movq %r9,%rdx | |
2465 movq %r11,%rcx | |
2466 movl %r10d,%eax | |
2467 | |
2468 movups (%rdi),%xmm2 | |
2469 xorps %xmm11,%xmm2 | |
2470 movups (%rcx),%xmm0 | |
2471 movups 16(%rcx),%xmm1 | |
2472 leaq 32(%rcx),%rcx | |
2473 xorps %xmm0,%xmm2 | |
2474 L$oop_dec1_13: | |
2475 .byte 102,15,56,222,209 | |
2476 decl %eax | |
2477 movups (%rcx),%xmm1 | |
2478 leaq 16(%rcx),%rcx | |
2479 jnz L$oop_dec1_13 | |
2480 .byte 102,15,56,223,209 | |
2481 xorps %xmm11,%xmm2 | |
2482 movups %xmm2,(%rsi) | |
2483 | |
2484 L$xts_dec_steal: | |
2485 movzbl 16(%rdi),%eax | |
2486 movzbl (%rsi),%ecx | |
2487 leaq 1(%rdi),%rdi | |
2488 movb %al,(%rsi) | |
2489 movb %cl,16(%rsi) | |
2490 leaq 1(%rsi),%rsi | |
2491 subq $1,%rdx | |
2492 jnz L$xts_dec_steal | |
2493 | |
2494 subq %r9,%rsi | |
2495 movq %r11,%rcx | |
2496 movl %r10d,%eax | |
2497 | |
2498 movups (%rsi),%xmm2 | |
2499 xorps %xmm10,%xmm2 | |
2500 movups (%rcx),%xmm0 | |
2501 movups 16(%rcx),%xmm1 | |
2502 leaq 32(%rcx),%rcx | |
2503 xorps %xmm0,%xmm2 | |
2504 L$oop_dec1_14: | |
2505 .byte 102,15,56,222,209 | |
2506 decl %eax | |
2507 movups (%rcx),%xmm1 | |
2508 leaq 16(%rcx),%rcx | |
2509 jnz L$oop_dec1_14 | |
2510 .byte 102,15,56,223,209 | |
2511 xorps %xmm10,%xmm2 | |
2512 movups %xmm2,(%rsi) | |
2513 | |
2514 L$xts_dec_ret: | |
2515 xorps %xmm0,%xmm0 | |
2516 pxor %xmm1,%xmm1 | |
2517 pxor %xmm2,%xmm2 | |
2518 pxor %xmm3,%xmm3 | |
2519 pxor %xmm4,%xmm4 | |
2520 pxor %xmm5,%xmm5 | |
2521 pxor %xmm6,%xmm6 | |
2522 pxor %xmm7,%xmm7 | |
2523 movaps %xmm0,0(%rsp) | |
2524 pxor %xmm8,%xmm8 | |
2525 movaps %xmm0,16(%rsp) | |
2526 pxor %xmm9,%xmm9 | |
2527 movaps %xmm0,32(%rsp) | |
2528 pxor %xmm10,%xmm10 | |
2529 movaps %xmm0,48(%rsp) | |
2530 pxor %xmm11,%xmm11 | |
2531 movaps %xmm0,64(%rsp) | |
2532 pxor %xmm12,%xmm12 | |
2533 movaps %xmm0,80(%rsp) | |
2534 pxor %xmm13,%xmm13 | |
2535 movaps %xmm0,96(%rsp) | |
2536 pxor %xmm14,%xmm14 | |
2537 pxor %xmm15,%xmm15 | |
2538 leaq (%rbp),%rsp | |
2539 popq %rbp | |
2540 L$xts_dec_epilogue: | |
2541 .byte 0xf3,0xc3 | |
2542 | |
/*
 * void aesni_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
 *                        const AES_KEY *key, uint8_t *ivec, int enc)
 *
 * SysV AMD64 register roles (as used below):
 *   rdi = in, rsi = out, rdx = len in bytes, rcx = key schedule,
 *   r8  = ivec (read and updated), r9d = enc flag (non-zero: encrypt,
 *   zero: decrypt).
 * The round count is read from byte offset 240 of the key schedule
 * (presumably AES_KEY->rounds — confirm against the C struct layout).
 * The hand-encoded ".byte 102,15,56,222,..." sequences are AESDEC, and
 * "...,220/221/223,..." are AESENC/AESENCLAST/AESDECLAST round instructions.
 * All sensitive xmm state is cleared before every return path.
 */
.globl _aesni_cbc_encrypt
.private_extern _aesni_cbc_encrypt

.p2align 4
_aesni_cbc_encrypt:
testq %rdx,%rdx
jz L$cbc_ret                            /* zero-length: nothing to do */

movl 240(%rcx),%r10d                    /* r10d = number of rounds */
movq %rcx,%r11                          /* r11 = saved key-schedule ptr */
testl %r9d,%r9d
jz L$cbc_decrypt                        /* enc==0 -> decrypt path */

/* ---- CBC encrypt: inherently serial, one block at a time ---- */
movups (%r8),%xmm2                      /* xmm2 = IV */
movl %r10d,%eax
cmpq $16,%rdx
jb L$cbc_enc_tail                       /* partial final block */
subq $16,%rdx
jmp L$cbc_enc_loop
.p2align 4
L$cbc_enc_loop:
movups (%rdi),%xmm3                     /* load plaintext block */
leaq 16(%rdi),%rdi

movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm3                       /* whitening folded into PT */
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2                       /* xmm2 = PT ^ IV ^ round0 key */
L$oop_enc1_15:
.byte 102,15,56,220,209                 /* aesenc %xmm1,%xmm2 */
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_15
.byte 102,15,56,221,209                 /* aesenclast %xmm1,%xmm2 */
movl %r10d,%eax                         /* reload rounds for next block */
movq %r11,%rcx                          /* rewind key schedule */
movups %xmm2,0(%rsi)                    /* store CT; CT becomes next IV */
leaq 16(%rsi),%rsi
subq $16,%rdx
jnc L$cbc_enc_loop
addq $16,%rdx
jnz L$cbc_enc_tail
pxor %xmm0,%xmm0                        /* scrub key/data registers */
pxor %xmm1,%xmm1
movups %xmm2,(%r8)                      /* write back final IV */
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
jmp L$cbc_ret

L$cbc_enc_tail:
/* Pad a trailing partial block in place: copy the rdx remaining input
 * bytes over the output block, zero-fill to 16, then loop once more. */
movq %rdx,%rcx
xchgq %rdi,%rsi
.long 0x9066A4F3                        /* rep movsb (hand-encoded) */
movl $16,%ecx
subq %rdx,%rcx
xorl %eax,%eax
.long 0x9066AAF3                        /* rep stosb: zero padding */
leaq -16(%rdi),%rdi
movl %r10d,%eax
movq %rdi,%rsi
movq %r11,%rcx
xorq %rdx,%rdx                          /* exactly one block remains */
jmp L$cbc_enc_loop

/* ---- CBC decrypt: parallelizable, processed in wide batches ---- */
.p2align 4
L$cbc_decrypt:
cmpq $16,%rdx
jne L$cbc_decrypt_bulk

/* Exactly one block: simple scalar path. */

movdqu (%rdi),%xmm2
movdqu (%r8),%xmm3                      /* xmm3 = IV */
movdqa %xmm2,%xmm4                      /* keep CT to become next IV */
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_16:
.byte 102,15,56,222,209                 /* aesdec %xmm1,%xmm2 */
decl %r10d
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_16
.byte 102,15,56,223,209                 /* aesdeclast %xmm1,%xmm2 */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movdqu %xmm4,(%r8)                      /* IV <- this ciphertext block */
xorps %xmm3,%xmm2                       /* PT = D(CT) ^ IV */
pxor %xmm3,%xmm3
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp L$cbc_ret
.p2align 4
L$cbc_decrypt_bulk:
leaq (%rsp),%rax
pushq %rbp
subq $16,%rsp
andq $-16,%rsp                          /* 16-align scratch for movaps */
leaq -8(%rax),%rbp
movups (%r8),%xmm10                     /* xmm10 = IV */
movl %r10d,%eax
cmpq $0x50,%rdx
jbe L$cbc_dec_tail                      /* <= 5 blocks: tail dispatch */

movups (%rcx),%xmm0
/* Load 6 CT blocks; keep copies in xmm11-15 for the CBC chain xor. */
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
movl _OPENSSL_ia32cap_P+4(%rip),%r9d    /* CPU capability word */
cmpq $0x70,%rdx
jbe L$cbc_dec_six_or_seven

/* NOTE(review): 0x4400000/0x400000 are cpuid feature bits; this selects
 * the 6-block loop on certain CPUs — confirm bit meanings upstream. */
andl $71303168,%r9d
subq $0x50,%rdx
cmpl $4194304,%r9d
je L$cbc_dec_loop6_enter
subq $0x20,%rdx
leaq 112(%rcx),%rcx                     /* bias key ptr for -112 offsets */
jmp L$cbc_dec_loop8_enter
/* ---- main 8-blocks-per-iteration decrypt loop ---- */
.p2align 4
L$cbc_dec_loop8:
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
L$cbc_dec_loop8_enter:
movdqu 96(%rdi),%xmm8
pxor %xmm0,%xmm2                        /* round-0 whitening, 8 blocks */
movdqu 112(%rdi),%xmm9
pxor %xmm0,%xmm3
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
xorq %r11,%r11
cmpq $0x70,%rdx                         /* is a full next batch present? */
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8

/* Interleaved aesdec rounds over xmm2-xmm9 (REX-prefixed .byte forms
 * reach xmm8/xmm9); round keys alternate through xmm0/xmm1. */
.byte 102,15,56,222,209
pxor %xmm0,%xmm9
movups 32-112(%rcx),%xmm0
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
setnc %r11b                             /* r11 = 128 if >=7 blocks left */
shlq $7,%r11
.byte 102,68,15,56,222,201
addq %rdi,%r11                          /* r11 = source of next-IV blocks */
movups 48-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 64-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 80-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 96-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 112-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 128-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 144-112(%rcx),%xmm1
cmpl $11,%eax                           /* 10/12/14-round dispatch */
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 160-112(%rcx),%xmm0
jb L$cbc_dec_done                       /* 10 rounds (AES-128) done */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 176-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 192-112(%rcx),%xmm0
je L$cbc_dec_done                       /* 12 rounds (AES-192) done */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 208-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 224-112(%rcx),%xmm0              /* 14 rounds (AES-256) */
jmp L$cbc_dec_done
.p2align 4
L$cbc_dec_done:
/* Final round; xor the CBC chain (IV + previous CT blocks) back in. */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm12
pxor %xmm0,%xmm13
.byte 102,15,56,222,241
.byte 102,15,56,222,249
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movdqu 80(%rdi),%xmm1
/* aesdeclast merges each block with its chain value (xmm10-15). */
.byte 102,65,15,56,223,210
movdqu 96(%rdi),%xmm10
pxor %xmm0,%xmm1
.byte 102,65,15,56,223,219
pxor %xmm0,%xmm10
movdqu 112(%rdi),%xmm0
.byte 102,65,15,56,223,228
leaq 128(%rdi),%rdi
movdqu 0(%r11),%xmm11                   /* preload chain for next batch */
.byte 102,65,15,56,223,237
.byte 102,65,15,56,223,246
movdqu 16(%r11),%xmm12
movdqu 32(%r11),%xmm13
.byte 102,65,15,56,223,255
.byte 102,68,15,56,223,193
movdqu 48(%r11),%xmm14
movdqu 64(%r11),%xmm15
.byte 102,69,15,56,223,202
movdqa %xmm0,%xmm10                     /* last CT block = next IV */
movdqu 80(%r11),%xmm1
movups -112(%rcx),%xmm0

movups %xmm2,(%rsi)                     /* store 7 PT blocks, rotate regs */
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm1,%xmm7
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi

subq $0x80,%rdx
ja L$cbc_dec_loop8

movaps %xmm9,%xmm2                      /* 8th PT still pending in xmm9 */
leaq -112(%rcx),%rcx                    /* undo key-pointer bias */
addq $0x70,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
cmpq $0x50,%rdx
jbe L$cbc_dec_tail

movaps %xmm11,%xmm2
L$cbc_dec_six_or_seven:
cmpq $0x60,%rdx
ja L$cbc_dec_seven

/* exactly 6 blocks */
movaps %xmm7,%xmm8                      /* save last CT = next IV */
call _aesni_decrypt6
pxor %xmm10,%xmm2                       /* xor CBC chain, store, scrub */
movaps %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
leaq 80(%rsi),%rsi
movdqa %xmm7,%xmm2
pxor %xmm7,%xmm7
jmp L$cbc_dec_tail_collected

.p2align 4
L$cbc_dec_seven:
movups 96(%rdi),%xmm8
xorps %xmm9,%xmm9
call _aesni_decrypt8
movups 80(%rdi),%xmm9
pxor %xmm10,%xmm2
movups 96(%rdi),%xmm10                  /* last CT = next IV */
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
pxor %xmm9,%xmm8
movdqu %xmm7,80(%rsi)
pxor %xmm7,%xmm7
leaq 96(%rsi),%rsi
movdqa %xmm8,%xmm2
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
jmp L$cbc_dec_tail_collected

/* ---- 6-blocks-per-iteration loop (alternate CPU path) ---- */
.p2align 4
L$cbc_dec_loop6:
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
L$cbc_dec_loop6_enter:
leaq 96(%rdi),%rdi
movdqa %xmm7,%xmm8

call _aesni_decrypt6

pxor %xmm10,%xmm2
movdqa %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movq %r11,%rcx
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movl %r10d,%eax
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $0x60,%rdx
ja L$cbc_dec_loop6

movdqa %xmm7,%xmm2
addq $0x50,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi

/* ---- tail: 1..5 remaining whole blocks plus optional partial ---- */
L$cbc_dec_tail:
movups (%rdi),%xmm2
subq $0x10,%rdx
jbe L$cbc_dec_one

movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $0x10,%rdx
jbe L$cbc_dec_two

movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $0x10,%rdx
jbe L$cbc_dec_three

movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $0x10,%rdx
jbe L$cbc_dec_four

/* 5 blocks (+ possible partial): decrypt 6 with a zero dummy */
movups 64(%rdi),%xmm6
movaps %xmm5,%xmm14
movaps %xmm6,%xmm15
xorps %xmm7,%xmm7
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm15,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
leaq 64(%rsi),%rsi
movdqa %xmm6,%xmm2
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
subq $0x10,%rdx
jmp L$cbc_dec_tail_collected

.p2align 4
L$cbc_dec_one:
movaps %xmm2,%xmm11
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_17:
.byte 102,15,56,222,209                 /* aesdec %xmm1,%xmm2 */
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_17
.byte 102,15,56,223,209                 /* aesdeclast %xmm1,%xmm2 */
xorps %xmm10,%xmm2
movaps %xmm11,%xmm10
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_two:
movaps %xmm3,%xmm12
call _aesni_decrypt2
pxor %xmm10,%xmm2
movaps %xmm12,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
movdqa %xmm3,%xmm2
pxor %xmm3,%xmm3
leaq 16(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_three:
movaps %xmm4,%xmm13
call _aesni_decrypt3
pxor %xmm10,%xmm2
movaps %xmm13,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movdqa %xmm4,%xmm2
pxor %xmm4,%xmm4
leaq 32(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_four:
movaps %xmm5,%xmm14
call _aesni_decrypt4
pxor %xmm10,%xmm2
movaps %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movdqa %xmm5,%xmm2
pxor %xmm5,%xmm5
leaq 48(%rsi),%rsi
jmp L$cbc_dec_tail_collected

.p2align 4
L$cbc_dec_clear_tail_collected:
pxor %xmm3,%xmm3                        /* scrub registers not yet cleared */
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
L$cbc_dec_tail_collected:
movups %xmm10,(%r8)                     /* write back final IV */
andq $15,%rdx
jnz L$cbc_dec_tail_partial
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp L$cbc_dec_ret
.p2align 4
L$cbc_dec_tail_partial:
/* Partial final block: stage the PT on the aligned stack scratch and
 * copy only (16 - rdx%16) ... rdx bytes out, then wipe the scratch. */
movaps %xmm2,(%rsp)
pxor %xmm2,%xmm2
movq $16,%rcx
movq %rsi,%rdi
subq %rdx,%rcx
leaq (%rsp),%rsi
.long 0x9066A4F3                        /* rep movsb (hand-encoded) */
movdqa %xmm2,(%rsp)                     /* wipe plaintext from stack */

L$cbc_dec_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
leaq (%rbp),%rsp
popq %rbp
L$cbc_ret:
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
3128 | |
/*
 * int aesni_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
 * SysV AMD64: rdi = userKey, esi = bits, rdx = key schedule out.
 * Builds the encryption schedule via __aesni_set_encrypt_key, then
 * derives the decryption schedule: swaps the first and last round keys
 * and applies AESIMC (InvMixColumns, .byte 102,15,56,219,...) to the
 * middle round keys, reversing their order in place.
 * Returns 0 in eax on success, or __aesni_set_encrypt_key's error code.
 */
.globl _aesni_set_decrypt_key
.private_extern _aesni_set_decrypt_key

.p2align 4
_aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08               /* subq $8,%rsp (hand-encoded) */
call __aesni_set_encrypt_key
shll $4,%esi                            /* esi = rounds*16 (set by callee) */
testl %eax,%eax
jnz L$dec_key_ret                       /* propagate key-setup failure */
leaq 16(%rdx,%rsi,1),%rdi               /* rdi -> last round key */

/* swap first and last round keys (no AESIMC on these two) */
movups (%rdx),%xmm0
movups (%rdi),%xmm1
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi

L$dec_key_inverse:
/* walk inward from both ends, AESIMC-ing and exchanging round keys */
movups (%rdx),%xmm0
movups (%rdi),%xmm1
.byte 102,15,56,219,192                 /* aesimc %xmm0,%xmm0 */
.byte 102,15,56,219,201                 /* aesimc %xmm1,%xmm1 */
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)
movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja L$dec_key_inverse

/* middle round key: AESIMC in place */
movups (%rdx),%xmm0
.byte 102,15,56,219,192                 /* aesimc %xmm0,%xmm0 */
pxor %xmm1,%xmm1
movups %xmm0,(%rdi)
pxor %xmm0,%xmm0                        /* scrub key material */
L$dec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
L$SEH_end_set_decrypt_key:
3169 | |
/*
 * int aesni_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key)
 * SysV AMD64: rdi = userKey, esi = bits (128/192/256), rdx = schedule out.
 * Returns in rax: 0 = ok, -1 = NULL argument, -2 = bad key length.
 * Stores the round count (9/11/13 here, i.e. rounds-1 of 10/12/14 — the
 * value written is what this file's consumers read back from the
 * schedule's trailing word) after the last round key.
 *
 * Two expansion strategies per key size, chosen from _OPENSSL_ia32cap_P:
 *   - classic: AESKEYGENASSIST (.byte 102,15,58,223,reg,rcon) plus the
 *     L$key_expansion_* helpers below;
 *   - "_alt": constant-time-friendlier PSHUFB(.byte 102,15,56,0,...) +
 *     AESENCLAST(.byte 102,15,56,221,...) with rcon in xmm4, doubled via
 *     pslld $1 each iteration.
 * NOTE(review): 268437504 = 0x10000800 and 268435456 = 0x10000000 are
 * cpuid capability masks selecting the _alt paths — confirm bit meanings.
 */
.globl _aesni_set_encrypt_key
.private_extern _aesni_set_encrypt_key

.p2align 4
_aesni_set_encrypt_key:
__aesni_set_encrypt_key:                /* local alias for internal calls */
.byte 0x48,0x83,0xEC,0x08               /* subq $8,%rsp (hand-encoded) */
movq $-1,%rax                           /* default: NULL-pointer error */
testq %rdi,%rdi
jz L$enc_key_ret
testq %rdx,%rdx
jz L$enc_key_ret

movl $268437504,%r10d                   /* capability mask (see header) */
movups (%rdi),%xmm0                     /* first 16 key bytes */
xorps %xmm4,%xmm4                       /* helpers expect xmm4 = 0 */
andl _OPENSSL_ia32cap_P+4(%rip),%r10d
leaq 16(%rdx),%rax                      /* rax walks the schedule */
cmpl $256,%esi
je L$14rounds
cmpl $192,%esi
je L$12rounds
cmpl $128,%esi
jne L$bad_keybits

L$10rounds:
movl $9,%esi                            /* round-count word for AES-128 */
cmpl $268435456,%r10d
je L$10rounds_alt

/* classic AES-128 expansion: 10 aeskeygenassist steps, rcon 1..0x36 */
movups %xmm0,(%rdx)
.byte 102,15,58,223,200,1               /* aeskeygenassist $1,%xmm0,%xmm1 */
call L$key_expansion_128_cold
.byte 102,15,58,223,200,2
call L$key_expansion_128
.byte 102,15,58,223,200,4
call L$key_expansion_128
.byte 102,15,58,223,200,8
call L$key_expansion_128
.byte 102,15,58,223,200,16
call L$key_expansion_128
.byte 102,15,58,223,200,32
call L$key_expansion_128
.byte 102,15,58,223,200,64
call L$key_expansion_128
.byte 102,15,58,223,200,128
call L$key_expansion_128
.byte 102,15,58,223,200,27
call L$key_expansion_128
.byte 102,15,58,223,200,54
call L$key_expansion_128
movups %xmm0,(%rax)
movl %esi,80(%rax)                      /* store round count */
xorl %eax,%eax                          /* success */
jmp L$enc_key_ret

.p2align 4
L$10rounds_alt:
movdqa L$key_rotate(%rip),%xmm5         /* pshufb rot-word pattern */
movl $8,%r10d
movdqa L$key_rcon1(%rip),%xmm4          /* running rcon */
movdqa %xmm0,%xmm2
movdqu %xmm0,(%rdx)
jmp L$oop_key128

.p2align 4
L$oop_key128:
.byte 102,15,56,0,197                   /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,221,196                 /* aesenclast %xmm4,%xmm0 */
pslld $1,%xmm4                          /* rcon <<= 1 */
leaq 16(%rax),%rax

/* xmm2 ^= xmm2<<32 ^ xmm2<<64 ^ xmm2<<96 (key-schedule word chain) */
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,-16(%rax)
movdqa %xmm0,%xmm2

decl %r10d
jnz L$oop_key128

/* rounds 9 and 10 use rcon 0x1b, 0x36 */
movdqa L$key_rcon1b(%rip),%xmm4

.byte 102,15,56,0,197                   /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,221,196                 /* aesenclast %xmm4,%xmm0 */
pslld $1,%xmm4

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)

movdqa %xmm0,%xmm2
.byte 102,15,56,0,197
.byte 102,15,56,221,196

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,16(%rax)

movl %esi,96(%rax)                      /* store round count */
xorl %eax,%eax
jmp L$enc_key_ret

.p2align 4
L$12rounds:
movq 16(%rdi),%xmm2                     /* remaining 8 key bytes */
movl $11,%esi                           /* round-count word for AES-192 */
cmpl $268435456,%r10d
je L$12rounds_alt

/* classic AES-192 expansion */
movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1               /* aeskeygenassist $1,%xmm2,%xmm1 */
call L$key_expansion_192a_cold
.byte 102,15,58,223,202,2
call L$key_expansion_192b
.byte 102,15,58,223,202,4
call L$key_expansion_192a
.byte 102,15,58,223,202,8
call L$key_expansion_192b
.byte 102,15,58,223,202,16
call L$key_expansion_192a
.byte 102,15,58,223,202,32
call L$key_expansion_192b
.byte 102,15,58,223,202,64
call L$key_expansion_192a
.byte 102,15,58,223,202,128
call L$key_expansion_192b
movups %xmm0,(%rax)
movl %esi,48(%rax)
xorq %rax,%rax
jmp L$enc_key_ret

.p2align 4
L$12rounds_alt:
movdqa L$key_rotate192(%rip),%xmm5
movdqa L$key_rcon1(%rip),%xmm4
movl $8,%r10d
movdqu %xmm0,(%rdx)
jmp L$oop_key192

.p2align 4
L$oop_key192:
movq %xmm2,0(%rax)                      /* emit low 8 bytes of odd half */
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213                   /* pshufb %xmm5,%xmm2 */
.byte 102,15,56,221,212                 /* aesenclast %xmm4,%xmm2 */
pslld $1,%xmm4
leaq 24(%rax),%rax                      /* 192-bit stride */

movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0

pshufd $0xff,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3

pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%rax)

decl %r10d
jnz L$oop_key192

movl %esi,32(%rax)
xorl %eax,%eax
jmp L$enc_key_ret

.p2align 4
L$14rounds:
movups 16(%rdi),%xmm2                   /* second 16 key bytes */
movl $13,%esi                           /* round-count word for AES-256 */
leaq 16(%rax),%rax
cmpl $268435456,%r10d
je L$14rounds_alt

/* classic AES-256 expansion: a/b pairs per rcon */
movups %xmm0,(%rdx)
movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1               /* aeskeygenassist $1,%xmm2,%xmm1 */
call L$key_expansion_256a_cold
.byte 102,15,58,223,200,1               /* aeskeygenassist $1,%xmm0,%xmm1 */
call L$key_expansion_256b
.byte 102,15,58,223,202,2
call L$key_expansion_256a
.byte 102,15,58,223,200,2
call L$key_expansion_256b
.byte 102,15,58,223,202,4
call L$key_expansion_256a
.byte 102,15,58,223,200,4
call L$key_expansion_256b
.byte 102,15,58,223,202,8
call L$key_expansion_256a
.byte 102,15,58,223,200,8
call L$key_expansion_256b
.byte 102,15,58,223,202,16
call L$key_expansion_256a
.byte 102,15,58,223,200,16
call L$key_expansion_256b
.byte 102,15,58,223,202,32
call L$key_expansion_256a
.byte 102,15,58,223,200,32
call L$key_expansion_256b
.byte 102,15,58,223,202,64
call L$key_expansion_256a
movups %xmm0,(%rax)
movl %esi,16(%rax)
xorq %rax,%rax
jmp L$enc_key_ret

.p2align 4
L$14rounds_alt:
movdqa L$key_rotate(%rip),%xmm5
movdqa L$key_rcon1(%rip),%xmm4
movl $7,%r10d
movdqu %xmm0,0(%rdx)
movdqa %xmm2,%xmm1
movdqu %xmm2,16(%rdx)
jmp L$oop_key256

.p2align 4
L$oop_key256:
.byte 102,15,56,0,213                   /* pshufb %xmm5,%xmm2 */
.byte 102,15,56,221,212                 /* aesenclast %xmm4,%xmm2 */

movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)

decl %r10d
jz L$done_key256

/* odd round key: SubWord (aesenclast with zeroed xmm3 key) only */
pshufd $0xff,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211                 /* aesenclast %xmm3,%xmm2 */

movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1

pxor %xmm1,%xmm2
movdqu %xmm2,16(%rax)
leaq 32(%rax),%rax
movdqa %xmm2,%xmm1

jmp L$oop_key256

L$done_key256:
movl %esi,16(%rax)
xorl %eax,%eax
jmp L$enc_key_ret

.p2align 4
L$bad_keybits:
movq $-2,%rax                           /* unsupported key length */
L$enc_key_ret:
pxor %xmm0,%xmm0                        /* scrub key material */
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
L$SEH_end_set_encrypt_key:
3471 | |
/*
 * AES-128 key-expansion step (classic path). In: xmm0 = previous round
 * key, xmm1 = aeskeygenassist output, xmm4 = 0 (caller invariant),
 * rax = schedule cursor. Out: xmm0 = next round key; stores the previous
 * one at (%rax) first. _cold entry skips the store for round 0.
 * Clobbers xmm4 transiently (restored to a shuffle of xmm0 each call).
 */
.p2align 4
L$key_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_128_cold:
shufps $16,%xmm0,%xmm4                  /* accumulate prior words ... */
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0                       /* ... via sliding xor chain */
shufps $255,%xmm1,%xmm1                 /* broadcast keygenassist word */
xorps %xmm1,%xmm0
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
3484 | |
/*
 * AES-192 key-expansion step "a" (classic path). In: xmm0/xmm2 = 192-bit
 * key state, xmm1 = aeskeygenassist output, rax = schedule cursor.
 * Stores the completed 128-bit key word, snapshots xmm2 into xmm5 (used
 * later by _192b to assemble straddling round keys), then falls into the
 * shared _warm tail that advances the xmm0/xmm2 state.
 */
.p2align 4
L$key_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_192a_cold:
movaps %xmm2,%xmm5                      /* save high half for _192b */
L$key_expansion_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1                  /* broadcast keygenassist word */
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3                 /* propagate into high half */
pxor %xmm3,%xmm2
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
3504 | |
/*
 * AES-192 key-expansion step "b" (classic path). Packs the saved xmm5
 * half with the current state into two 128-bit round keys straddling the
 * 192-bit stride, stores both, then tail-jumps into the shared _warm
 * state-advance in L$key_expansion_192a above.
 */
.p2align 4
L$key_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5                  /* low halves of xmm5|xmm0 */
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3                  /* high xmm0 | low xmm2 */
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp L$key_expansion_192b_warm
3514 | |
/*
 * AES-256 key-expansion step "a" (classic path): updates the even round
 * key xmm0 from aeskeygenassist(xmm2) in xmm1; stores the previous odd
 * key xmm2 at the cursor first. _cold entry skips the store. Same xor
 * chain as the 128-bit helper. Caller invariant: xmm4 scratch.
 */
.p2align 4
L$key_expansion_256a:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
L$key_expansion_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1                 /* broadcast keygenassist word */
xorps %xmm1,%xmm0
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
3527 | |
/*
 * AES-256 key-expansion step "b" (classic path): updates the odd round
 * key xmm2 from aeskeygenassist(xmm0) in xmm1; stores the just-completed
 * even key xmm0 first. Uses shuffle $170 (word 2) instead of $255 since
 * the odd half takes SubWord without RotWord/rcon.
 */
.p2align 4
L$key_expansion_256b:
movups %xmm0,(%rax)
leaq 16(%rax),%rax

shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1                 /* broadcast SubWord result */
xorps %xmm1,%xmm2
.byte 0xf3,0xc3                         /* ret (hand-encoded) */
3540 | |
3541 | |
/* Read-only constant tables, 64-byte aligned. */
.p2align 6
L$bswap_mask:                           /* pshufb mask: byte-reverse 128 bits */
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:                          /* CTR: add 6 to low 32-bit counter lanes */
.long 6,6,6,0
L$increment64:                          /* CTR: 64-bit counter increment of 1 */
.long 1,0,0,0
L$xts_magic:                            /* XTS tweak: GF(2^128) poly x^128+x^7+x^2+x+1 */
.long 0x87,0,1,0
L$increment1:                           /* big-endian 128-bit +1 */
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$key_rotate:                           /* pshufb: RotWord of the top dword */
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:                        /* pshufb variant for 192-bit schedule */
.long 0x04070605,0x04070605,0x04070605,0x04070605
L$key_rcon1:                            /* initial round constant */
.long 1,1,1,1
L$key_rcon1b:                           /* rcon 0x1b (post-overflow rounds) */
.long 0x1b,0x1b,0x1b,0x1b
3561 | |
3562 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32
,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101
,110,115,115,108,46,111,114,103,62,0 | |
3563 .p2align 6 | |
3564 #endif | |
OLD | NEW |