OLD | NEW |
| (Empty) |
.text
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
# void aesni_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
#   rdi = in  (one 16-byte block)
#   rsi = out (one 16-byte block)
#   rdx = expanded key schedule; 240(%rdx) = round counter
#         (odd value: the loop runs that many AESENC rounds, then one
#          AESENCLAST — i.e. the field holds rounds-1 as laid down by the
#          matching AES-NI key-setup routine)
# Clobbers: eax, rdx, xmm0-xmm2, flags.
aesni_encrypt:
	movups	(%rdi),%xmm2		# xmm2 = input block (unaligned load)
	movl	240(%rdx),%eax		# eax = AESENC round count
	movups	(%rdx),%xmm0		# xmm0 = round key 0
	movups	16(%rdx),%xmm1		# xmm1 = round key 1
	leaq	32(%rdx),%rdx		# advance key pointer past rk0/rk1
	xorps	%xmm0,%xmm2		# whitening: block ^= rk[0]
.Loop_enc1_1:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1		# fetch next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1		# one AES round per iteration
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2 (final round)
	movups	%xmm2,(%rsi)		# store ciphertext block
.byte	0xf3,0xc3		# rep ret
.size	aesni_encrypt,.-aesni_encrypt
22 | |
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
# void aesni_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
#   Mirror image of aesni_encrypt using the inverse cipher:
#   rdi = in, rsi = out, rdx = decryption key schedule
#   (240(%rdx) = AESDEC round count, same convention as aesni_encrypt).
# Clobbers: eax, rdx, xmm0-xmm2, flags.
aesni_decrypt:
	movups	(%rdi),%xmm2		# xmm2 = input block
	movl	240(%rdx),%eax		# eax = AESDEC round count
	movups	(%rdx),%xmm0		# round key 0
	movups	16(%rdx),%xmm1		# round key 1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2		# whitening: block ^= rk[0]
.Loop_dec1_2:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1		# next round key
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2 (final round)
	movups	%xmm2,(%rsi)		# store plaintext block
.byte	0xf3,0xc3		# rep ret
.size	aesni_decrypt, .-aesni_decrypt
.type	_aesni_encrypt3,@function
.align	16
# Internal helper: encrypt 3 blocks in parallel (in place).
#   In:  xmm2-xmm4 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm4 encrypted; rcx/eax/xmm0/xmm1 clobbered.
# The three streams are interleaved to hide AESENC latency; eax is halved
# and each loop iteration applies two round keys (xmm1 = odd, xmm0 = even).
_aesni_encrypt3:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whiten all three blocks with rk[0]
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	(%rcx),%xmm0		# rk[2]

.Lenc_loop3:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Lenc_loop3

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2   (penultimate round)
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2 (final round)
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
.type	_aesni_decrypt3,@function
.align	16
# Internal helper: decrypt 3 blocks in parallel (in place).
#   In:  xmm2-xmm4 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm4 decrypted; rcx/eax/xmm0/xmm1 clobbered.
# Same structure as _aesni_encrypt3 with AESDEC/AESDECLAST.
_aesni_decrypt3:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whiten with rk[0]
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	(%rcx),%xmm0		# rk[2]

.Ldec_loop3:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Ldec_loop3

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
.type	_aesni_encrypt4,@function
.align	16
# Internal helper: encrypt 4 blocks in parallel (in place).
#   In:  xmm2-xmm5 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm5 encrypted; rcx/eax/xmm0/xmm1 clobbered.
# eax is halved; each loop pass applies an odd (xmm1) and even (xmm0) key.
_aesni_encrypt4:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whiten all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	(%rcx),%xmm0		# rk[2]

.Lenc_loop4:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Lenc_loop4

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2   (penultimate round)
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
.type	_aesni_decrypt4,@function
.align	16
# Internal helper: decrypt 4 blocks in parallel (in place).
#   In:  xmm2-xmm5 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm5 decrypted; rcx/eax/xmm0/xmm1 clobbered.
_aesni_decrypt4:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whiten all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	(%rcx),%xmm0		# rk[2]

.Ldec_loop4:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Ldec_loop4

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
.type	_aesni_encrypt6,@function
.align	16
# Internal helper: encrypt 6 blocks in parallel (in place).
#   In:  xmm2-xmm7 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm7 encrypted; rcx/eax/xmm0/xmm1 clobbered.
# The first round is peeled and interleaved with the rk[0] whitening
# (xorps/pxor) to keep the AES units busy; the loop is then entered
# mid-body so each pass still applies two round keys.
_aesni_encrypt6:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening interleaved with round 1
	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# round 1 done outside the loop
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
	movups	(%rcx),%xmm0		# rk[2]
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.Lenc_loop6_enter:
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Lenc_loop6

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2   (penultimate round)
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
.type	_aesni_decrypt6,@function
.align	16
# Internal helper: decrypt 6 blocks in parallel (in place).
#   In:  xmm2-xmm7 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm7 decrypted; rcx/eax/xmm0/xmm1 clobbered.
# Same peeled-first-round structure as _aesni_encrypt6, with AESDEC.
_aesni_decrypt6:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening interleaved with round 1
	pxor	%xmm0,%xmm3
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# round 1 done outside the loop
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
	movups	(%rcx),%xmm0		# rk[2]
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.Ldec_loop6_enter:
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Ldec_loop6

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
.type	_aesni_encrypt8,@function
.align	16
# Internal helper: encrypt 8 blocks in parallel (in place).
#   In:  xmm2-xmm9 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm9 encrypted; rcx/eax/xmm0/xmm1 clobbered.
# REX-prefixed .byte encodings (102,68,...) operate on xmm8/xmm9.
# First round peeled and fused with whitening, as in _aesni_encrypt6.
_aesni_encrypt8:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening interleaved with round 1
	xorps	%xmm0,%xmm3
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# round 1 done outside the loop
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	pxor	%xmm0,%xmm9
	movups	(%rcx),%xmm0		# rk[2]
.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
	movups	16(%rcx),%xmm1		# rk[3]
	jmp	.Lenc_loop8_enter
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
	movups	16(%rcx),%xmm1		# next odd round key
.Lenc_loop8_enter:
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192	# aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200	# aesenc %xmm0,%xmm9
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Lenc_loop8

.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2   (penultimate round)
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
.byte	102,68,15,56,221,192	# aesenclast %xmm0,%xmm8
.byte	102,68,15,56,221,200	# aesenclast %xmm0,%xmm9
.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt8,.-_aesni_encrypt8
.type	_aesni_decrypt8,@function
.align	16
# Internal helper: decrypt 8 blocks in parallel (in place).
#   In:  xmm2-xmm9 = blocks, rcx = key schedule, eax = round counter
#   Out: xmm2-xmm9 decrypted; rcx/eax/xmm0/xmm1 clobbered.
# Mirror of _aesni_encrypt8 using AESDEC/AESDECLAST.
_aesni_decrypt8:
	movups	(%rcx),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per iteration
	movups	16(%rcx),%xmm1		# rk[1]
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening interleaved with round 1
	xorps	%xmm0,%xmm3
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# round 1 done outside the loop
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
	pxor	%xmm0,%xmm9
	movups	(%rcx),%xmm0		# rk[2]
.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
	movups	16(%rcx),%xmm1		# rk[3]
	jmp	.Ldec_loop8_enter
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
	movups	16(%rcx),%xmm1		# next odd round key
.Ldec_loop8_enter:
.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
.byte	102,68,15,56,222,192	# aesdec %xmm0,%xmm8
.byte	102,68,15,56,222,200	# aesdec %xmm0,%xmm9
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Ldec_loop8

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
.byte	102,68,15,56,223,192	# aesdeclast %xmm0,%xmm8
.byte	102,68,15,56,223,200	# aesdeclast %xmm0,%xmm9
.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt8,.-_aesni_decrypt8
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
# void aesni_ecb_encrypt(const u8 *in, u8 *out, size_t len,
#                        const AES_KEY *key, int enc)
#   rdi = in, rsi = out, rdx = length in bytes (truncated to a multiple
#   of 16), rcx = key schedule, r8d = nonzero -> encrypt, zero -> decrypt.
# Bulk path handles 8 blocks per iteration via _aesni_{en,de}crypt8 with
# loads/stores software-pipelined around the call; 1..7 remaining blocks
# are dispatched to the 1/3/4/6-block helpers.  r11 and r10d preserve the
# key pointer and round count across helper calls (which clobber rcx/eax).
aesni_ecb_encrypt:
	andq	$-16,%rdx		# whole blocks only
	jz	.Lecb_ret		# nothing to do

	movl	240(%rcx),%eax		# eax = round count
	movups	(%rcx),%xmm0
	movq	%rcx,%r11		# r11  = saved key pointer
	movl	%eax,%r10d		# r10d = saved round count
	testl	%r8d,%r8d
	jz	.Lecb_decrypt		# enc == 0 -> inverse cipher

	cmpq	$128,%rdx		# at least 8 blocks?
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2		# preload first 8 blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)		# store previous batch while
	movq	%r11,%rcx		#   reloading key/rounds and
	movdqu	(%rdi),%xmm2		#   fetching the next batch
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8		# encrypt xmm2-xmm9 in place

	subq	$128,%rdx
	jnc	.Lecb_enc_loop8		# another full batch remains

	movups	%xmm2,(%rsi)		# flush final full batch
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx		# rdx = leftover bytes (0..112)
	jz	.Lecb_ret

.Lecb_enc_tail:				# 1..7 blocks: load then dispatch
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	.Lecb_enc_one		# 1 block
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two		# 2 blocks
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	.Lecb_enc_three		# 3 blocks
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four		# 4 blocks
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	.Lecb_enc_five		# 5 blocks
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six		# 6 blocks
	movdqu	96(%rdi),%xmm8		# 7 blocks (xmm9 is don't-care)
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:				# inlined single-block encrypt
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening
.Loop_enc1_3:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	xorps	%xmm4,%xmm4		# dummy third block for the 3-way helper
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7		# dummy sixth block for the 6-way helper
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:				# same structure with the inverse cipher
	cmpq	$128,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2		# preload first 8 blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)		# store previous batch / fetch next
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8		# decrypt xmm2-xmm9 in place

	movups	(%r11),%xmm0		# reload rk[0] for the next pass
	subq	$128,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)		# flush final full batch
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx		# rdx = leftover bytes (0..112)
	jz	.Lecb_ret

.Lecb_dec_tail:				# 1..7 blocks: load then dispatch
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8		# 7 blocks
	movups	(%rcx),%xmm0
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:				# inlined single-block decrypt
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening
.Loop_dec1_4:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	xorps	%xmm4,%xmm4		# dummy third block for the 3-way helper
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7		# dummy sixth block for the 6-way helper
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
					# fall through to return

.Lecb_ret:
.byte	0xf3,0xc3		# rep ret
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
# void aesni_ccm64_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 ivec[16],
#                                 u8 cmac[16])
#   rdi = in, rsi = out, rdx = number of 16-byte blocks, rcx = key,
#   r8 = counter block (big-endian), r9 = CMAC block (updated in place).
# Per block, the counter encryption (xmm2) and the CMAC absorption of the
# plaintext (xmm3) run through the cipher in lock-step, two rounds per
# loop pass (round count pre-halved).  The counter is kept byte-swapped
# in xmm9 so it can be incremented with a 64-bit paddq.
aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax		# eax = round count
	movdqu	(%r8),%xmm9		# xmm9 = counter block (BE)
	movdqa	.Lincrement64(%rip),%xmm6	# 64-bit increment constant
	movdqa	.Lbswap_mask(%rip),%xmm7	# 128-bit byte-reversal mask

	shrl	$1,%eax			# two rounds per loop iteration
	leaq	0(%rcx),%r11		# r11  = saved key pointer
	movdqu	(%r9),%xmm3		# xmm3 = running CMAC
	movdqa	%xmm9,%xmm2		# xmm2 = counter copy to encrypt
	movl	%eax,%r10d		# r10d = saved (halved) round count
.byte	102,68,15,56,0,207	# pshufb %xmm7,%xmm9 (counter -> LE for paddq)
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:			# one plaintext block per iteration
	movups	(%r11),%xmm0		# rk[0]
	movl	%r10d,%eax
	movups	(%rdi),%xmm8		# xmm8 = plaintext block

	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0		# xmm0 = rk[0]^pt ...
	leaq	32(%r11),%rcx
	xorps	%xmm0,%xmm3		# ... so xmm3 = (CMAC^pt)^rk[0]
	movups	(%rcx),%xmm0		# rk[2]

.Lccm64_enc2_loop:			# twin-track rounds: counter + CMAC
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	movups	0(%rcx),%xmm0		# next even round key
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2   (penultimate round)
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	paddq	%xmm6,%xmm9		# bump counter for the next block
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3

	decq	%rdx
	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8		# ciphertext = pt ^ E(counter)
	movdqa	%xmm9,%xmm2		# next counter to encrypt
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215		# pshufb %xmm7,%xmm2 (counter back to BE)
	jnz	.Lccm64_enc_outer

	movups	%xmm3,(%r9)		# write back the updated CMAC
.byte	0xf3,0xc3		# rep ret
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
# void aesni_ccm64_decrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 ivec[16],
#                                 u8 cmac[16])
#   rdi = in, rsi = out, rdx = number of 16-byte blocks, rcx = key,
#   r8 = counter block (big-endian), r9 = CMAC block (updated in place).
# The first counter is encrypted up front (.Loop_enc1_5); each outer
# iteration then recovers one plaintext block (ct ^ E(counter)) and, in a
# twin-track loop, encrypts the next counter while absorbing that
# plaintext into the CMAC.  The final CMAC encryption (.Loop_enc1_6)
# happens at .Lccm64_dec_break once all blocks are consumed.
aesni_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax		# eax = round count
	movups	(%r8),%xmm9		# xmm9 = counter block (BE)
	movdqu	(%r9),%xmm3		# xmm3 = running CMAC
	movdqa	.Lincrement64(%rip),%xmm6	# 64-bit increment constant
	movdqa	.Lbswap_mask(%rip),%xmm7	# byte-reversal mask

	movaps	%xmm9,%xmm2		# xmm2 = first counter to encrypt
	movl	%eax,%r10d		# r10d = saved full round count
	movq	%rcx,%r11		# r11  = saved key pointer
.byte	102,68,15,56,0,207	# pshufb %xmm7,%xmm9 (counter -> LE)
	movups	(%rcx),%xmm0		# encrypt the first counter:
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening
.Loop_enc1_5:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movups	(%rdi),%xmm8		# xmm8 = first ciphertext block
	paddq	%xmm6,%xmm9		# bump counter
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8		# plaintext = ct ^ E(counter)
	movdqa	%xmm9,%xmm2		# next counter
	movl	%r10d,%eax		# reload full round count
	movups	%xmm8,(%rsi)		# store plaintext
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215		# pshufb %xmm7,%xmm2 (counter back to BE)

	subq	$1,%rdx
	jz	.Lccm64_dec_break	# last block: only the CMAC remains

	movups	(%r11),%xmm0		# rk[0]
	shrl	$1,%eax			# two rounds per loop iteration
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8		# xmm8 = pt^rk[0] ...
	leaq	32(%r11),%rcx
	xorps	%xmm0,%xmm2		# whiten counter
	xorps	%xmm8,%xmm3		# ... so xmm3 = (CMAC^pt)^rk[0]
	movups	(%rcx),%xmm0		# rk[2]

.Lccm64_dec2_loop:			# twin-track rounds: counter + CMAC
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	movups	0(%rcx),%xmm0		# next even round key
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8		# prefetch next ciphertext block
	paddq	%xmm6,%xmm9		# bump counter
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2   (penultimate round)
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	leaq	16(%rdi),%rdi
.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:
	# Absorb the final plaintext block and encrypt the CMAC once more;
	# eax still holds the full round count loaded at the loop top.
	movups	(%r11),%xmm0
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8		# pt^rk[0]
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3		# xmm3 = (CMAC^pt)^rk[0]
.Loop_enc1_6:
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217	# aesenclast %xmm1,%xmm3
	movups	%xmm3,(%r9)		# write back the final CMAC
.byte	0xf3,0xc3		# rep ret
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
# void aesni_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                 const AES_KEY *key, const u8 ivec[16])
#   rdi = in, rsi = out, rdx = number of 16-byte blocks, rcx = key,
#   r8 = initial counter block.  Only the low 32 bits of the counter
#   (last big-endian dword of the block) are incremented, per CTR-32.
# Layout: xmm14 = IV with the counter dword zeroed; xmm12/xmm13 hold six
# consecutive little-endian counter values (three dwords each), stored in
# the red zone at -40/-24(%rsp) and byte-swapped for use.  The bulk path
# encrypts 6 counter blocks per iteration; tails reuse the 1/3/4/6-block
# helpers.  Uses xmm0-xmm15; leaf function (red zone only, no rsp change).
aesni_ctr32_encrypt_blocks:
	cmpq	$1,%rdx
	je	.Lctr32_one_shortcut	# single block: skip counter setup

	movdqu	(%r8),%xmm14		# xmm14 = counter block (BE)
	movdqa	.Lbswap_mask(%rip),%xmm15	# byte-reversal mask
	xorl	%eax,%eax
.byte	102,69,15,58,22,242,3	# pextrd $3,%xmm14,%r10d (BE counter word)
.byte	102,68,15,58,34,240,3	# pinsrd $3,%eax,%xmm14  (clear counter lane)

	movl	240(%rcx),%eax		# eax = round count
	bswapl	%r10d			# counter to host order
	pxor	%xmm12,%xmm12		# build ctr+0..2 in xmm12,
	pxor	%xmm13,%xmm13		#   ctr+3..5 in xmm13
.byte	102,69,15,58,34,226,0	# pinsrd $0,%r10d,%xmm12 (ctr+0)
	leaq	3(%r10),%r11
.byte	102,69,15,58,34,235,0	# pinsrd $0,%r11d,%xmm13 (ctr+3)
	incl	%r10d
.byte	102,69,15,58,34,226,1	# pinsrd $1,%r10d,%xmm12 (ctr+1)
	incq	%r11
.byte	102,69,15,58,34,235,1	# pinsrd $1,%r11d,%xmm13 (ctr+4)
	incl	%r10d
.byte	102,69,15,58,34,226,2	# pinsrd $2,%r10d,%xmm12 (ctr+2)
	incq	%r11
.byte	102,69,15,58,34,235,2	# pinsrd $2,%r11d,%xmm13 (ctr+5)
	movdqa	%xmm12,-40(%rsp)	# stash LE counters in the red zone
.byte	102,69,15,56,0,231	# pshufb %xmm15,%xmm12 (to BE)
	movdqa	%xmm13,-24(%rsp)
.byte	102,69,15,56,0,239	# pshufb %xmm15,%xmm13 (to BE)

	pshufd	$192,%xmm12,%xmm2	# broadcast counters into block lanes
	pshufd	$128,%xmm12,%xmm3
	pshufd	$64,%xmm12,%xmm4
	cmpq	$6,%rdx
	jb	.Lctr32_tail		# fewer than 6 blocks total
	shrl	$1,%eax			# two rounds per inner-loop pass
	movq	%rcx,%r11		# r11  = saved key pointer
	movl	%eax,%r10d		# r10d = saved (halved) round count
	subq	$6,%rdx
	jmp	.Lctr32_loop6

.align	16
.Lctr32_loop6:				# encrypt 6 counter blocks per pass
	pshufd	$192,%xmm13,%xmm5
	por	%xmm14,%xmm2		# merge IV into each counter block
	movups	(%r11),%xmm0		# rk[0]
	pshufd	$128,%xmm13,%xmm6
	por	%xmm14,%xmm3
	movups	16(%r11),%xmm1		# rk[1]
	pshufd	$64,%xmm13,%xmm7
	por	%xmm14,%xmm4
	por	%xmm14,%xmm5
	xorps	%xmm0,%xmm2		# whitening begins
	por	%xmm14,%xmm6
	por	%xmm14,%xmm7



	pxor	%xmm0,%xmm3		# whitening interleaved with round 1
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	leaq	32(%r11),%rcx
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movdqa	.Lincrement32(%rip),%xmm13	# dword increment {6,6,6,0}-style
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movdqa	-40(%rsp),%xmm12	# reload LE counters
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	movups	(%rcx),%xmm0		# rk[2]
	decl	%eax			# round 1 done outside the loop
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	jmp	.Lctr32_enc_loop6_enter
.align	16
.Lctr32_enc_loop6:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.Lctr32_enc_loop6_enter:
	movups	16(%rcx),%xmm1		# next odd round key
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	(%rcx),%xmm0		# next even round key
	jnz	.Lctr32_enc_loop6

	# Final rounds interleaved with advancing the counters by 6:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	paddd	%xmm13,%xmm12		# ctr block 0-2 += increment
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	paddd	-24(%rsp),%xmm13	# ctr block 3-5 advanced
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movdqa	%xmm12,-40(%rsp)	# save LE counters back
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	movdqa	%xmm13,-24(%rsp)
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,69,15,56,0,231	# pshufb %xmm15,%xmm12 (to BE)
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
.byte	102,69,15,56,0,239	# pshufb %xmm15,%xmm13 (to BE)

.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
	movups	(%rdi),%xmm8		# load 6 input blocks while finishing
.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
	movups	16(%rdi),%xmm9
.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
	movups	32(%rdi),%xmm10
.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
	movups	48(%rdi),%xmm11
.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
	movups	64(%rdi),%xmm1
.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
	movups	80(%rdi),%xmm0
	leaq	96(%rdi),%rdi

	xorps	%xmm2,%xmm8		# out = in ^ keystream, and start
	pshufd	$192,%xmm12,%xmm2	#   rebuilding next counter blocks
	xorps	%xmm3,%xmm9
	pshufd	$128,%xmm12,%xmm3
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	pshufd	$64,%xmm12,%xmm4
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	xorps	%xmm6,%xmm1
	movups	%xmm11,48(%rsi)
	xorps	%xmm7,%xmm0
	movups	%xmm1,64(%rsi)
	movups	%xmm0,80(%rsi)
	leaq	96(%rsi),%rsi
	movl	%r10d,%eax		# restore halved round count
	subq	$6,%rdx
	jnc	.Lctr32_loop6

	addq	$6,%rdx			# rdx = leftover blocks (0..5)
	jz	.Lctr32_done
	movq	%r11,%rcx		# restore key pointer
	leal	1(%rax,%rax,1),%eax	# eax = 2*eax+1: un-halve round count
					# (the stored count is odd)

.Lctr32_tail:				# 1..5 blocks left
	por	%xmm14,%xmm2		# finish counter block(s), load input,
	movups	(%rdi),%xmm8		#   then dispatch on the count
	cmpq	$2,%rdx
	jb	.Lctr32_one

	por	%xmm14,%xmm3
	movups	16(%rdi),%xmm9
	je	.Lctr32_two

	pshufd	$192,%xmm13,%xmm5
	por	%xmm14,%xmm4
	movups	32(%rdi),%xmm10
	cmpq	$4,%rdx
	jb	.Lctr32_three

	pshufd	$128,%xmm13,%xmm6
	por	%xmm14,%xmm5
	movups	48(%rdi),%xmm11
	je	.Lctr32_four

	por	%xmm14,%xmm6		# 5 blocks: use the 6-way helper
	xorps	%xmm7,%xmm7		# dummy sixth block

	call	_aesni_encrypt6

	movups	64(%rdi),%xmm1
	xorps	%xmm2,%xmm8		# out = in ^ keystream
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	xorps	%xmm6,%xmm1
	movups	%xmm11,48(%rsi)
	movups	%xmm1,64(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_one_shortcut:			# exactly one block: encrypt IV as-is
	movups	(%r8),%xmm2
	movups	(%rdi),%xmm8
	movl	240(%rcx),%eax
.Lctr32_one:				# inlined single-block encryption
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening
.Loop_enc1_7:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_7
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm2,%xmm8		# out = in ^ keystream
	movups	%xmm8,(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_two:
	xorps	%xmm4,%xmm4		# dummy third block for 3-way helper
	call	_aesni_encrypt3
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	movups	%xmm9,16(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_three:
	call	_aesni_encrypt3
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	movups	%xmm10,32(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_four:
	call	_aesni_encrypt4
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	movups	%xmm11,48(%rsi)
					# fall through to return

.Lctr32_done:
.byte	0xf3,0xc3		# rep ret
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
#-----------------------------------------------------------------------
# aesni_xts_encrypt — AES-XTS bulk encryption using AES-NI
# (GAS/AT&T syntax, SysV AMD64).
#
# Register roles as observed in the code below:
#   rdi = input, rsi = output, rdx = length in bytes,
#   rcx = data-encryption key schedule, r8 = tweak key schedule,
#   r9  = pointer to the 16-byte initial tweak; 240(key) is read as
#   the AES round count.
# NOTE(review): roles inferred from register usage only — confirm
# against the C prototype.
#
# The ".byte 102,15,56,220/221,..." groups are hand-encoded
# aesenc / aesenclast (0x66 0x0F 0x38 0xDC/0xDD), emitted as raw bytes
# for assemblers that predate AES-NI mnemonics.  ".byte 0xf3,0xc3" is
# "rep ret".
#-----------------------------------------------------------------------
1133 .globl aesni_xts_encrypt | |
1134 .type aesni_xts_encrypt,@function | |
1135 .align 16 | |
1136 aesni_xts_encrypt: | |
# Reserve 104 bytes of scratch; six tweak values are spilled to
# 0..80(%rsp) inside the main loop.
1137 leaq -104(%rsp),%rsp | |
# Encrypt the initial tweak: xmm15 = one AES block run over the r8
# (tweak key) schedule — one aesenc per round, aesenclast to finish.
1138 movups (%r9),%xmm15 | |
1139 movl 240(%r8),%eax | |
1140 movl 240(%rcx),%r10d | |
1141 movups (%r8),%xmm0 | |
1142 movups 16(%r8),%xmm1 | |
1143 leaq 32(%r8),%r8 | |
1144 xorps %xmm0,%xmm15 | |
1145 .Loop_enc1_8: | |
1146 .byte 102,68,15,56,220,249 | |
1147 decl %eax | |
1148 movups (%r8),%xmm1 | |
1149 leaq 16(%r8),%r8 | |
1150 jnz .Loop_enc1_8 | |
1151 .byte 102,68,15,56,221,249 | |
# Save the data key pointer (r11) and round count (eax); r9 keeps the
# exact byte length, rdx is rounded down to whole 16-byte blocks.
1152 movq %rcx,%r11 | |
1153 movl %r10d,%eax | |
1154 movq %rdx,%r9 | |
1155 andq $-16,%rdx | |
1156 | |
# Precompute the tweaks for the first blocks into xmm10..xmm13, with
# the running tweak carried in xmm15.  Each GF(2^128) doubling: paddq
# shifts left one bit; pcmpgtd/pshufd/pand derive a carry mask from
# the top bit and fold in .Lxts_magic (the reduction constant, defined
# elsewhere in this file).
1157 movdqa .Lxts_magic(%rip),%xmm8 | |
1158 pxor %xmm14,%xmm14 | |
1159 pcmpgtd %xmm15,%xmm14 | |
1160 pshufd $19,%xmm14,%xmm9 | |
1161 pxor %xmm14,%xmm14 | |
1162 movdqa %xmm15,%xmm10 | |
1163 paddq %xmm15,%xmm15 | |
1164 pand %xmm8,%xmm9 | |
1165 pcmpgtd %xmm15,%xmm14 | |
1166 pxor %xmm9,%xmm15 | |
1167 pshufd $19,%xmm14,%xmm9 | |
1168 pxor %xmm14,%xmm14 | |
1169 movdqa %xmm15,%xmm11 | |
1170 paddq %xmm15,%xmm15 | |
1171 pand %xmm8,%xmm9 | |
1172 pcmpgtd %xmm15,%xmm14 | |
1173 pxor %xmm9,%xmm15 | |
1174 pshufd $19,%xmm14,%xmm9 | |
1175 pxor %xmm14,%xmm14 | |
1176 movdqa %xmm15,%xmm12 | |
1177 paddq %xmm15,%xmm15 | |
1178 pand %xmm8,%xmm9 | |
1179 pcmpgtd %xmm15,%xmm14 | |
1180 pxor %xmm9,%xmm15 | |
1181 pshufd $19,%xmm14,%xmm9 | |
1182 pxor %xmm14,%xmm14 | |
1183 movdqa %xmm15,%xmm13 | |
1184 paddq %xmm15,%xmm15 | |
1185 pand %xmm8,%xmm9 | |
1186 pcmpgtd %xmm15,%xmm14 | |
1187 pxor %xmm9,%xmm15 | |
# Fewer than 6 whole blocks? take the tail paths (rdx now biased -96).
1188 subq $96,%rdx | |
1189 jc .Lxts_enc_short | |
1190 | |
# eax = rounds/2 - 1: the 6-way loop below retires two AES rounds per
# iteration.
1191 shrl $1,%eax | |
1192 subl $1,%eax | |
1193 movl %eax,%r10d | |
1194 jmp .Lxts_enc_grandloop | |
1195 | |
1196 .align 16 | |
# Main loop: encrypt 6 blocks per iteration.  The six tweaks
# (xmm10..xmm15) are XORed into the plaintext, spilled to the stack,
# and the next iteration's tweaks are computed in the shadow of the
# AES rounds.
1197 .Lxts_enc_grandloop: | |
1198 pshufd $19,%xmm14,%xmm9 | |
1199 movdqa %xmm15,%xmm14 | |
1200 paddq %xmm15,%xmm15 | |
1201 movdqu 0(%rdi),%xmm2 | |
1202 pand %xmm8,%xmm9 | |
1203 movdqu 16(%rdi),%xmm3 | |
1204 pxor %xmm9,%xmm15 | |
1205 | |
1206 movdqu 32(%rdi),%xmm4 | |
1207 pxor %xmm10,%xmm2 | |
1208 movdqu 48(%rdi),%xmm5 | |
1209 pxor %xmm11,%xmm3 | |
1210 movdqu 64(%rdi),%xmm6 | |
1211 pxor %xmm12,%xmm4 | |
1212 movdqu 80(%rdi),%xmm7 | |
1213 leaq 96(%rdi),%rdi | |
1214 pxor %xmm13,%xmm5 | |
1215 movups (%r11),%xmm0 | |
1216 pxor %xmm14,%xmm6 | |
1217 pxor %xmm15,%xmm7 | |
1218 | |
1219 | |
1220 | |
# First round key XORed in; saved tweaks go to 0..80(%rsp) while the
# first aesenc rounds start.
1221 movups 16(%r11),%xmm1 | |
1222 pxor %xmm0,%xmm2 | |
1223 pxor %xmm0,%xmm3 | |
1224 movdqa %xmm10,0(%rsp) | |
1225 .byte 102,15,56,220,209 | |
1226 leaq 32(%r11),%rcx | |
1227 pxor %xmm0,%xmm4 | |
1228 movdqa %xmm11,16(%rsp) | |
1229 .byte 102,15,56,220,217 | |
1230 pxor %xmm0,%xmm5 | |
1231 movdqa %xmm12,32(%rsp) | |
1232 .byte 102,15,56,220,225 | |
1233 pxor %xmm0,%xmm6 | |
1234 movdqa %xmm13,48(%rsp) | |
1235 .byte 102,15,56,220,233 | |
1236 pxor %xmm0,%xmm7 | |
1237 movups (%rcx),%xmm0 | |
1238 decl %eax | |
1239 movdqa %xmm14,64(%rsp) | |
1240 .byte 102,15,56,220,241 | |
1241 movdqa %xmm15,80(%rsp) | |
1242 .byte 102,15,56,220,249 | |
1243 pxor %xmm14,%xmm14 | |
1244 pcmpgtd %xmm15,%xmm14 | |
1245 jmp .Lxts_enc_loop6_enter | |
1246 | |
1247 .align 16 | |
# Two AES rounds per iteration on all six blocks (round keys alternate
# between xmm1 and xmm0).
1248 .Lxts_enc_loop6: | |
1249 .byte 102,15,56,220,209 | |
1250 .byte 102,15,56,220,217 | |
1251 decl %eax | |
1252 .byte 102,15,56,220,225 | |
1253 .byte 102,15,56,220,233 | |
1254 .byte 102,15,56,220,241 | |
1255 .byte 102,15,56,220,249 | |
1256 .Lxts_enc_loop6_enter: | |
1257 movups 16(%rcx),%xmm1 | |
1258 .byte 102,15,56,220,208 | |
1259 .byte 102,15,56,220,216 | |
1260 leaq 32(%rcx),%rcx | |
1261 .byte 102,15,56,220,224 | |
1262 .byte 102,15,56,220,232 | |
1263 .byte 102,15,56,220,240 | |
1264 .byte 102,15,56,220,248 | |
1265 movups (%rcx),%xmm0 | |
1266 jnz .Lxts_enc_loop6 | |
1267 | |
# Final rounds, interleaved with computing the next iteration's six
# tweaks (same doubling pattern as above).
1268 pshufd $19,%xmm14,%xmm9 | |
1269 pxor %xmm14,%xmm14 | |
1270 paddq %xmm15,%xmm15 | |
1271 .byte 102,15,56,220,209 | |
1272 pand %xmm8,%xmm9 | |
1273 .byte 102,15,56,220,217 | |
1274 pcmpgtd %xmm15,%xmm14 | |
1275 .byte 102,15,56,220,225 | |
1276 pxor %xmm9,%xmm15 | |
1277 .byte 102,15,56,220,233 | |
1278 .byte 102,15,56,220,241 | |
1279 .byte 102,15,56,220,249 | |
1280 movups 16(%rcx),%xmm1 | |
1281 | |
1282 pshufd $19,%xmm14,%xmm9 | |
1283 pxor %xmm14,%xmm14 | |
1284 movdqa %xmm15,%xmm10 | |
1285 paddq %xmm15,%xmm15 | |
1286 .byte 102,15,56,220,208 | |
1287 pand %xmm8,%xmm9 | |
1288 .byte 102,15,56,220,216 | |
1289 pcmpgtd %xmm15,%xmm14 | |
1290 .byte 102,15,56,220,224 | |
1291 pxor %xmm9,%xmm15 | |
1292 .byte 102,15,56,220,232 | |
1293 .byte 102,15,56,220,240 | |
1294 .byte 102,15,56,220,248 | |
1295 movups 32(%rcx),%xmm0 | |
1296 | |
1297 pshufd $19,%xmm14,%xmm9 | |
1298 pxor %xmm14,%xmm14 | |
1299 movdqa %xmm15,%xmm11 | |
1300 paddq %xmm15,%xmm15 | |
1301 .byte 102,15,56,220,209 | |
1302 pand %xmm8,%xmm9 | |
1303 .byte 102,15,56,220,217 | |
1304 pcmpgtd %xmm15,%xmm14 | |
1305 .byte 102,15,56,220,225 | |
1306 pxor %xmm9,%xmm15 | |
1307 .byte 102,15,56,220,233 | |
1308 .byte 102,15,56,220,241 | |
1309 .byte 102,15,56,220,249 | |
1310 | |
# aesenclast (opcode 221) finishes all six blocks.
1311 pshufd $19,%xmm14,%xmm9 | |
1312 pxor %xmm14,%xmm14 | |
1313 movdqa %xmm15,%xmm12 | |
1314 paddq %xmm15,%xmm15 | |
1315 .byte 102,15,56,221,208 | |
1316 pand %xmm8,%xmm9 | |
1317 .byte 102,15,56,221,216 | |
1318 pcmpgtd %xmm15,%xmm14 | |
1319 .byte 102,15,56,221,224 | |
1320 pxor %xmm9,%xmm15 | |
1321 .byte 102,15,56,221,232 | |
1322 .byte 102,15,56,221,240 | |
1323 .byte 102,15,56,221,248 | |
1324 | |
# XOR the saved tweaks back into the cipher output and store the six
# ciphertext blocks.
1325 pshufd $19,%xmm14,%xmm9 | |
1326 pxor %xmm14,%xmm14 | |
1327 movdqa %xmm15,%xmm13 | |
1328 paddq %xmm15,%xmm15 | |
1329 xorps 0(%rsp),%xmm2 | |
1330 pand %xmm8,%xmm9 | |
1331 xorps 16(%rsp),%xmm3 | |
1332 pcmpgtd %xmm15,%xmm14 | |
1333 pxor %xmm9,%xmm15 | |
1334 | |
1335 xorps 32(%rsp),%xmm4 | |
1336 movups %xmm2,0(%rsi) | |
1337 xorps 48(%rsp),%xmm5 | |
1338 movups %xmm3,16(%rsi) | |
1339 xorps 64(%rsp),%xmm6 | |
1340 movups %xmm4,32(%rsi) | |
1341 xorps 80(%rsp),%xmm7 | |
1342 movups %xmm5,48(%rsi) | |
1343 movl %r10d,%eax | |
1344 movups %xmm6,64(%rsi) | |
1345 movups %xmm7,80(%rsi) | |
1346 leaq 96(%rsi),%rsi | |
1347 subq $96,%rdx | |
1348 jnc .Lxts_enc_grandloop | |
1349 | |
# Restore the full round count (eax = 2*eax + 3 undoes the rounds/2-1
# setup) and the key pointer for the tail paths.
1350 leal 3(%rax,%rax,1),%eax | |
1351 movq %r11,%rcx | |
1352 movl %eax,%r10d | |
1353 | |
# 0..5 whole blocks remain; dispatch on the un-biased block count.
1354 .Lxts_enc_short: | |
1355 addq $96,%rdx | |
1356 jz .Lxts_enc_done | |
1357 | |
1358 cmpq $32,%rdx | |
1359 jb .Lxts_enc_one | |
1360 je .Lxts_enc_two | |
1361 | |
1362 cmpq $64,%rdx | |
1363 jb .Lxts_enc_three | |
1364 je .Lxts_enc_four | |
1365 | |
# Exactly five blocks: advance the tweak once more and use the 6-way
# helper (defined elsewhere in this file).
1366 pshufd $19,%xmm14,%xmm9 | |
1367 movdqa %xmm15,%xmm14 | |
1368 paddq %xmm15,%xmm15 | |
1369 movdqu (%rdi),%xmm2 | |
1370 pand %xmm8,%xmm9 | |
1371 movdqu 16(%rdi),%xmm3 | |
1372 pxor %xmm9,%xmm15 | |
1373 | |
1374 movdqu 32(%rdi),%xmm4 | |
1375 pxor %xmm10,%xmm2 | |
1376 movdqu 48(%rdi),%xmm5 | |
1377 pxor %xmm11,%xmm3 | |
1378 movdqu 64(%rdi),%xmm6 | |
1379 leaq 80(%rdi),%rdi | |
1380 pxor %xmm12,%xmm4 | |
1381 pxor %xmm13,%xmm5 | |
1382 pxor %xmm14,%xmm6 | |
1383 | |
1384 call _aesni_encrypt6 | |
1385 | |
1386 xorps %xmm10,%xmm2 | |
1387 movdqa %xmm15,%xmm10 | |
1388 xorps %xmm11,%xmm3 | |
1389 xorps %xmm12,%xmm4 | |
1390 movdqu %xmm2,(%rsi) | |
1391 xorps %xmm13,%xmm5 | |
1392 movdqu %xmm3,16(%rsi) | |
1393 xorps %xmm14,%xmm6 | |
1394 movdqu %xmm4,32(%rsi) | |
1395 movdqu %xmm5,48(%rsi) | |
1396 movdqu %xmm6,64(%rsi) | |
1397 leaq 80(%rsi),%rsi | |
1398 jmp .Lxts_enc_done | |
1399 | |
1400 .align 16 | |
# Single block: inline one-block AES with the data key.
1401 .Lxts_enc_one: | |
1402 movups (%rdi),%xmm2 | |
1403 leaq 16(%rdi),%rdi | |
1404 xorps %xmm10,%xmm2 | |
1405 movups (%rcx),%xmm0 | |
1406 movups 16(%rcx),%xmm1 | |
1407 leaq 32(%rcx),%rcx | |
1408 xorps %xmm0,%xmm2 | |
1409 .Loop_enc1_9: | |
1410 .byte 102,15,56,220,209 | |
1411 decl %eax | |
1412 movups (%rcx),%xmm1 | |
1413 leaq 16(%rcx),%rcx | |
1414 jnz .Loop_enc1_9 | |
1415 .byte 102,15,56,221,209 | |
1416 xorps %xmm10,%xmm2 | |
1417 movdqa %xmm11,%xmm10 | |
1418 movups %xmm2,(%rsi) | |
1419 leaq 16(%rsi),%rsi | |
1420 jmp .Lxts_enc_done | |
1421 | |
1422 .align 16 | |
# Two blocks via the 3-way helper with a zeroed third lane.
1423 .Lxts_enc_two: | |
1424 movups (%rdi),%xmm2 | |
1425 movups 16(%rdi),%xmm3 | |
1426 leaq 32(%rdi),%rdi | |
1427 xorps %xmm10,%xmm2 | |
1428 xorps %xmm11,%xmm3 | |
1429 | |
1430 call _aesni_encrypt3 | |
1431 | |
1432 xorps %xmm10,%xmm2 | |
1433 movdqa %xmm12,%xmm10 | |
1434 xorps %xmm11,%xmm3 | |
1435 movups %xmm2,(%rsi) | |
1436 movups %xmm3,16(%rsi) | |
1437 leaq 32(%rsi),%rsi | |
1438 jmp .Lxts_enc_done | |
1439 | |
1440 .align 16 | |
1441 .Lxts_enc_three: | |
1442 movups (%rdi),%xmm2 | |
1443 movups 16(%rdi),%xmm3 | |
1444 movups 32(%rdi),%xmm4 | |
1445 leaq 48(%rdi),%rdi | |
1446 xorps %xmm10,%xmm2 | |
1447 xorps %xmm11,%xmm3 | |
1448 xorps %xmm12,%xmm4 | |
1449 | |
1450 call _aesni_encrypt3 | |
1451 | |
1452 xorps %xmm10,%xmm2 | |
1453 movdqa %xmm13,%xmm10 | |
1454 xorps %xmm11,%xmm3 | |
1455 xorps %xmm12,%xmm4 | |
1456 movups %xmm2,(%rsi) | |
1457 movups %xmm3,16(%rsi) | |
1458 movups %xmm4,32(%rsi) | |
1459 leaq 48(%rsi),%rsi | |
1460 jmp .Lxts_enc_done | |
1461 | |
1462 .align 16 | |
1463 .Lxts_enc_four: | |
1464 movups (%rdi),%xmm2 | |
1465 movups 16(%rdi),%xmm3 | |
1466 movups 32(%rdi),%xmm4 | |
1467 xorps %xmm10,%xmm2 | |
1468 movups 48(%rdi),%xmm5 | |
1469 leaq 64(%rdi),%rdi | |
1470 xorps %xmm11,%xmm3 | |
1471 xorps %xmm12,%xmm4 | |
1472 xorps %xmm13,%xmm5 | |
1473 | |
1474 call _aesni_encrypt4 | |
1475 | |
1476 xorps %xmm10,%xmm2 | |
1477 movdqa %xmm15,%xmm10 | |
1478 xorps %xmm11,%xmm3 | |
1479 xorps %xmm12,%xmm4 | |
1480 movups %xmm2,(%rsi) | |
1481 xorps %xmm13,%xmm5 | |
1482 movups %xmm3,16(%rsi) | |
1483 movups %xmm4,32(%rsi) | |
1484 movups %xmm5,48(%rsi) | |
1485 leaq 64(%rsi),%rsi | |
1486 jmp .Lxts_enc_done | |
1487 | |
1488 .align 16 | |
# Ciphertext stealing: r9 & 15 is the length of the trailing partial
# block; zero means no stealing needed.
1489 .Lxts_enc_done: | |
1490 andq $15,%r9 | |
1491 jz .Lxts_enc_ret | |
1492 movq %r9,%rdx | |
1493 | |
# Byte-swap loop: move tail plaintext bytes in while relocating the
# last full ciphertext block's bytes (standard XTS CTS shuffle).
1494 .Lxts_enc_steal: | |
1495 movzbl (%rdi),%eax | |
1496 movzbl -16(%rsi),%ecx | |
1497 leaq 1(%rdi),%rdi | |
1498 movb %al,-16(%rsi) | |
1499 movb %cl,0(%rsi) | |
1500 leaq 1(%rsi),%rsi | |
1501 subq $1,%rdx | |
1502 jnz .Lxts_enc_steal | |
1503 | |
# Re-encrypt the stitched final block with the last tweak (xmm10).
1504 subq %r9,%rsi | |
1505 movq %r11,%rcx | |
1506 movl %r10d,%eax | |
1507 | |
1508 movups -16(%rsi),%xmm2 | |
1509 xorps %xmm10,%xmm2 | |
1510 movups (%rcx),%xmm0 | |
1511 movups 16(%rcx),%xmm1 | |
1512 leaq 32(%rcx),%rcx | |
1513 xorps %xmm0,%xmm2 | |
1514 .Loop_enc1_10: | |
1515 .byte 102,15,56,220,209 | |
1516 decl %eax | |
1517 movups (%rcx),%xmm1 | |
1518 leaq 16(%rcx),%rcx | |
1519 jnz .Loop_enc1_10 | |
1520 .byte 102,15,56,221,209 | |
1521 xorps %xmm10,%xmm2 | |
1522 movups %xmm2,-16(%rsi) | |
1523 | |
# Release the scratch area and return (0xf3,0xc3 = rep ret).
1524 .Lxts_enc_ret: | |
1525 leaq 104(%rsp),%rsp | |
1526 .Lxts_enc_epilogue: | |
1527 .byte 0xf3,0xc3 | |
1528 .size aesni_xts_encrypt,.-aesni_xts_encrypt | |
#-----------------------------------------------------------------------
# aesni_xts_decrypt — AES-XTS bulk decryption using AES-NI
# (GAS/AT&T syntax, SysV AMD64).
#
# Register roles as observed below (mirrors aesni_xts_encrypt):
#   rdi = input, rsi = output, rdx = length, rcx = data key schedule,
#   r8 = tweak key schedule, r9 = 16-byte initial tweak.
# NOTE(review): roles inferred from register usage — confirm against
# the C prototype.
#
# The tweak is always ENcrypted (opcode 220 = aesenc) even though the
# payload is DEcrypted (".byte 102,15,56,222/223,..." = aesdec /
# aesdeclast, 0x66 0x0F 0x38 0xDE/0xDF) — that is how XTS is defined.
#-----------------------------------------------------------------------
1529 .globl aesni_xts_decrypt | |
1530 .type aesni_xts_decrypt,@function | |
1531 .align 16 | |
1532 aesni_xts_decrypt: | |
# 104 bytes of scratch for spilled tweaks, as in the encrypt path.
1533 leaq -104(%rsp),%rsp | |
# Encrypt the initial tweak with the r8 schedule into xmm15.
1534 movups (%r9),%xmm15 | |
1535 movl 240(%r8),%eax | |
1536 movl 240(%rcx),%r10d | |
1537 movups (%r8),%xmm0 | |
1538 movups 16(%r8),%xmm1 | |
1539 leaq 32(%r8),%r8 | |
1540 xorps %xmm0,%xmm15 | |
1541 .Loop_enc1_11: | |
1542 .byte 102,68,15,56,220,249 | |
1543 decl %eax | |
1544 movups (%r8),%xmm1 | |
1545 leaq 16(%r8),%r8 | |
1546 jnz .Loop_enc1_11 | |
1547 .byte 102,68,15,56,221,249 | |
# If the length is not a multiple of 16, hold back one full block for
# ciphertext stealing: rax = 16 when (len & 15) != 0, else 0.
1548 xorl %eax,%eax | |
1549 testq $15,%rdx | |
1550 setnz %al | |
1551 shlq $4,%rax | |
1552 subq %rax,%rdx | |
1553 | |
# r11 = saved key, eax = rounds, r9 = exact length, rdx = whole blocks.
1554 movq %rcx,%r11 | |
1555 movl %r10d,%eax | |
1556 movq %rdx,%r9 | |
1557 andq $-16,%rdx | |
1558 | |
# Precompute tweaks xmm10..xmm13 by repeated GF(2^128) doubling of
# xmm15 (paddq + conditional .Lxts_magic reduction via the
# pcmpgtd/pshufd/pand carry mask), exactly as in the encrypt path.
1559 movdqa .Lxts_magic(%rip),%xmm8 | |
1560 pxor %xmm14,%xmm14 | |
1561 pcmpgtd %xmm15,%xmm14 | |
1562 pshufd $19,%xmm14,%xmm9 | |
1563 pxor %xmm14,%xmm14 | |
1564 movdqa %xmm15,%xmm10 | |
1565 paddq %xmm15,%xmm15 | |
1566 pand %xmm8,%xmm9 | |
1567 pcmpgtd %xmm15,%xmm14 | |
1568 pxor %xmm9,%xmm15 | |
1569 pshufd $19,%xmm14,%xmm9 | |
1570 pxor %xmm14,%xmm14 | |
1571 movdqa %xmm15,%xmm11 | |
1572 paddq %xmm15,%xmm15 | |
1573 pand %xmm8,%xmm9 | |
1574 pcmpgtd %xmm15,%xmm14 | |
1575 pxor %xmm9,%xmm15 | |
1576 pshufd $19,%xmm14,%xmm9 | |
1577 pxor %xmm14,%xmm14 | |
1578 movdqa %xmm15,%xmm12 | |
1579 paddq %xmm15,%xmm15 | |
1580 pand %xmm8,%xmm9 | |
1581 pcmpgtd %xmm15,%xmm14 | |
1582 pxor %xmm9,%xmm15 | |
1583 pshufd $19,%xmm14,%xmm9 | |
1584 pxor %xmm14,%xmm14 | |
1585 movdqa %xmm15,%xmm13 | |
1586 paddq %xmm15,%xmm15 | |
1587 pand %xmm8,%xmm9 | |
1588 pcmpgtd %xmm15,%xmm14 | |
1589 pxor %xmm9,%xmm15 | |
1590 subq $96,%rdx | |
1591 jc .Lxts_dec_short | |
1592 | |
# eax = rounds/2 - 1 for the two-rounds-per-iteration 6-way loop.
1593 shrl $1,%eax | |
1594 subl $1,%eax | |
1595 movl %eax,%r10d | |
1596 jmp .Lxts_dec_grandloop | |
1597 | |
1598 .align 16 | |
# Main loop: decrypt 6 ciphertext blocks per iteration; tweaks are
# XORed in, spilled to the stack, and the next six are computed
# between AES rounds.
1599 .Lxts_dec_grandloop: | |
1600 pshufd $19,%xmm14,%xmm9 | |
1601 movdqa %xmm15,%xmm14 | |
1602 paddq %xmm15,%xmm15 | |
1603 movdqu 0(%rdi),%xmm2 | |
1604 pand %xmm8,%xmm9 | |
1605 movdqu 16(%rdi),%xmm3 | |
1606 pxor %xmm9,%xmm15 | |
1607 | |
1608 movdqu 32(%rdi),%xmm4 | |
1609 pxor %xmm10,%xmm2 | |
1610 movdqu 48(%rdi),%xmm5 | |
1611 pxor %xmm11,%xmm3 | |
1612 movdqu 64(%rdi),%xmm6 | |
1613 pxor %xmm12,%xmm4 | |
1614 movdqu 80(%rdi),%xmm7 | |
1615 leaq 96(%rdi),%rdi | |
1616 pxor %xmm13,%xmm5 | |
1617 movups (%r11),%xmm0 | |
1618 pxor %xmm14,%xmm6 | |
1619 pxor %xmm15,%xmm7 | |
1620 | |
1621 | |
1622 | |
1623 movups 16(%r11),%xmm1 | |
1624 pxor %xmm0,%xmm2 | |
1625 pxor %xmm0,%xmm3 | |
1626 movdqa %xmm10,0(%rsp) | |
1627 .byte 102,15,56,222,209 | |
1628 leaq 32(%r11),%rcx | |
1629 pxor %xmm0,%xmm4 | |
1630 movdqa %xmm11,16(%rsp) | |
1631 .byte 102,15,56,222,217 | |
1632 pxor %xmm0,%xmm5 | |
1633 movdqa %xmm12,32(%rsp) | |
1634 .byte 102,15,56,222,225 | |
1635 pxor %xmm0,%xmm6 | |
1636 movdqa %xmm13,48(%rsp) | |
1637 .byte 102,15,56,222,233 | |
1638 pxor %xmm0,%xmm7 | |
1639 movups (%rcx),%xmm0 | |
1640 decl %eax | |
1641 movdqa %xmm14,64(%rsp) | |
1642 .byte 102,15,56,222,241 | |
1643 movdqa %xmm15,80(%rsp) | |
1644 .byte 102,15,56,222,249 | |
1645 pxor %xmm14,%xmm14 | |
1646 pcmpgtd %xmm15,%xmm14 | |
1647 jmp .Lxts_dec_loop6_enter | |
1648 | |
1649 .align 16 | |
# Two aesdec rounds per iteration on all six blocks.
1650 .Lxts_dec_loop6: | |
1651 .byte 102,15,56,222,209 | |
1652 .byte 102,15,56,222,217 | |
1653 decl %eax | |
1654 .byte 102,15,56,222,225 | |
1655 .byte 102,15,56,222,233 | |
1656 .byte 102,15,56,222,241 | |
1657 .byte 102,15,56,222,249 | |
1658 .Lxts_dec_loop6_enter: | |
1659 movups 16(%rcx),%xmm1 | |
1660 .byte 102,15,56,222,208 | |
1661 .byte 102,15,56,222,216 | |
1662 leaq 32(%rcx),%rcx | |
1663 .byte 102,15,56,222,224 | |
1664 .byte 102,15,56,222,232 | |
1665 .byte 102,15,56,222,240 | |
1666 .byte 102,15,56,222,248 | |
1667 movups (%rcx),%xmm0 | |
1668 jnz .Lxts_dec_loop6 | |
1669 | |
# Final rounds interleaved with next-iteration tweak generation.
1670 pshufd $19,%xmm14,%xmm9 | |
1671 pxor %xmm14,%xmm14 | |
1672 paddq %xmm15,%xmm15 | |
1673 .byte 102,15,56,222,209 | |
1674 pand %xmm8,%xmm9 | |
1675 .byte 102,15,56,222,217 | |
1676 pcmpgtd %xmm15,%xmm14 | |
1677 .byte 102,15,56,222,225 | |
1678 pxor %xmm9,%xmm15 | |
1679 .byte 102,15,56,222,233 | |
1680 .byte 102,15,56,222,241 | |
1681 .byte 102,15,56,222,249 | |
1682 movups 16(%rcx),%xmm1 | |
1683 | |
1684 pshufd $19,%xmm14,%xmm9 | |
1685 pxor %xmm14,%xmm14 | |
1686 movdqa %xmm15,%xmm10 | |
1687 paddq %xmm15,%xmm15 | |
1688 .byte 102,15,56,222,208 | |
1689 pand %xmm8,%xmm9 | |
1690 .byte 102,15,56,222,216 | |
1691 pcmpgtd %xmm15,%xmm14 | |
1692 .byte 102,15,56,222,224 | |
1693 pxor %xmm9,%xmm15 | |
1694 .byte 102,15,56,222,232 | |
1695 .byte 102,15,56,222,240 | |
1696 .byte 102,15,56,222,248 | |
1697 movups 32(%rcx),%xmm0 | |
1698 | |
1699 pshufd $19,%xmm14,%xmm9 | |
1700 pxor %xmm14,%xmm14 | |
1701 movdqa %xmm15,%xmm11 | |
1702 paddq %xmm15,%xmm15 | |
1703 .byte 102,15,56,222,209 | |
1704 pand %xmm8,%xmm9 | |
1705 .byte 102,15,56,222,217 | |
1706 pcmpgtd %xmm15,%xmm14 | |
1707 .byte 102,15,56,222,225 | |
1708 pxor %xmm9,%xmm15 | |
1709 .byte 102,15,56,222,233 | |
1710 .byte 102,15,56,222,241 | |
1711 .byte 102,15,56,222,249 | |
1712 | |
# aesdeclast (opcode 223) finishes the six blocks.
1713 pshufd $19,%xmm14,%xmm9 | |
1714 pxor %xmm14,%xmm14 | |
1715 movdqa %xmm15,%xmm12 | |
1716 paddq %xmm15,%xmm15 | |
1717 .byte 102,15,56,223,208 | |
1718 pand %xmm8,%xmm9 | |
1719 .byte 102,15,56,223,216 | |
1720 pcmpgtd %xmm15,%xmm14 | |
1721 .byte 102,15,56,223,224 | |
1722 pxor %xmm9,%xmm15 | |
1723 .byte 102,15,56,223,232 | |
1724 .byte 102,15,56,223,240 | |
1725 .byte 102,15,56,223,248 | |
1726 | |
# XOR saved tweaks back in and store six plaintext blocks.
1727 pshufd $19,%xmm14,%xmm9 | |
1728 pxor %xmm14,%xmm14 | |
1729 movdqa %xmm15,%xmm13 | |
1730 paddq %xmm15,%xmm15 | |
1731 xorps 0(%rsp),%xmm2 | |
1732 pand %xmm8,%xmm9 | |
1733 xorps 16(%rsp),%xmm3 | |
1734 pcmpgtd %xmm15,%xmm14 | |
1735 pxor %xmm9,%xmm15 | |
1736 | |
1737 xorps 32(%rsp),%xmm4 | |
1738 movups %xmm2,0(%rsi) | |
1739 xorps 48(%rsp),%xmm5 | |
1740 movups %xmm3,16(%rsi) | |
1741 xorps 64(%rsp),%xmm6 | |
1742 movups %xmm4,32(%rsi) | |
1743 xorps 80(%rsp),%xmm7 | |
1744 movups %xmm5,48(%rsi) | |
1745 movl %r10d,%eax | |
1746 movups %xmm6,64(%rsi) | |
1747 movups %xmm7,80(%rsi) | |
1748 leaq 96(%rsi),%rsi | |
1749 subq $96,%rdx | |
1750 jnc .Lxts_dec_grandloop | |
1751 | |
# Restore the full round count (2*eax + 3) and key for the tail paths.
1752 leal 3(%rax,%rax,1),%eax | |
1753 movq %r11,%rcx | |
1754 movl %eax,%r10d | |
1755 | |
# 0..5 whole blocks remain; dispatch on the un-biased count.
1756 .Lxts_dec_short: | |
1757 addq $96,%rdx | |
1758 jz .Lxts_dec_done | |
1759 | |
1760 cmpq $32,%rdx | |
1761 jb .Lxts_dec_one | |
1762 je .Lxts_dec_two | |
1763 | |
1764 cmpq $64,%rdx | |
1765 jb .Lxts_dec_three | |
1766 je .Lxts_dec_four | |
1767 | |
# Exactly five blocks via the 6-way helper.
1768 pshufd $19,%xmm14,%xmm9 | |
1769 movdqa %xmm15,%xmm14 | |
1770 paddq %xmm15,%xmm15 | |
1771 movdqu (%rdi),%xmm2 | |
1772 pand %xmm8,%xmm9 | |
1773 movdqu 16(%rdi),%xmm3 | |
1774 pxor %xmm9,%xmm15 | |
1775 | |
1776 movdqu 32(%rdi),%xmm4 | |
1777 pxor %xmm10,%xmm2 | |
1778 movdqu 48(%rdi),%xmm5 | |
1779 pxor %xmm11,%xmm3 | |
1780 movdqu 64(%rdi),%xmm6 | |
1781 leaq 80(%rdi),%rdi | |
1782 pxor %xmm12,%xmm4 | |
1783 pxor %xmm13,%xmm5 | |
1784 pxor %xmm14,%xmm6 | |
1785 | |
1786 call _aesni_decrypt6 | |
1787 | |
1788 xorps %xmm10,%xmm2 | |
1789 xorps %xmm11,%xmm3 | |
1790 xorps %xmm12,%xmm4 | |
1791 movdqu %xmm2,(%rsi) | |
1792 xorps %xmm13,%xmm5 | |
1793 movdqu %xmm3,16(%rsi) | |
1794 xorps %xmm14,%xmm6 | |
1795 movdqu %xmm4,32(%rsi) | |
1796 pxor %xmm14,%xmm14 | |
1797 movdqu %xmm5,48(%rsi) | |
1798 pcmpgtd %xmm15,%xmm14 | |
1799 movdqu %xmm6,64(%rsi) | |
1800 leaq 80(%rsi),%rsi | |
1801 pshufd $19,%xmm14,%xmm11 | |
1802 andq $15,%r9 | |
1803 jz .Lxts_dec_ret | |
1804 | |
# Partial tail exists: advance the tweak once more into xmm11 for the
# ciphertext-stealing final blocks.
1805 movdqa %xmm15,%xmm10 | |
1806 paddq %xmm15,%xmm15 | |
1807 pand %xmm8,%xmm11 | |
1808 pxor %xmm15,%xmm11 | |
1809 jmp .Lxts_dec_done2 | |
1810 | |
1811 .align 16 | |
# Single block: inline one-block AES decrypt with the data key.
1812 .Lxts_dec_one: | |
1813 movups (%rdi),%xmm2 | |
1814 leaq 16(%rdi),%rdi | |
1815 xorps %xmm10,%xmm2 | |
1816 movups (%rcx),%xmm0 | |
1817 movups 16(%rcx),%xmm1 | |
1818 leaq 32(%rcx),%rcx | |
1819 xorps %xmm0,%xmm2 | |
1820 .Loop_dec1_12: | |
1821 .byte 102,15,56,222,209 | |
1822 decl %eax | |
1823 movups (%rcx),%xmm1 | |
1824 leaq 16(%rcx),%rcx | |
1825 jnz .Loop_dec1_12 | |
1826 .byte 102,15,56,223,209 | |
1827 xorps %xmm10,%xmm2 | |
1828 movdqa %xmm11,%xmm10 | |
1829 movups %xmm2,(%rsi) | |
1830 movdqa %xmm12,%xmm11 | |
1831 leaq 16(%rsi),%rsi | |
1832 jmp .Lxts_dec_done | |
1833 | |
1834 .align 16 | |
# Two blocks via the 3-way helper.
1835 .Lxts_dec_two: | |
1836 movups (%rdi),%xmm2 | |
1837 movups 16(%rdi),%xmm3 | |
1838 leaq 32(%rdi),%rdi | |
1839 xorps %xmm10,%xmm2 | |
1840 xorps %xmm11,%xmm3 | |
1841 | |
1842 call _aesni_decrypt3 | |
1843 | |
1844 xorps %xmm10,%xmm2 | |
1845 movdqa %xmm12,%xmm10 | |
1846 xorps %xmm11,%xmm3 | |
1847 movdqa %xmm13,%xmm11 | |
1848 movups %xmm2,(%rsi) | |
1849 movups %xmm3,16(%rsi) | |
1850 leaq 32(%rsi),%rsi | |
1851 jmp .Lxts_dec_done | |
1852 | |
1853 .align 16 | |
1854 .Lxts_dec_three: | |
1855 movups (%rdi),%xmm2 | |
1856 movups 16(%rdi),%xmm3 | |
1857 movups 32(%rdi),%xmm4 | |
1858 leaq 48(%rdi),%rdi | |
1859 xorps %xmm10,%xmm2 | |
1860 xorps %xmm11,%xmm3 | |
1861 xorps %xmm12,%xmm4 | |
1862 | |
1863 call _aesni_decrypt3 | |
1864 | |
1865 xorps %xmm10,%xmm2 | |
1866 movdqa %xmm13,%xmm10 | |
1867 xorps %xmm11,%xmm3 | |
1868 movdqa %xmm15,%xmm11 | |
1869 xorps %xmm12,%xmm4 | |
1870 movups %xmm2,(%rsi) | |
1871 movups %xmm3,16(%rsi) | |
1872 movups %xmm4,32(%rsi) | |
1873 leaq 48(%rsi),%rsi | |
1874 jmp .Lxts_dec_done | |
1875 | |
1876 .align 16 | |
# Four blocks: advance the running tweak first, then decrypt 4-wide.
1877 .Lxts_dec_four: | |
1878 pshufd $19,%xmm14,%xmm9 | |
1879 movdqa %xmm15,%xmm14 | |
1880 paddq %xmm15,%xmm15 | |
1881 movups (%rdi),%xmm2 | |
1882 pand %xmm8,%xmm9 | |
1883 movups 16(%rdi),%xmm3 | |
1884 pxor %xmm9,%xmm15 | |
1885 | |
1886 movups 32(%rdi),%xmm4 | |
1887 xorps %xmm10,%xmm2 | |
1888 movups 48(%rdi),%xmm5 | |
1889 leaq 64(%rdi),%rdi | |
1890 xorps %xmm11,%xmm3 | |
1891 xorps %xmm12,%xmm4 | |
1892 xorps %xmm13,%xmm5 | |
1893 | |
1894 call _aesni_decrypt4 | |
1895 | |
1896 xorps %xmm10,%xmm2 | |
1897 movdqa %xmm14,%xmm10 | |
1898 xorps %xmm11,%xmm3 | |
1899 movdqa %xmm15,%xmm11 | |
1900 xorps %xmm12,%xmm4 | |
1901 movups %xmm2,(%rsi) | |
1902 xorps %xmm13,%xmm5 | |
1903 movups %xmm3,16(%rsi) | |
1904 movups %xmm4,32(%rsi) | |
1905 movups %xmm5,48(%rsi) | |
1906 leaq 64(%rsi),%rsi | |
1907 jmp .Lxts_dec_done | |
1908 | |
1909 .align 16 | |
# Ciphertext stealing for decrypt: the LAST full block is decrypted
# with the NEXT tweak (xmm11) first, then the tail bytes are swapped
# and the stitched block is re-decrypted with the previous tweak.
1910 .Lxts_dec_done: | |
1911 andq $15,%r9 | |
1912 jz .Lxts_dec_ret | |
1913 .Lxts_dec_done2: | |
1914 movq %r9,%rdx | |
1915 movq %r11,%rcx | |
1916 movl %r10d,%eax | |
1917 | |
1918 movups (%rdi),%xmm2 | |
1919 xorps %xmm11,%xmm2 | |
1920 movups (%rcx),%xmm0 | |
1921 movups 16(%rcx),%xmm1 | |
1922 leaq 32(%rcx),%rcx | |
1923 xorps %xmm0,%xmm2 | |
1924 .Loop_dec1_13: | |
1925 .byte 102,15,56,222,209 | |
1926 decl %eax | |
1927 movups (%rcx),%xmm1 | |
1928 leaq 16(%rcx),%rcx | |
1929 jnz .Loop_dec1_13 | |
1930 .byte 102,15,56,223,209 | |
1931 xorps %xmm11,%xmm2 | |
1932 movups %xmm2,(%rsi) | |
1933 | |
# Byte-swap loop moving tail ciphertext into place (XTS CTS shuffle).
1934 .Lxts_dec_steal: | |
1935 movzbl 16(%rdi),%eax | |
1936 movzbl (%rsi),%ecx | |
1937 leaq 1(%rdi),%rdi | |
1938 movb %al,(%rsi) | |
1939 movb %cl,16(%rsi) | |
1940 leaq 1(%rsi),%rsi | |
1941 subq $1,%rdx | |
1942 jnz .Lxts_dec_steal | |
1943 | |
# Re-decrypt the stitched block with the previous tweak (xmm10).
1944 subq %r9,%rsi | |
1945 movq %r11,%rcx | |
1946 movl %r10d,%eax | |
1947 | |
1948 movups (%rsi),%xmm2 | |
1949 xorps %xmm10,%xmm2 | |
1950 movups (%rcx),%xmm0 | |
1951 movups 16(%rcx),%xmm1 | |
1952 leaq 32(%rcx),%rcx | |
1953 xorps %xmm0,%xmm2 | |
1954 .Loop_dec1_14: | |
1955 .byte 102,15,56,222,209 | |
1956 decl %eax | |
1957 movups (%rcx),%xmm1 | |
1958 leaq 16(%rcx),%rcx | |
1959 jnz .Loop_dec1_14 | |
1960 .byte 102,15,56,223,209 | |
1961 xorps %xmm10,%xmm2 | |
1962 movups %xmm2,(%rsi) | |
1963 | |
# Release scratch and return (0xf3,0xc3 = rep ret).
1964 .Lxts_dec_ret: | |
1965 leaq 104(%rsp),%rsp | |
1966 .Lxts_dec_epilogue: | |
1967 .byte 0xf3,0xc3 | |
1968 .size aesni_xts_decrypt,.-aesni_xts_decrypt | |
#-----------------------------------------------------------------------
# aesni_cbc_encrypt — AES-CBC encryption/decryption using AES-NI
# (GAS/AT&T syntax, SysV AMD64).
#
# Register roles as observed below:
#   rdi = input, rsi = output, rdx = length in bytes, rcx = key
#   schedule, r8 = 16-byte IV (read on entry, updated before return),
#   r9d = direction flag (nonzero = encrypt, zero = decrypt).
# NOTE(review): roles inferred from register usage — confirm against
# the C prototype.
#
# Encryption is serial (each block chains into the next); decryption
# pipelines up to 8 blocks at once.  ".byte 102,15,56,220/221" =
# aesenc/aesenclast, "...222/223" = aesdec/aesdeclast; the ".long"
# constants below are hand-encoded rep-string instructions.
#-----------------------------------------------------------------------
1969 .globl aesni_cbc_encrypt | |
1970 .type aesni_cbc_encrypt,@function | |
1971 .align 16 | |
1972 aesni_cbc_encrypt: | |
1973 testq %rdx,%rdx | |
1974 jz .Lcbc_ret | |
1975 | |
1976 movl 240(%rcx),%r10d | |
1977 movq %rcx,%r11 | |
1978 testl %r9d,%r9d | |
1979 jz .Lcbc_decrypt | |
1980 | |
# --- Encrypt path.  xmm2 carries the chaining value (starts as IV).
1981 movups (%r8),%xmm2 | |
1982 movl %r10d,%eax | |
1983 cmpq $16,%rdx | |
1984 jb .Lcbc_enc_tail | |
1985 subq $16,%rdx | |
1986 jmp .Lcbc_enc_loop | |
1987 .align 16 | |
# One block per iteration: xmm2 = AES-ENC(key, plaintext ^ xmm2).
1988 .Lcbc_enc_loop: | |
1989 movups (%rdi),%xmm3 | |
1990 leaq 16(%rdi),%rdi | |
1991 | |
1992 movups (%rcx),%xmm0 | |
1993 movups 16(%rcx),%xmm1 | |
1994 xorps %xmm0,%xmm3 | |
1995 leaq 32(%rcx),%rcx | |
1996 xorps %xmm3,%xmm2 | |
1997 .Loop_enc1_15: | |
1998 .byte 102,15,56,220,209 | |
1999 decl %eax | |
2000 movups (%rcx),%xmm1 | |
2001 leaq 16(%rcx),%rcx | |
2002 jnz .Loop_enc1_15 | |
2003 .byte 102,15,56,221,209 | |
2004 movl %r10d,%eax | |
2005 movq %r11,%rcx | |
2006 movups %xmm2,0(%rsi) | |
2007 leaq 16(%rsi),%rsi | |
2008 subq $16,%rdx | |
2009 jnc .Lcbc_enc_loop | |
2010 addq $16,%rdx | |
2011 jnz .Lcbc_enc_tail | |
# Write the final chaining value back to the IV buffer.
2012 movups %xmm2,(%r8) | |
2013 jmp .Lcbc_ret | |
2014 | |
# Partial final block: copy the rdx tail bytes over the output block
# (.long 0x9066A4F3 = bytes F3 A4 66 90 = "rep movsb; nop"), zero-pad
# the rest (.long 0x9066AAF3 = "rep stosb; nop" with al = 0), then
# loop back to encrypt the padded block in place.
2015 .Lcbc_enc_tail: | |
2016 movq %rdx,%rcx | |
2017 xchgq %rdi,%rsi | |
2018 .long 0x9066A4F3 | |
2019 movl $16,%ecx | |
2020 subq %rdx,%rcx | |
2021 xorl %eax,%eax | |
2022 .long 0x9066AAF3 | |
2023 leaq -16(%rdi),%rdi | |
2024 movl %r10d,%eax | |
2025 movq %rdi,%rsi | |
2026 movq %r11,%rcx | |
2027 xorq %rdx,%rdx | |
2028 jmp .Lcbc_enc_loop | |
2029 | |
2030 .align 16 | |
# --- Decrypt path.  xmm9 holds the IV / previous ciphertext block;
# it is also staged at -24(%rsp) (red zone) across the 8-wide loop.
2031 .Lcbc_decrypt: | |
2032 movups (%r8),%xmm9 | |
2033 movl %r10d,%eax | |
2034 cmpq $112,%rdx | |
2035 jbe .Lcbc_dec_tail | |
2036 shrl $1,%r10d | |
2037 subq $112,%rdx | |
2038 movl %r10d,%eax | |
2039 movaps %xmm9,-24(%rsp) | |
2040 jmp .Lcbc_dec_loop8_enter | |
2041 .align 16 | |
# 8 blocks in flight per iteration; the first rounds are issued here
# and the remainder run inside .Ldec_loop8_enter (defined elsewhere
# in this file).
2042 .Lcbc_dec_loop8: | |
2043 movaps %xmm0,-24(%rsp) | |
2044 movups %xmm9,(%rsi) | |
2045 leaq 16(%rsi),%rsi | |
2046 .Lcbc_dec_loop8_enter: | |
2047 movups (%rcx),%xmm0 | |
2048 movups (%rdi),%xmm2 | |
2049 movups 16(%rdi),%xmm3 | |
2050 movups 16(%rcx),%xmm1 | |
2051 | |
2052 leaq 32(%rcx),%rcx | |
2053 movdqu 32(%rdi),%xmm4 | |
2054 xorps %xmm0,%xmm2 | |
2055 movdqu 48(%rdi),%xmm5 | |
2056 xorps %xmm0,%xmm3 | |
2057 movdqu 64(%rdi),%xmm6 | |
2058 .byte 102,15,56,222,209 | |
2059 pxor %xmm0,%xmm4 | |
2060 movdqu 80(%rdi),%xmm7 | |
2061 .byte 102,15,56,222,217 | |
2062 pxor %xmm0,%xmm5 | |
2063 movdqu 96(%rdi),%xmm8 | |
2064 .byte 102,15,56,222,225 | |
2065 pxor %xmm0,%xmm6 | |
2066 movdqu 112(%rdi),%xmm9 | |
2067 .byte 102,15,56,222,233 | |
2068 pxor %xmm0,%xmm7 | |
2069 decl %eax | |
2070 .byte 102,15,56,222,241 | |
2071 pxor %xmm0,%xmm8 | |
2072 .byte 102,15,56,222,249 | |
2073 pxor %xmm0,%xmm9 | |
2074 movups (%rcx),%xmm0 | |
2075 .byte 102,68,15,56,222,193 | |
2076 .byte 102,68,15,56,222,201 | |
2077 movups 16(%rcx),%xmm1 | |
2078 | |
2079 call .Ldec_loop8_enter | |
2080 | |
# CBC unchain: XOR each decrypted block with the preceding ciphertext
# block (IV staged at -24(%rsp) for the first one).
2081 movups (%rdi),%xmm1 | |
2082 movups 16(%rdi),%xmm0 | |
2083 xorps -24(%rsp),%xmm2 | |
2084 xorps %xmm1,%xmm3 | |
2085 movups 32(%rdi),%xmm1 | |
2086 xorps %xmm0,%xmm4 | |
2087 movups 48(%rdi),%xmm0 | |
2088 xorps %xmm1,%xmm5 | |
2089 movups 64(%rdi),%xmm1 | |
2090 xorps %xmm0,%xmm6 | |
2091 movups 80(%rdi),%xmm0 | |
2092 xorps %xmm1,%xmm7 | |
2093 movups 96(%rdi),%xmm1 | |
2094 xorps %xmm0,%xmm8 | |
2095 movups 112(%rdi),%xmm0 | |
2096 xorps %xmm1,%xmm9 | |
2097 movups %xmm2,(%rsi) | |
2098 movups %xmm3,16(%rsi) | |
2099 movups %xmm4,32(%rsi) | |
2100 movups %xmm5,48(%rsi) | |
2101 movl %r10d,%eax | |
2102 movups %xmm6,64(%rsi) | |
2103 movq %r11,%rcx | |
2104 movups %xmm7,80(%rsi) | |
2105 leaq 128(%rdi),%rdi | |
2106 movups %xmm8,96(%rsi) | |
2107 leaq 112(%rsi),%rsi | |
2108 subq $128,%rdx | |
2109 ja .Lcbc_dec_loop8 | |
2110 | |
2111 movaps %xmm9,%xmm2 | |
2112 movaps %xmm0,%xmm9 | |
2113 addq $112,%rdx | |
2114 jle .Lcbc_dec_tail_collected | |
2115 movups %xmm2,(%rsi) | |
# Restore the full round count (r10d was halved for the 8-way loop).
2116 leal 1(%r10,%r10,1),%eax | |
2117 leaq 16(%rsi),%rsi | |
# Tail: 1..7 remaining blocks, dispatched by size; originals are kept
# in spare registers for the unchaining XORs after decryption.
2118 .Lcbc_dec_tail: | |
2119 movups (%rdi),%xmm2 | |
2120 movaps %xmm2,%xmm8 | |
2121 cmpq $16,%rdx | |
2122 jbe .Lcbc_dec_one | |
2123 | |
2124 movups 16(%rdi),%xmm3 | |
2125 movaps %xmm3,%xmm7 | |
2126 cmpq $32,%rdx | |
2127 jbe .Lcbc_dec_two | |
2128 | |
2129 movups 32(%rdi),%xmm4 | |
2130 movaps %xmm4,%xmm6 | |
2131 cmpq $48,%rdx | |
2132 jbe .Lcbc_dec_three | |
2133 | |
2134 movups 48(%rdi),%xmm5 | |
2135 cmpq $64,%rdx | |
2136 jbe .Lcbc_dec_four | |
2137 | |
2138 movups 64(%rdi),%xmm6 | |
2139 cmpq $80,%rdx | |
2140 jbe .Lcbc_dec_five | |
2141 | |
2142 movups 80(%rdi),%xmm7 | |
2143 cmpq $96,%rdx | |
2144 jbe .Lcbc_dec_six | |
2145 | |
# Seven blocks: full 8-way helper with the IV spilled to the red zone.
2146 movups 96(%rdi),%xmm8 | |
2147 movaps %xmm9,-24(%rsp) | |
2148 call _aesni_decrypt8 | |
2149 movups (%rdi),%xmm1 | |
2150 movups 16(%rdi),%xmm0 | |
2151 xorps -24(%rsp),%xmm2 | |
2152 xorps %xmm1,%xmm3 | |
2153 movups 32(%rdi),%xmm1 | |
2154 xorps %xmm0,%xmm4 | |
2155 movups 48(%rdi),%xmm0 | |
2156 xorps %xmm1,%xmm5 | |
2157 movups 64(%rdi),%xmm1 | |
2158 xorps %xmm0,%xmm6 | |
2159 movups 80(%rdi),%xmm0 | |
2160 xorps %xmm1,%xmm7 | |
2161 movups 96(%rdi),%xmm9 | |
2162 xorps %xmm0,%xmm8 | |
2163 movups %xmm2,(%rsi) | |
2164 movups %xmm3,16(%rsi) | |
2165 movups %xmm4,32(%rsi) | |
2166 movups %xmm5,48(%rsi) | |
2167 movups %xmm6,64(%rsi) | |
2168 movups %xmm7,80(%rsi) | |
2169 leaq 96(%rsi),%rsi | |
2170 movaps %xmm8,%xmm2 | |
2171 subq $112,%rdx | |
2172 jmp .Lcbc_dec_tail_collected | |
2173 .align 16 | |
2174 .Lcbc_dec_one: | |
2175 movups (%rcx),%xmm0 | |
2176 movups 16(%rcx),%xmm1 | |
2177 leaq 32(%rcx),%rcx | |
2178 xorps %xmm0,%xmm2 | |
2179 .Loop_dec1_16: | |
2180 .byte 102,15,56,222,209 | |
2181 decl %eax | |
2182 movups (%rcx),%xmm1 | |
2183 leaq 16(%rcx),%rcx | |
2184 jnz .Loop_dec1_16 | |
2185 .byte 102,15,56,223,209 | |
2186 xorps %xmm9,%xmm2 | |
2187 movaps %xmm8,%xmm9 | |
2188 subq $16,%rdx | |
2189 jmp .Lcbc_dec_tail_collected | |
2190 .align 16 | |
2191 .Lcbc_dec_two: | |
2192 xorps %xmm4,%xmm4 | |
2193 call _aesni_decrypt3 | |
2194 xorps %xmm9,%xmm2 | |
2195 xorps %xmm8,%xmm3 | |
2196 movups %xmm2,(%rsi) | |
2197 movaps %xmm7,%xmm9 | |
2198 movaps %xmm3,%xmm2 | |
2199 leaq 16(%rsi),%rsi | |
2200 subq $32,%rdx | |
2201 jmp .Lcbc_dec_tail_collected | |
2202 .align 16 | |
2203 .Lcbc_dec_three: | |
2204 call _aesni_decrypt3 | |
2205 xorps %xmm9,%xmm2 | |
2206 xorps %xmm8,%xmm3 | |
2207 movups %xmm2,(%rsi) | |
2208 xorps %xmm7,%xmm4 | |
2209 movups %xmm3,16(%rsi) | |
2210 movaps %xmm6,%xmm9 | |
2211 movaps %xmm4,%xmm2 | |
2212 leaq 32(%rsi),%rsi | |
2213 subq $48,%rdx | |
2214 jmp .Lcbc_dec_tail_collected | |
2215 .align 16 | |
2216 .Lcbc_dec_four: | |
2217 call _aesni_decrypt4 | |
2218 xorps %xmm9,%xmm2 | |
2219 movups 48(%rdi),%xmm9 | |
2220 xorps %xmm8,%xmm3 | |
2221 movups %xmm2,(%rsi) | |
2222 xorps %xmm7,%xmm4 | |
2223 movups %xmm3,16(%rsi) | |
2224 xorps %xmm6,%xmm5 | |
2225 movups %xmm4,32(%rsi) | |
2226 movaps %xmm5,%xmm2 | |
2227 leaq 48(%rsi),%rsi | |
2228 subq $64,%rdx | |
2229 jmp .Lcbc_dec_tail_collected | |
2230 .align 16 | |
2231 .Lcbc_dec_five: | |
2232 xorps %xmm7,%xmm7 | |
2233 call _aesni_decrypt6 | |
2234 movups 16(%rdi),%xmm1 | |
2235 movups 32(%rdi),%xmm0 | |
2236 xorps %xmm9,%xmm2 | |
2237 xorps %xmm8,%xmm3 | |
2238 xorps %xmm1,%xmm4 | |
2239 movups 48(%rdi),%xmm1 | |
2240 xorps %xmm0,%xmm5 | |
2241 movups 64(%rdi),%xmm9 | |
2242 xorps %xmm1,%xmm6 | |
2243 movups %xmm2,(%rsi) | |
2244 movups %xmm3,16(%rsi) | |
2245 movups %xmm4,32(%rsi) | |
2246 movups %xmm5,48(%rsi) | |
2247 leaq 64(%rsi),%rsi | |
2248 movaps %xmm6,%xmm2 | |
2249 subq $80,%rdx | |
2250 jmp .Lcbc_dec_tail_collected | |
2251 .align 16 | |
2252 .Lcbc_dec_six: | |
2253 call _aesni_decrypt6 | |
2254 movups 16(%rdi),%xmm1 | |
2255 movups 32(%rdi),%xmm0 | |
2256 xorps %xmm9,%xmm2 | |
2257 xorps %xmm8,%xmm3 | |
2258 xorps %xmm1,%xmm4 | |
2259 movups 48(%rdi),%xmm1 | |
2260 xorps %xmm0,%xmm5 | |
2261 movups 64(%rdi),%xmm0 | |
2262 xorps %xmm1,%xmm6 | |
2263 movups 80(%rdi),%xmm9 | |
2264 xorps %xmm0,%xmm7 | |
2265 movups %xmm2,(%rsi) | |
2266 movups %xmm3,16(%rsi) | |
2267 movups %xmm4,32(%rsi) | |
2268 movups %xmm5,48(%rsi) | |
2269 movups %xmm6,64(%rsi) | |
2270 leaq 80(%rsi),%rsi | |
2271 movaps %xmm7,%xmm2 | |
2272 subq $96,%rdx | |
2273 jmp .Lcbc_dec_tail_collected | |
2274 .align 16 | |
# Store the updated IV (last ciphertext block, xmm9) and emit the last
# plaintext block — whole via movups, or partial via the red-zone
# staging buffer and "rep movsb" (.long 0x9066A4F3).
2275 .Lcbc_dec_tail_collected: | |
2276 andq $15,%rdx | |
2277 movups %xmm9,(%r8) | |
2278 jnz .Lcbc_dec_tail_partial | |
2279 movups %xmm2,(%rsi) | |
2280 jmp .Lcbc_dec_ret | |
2281 .align 16 | |
2282 .Lcbc_dec_tail_partial: | |
2283 movaps %xmm2,-24(%rsp) | |
2284 movq $16,%rcx | |
2285 movq %rsi,%rdi | |
2286 subq %rdx,%rcx | |
2287 leaq -24(%rsp),%rsi | |
2288 .long 0x9066A4F3 | |
2289 | |
2290 .Lcbc_dec_ret: | |
2291 .Lcbc_ret: | |
2292 .byte 0xf3,0xc3 | |
2293 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt | |
#-----------------------------------------------------------------------
# aesni_set_decrypt_key — build an AES decryption key schedule.
#
# Calls __aesni_set_encrypt_key (defined just below in this file) and,
# if it succeeds (eax == 0), converts the schedule in place for the
# Equivalent Inverse Cipher: the round keys are reversed end-for-end
# and the inner ones are passed through aesimc
# (".byte 102,15,56,219,..." = 0x66 0x0F 0x38 0xDB).
#
# Register use observed: esi = key bits (scaled by 16 to index the
# last round key), rdx = key-schedule buffer; eax carries the status
# from the expansion call through to the caller.
# NOTE(review): exact prototype inferred — confirm against the C
# declaration.  ".byte 0x48,0x83,0xEC,0x08" = "subq $8,%rsp"
# (hand-encoded), matched by the addq $8,%rsp in the epilogue.
#-----------------------------------------------------------------------
2294 .globl aesni_set_decrypt_key | |
2295 .type aesni_set_decrypt_key,@function | |
2296 .align 16 | |
2297 aesni_set_decrypt_key: | |
2298 .byte 0x48,0x83,0xEC,0x08 | |
2299 call __aesni_set_encrypt_key | |
2300 shll $4,%esi | |
2301 testl %eax,%eax | |
2302 jnz .Ldec_key_ret | |
# rdi -> last round key; rdx -> first.  Swap the outermost pair
# untouched (first/last round keys are not IMC-transformed).
2303 leaq 16(%rdx,%rsi,1),%rdi | |
2304 | |
2305 movups (%rdx),%xmm0 | |
2306 movups (%rdi),%xmm1 | |
2307 movups %xmm0,(%rdi) | |
2308 movups %xmm1,(%rdx) | |
2309 leaq 16(%rdx),%rdx | |
2310 leaq -16(%rdi),%rdi | |
2311 | |
# Walk the two pointers toward each other, swapping round keys and
# applying aesimc to both as they pass.
2312 .Ldec_key_inverse: | |
2313 movups (%rdx),%xmm0 | |
2314 movups (%rdi),%xmm1 | |
2315 .byte 102,15,56,219,192 | |
2316 .byte 102,15,56,219,201 | |
2317 leaq 16(%rdx),%rdx | |
2318 leaq -16(%rdi),%rdi | |
2319 movups %xmm0,16(%rdi) | |
2320 movups %xmm1,-16(%rdx) | |
2321 cmpq %rdx,%rdi | |
2322 ja .Ldec_key_inverse | |
2323 | |
# Middle round key (pointers met): aesimc in place, no swap needed.
2324 movups (%rdx),%xmm0 | |
2325 .byte 102,15,56,219,192 | |
2326 movups %xmm0,(%rdi) | |
2327 .Ldec_key_ret: | |
2328 addq $8,%rsp | |
2329 .byte 0xf3,0xc3 | |
2330 .LSEH_end_set_decrypt_key: | |
2331 .size aesni_set_decrypt_key,.-aesni_set_decrypt_key | |
# int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
#                           AES_KEY *key)
# ABI:  SysV AMD64 — rdi = userKey, esi = bits (128/192/256), rdx = key out.
# Out:  eax = 0 ok, -1 null userKey or key, -2 unsupported key size.
# Expands the user key into the encryption round-key schedule using
# AESKEYGENASSIST plus the shufps slide-xor helpers below, and stores the
# round-loop count (Nr-1: 9/11/13) in the dword at offset 240 from the
# schedule base (written via the final value of rax in each path).
# Clobbers: rax, rsi, xmm0-xmm5, flags.
.globl aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:      # internal entry used by aesni_set_decrypt_key
.byte 0x48,0x83,0xEC,0x08     # subq $8,%rsp — realigns rsp for the helper calls
movq $-1,%rax                 # default return: error
testq %rdi,%rdi
jz .Lenc_key_ret              # null user key
testq %rdx,%rdx
jz .Lenc_key_ret              # null schedule pointer

movups (%rdi),%xmm0           # first 128 bits of the user key
xorps %xmm4,%xmm4             # zeroed scratch for the slide-xor helpers
leaq 16(%rdx),%rax            # rax -> slot for round key 1
cmpl $256,%esi
je .L14rounds
cmpl $192,%esi
je .L12rounds
cmpl $128,%esi
jne .Lbad_keybits

.L10rounds:                   # AES-128: rcon sequence 1,2,4,...,0x80,0x1b,0x36
movl $9,%esi                  # loop count Nr-1 stored at the end
movups %xmm0,(%rdx)           # round key 0 = user key
.byte 102,15,58,223,200,1     # aeskeygenassist $0x1,%xmm0,%xmm1
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2     # aeskeygenassist $0x2,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,4     # aeskeygenassist $0x4,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,8     # aeskeygenassist $0x8,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,16    # aeskeygenassist $0x10,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,32    # aeskeygenassist $0x20,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,64    # aeskeygenassist $0x40,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,128   # aeskeygenassist $0x80,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,27    # aeskeygenassist $0x1b,%xmm0,%xmm1
call .Lkey_expansion_128
.byte 102,15,58,223,200,54    # aeskeygenassist $0x36,%xmm0,%xmm1
call .Lkey_expansion_128
movups %xmm0,(%rax)           # round key 10 (rax = base+160 here)
movl %esi,80(%rax)            # rounds-1 at base+240
xorl %eax,%eax                # success
jmp .Lenc_key_ret

.align 16
.L12rounds:                   # AES-192: rcon sequence 1..0x80
movq 16(%rdi),%xmm2           # remaining 64 bits of the 192-bit key (low qword)
movl $11,%esi
movups %xmm0,(%rdx)           # round key 0
.byte 102,15,58,223,202,1     # aeskeygenassist $0x1,%xmm2,%xmm1
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2     # aeskeygenassist $0x2,%xmm2,%xmm1
call .Lkey_expansion_192b
.byte 102,15,58,223,202,4     # aeskeygenassist $0x4,%xmm2,%xmm1
call .Lkey_expansion_192a
.byte 102,15,58,223,202,8     # aeskeygenassist $0x8,%xmm2,%xmm1
call .Lkey_expansion_192b
.byte 102,15,58,223,202,16    # aeskeygenassist $0x10,%xmm2,%xmm1
call .Lkey_expansion_192a
.byte 102,15,58,223,202,32    # aeskeygenassist $0x20,%xmm2,%xmm1
call .Lkey_expansion_192b
.byte 102,15,58,223,202,64    # aeskeygenassist $0x40,%xmm2,%xmm1
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128   # aeskeygenassist $0x80,%xmm2,%xmm1
call .Lkey_expansion_192b
movups %xmm0,(%rax)           # round key 12 (rax = base+192 here)
movl %esi,48(%rax)            # rounds-1 at base+240
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L14rounds:                   # AES-256: rcon sequence 1..0x40; odd keys use
movups 16(%rdi),%xmm2         # SubWord only. xmm2 = second 128 bits of key.
movl $13,%esi
leaq 16(%rax),%rax            # round keys 0 and 1 come straight from the user key
movups %xmm0,(%rdx)           # round key 0
movups %xmm2,16(%rdx)         # round key 1
.byte 102,15,58,223,202,1     # aeskeygenassist $0x1,%xmm2,%xmm1
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1     # aeskeygenassist $0x1,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,2     # aeskeygenassist $0x2,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,2     # aeskeygenassist $0x2,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,4     # aeskeygenassist $0x4,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,4     # aeskeygenassist $0x4,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,8     # aeskeygenassist $0x8,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,8     # aeskeygenassist $0x8,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,16    # aeskeygenassist $0x10,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,16    # aeskeygenassist $0x10,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,32    # aeskeygenassist $0x20,%xmm2,%xmm1
call .Lkey_expansion_256a
.byte 102,15,58,223,200,32    # aeskeygenassist $0x20,%xmm0,%xmm1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64    # aeskeygenassist $0x40,%xmm2,%xmm1
call .Lkey_expansion_256a
movups %xmm0,(%rax)           # round key 14 (rax = base+224 here)
movl %esi,16(%rax)            # rounds-1 at base+240
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.Lbad_keybits:
movq $-2,%rax                 # unsupported key length
.Lenc_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3               # repz ret
.LSEH_end_set_encrypt_key:
2453 | |
# AES-128 schedule step. In: xmm0 = previous round key, xmm1 = output of
# aeskeygenassist on xmm0, xmm4 = zeroed scratch, rax = output cursor.
# Out: xmm0 = next round key; the _cold entry skips the store of the
# previous key (used for the very first step).
.align 16
.Lkey_expansion_128:
movups %xmm0,(%rax)           # emit the previous round key
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
shufps $16,%xmm0,%xmm4        # slide-xor: after these two shuffle/xor pairs
xorps %xmm4,%xmm0             # each dword of xmm0 is the xor of itself and
shufps $140,%xmm0,%xmm4       # all lower dwords of the previous round key
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1       # broadcast dword 3: RotWord+SubWord+rcon term
xorps %xmm1,%xmm0             # xmm0 = next round key
.byte 0xf3,0xc3               # repz ret
2466 | |
# AES-192 schedule step. The 192-bit working state is split across
# xmm0 (low 128 bits) and xmm2 (next 64 bits, low qword). In: xmm1 =
# aeskeygenassist output on xmm2, xmm4 = zeroed scratch, rax = cursor.
# Out: xmm0 = next four schedule words, xmm2 updated upper words; xmm5
# saves the old upper half for .Lkey_expansion_192b's repacking.
.align 16
.Lkey_expansion_192a:
movups %xmm0,(%rax)           # emit the previous round key
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5            # keep old upper half for the 192b store path
.Lkey_expansion_192b_warm:
shufps $16,%xmm0,%xmm4        # slide-xor on the low 128 bits, same idiom as
movdqa %xmm2,%xmm3            # the 128-bit expansion (xmm4 = sliding scratch)
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3               # shift upper words left one dword for their xor chain
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1        # broadcast keygenassist dword 1: RotWord+SubWord+rcon
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0              # xmm0 = next four schedule words
pshufd $255,%xmm0,%xmm3       # broadcast the last newly generated word
pxor %xmm3,%xmm2              # xmm2 = following two schedule words (upper half)
.byte 0xf3,0xc3               # repz ret
2486 | |
# AES-192 schedule step for the phase where the 6-word state straddles a
# 128-bit round-key boundary: repack xmm5/xmm0/xmm2 into two complete
# 16-byte round keys, emit both, then fall into the common expansion at
# .Lkey_expansion_192b_warm (in .Lkey_expansion_192a above).
.align 16
.Lkey_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5        # xmm5 = {old upper half, low half of xmm0}
movups %xmm5,(%rax)           # emit round key
shufps $78,%xmm2,%xmm3        # xmm3 = {high half of xmm0, upper words xmm2}
movups %xmm3,16(%rax)         # emit next round key
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm
2496 | |
# AES-256 even schedule step: derives the next even round key in xmm0
# from the previous one, consuming the odd key in xmm2. In: xmm1 =
# aeskeygenassist output on xmm2, xmm4 = zeroed scratch, rax = cursor.
.align 16
.Lkey_expansion_256a:
movups %xmm2,(%rax)           # emit the previous odd round key
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $16,%xmm0,%xmm4        # slide-xor over the previous even key
xorps %xmm4,%xmm0             # (same idiom as the 128-bit expansion)
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1       # broadcast dword 3: RotWord+SubWord+rcon term
xorps %xmm1,%xmm0             # xmm0 = next even round key
.byte 0xf3,0xc3               # repz ret
2509 | |
# AES-256 odd schedule step: derives the next odd round key in xmm2 from
# the just-computed even key in xmm0. In: xmm1 = aeskeygenassist output
# on xmm0, xmm4 = zeroed scratch, rax = cursor.
.align 16
.Lkey_expansion_256b:
movups %xmm0,(%rax)           # emit the previous even round key
leaq 16(%rax),%rax

shufps $16,%xmm2,%xmm4        # slide-xor over the previous odd key
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1       # broadcast dword 2: SubWord only — AES-256 odd
xorps %xmm1,%xmm2             # rounds take no RotWord and no rcon
.byte 0xf3,0xc3               # repz ret
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Read-only constant pool (cache-line aligned). The consumers of these
# constants live in code outside this chunk (CTR/XTS paths).
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0   # pshufb mask: full 16-byte reversal (endianness swap)
.Lincrement32:
.long 6,6,6,0                 # per-dword +6 — presumably the 6-block CTR counter stride; verify against the CTR code
.Lincrement64:
.long 1,0,0,0                 # +1 on the low 64-bit lane (paddq-style counter bump) — confirm consumer
.Lxts_magic:
.long 0x87,0,1,0              # GF(2^128) reduction constant 0x87 — presumably the XTS tweak update; verify against the XTS code

# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>\0"
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
OLD | NEW |