// Machine-generated AES-NI x86-64 assembly (GAS / AT&T syntax).
// Do not edit by hand; regenerate from the perlasm source instead.
1 #if defined(__x86_64__) | |
2 .text | |
3 .extern OPENSSL_ia32cap_P | |
4 .hidden OPENSSL_ia32cap_P | |
5 .globl aesni_encrypt | |
6 .hidden aesni_encrypt | |
7 .type aesni_encrypt,@function | |
8 .align 16 | |
9 aesni_encrypt: | |
10 movups (%rdi),%xmm2 | |
11 movl 240(%rdx),%eax | |
12 movups (%rdx),%xmm0 | |
13 movups 16(%rdx),%xmm1 | |
14 leaq 32(%rdx),%rdx | |
15 xorps %xmm0,%xmm2 | |
16 .Loop_enc1_1: | |
17 .byte 102,15,56,220,209 | |
18 decl %eax | |
19 movups (%rdx),%xmm1 | |
20 leaq 16(%rdx),%rdx | |
21 jnz .Loop_enc1_1 | |
22 .byte 102,15,56,221,209 | |
23 pxor %xmm0,%xmm0 | |
24 pxor %xmm1,%xmm1 | |
25 movups %xmm2,(%rsi) | |
26 pxor %xmm2,%xmm2 | |
27 .byte 0xf3,0xc3 | |
28 .size aesni_encrypt,.-aesni_encrypt | |
29 | |
30 .globl aesni_decrypt | |
31 .hidden aesni_decrypt | |
32 .type aesni_decrypt,@function | |
33 .align 16 | |
34 aesni_decrypt: | |
35 movups (%rdi),%xmm2 | |
36 movl 240(%rdx),%eax | |
37 movups (%rdx),%xmm0 | |
38 movups 16(%rdx),%xmm1 | |
39 leaq 32(%rdx),%rdx | |
40 xorps %xmm0,%xmm2 | |
41 .Loop_dec1_2: | |
42 .byte 102,15,56,222,209 | |
43 decl %eax | |
44 movups (%rdx),%xmm1 | |
45 leaq 16(%rdx),%rdx | |
46 jnz .Loop_dec1_2 | |
47 .byte 102,15,56,223,209 | |
48 pxor %xmm0,%xmm0 | |
49 pxor %xmm1,%xmm1 | |
50 movups %xmm2,(%rsi) | |
51 pxor %xmm2,%xmm2 | |
52 .byte 0xf3,0xc3 | |
53 .size aesni_decrypt, .-aesni_decrypt | |
54 .type _aesni_encrypt2,@function | |
55 .align 16 | |
56 _aesni_encrypt2: | |
57 movups (%rcx),%xmm0 | |
58 shll $4,%eax | |
59 movups 16(%rcx),%xmm1 | |
60 xorps %xmm0,%xmm2 | |
61 xorps %xmm0,%xmm3 | |
62 movups 32(%rcx),%xmm0 | |
63 leaq 32(%rcx,%rax,1),%rcx | |
64 negq %rax | |
65 addq $16,%rax | |
66 | |
67 .Lenc_loop2: | |
68 .byte 102,15,56,220,209 | |
69 .byte 102,15,56,220,217 | |
70 movups (%rcx,%rax,1),%xmm1 | |
71 addq $32,%rax | |
72 .byte 102,15,56,220,208 | |
73 .byte 102,15,56,220,216 | |
74 movups -16(%rcx,%rax,1),%xmm0 | |
75 jnz .Lenc_loop2 | |
76 | |
77 .byte 102,15,56,220,209 | |
78 .byte 102,15,56,220,217 | |
79 .byte 102,15,56,221,208 | |
80 .byte 102,15,56,221,216 | |
81 .byte 0xf3,0xc3 | |
82 .size _aesni_encrypt2,.-_aesni_encrypt2 | |
83 .type _aesni_decrypt2,@function | |
84 .align 16 | |
85 _aesni_decrypt2: | |
86 movups (%rcx),%xmm0 | |
87 shll $4,%eax | |
88 movups 16(%rcx),%xmm1 | |
89 xorps %xmm0,%xmm2 | |
90 xorps %xmm0,%xmm3 | |
91 movups 32(%rcx),%xmm0 | |
92 leaq 32(%rcx,%rax,1),%rcx | |
93 negq %rax | |
94 addq $16,%rax | |
95 | |
96 .Ldec_loop2: | |
97 .byte 102,15,56,222,209 | |
98 .byte 102,15,56,222,217 | |
99 movups (%rcx,%rax,1),%xmm1 | |
100 addq $32,%rax | |
101 .byte 102,15,56,222,208 | |
102 .byte 102,15,56,222,216 | |
103 movups -16(%rcx,%rax,1),%xmm0 | |
104 jnz .Ldec_loop2 | |
105 | |
106 .byte 102,15,56,222,209 | |
107 .byte 102,15,56,222,217 | |
108 .byte 102,15,56,223,208 | |
109 .byte 102,15,56,223,216 | |
110 .byte 0xf3,0xc3 | |
111 .size _aesni_decrypt2,.-_aesni_decrypt2 | |
112 .type _aesni_encrypt3,@function | |
113 .align 16 | |
114 _aesni_encrypt3: | |
115 movups (%rcx),%xmm0 | |
116 shll $4,%eax | |
117 movups 16(%rcx),%xmm1 | |
118 xorps %xmm0,%xmm2 | |
119 xorps %xmm0,%xmm3 | |
120 xorps %xmm0,%xmm4 | |
121 movups 32(%rcx),%xmm0 | |
122 leaq 32(%rcx,%rax,1),%rcx | |
123 negq %rax | |
124 addq $16,%rax | |
125 | |
126 .Lenc_loop3: | |
127 .byte 102,15,56,220,209 | |
128 .byte 102,15,56,220,217 | |
129 .byte 102,15,56,220,225 | |
130 movups (%rcx,%rax,1),%xmm1 | |
131 addq $32,%rax | |
132 .byte 102,15,56,220,208 | |
133 .byte 102,15,56,220,216 | |
134 .byte 102,15,56,220,224 | |
135 movups -16(%rcx,%rax,1),%xmm0 | |
136 jnz .Lenc_loop3 | |
137 | |
138 .byte 102,15,56,220,209 | |
139 .byte 102,15,56,220,217 | |
140 .byte 102,15,56,220,225 | |
141 .byte 102,15,56,221,208 | |
142 .byte 102,15,56,221,216 | |
143 .byte 102,15,56,221,224 | |
144 .byte 0xf3,0xc3 | |
145 .size _aesni_encrypt3,.-_aesni_encrypt3 | |
146 .type _aesni_decrypt3,@function | |
147 .align 16 | |
148 _aesni_decrypt3: | |
149 movups (%rcx),%xmm0 | |
150 shll $4,%eax | |
151 movups 16(%rcx),%xmm1 | |
152 xorps %xmm0,%xmm2 | |
153 xorps %xmm0,%xmm3 | |
154 xorps %xmm0,%xmm4 | |
155 movups 32(%rcx),%xmm0 | |
156 leaq 32(%rcx,%rax,1),%rcx | |
157 negq %rax | |
158 addq $16,%rax | |
159 | |
160 .Ldec_loop3: | |
161 .byte 102,15,56,222,209 | |
162 .byte 102,15,56,222,217 | |
163 .byte 102,15,56,222,225 | |
164 movups (%rcx,%rax,1),%xmm1 | |
165 addq $32,%rax | |
166 .byte 102,15,56,222,208 | |
167 .byte 102,15,56,222,216 | |
168 .byte 102,15,56,222,224 | |
169 movups -16(%rcx,%rax,1),%xmm0 | |
170 jnz .Ldec_loop3 | |
171 | |
172 .byte 102,15,56,222,209 | |
173 .byte 102,15,56,222,217 | |
174 .byte 102,15,56,222,225 | |
175 .byte 102,15,56,223,208 | |
176 .byte 102,15,56,223,216 | |
177 .byte 102,15,56,223,224 | |
178 .byte 0xf3,0xc3 | |
179 .size _aesni_decrypt3,.-_aesni_decrypt3 | |
180 .type _aesni_encrypt4,@function | |
181 .align 16 | |
182 _aesni_encrypt4: | |
183 movups (%rcx),%xmm0 | |
184 shll $4,%eax | |
185 movups 16(%rcx),%xmm1 | |
186 xorps %xmm0,%xmm2 | |
187 xorps %xmm0,%xmm3 | |
188 xorps %xmm0,%xmm4 | |
189 xorps %xmm0,%xmm5 | |
190 movups 32(%rcx),%xmm0 | |
191 leaq 32(%rcx,%rax,1),%rcx | |
192 negq %rax | |
193 .byte 0x0f,0x1f,0x00 | |
194 addq $16,%rax | |
195 | |
196 .Lenc_loop4: | |
197 .byte 102,15,56,220,209 | |
198 .byte 102,15,56,220,217 | |
199 .byte 102,15,56,220,225 | |
200 .byte 102,15,56,220,233 | |
201 movups (%rcx,%rax,1),%xmm1 | |
202 addq $32,%rax | |
203 .byte 102,15,56,220,208 | |
204 .byte 102,15,56,220,216 | |
205 .byte 102,15,56,220,224 | |
206 .byte 102,15,56,220,232 | |
207 movups -16(%rcx,%rax,1),%xmm0 | |
208 jnz .Lenc_loop4 | |
209 | |
210 .byte 102,15,56,220,209 | |
211 .byte 102,15,56,220,217 | |
212 .byte 102,15,56,220,225 | |
213 .byte 102,15,56,220,233 | |
214 .byte 102,15,56,221,208 | |
215 .byte 102,15,56,221,216 | |
216 .byte 102,15,56,221,224 | |
217 .byte 102,15,56,221,232 | |
218 .byte 0xf3,0xc3 | |
219 .size _aesni_encrypt4,.-_aesni_encrypt4 | |
220 .type _aesni_decrypt4,@function | |
221 .align 16 | |
222 _aesni_decrypt4: | |
223 movups (%rcx),%xmm0 | |
224 shll $4,%eax | |
225 movups 16(%rcx),%xmm1 | |
226 xorps %xmm0,%xmm2 | |
227 xorps %xmm0,%xmm3 | |
228 xorps %xmm0,%xmm4 | |
229 xorps %xmm0,%xmm5 | |
230 movups 32(%rcx),%xmm0 | |
231 leaq 32(%rcx,%rax,1),%rcx | |
232 negq %rax | |
233 .byte 0x0f,0x1f,0x00 | |
234 addq $16,%rax | |
235 | |
236 .Ldec_loop4: | |
237 .byte 102,15,56,222,209 | |
238 .byte 102,15,56,222,217 | |
239 .byte 102,15,56,222,225 | |
240 .byte 102,15,56,222,233 | |
241 movups (%rcx,%rax,1),%xmm1 | |
242 addq $32,%rax | |
243 .byte 102,15,56,222,208 | |
244 .byte 102,15,56,222,216 | |
245 .byte 102,15,56,222,224 | |
246 .byte 102,15,56,222,232 | |
247 movups -16(%rcx,%rax,1),%xmm0 | |
248 jnz .Ldec_loop4 | |
249 | |
250 .byte 102,15,56,222,209 | |
251 .byte 102,15,56,222,217 | |
252 .byte 102,15,56,222,225 | |
253 .byte 102,15,56,222,233 | |
254 .byte 102,15,56,223,208 | |
255 .byte 102,15,56,223,216 | |
256 .byte 102,15,56,223,224 | |
257 .byte 102,15,56,223,232 | |
258 .byte 0xf3,0xc3 | |
259 .size _aesni_decrypt4,.-_aesni_decrypt4 | |
260 .type _aesni_encrypt6,@function | |
261 .align 16 | |
262 _aesni_encrypt6: | |
263 movups (%rcx),%xmm0 | |
264 shll $4,%eax | |
265 movups 16(%rcx),%xmm1 | |
266 xorps %xmm0,%xmm2 | |
267 pxor %xmm0,%xmm3 | |
268 pxor %xmm0,%xmm4 | |
269 .byte 102,15,56,220,209 | |
270 leaq 32(%rcx,%rax,1),%rcx | |
271 negq %rax | |
272 .byte 102,15,56,220,217 | |
273 pxor %xmm0,%xmm5 | |
274 pxor %xmm0,%xmm6 | |
275 .byte 102,15,56,220,225 | |
276 pxor %xmm0,%xmm7 | |
277 movups (%rcx,%rax,1),%xmm0 | |
278 addq $16,%rax | |
279 jmp .Lenc_loop6_enter | |
280 .align 16 | |
281 .Lenc_loop6: | |
282 .byte 102,15,56,220,209 | |
283 .byte 102,15,56,220,217 | |
284 .byte 102,15,56,220,225 | |
285 .Lenc_loop6_enter: | |
286 .byte 102,15,56,220,233 | |
287 .byte 102,15,56,220,241 | |
288 .byte 102,15,56,220,249 | |
289 movups (%rcx,%rax,1),%xmm1 | |
290 addq $32,%rax | |
291 .byte 102,15,56,220,208 | |
292 .byte 102,15,56,220,216 | |
293 .byte 102,15,56,220,224 | |
294 .byte 102,15,56,220,232 | |
295 .byte 102,15,56,220,240 | |
296 .byte 102,15,56,220,248 | |
297 movups -16(%rcx,%rax,1),%xmm0 | |
298 jnz .Lenc_loop6 | |
299 | |
300 .byte 102,15,56,220,209 | |
301 .byte 102,15,56,220,217 | |
302 .byte 102,15,56,220,225 | |
303 .byte 102,15,56,220,233 | |
304 .byte 102,15,56,220,241 | |
305 .byte 102,15,56,220,249 | |
306 .byte 102,15,56,221,208 | |
307 .byte 102,15,56,221,216 | |
308 .byte 102,15,56,221,224 | |
309 .byte 102,15,56,221,232 | |
310 .byte 102,15,56,221,240 | |
311 .byte 102,15,56,221,248 | |
312 .byte 0xf3,0xc3 | |
313 .size _aesni_encrypt6,.-_aesni_encrypt6 | |
314 .type _aesni_decrypt6,@function | |
315 .align 16 | |
316 _aesni_decrypt6: | |
317 movups (%rcx),%xmm0 | |
318 shll $4,%eax | |
319 movups 16(%rcx),%xmm1 | |
320 xorps %xmm0,%xmm2 | |
321 pxor %xmm0,%xmm3 | |
322 pxor %xmm0,%xmm4 | |
323 .byte 102,15,56,222,209 | |
324 leaq 32(%rcx,%rax,1),%rcx | |
325 negq %rax | |
326 .byte 102,15,56,222,217 | |
327 pxor %xmm0,%xmm5 | |
328 pxor %xmm0,%xmm6 | |
329 .byte 102,15,56,222,225 | |
330 pxor %xmm0,%xmm7 | |
331 movups (%rcx,%rax,1),%xmm0 | |
332 addq $16,%rax | |
333 jmp .Ldec_loop6_enter | |
334 .align 16 | |
335 .Ldec_loop6: | |
336 .byte 102,15,56,222,209 | |
337 .byte 102,15,56,222,217 | |
338 .byte 102,15,56,222,225 | |
339 .Ldec_loop6_enter: | |
340 .byte 102,15,56,222,233 | |
341 .byte 102,15,56,222,241 | |
342 .byte 102,15,56,222,249 | |
343 movups (%rcx,%rax,1),%xmm1 | |
344 addq $32,%rax | |
345 .byte 102,15,56,222,208 | |
346 .byte 102,15,56,222,216 | |
347 .byte 102,15,56,222,224 | |
348 .byte 102,15,56,222,232 | |
349 .byte 102,15,56,222,240 | |
350 .byte 102,15,56,222,248 | |
351 movups -16(%rcx,%rax,1),%xmm0 | |
352 jnz .Ldec_loop6 | |
353 | |
354 .byte 102,15,56,222,209 | |
355 .byte 102,15,56,222,217 | |
356 .byte 102,15,56,222,225 | |
357 .byte 102,15,56,222,233 | |
358 .byte 102,15,56,222,241 | |
359 .byte 102,15,56,222,249 | |
360 .byte 102,15,56,223,208 | |
361 .byte 102,15,56,223,216 | |
362 .byte 102,15,56,223,224 | |
363 .byte 102,15,56,223,232 | |
364 .byte 102,15,56,223,240 | |
365 .byte 102,15,56,223,248 | |
366 .byte 0xf3,0xc3 | |
367 .size _aesni_decrypt6,.-_aesni_decrypt6 | |
368 .type _aesni_encrypt8,@function | |
369 .align 16 | |
370 _aesni_encrypt8: | |
371 movups (%rcx),%xmm0 | |
372 shll $4,%eax | |
373 movups 16(%rcx),%xmm1 | |
374 xorps %xmm0,%xmm2 | |
375 xorps %xmm0,%xmm3 | |
376 pxor %xmm0,%xmm4 | |
377 pxor %xmm0,%xmm5 | |
378 pxor %xmm0,%xmm6 | |
379 leaq 32(%rcx,%rax,1),%rcx | |
380 negq %rax | |
381 .byte 102,15,56,220,209 | |
382 pxor %xmm0,%xmm7 | |
383 pxor %xmm0,%xmm8 | |
384 .byte 102,15,56,220,217 | |
385 pxor %xmm0,%xmm9 | |
386 movups (%rcx,%rax,1),%xmm0 | |
387 addq $16,%rax | |
388 jmp .Lenc_loop8_inner | |
389 .align 16 | |
390 .Lenc_loop8: | |
391 .byte 102,15,56,220,209 | |
392 .byte 102,15,56,220,217 | |
393 .Lenc_loop8_inner: | |
394 .byte 102,15,56,220,225 | |
395 .byte 102,15,56,220,233 | |
396 .byte 102,15,56,220,241 | |
397 .byte 102,15,56,220,249 | |
398 .byte 102,68,15,56,220,193 | |
399 .byte 102,68,15,56,220,201 | |
400 .Lenc_loop8_enter: | |
401 movups (%rcx,%rax,1),%xmm1 | |
402 addq $32,%rax | |
403 .byte 102,15,56,220,208 | |
404 .byte 102,15,56,220,216 | |
405 .byte 102,15,56,220,224 | |
406 .byte 102,15,56,220,232 | |
407 .byte 102,15,56,220,240 | |
408 .byte 102,15,56,220,248 | |
409 .byte 102,68,15,56,220,192 | |
410 .byte 102,68,15,56,220,200 | |
411 movups -16(%rcx,%rax,1),%xmm0 | |
412 jnz .Lenc_loop8 | |
413 | |
414 .byte 102,15,56,220,209 | |
415 .byte 102,15,56,220,217 | |
416 .byte 102,15,56,220,225 | |
417 .byte 102,15,56,220,233 | |
418 .byte 102,15,56,220,241 | |
419 .byte 102,15,56,220,249 | |
420 .byte 102,68,15,56,220,193 | |
421 .byte 102,68,15,56,220,201 | |
422 .byte 102,15,56,221,208 | |
423 .byte 102,15,56,221,216 | |
424 .byte 102,15,56,221,224 | |
425 .byte 102,15,56,221,232 | |
426 .byte 102,15,56,221,240 | |
427 .byte 102,15,56,221,248 | |
428 .byte 102,68,15,56,221,192 | |
429 .byte 102,68,15,56,221,200 | |
430 .byte 0xf3,0xc3 | |
431 .size _aesni_encrypt8,.-_aesni_encrypt8 | |
432 .type _aesni_decrypt8,@function | |
433 .align 16 | |
434 _aesni_decrypt8: | |
435 movups (%rcx),%xmm0 | |
436 shll $4,%eax | |
437 movups 16(%rcx),%xmm1 | |
438 xorps %xmm0,%xmm2 | |
439 xorps %xmm0,%xmm3 | |
440 pxor %xmm0,%xmm4 | |
441 pxor %xmm0,%xmm5 | |
442 pxor %xmm0,%xmm6 | |
443 leaq 32(%rcx,%rax,1),%rcx | |
444 negq %rax | |
445 .byte 102,15,56,222,209 | |
446 pxor %xmm0,%xmm7 | |
447 pxor %xmm0,%xmm8 | |
448 .byte 102,15,56,222,217 | |
449 pxor %xmm0,%xmm9 | |
450 movups (%rcx,%rax,1),%xmm0 | |
451 addq $16,%rax | |
452 jmp .Ldec_loop8_inner | |
453 .align 16 | |
454 .Ldec_loop8: | |
455 .byte 102,15,56,222,209 | |
456 .byte 102,15,56,222,217 | |
457 .Ldec_loop8_inner: | |
458 .byte 102,15,56,222,225 | |
459 .byte 102,15,56,222,233 | |
460 .byte 102,15,56,222,241 | |
461 .byte 102,15,56,222,249 | |
462 .byte 102,68,15,56,222,193 | |
463 .byte 102,68,15,56,222,201 | |
464 .Ldec_loop8_enter: | |
465 movups (%rcx,%rax,1),%xmm1 | |
466 addq $32,%rax | |
467 .byte 102,15,56,222,208 | |
468 .byte 102,15,56,222,216 | |
469 .byte 102,15,56,222,224 | |
470 .byte 102,15,56,222,232 | |
471 .byte 102,15,56,222,240 | |
472 .byte 102,15,56,222,248 | |
473 .byte 102,68,15,56,222,192 | |
474 .byte 102,68,15,56,222,200 | |
475 movups -16(%rcx,%rax,1),%xmm0 | |
476 jnz .Ldec_loop8 | |
477 | |
478 .byte 102,15,56,222,209 | |
479 .byte 102,15,56,222,217 | |
480 .byte 102,15,56,222,225 | |
481 .byte 102,15,56,222,233 | |
482 .byte 102,15,56,222,241 | |
483 .byte 102,15,56,222,249 | |
484 .byte 102,68,15,56,222,193 | |
485 .byte 102,68,15,56,222,201 | |
486 .byte 102,15,56,223,208 | |
487 .byte 102,15,56,223,216 | |
488 .byte 102,15,56,223,224 | |
489 .byte 102,15,56,223,232 | |
490 .byte 102,15,56,223,240 | |
491 .byte 102,15,56,223,248 | |
492 .byte 102,68,15,56,223,192 | |
493 .byte 102,68,15,56,223,200 | |
494 .byte 0xf3,0xc3 | |
495 .size _aesni_decrypt8,.-_aesni_decrypt8 | |
496 .globl aesni_ecb_encrypt | |
497 .hidden aesni_ecb_encrypt | |
498 .type aesni_ecb_encrypt,@function | |
499 .align 16 | |
500 aesni_ecb_encrypt: | |
501 andq $-16,%rdx | |
502 jz .Lecb_ret | |
503 | |
504 movl 240(%rcx),%eax | |
505 movups (%rcx),%xmm0 | |
506 movq %rcx,%r11 | |
507 movl %eax,%r10d | |
508 testl %r8d,%r8d | |
509 jz .Lecb_decrypt | |
510 | |
511 cmpq $0x80,%rdx | |
512 jb .Lecb_enc_tail | |
513 | |
514 movdqu (%rdi),%xmm2 | |
515 movdqu 16(%rdi),%xmm3 | |
516 movdqu 32(%rdi),%xmm4 | |
517 movdqu 48(%rdi),%xmm5 | |
518 movdqu 64(%rdi),%xmm6 | |
519 movdqu 80(%rdi),%xmm7 | |
520 movdqu 96(%rdi),%xmm8 | |
521 movdqu 112(%rdi),%xmm9 | |
522 leaq 128(%rdi),%rdi | |
523 subq $0x80,%rdx | |
524 jmp .Lecb_enc_loop8_enter | |
525 .align 16 | |
526 .Lecb_enc_loop8: | |
527 movups %xmm2,(%rsi) | |
528 movq %r11,%rcx | |
529 movdqu (%rdi),%xmm2 | |
530 movl %r10d,%eax | |
531 movups %xmm3,16(%rsi) | |
532 movdqu 16(%rdi),%xmm3 | |
533 movups %xmm4,32(%rsi) | |
534 movdqu 32(%rdi),%xmm4 | |
535 movups %xmm5,48(%rsi) | |
536 movdqu 48(%rdi),%xmm5 | |
537 movups %xmm6,64(%rsi) | |
538 movdqu 64(%rdi),%xmm6 | |
539 movups %xmm7,80(%rsi) | |
540 movdqu 80(%rdi),%xmm7 | |
541 movups %xmm8,96(%rsi) | |
542 movdqu 96(%rdi),%xmm8 | |
543 movups %xmm9,112(%rsi) | |
544 leaq 128(%rsi),%rsi | |
545 movdqu 112(%rdi),%xmm9 | |
546 leaq 128(%rdi),%rdi | |
547 .Lecb_enc_loop8_enter: | |
548 | |
549 call _aesni_encrypt8 | |
550 | |
551 subq $0x80,%rdx | |
552 jnc .Lecb_enc_loop8 | |
553 | |
554 movups %xmm2,(%rsi) | |
555 movq %r11,%rcx | |
556 movups %xmm3,16(%rsi) | |
557 movl %r10d,%eax | |
558 movups %xmm4,32(%rsi) | |
559 movups %xmm5,48(%rsi) | |
560 movups %xmm6,64(%rsi) | |
561 movups %xmm7,80(%rsi) | |
562 movups %xmm8,96(%rsi) | |
563 movups %xmm9,112(%rsi) | |
564 leaq 128(%rsi),%rsi | |
565 addq $0x80,%rdx | |
566 jz .Lecb_ret | |
567 | |
568 .Lecb_enc_tail: | |
569 movups (%rdi),%xmm2 | |
570 cmpq $0x20,%rdx | |
571 jb .Lecb_enc_one | |
572 movups 16(%rdi),%xmm3 | |
573 je .Lecb_enc_two | |
574 movups 32(%rdi),%xmm4 | |
575 cmpq $0x40,%rdx | |
576 jb .Lecb_enc_three | |
577 movups 48(%rdi),%xmm5 | |
578 je .Lecb_enc_four | |
579 movups 64(%rdi),%xmm6 | |
580 cmpq $0x60,%rdx | |
581 jb .Lecb_enc_five | |
582 movups 80(%rdi),%xmm7 | |
583 je .Lecb_enc_six | |
584 movdqu 96(%rdi),%xmm8 | |
585 xorps %xmm9,%xmm9 | |
586 call _aesni_encrypt8 | |
587 movups %xmm2,(%rsi) | |
588 movups %xmm3,16(%rsi) | |
589 movups %xmm4,32(%rsi) | |
590 movups %xmm5,48(%rsi) | |
591 movups %xmm6,64(%rsi) | |
592 movups %xmm7,80(%rsi) | |
593 movups %xmm8,96(%rsi) | |
594 jmp .Lecb_ret | |
595 .align 16 | |
596 .Lecb_enc_one: | |
597 movups (%rcx),%xmm0 | |
598 movups 16(%rcx),%xmm1 | |
599 leaq 32(%rcx),%rcx | |
600 xorps %xmm0,%xmm2 | |
601 .Loop_enc1_3: | |
602 .byte 102,15,56,220,209 | |
603 decl %eax | |
604 movups (%rcx),%xmm1 | |
605 leaq 16(%rcx),%rcx | |
606 jnz .Loop_enc1_3 | |
607 .byte 102,15,56,221,209 | |
608 movups %xmm2,(%rsi) | |
609 jmp .Lecb_ret | |
610 .align 16 | |
611 .Lecb_enc_two: | |
612 call _aesni_encrypt2 | |
613 movups %xmm2,(%rsi) | |
614 movups %xmm3,16(%rsi) | |
615 jmp .Lecb_ret | |
616 .align 16 | |
617 .Lecb_enc_three: | |
618 call _aesni_encrypt3 | |
619 movups %xmm2,(%rsi) | |
620 movups %xmm3,16(%rsi) | |
621 movups %xmm4,32(%rsi) | |
622 jmp .Lecb_ret | |
623 .align 16 | |
624 .Lecb_enc_four: | |
625 call _aesni_encrypt4 | |
626 movups %xmm2,(%rsi) | |
627 movups %xmm3,16(%rsi) | |
628 movups %xmm4,32(%rsi) | |
629 movups %xmm5,48(%rsi) | |
630 jmp .Lecb_ret | |
631 .align 16 | |
632 .Lecb_enc_five: | |
633 xorps %xmm7,%xmm7 | |
634 call _aesni_encrypt6 | |
635 movups %xmm2,(%rsi) | |
636 movups %xmm3,16(%rsi) | |
637 movups %xmm4,32(%rsi) | |
638 movups %xmm5,48(%rsi) | |
639 movups %xmm6,64(%rsi) | |
640 jmp .Lecb_ret | |
641 .align 16 | |
642 .Lecb_enc_six: | |
643 call _aesni_encrypt6 | |
644 movups %xmm2,(%rsi) | |
645 movups %xmm3,16(%rsi) | |
646 movups %xmm4,32(%rsi) | |
647 movups %xmm5,48(%rsi) | |
648 movups %xmm6,64(%rsi) | |
649 movups %xmm7,80(%rsi) | |
650 jmp .Lecb_ret | |
651 | |
652 .align 16 | |
653 .Lecb_decrypt: | |
654 cmpq $0x80,%rdx | |
655 jb .Lecb_dec_tail | |
656 | |
657 movdqu (%rdi),%xmm2 | |
658 movdqu 16(%rdi),%xmm3 | |
659 movdqu 32(%rdi),%xmm4 | |
660 movdqu 48(%rdi),%xmm5 | |
661 movdqu 64(%rdi),%xmm6 | |
662 movdqu 80(%rdi),%xmm7 | |
663 movdqu 96(%rdi),%xmm8 | |
664 movdqu 112(%rdi),%xmm9 | |
665 leaq 128(%rdi),%rdi | |
666 subq $0x80,%rdx | |
667 jmp .Lecb_dec_loop8_enter | |
668 .align 16 | |
669 .Lecb_dec_loop8: | |
670 movups %xmm2,(%rsi) | |
671 movq %r11,%rcx | |
672 movdqu (%rdi),%xmm2 | |
673 movl %r10d,%eax | |
674 movups %xmm3,16(%rsi) | |
675 movdqu 16(%rdi),%xmm3 | |
676 movups %xmm4,32(%rsi) | |
677 movdqu 32(%rdi),%xmm4 | |
678 movups %xmm5,48(%rsi) | |
679 movdqu 48(%rdi),%xmm5 | |
680 movups %xmm6,64(%rsi) | |
681 movdqu 64(%rdi),%xmm6 | |
682 movups %xmm7,80(%rsi) | |
683 movdqu 80(%rdi),%xmm7 | |
684 movups %xmm8,96(%rsi) | |
685 movdqu 96(%rdi),%xmm8 | |
686 movups %xmm9,112(%rsi) | |
687 leaq 128(%rsi),%rsi | |
688 movdqu 112(%rdi),%xmm9 | |
689 leaq 128(%rdi),%rdi | |
690 .Lecb_dec_loop8_enter: | |
691 | |
692 call _aesni_decrypt8 | |
693 | |
694 movups (%r11),%xmm0 | |
695 subq $0x80,%rdx | |
696 jnc .Lecb_dec_loop8 | |
697 | |
698 movups %xmm2,(%rsi) | |
699 pxor %xmm2,%xmm2 | |
700 movq %r11,%rcx | |
701 movups %xmm3,16(%rsi) | |
702 pxor %xmm3,%xmm3 | |
703 movl %r10d,%eax | |
704 movups %xmm4,32(%rsi) | |
705 pxor %xmm4,%xmm4 | |
706 movups %xmm5,48(%rsi) | |
707 pxor %xmm5,%xmm5 | |
708 movups %xmm6,64(%rsi) | |
709 pxor %xmm6,%xmm6 | |
710 movups %xmm7,80(%rsi) | |
711 pxor %xmm7,%xmm7 | |
712 movups %xmm8,96(%rsi) | |
713 pxor %xmm8,%xmm8 | |
714 movups %xmm9,112(%rsi) | |
715 pxor %xmm9,%xmm9 | |
716 leaq 128(%rsi),%rsi | |
717 addq $0x80,%rdx | |
718 jz .Lecb_ret | |
719 | |
720 .Lecb_dec_tail: | |
721 movups (%rdi),%xmm2 | |
722 cmpq $0x20,%rdx | |
723 jb .Lecb_dec_one | |
724 movups 16(%rdi),%xmm3 | |
725 je .Lecb_dec_two | |
726 movups 32(%rdi),%xmm4 | |
727 cmpq $0x40,%rdx | |
728 jb .Lecb_dec_three | |
729 movups 48(%rdi),%xmm5 | |
730 je .Lecb_dec_four | |
731 movups 64(%rdi),%xmm6 | |
732 cmpq $0x60,%rdx | |
733 jb .Lecb_dec_five | |
734 movups 80(%rdi),%xmm7 | |
735 je .Lecb_dec_six | |
736 movups 96(%rdi),%xmm8 | |
737 movups (%rcx),%xmm0 | |
738 xorps %xmm9,%xmm9 | |
739 call _aesni_decrypt8 | |
740 movups %xmm2,(%rsi) | |
741 pxor %xmm2,%xmm2 | |
742 movups %xmm3,16(%rsi) | |
743 pxor %xmm3,%xmm3 | |
744 movups %xmm4,32(%rsi) | |
745 pxor %xmm4,%xmm4 | |
746 movups %xmm5,48(%rsi) | |
747 pxor %xmm5,%xmm5 | |
748 movups %xmm6,64(%rsi) | |
749 pxor %xmm6,%xmm6 | |
750 movups %xmm7,80(%rsi) | |
751 pxor %xmm7,%xmm7 | |
752 movups %xmm8,96(%rsi) | |
753 pxor %xmm8,%xmm8 | |
754 pxor %xmm9,%xmm9 | |
755 jmp .Lecb_ret | |
756 .align 16 | |
757 .Lecb_dec_one: | |
758 movups (%rcx),%xmm0 | |
759 movups 16(%rcx),%xmm1 | |
760 leaq 32(%rcx),%rcx | |
761 xorps %xmm0,%xmm2 | |
762 .Loop_dec1_4: | |
763 .byte 102,15,56,222,209 | |
764 decl %eax | |
765 movups (%rcx),%xmm1 | |
766 leaq 16(%rcx),%rcx | |
767 jnz .Loop_dec1_4 | |
768 .byte 102,15,56,223,209 | |
769 movups %xmm2,(%rsi) | |
770 pxor %xmm2,%xmm2 | |
771 jmp .Lecb_ret | |
772 .align 16 | |
773 .Lecb_dec_two: | |
774 call _aesni_decrypt2 | |
775 movups %xmm2,(%rsi) | |
776 pxor %xmm2,%xmm2 | |
777 movups %xmm3,16(%rsi) | |
778 pxor %xmm3,%xmm3 | |
779 jmp .Lecb_ret | |
780 .align 16 | |
781 .Lecb_dec_three: | |
782 call _aesni_decrypt3 | |
783 movups %xmm2,(%rsi) | |
784 pxor %xmm2,%xmm2 | |
785 movups %xmm3,16(%rsi) | |
786 pxor %xmm3,%xmm3 | |
787 movups %xmm4,32(%rsi) | |
788 pxor %xmm4,%xmm4 | |
789 jmp .Lecb_ret | |
790 .align 16 | |
791 .Lecb_dec_four: | |
792 call _aesni_decrypt4 | |
793 movups %xmm2,(%rsi) | |
794 pxor %xmm2,%xmm2 | |
795 movups %xmm3,16(%rsi) | |
796 pxor %xmm3,%xmm3 | |
797 movups %xmm4,32(%rsi) | |
798 pxor %xmm4,%xmm4 | |
799 movups %xmm5,48(%rsi) | |
800 pxor %xmm5,%xmm5 | |
801 jmp .Lecb_ret | |
802 .align 16 | |
803 .Lecb_dec_five: | |
804 xorps %xmm7,%xmm7 | |
805 call _aesni_decrypt6 | |
806 movups %xmm2,(%rsi) | |
807 pxor %xmm2,%xmm2 | |
808 movups %xmm3,16(%rsi) | |
809 pxor %xmm3,%xmm3 | |
810 movups %xmm4,32(%rsi) | |
811 pxor %xmm4,%xmm4 | |
812 movups %xmm5,48(%rsi) | |
813 pxor %xmm5,%xmm5 | |
814 movups %xmm6,64(%rsi) | |
815 pxor %xmm6,%xmm6 | |
816 pxor %xmm7,%xmm7 | |
817 jmp .Lecb_ret | |
818 .align 16 | |
819 .Lecb_dec_six: | |
820 call _aesni_decrypt6 | |
821 movups %xmm2,(%rsi) | |
822 pxor %xmm2,%xmm2 | |
823 movups %xmm3,16(%rsi) | |
824 pxor %xmm3,%xmm3 | |
825 movups %xmm4,32(%rsi) | |
826 pxor %xmm4,%xmm4 | |
827 movups %xmm5,48(%rsi) | |
828 pxor %xmm5,%xmm5 | |
829 movups %xmm6,64(%rsi) | |
830 pxor %xmm6,%xmm6 | |
831 movups %xmm7,80(%rsi) | |
832 pxor %xmm7,%xmm7 | |
833 | |
834 .Lecb_ret: | |
835 xorps %xmm0,%xmm0 | |
836 pxor %xmm1,%xmm1 | |
837 .byte 0xf3,0xc3 | |
838 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt | |
839 .globl aesni_ccm64_encrypt_blocks | |
840 .hidden aesni_ccm64_encrypt_blocks | |
841 .type aesni_ccm64_encrypt_blocks,@function | |
842 .align 16 | |
843 aesni_ccm64_encrypt_blocks: | |
844 movl 240(%rcx),%eax | |
845 movdqu (%r8),%xmm6 | |
846 movdqa .Lincrement64(%rip),%xmm9 | |
847 movdqa .Lbswap_mask(%rip),%xmm7 | |
848 | |
849 shll $4,%eax | |
850 movl $16,%r10d | |
851 leaq 0(%rcx),%r11 | |
852 movdqu (%r9),%xmm3 | |
853 movdqa %xmm6,%xmm2 | |
854 leaq 32(%rcx,%rax,1),%rcx | |
855 .byte 102,15,56,0,247 | |
856 subq %rax,%r10 | |
857 jmp .Lccm64_enc_outer | |
858 .align 16 | |
859 .Lccm64_enc_outer: | |
860 movups (%r11),%xmm0 | |
861 movq %r10,%rax | |
862 movups (%rdi),%xmm8 | |
863 | |
864 xorps %xmm0,%xmm2 | |
865 movups 16(%r11),%xmm1 | |
866 xorps %xmm8,%xmm0 | |
867 xorps %xmm0,%xmm3 | |
868 movups 32(%r11),%xmm0 | |
869 | |
870 .Lccm64_enc2_loop: | |
871 .byte 102,15,56,220,209 | |
872 .byte 102,15,56,220,217 | |
873 movups (%rcx,%rax,1),%xmm1 | |
874 addq $32,%rax | |
875 .byte 102,15,56,220,208 | |
876 .byte 102,15,56,220,216 | |
877 movups -16(%rcx,%rax,1),%xmm0 | |
878 jnz .Lccm64_enc2_loop | |
879 .byte 102,15,56,220,209 | |
880 .byte 102,15,56,220,217 | |
881 paddq %xmm9,%xmm6 | |
882 decq %rdx | |
883 .byte 102,15,56,221,208 | |
884 .byte 102,15,56,221,216 | |
885 | |
886 leaq 16(%rdi),%rdi | |
887 xorps %xmm2,%xmm8 | |
888 movdqa %xmm6,%xmm2 | |
889 movups %xmm8,(%rsi) | |
890 .byte 102,15,56,0,215 | |
891 leaq 16(%rsi),%rsi | |
892 jnz .Lccm64_enc_outer | |
893 | |
894 pxor %xmm0,%xmm0 | |
895 pxor %xmm1,%xmm1 | |
896 pxor %xmm2,%xmm2 | |
897 movups %xmm3,(%r9) | |
898 pxor %xmm3,%xmm3 | |
899 pxor %xmm8,%xmm8 | |
900 pxor %xmm6,%xmm6 | |
901 .byte 0xf3,0xc3 | |
902 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks | |
903 .globl aesni_ccm64_decrypt_blocks | |
904 .hidden aesni_ccm64_decrypt_blocks | |
905 .type aesni_ccm64_decrypt_blocks,@function | |
906 .align 16 | |
907 aesni_ccm64_decrypt_blocks: | |
908 movl 240(%rcx),%eax | |
909 movups (%r8),%xmm6 | |
910 movdqu (%r9),%xmm3 | |
911 movdqa .Lincrement64(%rip),%xmm9 | |
912 movdqa .Lbswap_mask(%rip),%xmm7 | |
913 | |
914 movaps %xmm6,%xmm2 | |
915 movl %eax,%r10d | |
916 movq %rcx,%r11 | |
917 .byte 102,15,56,0,247 | |
918 movups (%rcx),%xmm0 | |
919 movups 16(%rcx),%xmm1 | |
920 leaq 32(%rcx),%rcx | |
921 xorps %xmm0,%xmm2 | |
922 .Loop_enc1_5: | |
923 .byte 102,15,56,220,209 | |
924 decl %eax | |
925 movups (%rcx),%xmm1 | |
926 leaq 16(%rcx),%rcx | |
927 jnz .Loop_enc1_5 | |
928 .byte 102,15,56,221,209 | |
929 shll $4,%r10d | |
930 movl $16,%eax | |
931 movups (%rdi),%xmm8 | |
932 paddq %xmm9,%xmm6 | |
933 leaq 16(%rdi),%rdi | |
934 subq %r10,%rax | |
935 leaq 32(%r11,%r10,1),%rcx | |
936 movq %rax,%r10 | |
937 jmp .Lccm64_dec_outer | |
938 .align 16 | |
939 .Lccm64_dec_outer: | |
940 xorps %xmm2,%xmm8 | |
941 movdqa %xmm6,%xmm2 | |
942 movups %xmm8,(%rsi) | |
943 leaq 16(%rsi),%rsi | |
944 .byte 102,15,56,0,215 | |
945 | |
946 subq $1,%rdx | |
947 jz .Lccm64_dec_break | |
948 | |
949 movups (%r11),%xmm0 | |
950 movq %r10,%rax | |
951 movups 16(%r11),%xmm1 | |
952 xorps %xmm0,%xmm8 | |
953 xorps %xmm0,%xmm2 | |
954 xorps %xmm8,%xmm3 | |
955 movups 32(%r11),%xmm0 | |
956 jmp .Lccm64_dec2_loop | |
957 .align 16 | |
958 .Lccm64_dec2_loop: | |
959 .byte 102,15,56,220,209 | |
960 .byte 102,15,56,220,217 | |
961 movups (%rcx,%rax,1),%xmm1 | |
962 addq $32,%rax | |
963 .byte 102,15,56,220,208 | |
964 .byte 102,15,56,220,216 | |
965 movups -16(%rcx,%rax,1),%xmm0 | |
966 jnz .Lccm64_dec2_loop | |
967 movups (%rdi),%xmm8 | |
968 paddq %xmm9,%xmm6 | |
969 .byte 102,15,56,220,209 | |
970 .byte 102,15,56,220,217 | |
971 .byte 102,15,56,221,208 | |
972 .byte 102,15,56,221,216 | |
973 leaq 16(%rdi),%rdi | |
974 jmp .Lccm64_dec_outer | |
975 | |
976 .align 16 | |
977 .Lccm64_dec_break: | |
978 | |
979 movl 240(%r11),%eax | |
980 movups (%r11),%xmm0 | |
981 movups 16(%r11),%xmm1 | |
982 xorps %xmm0,%xmm8 | |
983 leaq 32(%r11),%r11 | |
984 xorps %xmm8,%xmm3 | |
985 .Loop_enc1_6: | |
986 .byte 102,15,56,220,217 | |
987 decl %eax | |
988 movups (%r11),%xmm1 | |
989 leaq 16(%r11),%r11 | |
990 jnz .Loop_enc1_6 | |
991 .byte 102,15,56,221,217 | |
992 pxor %xmm0,%xmm0 | |
993 pxor %xmm1,%xmm1 | |
994 pxor %xmm2,%xmm2 | |
995 movups %xmm3,(%r9) | |
996 pxor %xmm3,%xmm3 | |
997 pxor %xmm8,%xmm8 | |
998 pxor %xmm6,%xmm6 | |
999 .byte 0xf3,0xc3 | |
1000 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks | |
1001 .globl aesni_ctr32_encrypt_blocks | |
1002 .hidden aesni_ctr32_encrypt_blocks | |
1003 .type aesni_ctr32_encrypt_blocks,@function | |
1004 .align 16 | |
1005 aesni_ctr32_encrypt_blocks: | |
1006 cmpq $1,%rdx | |
1007 jne .Lctr32_bulk | |
1008 | |
1009 | |
1010 | |
1011 movups (%r8),%xmm2 | |
1012 movups (%rdi),%xmm3 | |
1013 movl 240(%rcx),%edx | |
1014 movups (%rcx),%xmm0 | |
1015 movups 16(%rcx),%xmm1 | |
1016 leaq 32(%rcx),%rcx | |
1017 xorps %xmm0,%xmm2 | |
1018 .Loop_enc1_7: | |
1019 .byte 102,15,56,220,209 | |
1020 decl %edx | |
1021 movups (%rcx),%xmm1 | |
1022 leaq 16(%rcx),%rcx | |
1023 jnz .Loop_enc1_7 | |
1024 .byte 102,15,56,221,209 | |
1025 pxor %xmm0,%xmm0 | |
1026 pxor %xmm1,%xmm1 | |
1027 xorps %xmm3,%xmm2 | |
1028 pxor %xmm3,%xmm3 | |
1029 movups %xmm2,(%rsi) | |
1030 xorps %xmm2,%xmm2 | |
1031 jmp .Lctr32_epilogue | |
1032 | |
1033 .align 16 | |
1034 .Lctr32_bulk: | |
1035 leaq (%rsp),%rax | |
1036 pushq %rbp | |
1037 subq $128,%rsp | |
1038 andq $-16,%rsp | |
1039 leaq -8(%rax),%rbp | |
1040 | |
1041 | |
1042 | |
1043 | |
1044 movdqu (%r8),%xmm2 | |
1045 movdqu (%rcx),%xmm0 | |
1046 movl 12(%r8),%r8d | |
1047 pxor %xmm0,%xmm2 | |
1048 movl 12(%rcx),%r11d | |
1049 movdqa %xmm2,0(%rsp) | |
1050 bswapl %r8d | |
1051 movdqa %xmm2,%xmm3 | |
1052 movdqa %xmm2,%xmm4 | |
1053 movdqa %xmm2,%xmm5 | |
1054 movdqa %xmm2,64(%rsp) | |
1055 movdqa %xmm2,80(%rsp) | |
1056 movdqa %xmm2,96(%rsp) | |
1057 movq %rdx,%r10 | |
1058 movdqa %xmm2,112(%rsp) | |
1059 | |
1060 leaq 1(%r8),%rax | |
1061 leaq 2(%r8),%rdx | |
1062 bswapl %eax | |
1063 bswapl %edx | |
1064 xorl %r11d,%eax | |
1065 xorl %r11d,%edx | |
1066 .byte 102,15,58,34,216,3 | |
1067 leaq 3(%r8),%rax | |
1068 movdqa %xmm3,16(%rsp) | |
1069 .byte 102,15,58,34,226,3 | |
1070 bswapl %eax | |
1071 movq %r10,%rdx | |
1072 leaq 4(%r8),%r10 | |
1073 movdqa %xmm4,32(%rsp) | |
1074 xorl %r11d,%eax | |
1075 bswapl %r10d | |
1076 .byte 102,15,58,34,232,3 | |
1077 xorl %r11d,%r10d | |
1078 movdqa %xmm5,48(%rsp) | |
1079 leaq 5(%r8),%r9 | |
1080 movl %r10d,64+12(%rsp) | |
1081 bswapl %r9d | |
1082 leaq 6(%r8),%r10 | |
1083 movl 240(%rcx),%eax | |
1084 xorl %r11d,%r9d | |
1085 bswapl %r10d | |
1086 movl %r9d,80+12(%rsp) | |
1087 xorl %r11d,%r10d | |
1088 leaq 7(%r8),%r9 | |
1089 movl %r10d,96+12(%rsp) | |
1090 bswapl %r9d | |
1091 movl OPENSSL_ia32cap_P+4(%rip),%r10d | |
1092 xorl %r11d,%r9d | |
1093 andl $71303168,%r10d | |
/*
 * Tail of aesni_ctr32_encrypt_blocks (the entry point and counter-block
 * setup are above this view).
 *
 * NOTE(review): register roles below are inferred from the visible code —
 * confirm against the prologue:
 *   %rdi = input,  %rsi = output,  %rdx = remaining 16-byte block count,
 *   %rcx = AES key schedule,  %eax = round count (240(key)),
 *   %r8d = low counter word in host order,  %r11d = byteswap helper,
 *   %r10d = OPENSSL_ia32cap feature bits,
 *   0..127(%rsp) = 8 pre-built counter blocks (low dword patched per lane),
 *   %rbp = saved caller stack frame.
 *
 * The .byte sequences are hand-encoded instructions:
 *   102,15,56,220,NN      = aesenc  %xmm1/%xmm0,%xmmN
 *   102,15,56,221,NN      = aesenclast
 *   102,68,... / 102,65,..= same with REX (dest/src %xmm8..%xmm15)
 *   0x0f,0x38,0xf1,...    = movbe %eax,disp(%rsp) (byte-swapped store)
 *   0x66,0x90             = 2-byte nop (scheduling padding)
 */
movl %r9d,112+12(%rsp)

movups 16(%rcx),%xmm1

movdqa 64(%rsp),%xmm6
movdqa 80(%rsp),%xmm7

/* Fewer than 8 blocks left: go straight to the small-count tail. */
cmpq $8,%rdx
jb .Lctr32_tail

subq $6,%rdx
cmpl $4194304,%r10d		/* 1<<22: MOVBE CPUID bit -> 6x path */
je .Lctr32_6x

/* Default wide path: 8 blocks per iteration, key pre-biased by 128. */
leaq 128(%rcx),%rcx
subq $2,%rdx
jmp .Lctr32_loop8

.align 16
.Lctr32_6x:
/* 6-blocks-at-a-time path for CPUs with MOVBE: counters are rebuilt
 * with byte-swapped stores instead of bswap+mov. */
shll $4,%eax
movl $48,%r10d
bswapl %r11d
leaq 32(%rcx,%rax,1),%rcx	/* point past the key schedule */
subq %rax,%r10			/* negative bias used to index key rounds */
jmp .Lctr32_loop6

.align 16
.Lctr32_loop6:
/* Bump the counter by 6 and refresh the six low dwords on the stack
 * (movbe stores them big-endian) while the first AES rounds run. */
addl $6,%r8d
movups -48(%rcx,%r10,1),%xmm0
.byte 102,15,56,220,209
movl %r8d,%eax
xorl %r11d,%eax
.byte 102,15,56,220,217
.byte 0x0f,0x38,0xf1,0x44,0x24,12	/* movbe %eax,12(%rsp) */
leal 1(%r8),%eax
.byte 102,15,56,220,225
xorl %r11d,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28	/* movbe %eax,28(%rsp) */
.byte 102,15,56,220,233
leal 2(%r8),%eax
xorl %r11d,%eax
.byte 102,15,56,220,241
.byte 0x0f,0x38,0xf1,0x44,0x24,44	/* movbe %eax,44(%rsp) */
leal 3(%r8),%eax
.byte 102,15,56,220,249
movups -32(%rcx,%r10,1),%xmm1
xorl %r11d,%eax

.byte 102,15,56,220,208
.byte 0x0f,0x38,0xf1,0x44,0x24,60	/* movbe %eax,60(%rsp) */
leal 4(%r8),%eax
.byte 102,15,56,220,216
xorl %r11d,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76	/* movbe %eax,76(%rsp) */
.byte 102,15,56,220,224
leal 5(%r8),%eax
xorl %r11d,%eax
.byte 102,15,56,220,232
.byte 0x0f,0x38,0xf1,0x44,0x24,92	/* movbe %eax,92(%rsp) */
movq %r10,%rax
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -16(%rcx,%r10,1),%xmm0

/* Finish the remaining rounds in the shared 6-block encrypt helper. */
call .Lenc_loop6

/* XOR the 6 keystream blocks into plaintext and reload the next
 * 6 counter blocks from the stack in the same pass. */
movdqu (%rdi),%xmm8
movdqu 16(%rdi),%xmm9
movdqu 32(%rdi),%xmm10
movdqu 48(%rdi),%xmm11
movdqu 64(%rdi),%xmm12
movdqu 80(%rdi),%xmm13
leaq 96(%rdi),%rdi
movups -64(%rcx,%r10,1),%xmm1
pxor %xmm2,%xmm8
movaps 0(%rsp),%xmm2
pxor %xmm3,%xmm9
movaps 16(%rsp),%xmm3
pxor %xmm4,%xmm10
movaps 32(%rsp),%xmm4
pxor %xmm5,%xmm11
movaps 48(%rsp),%xmm5
pxor %xmm6,%xmm12
movaps 64(%rsp),%xmm6
pxor %xmm7,%xmm13
movaps 80(%rsp),%xmm7
movdqu %xmm8,(%rsi)
movdqu %xmm9,16(%rsi)
movdqu %xmm10,32(%rsi)
movdqu %xmm11,48(%rsi)
movdqu %xmm12,64(%rsi)
movdqu %xmm13,80(%rsi)
leaq 96(%rsi),%rsi

subq $6,%rdx
jnc .Lctr32_loop6

/* Undo the borrow; 1..5 blocks remain (0 -> done). */
addq $6,%rdx
jz .Lctr32_done

/* Rebuild %eax as the round count and %rcx as the key base expected
 * by the tail code, then fall through via .Lctr32_tail. */
leal -48(%r10),%eax
leaq -80(%rcx,%r10,1),%rcx
negl %eax
shrl $4,%eax
jmp .Lctr32_tail

.align 32
.Lctr32_loop8:
/* Main path: 8 blocks per iteration.  Each group of instructions
 * interleaves one AES round over all 8 state registers with the
 * bswap/xor/store that patches the next iteration's counters in
 * 0..112(%rsp).  %rcx is biased by +128, hence the NN-128 offsets. */
addl $8,%r8d
movdqa 96(%rsp),%xmm8
.byte 102,15,56,220,209
movl %r8d,%r9d
movdqa 112(%rsp),%xmm9
.byte 102,15,56,220,217
bswapl %r9d
movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225
xorl %r11d,%r9d
nop
.byte 102,15,56,220,233
movl %r9d,0+12(%rsp)
leaq 1(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 48-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %r11d,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,16+12(%rsp)
leaq 2(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 64-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %r11d,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,32+12(%rsp)
leaq 3(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 80-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %r11d,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,48+12(%rsp)
leaq 4(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 96-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %r11d,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,64+12(%rsp)
leaq 5(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 112-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
xorl %r11d,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movl %r9d,80+12(%rsp)
leaq 6(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 128-128(%rcx),%xmm0
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
xorl %r11d,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movl %r9d,96+12(%rsp)
leaq 7(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 144-128(%rcx),%xmm1
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
xorl %r11d,%r9d
movdqu 0(%rdi),%xmm10		/* start prefetching plaintext */
.byte 102,15,56,220,232
movl %r9d,112+12(%rsp)
cmpl $11,%eax			/* 10/12/14 rounds: AES-128/192/256 */
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 160-128(%rcx),%xmm0

jb .Lctr32_enc_done		/* AES-128: done with middle rounds */

/* Two extra rounds for AES-192. */
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 176-128(%rcx),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 192-128(%rcx),%xmm0
je .Lctr32_enc_done		/* AES-192 finished */

/* Two more rounds for AES-256. */
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 208-128(%rcx),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 224-128(%rcx),%xmm0
jmp .Lctr32_enc_done

.align 16
.Lctr32_enc_done:
/* Load remaining plaintext, pre-XOR it with the last round key so the
 * final aesenclast produces ciphertext directly. */
movdqu 16(%rdi),%xmm11
pxor %xmm0,%xmm10
movdqu 32(%rdi),%xmm12
pxor %xmm0,%xmm11
movdqu 48(%rdi),%xmm13
pxor %xmm0,%xmm12
movdqu 64(%rdi),%xmm14
pxor %xmm0,%xmm13
movdqu 80(%rdi),%xmm15
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movdqu 96(%rdi),%xmm1
leaq 128(%rdi),%rdi

/* aesenclast each lane against its pre-XORed plaintext block while
 * reloading the next iteration's counters from the stack. */
.byte 102,65,15,56,221,210
pxor %xmm0,%xmm1
movdqu 112-128(%rdi),%xmm10
.byte 102,65,15,56,221,219
pxor %xmm0,%xmm10
movdqa 0(%rsp),%xmm11
.byte 102,65,15,56,221,228
.byte 102,65,15,56,221,237
movdqa 16(%rsp),%xmm12
movdqa 32(%rsp),%xmm13
.byte 102,65,15,56,221,246
.byte 102,65,15,56,221,255
movdqa 48(%rsp),%xmm14
movdqa 64(%rsp),%xmm15
.byte 102,68,15,56,221,193
movdqa 80(%rsp),%xmm0
movups 16-128(%rcx),%xmm1
.byte 102,69,15,56,221,202

/* Store 8 ciphertext blocks; rotate the reloaded counters into the
 * working registers for the next loop pass. */
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm0,%xmm7
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi

subq $8,%rdx
jnc .Lctr32_loop8

addq $8,%rdx			/* undo borrow: 1..7 blocks remain */
jz .Lctr32_done
leaq -128(%rcx),%rcx		/* remove the +128 key bias */

.Lctr32_tail:


/* Tail: 1..7 blocks.  Counters are already prepared in registers. */
leaq 16(%rcx),%rcx
cmpq $4,%rdx
jb .Lctr32_loop3
je .Lctr32_loop4


/* 5..7 blocks: run the 8-wide round helper once. */
shll $4,%eax
movdqa 96(%rsp),%xmm8
pxor %xmm9,%xmm9

movups 16(%rcx),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
leaq 32-16(%rcx,%rax,1),%rcx
negq %rax
.byte 102,15,56,220,225
addq $16,%rax
movups (%rdi),%xmm10
.byte 102,15,56,220,233
.byte 102,15,56,220,241
movups 16(%rdi),%xmm11
movups 32(%rdi),%xmm12
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193

call .Lenc_loop8_enter

/* XOR keystream with up to 7 plaintext blocks, bounded by %rdx. */
movdqu 48(%rdi),%xmm13
pxor %xmm10,%xmm2
movdqu 64(%rdi),%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm10,%xmm6
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
cmpq $6,%rdx
jb .Lctr32_done

movups 80(%rdi),%xmm11
xorps %xmm11,%xmm7
movups %xmm7,80(%rsi)
je .Lctr32_done

movups 96(%rdi),%xmm12
xorps %xmm12,%xmm8
movups %xmm8,96(%rsi)
jmp .Lctr32_done

.align 32
.Lctr32_loop4:
/* 4-block tail: simple round loop over xmm2..xmm5. */
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups (%rcx),%xmm1
jnz .Lctr32_loop4
.byte 102,15,56,221,209
.byte 102,15,56,221,217
movups (%rdi),%xmm10
movups 16(%rdi),%xmm11
.byte 102,15,56,221,225
.byte 102,15,56,221,233
movups 32(%rdi),%xmm12
movups 48(%rdi),%xmm13

xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm4,32(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm5,48(%rsi)
jmp .Lctr32_done

.align 32
.Lctr32_loop3:
/* 1..3 block tail: encrypt three counters, emit only %rdx blocks. */
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
decl %eax
.byte 102,15,56,220,217
.byte 102,15,56,220,225
movups (%rcx),%xmm1
jnz .Lctr32_loop3
.byte 102,15,56,221,209
.byte 102,15,56,221,217
.byte 102,15,56,221,225

movups (%rdi),%xmm10
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
cmpq $2,%rdx
jb .Lctr32_done

movups 16(%rdi),%xmm11
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
je .Lctr32_done

movups 32(%rdi),%xmm12
xorps %xmm12,%xmm4
movups %xmm4,32(%rsi)

.Lctr32_done:
/* Scrub key/keystream material from all XMM registers and the stack
 * counter area before returning (side-channel hygiene). */
xorps %xmm0,%xmm0
xorl %r11d,%r11d
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
movaps %xmm0,112(%rsp)
pxor %xmm15,%xmm15
leaq (%rbp),%rsp		/* restore caller's stack frame */
popq %rbp
.Lctr32_epilogue:
.byte 0xf3,0xc3			/* ret (encoded to defeat padlock hooks) */
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * void aesni_xts_encrypt(const uint8_t *in,  %rdi
 *                        uint8_t *out,       %rsi
 *                        size_t len,         %rdx (bytes)
 *                        const AES_KEY *key1,%rcx (data key)
 *                        const AES_KEY *key2,%r8  (tweak key)
 *                        const uint8_t iv[16]) %r9
 * ABI: SysV AMD64.  IEEE P1619 XTS-AES encryption with ciphertext
 * stealing for trailing partial blocks.
 * The .byte sequences encode aesenc (102,15,56,220,..), aesenclast
 * (..,221,..) and REX-prefixed forms for %xmm8+ operands.
 */
.globl aesni_xts_encrypt
.hidden aesni_xts_encrypt
.type aesni_xts_encrypt,@function
.align 16
aesni_xts_encrypt:
/* Build a 16-byte-aligned 112-byte scratch frame; %rbp keeps the
 * original stack pointer for the epilogue. */
leaq (%rsp),%rax
pushq %rbp
subq $112,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
/* Encrypt the IV with key2 to form the initial tweak (single-block
 * AES loop, any round count in %eax). */
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2
.Loop_enc1_8:
.byte 102,15,56,220,209
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz .Loop_enc1_8
.byte 102,15,56,221,209
movups (%rcx),%xmm0
movq %rcx,%r11			/* preserve key1 pointer */
movl %r10d,%eax			/* %eax = data-key round count */
shll $4,%r10d			/* %r10 = rounds*16: key-schedule size */
movq %rdx,%r9			/* keep original length for stealing */
andq $-16,%rdx			/* whole-block byte count */

movups 16(%rcx,%r10,1),%xmm1	/* last round key */

/* Derive tweaks for blocks 0..5 in %xmm10..%xmm14/%xmm15.  Each step
 * multiplies the tweak by x in GF(2^128): shift left 1 and conditionally
 * XOR the reduction constant (.Lxts_magic) selected by the sign bits
 * replicated into %xmm14 via pshufd/psrad.  Tweaks are pre-XORed with
 * round key 0 (%xmm0) so block whitening and tweak XOR fuse. */
movdqa .Lxts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)		/* stash (last round key ^ key0) */

subq $96,%rdx
jc .Lxts_enc_short		/* fewer than 6 whole blocks */

/* Set up biased key pointer/counter for the 6-block grand loop. */
movl $16+96,%eax
leaq 32(%r11,%r10,1),%rcx
subq %r10,%rax
movups 16(%r11),%xmm1
movq %rax,%r10
leaq .Lxts_magic(%rip),%r8
jmp .Lxts_enc_grandloop

.align 32
.Lxts_enc_grandloop:
/* Load 6 plaintext blocks, XOR with their (key0-biased) tweaks, and
 * start round 1; tweaks for the NEXT iteration are computed on the
 * stack in parallel with the AES rounds below. */
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,220,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,220,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,220,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,220,233
movups 32(%r11),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7

/* Re-bias the saved tweaks with (last key ^ key0) so the final
 * aesenclast folds the tweak XOR in for free. */
pxor %xmm9,%xmm10
.byte 102,15,56,220,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249
movups 48(%r11),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,220,208
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,216
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_enc_loop6
.align 32
.Lxts_enc_loop6:
/* Middle AES rounds, two per iteration, key indexed off biased %rcx. */
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -80(%rcx,%rax,1),%xmm0
jnz .Lxts_enc_loop6

/* Last 5 rounds interleaved with generating the next 6 tweaks
 * (same GF(2^128) doubling as in the setup above). */
movdqa (%r8),%xmm8
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,220,217
pand %xmm8,%xmm14
movups (%r11),%xmm10
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,220,249
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,220,208
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10
.byte 102,15,56,220,216
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,220,240
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,220,248
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movdqa %xmm13,48(%rsp)
pxor %xmm14,%xmm15
.byte 102,15,56,220,241
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,220,249
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,220,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,220,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,220,248

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm0,%xmm15
movups (%r11),%xmm0
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups 16(%r11),%xmm1

/* aesenclast directly against the tweak-biased values at 0..80(%rsp)
 * (memory-operand forms, encoded as .byte): output = AES(block)^tweak. */
pxor %xmm15,%xmm14
.byte 102,15,56,221,84,36,0
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,221,92,36,16
.byte 102,15,56,221,100,36,32
pand %xmm8,%xmm9
movq %r10,%rax
.byte 102,15,56,221,108,36,48
.byte 102,15,56,221,116,36,64
.byte 102,15,56,221,124,36,80
pxor %xmm9,%xmm15

leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc .Lxts_enc_grandloop

/* Restore plain round count and key pointer for the short tail. */
movl $16+96,%eax
subl %r10d,%eax
movq %r11,%rcx
shrl $4,%eax

.Lxts_enc_short:

/* 0..5 whole blocks remain; strip the key0 bias from the tweaks as
 * each becomes known to be needed. */
movl %eax,%r10d
pxor %xmm0,%xmm10
addq $96,%rdx
jz .Lxts_enc_done

pxor %xmm0,%xmm11
cmpq $0x20,%rdx
jb .Lxts_enc_one
pxor %xmm0,%xmm12
je .Lxts_enc_two

pxor %xmm0,%xmm13
cmpq $0x40,%rdx
jb .Lxts_enc_three
pxor %xmm0,%xmm14
je .Lxts_enc_four

/* Five blocks: use the 6-wide helper with a zeroed sixth lane. */
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm7,%xmm7

call _aesni_encrypt6

xorps %xmm10,%xmm2
movdqa %xmm15,%xmm10		/* advance tweak for possible stealing */
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_one:
/* One block: scalar single-block AES loop. */
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_9:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_9
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3

call _aesni_encrypt2

xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4

call _aesni_encrypt3

xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5

call _aesni_encrypt4

pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_done:
/* Ciphertext stealing: if len wasn't a multiple of 16, swap the last
 * partial plaintext with the tail of the previous ciphertext block,
 * then re-encrypt that block with the next tweak (%xmm10). */
andq $15,%r9
jz .Lxts_enc_ret
movq %r9,%rdx

.Lxts_enc_steal:
movzbl (%rdi),%eax
movzbl -16(%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,-16(%rsi)
movb %cl,0(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz .Lxts_enc_steal

subq %r9,%rsi
movq %r11,%rcx
movl %r10d,%eax

movups -16(%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_10:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_10
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movups %xmm2,-16(%rsi)

.Lxts_enc_ret:
/* Scrub secrets from XMM registers and the scratch frame. */
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
leaq (%rbp),%rsp
popq %rbp
.Lxts_enc_epilogue:
.byte 0xf3,0xc3			/* ret */
.size aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * void aesni_xts_decrypt(const uint8_t *in,  %rdi
 *                        uint8_t *out,       %rsi
 *                        size_t len,         %rdx
 *                        const AES_KEY *key1,%rcx (data key, decrypt sched)
 *                        const AES_KEY *key2,%r8  (tweak key, encrypt sched)
 *                        const uint8_t iv[16]) %r9
 * XTS-AES decryption; mirrors aesni_xts_encrypt but uses aesdec
 * (.byte 102,15,56,222,..) / aesdeclast (..,223,..).  The tweak is
 * still ENCRYPTED with key2.  (Function continues past this view.)
 */
.globl aesni_xts_decrypt
.hidden aesni_xts_decrypt
.type aesni_xts_decrypt,@function
.align 16
aesni_xts_decrypt:
leaq (%rsp),%rax
pushq %rbp
subq $112,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
/* Encrypt IV with key2 to get the initial tweak. */
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2
.Loop_enc1_11:
.byte 102,15,56,220,209
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz .Loop_enc1_11
.byte 102,15,56,221,209
/* If len is not block-aligned, hold back one extra whole block so the
 * ciphertext-stealing tail has a full block to work with. */
xorl %eax,%eax
testq $15,%rdx
setnz %al
shlq $4,%rax
subq %rax,%rdx

movups (%rcx),%xmm0
movq %rcx,%r11			/* preserve key1 */
movl %r10d,%eax			/* data-key round count */
shll $4,%r10d
movq %rdx,%r9			/* original (adjusted) length */
andq $-16,%rdx

movups 16(%rcx,%r10,1),%xmm1	/* last round key */

/* Derive tweaks for blocks 0..5 (GF(2^128) doubling via .Lxts_magic),
 * each pre-XORed with round key 0 — identical to the encrypt side. */
movdqa .Lxts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)		/* stash (last round key ^ key0) */

subq $96,%rdx
jc .Lxts_dec_short		/* fewer than 6 whole blocks */

movl $16+96,%eax
leaq 32(%r11,%r10,1),%rcx
subq %r10,%rax
movups 16(%r11),%xmm1
movq %rax,%r10
leaq .Lxts_magic(%rip),%r8
jmp .Lxts_dec_grandloop

.align 32
.Lxts_dec_grandloop:
/* 6 ciphertext blocks per pass; next tweaks are computed on the stack
 * in parallel with the aesdec rounds. */
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,222,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,222,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,222,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,222,233
movups 32(%r11),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7

pxor %xmm9,%xmm10
.byte 102,15,56,222,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,249
movups 48(%r11),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,222,208
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,222,216
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_dec_loop6
.align 32
.Lxts_dec_loop6:
/* Middle decrypt rounds, two per iteration. */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups -80(%rcx,%rax,1),%xmm0
jnz .Lxts_dec_loop6

/* Last 5 rounds interleaved with generating the next 6 tweaks. */
movdqa (%r8),%xmm8
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,222,217
pand %xmm8,%xmm14
movups (%r11),%xmm10
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,222,249
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,222,208
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10
.byte 102,15,56,222,216
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,222,240
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,222,248
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,225
.byte 102,15,56,222,233
movdqa %xmm13,48(%rsp)
pxor %xmm14,%xmm15
.byte 102,15,56,222,241
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,222,249
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,222,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,222,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,222,248

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm15
movups (%r11),%xmm0
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups 16(%r11),%xmm1

/* aesdeclast against the tweak-biased values at 0..80(%rsp):
 * output = AES^-1(block) ^ tweak. */
pxor %xmm15,%xmm14
.byte 102,15,56,223,84,36,0
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,223,92,36,16
.byte 102,15,56,223,100,36,32
pand %xmm8,%xmm9
movq %r10,%rax
.byte 102,15,56,223,108,36,48
.byte 102,15,56,223,116,36,64
.byte 102,15,56,223,124,36,80
pxor %xmm9,%xmm15

leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc .Lxts_dec_grandloop

movl $16+96,%eax
subl %r10d,%eax
movq %r11,%rcx
shrl $4,%eax

.Lxts_dec_short:

/* 0..5 whole blocks remain; strip key0 bias from tweaks as needed. */
movl %eax,%r10d
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
addq $96,%rdx
jz .Lxts_dec_done

pxor %xmm0,%xmm12
cmpq $0x20,%rdx
jb .Lxts_dec_one
pxor %xmm0,%xmm13
je .Lxts_dec_two

pxor %xmm0,%xmm14
cmpq $0x40,%rdx
jb .Lxts_dec_three
je .Lxts_dec_four

/* Five blocks via the 6-wide decrypt helper. */
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6

call _aesni_decrypt6

xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm14
movdqu %xmm5,48(%rsi)
pcmpgtd %xmm15,%xmm14		/* carry mask for next tweak doubling */
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
pshufd $0x13,%xmm14,%xmm11
andq $15,%r9
jz .Lxts_dec_ret

/* Partial tail follows: compute one more tweak for the stolen block. */
movdqa %xmm15,%xmm10
paddq %xmm15,%xmm15
pand %xmm8,%xmm11
pxor %xmm15,%xmm11
jmp .Lxts_dec_done2

.align 16
.Lxts_dec_one:
/* Single block: scalar decrypt loop; keep the next two tweaks so the
 * stealing path can decrypt out of order. */
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_12:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_12
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
movdqa %xmm12,%xmm11
leaq 16(%rsi),%rsi
jmp .Lxts_dec_done

.align 16
.Lxts_dec_two:
movups (%rdi),%xmm2
2394 movups 16(%rdi),%xmm3 | |
2395 leaq 32(%rdi),%rdi | |
2396 xorps %xmm10,%xmm2 | |
2397 xorps %xmm11,%xmm3 | |
2398 | |
2399 call _aesni_decrypt2 | |
2400 | |
2401 xorps %xmm10,%xmm2 | |
2402 movdqa %xmm12,%xmm10 | |
2403 xorps %xmm11,%xmm3 | |
2404 movdqa %xmm13,%xmm11 | |
2405 movups %xmm2,(%rsi) | |
2406 movups %xmm3,16(%rsi) | |
2407 leaq 32(%rsi),%rsi | |
2408 jmp .Lxts_dec_done | |
2409 | |
2410 .align 16 | |
2411 .Lxts_dec_three: | |
2412 movups (%rdi),%xmm2 | |
2413 movups 16(%rdi),%xmm3 | |
2414 movups 32(%rdi),%xmm4 | |
2415 leaq 48(%rdi),%rdi | |
2416 xorps %xmm10,%xmm2 | |
2417 xorps %xmm11,%xmm3 | |
2418 xorps %xmm12,%xmm4 | |
2419 | |
2420 call _aesni_decrypt3 | |
2421 | |
2422 xorps %xmm10,%xmm2 | |
2423 movdqa %xmm13,%xmm10 | |
2424 xorps %xmm11,%xmm3 | |
2425 movdqa %xmm14,%xmm11 | |
2426 xorps %xmm12,%xmm4 | |
2427 movups %xmm2,(%rsi) | |
2428 movups %xmm3,16(%rsi) | |
2429 movups %xmm4,32(%rsi) | |
2430 leaq 48(%rsi),%rsi | |
2431 jmp .Lxts_dec_done | |
2432 | |
2433 .align 16 | |
2434 .Lxts_dec_four: | |
2435 movups (%rdi),%xmm2 | |
2436 movups 16(%rdi),%xmm3 | |
2437 movups 32(%rdi),%xmm4 | |
2438 xorps %xmm10,%xmm2 | |
2439 movups 48(%rdi),%xmm5 | |
2440 leaq 64(%rdi),%rdi | |
2441 xorps %xmm11,%xmm3 | |
2442 xorps %xmm12,%xmm4 | |
2443 xorps %xmm13,%xmm5 | |
2444 | |
2445 call _aesni_decrypt4 | |
2446 | |
2447 pxor %xmm10,%xmm2 | |
2448 movdqa %xmm14,%xmm10 | |
2449 pxor %xmm11,%xmm3 | |
2450 movdqa %xmm15,%xmm11 | |
2451 pxor %xmm12,%xmm4 | |
2452 movdqu %xmm2,(%rsi) | |
2453 pxor %xmm13,%xmm5 | |
2454 movdqu %xmm3,16(%rsi) | |
2455 movdqu %xmm4,32(%rsi) | |
2456 movdqu %xmm5,48(%rsi) | |
2457 leaq 64(%rsi),%rsi | |
2458 jmp .Lxts_dec_done | |
2459 | |
2460 .align 16 | |
2461 .Lxts_dec_done: | |
2462 andq $15,%r9 | |
2463 jz .Lxts_dec_ret | |
2464 .Lxts_dec_done2: | |
2465 movq %r9,%rdx | |
2466 movq %r11,%rcx | |
2467 movl %r10d,%eax | |
2468 | |
2469 movups (%rdi),%xmm2 | |
2470 xorps %xmm11,%xmm2 | |
2471 movups (%rcx),%xmm0 | |
2472 movups 16(%rcx),%xmm1 | |
2473 leaq 32(%rcx),%rcx | |
2474 xorps %xmm0,%xmm2 | |
2475 .Loop_dec1_13: | |
2476 .byte 102,15,56,222,209 | |
2477 decl %eax | |
2478 movups (%rcx),%xmm1 | |
2479 leaq 16(%rcx),%rcx | |
2480 jnz .Loop_dec1_13 | |
2481 .byte 102,15,56,223,209 | |
2482 xorps %xmm11,%xmm2 | |
2483 movups %xmm2,(%rsi) | |
2484 | |
2485 .Lxts_dec_steal: | |
2486 movzbl 16(%rdi),%eax | |
2487 movzbl (%rsi),%ecx | |
2488 leaq 1(%rdi),%rdi | |
2489 movb %al,(%rsi) | |
2490 movb %cl,16(%rsi) | |
2491 leaq 1(%rsi),%rsi | |
2492 subq $1,%rdx | |
2493 jnz .Lxts_dec_steal | |
2494 | |
2495 subq %r9,%rsi | |
2496 movq %r11,%rcx | |
2497 movl %r10d,%eax | |
2498 | |
2499 movups (%rsi),%xmm2 | |
2500 xorps %xmm10,%xmm2 | |
2501 movups (%rcx),%xmm0 | |
2502 movups 16(%rcx),%xmm1 | |
2503 leaq 32(%rcx),%rcx | |
2504 xorps %xmm0,%xmm2 | |
2505 .Loop_dec1_14: | |
2506 .byte 102,15,56,222,209 | |
2507 decl %eax | |
2508 movups (%rcx),%xmm1 | |
2509 leaq 16(%rcx),%rcx | |
2510 jnz .Loop_dec1_14 | |
2511 .byte 102,15,56,223,209 | |
2512 xorps %xmm10,%xmm2 | |
2513 movups %xmm2,(%rsi) | |
2514 | |
2515 .Lxts_dec_ret: | |
2516 xorps %xmm0,%xmm0 | |
2517 pxor %xmm1,%xmm1 | |
2518 pxor %xmm2,%xmm2 | |
2519 pxor %xmm3,%xmm3 | |
2520 pxor %xmm4,%xmm4 | |
2521 pxor %xmm5,%xmm5 | |
2522 pxor %xmm6,%xmm6 | |
2523 pxor %xmm7,%xmm7 | |
2524 movaps %xmm0,0(%rsp) | |
2525 pxor %xmm8,%xmm8 | |
2526 movaps %xmm0,16(%rsp) | |
2527 pxor %xmm9,%xmm9 | |
2528 movaps %xmm0,32(%rsp) | |
2529 pxor %xmm10,%xmm10 | |
2530 movaps %xmm0,48(%rsp) | |
2531 pxor %xmm11,%xmm11 | |
2532 movaps %xmm0,64(%rsp) | |
2533 pxor %xmm12,%xmm12 | |
2534 movaps %xmm0,80(%rsp) | |
2535 pxor %xmm13,%xmm13 | |
2536 movaps %xmm0,96(%rsp) | |
2537 pxor %xmm14,%xmm14 | |
2538 pxor %xmm15,%xmm15 | |
2539 leaq (%rbp),%rsp | |
2540 popq %rbp | |
2541 .Lxts_dec_epilogue: | |
2542 .byte 0xf3,0xc3 | |
2543 .size aesni_xts_decrypt,.-aesni_xts_decrypt | |
/*
 * AES-NI CBC-mode bulk encrypt/decrypt (SysV AMD64 ABI, AT&T syntax).
 *
 * Register roles, as used by the code below:
 *   %rdi = input pointer            %rsi = output pointer
 *   %rdx = length in bytes          %rcx = key schedule (round count at
 *                                          offset 240, like AES_KEY)
 *   %r8  = IV buffer (read and updated)
 *   %r9d = direction: non-zero => encrypt, zero => decrypt
 * NOTE(review): this matches OpenSSL's public aesni_cbc_encrypt()
 * prototype -- confirm against the C header.
 *
 * The ".byte 102,15,56,220/221/222/223,..." sequences are hand-encoded
 * aesenc / aesenclast / aesdec / aesdeclast instructions (66 0F 38
 * DC/DD/DE/DF /r), emitted as raw bytes for assemblers that predate
 * AES-NI mnemonics.  ".byte 0xf3,0xc3" is "rep ret".
 */
.globl aesni_cbc_encrypt
.hidden aesni_cbc_encrypt
.type aesni_cbc_encrypt,@function
.align 16
aesni_cbc_encrypt:
testq %rdx,%rdx                 /* zero-length input: nothing to do */
jz .Lcbc_ret

movl 240(%rcx),%r10d            /* r10d = number of rounds */
movq %rcx,%r11                  /* r11 = saved key pointer */
testl %r9d,%r9d
jz .Lcbc_decrypt

/* ---- CBC encrypt: inherently serial, one block at a time ---- */
movups (%r8),%xmm2              /* xmm2 = IV (chaining value) */
movl %r10d,%eax
cmpq $16,%rdx
jb .Lcbc_enc_tail               /* sub-block input: pad first */
subq $16,%rdx
jmp .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
movups (%rdi),%xmm3             /* load plaintext block */
leaq 16(%rdi),%rdi

movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm3               /* fold round-0 key into plaintext */
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2               /* xmm2 = P ^ IV ^ k0 */
.Loop_enc1_15:
.byte 102,15,56,220,209         /* aesenc %xmm1,%xmm2 */
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_15
.byte 102,15,56,221,209         /* aesenclast %xmm1,%xmm2 */
movl %r10d,%eax                 /* reset round counter and key ptr */
movq %r11,%rcx
movups %xmm2,0(%rsi)            /* store ciphertext; xmm2 chains on */
leaq 16(%rsi),%rsi
subq $16,%rdx
jnc .Lcbc_enc_loop
addq $16,%rdx
jnz .Lcbc_enc_tail
pxor %xmm0,%xmm0                /* scrub key material from registers */
pxor %xmm1,%xmm1
movups %xmm2,(%r8)              /* write back final IV */
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
jmp .Lcbc_ret

.Lcbc_enc_tail:
/* Partial final block: copy the tail in place and zero-pad to 16B. */
movq %rdx,%rcx
xchgq %rdi,%rsi
.long 0x9066A4F3                /* rep movsb; nop (raw encoding) */
movl $16,%ecx
subq %rdx,%rcx
xorl %eax,%eax
.long 0x9066AAF3                /* rep stosb; nop -- zero padding */
leaq -16(%rdi),%rdi
movl %r10d,%eax
movq %rdi,%rsi
movq %r11,%rcx
xorq %rdx,%rdx
jmp .Lcbc_enc_loop              /* encrypt the padded block in place */

.align 16
.Lcbc_decrypt:
cmpq $16,%rdx
jne .Lcbc_decrypt_bulk

/* ---- Exactly one block: decrypt without the bulk machinery ---- */

movdqu (%rdi),%xmm2             /* ciphertext block */
movdqu (%r8),%xmm3              /* IV */
movdqa %xmm2,%xmm4              /* keep ciphertext: it becomes new IV */
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_16:
.byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
decl %r10d
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_16
.byte 102,15,56,223,209         /* aesdeclast %xmm1,%xmm2 */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movdqu %xmm4,(%r8)              /* new IV = this ciphertext */
xorps %xmm3,%xmm2               /* plaintext = D(C) ^ IV */
pxor %xmm3,%xmm3
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp .Lcbc_ret
.align 16
.Lcbc_decrypt_bulk:
/* Prologue: frame in rbp, rsp 16-aligned with a 16-byte scratch slot. */
leaq (%rsp),%rax
pushq %rbp
subq $16,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
movups (%r8),%xmm10             /* xmm10 = IV / chaining value */
movl %r10d,%eax
cmpq $0x50,%rdx
jbe .Lcbc_dec_tail              /* <= 5 blocks: small-count path */

/* Preload 6 ciphertext blocks; xmm11-15 keep copies for chaining. */
movups (%rcx),%xmm0
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
/* Feature probe: mask = (1<<26)|(1<<22); equal to 1<<22 selects the
 * 6x-interleaved loop.  NOTE(review): presumably detects Atom-class
 * cores via OPENSSL_ia32cap_P -- confirm the bit assignment. */
movl OPENSSL_ia32cap_P+4(%rip),%r9d
cmpq $0x70,%rdx
jbe .Lcbc_dec_six_or_seven

andl $71303168,%r9d             /* 0x4400000 */
subq $0x50,%rdx
cmpl $4194304,%r9d              /* 0x400000 */
je .Lcbc_dec_loop6_enter
subq $0x20,%rdx
leaq 112(%rcx),%rcx             /* bias key ptr for the -112 offsets */
jmp .Lcbc_dec_loop8_enter
.align 16
/* ---- Main path: 8 blocks decrypted in parallel per iteration ---- */
.Lcbc_dec_loop8:
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
movdqu 96(%rdi),%xmm8
pxor %xmm0,%xmm2                /* round-0 key into all 8 blocks */
movdqu 112(%rdi),%xmm9
pxor %xmm0,%xmm3
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
xorq %r11,%r11
cmpq $0x70,%rdx                 /* CF decides: full 8 blocks left? */
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8

.byte 102,15,56,222,209         /* aesdec rounds on xmm2..xmm9, */
pxor %xmm0,%xmm9                /* interleaved with key loads    */
movups 32-112(%rcx),%xmm0
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
/* r11 = rdi + (remaining >= 0x70 ? 128 : 0): where next iteration's
 * chaining ciphertexts will be re-read from. */
setnc %r11b
shlq $7,%r11
.byte 102,68,15,56,222,201
addq %rdi,%r11
movups 48-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 64-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 80-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 96-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 112-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 128-112(%rcx),%xmm0
nop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 144-112(%rcx),%xmm1
cmpl $11,%eax                   /* 10/12/14-round dispatch */
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 160-112(%rcx),%xmm0
jb .Lcbc_dec_done               /* AES-128: done after 10 rounds */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 176-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 192-112(%rcx),%xmm0
je .Lcbc_dec_done               /* AES-192: done after 12 rounds */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 208-112(%rcx),%xmm1
nop
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 224-112(%rcx),%xmm0
jmp .Lcbc_dec_done
.align 16
.Lcbc_dec_done:
/* Last two rounds: pre-XOR the chaining values (previous ciphertexts)
 * with the final round key, then aesdeclast folds them in. */
.byte 102,15,56,222,209
.byte 102,15,56,222,217
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm12
pxor %xmm0,%xmm13
.byte 102,15,56,222,241
.byte 102,15,56,222,249
pxor %xmm0,%xmm14
pxor %xmm0,%xmm15
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movdqu 80(%rdi),%xmm1

.byte 102,65,15,56,223,210      /* aesdeclast %xmm10,%xmm2 ... */
movdqu 96(%rdi),%xmm10
pxor %xmm0,%xmm1
.byte 102,65,15,56,223,219
pxor %xmm0,%xmm10
movdqu 112(%rdi),%xmm0
.byte 102,65,15,56,223,228
leaq 128(%rdi),%rdi
movdqu 0(%r11),%xmm11           /* reload chaining blocks from r11 */
.byte 102,65,15,56,223,237
.byte 102,65,15,56,223,246
movdqu 16(%r11),%xmm12
movdqu 32(%r11),%xmm13
.byte 102,65,15,56,223,255
.byte 102,68,15,56,223,193
movdqu 48(%r11),%xmm14
movdqu 64(%r11),%xmm15
.byte 102,69,15,56,223,202
movdqa %xmm0,%xmm10
movdqu 80(%r11),%xmm1
movups -112(%rcx),%xmm0

movups %xmm2,(%rsi)             /* store 7 plaintexts, stage next 7 */
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm1,%xmm7
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi

subq $0x80,%rdx
ja .Lcbc_dec_loop8

movaps %xmm9,%xmm2
leaq -112(%rcx),%rcx            /* undo key-pointer bias */
addq $0x70,%rdx
jle .Lcbc_dec_clear_tail_collected
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
cmpq $0x50,%rdx
jbe .Lcbc_dec_tail

movaps %xmm11,%xmm2
.Lcbc_dec_six_or_seven:
cmpq $0x60,%rdx
ja .Lcbc_dec_seven

/* ---- Exactly 6 blocks remain ---- */
movaps %xmm7,%xmm8              /* save last ciphertext: next IV */
call _aesni_decrypt6
pxor %xmm10,%xmm2               /* CBC-unchain and store; pxor   */
movaps %xmm8,%xmm10             /* zeroing scrubs spent registers */
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
leaq 80(%rsi),%rsi
movdqa %xmm7,%xmm2
pxor %xmm7,%xmm7
jmp .Lcbc_dec_tail_collected

.align 16
/* ---- Exactly 7 blocks remain ---- */
.Lcbc_dec_seven:
movups 96(%rdi),%xmm8
xorps %xmm9,%xmm9
call _aesni_decrypt8
movups 80(%rdi),%xmm9
pxor %xmm10,%xmm2
movups 96(%rdi),%xmm10          /* last ciphertext becomes next IV */
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm6,%xmm6
pxor %xmm9,%xmm8
movdqu %xmm7,80(%rsi)
pxor %xmm7,%xmm7
leaq 96(%rsi),%rsi
movdqa %xmm8,%xmm2
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
jmp .Lcbc_dec_tail_collected

.align 16
/* ---- 6x-interleaved bulk loop (narrower-pipeline CPUs) ---- */
.Lcbc_dec_loop6:
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
.Lcbc_dec_loop6_enter:
leaq 96(%rdi),%rdi
movdqa %xmm7,%xmm8

call _aesni_decrypt6

pxor %xmm10,%xmm2
movdqa %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movq %r11,%rcx                  /* restore key ptr / round count */
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movl %r10d,%eax
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $0x60,%rdx
ja .Lcbc_dec_loop6

movdqa %xmm7,%xmm2
addq $0x50,%rdx
jle .Lcbc_dec_clear_tail_collected
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi

/* ---- 1..5 remaining blocks: count them off 16 bytes at a time ---- */
.Lcbc_dec_tail:
movups (%rdi),%xmm2
subq $0x10,%rdx
jbe .Lcbc_dec_one

movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $0x10,%rdx
jbe .Lcbc_dec_two

movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $0x10,%rdx
jbe .Lcbc_dec_three

movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $0x10,%rdx
jbe .Lcbc_dec_four

/* Five blocks: run decrypt6 with a zero sixth block. */
movups 64(%rdi),%xmm6
movaps %xmm5,%xmm14
movaps %xmm6,%xmm15
xorps %xmm7,%xmm7
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm15,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm5,%xmm5
leaq 64(%rsi),%rsi
movdqa %xmm6,%xmm2
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
subq $0x10,%rdx
jmp .Lcbc_dec_tail_collected

.align 16
.Lcbc_dec_one:
movaps %xmm2,%xmm11
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_17:
.byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_17
.byte 102,15,56,223,209         /* aesdeclast %xmm1,%xmm2 */
xorps %xmm10,%xmm2
movaps %xmm11,%xmm10
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_two:
movaps %xmm3,%xmm12
call _aesni_decrypt2
pxor %xmm10,%xmm2
movaps %xmm12,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
movdqa %xmm3,%xmm2
pxor %xmm3,%xmm3
leaq 16(%rsi),%rsi
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_three:
movaps %xmm4,%xmm13
call _aesni_decrypt3
pxor %xmm10,%xmm2
movaps %xmm13,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movdqa %xmm4,%xmm2
pxor %xmm4,%xmm4
leaq 32(%rsi),%rsi
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_four:
movaps %xmm5,%xmm14
call _aesni_decrypt4
pxor %xmm10,%xmm2
movaps %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm3,%xmm3
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movdqa %xmm5,%xmm2
pxor %xmm5,%xmm5
leaq 48(%rsi),%rsi
jmp .Lcbc_dec_tail_collected

.align 16
.Lcbc_dec_clear_tail_collected:
pxor %xmm3,%xmm3                /* scrub sensitive register state */
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
.Lcbc_dec_tail_collected:
movups %xmm10,(%r8)             /* write back the output IV */
andq $15,%rdx
jnz .Lcbc_dec_tail_partial
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp .Lcbc_dec_ret
.align 16
.Lcbc_dec_tail_partial:
/* Partial last block: spill to the aligned scratch slot, copy the
 * needed bytes out, then wipe the scratch. */
movaps %xmm2,(%rsp)
pxor %xmm2,%xmm2
movq $16,%rcx
movq %rsi,%rdi
subq %rdx,%rcx
leaq (%rsp),%rsi
.long 0x9066A4F3                /* rep movsb; nop (raw encoding) */
movdqa %xmm2,(%rsp)

.Lcbc_dec_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
leaq (%rbp),%rsp                /* epilogue: unwind frame */
popq %rbp
.Lcbc_ret:
.byte 0xf3,0xc3                 /* rep ret */
.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
/*
 * AES decryption key schedule (SysV AMD64 ABI, AT&T syntax).
 *
 * Register roles visible below: %rdi = user key, %esi = key bits,
 * %rdx = AES_KEY-style schedule to fill.  Builds the encryption
 * schedule via __aesni_set_encrypt_key, then converts it for the
 * "Equivalent Inverse Cipher": reverses the round-key order and runs
 * aesimc (InvMixColumns, .byte 102,15,56,219 = 66 0F 38 DB) on all
 * but the first and last round keys.  Returns __aesni_set_encrypt_key's
 * status in %eax (non-zero aborts the inversion).
 */
.globl aesni_set_decrypt_key
.hidden aesni_set_decrypt_key
.type aesni_set_decrypt_key,@function
.align 16
aesni_set_decrypt_key:
.byte 0x48,0x83,0xEC,0x08       /* subq $8,%rsp (raw encoding) */
call __aesni_set_encrypt_key
shll $4,%esi                    /* esi = rounds (from callee) * 16 */
testl %eax,%eax
jnz .Ldec_key_ret               /* propagate key-setup failure */
leaq 16(%rdx,%rsi,1),%rdi       /* rdi -> last round key */

movups (%rdx),%xmm0             /* swap first and last round keys */
movups (%rdi),%xmm1             /* (these two are NOT aesimc'd)    */
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi

.Ldec_key_inverse:
/* Walk inward from both ends: swap round keys and apply aesimc. */
movups (%rdx),%xmm0
movups (%rdi),%xmm1
.byte 102,15,56,219,192         /* aesimc %xmm0,%xmm0 */
.byte 102,15,56,219,201         /* aesimc %xmm1,%xmm1 */
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)
movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja .Ldec_key_inverse

movups (%rdx),%xmm0             /* middle key: aesimc in place */
.byte 102,15,56,219,192         /* aesimc %xmm0,%xmm0 */
pxor %xmm1,%xmm1
movups %xmm0,(%rdi)
pxor %xmm0,%xmm0                /* scrub key material */
.Ldec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3                 /* rep ret */
.LSEH_end_set_decrypt_key:
.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
/*
 * AES encryption key schedule (SysV AMD64 ABI, AT&T syntax).
 *
 * Register roles visible below: %rdi = user key, %esi = key bits
 * (128/192/256), %rdx = schedule buffer.  Returns in %rax:
 *   0 = success, -1 = NULL user key or schedule, -2 = bad key bits.
 * The round count (9/11/13 counted as "rounds-1" loop trips) is stored
 * after the last round key, AES_KEY-style.
 *
 * Two code paths per key size:
 *  - classic: aeskeygenassist (.byte 102,15,58,223 = 66 0F 3A DF) with
 *    per-rcon immediates, folded by the .Lkey_expansion_* helpers;
 *  - "_alt": pshufb/aesenclast-based expansion using the .Lkey_rotate*
 *    and .Lkey_rcon1* constants, chosen by CPU feature bits
 *    (mask (1<<28)|(1<<11) of OPENSSL_ia32cap_P+4, taken when only
 *    bit 28 is set -- NOTE(review): presumably AVX-without-XOP;
 *    confirm the bit assignment).
 */
.globl aesni_set_encrypt_key
.hidden aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:        /* internal entry used by decrypt-key */
.byte 0x48,0x83,0xEC,0x08       /* subq $8,%rsp (raw encoding) */
movq $-1,%rax                   /* default: NULL-argument error */
testq %rdi,%rdi
jz .Lenc_key_ret
testq %rdx,%rdx
jz .Lenc_key_ret

movl $268437504,%r10d           /* 0x10000800 feature mask */
movups (%rdi),%xmm0             /* first 128 bits of user key */
xorps %xmm4,%xmm4
andl OPENSSL_ia32cap_P+4(%rip),%r10d
leaq 16(%rdx),%rax              /* rax -> round key #1 slot */
cmpl $256,%esi
je .L14rounds
cmpl $192,%esi
je .L12rounds
cmpl $128,%esi
jne .Lbad_keybits

.L10rounds:                     /* AES-128 */
movl $9,%esi
cmpl $268435456,%r10d           /* 0x10000000: take alt path? */
je .L10rounds_alt

movups %xmm0,(%rdx)             /* round key 0 = user key */
.byte 102,15,58,223,200,1       /* aeskeygenassist $rcon,%xmm0,%xmm1 */
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2
call .Lkey_expansion_128
.byte 102,15,58,223,200,4
call .Lkey_expansion_128
.byte 102,15,58,223,200,8
call .Lkey_expansion_128
.byte 102,15,58,223,200,16
call .Lkey_expansion_128
.byte 102,15,58,223,200,32
call .Lkey_expansion_128
.byte 102,15,58,223,200,64
call .Lkey_expansion_128
.byte 102,15,58,223,200,128
call .Lkey_expansion_128
.byte 102,15,58,223,200,27
call .Lkey_expansion_128
.byte 102,15,58,223,200,54
call .Lkey_expansion_128
movups %xmm0,(%rax)
movl %esi,80(%rax)              /* store round count after last key */
xorl %eax,%eax                  /* success */
jmp .Lenc_key_ret

.align 16
.L10rounds_alt:
/* Alternative AES-128 expansion: rotate word via pshufb, apply
 * SubWord via aesenclast with a doubling rcon in xmm4. */
movdqa .Lkey_rotate(%rip),%xmm5
movl $8,%r10d
movdqa .Lkey_rcon1(%rip),%xmm4
movdqa %xmm0,%xmm2
movdqu %xmm0,(%rdx)
jmp .Loop_key128

.align 16
.Loop_key128:
.byte 102,15,56,0,197           /* pshufb %xmm5,%xmm0 (RotWord) */
.byte 102,15,56,221,196         /* aesenclast %xmm4,%xmm0 (SubWord+rcon) */
pslld $1,%xmm4                  /* rcon <<= 1 for next round */
leaq 16(%rax),%rax

movdqa %xmm2,%xmm3              /* xmm2 = prev key sliding-xor'd */
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,-16(%rax)
movdqa %xmm0,%xmm2

decl %r10d
jnz .Loop_key128

movdqa .Lkey_rcon1b(%rip),%xmm4 /* rcon 0x1b for round 9 */

.byte 102,15,56,0,197           /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,221,196         /* aesenclast %xmm4,%xmm0 */
pslld $1,%xmm4                  /* -> rcon 0x36 for round 10 */

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)

movdqa %xmm0,%xmm2
.byte 102,15,56,0,197
.byte 102,15,56,221,196

movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2

pxor %xmm2,%xmm0
movdqu %xmm0,16(%rax)

movl %esi,96(%rax)
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.L12rounds:                     /* AES-192 */
movq 16(%rdi),%xmm2             /* low 64 bits: key bytes 16..23 */
movl $11,%esi
cmpl $268435456,%r10d
je .L12rounds_alt

movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1       /* aeskeygenassist $rcon,%xmm2,%xmm1 */
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
call .Lkey_expansion_192b
.byte 102,15,58,223,202,4
call .Lkey_expansion_192a
.byte 102,15,58,223,202,8
call .Lkey_expansion_192b
.byte 102,15,58,223,202,16
call .Lkey_expansion_192a
.byte 102,15,58,223,202,32
call .Lkey_expansion_192b
.byte 102,15,58,223,202,64
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
call .Lkey_expansion_192b
movups %xmm0,(%rax)
movl %esi,48(%rax)
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L12rounds_alt:
movdqa .Lkey_rotate192(%rip),%xmm5
movdqa .Lkey_rcon1(%rip),%xmm4
movl $8,%r10d
movdqu %xmm0,(%rdx)
jmp .Loop_key192

.align 16
.Loop_key192:
movq %xmm2,0(%rax)              /* emit 8-byte half of 24-byte step */
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213           /* pshufb %xmm5,%xmm2 */
.byte 102,15,56,221,212         /* aesenclast %xmm4,%xmm2 */
pslld $1,%xmm4
leaq 24(%rax),%rax

movdqa %xmm0,%xmm3              /* sliding-xor on the 128-bit half */
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0

pshufd $0xff,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3

pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%rax)

decl %r10d
jnz .Loop_key192

movl %esi,32(%rax)
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.L14rounds:                     /* AES-256 */
movups 16(%rdi),%xmm2           /* second 128 bits of user key */
movl $13,%esi
leaq 16(%rax),%rax
cmpl $268435456,%r10d
je .L14rounds_alt

movups %xmm0,(%rdx)
movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1       /* alternating a/b rcon steps */
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1
call .Lkey_expansion_256b
.byte 102,15,58,223,202,2
call .Lkey_expansion_256a
.byte 102,15,58,223,200,2
call .Lkey_expansion_256b
.byte 102,15,58,223,202,4
call .Lkey_expansion_256a
.byte 102,15,58,223,200,4
call .Lkey_expansion_256b
.byte 102,15,58,223,202,8
call .Lkey_expansion_256a
.byte 102,15,58,223,200,8
call .Lkey_expansion_256b
.byte 102,15,58,223,202,16
call .Lkey_expansion_256a
.byte 102,15,58,223,200,16
call .Lkey_expansion_256b
.byte 102,15,58,223,202,32
call .Lkey_expansion_256a
.byte 102,15,58,223,200,32
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
call .Lkey_expansion_256a
movups %xmm0,(%rax)
movl %esi,16(%rax)
xorq %rax,%rax
jmp .Lenc_key_ret

.align 16
.L14rounds_alt:
movdqa .Lkey_rotate(%rip),%xmm5
movdqa .Lkey_rcon1(%rip),%xmm4
movl $7,%r10d
movdqu %xmm0,0(%rdx)
movdqa %xmm2,%xmm1
movdqu %xmm2,16(%rdx)
jmp .Loop_key256

.align 16
.Loop_key256:
.byte 102,15,56,0,213           /* pshufb %xmm5,%xmm2 */
.byte 102,15,56,221,212         /* aesenclast %xmm4,%xmm2 */

movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4

pxor %xmm2,%xmm0
movdqu %xmm0,(%rax)

decl %r10d
jz .Ldone_key256

/* Odd round key: SubWord only (zero rcon in xmm3). */
pshufd $0xff,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211         /* aesenclast %xmm3,%xmm2 */

movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1

pxor %xmm1,%xmm2
movdqu %xmm2,16(%rax)
leaq 32(%rax),%rax
movdqa %xmm2,%xmm1

jmp .Loop_key256

.Ldone_key256:
movl %esi,16(%rax)
xorl %eax,%eax
jmp .Lenc_key_ret

.align 16
.Lbad_keybits:
movq $-2,%rax                   /* unsupported key length */
.Lenc_key_ret:
pxor %xmm0,%xmm0                /* scrub key material from registers */
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp
.byte 0xf3,0xc3                 /* rep ret */
.LSEH_end_set_encrypt_key:
3472 | |
.align 16
/*
 * AES-128 key-schedule fold helper.
 * In:  xmm0 = previous round key, xmm1 = aeskeygenassist output,
 *      xmm4 = zero scratch (restored to a usable state by shufps).
 * Out: xmm0 = next round key, stored at (%rax) on the next call.
 * The shufps/xorps pairs implement the sliding XOR of the schedule;
 * the final shufps broadcasts the SubWord+rcon word from xmm1.
 */
.Lkey_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:       /* first round: skip the store */
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1         /* broadcast keygenassist word */
xorps %xmm1,%xmm0
.byte 0xf3,0xc3                 /* rep ret */
3485 | |
.align 16
/*
 * AES-192 key-schedule fold, variant "a": stores the previous full
 * round key, then updates the 128-bit (xmm0) and 64-bit (xmm2) halves.
 * xmm1 = aeskeygenassist output, xmm4 = scratch, xmm5 = saved copy
 * used later by the "b" variant to assemble 24-byte-aligned output.
 */
.Lkey_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:      /* first round: skip the store */
movaps %xmm2,%xmm5
.Lkey_expansion_192b_warm:      /* shared tail with variant "b" */
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1          /* broadcast keygenassist word */
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3         /* propagate into the 64-bit half */
pxor %xmm3,%xmm2
.byte 0xf3,0xc3                 /* rep ret */
3505 | |
.align 16
/*
 * 192-bit schedule step that also flushes two full round keys: repacks the
 * saved trailing words (%xmm5) with the new words (%xmm0/%xmm2) into two
 * 128-bit round keys, stores them, then continues at the warm path.
 */
.Lkey_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5 /* xmm5 = { old trailing qword, low qword of xmm0 } */
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3 /* xmm3 = { high qword of xmm0, low qword of xmm2 } */
movups %xmm3,16(%rax)
leaq 32(%rax),%rax /* advanced past the two stored round keys */
jmp .Lkey_expansion_192b_warm
3515 | |
.align 16
/*
 * 256-bit schedule, even step: computes the next round key derived from the
 * low half of the key.  In: %xmm0 = previous low-half round key, %xmm2 =
 * previous high-half round key (stored here), %xmm1 = AESKEYGENASSIST result
 * (presumably -- call sites above this view, TODO confirm), %xmm4 = scratch
 * assumed zero.  Out: %xmm0 = next round key.
 */
.Lkey_expansion_256a:
movups %xmm2,(%rax) /* store the high-half round key from the odd step */
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold: /* first entry: nothing to store yet */
/* prefix-XOR of the four words in %xmm0 via the shufps slide trick */
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1 /* broadcast dword 3 (SubWord/RotWord/rcon word) */
xorps %xmm1,%xmm0
.byte 0xf3,0xc3 /* rep ret */
3528 | |
.align 16
/*
 * 256-bit schedule, odd step: computes the next round key derived from the
 * high half (%xmm2); uses dword 2 of the AESKEYGENASSIST result (SubWord
 * only, no rcon).  %xmm4 scratch assumed zero, as in the other helpers.
 */
.Lkey_expansion_256b:
movups %xmm0,(%rax) /* store the low-half round key from the even step */
leaq 16(%rax),%rax

/* prefix-XOR of the four words in %xmm2 via the shufps slide trick */
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1 /* broadcast dword 2 (SubWord-only word) */
xorps %xmm1,%xmm2
.byte 0xf3,0xc3 /* rep ret */
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
.align 64
/* Constant pool; everything below is referenced %rip-relative. */
.Lbswap_mask:
/* pshufb mask reversing all 16 bytes (endianness swap) */
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
/* 32-bit lane increment by 6 -- presumably for a 6-block CTR loop;
   the users are outside this view, TODO confirm */
.long 6,6,6,0
.Lincrement64:
/* low 64-bit lane increment by 1 */
.long 1,0,0,0
.Lxts_magic:
/* 0x87 = GF(2^128) reduction polynomial, per the XTS tweak convention */
.long 0x87,0,1,0
.Lincrement1:
/* big-endian 128-bit +1 (least-significant byte last) */
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:
/* pshufb mask selecting bytes 13,14,15,12 into every dword:
   RotWord(word 3) broadcast -- used in .Loop_key256 above */
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
/* pshufb mask: RotWord(word 1) broadcast to every dword */
.long 0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:
/* AES round constant 0x01 per lane; doubled each round via pslld $1 */
.long 1,1,1,1
.Lkey_rcon1b:
/* round constant 0x1b (value after GF(2^8) reduction) */
.long 0x1b,0x1b,0x1b,0x1b

/* "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
OLD | NEW |