#if defined(__i386__)
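/* GHASH (GCM universal hash) routines for 32-bit x86. Judging by the ASCII
   signature at the end of the file, this appears to be generated output of
   the CRYPTOGAMS ghash-x86.pl perlasm script and is typically not edited
   by hand. */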
.file "ghash-x86.S"
.text
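/* gcm_gmult_4bit_x86: scalar 4-bit table-driven GHASH multiply. Assuming
   the usual OpenSSL GHASH prototype (Xi, Htable), it computes Xi = Xi * H
   in GF(2^128), one nibble per iteration, with the reduction constants
   materialized on the stack by the movl $... block below. */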
.globl gcm_gmult_4bit_x86
.hidden gcm_gmult_4bit_x86
.type gcm_gmult_4bit_x86,@function
.align 16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $84,%esp
	movl 104(%esp),%edi
	movl 108(%esp),%esi
	movl (%edi),%ebp
	movl 4(%edi),%edx
	movl 8(%edi),%ecx
	movl 12(%edi),%ebx
	movl $0,16(%esp)
	movl $471859200,20(%esp)
	movl $943718400,24(%esp)
	movl $610271232,28(%esp)
	movl $1887436800,32(%esp)
	movl $1822425088,36(%esp)
	movl $1220542464,40(%esp)
	movl $1423966208,44(%esp)
	movl $3774873600,48(%esp)
	movl $4246732800,52(%esp)
	movl $3644850176,56(%esp)
	movl $3311403008,60(%esp)
	movl $2441084928,64(%esp)
	movl $2376073216,68(%esp)
	movl $2847932416,72(%esp)
	movl $3051356160,76(%esp)
	movl %ebp,(%esp)
	movl %edx,4(%esp)
	movl %ecx,8(%esp)
	movl %ebx,12(%esp)
	shrl $20,%ebx
	andl $240,%ebx
	movl 4(%esi,%ebx,1),%ebp
	movl (%esi,%ebx,1),%edx
	movl 12(%esi,%ebx,1),%ecx
	movl 8(%esi,%ebx,1),%ebx
	xorl %eax,%eax
	movl $15,%edi
	jmp .L000x86_loop
.align 16
.L000x86_loop:
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	andb $240,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	decl %edi
	js .L001x86_break
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	shlb $4,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	jmp .L000x86_loop
.align 16
.L001x86_break:
	bswap %ebx
	bswap %ecx
	bswap %edx
	bswap %ebp
	movl 104(%esp),%edi
	movl %ebx,12(%edi)
	movl %ecx,8(%edi)
	movl %edx,4(%edi)
	movl %ebp,(%edi)
	addl $84,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
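/* gcm_ghash_4bit_x86: same 4-bit table algorithm, hashing a whole buffer.
   Assuming arguments (Xi, Htable, inp, len), each 16-byte block is XORed
   into Xi before the multiply; 116(%esp) holds the computed input end
   pointer that bounds the outer loop. */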
.globl gcm_ghash_4bit_x86
.hidden gcm_ghash_4bit_x86
.type gcm_ghash_4bit_x86,@function
.align 16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $84,%esp
	movl 104(%esp),%ebx
	movl 108(%esp),%esi
	movl 112(%esp),%edi
	movl 116(%esp),%ecx
	addl %edi,%ecx
	movl %ecx,116(%esp)
	movl (%ebx),%ebp
	movl 4(%ebx),%edx
	movl 8(%ebx),%ecx
	movl 12(%ebx),%ebx
	movl $0,16(%esp)
	movl $471859200,20(%esp)
	movl $943718400,24(%esp)
	movl $610271232,28(%esp)
	movl $1887436800,32(%esp)
	movl $1822425088,36(%esp)
	movl $1220542464,40(%esp)
	movl $1423966208,44(%esp)
	movl $3774873600,48(%esp)
	movl $4246732800,52(%esp)
	movl $3644850176,56(%esp)
	movl $3311403008,60(%esp)
	movl $2441084928,64(%esp)
	movl $2376073216,68(%esp)
	movl $2847932416,72(%esp)
	movl $3051356160,76(%esp)
.align 16
.L002x86_outer_loop:
	xorl 12(%edi),%ebx
	xorl 8(%edi),%ecx
	xorl 4(%edi),%edx
	xorl (%edi),%ebp
	movl %ebx,12(%esp)
	movl %ecx,8(%esp)
	movl %edx,4(%esp)
	movl %ebp,(%esp)
	shrl $20,%ebx
	andl $240,%ebx
	movl 4(%esi,%ebx,1),%ebp
	movl (%esi,%ebx,1),%edx
	movl 12(%esi,%ebx,1),%ecx
	movl 8(%esi,%ebx,1),%ebx
	xorl %eax,%eax
	movl $15,%edi
	jmp .L003x86_loop
.align 16
.L003x86_loop:
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	andb $240,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	decl %edi
	js .L004x86_break
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	shlb $4,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	jmp .L003x86_loop
.align 16
.L004x86_break:
	bswap %ebx
	bswap %ecx
	bswap %edx
	bswap %ebp
	movl 112(%esp),%edi
	leal 16(%edi),%edi
	cmpl 116(%esp),%edi
	movl %edi,112(%esp)
	jb .L002x86_outer_loop
	movl 104(%esp),%edi
	movl %ebx,12(%edi)
	movl %ecx,8(%edi)
	movl %edx,4(%edi)
	movl %ebp,(%edi)
	addl $84,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
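/* gcm_gmult_4bit_mmx: MMX variant of the single-block multiply. The
   128-bit accumulator lives in %mm0/%mm1, and reduction uses the
   .Lrem_4bit table, located PC-relatively via the call/pop at
   .L005pic_point so the code stays position-independent. */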
.globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	call .L005pic_point
.L005pic_point:
	popl %eax
	leal .Lrem_4bit-.L005pic_point(%eax),%eax
	movzbl 15(%edi),%ebx
	xorl %ecx,%ecx
	movl %ebx,%edx
	movb %dl,%cl
	movl $14,%ebp
	shlb $4,%cl
	andl $240,%edx
	movq 8(%esi,%ecx,1),%mm0
	movq (%esi,%ecx,1),%mm1
	movd %mm0,%ebx
	jmp .L006mmx_loop
.align 16
.L006mmx_loop:
	psrlq $4,%mm0
	andl $15,%ebx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb (%edi,%ebp,1),%cl
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	decl %ebp
	movd %mm0,%ebx
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	pxor %mm2,%mm0
	js .L007mmx_break
	shlb $4,%cl
	andl $15,%ebx
	psrlq $4,%mm0
	andl $240,%edx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	movd %mm0,%ebx
	pxor (%esi,%ecx,1),%mm1
	pxor %mm2,%mm0
	jmp .L006mmx_loop
.align 16
.L007mmx_break:
	shlb $4,%cl
	andl $15,%ebx
	psrlq $4,%mm0
	andl $240,%edx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	movd %mm0,%ebx
	pxor (%esi,%ecx,1),%mm1
	pxor %mm2,%mm0
	psrlq $4,%mm0
	andl $15,%ebx
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	movd %mm0,%ebx
	pxor (%esi,%edx,1),%mm1
	pxor %mm2,%mm0
	psrlq $32,%mm0
	movd %mm1,%edx
	psrlq $32,%mm1
	movd %mm0,%ecx
	movd %mm1,%ebp
	bswap %ebx
	bswap %edx
	bswap %ecx
	bswap %ebp
	emms
	movl %ebx,12(%edi)
	movl %edx,4(%edi)
	movl %ecx,8(%edi)
	movl %ebp,(%edi)
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
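/* gcm_ghash_4bit_mmx: MMX bulk-hash variant. The long prologue below
   spills a pre-shifted copy of Htable onto an aligned stack frame, after
   which .L009outer consumes the input one byte at a time, folding the
   reduction through the .Lrem_8bit table. */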
.globl gcm_ghash_4bit_mmx
.hidden gcm_ghash_4bit_mmx
.type gcm_ghash_4bit_mmx,@function
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%eax
	movl 24(%esp),%ebx
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl %esp,%ebp
	call .L008pic_point
.L008pic_point:
	popl %esi
	leal .Lrem_8bit-.L008pic_point(%esi),%esi
	subl $544,%esp
	andl $-64,%esp
	subl $16,%esp
	addl %ecx,%edx
	movl %eax,544(%esp)
	movl %edx,552(%esp)
	movl %ebp,556(%esp)
	addl $128,%ebx
	leal 144(%esp),%edi
	leal 400(%esp),%ebp
	movl -120(%ebx),%edx
	movq -120(%ebx),%mm0
	movq -128(%ebx),%mm3
	shll $4,%edx
	movb %dl,(%esp)
	movl -104(%ebx),%edx
	movq -104(%ebx),%mm2
	movq -112(%ebx),%mm5
	movq %mm0,-128(%edi)
	psrlq $4,%mm0
	movq %mm3,(%edi)
	movq %mm3,%mm7
	psrlq $4,%mm3
	shll $4,%edx
	movb %dl,1(%esp)
	movl -88(%ebx),%edx
	movq -88(%ebx),%mm1
	psllq $60,%mm7
	movq -96(%ebx),%mm4
	por %mm7,%mm0
	movq %mm2,-120(%edi)
	psrlq $4,%mm2
	movq %mm5,8(%edi)
	movq %mm5,%mm6
	movq %mm0,-128(%ebp)
	psrlq $4,%mm5
	movq %mm3,(%ebp)
	shll $4,%edx
	movb %dl,2(%esp)
	movl -72(%ebx),%edx
	movq -72(%ebx),%mm0
	psllq $60,%mm6
	movq -80(%ebx),%mm3
	por %mm6,%mm2
	movq %mm1,-112(%edi)
	psrlq $4,%mm1
	movq %mm4,16(%edi)
	movq %mm4,%mm7
	movq %mm2,-120(%ebp)
	psrlq $4,%mm4
	movq %mm5,8(%ebp)
	shll $4,%edx
	movb %dl,3(%esp)
	movl -56(%ebx),%edx
	movq -56(%ebx),%mm2
	psllq $60,%mm7
	movq -64(%ebx),%mm5
	por %mm7,%mm1
	movq %mm0,-104(%edi)
	psrlq $4,%mm0
	movq %mm3,24(%edi)
	movq %mm3,%mm6
	movq %mm1,-112(%ebp)
	psrlq $4,%mm3
	movq %mm4,16(%ebp)
	shll $4,%edx
	movb %dl,4(%esp)
	movl -40(%ebx),%edx
	movq -40(%ebx),%mm1
	psllq $60,%mm6
	movq -48(%ebx),%mm4
	por %mm6,%mm0
	movq %mm2,-96(%edi)
	psrlq $4,%mm2
	movq %mm5,32(%edi)
	movq %mm5,%mm7
	movq %mm0,-104(%ebp)
	psrlq $4,%mm5
	movq %mm3,24(%ebp)
	shll $4,%edx
	movb %dl,5(%esp)
	movl -24(%ebx),%edx
	movq -24(%ebx),%mm0
	psllq $60,%mm7
	movq -32(%ebx),%mm3
	por %mm7,%mm2
	movq %mm1,-88(%edi)
	psrlq $4,%mm1
	movq %mm4,40(%edi)
	movq %mm4,%mm6
	movq %mm2,-96(%ebp)
	psrlq $4,%mm4
	movq %mm5,32(%ebp)
	shll $4,%edx
	movb %dl,6(%esp)
	movl -8(%ebx),%edx
	movq -8(%ebx),%mm2
	psllq $60,%mm6
	movq -16(%ebx),%mm5
	por %mm6,%mm1
	movq %mm0,-80(%edi)
	psrlq $4,%mm0
	movq %mm3,48(%edi)
	movq %mm3,%mm7
	movq %mm1,-88(%ebp)
	psrlq $4,%mm3
	movq %mm4,40(%ebp)
	shll $4,%edx
	movb %dl,7(%esp)
	movl 8(%ebx),%edx
	movq 8(%ebx),%mm1
	psllq $60,%mm7
	movq (%ebx),%mm4
	por %mm7,%mm0
	movq %mm2,-72(%edi)
	psrlq $4,%mm2
	movq %mm5,56(%edi)
	movq %mm5,%mm6
	movq %mm0,-80(%ebp)
	psrlq $4,%mm5
	movq %mm3,48(%ebp)
	shll $4,%edx
	movb %dl,8(%esp)
	movl 24(%ebx),%edx
	movq 24(%ebx),%mm0
	psllq $60,%mm6
	movq 16(%ebx),%mm3
	por %mm6,%mm2
	movq %mm1,-64(%edi)
	psrlq $4,%mm1
	movq %mm4,64(%edi)
	movq %mm4,%mm7
	movq %mm2,-72(%ebp)
	psrlq $4,%mm4
	movq %mm5,56(%ebp)
	shll $4,%edx
	movb %dl,9(%esp)
	movl 40(%ebx),%edx
	movq 40(%ebx),%mm2
	psllq $60,%mm7
	movq 32(%ebx),%mm5
	por %mm7,%mm1
	movq %mm0,-56(%edi)
	psrlq $4,%mm0
	movq %mm3,72(%edi)
	movq %mm3,%mm6
	movq %mm1,-64(%ebp)
	psrlq $4,%mm3
	movq %mm4,64(%ebp)
	shll $4,%edx
	movb %dl,10(%esp)
	movl 56(%ebx),%edx
	movq 56(%ebx),%mm1
	psllq $60,%mm6
	movq 48(%ebx),%mm4
	por %mm6,%mm0
	movq %mm2,-48(%edi)
	psrlq $4,%mm2
	movq %mm5,80(%edi)
	movq %mm5,%mm7
	movq %mm0,-56(%ebp)
	psrlq $4,%mm5
	movq %mm3,72(%ebp)
	shll $4,%edx
	movb %dl,11(%esp)
	movl 72(%ebx),%edx
	movq 72(%ebx),%mm0
	psllq $60,%mm7
	movq 64(%ebx),%mm3
	por %mm7,%mm2
	movq %mm1,-40(%edi)
	psrlq $4,%mm1
	movq %mm4,88(%edi)
	movq %mm4,%mm6
	movq %mm2,-48(%ebp)
	psrlq $4,%mm4
	movq %mm5,80(%ebp)
	shll $4,%edx
	movb %dl,12(%esp)
	movl 88(%ebx),%edx
	movq 88(%ebx),%mm2
	psllq $60,%mm6
	movq 80(%ebx),%mm5
	por %mm6,%mm1
	movq %mm0,-32(%edi)
	psrlq $4,%mm0
	movq %mm3,96(%edi)
	movq %mm3,%mm7
	movq %mm1,-40(%ebp)
	psrlq $4,%mm3
	movq %mm4,88(%ebp)
	shll $4,%edx
	movb %dl,13(%esp)
	movl 104(%ebx),%edx
	movq 104(%ebx),%mm1
	psllq $60,%mm7
	movq 96(%ebx),%mm4
	por %mm7,%mm0
	movq %mm2,-24(%edi)
	psrlq $4,%mm2
	movq %mm5,104(%edi)
	movq %mm5,%mm6
	movq %mm0,-32(%ebp)
	psrlq $4,%mm5
	movq %mm3,96(%ebp)
	shll $4,%edx
	movb %dl,14(%esp)
	movl 120(%ebx),%edx
	movq 120(%ebx),%mm0
	psllq $60,%mm6
	movq 112(%ebx),%mm3
	por %mm6,%mm2
	movq %mm1,-16(%edi)
	psrlq $4,%mm1
	movq %mm4,112(%edi)
	movq %mm4,%mm7
	movq %mm2,-24(%ebp)
	psrlq $4,%mm4
	movq %mm5,104(%ebp)
	shll $4,%edx
	movb %dl,15(%esp)
	psllq $60,%mm7
	por %mm7,%mm1
	movq %mm0,-8(%edi)
	psrlq $4,%mm0
	movq %mm3,120(%edi)
	movq %mm3,%mm6
	movq %mm1,-16(%ebp)
	psrlq $4,%mm3
	movq %mm4,112(%ebp)
	psllq $60,%mm6
	por %mm6,%mm0
	movq %mm0,-8(%ebp)
	movq %mm3,120(%ebp)
	movq (%eax),%mm6
	movl 8(%eax),%ebx
	movl 12(%eax),%edx
.align 16
.L009outer:
	xorl 12(%ecx),%edx
	xorl 8(%ecx),%ebx
	pxor (%ecx),%mm6
	leal 16(%ecx),%ecx
	movl %ebx,536(%esp)
	movq %mm6,528(%esp)
	movl %ecx,548(%esp)
	xorl %eax,%eax
	roll $8,%edx
	movb %dl,%al
	movl %eax,%ebp
	andb $15,%al
	shrl $4,%ebp
	pxor %mm0,%mm0
	roll $8,%edx
	pxor %mm1,%mm1
	pxor %mm2,%mm2
	movq 16(%esp,%eax,8),%mm7
	movq 144(%esp,%eax,8),%mm6
	movb %dl,%al
	movd %mm7,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	shrl $4,%edi
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 536(%esp),%edx
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 532(%esp),%edx
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 528(%esp),%edx
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm1,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm0
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	xorb (%esp,%ebp,1),%bl
	movb %dl,%al
	movd %mm7,%ecx
	movzbl %bl,%ebx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%ebp
	psrlq $8,%mm6
	pxor 272(%esp,%edi,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm0,%mm6
	shrl $4,%ebp
	pinsrw $2,(%esi,%ebx,2),%mm2
	pxor 16(%esp,%eax,8),%mm7
	roll $8,%edx
	pxor 144(%esp,%eax,8),%mm6
	pxor %mm3,%mm7
	pxor 400(%esp,%edi,8),%mm6
	xorb (%esp,%edi,1),%cl
	movb %dl,%al
	movl 524(%esp),%edx
	movd %mm7,%ebx
	movzbl %cl,%ecx
	psrlq $8,%mm7
	movq %mm6,%mm3
	movl %eax,%edi
	psrlq $8,%mm6
	pxor 272(%esp,%ebp,8),%mm7
	andb $15,%al
	psllq $56,%mm3
	pxor %mm2,%mm6
	shrl $4,%edi
	pinsrw $2,(%esi,%ecx,2),%mm1
	pxor 16(%esp,%eax,8),%mm7
	pxor 144(%esp,%eax,8),%mm6
	xorb (%esp,%ebp,1),%bl
	pxor %mm3,%mm7
	pxor 400(%esp,%ebp,8),%mm6
	movzbl %bl,%ebx
	pxor %mm2,%mm2
	psllq $4,%mm1
	movd %mm7,%ecx
	psrlq $4,%mm7
	movq %mm6,%mm3
	psrlq $4,%mm6
	shll $4,%ecx
	pxor 16(%esp,%edi,8),%mm7
	psllq $60,%mm3
	movzbl %cl,%ecx
	pxor %mm3,%mm7
	pxor 144(%esp,%edi,8),%mm6
	pinsrw $2,(%esi,%ebx,2),%mm0
	pxor %mm1,%mm6
	movd %mm7,%edx
	pinsrw $3,(%esi,%ecx,2),%mm2
	psllq $12,%mm0
	pxor %mm0,%mm6
	psrlq $32,%mm7
	pxor %mm2,%mm6
	movl 548(%esp),%ecx
	movd %mm7,%ebx
	movq %mm6,%mm3
	psllw $8,%mm6
	psrlw $8,%mm3
	por %mm3,%mm6
	bswap %edx
	pshufw $27,%mm6,%mm6
	bswap %ebx
	cmpl 552(%esp),%ecx
	jne .L009outer
	movl 544(%esp),%eax
	movl %edx,12(%eax)
	movl %ebx,8(%eax)
	movq %mm6,(%eax)
	movl 556(%esp),%esp
	emms
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
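/* gcm_init_clmul: precompute key material for the PCLMULQDQ code paths.
   Assuming arguments (Htable, H): H is loaded, shuffled and doubled
   modulo the GCM polynomial, then, as read from the stores below, H, H*H
   and their folded Karatsuba halves land at offsets 0, 16 and 32 of
   Htable. */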
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl 4(%esp),%edx
	movl 8(%esp),%eax
	call .L010pic
.L010pic:
	popl %ecx
	leal .Lbswap-.L010pic(%ecx),%ecx
	movdqu (%eax),%xmm2
	pshufd $78,%xmm2,%xmm2
	pshufd $255,%xmm2,%xmm4
	movdqa %xmm2,%xmm3
	psllq $1,%xmm2
	pxor %xmm5,%xmm5
	psrlq $63,%xmm3
	pcmpgtd %xmm4,%xmm5
	pslldq $8,%xmm3
	por %xmm3,%xmm2
	pand 16(%ecx),%xmm5
	pxor %xmm5,%xmm2
	movdqa %xmm2,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
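/* The .byte runs below are hand-assembled SSE opcodes, kept as raw bytes
   for the benefit of old assemblers: 102,15,58,68,... encodes pclmulqdq
   (e.g. 102,15,58,68,194,0 is pclmulqdq $0x00,%xmm2,%xmm0), and
   102,15,58,15,227,8 further down is palignr $8,%xmm3,%xmm4. */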
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
	xorps %xmm0,%xmm3
	xorps %xmm1,%xmm3
	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	pshufd $78,%xmm2,%xmm3
	pshufd $78,%xmm0,%xmm4
	pxor %xmm2,%xmm3
	movdqu %xmm2,(%edx)
	pxor %xmm0,%xmm4
	movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
	movdqu %xmm4,32(%edx)
	ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
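/* gcm_gmult_clmul: single-block GHASH multiply via carry-less
   multiplication, assuming arguments (Xi, Htable). 102,15,56,0,197
   encodes pshufb %xmm5,%xmm0, applying the .Lbswap byte-reversal mask on
   the way in and out. */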
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl 4(%esp),%eax
	movl 8(%esp),%edx
	call .L011pic
.L011pic:
	popl %ecx
	leal .Lbswap-.L011pic(%ecx),%ecx
	movdqu (%eax),%xmm0
	movdqa (%ecx),%xmm5
	movups (%edx),%xmm2
.byte 102,15,56,0,197
	movups 32(%edx),%xmm4
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
	xorps %xmm0,%xmm3
	xorps %xmm1,%xmm3
	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.byte 102,15,56,0,197
	movdqu %xmm0,(%eax)
	ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
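/* gcm_ghash_clmul: bulk GHASH via PCLMULQDQ, assuming arguments
   (Xi, Htable, inp, len). .L015mod_loop processes two blocks per
   iteration using H and H^2 from Htable; remainders fall through to
   .L014even_tail / .L013odd_tail. */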
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%eax
	movl 24(%esp),%edx
	movl 28(%esp),%esi
	movl 32(%esp),%ebx
	call .L012pic
.L012pic:
	popl %ecx
	leal .Lbswap-.L012pic(%ecx),%ecx
	movdqu (%eax),%xmm0
	movdqa (%ecx),%xmm5
	movdqu (%edx),%xmm2
.byte 102,15,56,0,197
	subl $16,%ebx
	jz .L013odd_tail
	movdqu (%esi),%xmm3
	movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
	movdqu 32(%edx),%xmm5
	pxor %xmm3,%xmm0
	pshufd $78,%xmm6,%xmm3
	movdqa %xmm6,%xmm7
	pxor %xmm6,%xmm3
	leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
	movups 16(%edx),%xmm2
	nop
	subl $32,%ebx
	jbe .L014even_tail
	jmp .L015mod_loop
.align 32
.L015mod_loop:
	pshufd $78,%xmm0,%xmm4
	movdqa %xmm0,%xmm1
	pxor %xmm0,%xmm4
	nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
	movups (%edx),%xmm2
	xorps %xmm6,%xmm0
	movdqa (%ecx),%xmm5
	xorps %xmm7,%xmm1
	movdqu (%esi),%xmm7
	pxor %xmm0,%xmm3
	movdqu 16(%esi),%xmm6
	pxor %xmm1,%xmm3
.byte 102,15,56,0,253
	pxor %xmm3,%xmm4
	movdqa %xmm4,%xmm3
	psrldq $8,%xmm4
	pslldq $8,%xmm3
	pxor %xmm4,%xmm1
	pxor %xmm3,%xmm0
.byte 102,15,56,0,245
	pxor %xmm7,%xmm1
	movdqa %xmm6,%xmm7
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
	movups 32(%edx),%xmm5
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	pshufd $78,%xmm7,%xmm3
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm7,%xmm3
	pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
	movups 16(%edx),%xmm2
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
	leal 32(%esi),%esi
	subl $32,%ebx
	ja .L015mod_loop
.L014even_tail:
	pshufd $78,%xmm0,%xmm4
	movdqa %xmm0,%xmm1
	pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
	movdqa (%ecx),%xmm5
	xorps %xmm6,%xmm0
	xorps %xmm7,%xmm1
	pxor %xmm0,%xmm3
	pxor %xmm1,%xmm3
	pxor %xmm3,%xmm4
	movdqa %xmm4,%xmm3
	psrldq $8,%xmm4
	pslldq $8,%xmm3
	pxor %xmm4,%xmm1
	pxor %xmm3,%xmm0
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
	testl %ebx,%ebx
	jnz .L016done
	movups (%edx),%xmm2
.L013odd_tail:
	movdqu (%esi),%xmm3
.byte 102,15,56,0,221
	pxor %xmm3,%xmm0
	movdqa %xmm0,%xmm1
	pshufd $78,%xmm0,%xmm3
	pshufd $78,%xmm2,%xmm4
	pxor %xmm0,%xmm3
	pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
	xorps %xmm0,%xmm3
	xorps %xmm1,%xmm3
	movdqa %xmm3,%xmm4
	psrldq $8,%xmm3
	pslldq $8,%xmm4
	pxor %xmm3,%xmm1
	pxor %xmm4,%xmm0
	movdqa %xmm0,%xmm4
	movdqa %xmm0,%xmm3
	psllq $5,%xmm0
	pxor %xmm0,%xmm3
	psllq $1,%xmm0
	pxor %xmm3,%xmm0
	psllq $57,%xmm0
	movdqa %xmm0,%xmm3
	pslldq $8,%xmm0
	psrldq $8,%xmm3
	pxor %xmm4,%xmm0
	pxor %xmm3,%xmm1
	movdqa %xmm0,%xmm4
	psrlq $1,%xmm0
	pxor %xmm4,%xmm1
	pxor %xmm0,%xmm4
	psrlq $5,%xmm0
	pxor %xmm4,%xmm0
	psrlq $1,%xmm0
	pxor %xmm1,%xmm0
.L016done:
.byte 102,15,56,0,197
	movdqu %xmm0,(%eax)
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
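/* .Lbswap: the 16-byte pshufb byte-reversal mask, followed by what
   appears to be the GCM polynomial constant (0x01,...,0xc2) consumed by
   gcm_init_clmul's pand. */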
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
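/* .Lrem_8bit: 256 16-bit entries; reduction table for the byte-at-a-time
   MMX bulk path. */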
.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
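/* .Lrem_4bit: 16 eight-byte entries; reduction table for the nibble-wide
   paths. */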
.align 64
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
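/* The bytes below decode to the ASCII signature
   "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>". */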
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif