OLD | NEW |
| (Empty) |
1 #if defined(__i386__) | |
2 .file "ghash-x86.S" | |
3 .text | |
# void gcm_gmult_4bit_x86(Xi, Htable)
# Scalar (no-SSE) 4-bit table-driven GHASH multiply: Xi <- Xi * H in GF(2^128).
# Args (after 4 pushes + 84-byte frame): 104(%esp)=Xi (16 bytes, read+written),
# 108(%esp)=Htable (indexed in 16-byte entries via (%esi,%ebx,1)).
# NOTE(review): arg roles follow the OpenSSL gcm_gmult_4bit convention — confirm against caller.
# Stack layout: 0..15(%esp) = byte copy of Xi; 16..79(%esp) = the same 16
# reduction constants as the Lrem_4bit table at the bottom of this file.
4 .globl _gcm_gmult_4bit_x86 | |
5 .private_extern _gcm_gmult_4bit_x86 | |
6 .align 4 | |
7 _gcm_gmult_4bit_x86: | |
8 L_gcm_gmult_4bit_x86_begin: | |
9 pushl %ebp | |
10 pushl %ebx | |
11 pushl %esi | |
12 pushl %edi | |
13 subl $84,%esp | |
14 movl 104(%esp),%edi | |
15 movl 108(%esp),%esi | |
16 movl (%edi),%ebp | |
17 movl 4(%edi),%edx | |
18 movl 8(%edi),%ecx | |
19 movl 12(%edi),%ebx | |
# Stage the 16 rem_4bit reduction constants on the stack at 16(%esp).
20 movl $0,16(%esp) | |
21 movl $471859200,20(%esp) | |
22 movl $943718400,24(%esp) | |
23 movl $610271232,28(%esp) | |
24 movl $1887436800,32(%esp) | |
25 movl $1822425088,36(%esp) | |
26 movl $1220542464,40(%esp) | |
27 movl $1423966208,44(%esp) | |
28 movl $3774873600,48(%esp) | |
29 movl $4246732800,52(%esp) | |
30 movl $3644850176,56(%esp) | |
31 movl $3311403008,60(%esp) | |
32 movl $2441084928,64(%esp) | |
33 movl $2376073216,68(%esp) | |
34 movl $2847932416,72(%esp) | |
35 movl $3051356160,76(%esp) | |
# Keep a byte-addressable copy of Xi at 0(%esp) for per-nibble lookups.
36 movl %ebp,(%esp) | |
37 movl %edx,4(%esp) | |
38 movl %ecx,8(%esp) | |
39 movl %ebx,12(%esp) | |
# Seed accumulator from Htable[low nibble of Xi's last byte] (nibble*16 = table offset).
40 shrl $20,%ebx | |
41 andl $240,%ebx | |
42 movl 4(%esi,%ebx,1),%ebp | |
43 movl (%esi,%ebx,1),%edx | |
44 movl 12(%esi,%ebx,1),%ecx | |
45 movl 8(%esi,%ebx,1),%ebx | |
46 xorl %eax,%eax | |
47 movl $15,%edi | |
48 jmp L000x86_loop | |
.align 4,0x90 is kept below; %edi counts bytes 15..0 of the Xi copy.
49 .align 4,0x90 | |
50 L000x86_loop: | |
# First half: shift the 128-bit accumulator (ebp:edx:ecx:ebx) right 4 bits,
# fold the shifted-out nibble via the stacked rem_4bit constants, then XOR in
# Htable[high nibble of current Xi byte].
51 movb %bl,%al | |
52 shrdl $4,%ecx,%ebx | |
53 andb $15,%al | |
54 shrdl $4,%edx,%ecx | |
55 shrdl $4,%ebp,%edx | |
56 shrl $4,%ebp | |
57 xorl 16(%esp,%eax,4),%ebp | |
58 movb (%esp,%edi,1),%al | |
59 andb $240,%al | |
60 xorl 8(%esi,%eax,1),%ebx | |
61 xorl 12(%esi,%eax,1),%ecx | |
62 xorl (%esi,%eax,1),%edx | |
63 xorl 4(%esi,%eax,1),%ebp | |
64 decl %edi | |
65 js L001x86_break | |
# Second half: same shift/reduce, then XOR in Htable[low nibble of next byte].
66 movb %bl,%al | |
67 shrdl $4,%ecx,%ebx | |
68 andb $15,%al | |
69 shrdl $4,%edx,%ecx | |
70 shrdl $4,%ebp,%edx | |
71 shrl $4,%ebp | |
72 xorl 16(%esp,%eax,4),%ebp | |
73 movb (%esp,%edi,1),%al | |
74 shlb $4,%al | |
75 xorl 8(%esi,%eax,1),%ebx | |
76 xorl 12(%esi,%eax,1),%ecx | |
77 xorl (%esi,%eax,1),%edx | |
78 xorl 4(%esi,%eax,1),%ebp | |
79 jmp L000x86_loop | |
80 .align 4,0x90 | |
81 L001x86_break: | |
# Convert the accumulator to big-endian byte order and write it back to Xi.
82 bswap %ebx | |
83 bswap %ecx | |
84 bswap %edx | |
85 bswap %ebp | |
86 movl 104(%esp),%edi | |
87 movl %ebx,12(%edi) | |
88 movl %ecx,8(%edi) | |
89 movl %edx,4(%edi) | |
90 movl %ebp,(%edi) | |
91 addl $84,%esp | |
92 popl %edi | |
93 popl %esi | |
94 popl %ebx | |
95 popl %ebp | |
96 ret | |
# void gcm_ghash_4bit_x86(Xi, Htable, inp, len)
# Scalar GHASH over a buffer: for each 16-byte block, Xi <- (Xi ^ block) * H.
# Args (after 4 pushes + 84-byte frame): 104(%esp)=Xi, 108(%esp)=Htable,
# 112(%esp)=inp, 116(%esp)=len; 116(%esp) is rewritten to inp+len (end pointer).
# NOTE(review): arg roles per the OpenSSL gcm_ghash_4bit convention — confirm against caller.
# Inner loop is identical to _gcm_gmult_4bit_x86 above.
97 .globl _gcm_ghash_4bit_x86 | |
98 .private_extern _gcm_ghash_4bit_x86 | |
99 .align 4 | |
100 _gcm_ghash_4bit_x86: | |
101 L_gcm_ghash_4bit_x86_begin: | |
102 pushl %ebp | |
103 pushl %ebx | |
104 pushl %esi | |
105 pushl %edi | |
106 subl $84,%esp | |
107 movl 104(%esp),%ebx | |
108 movl 108(%esp),%esi | |
109 movl 112(%esp),%edi | |
110 movl 116(%esp),%ecx | |
# Turn len into an end pointer so the outer loop can compare inp against it.
111 addl %edi,%ecx | |
112 movl %ecx,116(%esp) | |
113 movl (%ebx),%ebp | |
114 movl 4(%ebx),%edx | |
115 movl 8(%ebx),%ecx | |
116 movl 12(%ebx),%ebx | |
# Stage the 16 rem_4bit reduction constants on the stack at 16(%esp).
117 movl $0,16(%esp) | |
118 movl $471859200,20(%esp) | |
119 movl $943718400,24(%esp) | |
120 movl $610271232,28(%esp) | |
121 movl $1887436800,32(%esp) | |
122 movl $1822425088,36(%esp) | |
123 movl $1220542464,40(%esp) | |
124 movl $1423966208,44(%esp) | |
125 movl $3774873600,48(%esp) | |
126 movl $4246732800,52(%esp) | |
127 movl $3644850176,56(%esp) | |
128 movl $3311403008,60(%esp) | |
129 movl $2441084928,64(%esp) | |
130 movl $2376073216,68(%esp) | |
131 movl $2847932416,72(%esp) | |
132 movl $3051356160,76(%esp) | |
133 .align 4,0x90 | |
134 L002x86_outer_loop: | |
# XOR the next 16-byte input block into Xi and snapshot it at 0(%esp).
135 xorl 12(%edi),%ebx | |
136 xorl 8(%edi),%ecx | |
137 xorl 4(%edi),%edx | |
138 xorl (%edi),%ebp | |
139 movl %ebx,12(%esp) | |
140 movl %ecx,8(%esp) | |
141 movl %edx,4(%esp) | |
142 movl %ebp,(%esp) | |
# Seed accumulator from Htable[low nibble of the last byte].
143 shrl $20,%ebx | |
144 andl $240,%ebx | |
145 movl 4(%esi,%ebx,1),%ebp | |
146 movl (%esi,%ebx,1),%edx | |
147 movl 12(%esi,%ebx,1),%ecx | |
148 movl 8(%esi,%ebx,1),%ebx | |
149 xorl %eax,%eax | |
150 movl $15,%edi | |
151 jmp L003x86_loop | |
152 .align 4,0x90 | |
153 L003x86_loop: | |
# Two nibbles per iteration: shift-right-4, reduce via rem_4bit, XOR Htable row.
154 movb %bl,%al | |
155 shrdl $4,%ecx,%ebx | |
156 andb $15,%al | |
157 shrdl $4,%edx,%ecx | |
158 shrdl $4,%ebp,%edx | |
159 shrl $4,%ebp | |
160 xorl 16(%esp,%eax,4),%ebp | |
161 movb (%esp,%edi,1),%al | |
162 andb $240,%al | |
163 xorl 8(%esi,%eax,1),%ebx | |
164 xorl 12(%esi,%eax,1),%ecx | |
165 xorl (%esi,%eax,1),%edx | |
166 xorl 4(%esi,%eax,1),%ebp | |
167 decl %edi | |
168 js L004x86_break | |
169 movb %bl,%al | |
170 shrdl $4,%ecx,%ebx | |
171 andb $15,%al | |
172 shrdl $4,%edx,%ecx | |
173 shrdl $4,%ebp,%edx | |
174 shrl $4,%ebp | |
175 xorl 16(%esp,%eax,4),%ebp | |
176 movb (%esp,%edi,1),%al | |
177 shlb $4,%al | |
178 xorl 8(%esi,%eax,1),%ebx | |
179 xorl 12(%esi,%eax,1),%ecx | |
180 xorl (%esi,%eax,1),%edx | |
181 xorl 4(%esi,%eax,1),%ebp | |
182 jmp L003x86_loop | |
183 .align 4,0x90 | |
184 L004x86_break: | |
# Swap to big-endian, advance inp by 16, and loop until the end pointer.
185 bswap %ebx | |
186 bswap %ecx | |
187 bswap %edx | |
188 bswap %ebp | |
189 movl 112(%esp),%edi | |
190 leal 16(%edi),%edi | |
191 cmpl 116(%esp),%edi | |
192 movl %edi,112(%esp) | |
193 jb L002x86_outer_loop | |
# Store the final Xi and unwind.
194 movl 104(%esp),%edi | |
195 movl %ebx,12(%edi) | |
196 movl %ecx,8(%edi) | |
197 movl %edx,4(%edi) | |
198 movl %ebp,(%edi) | |
199 addl $84,%esp | |
200 popl %edi | |
201 popl %esi | |
202 popl %ebx | |
203 popl %ebp | |
204 ret | |
# void gcm_gmult_4bit_mmx(Xi, Htable)
# MMX 4-bit GHASH multiply: Xi <- Xi * H, accumulator held in %mm0/%mm1.
# Args: 20(%esp)=Xi, 24(%esp)=Htable (after the 4 pushes).
# Uses call/pop PIC sequence to address the Lrem_4bit reduction table (%eax).
205 .globl _gcm_gmult_4bit_mmx | |
206 .private_extern _gcm_gmult_4bit_mmx | |
207 .align 4 | |
208 _gcm_gmult_4bit_mmx: | |
209 L_gcm_gmult_4bit_mmx_begin: | |
210 pushl %ebp | |
211 pushl %ebx | |
212 pushl %esi | |
213 pushl %edi | |
214 movl 20(%esp),%edi | |
215 movl 24(%esp),%esi | |
# Position-independent address of Lrem_4bit.
216 call L005pic_point | |
217 L005pic_point: | |
218 popl %eax | |
219 leal Lrem_4bit-L005pic_point(%eax),%eax | |
# Start from the last byte of Xi; %ebp counts bytes 14..0.
220 movzbl 15(%edi),%ebx | |
221 xorl %ecx,%ecx | |
222 movl %ebx,%edx | |
223 movb %dl,%cl | |
224 movl $14,%ebp | |
225 shlb $4,%cl | |
226 andl $240,%edx | |
# Seed mm0:mm1 from Htable[low nibble]; %edx keeps the high-nibble offset.
227 movq 8(%esi,%ecx,1),%mm0 | |
228 movq (%esi,%ecx,1),%mm1 | |
229 movd %mm0,%ebx | |
230 jmp L006mmx_loop | |
231 .align 4,0x90 | |
232 L006mmx_loop: | |
# Per nibble: shift 128-bit mm1:mm0 right 4, fold the spilled nibble through
# Lrem_4bit (8-byte entries), XOR in the Htable row for the pending nibble.
233 psrlq $4,%mm0 | |
234 andl $15,%ebx | |
235 movq %mm1,%mm2 | |
236 psrlq $4,%mm1 | |
237 pxor 8(%esi,%edx,1),%mm0 | |
238 movb (%edi,%ebp,1),%cl | |
239 psllq $60,%mm2 | |
240 pxor (%eax,%ebx,8),%mm1 | |
241 decl %ebp | |
242 movd %mm0,%ebx | |
243 pxor (%esi,%edx,1),%mm1 | |
244 movl %ecx,%edx | |
245 pxor %mm2,%mm0 | |
246 js L007mmx_break | |
# Second nibble of the same byte (low nibble, offset %ecx).
247 shlb $4,%cl | |
248 andl $15,%ebx | |
249 psrlq $4,%mm0 | |
250 andl $240,%edx | |
251 movq %mm1,%mm2 | |
252 psrlq $4,%mm1 | |
253 pxor 8(%esi,%ecx,1),%mm0 | |
254 psllq $60,%mm2 | |
255 pxor (%eax,%ebx,8),%mm1 | |
256 movd %mm0,%ebx | |
257 pxor (%esi,%ecx,1),%mm1 | |
258 pxor %mm2,%mm0 | |
259 jmp L006mmx_loop | |
260 .align 4,0x90 | |
261 L007mmx_break: | |
# Drain the last two pending nibbles (low then high) after the loop exits.
262 shlb $4,%cl | |
263 andl $15,%ebx | |
264 psrlq $4,%mm0 | |
265 andl $240,%edx | |
266 movq %mm1,%mm2 | |
267 psrlq $4,%mm1 | |
268 pxor 8(%esi,%ecx,1),%mm0 | |
269 psllq $60,%mm2 | |
270 pxor (%eax,%ebx,8),%mm1 | |
271 movd %mm0,%ebx | |
272 pxor (%esi,%ecx,1),%mm1 | |
273 pxor %mm2,%mm0 | |
274 psrlq $4,%mm0 | |
275 andl $15,%ebx | |
276 movq %mm1,%mm2 | |
277 psrlq $4,%mm1 | |
278 pxor 8(%esi,%edx,1),%mm0 | |
279 psllq $60,%mm2 | |
280 pxor (%eax,%ebx,8),%mm1 | |
281 movd %mm0,%ebx | |
282 pxor (%esi,%edx,1),%mm1 | |
283 pxor %mm2,%mm0 | |
# Unpack mm1:mm0 into four dwords, byte-swap, and store back to Xi.
284 psrlq $32,%mm0 | |
285 movd %mm1,%edx | |
286 psrlq $32,%mm1 | |
287 movd %mm0,%ecx | |
288 movd %mm1,%ebp | |
289 bswap %ebx | |
290 bswap %edx | |
291 bswap %ecx | |
292 bswap %ebp | |
# emms: leave the FPU/MMX state clean before returning to C code.
293 emms | |
294 movl %ebx,12(%edi) | |
295 movl %edx,4(%edi) | |
296 movl %ecx,8(%edi) | |
297 movl %ebp,(%edi) | |
298 popl %edi | |
299 popl %esi | |
300 popl %ebx | |
301 popl %ebp | |
302 ret | |
# void gcm_ghash_4bit_mmx(Xi, Htable, inp, len)
# MMX GHASH over a buffer, processing 8 bits of Xi per step via the Lrem_8bit
# table. Args: 20(%esp)=Xi(%eax), 24(%esp)=Htable(%ebx), 28(%esp)=inp(%ecx),
# 32(%esp)=len(%edx) at entry.
# Builds a 64-aligned stack frame holding: 0..15(%esp) scaled key nibbles,
# 16(%esp) and 144(%esp) two 128-byte halves of the Htable, 272(%esp) and
# 400(%esp) the same tables pre-shifted right by 4 bits, plus saved Xi/inp-end/
# original %esp at 544/552/556(%esp). %esp is restored from 556(%esp) at exit.
303 .globl _gcm_ghash_4bit_mmx | |
304 .private_extern _gcm_ghash_4bit_mmx | |
305 .align 4 | |
306 _gcm_ghash_4bit_mmx: | |
307 L_gcm_ghash_4bit_mmx_begin: | |
308 pushl %ebp | |
309 pushl %ebx | |
310 pushl %esi | |
311 pushl %edi | |
312 movl 20(%esp),%eax | |
313 movl 24(%esp),%ebx | |
314 movl 28(%esp),%ecx | |
315 movl 32(%esp),%edx | |
316 movl %esp,%ebp | |
# PIC address of the Lrem_8bit reduction table into %esi.
317 call L008pic_point | |
318 L008pic_point: | |
319 popl %esi | |
320 leal Lrem_8bit-L008pic_point(%esi),%esi | |
# Carve out the 560-byte frame, 64-byte aligned; keep original %esp in %ebp.
321 subl $544,%esp | |
322 andl $-64,%esp | |
323 subl $16,%esp | |
324 addl %ecx,%edx | |
325 movl %eax,544(%esp) | |
326 movl %edx,552(%esp) | |
327 movl %ebp,556(%esp) | |
# Fully unrolled: copy all 16 Htable entries into the on-stack table at
# 16/144(%esp) (%edi) and a 4-bit right-shifted copy at 272/400(%esp) (%ebp),
# while recording each entry's low-nibble<<4 at 0..15(%esp).
328 addl $128,%ebx | |
329 leal 144(%esp),%edi | |
330 leal 400(%esp),%ebp | |
331 movl -120(%ebx),%edx | |
332 movq -120(%ebx),%mm0 | |
333 movq -128(%ebx),%mm3 | |
334 shll $4,%edx | |
335 movb %dl,(%esp) | |
336 movl -104(%ebx),%edx | |
337 movq -104(%ebx),%mm2 | |
338 movq -112(%ebx),%mm5 | |
339 movq %mm0,-128(%edi) | |
340 psrlq $4,%mm0 | |
341 movq %mm3,(%edi) | |
342 movq %mm3,%mm7 | |
343 psrlq $4,%mm3 | |
344 shll $4,%edx | |
345 movb %dl,1(%esp) | |
346 movl -88(%ebx),%edx | |
347 movq -88(%ebx),%mm1 | |
348 psllq $60,%mm7 | |
349 movq -96(%ebx),%mm4 | |
350 por %mm7,%mm0 | |
351 movq %mm2,-120(%edi) | |
352 psrlq $4,%mm2 | |
353 movq %mm5,8(%edi) | |
354 movq %mm5,%mm6 | |
355 movq %mm0,-128(%ebp) | |
356 psrlq $4,%mm5 | |
357 movq %mm3,(%ebp) | |
358 shll $4,%edx | |
359 movb %dl,2(%esp) | |
360 movl -72(%ebx),%edx | |
361 movq -72(%ebx),%mm0 | |
362 psllq $60,%mm6 | |
363 movq -80(%ebx),%mm3 | |
364 por %mm6,%mm2 | |
365 movq %mm1,-112(%edi) | |
366 psrlq $4,%mm1 | |
367 movq %mm4,16(%edi) | |
368 movq %mm4,%mm7 | |
369 movq %mm2,-120(%ebp) | |
370 psrlq $4,%mm4 | |
371 movq %mm5,8(%ebp) | |
372 shll $4,%edx | |
373 movb %dl,3(%esp) | |
374 movl -56(%ebx),%edx | |
375 movq -56(%ebx),%mm2 | |
376 psllq $60,%mm7 | |
377 movq -64(%ebx),%mm5 | |
378 por %mm7,%mm1 | |
379 movq %mm0,-104(%edi) | |
380 psrlq $4,%mm0 | |
381 movq %mm3,24(%edi) | |
382 movq %mm3,%mm6 | |
383 movq %mm1,-112(%ebp) | |
384 psrlq $4,%mm3 | |
385 movq %mm4,16(%ebp) | |
386 shll $4,%edx | |
387 movb %dl,4(%esp) | |
388 movl -40(%ebx),%edx | |
389 movq -40(%ebx),%mm1 | |
390 psllq $60,%mm6 | |
391 movq -48(%ebx),%mm4 | |
392 por %mm6,%mm0 | |
393 movq %mm2,-96(%edi) | |
394 psrlq $4,%mm2 | |
395 movq %mm5,32(%edi) | |
396 movq %mm5,%mm7 | |
397 movq %mm0,-104(%ebp) | |
398 psrlq $4,%mm5 | |
399 movq %mm3,24(%ebp) | |
400 shll $4,%edx | |
401 movb %dl,5(%esp) | |
402 movl -24(%ebx),%edx | |
403 movq -24(%ebx),%mm0 | |
404 psllq $60,%mm7 | |
405 movq -32(%ebx),%mm3 | |
406 por %mm7,%mm2 | |
407 movq %mm1,-88(%edi) | |
408 psrlq $4,%mm1 | |
409 movq %mm4,40(%edi) | |
410 movq %mm4,%mm6 | |
411 movq %mm2,-96(%ebp) | |
412 psrlq $4,%mm4 | |
413 movq %mm5,32(%ebp) | |
414 shll $4,%edx | |
415 movb %dl,6(%esp) | |
416 movl -8(%ebx),%edx | |
417 movq -8(%ebx),%mm2 | |
418 psllq $60,%mm6 | |
419 movq -16(%ebx),%mm5 | |
420 por %mm6,%mm1 | |
421 movq %mm0,-80(%edi) | |
422 psrlq $4,%mm0 | |
423 movq %mm3,48(%edi) | |
424 movq %mm3,%mm7 | |
425 movq %mm1,-88(%ebp) | |
426 psrlq $4,%mm3 | |
427 movq %mm4,40(%ebp) | |
428 shll $4,%edx | |
429 movb %dl,7(%esp) | |
430 movl 8(%ebx),%edx | |
431 movq 8(%ebx),%mm1 | |
432 psllq $60,%mm7 | |
433 movq (%ebx),%mm4 | |
434 por %mm7,%mm0 | |
435 movq %mm2,-72(%edi) | |
436 psrlq $4,%mm2 | |
437 movq %mm5,56(%edi) | |
438 movq %mm5,%mm6 | |
439 movq %mm0,-80(%ebp) | |
440 psrlq $4,%mm5 | |
441 movq %mm3,48(%ebp) | |
442 shll $4,%edx | |
443 movb %dl,8(%esp) | |
444 movl 24(%ebx),%edx | |
445 movq 24(%ebx),%mm0 | |
446 psllq $60,%mm6 | |
447 movq 16(%ebx),%mm3 | |
448 por %mm6,%mm2 | |
449 movq %mm1,-64(%edi) | |
450 psrlq $4,%mm1 | |
451 movq %mm4,64(%edi) | |
452 movq %mm4,%mm7 | |
453 movq %mm2,-72(%ebp) | |
454 psrlq $4,%mm4 | |
455 movq %mm5,56(%ebp) | |
456 shll $4,%edx | |
457 movb %dl,9(%esp) | |
458 movl 40(%ebx),%edx | |
459 movq 40(%ebx),%mm2 | |
460 psllq $60,%mm7 | |
461 movq 32(%ebx),%mm5 | |
462 por %mm7,%mm1 | |
463 movq %mm0,-56(%edi) | |
464 psrlq $4,%mm0 | |
465 movq %mm3,72(%edi) | |
466 movq %mm3,%mm6 | |
467 movq %mm1,-64(%ebp) | |
468 psrlq $4,%mm3 | |
469 movq %mm4,64(%ebp) | |
470 shll $4,%edx | |
471 movb %dl,10(%esp) | |
472 movl 56(%ebx),%edx | |
473 movq 56(%ebx),%mm1 | |
474 psllq $60,%mm6 | |
475 movq 48(%ebx),%mm4 | |
476 por %mm6,%mm0 | |
477 movq %mm2,-48(%edi) | |
478 psrlq $4,%mm2 | |
479 movq %mm5,80(%edi) | |
480 movq %mm5,%mm7 | |
481 movq %mm0,-56(%ebp) | |
482 psrlq $4,%mm5 | |
483 movq %mm3,72(%ebp) | |
484 shll $4,%edx | |
485 movb %dl,11(%esp) | |
486 movl 72(%ebx),%edx | |
487 movq 72(%ebx),%mm0 | |
488 psllq $60,%mm7 | |
489 movq 64(%ebx),%mm3 | |
490 por %mm7,%mm2 | |
491 movq %mm1,-40(%edi) | |
492 psrlq $4,%mm1 | |
493 movq %mm4,88(%edi) | |
494 movq %mm4,%mm6 | |
495 movq %mm2,-48(%ebp) | |
496 psrlq $4,%mm4 | |
497 movq %mm5,80(%ebp) | |
498 shll $4,%edx | |
499 movb %dl,12(%esp) | |
500 movl 88(%ebx),%edx | |
501 movq 88(%ebx),%mm2 | |
502 psllq $60,%mm6 | |
503 movq 80(%ebx),%mm5 | |
504 por %mm6,%mm1 | |
505 movq %mm0,-32(%edi) | |
506 psrlq $4,%mm0 | |
507 movq %mm3,96(%edi) | |
508 movq %mm3,%mm7 | |
509 movq %mm1,-40(%ebp) | |
510 psrlq $4,%mm3 | |
511 movq %mm4,88(%ebp) | |
512 shll $4,%edx | |
513 movb %dl,13(%esp) | |
514 movl 104(%ebx),%edx | |
515 movq 104(%ebx),%mm1 | |
516 psllq $60,%mm7 | |
517 movq 96(%ebx),%mm4 | |
518 por %mm7,%mm0 | |
519 movq %mm2,-24(%edi) | |
520 psrlq $4,%mm2 | |
521 movq %mm5,104(%edi) | |
522 movq %mm5,%mm6 | |
523 movq %mm0,-32(%ebp) | |
524 psrlq $4,%mm5 | |
525 movq %mm3,96(%ebp) | |
526 shll $4,%edx | |
527 movb %dl,14(%esp) | |
528 movl 120(%ebx),%edx | |
529 movq 120(%ebx),%mm0 | |
530 psllq $60,%mm6 | |
531 movq 112(%ebx),%mm3 | |
532 por %mm6,%mm2 | |
533 movq %mm1,-16(%edi) | |
534 psrlq $4,%mm1 | |
535 movq %mm4,112(%edi) | |
536 movq %mm4,%mm7 | |
537 movq %mm2,-24(%ebp) | |
538 psrlq $4,%mm4 | |
539 movq %mm5,104(%ebp) | |
540 shll $4,%edx | |
541 movb %dl,15(%esp) | |
542 psllq $60,%mm7 | |
543 por %mm7,%mm1 | |
544 movq %mm0,-8(%edi) | |
545 psrlq $4,%mm0 | |
546 movq %mm3,120(%edi) | |
547 movq %mm3,%mm6 | |
548 movq %mm1,-16(%ebp) | |
549 psrlq $4,%mm3 | |
550 movq %mm4,112(%ebp) | |
551 psllq $60,%mm6 | |
552 por %mm6,%mm0 | |
553 movq %mm0,-8(%ebp) | |
554 movq %mm3,120(%ebp) | |
# Load Xi: low half in %mm6, high half in %ebx:%edx.
555 movq (%eax),%mm6 | |
556 movl 8(%eax),%ebx | |
557 movl 12(%eax),%edx | |
558 .align 4,0x90 | |
# Outer loop: one 16-byte input block per iteration. XOR block into Xi, then
# a fully unrolled sequence consumes Xi one byte at a time (roll $8 feeds the
# next byte into %al), accumulating in %mm6/%mm7 with Lrem_8bit reductions
# gathered via pinsrw into %mm0/%mm1/%mm2.
559 L009outer: | |
560 xorl 12(%ecx),%edx | |
561 xorl 8(%ecx),%ebx | |
562 pxor (%ecx),%mm6 | |
563 leal 16(%ecx),%ecx | |
564 movl %ebx,536(%esp) | |
565 movq %mm6,528(%esp) | |
566 movl %ecx,548(%esp) | |
567 xorl %eax,%eax | |
568 roll $8,%edx | |
569 movb %dl,%al | |
570 movl %eax,%ebp | |
571 andb $15,%al | |
572 shrl $4,%ebp | |
573 pxor %mm0,%mm0 | |
574 roll $8,%edx | |
575 pxor %mm1,%mm1 | |
576 pxor %mm2,%mm2 | |
577 movq 16(%esp,%eax,8),%mm7 | |
578 movq 144(%esp,%eax,8),%mm6 | |
579 movb %dl,%al | |
580 movd %mm7,%ebx | |
581 psrlq $8,%mm7 | |
582 movq %mm6,%mm3 | |
583 movl %eax,%edi | |
584 psrlq $8,%mm6 | |
585 pxor 272(%esp,%ebp,8),%mm7 | |
586 andb $15,%al | |
587 psllq $56,%mm3 | |
588 shrl $4,%edi | |
589 pxor 16(%esp,%eax,8),%mm7 | |
590 roll $8,%edx | |
591 pxor 144(%esp,%eax,8),%mm6 | |
592 pxor %mm3,%mm7 | |
593 pxor 400(%esp,%ebp,8),%mm6 | |
594 xorb (%esp,%ebp,1),%bl | |
595 movb %dl,%al | |
596 movd %mm7,%ecx | |
597 movzbl %bl,%ebx | |
598 psrlq $8,%mm7 | |
599 movq %mm6,%mm3 | |
600 movl %eax,%ebp | |
601 psrlq $8,%mm6 | |
602 pxor 272(%esp,%edi,8),%mm7 | |
603 andb $15,%al | |
604 psllq $56,%mm3 | |
605 shrl $4,%ebp | |
606 pinsrw $2,(%esi,%ebx,2),%mm2 | |
607 pxor 16(%esp,%eax,8),%mm7 | |
608 roll $8,%edx | |
609 pxor 144(%esp,%eax,8),%mm6 | |
610 pxor %mm3,%mm7 | |
611 pxor 400(%esp,%edi,8),%mm6 | |
612 xorb (%esp,%edi,1),%cl | |
613 movb %dl,%al | |
# Refill %edx with the next dword of Xi^block (bytes 4..7).
614 movl 536(%esp),%edx | |
615 movd %mm7,%ebx | |
616 movzbl %cl,%ecx | |
617 psrlq $8,%mm7 | |
618 movq %mm6,%mm3 | |
619 movl %eax,%edi | |
620 psrlq $8,%mm6 | |
621 pxor 272(%esp,%ebp,8),%mm7 | |
622 andb $15,%al | |
623 psllq $56,%mm3 | |
624 pxor %mm2,%mm6 | |
625 shrl $4,%edi | |
626 pinsrw $2,(%esi,%ecx,2),%mm1 | |
627 pxor 16(%esp,%eax,8),%mm7 | |
628 roll $8,%edx | |
629 pxor 144(%esp,%eax,8),%mm6 | |
630 pxor %mm3,%mm7 | |
631 pxor 400(%esp,%ebp,8),%mm6 | |
632 xorb (%esp,%ebp,1),%bl | |
633 movb %dl,%al | |
634 movd %mm7,%ecx | |
635 movzbl %bl,%ebx | |
636 psrlq $8,%mm7 | |
637 movq %mm6,%mm3 | |
638 movl %eax,%ebp | |
639 psrlq $8,%mm6 | |
640 pxor 272(%esp,%edi,8),%mm7 | |
641 andb $15,%al | |
642 psllq $56,%mm3 | |
643 pxor %mm1,%mm6 | |
644 shrl $4,%ebp | |
645 pinsrw $2,(%esi,%ebx,2),%mm0 | |
646 pxor 16(%esp,%eax,8),%mm7 | |
647 roll $8,%edx | |
648 pxor 144(%esp,%eax,8),%mm6 | |
649 pxor %mm3,%mm7 | |
650 pxor 400(%esp,%edi,8),%mm6 | |
651 xorb (%esp,%edi,1),%cl | |
652 movb %dl,%al | |
653 movd %mm7,%ebx | |
654 movzbl %cl,%ecx | |
655 psrlq $8,%mm7 | |
656 movq %mm6,%mm3 | |
657 movl %eax,%edi | |
658 psrlq $8,%mm6 | |
659 pxor 272(%esp,%ebp,8),%mm7 | |
660 andb $15,%al | |
661 psllq $56,%mm3 | |
662 pxor %mm0,%mm6 | |
663 shrl $4,%edi | |
664 pinsrw $2,(%esi,%ecx,2),%mm2 | |
665 pxor 16(%esp,%eax,8),%mm7 | |
666 roll $8,%edx | |
667 pxor 144(%esp,%eax,8),%mm6 | |
668 pxor %mm3,%mm7 | |
669 pxor 400(%esp,%ebp,8),%mm6 | |
670 xorb (%esp,%ebp,1),%bl | |
671 movb %dl,%al | |
672 movd %mm7,%ecx | |
673 movzbl %bl,%ebx | |
674 psrlq $8,%mm7 | |
675 movq %mm6,%mm3 | |
676 movl %eax,%ebp | |
677 psrlq $8,%mm6 | |
678 pxor 272(%esp,%edi,8),%mm7 | |
679 andb $15,%al | |
680 psllq $56,%mm3 | |
681 pxor %mm2,%mm6 | |
682 shrl $4,%ebp | |
683 pinsrw $2,(%esi,%ebx,2),%mm1 | |
684 pxor 16(%esp,%eax,8),%mm7 | |
685 roll $8,%edx | |
686 pxor 144(%esp,%eax,8),%mm6 | |
687 pxor %mm3,%mm7 | |
688 pxor 400(%esp,%edi,8),%mm6 | |
689 xorb (%esp,%edi,1),%cl | |
690 movb %dl,%al | |
# Next dword of Xi^block (bytes 8..11), saved earlier at 532(%esp).
691 movl 532(%esp),%edx | |
692 movd %mm7,%ebx | |
693 movzbl %cl,%ecx | |
694 psrlq $8,%mm7 | |
695 movq %mm6,%mm3 | |
696 movl %eax,%edi | |
697 psrlq $8,%mm6 | |
698 pxor 272(%esp,%ebp,8),%mm7 | |
699 andb $15,%al | |
700 psllq $56,%mm3 | |
701 pxor %mm1,%mm6 | |
702 shrl $4,%edi | |
703 pinsrw $2,(%esi,%ecx,2),%mm0 | |
704 pxor 16(%esp,%eax,8),%mm7 | |
705 roll $8,%edx | |
706 pxor 144(%esp,%eax,8),%mm6 | |
707 pxor %mm3,%mm7 | |
708 pxor 400(%esp,%ebp,8),%mm6 | |
709 xorb (%esp,%ebp,1),%bl | |
710 movb %dl,%al | |
711 movd %mm7,%ecx | |
712 movzbl %bl,%ebx | |
713 psrlq $8,%mm7 | |
714 movq %mm6,%mm3 | |
715 movl %eax,%ebp | |
716 psrlq $8,%mm6 | |
717 pxor 272(%esp,%edi,8),%mm7 | |
718 andb $15,%al | |
719 psllq $56,%mm3 | |
720 pxor %mm0,%mm6 | |
721 shrl $4,%ebp | |
722 pinsrw $2,(%esi,%ebx,2),%mm2 | |
723 pxor 16(%esp,%eax,8),%mm7 | |
724 roll $8,%edx | |
725 pxor 144(%esp,%eax,8),%mm6 | |
726 pxor %mm3,%mm7 | |
727 pxor 400(%esp,%edi,8),%mm6 | |
728 xorb (%esp,%edi,1),%cl | |
729 movb %dl,%al | |
730 movd %mm7,%ebx | |
731 movzbl %cl,%ecx | |
732 psrlq $8,%mm7 | |
733 movq %mm6,%mm3 | |
734 movl %eax,%edi | |
735 psrlq $8,%mm6 | |
736 pxor 272(%esp,%ebp,8),%mm7 | |
737 andb $15,%al | |
738 psllq $56,%mm3 | |
739 pxor %mm2,%mm6 | |
740 shrl $4,%edi | |
741 pinsrw $2,(%esi,%ecx,2),%mm1 | |
742 pxor 16(%esp,%eax,8),%mm7 | |
743 roll $8,%edx | |
744 pxor 144(%esp,%eax,8),%mm6 | |
745 pxor %mm3,%mm7 | |
746 pxor 400(%esp,%ebp,8),%mm6 | |
747 xorb (%esp,%ebp,1),%bl | |
748 movb %dl,%al | |
749 movd %mm7,%ecx | |
750 movzbl %bl,%ebx | |
751 psrlq $8,%mm7 | |
752 movq %mm6,%mm3 | |
753 movl %eax,%ebp | |
754 psrlq $8,%mm6 | |
755 pxor 272(%esp,%edi,8),%mm7 | |
756 andb $15,%al | |
757 psllq $56,%mm3 | |
758 pxor %mm1,%mm6 | |
759 shrl $4,%ebp | |
760 pinsrw $2,(%esi,%ebx,2),%mm0 | |
761 pxor 16(%esp,%eax,8),%mm7 | |
762 roll $8,%edx | |
763 pxor 144(%esp,%eax,8),%mm6 | |
764 pxor %mm3,%mm7 | |
765 pxor 400(%esp,%edi,8),%mm6 | |
766 xorb (%esp,%edi,1),%cl | |
767 movb %dl,%al | |
# Final dword of Xi^block (bytes 12..15), saved earlier at 528(%esp).
768 movl 528(%esp),%edx | |
769 movd %mm7,%ebx | |
770 movzbl %cl,%ecx | |
771 psrlq $8,%mm7 | |
772 movq %mm6,%mm3 | |
773 movl %eax,%edi | |
774 psrlq $8,%mm6 | |
775 pxor 272(%esp,%ebp,8),%mm7 | |
776 andb $15,%al | |
777 psllq $56,%mm3 | |
778 pxor %mm0,%mm6 | |
779 shrl $4,%edi | |
780 pinsrw $2,(%esi,%ecx,2),%mm2 | |
781 pxor 16(%esp,%eax,8),%mm7 | |
782 roll $8,%edx | |
783 pxor 144(%esp,%eax,8),%mm6 | |
784 pxor %mm3,%mm7 | |
785 pxor 400(%esp,%ebp,8),%mm6 | |
786 xorb (%esp,%ebp,1),%bl | |
787 movb %dl,%al | |
788 movd %mm7,%ecx | |
789 movzbl %bl,%ebx | |
790 psrlq $8,%mm7 | |
791 movq %mm6,%mm3 | |
792 movl %eax,%ebp | |
793 psrlq $8,%mm6 | |
794 pxor 272(%esp,%edi,8),%mm7 | |
795 andb $15,%al | |
796 psllq $56,%mm3 | |
797 pxor %mm2,%mm6 | |
798 shrl $4,%ebp | |
799 pinsrw $2,(%esi,%ebx,2),%mm1 | |
800 pxor 16(%esp,%eax,8),%mm7 | |
801 roll $8,%edx | |
802 pxor 144(%esp,%eax,8),%mm6 | |
803 pxor %mm3,%mm7 | |
804 pxor 400(%esp,%edi,8),%mm6 | |
805 xorb (%esp,%edi,1),%cl | |
806 movb %dl,%al | |
807 movd %mm7,%ebx | |
808 movzbl %cl,%ecx | |
809 psrlq $8,%mm7 | |
810 movq %mm6,%mm3 | |
811 movl %eax,%edi | |
812 psrlq $8,%mm6 | |
813 pxor 272(%esp,%ebp,8),%mm7 | |
814 andb $15,%al | |
815 psllq $56,%mm3 | |
816 pxor %mm1,%mm6 | |
817 shrl $4,%edi | |
818 pinsrw $2,(%esi,%ecx,2),%mm0 | |
819 pxor 16(%esp,%eax,8),%mm7 | |
820 roll $8,%edx | |
821 pxor 144(%esp,%eax,8),%mm6 | |
822 pxor %mm3,%mm7 | |
823 pxor 400(%esp,%ebp,8),%mm6 | |
824 xorb (%esp,%ebp,1),%bl | |
825 movb %dl,%al | |
826 movd %mm7,%ecx | |
827 movzbl %bl,%ebx | |
828 psrlq $8,%mm7 | |
829 movq %mm6,%mm3 | |
830 movl %eax,%ebp | |
831 psrlq $8,%mm6 | |
832 pxor 272(%esp,%edi,8),%mm7 | |
833 andb $15,%al | |
834 psllq $56,%mm3 | |
835 pxor %mm0,%mm6 | |
836 shrl $4,%ebp | |
837 pinsrw $2,(%esi,%ebx,2),%mm2 | |
838 pxor 16(%esp,%eax,8),%mm7 | |
839 roll $8,%edx | |
840 pxor 144(%esp,%eax,8),%mm6 | |
841 pxor %mm3,%mm7 | |
842 pxor 400(%esp,%edi,8),%mm6 | |
843 xorb (%esp,%edi,1),%cl | |
844 movb %dl,%al | |
845 movl 524(%esp),%edx | |
846 movd %mm7,%ebx | |
847 movzbl %cl,%ecx | |
848 psrlq $8,%mm7 | |
849 movq %mm6,%mm3 | |
850 movl %eax,%edi | |
851 psrlq $8,%mm6 | |
852 pxor 272(%esp,%ebp,8),%mm7 | |
853 andb $15,%al | |
854 psllq $56,%mm3 | |
855 pxor %mm2,%mm6 | |
856 shrl $4,%edi | |
857 pinsrw $2,(%esi,%ecx,2),%mm1 | |
858 pxor 16(%esp,%eax,8),%mm7 | |
859 pxor 144(%esp,%eax,8),%mm6 | |
860 xorb (%esp,%ebp,1),%bl | |
861 pxor %mm3,%mm7 | |
862 pxor 400(%esp,%ebp,8),%mm6 | |
863 movzbl %bl,%ebx | |
864 pxor %mm2,%mm2 | |
865 psllq $4,%mm1 | |
# Final 4-bit tail: one more shift/reduce step plus the last two Lrem_8bit
# folds (pinsrw $2 / pinsrw $3), then merge all pending reductions into %mm6.
866 movd %mm7,%ecx | |
867 psrlq $4,%mm7 | |
868 movq %mm6,%mm3 | |
869 psrlq $4,%mm6 | |
870 shll $4,%ecx | |
871 pxor 16(%esp,%edi,8),%mm7 | |
872 psllq $60,%mm3 | |
873 movzbl %cl,%ecx | |
874 pxor %mm3,%mm7 | |
875 pxor 144(%esp,%edi,8),%mm6 | |
876 pinsrw $2,(%esi,%ebx,2),%mm0 | |
877 pxor %mm1,%mm6 | |
878 movd %mm7,%edx | |
879 pinsrw $3,(%esi,%ecx,2),%mm2 | |
880 psllq $12,%mm0 | |
881 pxor %mm0,%mm6 | |
882 psrlq $32,%mm7 | |
883 pxor %mm2,%mm6 | |
# Reload inp, byte-swap the high half, pshufw-reverse the low half, and loop
# while inp has not reached the saved end pointer.
884 movl 548(%esp),%ecx | |
885 movd %mm7,%ebx | |
886 movq %mm6,%mm3 | |
887 psllw $8,%mm6 | |
888 psrlw $8,%mm3 | |
889 por %mm3,%mm6 | |
890 bswap %edx | |
891 pshufw $27,%mm6,%mm6 | |
892 bswap %ebx | |
893 cmpl 552(%esp),%ecx | |
894 jne L009outer | |
# Store the final Xi, restore the caller's %esp, clear MMX state, and return.
895 movl 544(%esp),%eax | |
896 movl %edx,12(%eax) | |
897 movl %ebx,8(%eax) | |
898 movq %mm6,(%eax) | |
899 movl 556(%esp),%esp | |
900 emms | |
901 popl %edi | |
902 popl %esi | |
903 popl %ebx | |
904 popl %ebp | |
905 ret | |
# void gcm_init_clmul(Htable, Xi)  — 4(%esp)=Htable out (%edx), 8(%esp)=H in (%eax).
# PCLMULQDQ key schedule: converts H to the bit-reflected representation
# (H <<= 1 with conditional reduction via the constant row at Lbswap+16),
# squares it to get H^2, and stores H, H^2, and their xor'd halves at
# Htable+0, +16, +32 for the Karatsuba multiplications below.
# The .byte 102,15,58,68,... sequences encode pclmulqdq; 102,15,58,15 is palignr.
906 .globl _gcm_init_clmul | |
907 .private_extern _gcm_init_clmul | |
908 .align 4 | |
909 _gcm_init_clmul: | |
910 L_gcm_init_clmul_begin: | |
911 movl 4(%esp),%edx | |
912 movl 8(%esp),%eax | |
# PIC address of the Lbswap constant block.
913 call L010pic | |
914 L010pic: | |
915 popl %ecx | |
916 leal Lbswap-L010pic(%ecx),%ecx | |
917 movdqu (%eax),%xmm2 | |
918 pshufd $78,%xmm2,%xmm2 | |
# H <<= 1; if the top bit was set, fold in the poly constant at Lbswap+16.
919 pshufd $255,%xmm2,%xmm4 | |
920 movdqa %xmm2,%xmm3 | |
921 psllq $1,%xmm2 | |
922 pxor %xmm5,%xmm5 | |
923 psrlq $63,%xmm3 | |
924 pcmpgtd %xmm4,%xmm5 | |
925 pslldq $8,%xmm3 | |
926 por %xmm3,%xmm2 | |
927 pand 16(%ecx),%xmm5 | |
928 pxor %xmm5,%xmm2 | |
# Square H (Karatsuba: three pclmulqdq) to obtain H^2 in xmm1:xmm0.
929 movdqa %xmm2,%xmm0 | |
930 movdqa %xmm0,%xmm1 | |
931 pshufd $78,%xmm0,%xmm3 | |
932 pshufd $78,%xmm2,%xmm4 | |
933 pxor %xmm0,%xmm3 | |
934 pxor %xmm2,%xmm4 | |
935 .byte 102,15,58,68,194,0 | |
936 .byte 102,15,58,68,202,17 | |
937 .byte 102,15,58,68,220,0 | |
938 xorps %xmm0,%xmm3 | |
939 xorps %xmm1,%xmm3 | |
# Recombine the middle Karatsuba term into the 256-bit product.
940 movdqa %xmm3,%xmm4 | |
941 psrldq $8,%xmm3 | |
942 pslldq $8,%xmm4 | |
943 pxor %xmm3,%xmm1 | |
944 pxor %xmm4,%xmm0 | |
# Montgomery-style reduction of the 256-bit product modulo the GHASH poly.
945 movdqa %xmm0,%xmm4 | |
946 movdqa %xmm0,%xmm3 | |
947 psllq $5,%xmm0 | |
948 pxor %xmm0,%xmm3 | |
949 psllq $1,%xmm0 | |
950 pxor %xmm3,%xmm0 | |
951 psllq $57,%xmm0 | |
952 movdqa %xmm0,%xmm3 | |
953 pslldq $8,%xmm0 | |
954 psrldq $8,%xmm3 | |
955 pxor %xmm4,%xmm0 | |
956 pxor %xmm3,%xmm1 | |
957 movdqa %xmm0,%xmm4 | |
958 psrlq $1,%xmm0 | |
959 pxor %xmm4,%xmm1 | |
960 pxor %xmm0,%xmm4 | |
961 psrlq $5,%xmm0 | |
962 pxor %xmm4,%xmm0 | |
963 psrlq $1,%xmm0 | |
964 pxor %xmm1,%xmm0 | |
# Store H, H^2, and the packed xor'd halves (via palignr) at Htable 0/16/32.
965 pshufd $78,%xmm2,%xmm3 | |
966 pshufd $78,%xmm0,%xmm4 | |
967 pxor %xmm2,%xmm3 | |
968 movdqu %xmm2,(%edx) | |
969 pxor %xmm0,%xmm4 | |
970 movdqu %xmm0,16(%edx) | |
971 .byte 102,15,58,15,227,8 | |
972 movdqu %xmm4,32(%edx) | |
973 ret | |
# void gcm_gmult_clmul(Xi, Htable)  — 4(%esp)=Xi (%eax), 8(%esp)=Htable (%edx).
# One PCLMULQDQ GHASH multiply: Xi <- Xi * H using the precomputed table from
# gcm_init_clmul. .byte 102,15,56,0,197 encodes pshufb %xmm5,%xmm0 (byte swap
# through the Lbswap mask); 102,15,58,68,... encodes pclmulqdq.
974 .globl _gcm_gmult_clmul | |
975 .private_extern _gcm_gmult_clmul | |
976 .align 4 | |
977 _gcm_gmult_clmul: | |
978 L_gcm_gmult_clmul_begin: | |
979 movl 4(%esp),%eax | |
980 movl 8(%esp),%edx | |
981 call L011pic | |
982 L011pic: | |
983 popl %ecx | |
984 leal Lbswap-L011pic(%ecx),%ecx | |
# Load Xi, byte-reverse it, and load H and the xor'd-halves entry.
985 movdqu (%eax),%xmm0 | |
986 movdqa (%ecx),%xmm5 | |
987 movups (%edx),%xmm2 | |
988 .byte 102,15,56,0,197 | |
989 movups 32(%edx),%xmm4 | |
# Karatsuba multiply Xi * H (three pclmulqdq).
990 movdqa %xmm0,%xmm1 | |
991 pshufd $78,%xmm0,%xmm3 | |
992 pxor %xmm0,%xmm3 | |
993 .byte 102,15,58,68,194,0 | |
994 .byte 102,15,58,68,202,17 | |
995 .byte 102,15,58,68,220,0 | |
996 xorps %xmm0,%xmm3 | |
997 xorps %xmm1,%xmm3 | |
998 movdqa %xmm3,%xmm4 | |
999 psrldq $8,%xmm3 | |
1000 pslldq $8,%xmm4 | |
1001 pxor %xmm3,%xmm1 | |
1002 pxor %xmm4,%xmm0 | |
# Reduce the 256-bit product modulo the GHASH polynomial (same sequence as
# in gcm_init_clmul).
1003 movdqa %xmm0,%xmm4 | |
1004 movdqa %xmm0,%xmm3 | |
1005 psllq $5,%xmm0 | |
1006 pxor %xmm0,%xmm3 | |
1007 psllq $1,%xmm0 | |
1008 pxor %xmm3,%xmm0 | |
1009 psllq $57,%xmm0 | |
1010 movdqa %xmm0,%xmm3 | |
1011 pslldq $8,%xmm0 | |
1012 psrldq $8,%xmm3 | |
1013 pxor %xmm4,%xmm0 | |
1014 pxor %xmm3,%xmm1 | |
1015 movdqa %xmm0,%xmm4 | |
1016 psrlq $1,%xmm0 | |
1017 pxor %xmm4,%xmm1 | |
1018 pxor %xmm0,%xmm4 | |
1019 psrlq $5,%xmm0 | |
1020 pxor %xmm4,%xmm0 | |
1021 psrlq $1,%xmm0 | |
1022 pxor %xmm1,%xmm0 | |
# Byte-reverse back and store the result to Xi.
1023 .byte 102,15,56,0,197 | |
1024 movdqu %xmm0,(%eax) | |
1025 ret | |
# void gcm_ghash_clmul(Xi, Htable, inp, len)
# 20(%esp)=Xi (%eax), 24(%esp)=Htable (%edx), 28(%esp)=inp (%esi), 32(%esp)=len (%ebx).
# PCLMULQDQ GHASH over a buffer. Main loop processes TWO 16-byte blocks per
# iteration, multiplying one block by H^2 and the other by H (products summed),
# with the reduction interleaved for throughput. Odd trailing block handled at
# L013odd_tail. .byte 102,15,56,0,... = pshufb; 102,15,58,68,... = pclmulqdq.
1026 .globl _gcm_ghash_clmul | |
1027 .private_extern _gcm_ghash_clmul | |
1028 .align 4 | |
1029 _gcm_ghash_clmul: | |
1030 L_gcm_ghash_clmul_begin: | |
1031 pushl %ebp | |
1032 pushl %ebx | |
1033 pushl %esi | |
1034 pushl %edi | |
1035 movl 20(%esp),%eax | |
1036 movl 24(%esp),%edx | |
1037 movl 28(%esp),%esi | |
1038 movl 32(%esp),%ebx | |
1039 call L012pic | |
1040 L012pic: | |
1041 popl %ecx | |
1042 leal Lbswap-L012pic(%ecx),%ecx | |
# Load and byte-reverse Xi; load H.
1043 movdqu (%eax),%xmm0 | |
1044 movdqa (%ecx),%xmm5 | |
1045 movdqu (%edx),%xmm2 | |
1046 .byte 102,15,56,0,197 | |
# Single block only? Take the odd-tail path.
1047 subl $16,%ebx | |
1048 jz L013odd_tail | |
# Prime the pipeline: read blocks 0 and 1, start block1 * H.
1049 movdqu (%esi),%xmm3 | |
1050 movdqu 16(%esi),%xmm6 | |
1051 .byte 102,15,56,0,221 | |
1052 .byte 102,15,56,0,245 | |
1053 movdqu 32(%edx),%xmm5 | |
1054 pxor %xmm3,%xmm0 | |
1055 pshufd $78,%xmm6,%xmm3 | |
1056 movdqa %xmm6,%xmm7 | |
1057 pxor %xmm6,%xmm3 | |
1058 leal 32(%esi),%esi | |
1059 .byte 102,15,58,68,242,0 | |
1060 .byte 102,15,58,68,250,17 | |
1061 .byte 102,15,58,68,221,0 | |
1062 movups 16(%edx),%xmm2 | |
1063 nop | |
1064 subl $32,%ebx | |
1065 jbe L014even_tail | |
1066 jmp L015mod_loop | |
1067 .align 5,0x90 | |
1068 L015mod_loop: | |
# (Xi^block0) * H^2, summed with the previous block1 * H partials, then the
# polynomial reduction, all interleaved with loading/multiplying the next pair.
1069 pshufd $78,%xmm0,%xmm4 | |
1070 movdqa %xmm0,%xmm1 | |
1071 pxor %xmm0,%xmm4 | |
1072 nop | |
1073 .byte 102,15,58,68,194,0 | |
1074 .byte 102,15,58,68,202,17 | |
1075 .byte 102,15,58,68,229,16 | |
1076 movups (%edx),%xmm2 | |
1077 xorps %xmm6,%xmm0 | |
1078 movdqa (%ecx),%xmm5 | |
1079 xorps %xmm7,%xmm1 | |
1080 movdqu (%esi),%xmm7 | |
1081 pxor %xmm0,%xmm3 | |
1082 movdqu 16(%esi),%xmm6 | |
1083 pxor %xmm1,%xmm3 | |
1084 .byte 102,15,56,0,253 | |
1085 pxor %xmm3,%xmm4 | |
1086 movdqa %xmm4,%xmm3 | |
1087 psrldq $8,%xmm4 | |
1088 pslldq $8,%xmm3 | |
1089 pxor %xmm4,%xmm1 | |
1090 pxor %xmm3,%xmm0 | |
1091 .byte 102,15,56,0,245 | |
1092 pxor %xmm7,%xmm1 | |
1093 movdqa %xmm6,%xmm7 | |
1094 movdqa %xmm0,%xmm4 | |
1095 movdqa %xmm0,%xmm3 | |
1096 psllq $5,%xmm0 | |
1097 pxor %xmm0,%xmm3 | |
1098 psllq $1,%xmm0 | |
1099 pxor %xmm3,%xmm0 | |
1100 .byte 102,15,58,68,242,0 | |
1101 movups 32(%edx),%xmm5 | |
1102 psllq $57,%xmm0 | |
1103 movdqa %xmm0,%xmm3 | |
1104 pslldq $8,%xmm0 | |
1105 psrldq $8,%xmm3 | |
1106 pxor %xmm4,%xmm0 | |
1107 pxor %xmm3,%xmm1 | |
1108 pshufd $78,%xmm7,%xmm3 | |
1109 movdqa %xmm0,%xmm4 | |
1110 psrlq $1,%xmm0 | |
1111 pxor %xmm7,%xmm3 | |
1112 pxor %xmm4,%xmm1 | |
1113 .byte 102,15,58,68,250,17 | |
1114 movups 16(%edx),%xmm2 | |
1115 pxor %xmm0,%xmm4 | |
1116 psrlq $5,%xmm0 | |
1117 pxor %xmm4,%xmm0 | |
1118 psrlq $1,%xmm0 | |
1119 pxor %xmm1,%xmm0 | |
1120 .byte 102,15,58,68,221,0 | |
1121 leal 32(%esi),%esi | |
1122 subl $32,%ebx | |
1123 ja L015mod_loop | |
1124 L014even_tail: | |
# Finish the last in-flight pair: multiply, recombine, reduce.
1125 pshufd $78,%xmm0,%xmm4 | |
1126 movdqa %xmm0,%xmm1 | |
1127 pxor %xmm0,%xmm4 | |
1128 .byte 102,15,58,68,194,0 | |
1129 .byte 102,15,58,68,202,17 | |
1130 .byte 102,15,58,68,229,16 | |
1131 movdqa (%ecx),%xmm5 | |
1132 xorps %xmm6,%xmm0 | |
1133 xorps %xmm7,%xmm1 | |
1134 pxor %xmm0,%xmm3 | |
1135 pxor %xmm1,%xmm3 | |
1136 pxor %xmm3,%xmm4 | |
1137 movdqa %xmm4,%xmm3 | |
1138 psrldq $8,%xmm4 | |
1139 pslldq $8,%xmm3 | |
1140 pxor %xmm4,%xmm1 | |
1141 pxor %xmm3,%xmm0 | |
1142 movdqa %xmm0,%xmm4 | |
1143 movdqa %xmm0,%xmm3 | |
1144 psllq $5,%xmm0 | |
1145 pxor %xmm0,%xmm3 | |
1146 psllq $1,%xmm0 | |
1147 pxor %xmm3,%xmm0 | |
1148 psllq $57,%xmm0 | |
1149 movdqa %xmm0,%xmm3 | |
1150 pslldq $8,%xmm0 | |
1151 psrldq $8,%xmm3 | |
1152 pxor %xmm4,%xmm0 | |
1153 pxor %xmm3,%xmm1 | |
1154 movdqa %xmm0,%xmm4 | |
1155 psrlq $1,%xmm0 | |
1156 pxor %xmm4,%xmm1 | |
1157 pxor %xmm0,%xmm4 | |
1158 psrlq $5,%xmm0 | |
1159 pxor %xmm4,%xmm0 | |
1160 psrlq $1,%xmm0 | |
1161 pxor %xmm1,%xmm0 | |
# If len was a multiple of 32 we are done; otherwise one block remains.
1162 testl %ebx,%ebx | |
1163 jnz L016done | |
1164 movups (%edx),%xmm2 | |
1165 L013odd_tail: | |
# Single remaining block: Xi <- (Xi ^ block) * H, full multiply + reduce.
1166 movdqu (%esi),%xmm3 | |
1167 .byte 102,15,56,0,221 | |
1168 pxor %xmm3,%xmm0 | |
1169 movdqa %xmm0,%xmm1 | |
1170 pshufd $78,%xmm0,%xmm3 | |
1171 pshufd $78,%xmm2,%xmm4 | |
1172 pxor %xmm0,%xmm3 | |
1173 pxor %xmm2,%xmm4 | |
1174 .byte 102,15,58,68,194,0 | |
1175 .byte 102,15,58,68,202,17 | |
1176 .byte 102,15,58,68,220,0 | |
1177 xorps %xmm0,%xmm3 | |
1178 xorps %xmm1,%xmm3 | |
1179 movdqa %xmm3,%xmm4 | |
1180 psrldq $8,%xmm3 | |
1181 pslldq $8,%xmm4 | |
1182 pxor %xmm3,%xmm1 | |
1183 pxor %xmm4,%xmm0 | |
1184 movdqa %xmm0,%xmm4 | |
1185 movdqa %xmm0,%xmm3 | |
1186 psllq $5,%xmm0 | |
1187 pxor %xmm0,%xmm3 | |
1188 psllq $1,%xmm0 | |
1189 pxor %xmm3,%xmm0 | |
1190 psllq $57,%xmm0 | |
1191 movdqa %xmm0,%xmm3 | |
1192 pslldq $8,%xmm0 | |
1193 psrldq $8,%xmm3 | |
1194 pxor %xmm4,%xmm0 | |
1195 pxor %xmm3,%xmm1 | |
1196 movdqa %xmm0,%xmm4 | |
1197 psrlq $1,%xmm0 | |
1198 pxor %xmm4,%xmm1 | |
1199 pxor %xmm0,%xmm4 | |
1200 psrlq $5,%xmm0 | |
1201 pxor %xmm4,%xmm0 | |
1202 psrlq $1,%xmm0 | |
1203 pxor %xmm1,%xmm0 | |
1204 L016done: | |
# Byte-reverse Xi back to memory order and store it.
1205 .byte 102,15,56,0,197 | |
1206 movdqu %xmm0,(%eax) | |
1207 popl %edi | |
1208 popl %esi | |
1209 popl %ebx | |
1210 popl %ebp | |
1211 ret | |
# Constant data used by the routines above.
# Lbswap: first 16 bytes = pshufb byte-reversal mask (15..0); next 16 bytes =
# the GHASH polynomial constant row (0xc2 in the top byte) used by
# gcm_init_clmul's conditional reduction (pand 16(%ecx)).
1212 .align 6,0x90 | |
1213 Lbswap: | |
1214 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 | |
1215 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 | |
1216 .align 6,0x90 | |
# Lrem_8bit: 256 16-bit remainders for the 8-bit MMX path (indexed *2).
1217 Lrem_8bit: | |
1218 .value 0,450,900,582,1800,1738,1164,1358 | |
1219 .value 3600,4050,3476,3158,2328,2266,2716,2910 | |
1220 .value 7200,7650,8100,7782,6952,6890,6316,6510 | |
1221 .value 4656,5106,4532,4214,5432,5370,5820,6014 | |
1222 .value 14400,14722,15300,14854,16200,16010,15564,15630 | |
1223 .value 13904,14226,13780,13334,12632,12442,13020,13086 | |
1224 .value 9312,9634,10212,9766,9064,8874,8428,8494 | |
1225 .value 10864,11186,10740,10294,11640,11450,12028,12094 | |
1226 .value 28800,28994,29444,29382,30600,30282,29708,30158 | |
1227 .value 32400,32594,32020,31958,31128,30810,31260,31710 | |
1228 .value 27808,28002,28452,28390,27560,27242,26668,27118 | |
1229 .value 25264,25458,24884,24822,26040,25722,26172,26622 | |
1230 .value 18624,18690,19268,19078,20424,19978,19532,19854 | |
1231 .value 18128,18194,17748,17558,16856,16410,16988,17310 | |
1232 .value 21728,21794,22372,22182,21480,21034,20588,20910 | |
1233 .value 23280,23346,22900,22710,24056,23610,24188,24510 | |
1234 .value 57600,57538,57988,58182,58888,59338,58764,58446 | |
1235 .value 61200,61138,60564,60758,59416,59866,60316,59998 | |
1236 .value 64800,64738,65188,65382,64040,64490,63916,63598 | |
1237 .value 62256,62194,61620,61814,62520,62970,63420,63102 | |
1238 .value 55616,55426,56004,56070,56904,57226,56780,56334 | |
1239 .value 55120,54930,54484,54550,53336,53658,54236,53790 | |
1240 .value 50528,50338,50916,50982,49768,50090,49644,49198 | |
1241 .value 52080,51890,51444,51510,52344,52666,53244,52798 | |
1242 .value 37248,36930,37380,37830,38536,38730,38156,38094 | |
1243 .value 40848,40530,39956,40406,39064,39258,39708,39646 | |
1244 .value 36256,35938,36388,36838,35496,35690,35116,35054 | |
1245 .value 33712,33394,32820,33270,33976,34170,34620,34558 | |
1246 .value 43456,43010,43588,43910,44744,44810,44364,44174 | |
1247 .value 42960,42514,42068,42390,41176,41242,41820,41630 | |
1248 .value 46560,46114,46692,47014,45800,45866,45420,45230 | |
1249 .value 48112,47666,47220,47542,48376,48442,49020,48830 | |
1250 .align 6,0x90 | |
# Lrem_4bit: 16 8-byte entries for the 4-bit MMX path (indexed *8); the same
# 32-bit constants are staged on the stack by the scalar x86 routines.
1251 Lrem_4bit: | |
1252 .long 0,0,0,471859200,0,943718400,0,610271232 | |
1253 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 | |
1254 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 | |
1255 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 | |
# ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" — origin banner.
1256 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 | |
1257 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 | |
1258 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 | |
1259 .byte 0 | |
1260 #endif | |
OLD | NEW |