OLD | NEW |
| (Empty) |
1 .file "ghash-x86.s" | |
2 .text | |
# _gcm_gmult_4bit_x86: in-place update of a 16-byte block by nibble-wise
# table lookup, using integer registers only.
#   arg 1 (104(%esp) after the prologue): pointer to the 16-byte block; it is
#          read at the start and overwritten with the result at the end.
#   arg 2 (108(%esp) after the prologue): base of a table whose 16-byte
#          entries are fetched at byte offsets 0x00..0xF0 (nibble * 16).
# NOTE(review): presumably the GHASH multiply gcm_gmult_4bit(Xi, Htable) from
# OpenSSL's gcm128 code -- confirm against the C prototype.
# %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx, %edx are not.
# Frame (84 bytes): 0..15(%esp) copy of the input block, 16..79(%esp) a
# 16-entry dword reduction table; 80..83(%esp) is never accessed.
3 .globl _gcm_gmult_4bit_x86 | |
4 .align 4 | |
5 _gcm_gmult_4bit_x86: | |
6 L_gcm_gmult_4bit_x86_begin: | |
7 pushl %ebp | |
8 pushl %ebx | |
9 pushl %esi | |
10 pushl %edi | |
11 subl $84,%esp | |
# 84 bytes of locals + 16 bytes of pushes + 4-byte return address = 104.
12 movl 104(%esp),%edi | |
13 movl 108(%esp),%esi | |
14 movl (%edi),%ebp | |
15 movl 4(%edi),%edx | |
16 movl 8(%edi),%ecx | |
17 movl 12(%edi),%ebx | |
# Build the reduction table on the stack. Each constant equals 16 times the
# non-zero dword of the same-index entry in Lrem_4bit.
18 movl $0,16(%esp) | |
19 movl $471859200,20(%esp) | |
20 movl $943718400,24(%esp) | |
21 movl $610271232,28(%esp) | |
22 movl $1887436800,32(%esp) | |
23 movl $1822425088,36(%esp) | |
24 movl $1220542464,40(%esp) | |
25 movl $1423966208,44(%esp) | |
26 movl $3774873600,48(%esp) | |
27 movl $4246732800,52(%esp) | |
28 movl $3644850176,56(%esp) | |
29 movl $3311403008,60(%esp) | |
30 movl $2441084928,64(%esp) | |
31 movl $2376073216,68(%esp) | |
32 movl $2847932416,72(%esp) | |
33 movl $3051356160,76(%esp) | |
# Keep a byte-addressable copy of the input at 0..15(%esp) for the loop.
34 movl %ebp,(%esp) | |
35 movl %edx,4(%esp) | |
36 movl %ecx,8(%esp) | |
37 movl %ebx,12(%esp) | |
# %ebx = (low nibble of input byte 15) * 16: bits 24..27 moved to bits 4..7.
38 shrl $20,%ebx | |
39 andl $240,%ebx | |
# Load that table entry as the 128-bit accumulator %ebp:%edx:%ecx:%ebx
# (entry bytes 4..7, 0..3, 12..15, 8..11 respectively).
40 movl 4(%esi,%ebx,1),%ebp | |
41 movl (%esi,%ebx,1),%edx | |
42 movl 12(%esi,%ebx,1),%ecx | |
43 movl 8(%esi,%ebx,1),%ebx | |
# %eax is zeroed once; afterwards only %al is written, so %eax always holds a
# zero-extended byte index. %edi = index of the input byte being consumed.
44 xorl %eax,%eax | |
45 movl $15,%edi | |
46 jmp L000x86_loop | |
47 .align 4,0x90 | |
48 L000x86_loop: | |
# First half: shift the accumulator right by 4 bits across all four dwords,
# XOR the reduction-table dword selected by the 4 bits shifted out into the
# top dword, then XOR in the table entry for the HIGH nibble of byte %edi.
49 movb %bl,%al | |
50 shrdl $4,%ecx,%ebx | |
51 andb $15,%al | |
52 shrdl $4,%edx,%ecx | |
53 shrdl $4,%ebp,%edx | |
54 shrl $4,%ebp | |
55 xorl 16(%esp,%eax,4),%ebp | |
56 movb (%esp,%edi,1),%al | |
57 andb $240,%al | |
58 xorl 8(%esi,%eax,1),%ebx | |
59 xorl 12(%esi,%eax,1),%ecx | |
60 xorl (%esi,%eax,1),%edx | |
61 xorl 4(%esi,%eax,1),%ebp | |
# Step to the next lower byte; leave once byte 0 has been consumed.
62 decl %edi | |
63 js L001x86_break | |
# Second half: same shift/reduce step, then XOR in the table entry for the
# LOW nibble (shlb $4 turns it into a *16 offset) of the new byte %edi.
64 movb %bl,%al | |
65 shrdl $4,%ecx,%ebx | |
66 andb $15,%al | |
67 shrdl $4,%edx,%ecx | |
68 shrdl $4,%ebp,%edx | |
69 shrl $4,%ebp | |
70 xorl 16(%esp,%eax,4),%ebp | |
71 movb (%esp,%edi,1),%al | |
72 shlb $4,%al | |
73 xorl 8(%esi,%eax,1),%ebx | |
74 xorl 12(%esi,%eax,1),%ecx | |
75 xorl (%esi,%eax,1),%edx | |
76 xorl 4(%esi,%eax,1),%ebp | |
77 jmp L000x86_loop | |
78 .align 4,0x90 | |
79 L001x86_break: | |
# Byte-swap each dword and write the result back over the input block.
80 bswap %ebx | |
81 bswap %ecx | |
82 bswap %edx | |
83 bswap %ebp | |
84 movl 104(%esp),%edi | |
85 movl %ebx,12(%edi) | |
86 movl %ecx,8(%edi) | |
87 movl %edx,4(%edi) | |
88 movl %ebp,(%edi) | |
89 addl $84,%esp | |
90 popl %edi | |
91 popl %esi | |
92 popl %ebx | |
93 popl %ebp | |
94 ret | |
# _gcm_ghash_4bit_x86: fold a buffer into a 16-byte accumulator, 16 bytes at
# a time, using integer registers only.
#   arg 1 (104(%esp) after the prologue): pointer to the 16-byte accumulator;
#          read once at the start, written once at the end.
#   arg 2 (108(%esp)): base of a table of 16-byte entries, indexed nibble * 16.
#   arg 3 (112(%esp)): input pointer; this stack slot is advanced by 16 per
#          iteration.
#   arg 4 (116(%esp)): byte count; this slot is overwritten with input + count.
# NOTE(review): presumably OpenSSL's gcm_ghash_4bit(Xi, Htable, inp, len) --
# confirm against the C prototype.
# The loop bound is tested only at the bottom of the outer loop, so one
# 16-byte block is always consumed, even when the count is 0.
# %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx, %edx are not.
# Frame (84 bytes): 0..15(%esp) working copy of the current block,
# 16..79(%esp) a 16-entry dword reduction table.
95 .globl _gcm_ghash_4bit_x86 | |
96 .align 4 | |
97 _gcm_ghash_4bit_x86: | |
98 L_gcm_ghash_4bit_x86_begin: | |
99 pushl %ebp | |
100 pushl %ebx | |
101 pushl %esi | |
102 pushl %edi | |
103 subl $84,%esp | |
104 movl 104(%esp),%ebx | |
105 movl 108(%esp),%esi | |
106 movl 112(%esp),%edi | |
107 movl 116(%esp),%ecx | |
# Turn the byte count into an end pointer and keep it in the arg 4 slot.
108 addl %edi,%ecx | |
109 movl %ecx,116(%esp) | |
# Load the accumulator into %ebp (bytes 0..3), %edx, %ecx, %ebx (bytes 12..15).
110 movl (%ebx),%ebp | |
111 movl 4(%ebx),%edx | |
112 movl 8(%ebx),%ecx | |
113 movl 12(%ebx),%ebx | |
# Build the reduction table on the stack. Each constant equals 16 times the
# non-zero dword of the same-index entry in Lrem_4bit.
114 movl $0,16(%esp) | |
115 movl $471859200,20(%esp) | |
116 movl $943718400,24(%esp) | |
117 movl $610271232,28(%esp) | |
118 movl $1887436800,32(%esp) | |
119 movl $1822425088,36(%esp) | |
120 movl $1220542464,40(%esp) | |
121 movl $1423966208,44(%esp) | |
122 movl $3774873600,48(%esp) | |
123 movl $4246732800,52(%esp) | |
124 movl $3644850176,56(%esp) | |
125 movl $3311403008,60(%esp) | |
126 movl $2441084928,64(%esp) | |
127 movl $2376073216,68(%esp) | |
128 movl $2847932416,72(%esp) | |
129 movl $3051356160,76(%esp) | |
130 .align 4,0x90 | |
131 L002x86_outer_loop: | |
# XOR the next 16 input bytes into the accumulator and spill the result to
# 0..15(%esp) so the inner loop can read it one byte at a time.
132 xorl 12(%edi),%ebx | |
133 xorl 8(%edi),%ecx | |
134 xorl 4(%edi),%edx | |
135 xorl (%edi),%ebp | |
136 movl %ebx,12(%esp) | |
137 movl %ecx,8(%esp) | |
138 movl %edx,4(%esp) | |
139 movl %ebp,(%esp) | |
# %ebx = (low nibble of byte 15) * 16; load that table entry as the new
# 128-bit value %ebp:%edx:%ecx:%ebx.
140 shrl $20,%ebx | |
141 andl $240,%ebx | |
142 movl 4(%esi,%ebx,1),%ebp | |
143 movl (%esi,%ebx,1),%edx | |
144 movl 12(%esi,%ebx,1),%ecx | |
145 movl 8(%esi,%ebx,1),%ebx | |
# %eax stays a zero-extended byte index (only %al is written below);
# %edi is reused as the byte index 15..0 -- the input pointer lives in
# 112(%esp) meanwhile.
146 xorl %eax,%eax | |
147 movl $15,%edi | |
148 jmp L003x86_loop | |
149 .align 4,0x90 | |
150 L003x86_loop: | |
# First half: shift right by 4 across the four dwords, XOR the reduction
# dword chosen by the 4 bits shifted out into the top dword, then XOR in the
# table entry for the HIGH nibble of byte %edi.
151 movb %bl,%al | |
152 shrdl $4,%ecx,%ebx | |
153 andb $15,%al | |
154 shrdl $4,%edx,%ecx | |
155 shrdl $4,%ebp,%edx | |
156 shrl $4,%ebp | |
157 xorl 16(%esp,%eax,4),%ebp | |
158 movb (%esp,%edi,1),%al | |
159 andb $240,%al | |
160 xorl 8(%esi,%eax,1),%ebx | |
161 xorl 12(%esi,%eax,1),%ecx | |
162 xorl (%esi,%eax,1),%edx | |
163 xorl 4(%esi,%eax,1),%ebp | |
# Step to the next lower byte; leave once byte 0 has been consumed.
164 decl %edi | |
165 js L004x86_break | |
# Second half: same shift/reduce step, then the LOW nibble of the new byte.
166 movb %bl,%al | |
167 shrdl $4,%ecx,%ebx | |
168 andb $15,%al | |
169 shrdl $4,%edx,%ecx | |
170 shrdl $4,%ebp,%edx | |
171 shrl $4,%ebp | |
172 xorl 16(%esp,%eax,4),%ebp | |
173 movb (%esp,%edi,1),%al | |
174 shlb $4,%al | |
175 xorl 8(%esi,%eax,1),%ebx | |
176 xorl 12(%esi,%eax,1),%ecx | |
177 xorl (%esi,%eax,1),%edx | |
178 xorl 4(%esi,%eax,1),%ebp | |
179 jmp L003x86_loop | |
180 .align 4,0x90 | |
181 L004x86_break: | |
# Byte-swap back to memory order so the next block can be XORed in directly.
182 bswap %ebx | |
183 bswap %ecx | |
184 bswap %edx | |
185 bswap %ebp | |
# Advance the input pointer by 16. The store between cmpl and jb does not
# touch the flags; loop while pointer < end (unsigned compare).
186 movl 112(%esp),%edi | |
187 leal 16(%edi),%edi | |
188 cmpl 116(%esp),%edi | |
189 movl %edi,112(%esp) | |
190 jb L002x86_outer_loop | |
# Write the final accumulator back through arg 1.
191 movl 104(%esp),%edi | |
192 movl %ebx,12(%edi) | |
193 movl %ecx,8(%edi) | |
194 movl %edx,4(%edi) | |
195 movl %ebp,(%edi) | |
196 addl $84,%esp | |
197 popl %edi | |
198 popl %esi | |
199 popl %ebx | |
200 popl %ebp | |
201 ret | |
# __mmx_gmult_4bit_inner: file-local helper (no .globl) that processes one
# 16-byte block with MMX registers, fully unrolled. It uses a register-based
# calling convention rather than stack arguments:
#   in:  %ebx = value of block byte 15 (only the low 8 bits are used)
#        %edi = pointer to the 16-byte block; bytes 14 down to 0 are read
#        %esi = base of the table of 16-byte entries (indexed nibble * 16)
#        %eax = base of an 8-byte-per-entry remainder table; both callers in
#               this file pass the address of Lrem_4bit
#   out: %ebp, %edx, %ecx, %ebx = byte-swapped result dwords, which the
#        callers store at offsets 0, 4, 8 and 12 of the output block
#   clobbers: %edi, %mm0, %mm1, %mm2, flags. No emms is executed here; the
#        callers issue it after the call.
202 .align 4 | |
203 __mmx_gmult_4bit_inner: | |
# Split byte 15 into two table offsets: %ecx = low nibble * 16,
# %edx = high nibble * 16.
204 xorl %ecx,%ecx | |
205 movl %ebx,%edx | |
206 movb %dl,%cl | |
207 shlb $4,%cl | |
208 andl $240,%edx | |
# 128-bit accumulator: %mm0 = entry bytes 8..15, %mm1 = entry bytes 0..7.
209 movq 8(%esi,%ecx,1),%mm0 | |
210 movq (%esi,%ecx,1),%mm1 | |
# Each step below, interleaved with its neighbours, does the following:
#   - save the low 4 bits of %mm0 (in %ebp or %ebx, alternating)
#   - shift %mm1:%mm0 right by 4; %mm2 = %mm1 << 60 carries the 4 bits that
#     cross from %mm1 into the top of %mm0
#   - XOR in the table entry for the next nibble
#   - XOR the remainder entry (%eax + saved bits * 8) into %mm1
# The next block byte is fetched with "movb N(%edi),%cl", N = 14 down to 0.
211 movd %mm0,%ebp | |
212 psrlq $4,%mm0 | |
213 movq %mm1,%mm2 | |
214 psrlq $4,%mm1 | |
215 pxor 8(%esi,%edx,1),%mm0 | |
216 movb 14(%edi),%cl | |
217 psllq $60,%mm2 | |
218 andl $15,%ebp | |
219 pxor (%esi,%edx,1),%mm1 | |
220 movl %ecx,%edx | |
221 movd %mm0,%ebx | |
222 pxor %mm2,%mm0 | |
223 shlb $4,%cl | |
224 psrlq $4,%mm0 | |
225 movq %mm1,%mm2 | |
226 psrlq $4,%mm1 | |
227 pxor 8(%esi,%ecx,1),%mm0 | |
228 psllq $60,%mm2 | |
229 andl $240,%edx | |
230 pxor (%eax,%ebp,8),%mm1 | |
231 andl $15,%ebx | |
232 pxor (%esi,%ecx,1),%mm1 | |
233 movd %mm0,%ebp | |
234 pxor %mm2,%mm0 | |
235 psrlq $4,%mm0 | |
236 movq %mm1,%mm2 | |
237 psrlq $4,%mm1 | |
238 pxor 8(%esi,%edx,1),%mm0 | |
239 movb 13(%edi),%cl | |
240 psllq $60,%mm2 | |
241 pxor (%eax,%ebx,8),%mm1 | |
242 andl $15,%ebp | |
243 pxor (%esi,%edx,1),%mm1 | |
244 movl %ecx,%edx | |
245 movd %mm0,%ebx | |
246 pxor %mm2,%mm0 | |
247 shlb $4,%cl | |
248 psrlq $4,%mm0 | |
249 movq %mm1,%mm2 | |
250 psrlq $4,%mm1 | |
251 pxor 8(%esi,%ecx,1),%mm0 | |
252 psllq $60,%mm2 | |
253 andl $240,%edx | |
254 pxor (%eax,%ebp,8),%mm1 | |
255 andl $15,%ebx | |
256 pxor (%esi,%ecx,1),%mm1 | |
257 movd %mm0,%ebp | |
258 pxor %mm2,%mm0 | |
259 psrlq $4,%mm0 | |
260 movq %mm1,%mm2 | |
261 psrlq $4,%mm1 | |
262 pxor 8(%esi,%edx,1),%mm0 | |
263 movb 12(%edi),%cl | |
264 psllq $60,%mm2 | |
265 pxor (%eax,%ebx,8),%mm1 | |
266 andl $15,%ebp | |
267 pxor (%esi,%edx,1),%mm1 | |
268 movl %ecx,%edx | |
269 movd %mm0,%ebx | |
270 pxor %mm2,%mm0 | |
271 shlb $4,%cl | |
272 psrlq $4,%mm0 | |
273 movq %mm1,%mm2 | |
274 psrlq $4,%mm1 | |
275 pxor 8(%esi,%ecx,1),%mm0 | |
276 psllq $60,%mm2 | |
277 andl $240,%edx | |
278 pxor (%eax,%ebp,8),%mm1 | |
279 andl $15,%ebx | |
280 pxor (%esi,%ecx,1),%mm1 | |
281 movd %mm0,%ebp | |
282 pxor %mm2,%mm0 | |
283 psrlq $4,%mm0 | |
284 movq %mm1,%mm2 | |
285 psrlq $4,%mm1 | |
286 pxor 8(%esi,%edx,1),%mm0 | |
287 movb 11(%edi),%cl | |
288 psllq $60,%mm2 | |
289 pxor (%eax,%ebx,8),%mm1 | |
290 andl $15,%ebp | |
291 pxor (%esi,%edx,1),%mm1 | |
292 movl %ecx,%edx | |
293 movd %mm0,%ebx | |
294 pxor %mm2,%mm0 | |
295 shlb $4,%cl | |
296 psrlq $4,%mm0 | |
297 movq %mm1,%mm2 | |
298 psrlq $4,%mm1 | |
299 pxor 8(%esi,%ecx,1),%mm0 | |
300 psllq $60,%mm2 | |
301 andl $240,%edx | |
302 pxor (%eax,%ebp,8),%mm1 | |
303 andl $15,%ebx | |
304 pxor (%esi,%ecx,1),%mm1 | |
305 movd %mm0,%ebp | |
306 pxor %mm2,%mm0 | |
307 psrlq $4,%mm0 | |
308 movq %mm1,%mm2 | |
309 psrlq $4,%mm1 | |
310 pxor 8(%esi,%edx,1),%mm0 | |
311 movb 10(%edi),%cl | |
312 psllq $60,%mm2 | |
313 pxor (%eax,%ebx,8),%mm1 | |
314 andl $15,%ebp | |
315 pxor (%esi,%edx,1),%mm1 | |
316 movl %ecx,%edx | |
317 movd %mm0,%ebx | |
318 pxor %mm2,%mm0 | |
319 shlb $4,%cl | |
320 psrlq $4,%mm0 | |
321 movq %mm1,%mm2 | |
322 psrlq $4,%mm1 | |
323 pxor 8(%esi,%ecx,1),%mm0 | |
324 psllq $60,%mm2 | |
325 andl $240,%edx | |
326 pxor (%eax,%ebp,8),%mm1 | |
327 andl $15,%ebx | |
328 pxor (%esi,%ecx,1),%mm1 | |
329 movd %mm0,%ebp | |
330 pxor %mm2,%mm0 | |
331 psrlq $4,%mm0 | |
332 movq %mm1,%mm2 | |
333 psrlq $4,%mm1 | |
334 pxor 8(%esi,%edx,1),%mm0 | |
335 movb 9(%edi),%cl | |
336 psllq $60,%mm2 | |
337 pxor (%eax,%ebx,8),%mm1 | |
338 andl $15,%ebp | |
339 pxor (%esi,%edx,1),%mm1 | |
340 movl %ecx,%edx | |
341 movd %mm0,%ebx | |
342 pxor %mm2,%mm0 | |
343 shlb $4,%cl | |
344 psrlq $4,%mm0 | |
345 movq %mm1,%mm2 | |
346 psrlq $4,%mm1 | |
347 pxor 8(%esi,%ecx,1),%mm0 | |
348 psllq $60,%mm2 | |
349 andl $240,%edx | |
350 pxor (%eax,%ebp,8),%mm1 | |
351 andl $15,%ebx | |
352 pxor (%esi,%ecx,1),%mm1 | |
353 movd %mm0,%ebp | |
354 pxor %mm2,%mm0 | |
355 psrlq $4,%mm0 | |
356 movq %mm1,%mm2 | |
357 psrlq $4,%mm1 | |
358 pxor 8(%esi,%edx,1),%mm0 | |
359 movb 8(%edi),%cl | |
360 psllq $60,%mm2 | |
361 pxor (%eax,%ebx,8),%mm1 | |
362 andl $15,%ebp | |
363 pxor (%esi,%edx,1),%mm1 | |
364 movl %ecx,%edx | |
365 movd %mm0,%ebx | |
366 pxor %mm2,%mm0 | |
367 shlb $4,%cl | |
368 psrlq $4,%mm0 | |
369 movq %mm1,%mm2 | |
370 psrlq $4,%mm1 | |
371 pxor 8(%esi,%ecx,1),%mm0 | |
372 psllq $60,%mm2 | |
373 andl $240,%edx | |
374 pxor (%eax,%ebp,8),%mm1 | |
375 andl $15,%ebx | |
376 pxor (%esi,%ecx,1),%mm1 | |
377 movd %mm0,%ebp | |
378 pxor %mm2,%mm0 | |
379 psrlq $4,%mm0 | |
380 movq %mm1,%mm2 | |
381 psrlq $4,%mm1 | |
382 pxor 8(%esi,%edx,1),%mm0 | |
383 movb 7(%edi),%cl | |
384 psllq $60,%mm2 | |
385 pxor (%eax,%ebx,8),%mm1 | |
386 andl $15,%ebp | |
387 pxor (%esi,%edx,1),%mm1 | |
388 movl %ecx,%edx | |
389 movd %mm0,%ebx | |
390 pxor %mm2,%mm0 | |
391 shlb $4,%cl | |
392 psrlq $4,%mm0 | |
393 movq %mm1,%mm2 | |
394 psrlq $4,%mm1 | |
395 pxor 8(%esi,%ecx,1),%mm0 | |
396 psllq $60,%mm2 | |
397 andl $240,%edx | |
398 pxor (%eax,%ebp,8),%mm1 | |
399 andl $15,%ebx | |
400 pxor (%esi,%ecx,1),%mm1 | |
401 movd %mm0,%ebp | |
402 pxor %mm2,%mm0 | |
403 psrlq $4,%mm0 | |
404 movq %mm1,%mm2 | |
405 psrlq $4,%mm1 | |
406 pxor 8(%esi,%edx,1),%mm0 | |
407 movb 6(%edi),%cl | |
408 psllq $60,%mm2 | |
409 pxor (%eax,%ebx,8),%mm1 | |
410 andl $15,%ebp | |
411 pxor (%esi,%edx,1),%mm1 | |
412 movl %ecx,%edx | |
413 movd %mm0,%ebx | |
414 pxor %mm2,%mm0 | |
415 shlb $4,%cl | |
416 psrlq $4,%mm0 | |
417 movq %mm1,%mm2 | |
418 psrlq $4,%mm1 | |
419 pxor 8(%esi,%ecx,1),%mm0 | |
420 psllq $60,%mm2 | |
421 andl $240,%edx | |
422 pxor (%eax,%ebp,8),%mm1 | |
423 andl $15,%ebx | |
424 pxor (%esi,%ecx,1),%mm1 | |
425 movd %mm0,%ebp | |
426 pxor %mm2,%mm0 | |
427 psrlq $4,%mm0 | |
428 movq %mm1,%mm2 | |
429 psrlq $4,%mm1 | |
430 pxor 8(%esi,%edx,1),%mm0 | |
431 movb 5(%edi),%cl | |
432 psllq $60,%mm2 | |
433 pxor (%eax,%ebx,8),%mm1 | |
434 andl $15,%ebp | |
435 pxor (%esi,%edx,1),%mm1 | |
436 movl %ecx,%edx | |
437 movd %mm0,%ebx | |
438 pxor %mm2,%mm0 | |
439 shlb $4,%cl | |
440 psrlq $4,%mm0 | |
441 movq %mm1,%mm2 | |
442 psrlq $4,%mm1 | |
443 pxor 8(%esi,%ecx,1),%mm0 | |
444 psllq $60,%mm2 | |
445 andl $240,%edx | |
446 pxor (%eax,%ebp,8),%mm1 | |
447 andl $15,%ebx | |
448 pxor (%esi,%ecx,1),%mm1 | |
449 movd %mm0,%ebp | |
450 pxor %mm2,%mm0 | |
451 psrlq $4,%mm0 | |
452 movq %mm1,%mm2 | |
453 psrlq $4,%mm1 | |
454 pxor 8(%esi,%edx,1),%mm0 | |
455 movb 4(%edi),%cl | |
456 psllq $60,%mm2 | |
457 pxor (%eax,%ebx,8),%mm1 | |
458 andl $15,%ebp | |
459 pxor (%esi,%edx,1),%mm1 | |
460 movl %ecx,%edx | |
461 movd %mm0,%ebx | |
462 pxor %mm2,%mm0 | |
463 shlb $4,%cl | |
464 psrlq $4,%mm0 | |
465 movq %mm1,%mm2 | |
466 psrlq $4,%mm1 | |
467 pxor 8(%esi,%ecx,1),%mm0 | |
468 psllq $60,%mm2 | |
469 andl $240,%edx | |
470 pxor (%eax,%ebp,8),%mm1 | |
471 andl $15,%ebx | |
472 pxor (%esi,%ecx,1),%mm1 | |
473 movd %mm0,%ebp | |
474 pxor %mm2,%mm0 | |
475 psrlq $4,%mm0 | |
476 movq %mm1,%mm2 | |
477 psrlq $4,%mm1 | |
478 pxor 8(%esi,%edx,1),%mm0 | |
479 movb 3(%edi),%cl | |
480 psllq $60,%mm2 | |
481 pxor (%eax,%ebx,8),%mm1 | |
482 andl $15,%ebp | |
483 pxor (%esi,%edx,1),%mm1 | |
484 movl %ecx,%edx | |
485 movd %mm0,%ebx | |
486 pxor %mm2,%mm0 | |
487 shlb $4,%cl | |
488 psrlq $4,%mm0 | |
489 movq %mm1,%mm2 | |
490 psrlq $4,%mm1 | |
491 pxor 8(%esi,%ecx,1),%mm0 | |
492 psllq $60,%mm2 | |
493 andl $240,%edx | |
494 pxor (%eax,%ebp,8),%mm1 | |
495 andl $15,%ebx | |
496 pxor (%esi,%ecx,1),%mm1 | |
497 movd %mm0,%ebp | |
498 pxor %mm2,%mm0 | |
499 psrlq $4,%mm0 | |
500 movq %mm1,%mm2 | |
501 psrlq $4,%mm1 | |
502 pxor 8(%esi,%edx,1),%mm0 | |
503 movb 2(%edi),%cl | |
504 psllq $60,%mm2 | |
505 pxor (%eax,%ebx,8),%mm1 | |
506 andl $15,%ebp | |
507 pxor (%esi,%edx,1),%mm1 | |
508 movl %ecx,%edx | |
509 movd %mm0,%ebx | |
510 pxor %mm2,%mm0 | |
511 shlb $4,%cl | |
512 psrlq $4,%mm0 | |
513 movq %mm1,%mm2 | |
514 psrlq $4,%mm1 | |
515 pxor 8(%esi,%ecx,1),%mm0 | |
516 psllq $60,%mm2 | |
517 andl $240,%edx | |
518 pxor (%eax,%ebp,8),%mm1 | |
519 andl $15,%ebx | |
520 pxor (%esi,%ecx,1),%mm1 | |
521 movd %mm0,%ebp | |
522 pxor %mm2,%mm0 | |
523 psrlq $4,%mm0 | |
524 movq %mm1,%mm2 | |
525 psrlq $4,%mm1 | |
526 pxor 8(%esi,%edx,1),%mm0 | |
527 movb 1(%edi),%cl | |
528 psllq $60,%mm2 | |
529 pxor (%eax,%ebx,8),%mm1 | |
530 andl $15,%ebp | |
531 pxor (%esi,%edx,1),%mm1 | |
532 movl %ecx,%edx | |
533 movd %mm0,%ebx | |
534 pxor %mm2,%mm0 | |
535 shlb $4,%cl | |
536 psrlq $4,%mm0 | |
537 movq %mm1,%mm2 | |
538 psrlq $4,%mm1 | |
539 pxor 8(%esi,%ecx,1),%mm0 | |
540 psllq $60,%mm2 | |
541 andl $240,%edx | |
542 pxor (%eax,%ebp,8),%mm1 | |
543 andl $15,%ebx | |
544 pxor (%esi,%ecx,1),%mm1 | |
545 movd %mm0,%ebp | |
546 pxor %mm2,%mm0 | |
547 psrlq $4,%mm0 | |
548 movq %mm1,%mm2 | |
549 psrlq $4,%mm1 | |
550 pxor 8(%esi,%edx,1),%mm0 | |
551 movb (%edi),%cl | |
552 psllq $60,%mm2 | |
553 pxor (%eax,%ebx,8),%mm1 | |
554 andl $15,%ebp | |
555 pxor (%esi,%edx,1),%mm1 | |
556 movl %ecx,%edx | |
557 movd %mm0,%ebx | |
558 pxor %mm2,%mm0 | |
559 shlb $4,%cl | |
560 psrlq $4,%mm0 | |
561 movq %mm1,%mm2 | |
562 psrlq $4,%mm1 | |
563 pxor 8(%esi,%ecx,1),%mm0 | |
564 psllq $60,%mm2 | |
565 andl $240,%edx | |
566 pxor (%eax,%ebp,8),%mm1 | |
567 andl $15,%ebx | |
568 pxor (%esi,%ecx,1),%mm1 | |
569 movd %mm0,%ebp | |
570 pxor %mm2,%mm0 | |
571 psrlq $4,%mm0 | |
572 movq %mm1,%mm2 | |
573 psrlq $4,%mm1 | |
574 pxor 8(%esi,%edx,1),%mm0 | |
575 psllq $60,%mm2 | |
576 pxor (%eax,%ebx,8),%mm1 | |
577 andl $15,%ebp | |
578 pxor (%esi,%edx,1),%mm1 | |
# Tail: byte 0 has been consumed. Move the accumulator to integer registers:
# %ebx / %ecx = low / high dword of %mm0, %edx / %ebp = low / high dword of
# %mm1.
579 movd %mm0,%ebx | |
580 pxor %mm2,%mm0 | |
# The last remainder term is applied in integer form. Its non-zero dword
# (offset 4 of the 8-byte entry) is loaded into %edi (overwriting the block
# pointer), shifted left by 4 and XORed into the top dword.
581 movl 4(%eax,%ebp,8),%edi | |
582 psrlq $32,%mm0 | |
583 movd %mm1,%edx | |
584 psrlq $32,%mm1 | |
585 movd %mm0,%ecx | |
586 movd %mm1,%ebp | |
587 shll $4,%edi | |
# Byte-swap each result dword; %ebp is swapped only after the remainder XOR.
588 bswap %ebx | |
589 bswap %edx | |
590 bswap %ecx | |
591 xorl %edi,%ebp | |
592 bswap %ebp | |
593 ret | |
# _gcm_gmult_4bit_mmx: MMX variant of the in-place 16-byte block update; a
# thin wrapper around __mmx_gmult_4bit_inner.
#   arg 1 (20(%esp) after the four pushes): pointer to the 16-byte block,
#          read by the helper and overwritten with the result.
#   arg 2 (24(%esp)): base of the table of 16-byte entries.
# NOTE(review): presumably the same contract as gcm_gmult_4bit(Xi, Htable)
# in OpenSSL -- confirm against the C prototype.
# %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx, %edx and the MMX
# registers are not preserved (emms is executed before returning).
594 .globl _gcm_gmult_4bit_mmx | |
595 .align 4 | |
596 _gcm_gmult_4bit_mmx: | |
597 L_gcm_gmult_4bit_mmx_begin: | |
598 pushl %ebp | |
599 pushl %ebx | |
600 pushl %esi | |
601 pushl %edi | |
602 movl 20(%esp),%edi | |
603 movl 24(%esp),%esi | |
# call/pop obtains the current instruction address so that the address of
# Lrem_4bit can be formed relative to it, without an absolute reference.
604 call L005pic_point | |
605 L005pic_point: | |
606 popl %eax | |
607 leal Lrem_4bit-L005pic_point(%eax),%eax | |
# Helper inputs: %ebx = block byte 15, %edi = block, %esi = table,
# %eax = Lrem_4bit.
608 movzbl 15(%edi),%ebx | |
609 call __mmx_gmult_4bit_inner | |
# The helper overwrites %edi, so reload the output pointer from the stack.
610 movl 20(%esp),%edi | |
# Clear the MMX state before returning to the caller.
611 emms | |
612 movl %ebx,12(%edi) | |
613 movl %edx,4(%edi) | |
614 movl %ecx,8(%edi) | |
615 movl %ebp,(%edi) | |
616 popl %edi | |
617 popl %esi | |
618 popl %ebx | |
619 popl %ebp | |
620 ret | |
# _gcm_ghash_4bit_mmx: MMX variant of the buffer fold. Each 16-byte input
# block is XORed into the accumulator and passed to __mmx_gmult_4bit_inner.
#   arg 1 (20(%esp) after the four pushes): pointer to the 16-byte
#          accumulator; read once at the start, written once at the end.
#   arg 2 (24(%esp)): base of the table of 16-byte entries.
#   arg 3 (28(%esp)): input pointer; its stack slot is reused to hold the
#          current input position across the helper call.
#   arg 4 (32(%esp)): byte count; its slot is overwritten with input + count.
# NOTE(review): presumably gcm_ghash_4bit(Xi, Htable, inp, len) in OpenSSL --
# confirm against the C prototype.
# The loop bound is tested only at the bottom of the loop, so one 16-byte
# block is always consumed, even when the count is 0.
# %ebp, %ebx, %esi, %edi are saved and restored; %eax, %ecx, %edx and the MMX
# registers are not preserved (emms is executed before returning).
621 .globl _gcm_ghash_4bit_mmx | |
622 .align 4 | |
623 _gcm_ghash_4bit_mmx: | |
624 L_gcm_ghash_4bit_mmx_begin: | |
625 pushl %ebp | |
626 pushl %ebx | |
627 pushl %esi | |
628 pushl %edi | |
629 movl 20(%esp),%ebp | |
630 movl 24(%esp),%esi | |
631 movl 28(%esp),%edi | |
632 movl 32(%esp),%ecx | |
# call/pop obtains the current instruction address so that the address of
# Lrem_4bit can be formed relative to it; %eax keeps it for the whole loop.
633 call L006pic_point | |
634 L006pic_point: | |
635 popl %eax | |
636 leal Lrem_4bit-L006pic_point(%eax),%eax | |
# Turn the byte count into an end pointer stored in the arg 4 slot.
637 addl %edi,%ecx | |
638 movl %ecx,32(%esp) | |
# Reserve 20 bytes; 0..15(%esp) holds the block handed to the helper. The
# argument slots are now at 40 / 44 / 48 / 52(%esp).
639 subl $20,%esp | |
# Load the accumulator; %ebp is loaded last because it is the base pointer.
640 movl 12(%ebp),%ebx | |
641 movl 4(%ebp),%edx | |
642 movl 8(%ebp),%ecx | |
643 movl (%ebp),%ebp | |
644 jmp L007mmx_outer_loop | |
645 .align 4,0x90 | |
646 L007mmx_outer_loop: | |
# XOR the next 16 input bytes into the accumulator.
647 xorl 12(%edi),%ebx | |
648 xorl 4(%edi),%edx | |
649 xorl 8(%edi),%ecx | |
650 xorl (%edi),%ebp | |
# Save the input position in the arg 3 slot (the helper overwrites %edi) and
# spill the block to the stack for the helper to read.
651 movl %edi,48(%esp) | |
652 movl %ebx,12(%esp) | |
653 movl %edx,4(%esp) | |
654 movl %ecx,8(%esp) | |
655 movl %ebp,(%esp) | |
# Helper inputs: %edi = spilled block, %ebx = its byte 15 (the top byte of
# the dword just stored at 12(%esp)).
656 movl %esp,%edi | |
657 shrl $24,%ebx | |
658 call __mmx_gmult_4bit_inner | |
# Advance by 16 and loop while position < end (unsigned compare). The new
# position is written back to the slot at the top of the next iteration.
659 movl 48(%esp),%edi | |
660 leal 16(%edi),%edi | |
661 cmpl 52(%esp),%edi | |
662 jb L007mmx_outer_loop | |
# Write the final accumulator back through arg 1 (now at 40(%esp)).
663 movl 40(%esp),%edi | |
664 emms | |
665 movl %ebx,12(%edi) | |
666 movl %edx,4(%edi) | |
667 movl %ecx,8(%edi) | |
668 movl %ebp,(%edi) | |
669 addl $20,%esp | |
670 popl %edi | |
671 popl %esi | |
672 popl %ebx | |
673 popl %ebp | |
674 ret | |
# Lrem_4bit: 16 entries of 8 bytes each, stored as two .long values per
# entry (a zero dword followed by the constant). The MMX code indexes it as
# (base, nibble, 8). The non-zero dwords are multiples of 0x1C20000
# (29491200) combined by XOR; the x86 routines build the same constants
# shifted left by 4 on their stack.
675 .align 6,0x90 | |
676 Lrem_4bit: | |
677 .long 0,0,0,29491200,0,58982400,0,38141952 | |
678 .long 0,117964800,0,113901568,0,76283904,0,88997888 | |
679 .long 0,235929600,0,265420800,0,227803136,0,206962688 | |
680 .long 0,152567808,0,148504576,0,177995776,0,190709760 | |
# L008rem_8bit: 256 16-bit entries (32 rows of 8). No code visible in this
# file references this label.
# NOTE(review): presumably the byte-indexed counterpart of Lrem_4bit, kept
# for another code path of the generator -- confirm before removing.
681 .align 6,0x90 | |
682 L008rem_8bit: | |
683 .value 0,450,900,582,1800,1738,1164,1358 | |
684 .value 3600,4050,3476,3158,2328,2266,2716,2910 | |
685 .value 7200,7650,8100,7782,6952,6890,6316,6510 | |
686 .value 4656,5106,4532,4214,5432,5370,5820,6014 | |
687 .value 14400,14722,15300,14854,16200,16010,15564,15630 | |
688 .value 13904,14226,13780,13334,12632,12442,13020,13086 | |
689 .value 9312,9634,10212,9766,9064,8874,8428,8494 | |
690 .value 10864,11186,10740,10294,11640,11450,12028,12094 | |
691 .value 28800,28994,29444,29382,30600,30282,29708,30158 | |
692 .value 32400,32594,32020,31958,31128,30810,31260,31710 | |
693 .value 27808,28002,28452,28390,27560,27242,26668,27118 | |
694 .value 25264,25458,24884,24822,26040,25722,26172,26622 | |
695 .value 18624,18690,19268,19078,20424,19978,19532,19854 | |
696 .value 18128,18194,17748,17558,16856,16410,16988,17310 | |
697 .value 21728,21794,22372,22182,21480,21034,20588,20910 | |
698 .value 23280,23346,22900,22710,24056,23610,24188,24510 | |
699 .value 57600,57538,57988,58182,58888,59338,58764,58446 | |
700 .value 61200,61138,60564,60758,59416,59866,60316,59998 | |
701 .value 64800,64738,65188,65382,64040,64490,63916,63598 | |
702 .value 62256,62194,61620,61814,62520,62970,63420,63102 | |
703 .value 55616,55426,56004,56070,56904,57226,56780,56334 | |
704 .value 55120,54930,54484,54550,53336,53658,54236,53790 | |
705 .value 50528,50338,50916,50982,49768,50090,49644,49198 | |
706 .value 52080,51890,51444,51510,52344,52666,53244,52798 | |
707 .value 37248,36930,37380,37830,38536,38730,38156,38094 | |
708 .value 40848,40530,39956,40406,39064,39258,39708,39646 | |
709 .value 36256,35938,36388,36838,35496,35690,35116,35054 | |
710 .value 33712,33394,32820,33270,33976,34170,34620,34558 | |
711 .value 43456,43010,43588,43910,44744,44810,44364,44174 | |
712 .value 42960,42514,42068,42390,41176,41242,41820,41630 | |
713 .value 46560,46114,46692,47014,45800,45866,45420,45230 | |
714 .value 48112,47666,47220,47542,48376,48442,49020,48830 | |
# NUL-terminated ASCII identification string: "GHASH for x86, CRYPTOGAMS by"
# followed by an e-mail address in angle brackets.
715 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 | |
716 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 | |
717 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 | |
718 .byte 0 | |
OLD | NEW |