OLD | NEW |
1 #if defined(__i386__) | 1 #if defined(__i386__) |
2 .file "src/crypto/bn/asm/x86-mont.S" | 2 .file "src/crypto/bn/asm/x86-mont.S" |
3 .text | 3 .text |
4 .globl _bn_mul_mont | 4 .globl _bn_mul_mont |
5 .private_extern _bn_mul_mont | 5 .private_extern _bn_mul_mont |
6 .align 4 | 6 .align 4 |
7 _bn_mul_mont: | 7 _bn_mul_mont: |
8 L_bn_mul_mont_begin: | 8 L_bn_mul_mont_begin: |
9 pushl %ebp | 9 pushl %ebp |
10 pushl %ebx | 10 pushl %ebx |
11 pushl %esi | 11 pushl %esi |
12 pushl %edi | 12 pushl %edi |
13 xorl %eax,%eax | 13 xorl %eax,%eax |
14 movl 40(%esp),%edi | 14 movl 40(%esp),%edi |
15 cmpl $4,%edi | 15 cmpl $4,%edi |
16 jl L000just_leave | 16 jl L000just_leave |
17 leal 20(%esp),%esi | 17 leal 20(%esp),%esi |
18 leal 24(%esp),%edx | 18 leal 24(%esp),%edx |
19 movl %esp,%ebp | |
20 addl $2,%edi | 19 addl $2,%edi |
21 negl %edi | 20 negl %edi |
22 » leal» -32(%esp,%edi,4),%esp | 21 » leal» -32(%esp,%edi,4),%ebp |
23 negl %edi | 22 negl %edi |
24 » movl» %esp,%eax | 23 » movl» %ebp,%eax |
25 subl %edx,%eax | 24 subl %edx,%eax |
26 andl $2047,%eax | 25 andl $2047,%eax |
27 » subl» %eax,%esp | 26 » subl» %eax,%ebp |
28 » xorl» %esp,%edx | 27 » xorl» %ebp,%edx |
29 andl $2048,%edx | 28 andl $2048,%edx |
30 xorl $2048,%edx | 29 xorl $2048,%edx |
31 » subl» %edx,%esp | 30 » subl» %edx,%ebp |
32 » andl» $-64,%esp | 31 » andl» $-64,%ebp |
| 32 » movl» %esp,%eax |
| 33 » subl» %ebp,%eax |
| 34 » andl» $-4096,%eax |
| 35 » movl» %esp,%edx |
| 36 » leal» (%ebp,%eax,1),%esp |
| 37 » movl» (%esp),%eax |
| 38 » cmpl» %ebp,%esp |
| 39 » ja» L001page_walk |
| 40 » jmp» L002page_walk_done |
| 41 .align» 4,0x90 |
| 42 L001page_walk: |
| 43 » leal» -4096(%esp),%esp |
| 44 » movl» (%esp),%eax |
| 45 » cmpl» %ebp,%esp |
| 46 » ja» L001page_walk |
| 47 L002page_walk_done: |
33 movl (%esi),%eax | 48 movl (%esi),%eax |
34 movl 4(%esi),%ebx | 49 movl 4(%esi),%ebx |
35 movl 8(%esi),%ecx | 50 movl 8(%esi),%ecx |
36 » movl» 12(%esi),%edx | 51 » movl» 12(%esi),%ebp |
37 movl 16(%esi),%esi | 52 movl 16(%esi),%esi |
38 movl (%esi),%esi | 53 movl (%esi),%esi |
39 movl %eax,4(%esp) | 54 movl %eax,4(%esp) |
40 movl %ebx,8(%esp) | 55 movl %ebx,8(%esp) |
41 movl %ecx,12(%esp) | 56 movl %ecx,12(%esp) |
42 » movl» %edx,16(%esp) | 57 » movl» %ebp,16(%esp) |
43 movl %esi,20(%esp) | 58 movl %esi,20(%esp) |
44 leal -3(%edi),%ebx | 59 leal -3(%edi),%ebx |
45 » movl» %ebp,24(%esp) | 60 » movl» %edx,24(%esp) |
46 » call» L001PIC_me_up | 61 » call» L003PIC_me_up |
47 L001PIC_me_up: | 62 L003PIC_me_up: |
48 popl %eax | 63 popl %eax |
49 » movl» L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax | 64 » movl» L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax |
50 btl $26,(%eax) | 65 btl $26,(%eax) |
51 » jnc» L002non_sse2 | 66 » jnc» L004non_sse2 |
52 movl $-1,%eax | 67 movl $-1,%eax |
53 movd %eax,%mm7 | 68 movd %eax,%mm7 |
54 movl 8(%esp),%esi | 69 movl 8(%esp),%esi |
55 movl 12(%esp),%edi | 70 movl 12(%esp),%edi |
56 movl 16(%esp),%ebp | 71 movl 16(%esp),%ebp |
57 xorl %edx,%edx | 72 xorl %edx,%edx |
58 xorl %ecx,%ecx | 73 xorl %ecx,%ecx |
59 movd (%edi),%mm4 | 74 movd (%edi),%mm4 |
60 movd (%esi),%mm5 | 75 movd (%esi),%mm5 |
61 movd (%ebp),%mm3 | 76 movd (%ebp),%mm3 |
62 pmuludq %mm4,%mm5 | 77 pmuludq %mm4,%mm5 |
63 movq %mm5,%mm2 | 78 movq %mm5,%mm2 |
64 movq %mm5,%mm0 | 79 movq %mm5,%mm0 |
65 pand %mm7,%mm0 | 80 pand %mm7,%mm0 |
66 pmuludq 20(%esp),%mm5 | 81 pmuludq 20(%esp),%mm5 |
67 pmuludq %mm5,%mm3 | 82 pmuludq %mm5,%mm3 |
68 paddq %mm0,%mm3 | 83 paddq %mm0,%mm3 |
69 movd 4(%ebp),%mm1 | 84 movd 4(%ebp),%mm1 |
70 movd 4(%esi),%mm0 | 85 movd 4(%esi),%mm0 |
71 psrlq $32,%mm2 | 86 psrlq $32,%mm2 |
72 psrlq $32,%mm3 | 87 psrlq $32,%mm3 |
73 incl %ecx | 88 incl %ecx |
74 .align 4,0x90 | 89 .align 4,0x90 |
75 L0031st: | 90 L0051st: |
76 pmuludq %mm4,%mm0 | 91 pmuludq %mm4,%mm0 |
77 pmuludq %mm5,%mm1 | 92 pmuludq %mm5,%mm1 |
78 paddq %mm0,%mm2 | 93 paddq %mm0,%mm2 |
79 paddq %mm1,%mm3 | 94 paddq %mm1,%mm3 |
80 movq %mm2,%mm0 | 95 movq %mm2,%mm0 |
81 pand %mm7,%mm0 | 96 pand %mm7,%mm0 |
82 movd 4(%ebp,%ecx,4),%mm1 | 97 movd 4(%ebp,%ecx,4),%mm1 |
83 paddq %mm0,%mm3 | 98 paddq %mm0,%mm3 |
84 movd 4(%esi,%ecx,4),%mm0 | 99 movd 4(%esi,%ecx,4),%mm0 |
85 psrlq $32,%mm2 | 100 psrlq $32,%mm2 |
86 movd %mm3,28(%esp,%ecx,4) | 101 movd %mm3,28(%esp,%ecx,4) |
87 psrlq $32,%mm3 | 102 psrlq $32,%mm3 |
88 leal 1(%ecx),%ecx | 103 leal 1(%ecx),%ecx |
89 cmpl %ebx,%ecx | 104 cmpl %ebx,%ecx |
90 » jl» L0031st | 105 » jl» L0051st |
91 pmuludq %mm4,%mm0 | 106 pmuludq %mm4,%mm0 |
92 pmuludq %mm5,%mm1 | 107 pmuludq %mm5,%mm1 |
93 paddq %mm0,%mm2 | 108 paddq %mm0,%mm2 |
94 paddq %mm1,%mm3 | 109 paddq %mm1,%mm3 |
95 movq %mm2,%mm0 | 110 movq %mm2,%mm0 |
96 pand %mm7,%mm0 | 111 pand %mm7,%mm0 |
97 paddq %mm0,%mm3 | 112 paddq %mm0,%mm3 |
98 movd %mm3,28(%esp,%ecx,4) | 113 movd %mm3,28(%esp,%ecx,4) |
99 psrlq $32,%mm2 | 114 psrlq $32,%mm2 |
100 psrlq $32,%mm3 | 115 psrlq $32,%mm3 |
101 paddq %mm2,%mm3 | 116 paddq %mm2,%mm3 |
102 movq %mm3,32(%esp,%ebx,4) | 117 movq %mm3,32(%esp,%ebx,4) |
103 incl %edx | 118 incl %edx |
104 L004outer: | 119 L006outer: |
105 xorl %ecx,%ecx | 120 xorl %ecx,%ecx |
106 movd (%edi,%edx,4),%mm4 | 121 movd (%edi,%edx,4),%mm4 |
107 movd (%esi),%mm5 | 122 movd (%esi),%mm5 |
108 movd 32(%esp),%mm6 | 123 movd 32(%esp),%mm6 |
109 movd (%ebp),%mm3 | 124 movd (%ebp),%mm3 |
110 pmuludq %mm4,%mm5 | 125 pmuludq %mm4,%mm5 |
111 paddq %mm6,%mm5 | 126 paddq %mm6,%mm5 |
112 movq %mm5,%mm0 | 127 movq %mm5,%mm0 |
113 movq %mm5,%mm2 | 128 movq %mm5,%mm2 |
114 pand %mm7,%mm0 | 129 pand %mm7,%mm0 |
115 pmuludq 20(%esp),%mm5 | 130 pmuludq 20(%esp),%mm5 |
116 pmuludq %mm5,%mm3 | 131 pmuludq %mm5,%mm3 |
117 paddq %mm0,%mm3 | 132 paddq %mm0,%mm3 |
118 movd 36(%esp),%mm6 | 133 movd 36(%esp),%mm6 |
119 movd 4(%ebp),%mm1 | 134 movd 4(%ebp),%mm1 |
120 movd 4(%esi),%mm0 | 135 movd 4(%esi),%mm0 |
121 psrlq $32,%mm2 | 136 psrlq $32,%mm2 |
122 psrlq $32,%mm3 | 137 psrlq $32,%mm3 |
123 paddq %mm6,%mm2 | 138 paddq %mm6,%mm2 |
124 incl %ecx | 139 incl %ecx |
125 decl %ebx | 140 decl %ebx |
126 L005inner: | 141 L007inner: |
127 pmuludq %mm4,%mm0 | 142 pmuludq %mm4,%mm0 |
128 pmuludq %mm5,%mm1 | 143 pmuludq %mm5,%mm1 |
129 paddq %mm0,%mm2 | 144 paddq %mm0,%mm2 |
130 paddq %mm1,%mm3 | 145 paddq %mm1,%mm3 |
131 movq %mm2,%mm0 | 146 movq %mm2,%mm0 |
132 movd 36(%esp,%ecx,4),%mm6 | 147 movd 36(%esp,%ecx,4),%mm6 |
133 pand %mm7,%mm0 | 148 pand %mm7,%mm0 |
134 movd 4(%ebp,%ecx,4),%mm1 | 149 movd 4(%ebp,%ecx,4),%mm1 |
135 paddq %mm0,%mm3 | 150 paddq %mm0,%mm3 |
136 movd 4(%esi,%ecx,4),%mm0 | 151 movd 4(%esi,%ecx,4),%mm0 |
137 psrlq $32,%mm2 | 152 psrlq $32,%mm2 |
138 movd %mm3,28(%esp,%ecx,4) | 153 movd %mm3,28(%esp,%ecx,4) |
139 psrlq $32,%mm3 | 154 psrlq $32,%mm3 |
140 paddq %mm6,%mm2 | 155 paddq %mm6,%mm2 |
141 decl %ebx | 156 decl %ebx |
142 leal 1(%ecx),%ecx | 157 leal 1(%ecx),%ecx |
143 » jnz» L005inner | 158 » jnz» L007inner |
144 movl %ecx,%ebx | 159 movl %ecx,%ebx |
145 pmuludq %mm4,%mm0 | 160 pmuludq %mm4,%mm0 |
146 pmuludq %mm5,%mm1 | 161 pmuludq %mm5,%mm1 |
147 paddq %mm0,%mm2 | 162 paddq %mm0,%mm2 |
148 paddq %mm1,%mm3 | 163 paddq %mm1,%mm3 |
149 movq %mm2,%mm0 | 164 movq %mm2,%mm0 |
150 pand %mm7,%mm0 | 165 pand %mm7,%mm0 |
151 paddq %mm0,%mm3 | 166 paddq %mm0,%mm3 |
152 movd %mm3,28(%esp,%ecx,4) | 167 movd %mm3,28(%esp,%ecx,4) |
153 psrlq $32,%mm2 | 168 psrlq $32,%mm2 |
154 psrlq $32,%mm3 | 169 psrlq $32,%mm3 |
155 movd 36(%esp,%ebx,4),%mm6 | 170 movd 36(%esp,%ebx,4),%mm6 |
156 paddq %mm2,%mm3 | 171 paddq %mm2,%mm3 |
157 paddq %mm6,%mm3 | 172 paddq %mm6,%mm3 |
158 movq %mm3,32(%esp,%ebx,4) | 173 movq %mm3,32(%esp,%ebx,4) |
159 leal 1(%edx),%edx | 174 leal 1(%edx),%edx |
160 cmpl %ebx,%edx | 175 cmpl %ebx,%edx |
161 » jle» L004outer | 176 » jle» L006outer |
162 emms | 177 emms |
163 » jmp» L006common_tail | 178 » jmp» L008common_tail |
164 .align 4,0x90 | 179 .align 4,0x90 |
165 L002non_sse2: | 180 L004non_sse2: |
166 movl 8(%esp),%esi | 181 movl 8(%esp),%esi |
167 leal 1(%ebx),%ebp | 182 leal 1(%ebx),%ebp |
168 movl 12(%esp),%edi | 183 movl 12(%esp),%edi |
169 xorl %ecx,%ecx | 184 xorl %ecx,%ecx |
170 movl %esi,%edx | 185 movl %esi,%edx |
171 andl $1,%ebp | 186 andl $1,%ebp |
172 subl %edi,%edx | 187 subl %edi,%edx |
173 leal 4(%edi,%ebx,4),%eax | 188 leal 4(%edi,%ebx,4),%eax |
174 orl %edx,%ebp | 189 orl %edx,%ebp |
175 movl (%edi),%edi | 190 movl (%edi),%edi |
176 » jz» L007bn_sqr_mont | 191 » jz» L009bn_sqr_mont |
177 movl %eax,28(%esp) | 192 movl %eax,28(%esp) |
178 movl (%esi),%eax | 193 movl (%esi),%eax |
179 xorl %edx,%edx | 194 xorl %edx,%edx |
180 .align 4,0x90 | 195 .align 4,0x90 |
181 L008mull: | 196 L010mull: |
182 movl %edx,%ebp | 197 movl %edx,%ebp |
183 mull %edi | 198 mull %edi |
184 addl %eax,%ebp | 199 addl %eax,%ebp |
185 leal 1(%ecx),%ecx | 200 leal 1(%ecx),%ecx |
186 adcl $0,%edx | 201 adcl $0,%edx |
187 movl (%esi,%ecx,4),%eax | 202 movl (%esi,%ecx,4),%eax |
188 cmpl %ebx,%ecx | 203 cmpl %ebx,%ecx |
189 movl %ebp,28(%esp,%ecx,4) | 204 movl %ebp,28(%esp,%ecx,4) |
190 » jl» L008mull | 205 » jl» L010mull |
191 movl %edx,%ebp | 206 movl %edx,%ebp |
192 mull %edi | 207 mull %edi |
193 movl 20(%esp),%edi | 208 movl 20(%esp),%edi |
194 addl %ebp,%eax | 209 addl %ebp,%eax |
195 movl 16(%esp),%esi | 210 movl 16(%esp),%esi |
196 adcl $0,%edx | 211 adcl $0,%edx |
197 imull 32(%esp),%edi | 212 imull 32(%esp),%edi |
198 movl %eax,32(%esp,%ebx,4) | 213 movl %eax,32(%esp,%ebx,4) |
199 xorl %ecx,%ecx | 214 xorl %ecx,%ecx |
200 movl %edx,36(%esp,%ebx,4) | 215 movl %edx,36(%esp,%ebx,4) |
201 movl %ecx,40(%esp,%ebx,4) | 216 movl %ecx,40(%esp,%ebx,4) |
202 movl (%esi),%eax | 217 movl (%esi),%eax |
203 mull %edi | 218 mull %edi |
204 addl 32(%esp),%eax | 219 addl 32(%esp),%eax |
205 movl 4(%esi),%eax | 220 movl 4(%esi),%eax |
206 adcl $0,%edx | 221 adcl $0,%edx |
207 incl %ecx | 222 incl %ecx |
208 » jmp» L0092ndmadd | 223 » jmp» L0112ndmadd |
209 .align 4,0x90 | 224 .align 4,0x90 |
210 L0101stmadd: | 225 L0121stmadd: |
211 movl %edx,%ebp | 226 movl %edx,%ebp |
212 mull %edi | 227 mull %edi |
213 addl 32(%esp,%ecx,4),%ebp | 228 addl 32(%esp,%ecx,4),%ebp |
214 leal 1(%ecx),%ecx | 229 leal 1(%ecx),%ecx |
215 adcl $0,%edx | 230 adcl $0,%edx |
216 addl %eax,%ebp | 231 addl %eax,%ebp |
217 movl (%esi,%ecx,4),%eax | 232 movl (%esi,%ecx,4),%eax |
218 adcl $0,%edx | 233 adcl $0,%edx |
219 cmpl %ebx,%ecx | 234 cmpl %ebx,%ecx |
220 movl %ebp,28(%esp,%ecx,4) | 235 movl %ebp,28(%esp,%ecx,4) |
221 » jl» L0101stmadd | 236 » jl» L0121stmadd |
222 movl %edx,%ebp | 237 movl %edx,%ebp |
223 mull %edi | 238 mull %edi |
224 addl 32(%esp,%ebx,4),%eax | 239 addl 32(%esp,%ebx,4),%eax |
225 movl 20(%esp),%edi | 240 movl 20(%esp),%edi |
226 adcl $0,%edx | 241 adcl $0,%edx |
227 movl 16(%esp),%esi | 242 movl 16(%esp),%esi |
228 addl %eax,%ebp | 243 addl %eax,%ebp |
229 adcl $0,%edx | 244 adcl $0,%edx |
230 imull 32(%esp),%edi | 245 imull 32(%esp),%edi |
231 xorl %ecx,%ecx | 246 xorl %ecx,%ecx |
232 addl 36(%esp,%ebx,4),%edx | 247 addl 36(%esp,%ebx,4),%edx |
233 movl %ebp,32(%esp,%ebx,4) | 248 movl %ebp,32(%esp,%ebx,4) |
234 adcl $0,%ecx | 249 adcl $0,%ecx |
235 movl (%esi),%eax | 250 movl (%esi),%eax |
236 movl %edx,36(%esp,%ebx,4) | 251 movl %edx,36(%esp,%ebx,4) |
237 movl %ecx,40(%esp,%ebx,4) | 252 movl %ecx,40(%esp,%ebx,4) |
238 mull %edi | 253 mull %edi |
239 addl 32(%esp),%eax | 254 addl 32(%esp),%eax |
240 movl 4(%esi),%eax | 255 movl 4(%esi),%eax |
241 adcl $0,%edx | 256 adcl $0,%edx |
242 movl $1,%ecx | 257 movl $1,%ecx |
243 .align 4,0x90 | 258 .align 4,0x90 |
244 L0092ndmadd: | 259 L0112ndmadd: |
245 movl %edx,%ebp | 260 movl %edx,%ebp |
246 mull %edi | 261 mull %edi |
247 addl 32(%esp,%ecx,4),%ebp | 262 addl 32(%esp,%ecx,4),%ebp |
248 leal 1(%ecx),%ecx | 263 leal 1(%ecx),%ecx |
249 adcl $0,%edx | 264 adcl $0,%edx |
250 addl %eax,%ebp | 265 addl %eax,%ebp |
251 movl (%esi,%ecx,4),%eax | 266 movl (%esi,%ecx,4),%eax |
252 adcl $0,%edx | 267 adcl $0,%edx |
253 cmpl %ebx,%ecx | 268 cmpl %ebx,%ecx |
254 movl %ebp,24(%esp,%ecx,4) | 269 movl %ebp,24(%esp,%ecx,4) |
255 » jl» L0092ndmadd | 270 » jl» L0112ndmadd |
256 movl %edx,%ebp | 271 movl %edx,%ebp |
257 mull %edi | 272 mull %edi |
258 addl 32(%esp,%ebx,4),%ebp | 273 addl 32(%esp,%ebx,4),%ebp |
259 adcl $0,%edx | 274 adcl $0,%edx |
260 addl %eax,%ebp | 275 addl %eax,%ebp |
261 adcl $0,%edx | 276 adcl $0,%edx |
262 movl %ebp,28(%esp,%ebx,4) | 277 movl %ebp,28(%esp,%ebx,4) |
263 xorl %eax,%eax | 278 xorl %eax,%eax |
264 movl 12(%esp),%ecx | 279 movl 12(%esp),%ecx |
265 addl 36(%esp,%ebx,4),%edx | 280 addl 36(%esp,%ebx,4),%edx |
266 adcl 40(%esp,%ebx,4),%eax | 281 adcl 40(%esp,%ebx,4),%eax |
267 leal 4(%ecx),%ecx | 282 leal 4(%ecx),%ecx |
268 movl %edx,32(%esp,%ebx,4) | 283 movl %edx,32(%esp,%ebx,4) |
269 cmpl 28(%esp),%ecx | 284 cmpl 28(%esp),%ecx |
270 movl %eax,36(%esp,%ebx,4) | 285 movl %eax,36(%esp,%ebx,4) |
271 » je» L006common_tail | 286 » je» L008common_tail |
272 movl (%ecx),%edi | 287 movl (%ecx),%edi |
273 movl 8(%esp),%esi | 288 movl 8(%esp),%esi |
274 movl %ecx,12(%esp) | 289 movl %ecx,12(%esp) |
275 xorl %ecx,%ecx | 290 xorl %ecx,%ecx |
276 xorl %edx,%edx | 291 xorl %edx,%edx |
277 movl (%esi),%eax | 292 movl (%esi),%eax |
278 » jmp» L0101stmadd | 293 » jmp» L0121stmadd |
279 .align 4,0x90 | 294 .align 4,0x90 |
280 L007bn_sqr_mont: | 295 L009bn_sqr_mont: |
281 movl %ebx,(%esp) | 296 movl %ebx,(%esp) |
282 movl %ecx,12(%esp) | 297 movl %ecx,12(%esp) |
283 movl %edi,%eax | 298 movl %edi,%eax |
284 mull %edi | 299 mull %edi |
285 movl %eax,32(%esp) | 300 movl %eax,32(%esp) |
286 movl %edx,%ebx | 301 movl %edx,%ebx |
287 shrl $1,%edx | 302 shrl $1,%edx |
288 andl $1,%ebx | 303 andl $1,%ebx |
289 incl %ecx | 304 incl %ecx |
290 .align 4,0x90 | 305 .align 4,0x90 |
291 L011sqr: | 306 L013sqr: |
292 movl (%esi,%ecx,4),%eax | 307 movl (%esi,%ecx,4),%eax |
293 movl %edx,%ebp | 308 movl %edx,%ebp |
294 mull %edi | 309 mull %edi |
295 addl %ebp,%eax | 310 addl %ebp,%eax |
296 leal 1(%ecx),%ecx | 311 leal 1(%ecx),%ecx |
297 adcl $0,%edx | 312 adcl $0,%edx |
298 leal (%ebx,%eax,2),%ebp | 313 leal (%ebx,%eax,2),%ebp |
299 shrl $31,%eax | 314 shrl $31,%eax |
300 cmpl (%esp),%ecx | 315 cmpl (%esp),%ecx |
301 movl %eax,%ebx | 316 movl %eax,%ebx |
302 movl %ebp,28(%esp,%ecx,4) | 317 movl %ebp,28(%esp,%ecx,4) |
303 » jl» L011sqr | 318 » jl» L013sqr |
304 movl (%esi,%ecx,4),%eax | 319 movl (%esi,%ecx,4),%eax |
305 movl %edx,%ebp | 320 movl %edx,%ebp |
306 mull %edi | 321 mull %edi |
307 addl %ebp,%eax | 322 addl %ebp,%eax |
308 movl 20(%esp),%edi | 323 movl 20(%esp),%edi |
309 adcl $0,%edx | 324 adcl $0,%edx |
310 movl 16(%esp),%esi | 325 movl 16(%esp),%esi |
311 leal (%ebx,%eax,2),%ebp | 326 leal (%ebx,%eax,2),%ebp |
312 imull 32(%esp),%edi | 327 imull 32(%esp),%edi |
313 shrl $31,%eax | 328 shrl $31,%eax |
314 movl %ebp,32(%esp,%ecx,4) | 329 movl %ebp,32(%esp,%ecx,4) |
315 leal (%eax,%edx,2),%ebp | 330 leal (%eax,%edx,2),%ebp |
316 movl (%esi),%eax | 331 movl (%esi),%eax |
317 shrl $31,%edx | 332 shrl $31,%edx |
318 movl %ebp,36(%esp,%ecx,4) | 333 movl %ebp,36(%esp,%ecx,4) |
319 movl %edx,40(%esp,%ecx,4) | 334 movl %edx,40(%esp,%ecx,4) |
320 mull %edi | 335 mull %edi |
321 addl 32(%esp),%eax | 336 addl 32(%esp),%eax |
322 movl %ecx,%ebx | 337 movl %ecx,%ebx |
323 adcl $0,%edx | 338 adcl $0,%edx |
324 movl 4(%esi),%eax | 339 movl 4(%esi),%eax |
325 movl $1,%ecx | 340 movl $1,%ecx |
326 .align 4,0x90 | 341 .align 4,0x90 |
327 L0123rdmadd: | 342 L0143rdmadd: |
328 movl %edx,%ebp | 343 movl %edx,%ebp |
329 mull %edi | 344 mull %edi |
330 addl 32(%esp,%ecx,4),%ebp | 345 addl 32(%esp,%ecx,4),%ebp |
331 adcl $0,%edx | 346 adcl $0,%edx |
332 addl %eax,%ebp | 347 addl %eax,%ebp |
333 movl 4(%esi,%ecx,4),%eax | 348 movl 4(%esi,%ecx,4),%eax |
334 adcl $0,%edx | 349 adcl $0,%edx |
335 movl %ebp,28(%esp,%ecx,4) | 350 movl %ebp,28(%esp,%ecx,4) |
336 movl %edx,%ebp | 351 movl %edx,%ebp |
337 mull %edi | 352 mull %edi |
338 addl 36(%esp,%ecx,4),%ebp | 353 addl 36(%esp,%ecx,4),%ebp |
339 leal 2(%ecx),%ecx | 354 leal 2(%ecx),%ecx |
340 adcl $0,%edx | 355 adcl $0,%edx |
341 addl %eax,%ebp | 356 addl %eax,%ebp |
342 movl (%esi,%ecx,4),%eax | 357 movl (%esi,%ecx,4),%eax |
343 adcl $0,%edx | 358 adcl $0,%edx |
344 cmpl %ebx,%ecx | 359 cmpl %ebx,%ecx |
345 movl %ebp,24(%esp,%ecx,4) | 360 movl %ebp,24(%esp,%ecx,4) |
346 » jl» L0123rdmadd | 361 » jl» L0143rdmadd |
347 movl %edx,%ebp | 362 movl %edx,%ebp |
348 mull %edi | 363 mull %edi |
349 addl 32(%esp,%ebx,4),%ebp | 364 addl 32(%esp,%ebx,4),%ebp |
350 adcl $0,%edx | 365 adcl $0,%edx |
351 addl %eax,%ebp | 366 addl %eax,%ebp |
352 adcl $0,%edx | 367 adcl $0,%edx |
353 movl %ebp,28(%esp,%ebx,4) | 368 movl %ebp,28(%esp,%ebx,4) |
354 movl 12(%esp),%ecx | 369 movl 12(%esp),%ecx |
355 xorl %eax,%eax | 370 xorl %eax,%eax |
356 movl 8(%esp),%esi | 371 movl 8(%esp),%esi |
357 addl 36(%esp,%ebx,4),%edx | 372 addl 36(%esp,%ebx,4),%edx |
358 adcl 40(%esp,%ebx,4),%eax | 373 adcl 40(%esp,%ebx,4),%eax |
359 movl %edx,32(%esp,%ebx,4) | 374 movl %edx,32(%esp,%ebx,4) |
360 cmpl %ebx,%ecx | 375 cmpl %ebx,%ecx |
361 movl %eax,36(%esp,%ebx,4) | 376 movl %eax,36(%esp,%ebx,4) |
362 » je» L006common_tail | 377 » je» L008common_tail |
363 movl 4(%esi,%ecx,4),%edi | 378 movl 4(%esi,%ecx,4),%edi |
364 leal 1(%ecx),%ecx | 379 leal 1(%ecx),%ecx |
365 movl %edi,%eax | 380 movl %edi,%eax |
366 movl %ecx,12(%esp) | 381 movl %ecx,12(%esp) |
367 mull %edi | 382 mull %edi |
368 addl 32(%esp,%ecx,4),%eax | 383 addl 32(%esp,%ecx,4),%eax |
369 adcl $0,%edx | 384 adcl $0,%edx |
370 movl %eax,32(%esp,%ecx,4) | 385 movl %eax,32(%esp,%ecx,4) |
371 xorl %ebp,%ebp | 386 xorl %ebp,%ebp |
372 cmpl %ebx,%ecx | 387 cmpl %ebx,%ecx |
373 leal 1(%ecx),%ecx | 388 leal 1(%ecx),%ecx |
374 » je» L013sqrlast | 389 » je» L015sqrlast |
375 movl %edx,%ebx | 390 movl %edx,%ebx |
376 shrl $1,%edx | 391 shrl $1,%edx |
377 andl $1,%ebx | 392 andl $1,%ebx |
378 .align 4,0x90 | 393 .align 4,0x90 |
379 L014sqradd: | 394 L016sqradd: |
380 movl (%esi,%ecx,4),%eax | 395 movl (%esi,%ecx,4),%eax |
381 movl %edx,%ebp | 396 movl %edx,%ebp |
382 mull %edi | 397 mull %edi |
383 addl %ebp,%eax | 398 addl %ebp,%eax |
384 leal (%eax,%eax,1),%ebp | 399 leal (%eax,%eax,1),%ebp |
385 adcl $0,%edx | 400 adcl $0,%edx |
386 shrl $31,%eax | 401 shrl $31,%eax |
387 addl 32(%esp,%ecx,4),%ebp | 402 addl 32(%esp,%ecx,4),%ebp |
388 leal 1(%ecx),%ecx | 403 leal 1(%ecx),%ecx |
389 adcl $0,%eax | 404 adcl $0,%eax |
390 addl %ebx,%ebp | 405 addl %ebx,%ebp |
391 adcl $0,%eax | 406 adcl $0,%eax |
392 cmpl (%esp),%ecx | 407 cmpl (%esp),%ecx |
393 movl %ebp,28(%esp,%ecx,4) | 408 movl %ebp,28(%esp,%ecx,4) |
394 movl %eax,%ebx | 409 movl %eax,%ebx |
395 » jle» L014sqradd | 410 » jle» L016sqradd |
396 movl %edx,%ebp | 411 movl %edx,%ebp |
397 addl %edx,%edx | 412 addl %edx,%edx |
398 shrl $31,%ebp | 413 shrl $31,%ebp |
399 addl %ebx,%edx | 414 addl %ebx,%edx |
400 adcl $0,%ebp | 415 adcl $0,%ebp |
401 L013sqrlast: | 416 L015sqrlast: |
402 movl 20(%esp),%edi | 417 movl 20(%esp),%edi |
403 movl 16(%esp),%esi | 418 movl 16(%esp),%esi |
404 imull 32(%esp),%edi | 419 imull 32(%esp),%edi |
405 addl 32(%esp,%ecx,4),%edx | 420 addl 32(%esp,%ecx,4),%edx |
406 movl (%esi),%eax | 421 movl (%esi),%eax |
407 adcl $0,%ebp | 422 adcl $0,%ebp |
408 movl %edx,32(%esp,%ecx,4) | 423 movl %edx,32(%esp,%ecx,4) |
409 movl %ebp,36(%esp,%ecx,4) | 424 movl %ebp,36(%esp,%ecx,4) |
410 mull %edi | 425 mull %edi |
411 addl 32(%esp),%eax | 426 addl 32(%esp),%eax |
412 leal -1(%ecx),%ebx | 427 leal -1(%ecx),%ebx |
413 adcl $0,%edx | 428 adcl $0,%edx |
414 movl $1,%ecx | 429 movl $1,%ecx |
415 movl 4(%esi),%eax | 430 movl 4(%esi),%eax |
416 » jmp» L0123rdmadd | 431 » jmp» L0143rdmadd |
417 .align 4,0x90 | 432 .align 4,0x90 |
418 L006common_tail: | 433 L008common_tail: |
419 movl 16(%esp),%ebp | 434 movl 16(%esp),%ebp |
420 movl 4(%esp),%edi | 435 movl 4(%esp),%edi |
421 leal 32(%esp),%esi | 436 leal 32(%esp),%esi |
422 movl (%esi),%eax | 437 movl (%esi),%eax |
423 movl %ebx,%ecx | 438 movl %ebx,%ecx |
424 xorl %edx,%edx | 439 xorl %edx,%edx |
425 .align 4,0x90 | 440 .align 4,0x90 |
426 L015sub: | 441 L017sub: |
427 sbbl (%ebp,%edx,4),%eax | 442 sbbl (%ebp,%edx,4),%eax |
428 movl %eax,(%edi,%edx,4) | 443 movl %eax,(%edi,%edx,4) |
429 decl %ecx | 444 decl %ecx |
430 movl 4(%esi,%edx,4),%eax | 445 movl 4(%esi,%edx,4),%eax |
431 leal 1(%edx),%edx | 446 leal 1(%edx),%edx |
432 » jge» L015sub | 447 » jge» L017sub |
433 sbbl $0,%eax | 448 sbbl $0,%eax |
| 449 andl %eax,%esi |
| 450 notl %eax |
| 451 movl %edi,%ebp |
| 452 andl %eax,%ebp |
| 453 orl %ebp,%esi |
434 .align 4,0x90 | 454 .align 4,0x90 |
435 L016copy: | 455 L018copy: |
436 » movl» (%esi,%ebx,4),%edx | 456 » movl» (%esi,%ebx,4),%eax |
437 » movl» (%edi,%ebx,4),%ebp | 457 » movl» %eax,(%edi,%ebx,4) |
438 » xorl» %ebp,%edx | 458 » movl» %ecx,32(%esp,%ebx,4) |
439 » andl» %eax,%edx | |
440 » xorl» %ebp,%edx | |
441 » movl» %ecx,(%esi,%ebx,4) | |
442 » movl» %edx,(%edi,%ebx,4) | |
443 decl %ebx | 459 decl %ebx |
444 » jge» L016copy | 460 » jge» L018copy |
445 movl 24(%esp),%esp | 461 movl 24(%esp),%esp |
446 movl $1,%eax | 462 movl $1,%eax |
447 L000just_leave: | 463 L000just_leave: |
448 popl %edi | 464 popl %edi |
449 popl %esi | 465 popl %esi |
450 popl %ebx | 466 popl %ebx |
451 popl %ebp | 467 popl %ebp |
452 ret | 468 ret |
453 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | 469 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
454 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | 470 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
455 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 | 471 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
456 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 | 472 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
457 .byte 111,114,103,62,0 | 473 .byte 111,114,103,62,0 |
458 .section __IMPORT,__pointers,non_lazy_symbol_pointers | 474 .section __IMPORT,__pointers,non_lazy_symbol_pointers |
459 L_OPENSSL_ia32cap_P$non_lazy_ptr: | 475 L_OPENSSL_ia32cap_P$non_lazy_ptr: |
460 .indirect_symbol _OPENSSL_ia32cap_P | 476 .indirect_symbol _OPENSSL_ia32cap_P |
461 .long 0 | 477 .long 0 |
462 #endif | 478 #endif |
OLD | NEW |