OLD | NEW |
1 #if defined(__i386__) | 1 #if defined(__i386__) |
2 .file "src/crypto/bn/asm/x86-mont.S" | 2 .file "src/crypto/bn/asm/x86-mont.S" |
3 .text | 3 .text |
4 .globl bn_mul_mont | 4 .globl bn_mul_mont |
5 .hidden bn_mul_mont | 5 .hidden bn_mul_mont |
6 .type bn_mul_mont,@function | 6 .type bn_mul_mont,@function |
7 .align 16 | 7 .align 16 |
8 bn_mul_mont: | 8 bn_mul_mont: |
9 .L_bn_mul_mont_begin: | 9 .L_bn_mul_mont_begin: |
10 pushl %ebp | 10 pushl %ebp |
11 pushl %ebx | 11 pushl %ebx |
12 pushl %esi | 12 pushl %esi |
13 pushl %edi | 13 pushl %edi |
14 xorl %eax,%eax | 14 xorl %eax,%eax |
15 movl 40(%esp),%edi | 15 movl 40(%esp),%edi |
16 cmpl $4,%edi | 16 cmpl $4,%edi |
17 jl .L000just_leave | 17 jl .L000just_leave |
18 leal 20(%esp),%esi | 18 leal 20(%esp),%esi |
19 leal 24(%esp),%edx | 19 leal 24(%esp),%edx |
20 movl %esp,%ebp | |
21 addl $2,%edi | 20 addl $2,%edi |
22 negl %edi | 21 negl %edi |
23 » leal» -32(%esp,%edi,4),%esp | 22 » leal» -32(%esp,%edi,4),%ebp |
24 negl %edi | 23 negl %edi |
25 » movl» %esp,%eax | 24 » movl» %ebp,%eax |
26 subl %edx,%eax | 25 subl %edx,%eax |
27 andl $2047,%eax | 26 andl $2047,%eax |
28 » subl» %eax,%esp | 27 » subl» %eax,%ebp |
29 » xorl» %esp,%edx | 28 » xorl» %ebp,%edx |
30 andl $2048,%edx | 29 andl $2048,%edx |
31 xorl $2048,%edx | 30 xorl $2048,%edx |
32 » subl» %edx,%esp | 31 » subl» %edx,%ebp |
33 » andl» $-64,%esp | 32 » andl» $-64,%ebp |
| 33 » movl» %esp,%eax |
| 34 » subl» %ebp,%eax |
| 35 » andl» $-4096,%eax |
| 36 » movl» %esp,%edx |
| 37 » leal» (%ebp,%eax,1),%esp |
| 38 » movl» (%esp),%eax |
| 39 » cmpl» %ebp,%esp |
| 40 » ja» .L001page_walk |
| 41 » jmp» .L002page_walk_done |
| 42 .align» 16 |
| 43 .L001page_walk: |
| 44 » leal» -4096(%esp),%esp |
| 45 » movl» (%esp),%eax |
| 46 » cmpl» %ebp,%esp |
| 47 » ja» .L001page_walk |
| 48 .L002page_walk_done: |
34 movl (%esi),%eax | 49 movl (%esi),%eax |
35 movl 4(%esi),%ebx | 50 movl 4(%esi),%ebx |
36 movl 8(%esi),%ecx | 51 movl 8(%esi),%ecx |
37 » movl» 12(%esi),%edx | 52 » movl» 12(%esi),%ebp |
38 movl 16(%esi),%esi | 53 movl 16(%esi),%esi |
39 movl (%esi),%esi | 54 movl (%esi),%esi |
40 movl %eax,4(%esp) | 55 movl %eax,4(%esp) |
41 movl %ebx,8(%esp) | 56 movl %ebx,8(%esp) |
42 movl %ecx,12(%esp) | 57 movl %ecx,12(%esp) |
43 » movl» %edx,16(%esp) | 58 » movl» %ebp,16(%esp) |
44 movl %esi,20(%esp) | 59 movl %esi,20(%esp) |
45 leal -3(%edi),%ebx | 60 leal -3(%edi),%ebx |
46 » movl» %ebp,24(%esp) | 61 » movl» %edx,24(%esp) |
47 » call» .L001PIC_me_up | 62 » call» .L003PIC_me_up |
48 .L001PIC_me_up: | 63 .L003PIC_me_up: |
49 popl %eax | 64 popl %eax |
50 » leal» OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax | 65 » leal» OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax |
51 btl $26,(%eax) | 66 btl $26,(%eax) |
52 » jnc» .L002non_sse2 | 67 » jnc» .L004non_sse2 |
53 movl $-1,%eax | 68 movl $-1,%eax |
54 movd %eax,%mm7 | 69 movd %eax,%mm7 |
55 movl 8(%esp),%esi | 70 movl 8(%esp),%esi |
56 movl 12(%esp),%edi | 71 movl 12(%esp),%edi |
57 movl 16(%esp),%ebp | 72 movl 16(%esp),%ebp |
58 xorl %edx,%edx | 73 xorl %edx,%edx |
59 xorl %ecx,%ecx | 74 xorl %ecx,%ecx |
60 movd (%edi),%mm4 | 75 movd (%edi),%mm4 |
61 movd (%esi),%mm5 | 76 movd (%esi),%mm5 |
62 movd (%ebp),%mm3 | 77 movd (%ebp),%mm3 |
63 pmuludq %mm4,%mm5 | 78 pmuludq %mm4,%mm5 |
64 movq %mm5,%mm2 | 79 movq %mm5,%mm2 |
65 movq %mm5,%mm0 | 80 movq %mm5,%mm0 |
66 pand %mm7,%mm0 | 81 pand %mm7,%mm0 |
67 pmuludq 20(%esp),%mm5 | 82 pmuludq 20(%esp),%mm5 |
68 pmuludq %mm5,%mm3 | 83 pmuludq %mm5,%mm3 |
69 paddq %mm0,%mm3 | 84 paddq %mm0,%mm3 |
70 movd 4(%ebp),%mm1 | 85 movd 4(%ebp),%mm1 |
71 movd 4(%esi),%mm0 | 86 movd 4(%esi),%mm0 |
72 psrlq $32,%mm2 | 87 psrlq $32,%mm2 |
73 psrlq $32,%mm3 | 88 psrlq $32,%mm3 |
74 incl %ecx | 89 incl %ecx |
75 .align 16 | 90 .align 16 |
76 .L0031st: | 91 .L0051st: |
77 pmuludq %mm4,%mm0 | 92 pmuludq %mm4,%mm0 |
78 pmuludq %mm5,%mm1 | 93 pmuludq %mm5,%mm1 |
79 paddq %mm0,%mm2 | 94 paddq %mm0,%mm2 |
80 paddq %mm1,%mm3 | 95 paddq %mm1,%mm3 |
81 movq %mm2,%mm0 | 96 movq %mm2,%mm0 |
82 pand %mm7,%mm0 | 97 pand %mm7,%mm0 |
83 movd 4(%ebp,%ecx,4),%mm1 | 98 movd 4(%ebp,%ecx,4),%mm1 |
84 paddq %mm0,%mm3 | 99 paddq %mm0,%mm3 |
85 movd 4(%esi,%ecx,4),%mm0 | 100 movd 4(%esi,%ecx,4),%mm0 |
86 psrlq $32,%mm2 | 101 psrlq $32,%mm2 |
87 movd %mm3,28(%esp,%ecx,4) | 102 movd %mm3,28(%esp,%ecx,4) |
88 psrlq $32,%mm3 | 103 psrlq $32,%mm3 |
89 leal 1(%ecx),%ecx | 104 leal 1(%ecx),%ecx |
90 cmpl %ebx,%ecx | 105 cmpl %ebx,%ecx |
91 » jl» .L0031st | 106 » jl» .L0051st |
92 pmuludq %mm4,%mm0 | 107 pmuludq %mm4,%mm0 |
93 pmuludq %mm5,%mm1 | 108 pmuludq %mm5,%mm1 |
94 paddq %mm0,%mm2 | 109 paddq %mm0,%mm2 |
95 paddq %mm1,%mm3 | 110 paddq %mm1,%mm3 |
96 movq %mm2,%mm0 | 111 movq %mm2,%mm0 |
97 pand %mm7,%mm0 | 112 pand %mm7,%mm0 |
98 paddq %mm0,%mm3 | 113 paddq %mm0,%mm3 |
99 movd %mm3,28(%esp,%ecx,4) | 114 movd %mm3,28(%esp,%ecx,4) |
100 psrlq $32,%mm2 | 115 psrlq $32,%mm2 |
101 psrlq $32,%mm3 | 116 psrlq $32,%mm3 |
102 paddq %mm2,%mm3 | 117 paddq %mm2,%mm3 |
103 movq %mm3,32(%esp,%ebx,4) | 118 movq %mm3,32(%esp,%ebx,4) |
104 incl %edx | 119 incl %edx |
105 .L004outer: | 120 .L006outer: |
106 xorl %ecx,%ecx | 121 xorl %ecx,%ecx |
107 movd (%edi,%edx,4),%mm4 | 122 movd (%edi,%edx,4),%mm4 |
108 movd (%esi),%mm5 | 123 movd (%esi),%mm5 |
109 movd 32(%esp),%mm6 | 124 movd 32(%esp),%mm6 |
110 movd (%ebp),%mm3 | 125 movd (%ebp),%mm3 |
111 pmuludq %mm4,%mm5 | 126 pmuludq %mm4,%mm5 |
112 paddq %mm6,%mm5 | 127 paddq %mm6,%mm5 |
113 movq %mm5,%mm0 | 128 movq %mm5,%mm0 |
114 movq %mm5,%mm2 | 129 movq %mm5,%mm2 |
115 pand %mm7,%mm0 | 130 pand %mm7,%mm0 |
116 pmuludq 20(%esp),%mm5 | 131 pmuludq 20(%esp),%mm5 |
117 pmuludq %mm5,%mm3 | 132 pmuludq %mm5,%mm3 |
118 paddq %mm0,%mm3 | 133 paddq %mm0,%mm3 |
119 movd 36(%esp),%mm6 | 134 movd 36(%esp),%mm6 |
120 movd 4(%ebp),%mm1 | 135 movd 4(%ebp),%mm1 |
121 movd 4(%esi),%mm0 | 136 movd 4(%esi),%mm0 |
122 psrlq $32,%mm2 | 137 psrlq $32,%mm2 |
123 psrlq $32,%mm3 | 138 psrlq $32,%mm3 |
124 paddq %mm6,%mm2 | 139 paddq %mm6,%mm2 |
125 incl %ecx | 140 incl %ecx |
126 decl %ebx | 141 decl %ebx |
127 .L005inner: | 142 .L007inner: |
128 pmuludq %mm4,%mm0 | 143 pmuludq %mm4,%mm0 |
129 pmuludq %mm5,%mm1 | 144 pmuludq %mm5,%mm1 |
130 paddq %mm0,%mm2 | 145 paddq %mm0,%mm2 |
131 paddq %mm1,%mm3 | 146 paddq %mm1,%mm3 |
132 movq %mm2,%mm0 | 147 movq %mm2,%mm0 |
133 movd 36(%esp,%ecx,4),%mm6 | 148 movd 36(%esp,%ecx,4),%mm6 |
134 pand %mm7,%mm0 | 149 pand %mm7,%mm0 |
135 movd 4(%ebp,%ecx,4),%mm1 | 150 movd 4(%ebp,%ecx,4),%mm1 |
136 paddq %mm0,%mm3 | 151 paddq %mm0,%mm3 |
137 movd 4(%esi,%ecx,4),%mm0 | 152 movd 4(%esi,%ecx,4),%mm0 |
138 psrlq $32,%mm2 | 153 psrlq $32,%mm2 |
139 movd %mm3,28(%esp,%ecx,4) | 154 movd %mm3,28(%esp,%ecx,4) |
140 psrlq $32,%mm3 | 155 psrlq $32,%mm3 |
141 paddq %mm6,%mm2 | 156 paddq %mm6,%mm2 |
142 decl %ebx | 157 decl %ebx |
143 leal 1(%ecx),%ecx | 158 leal 1(%ecx),%ecx |
144 » jnz» .L005inner | 159 » jnz» .L007inner |
145 movl %ecx,%ebx | 160 movl %ecx,%ebx |
146 pmuludq %mm4,%mm0 | 161 pmuludq %mm4,%mm0 |
147 pmuludq %mm5,%mm1 | 162 pmuludq %mm5,%mm1 |
148 paddq %mm0,%mm2 | 163 paddq %mm0,%mm2 |
149 paddq %mm1,%mm3 | 164 paddq %mm1,%mm3 |
150 movq %mm2,%mm0 | 165 movq %mm2,%mm0 |
151 pand %mm7,%mm0 | 166 pand %mm7,%mm0 |
152 paddq %mm0,%mm3 | 167 paddq %mm0,%mm3 |
153 movd %mm3,28(%esp,%ecx,4) | 168 movd %mm3,28(%esp,%ecx,4) |
154 psrlq $32,%mm2 | 169 psrlq $32,%mm2 |
155 psrlq $32,%mm3 | 170 psrlq $32,%mm3 |
156 movd 36(%esp,%ebx,4),%mm6 | 171 movd 36(%esp,%ebx,4),%mm6 |
157 paddq %mm2,%mm3 | 172 paddq %mm2,%mm3 |
158 paddq %mm6,%mm3 | 173 paddq %mm6,%mm3 |
159 movq %mm3,32(%esp,%ebx,4) | 174 movq %mm3,32(%esp,%ebx,4) |
160 leal 1(%edx),%edx | 175 leal 1(%edx),%edx |
161 cmpl %ebx,%edx | 176 cmpl %ebx,%edx |
162 » jle» .L004outer | 177 » jle» .L006outer |
163 emms | 178 emms |
164 » jmp» .L006common_tail | 179 » jmp» .L008common_tail |
165 .align 16 | 180 .align 16 |
166 .L002non_sse2: | 181 .L004non_sse2: |
167 movl 8(%esp),%esi | 182 movl 8(%esp),%esi |
168 leal 1(%ebx),%ebp | 183 leal 1(%ebx),%ebp |
169 movl 12(%esp),%edi | 184 movl 12(%esp),%edi |
170 xorl %ecx,%ecx | 185 xorl %ecx,%ecx |
171 movl %esi,%edx | 186 movl %esi,%edx |
172 andl $1,%ebp | 187 andl $1,%ebp |
173 subl %edi,%edx | 188 subl %edi,%edx |
174 leal 4(%edi,%ebx,4),%eax | 189 leal 4(%edi,%ebx,4),%eax |
175 orl %edx,%ebp | 190 orl %edx,%ebp |
176 movl (%edi),%edi | 191 movl (%edi),%edi |
177 » jz» .L007bn_sqr_mont | 192 » jz» .L009bn_sqr_mont |
178 movl %eax,28(%esp) | 193 movl %eax,28(%esp) |
179 movl (%esi),%eax | 194 movl (%esi),%eax |
180 xorl %edx,%edx | 195 xorl %edx,%edx |
181 .align 16 | 196 .align 16 |
182 .L008mull: | 197 .L010mull: |
183 movl %edx,%ebp | 198 movl %edx,%ebp |
184 mull %edi | 199 mull %edi |
185 addl %eax,%ebp | 200 addl %eax,%ebp |
186 leal 1(%ecx),%ecx | 201 leal 1(%ecx),%ecx |
187 adcl $0,%edx | 202 adcl $0,%edx |
188 movl (%esi,%ecx,4),%eax | 203 movl (%esi,%ecx,4),%eax |
189 cmpl %ebx,%ecx | 204 cmpl %ebx,%ecx |
190 movl %ebp,28(%esp,%ecx,4) | 205 movl %ebp,28(%esp,%ecx,4) |
191 » jl» .L008mull | 206 » jl» .L010mull |
192 movl %edx,%ebp | 207 movl %edx,%ebp |
193 mull %edi | 208 mull %edi |
194 movl 20(%esp),%edi | 209 movl 20(%esp),%edi |
195 addl %ebp,%eax | 210 addl %ebp,%eax |
196 movl 16(%esp),%esi | 211 movl 16(%esp),%esi |
197 adcl $0,%edx | 212 adcl $0,%edx |
198 imull 32(%esp),%edi | 213 imull 32(%esp),%edi |
199 movl %eax,32(%esp,%ebx,4) | 214 movl %eax,32(%esp,%ebx,4) |
200 xorl %ecx,%ecx | 215 xorl %ecx,%ecx |
201 movl %edx,36(%esp,%ebx,4) | 216 movl %edx,36(%esp,%ebx,4) |
202 movl %ecx,40(%esp,%ebx,4) | 217 movl %ecx,40(%esp,%ebx,4) |
203 movl (%esi),%eax | 218 movl (%esi),%eax |
204 mull %edi | 219 mull %edi |
205 addl 32(%esp),%eax | 220 addl 32(%esp),%eax |
206 movl 4(%esi),%eax | 221 movl 4(%esi),%eax |
207 adcl $0,%edx | 222 adcl $0,%edx |
208 incl %ecx | 223 incl %ecx |
209 » jmp» .L0092ndmadd | 224 » jmp» .L0112ndmadd |
210 .align 16 | 225 .align 16 |
211 .L0101stmadd: | 226 .L0121stmadd: |
212 movl %edx,%ebp | 227 movl %edx,%ebp |
213 mull %edi | 228 mull %edi |
214 addl 32(%esp,%ecx,4),%ebp | 229 addl 32(%esp,%ecx,4),%ebp |
215 leal 1(%ecx),%ecx | 230 leal 1(%ecx),%ecx |
216 adcl $0,%edx | 231 adcl $0,%edx |
217 addl %eax,%ebp | 232 addl %eax,%ebp |
218 movl (%esi,%ecx,4),%eax | 233 movl (%esi,%ecx,4),%eax |
219 adcl $0,%edx | 234 adcl $0,%edx |
220 cmpl %ebx,%ecx | 235 cmpl %ebx,%ecx |
221 movl %ebp,28(%esp,%ecx,4) | 236 movl %ebp,28(%esp,%ecx,4) |
222 » jl» .L0101stmadd | 237 » jl» .L0121stmadd |
223 movl %edx,%ebp | 238 movl %edx,%ebp |
224 mull %edi | 239 mull %edi |
225 addl 32(%esp,%ebx,4),%eax | 240 addl 32(%esp,%ebx,4),%eax |
226 movl 20(%esp),%edi | 241 movl 20(%esp),%edi |
227 adcl $0,%edx | 242 adcl $0,%edx |
228 movl 16(%esp),%esi | 243 movl 16(%esp),%esi |
229 addl %eax,%ebp | 244 addl %eax,%ebp |
230 adcl $0,%edx | 245 adcl $0,%edx |
231 imull 32(%esp),%edi | 246 imull 32(%esp),%edi |
232 xorl %ecx,%ecx | 247 xorl %ecx,%ecx |
233 addl 36(%esp,%ebx,4),%edx | 248 addl 36(%esp,%ebx,4),%edx |
234 movl %ebp,32(%esp,%ebx,4) | 249 movl %ebp,32(%esp,%ebx,4) |
235 adcl $0,%ecx | 250 adcl $0,%ecx |
236 movl (%esi),%eax | 251 movl (%esi),%eax |
237 movl %edx,36(%esp,%ebx,4) | 252 movl %edx,36(%esp,%ebx,4) |
238 movl %ecx,40(%esp,%ebx,4) | 253 movl %ecx,40(%esp,%ebx,4) |
239 mull %edi | 254 mull %edi |
240 addl 32(%esp),%eax | 255 addl 32(%esp),%eax |
241 movl 4(%esi),%eax | 256 movl 4(%esi),%eax |
242 adcl $0,%edx | 257 adcl $0,%edx |
243 movl $1,%ecx | 258 movl $1,%ecx |
244 .align 16 | 259 .align 16 |
245 .L0092ndmadd: | 260 .L0112ndmadd: |
246 movl %edx,%ebp | 261 movl %edx,%ebp |
247 mull %edi | 262 mull %edi |
248 addl 32(%esp,%ecx,4),%ebp | 263 addl 32(%esp,%ecx,4),%ebp |
249 leal 1(%ecx),%ecx | 264 leal 1(%ecx),%ecx |
250 adcl $0,%edx | 265 adcl $0,%edx |
251 addl %eax,%ebp | 266 addl %eax,%ebp |
252 movl (%esi,%ecx,4),%eax | 267 movl (%esi,%ecx,4),%eax |
253 adcl $0,%edx | 268 adcl $0,%edx |
254 cmpl %ebx,%ecx | 269 cmpl %ebx,%ecx |
255 movl %ebp,24(%esp,%ecx,4) | 270 movl %ebp,24(%esp,%ecx,4) |
256 » jl» .L0092ndmadd | 271 » jl» .L0112ndmadd |
257 movl %edx,%ebp | 272 movl %edx,%ebp |
258 mull %edi | 273 mull %edi |
259 addl 32(%esp,%ebx,4),%ebp | 274 addl 32(%esp,%ebx,4),%ebp |
260 adcl $0,%edx | 275 adcl $0,%edx |
261 addl %eax,%ebp | 276 addl %eax,%ebp |
262 adcl $0,%edx | 277 adcl $0,%edx |
263 movl %ebp,28(%esp,%ebx,4) | 278 movl %ebp,28(%esp,%ebx,4) |
264 xorl %eax,%eax | 279 xorl %eax,%eax |
265 movl 12(%esp),%ecx | 280 movl 12(%esp),%ecx |
266 addl 36(%esp,%ebx,4),%edx | 281 addl 36(%esp,%ebx,4),%edx |
267 adcl 40(%esp,%ebx,4),%eax | 282 adcl 40(%esp,%ebx,4),%eax |
268 leal 4(%ecx),%ecx | 283 leal 4(%ecx),%ecx |
269 movl %edx,32(%esp,%ebx,4) | 284 movl %edx,32(%esp,%ebx,4) |
270 cmpl 28(%esp),%ecx | 285 cmpl 28(%esp),%ecx |
271 movl %eax,36(%esp,%ebx,4) | 286 movl %eax,36(%esp,%ebx,4) |
272 » je» .L006common_tail | 287 » je» .L008common_tail |
273 movl (%ecx),%edi | 288 movl (%ecx),%edi |
274 movl 8(%esp),%esi | 289 movl 8(%esp),%esi |
275 movl %ecx,12(%esp) | 290 movl %ecx,12(%esp) |
276 xorl %ecx,%ecx | 291 xorl %ecx,%ecx |
277 xorl %edx,%edx | 292 xorl %edx,%edx |
278 movl (%esi),%eax | 293 movl (%esi),%eax |
279 » jmp» .L0101stmadd | 294 » jmp» .L0121stmadd |
280 .align 16 | 295 .align 16 |
281 .L007bn_sqr_mont: | 296 .L009bn_sqr_mont: |
282 movl %ebx,(%esp) | 297 movl %ebx,(%esp) |
283 movl %ecx,12(%esp) | 298 movl %ecx,12(%esp) |
284 movl %edi,%eax | 299 movl %edi,%eax |
285 mull %edi | 300 mull %edi |
286 movl %eax,32(%esp) | 301 movl %eax,32(%esp) |
287 movl %edx,%ebx | 302 movl %edx,%ebx |
288 shrl $1,%edx | 303 shrl $1,%edx |
289 andl $1,%ebx | 304 andl $1,%ebx |
290 incl %ecx | 305 incl %ecx |
291 .align 16 | 306 .align 16 |
292 .L011sqr: | 307 .L013sqr: |
293 movl (%esi,%ecx,4),%eax | 308 movl (%esi,%ecx,4),%eax |
294 movl %edx,%ebp | 309 movl %edx,%ebp |
295 mull %edi | 310 mull %edi |
296 addl %ebp,%eax | 311 addl %ebp,%eax |
297 leal 1(%ecx),%ecx | 312 leal 1(%ecx),%ecx |
298 adcl $0,%edx | 313 adcl $0,%edx |
299 leal (%ebx,%eax,2),%ebp | 314 leal (%ebx,%eax,2),%ebp |
300 shrl $31,%eax | 315 shrl $31,%eax |
301 cmpl (%esp),%ecx | 316 cmpl (%esp),%ecx |
302 movl %eax,%ebx | 317 movl %eax,%ebx |
303 movl %ebp,28(%esp,%ecx,4) | 318 movl %ebp,28(%esp,%ecx,4) |
304 » jl» .L011sqr | 319 » jl» .L013sqr |
305 movl (%esi,%ecx,4),%eax | 320 movl (%esi,%ecx,4),%eax |
306 movl %edx,%ebp | 321 movl %edx,%ebp |
307 mull %edi | 322 mull %edi |
308 addl %ebp,%eax | 323 addl %ebp,%eax |
309 movl 20(%esp),%edi | 324 movl 20(%esp),%edi |
310 adcl $0,%edx | 325 adcl $0,%edx |
311 movl 16(%esp),%esi | 326 movl 16(%esp),%esi |
312 leal (%ebx,%eax,2),%ebp | 327 leal (%ebx,%eax,2),%ebp |
313 imull 32(%esp),%edi | 328 imull 32(%esp),%edi |
314 shrl $31,%eax | 329 shrl $31,%eax |
315 movl %ebp,32(%esp,%ecx,4) | 330 movl %ebp,32(%esp,%ecx,4) |
316 leal (%eax,%edx,2),%ebp | 331 leal (%eax,%edx,2),%ebp |
317 movl (%esi),%eax | 332 movl (%esi),%eax |
318 shrl $31,%edx | 333 shrl $31,%edx |
319 movl %ebp,36(%esp,%ecx,4) | 334 movl %ebp,36(%esp,%ecx,4) |
320 movl %edx,40(%esp,%ecx,4) | 335 movl %edx,40(%esp,%ecx,4) |
321 mull %edi | 336 mull %edi |
322 addl 32(%esp),%eax | 337 addl 32(%esp),%eax |
323 movl %ecx,%ebx | 338 movl %ecx,%ebx |
324 adcl $0,%edx | 339 adcl $0,%edx |
325 movl 4(%esi),%eax | 340 movl 4(%esi),%eax |
326 movl $1,%ecx | 341 movl $1,%ecx |
327 .align 16 | 342 .align 16 |
328 .L0123rdmadd: | 343 .L0143rdmadd: |
329 movl %edx,%ebp | 344 movl %edx,%ebp |
330 mull %edi | 345 mull %edi |
331 addl 32(%esp,%ecx,4),%ebp | 346 addl 32(%esp,%ecx,4),%ebp |
332 adcl $0,%edx | 347 adcl $0,%edx |
333 addl %eax,%ebp | 348 addl %eax,%ebp |
334 movl 4(%esi,%ecx,4),%eax | 349 movl 4(%esi,%ecx,4),%eax |
335 adcl $0,%edx | 350 adcl $0,%edx |
336 movl %ebp,28(%esp,%ecx,4) | 351 movl %ebp,28(%esp,%ecx,4) |
337 movl %edx,%ebp | 352 movl %edx,%ebp |
338 mull %edi | 353 mull %edi |
339 addl 36(%esp,%ecx,4),%ebp | 354 addl 36(%esp,%ecx,4),%ebp |
340 leal 2(%ecx),%ecx | 355 leal 2(%ecx),%ecx |
341 adcl $0,%edx | 356 adcl $0,%edx |
342 addl %eax,%ebp | 357 addl %eax,%ebp |
343 movl (%esi,%ecx,4),%eax | 358 movl (%esi,%ecx,4),%eax |
344 adcl $0,%edx | 359 adcl $0,%edx |
345 cmpl %ebx,%ecx | 360 cmpl %ebx,%ecx |
346 movl %ebp,24(%esp,%ecx,4) | 361 movl %ebp,24(%esp,%ecx,4) |
347 » jl» .L0123rdmadd | 362 » jl» .L0143rdmadd |
348 movl %edx,%ebp | 363 movl %edx,%ebp |
349 mull %edi | 364 mull %edi |
350 addl 32(%esp,%ebx,4),%ebp | 365 addl 32(%esp,%ebx,4),%ebp |
351 adcl $0,%edx | 366 adcl $0,%edx |
352 addl %eax,%ebp | 367 addl %eax,%ebp |
353 adcl $0,%edx | 368 adcl $0,%edx |
354 movl %ebp,28(%esp,%ebx,4) | 369 movl %ebp,28(%esp,%ebx,4) |
355 movl 12(%esp),%ecx | 370 movl 12(%esp),%ecx |
356 xorl %eax,%eax | 371 xorl %eax,%eax |
357 movl 8(%esp),%esi | 372 movl 8(%esp),%esi |
358 addl 36(%esp,%ebx,4),%edx | 373 addl 36(%esp,%ebx,4),%edx |
359 adcl 40(%esp,%ebx,4),%eax | 374 adcl 40(%esp,%ebx,4),%eax |
360 movl %edx,32(%esp,%ebx,4) | 375 movl %edx,32(%esp,%ebx,4) |
361 cmpl %ebx,%ecx | 376 cmpl %ebx,%ecx |
362 movl %eax,36(%esp,%ebx,4) | 377 movl %eax,36(%esp,%ebx,4) |
363 » je» .L006common_tail | 378 » je» .L008common_tail |
364 movl 4(%esi,%ecx,4),%edi | 379 movl 4(%esi,%ecx,4),%edi |
365 leal 1(%ecx),%ecx | 380 leal 1(%ecx),%ecx |
366 movl %edi,%eax | 381 movl %edi,%eax |
367 movl %ecx,12(%esp) | 382 movl %ecx,12(%esp) |
368 mull %edi | 383 mull %edi |
369 addl 32(%esp,%ecx,4),%eax | 384 addl 32(%esp,%ecx,4),%eax |
370 adcl $0,%edx | 385 adcl $0,%edx |
371 movl %eax,32(%esp,%ecx,4) | 386 movl %eax,32(%esp,%ecx,4) |
372 xorl %ebp,%ebp | 387 xorl %ebp,%ebp |
373 cmpl %ebx,%ecx | 388 cmpl %ebx,%ecx |
374 leal 1(%ecx),%ecx | 389 leal 1(%ecx),%ecx |
375 » je» .L013sqrlast | 390 » je» .L015sqrlast |
376 movl %edx,%ebx | 391 movl %edx,%ebx |
377 shrl $1,%edx | 392 shrl $1,%edx |
378 andl $1,%ebx | 393 andl $1,%ebx |
379 .align 16 | 394 .align 16 |
380 .L014sqradd: | 395 .L016sqradd: |
381 movl (%esi,%ecx,4),%eax | 396 movl (%esi,%ecx,4),%eax |
382 movl %edx,%ebp | 397 movl %edx,%ebp |
383 mull %edi | 398 mull %edi |
384 addl %ebp,%eax | 399 addl %ebp,%eax |
385 leal (%eax,%eax,1),%ebp | 400 leal (%eax,%eax,1),%ebp |
386 adcl $0,%edx | 401 adcl $0,%edx |
387 shrl $31,%eax | 402 shrl $31,%eax |
388 addl 32(%esp,%ecx,4),%ebp | 403 addl 32(%esp,%ecx,4),%ebp |
389 leal 1(%ecx),%ecx | 404 leal 1(%ecx),%ecx |
390 adcl $0,%eax | 405 adcl $0,%eax |
391 addl %ebx,%ebp | 406 addl %ebx,%ebp |
392 adcl $0,%eax | 407 adcl $0,%eax |
393 cmpl (%esp),%ecx | 408 cmpl (%esp),%ecx |
394 movl %ebp,28(%esp,%ecx,4) | 409 movl %ebp,28(%esp,%ecx,4) |
395 movl %eax,%ebx | 410 movl %eax,%ebx |
396 » jle» .L014sqradd | 411 » jle» .L016sqradd |
397 movl %edx,%ebp | 412 movl %edx,%ebp |
398 addl %edx,%edx | 413 addl %edx,%edx |
399 shrl $31,%ebp | 414 shrl $31,%ebp |
400 addl %ebx,%edx | 415 addl %ebx,%edx |
401 adcl $0,%ebp | 416 adcl $0,%ebp |
402 .L013sqrlast: | 417 .L015sqrlast: |
403 movl 20(%esp),%edi | 418 movl 20(%esp),%edi |
404 movl 16(%esp),%esi | 419 movl 16(%esp),%esi |
405 imull 32(%esp),%edi | 420 imull 32(%esp),%edi |
406 addl 32(%esp,%ecx,4),%edx | 421 addl 32(%esp,%ecx,4),%edx |
407 movl (%esi),%eax | 422 movl (%esi),%eax |
408 adcl $0,%ebp | 423 adcl $0,%ebp |
409 movl %edx,32(%esp,%ecx,4) | 424 movl %edx,32(%esp,%ecx,4) |
410 movl %ebp,36(%esp,%ecx,4) | 425 movl %ebp,36(%esp,%ecx,4) |
411 mull %edi | 426 mull %edi |
412 addl 32(%esp),%eax | 427 addl 32(%esp),%eax |
413 leal -1(%ecx),%ebx | 428 leal -1(%ecx),%ebx |
414 adcl $0,%edx | 429 adcl $0,%edx |
415 movl $1,%ecx | 430 movl $1,%ecx |
416 movl 4(%esi),%eax | 431 movl 4(%esi),%eax |
417 » jmp» .L0123rdmadd | 432 » jmp» .L0143rdmadd |
418 .align 16 | 433 .align 16 |
419 .L006common_tail: | 434 .L008common_tail: |
420 movl 16(%esp),%ebp | 435 movl 16(%esp),%ebp |
421 movl 4(%esp),%edi | 436 movl 4(%esp),%edi |
422 leal 32(%esp),%esi | 437 leal 32(%esp),%esi |
423 movl (%esi),%eax | 438 movl (%esi),%eax |
424 movl %ebx,%ecx | 439 movl %ebx,%ecx |
425 xorl %edx,%edx | 440 xorl %edx,%edx |
426 .align 16 | 441 .align 16 |
427 .L015sub: | 442 .L017sub: |
428 sbbl (%ebp,%edx,4),%eax | 443 sbbl (%ebp,%edx,4),%eax |
429 movl %eax,(%edi,%edx,4) | 444 movl %eax,(%edi,%edx,4) |
430 decl %ecx | 445 decl %ecx |
431 movl 4(%esi,%edx,4),%eax | 446 movl 4(%esi,%edx,4),%eax |
432 leal 1(%edx),%edx | 447 leal 1(%edx),%edx |
433 » jge» .L015sub | 448 » jge» .L017sub |
434 sbbl $0,%eax | 449 sbbl $0,%eax |
| 450 andl %eax,%esi |
| 451 notl %eax |
| 452 movl %edi,%ebp |
| 453 andl %eax,%ebp |
| 454 orl %ebp,%esi |
435 .align 16 | 455 .align 16 |
436 .L016copy: | 456 .L018copy: |
437 » movl» (%esi,%ebx,4),%edx | 457 » movl» (%esi,%ebx,4),%eax |
438 » movl» (%edi,%ebx,4),%ebp | 458 » movl» %eax,(%edi,%ebx,4) |
439 » xorl» %ebp,%edx | 459 » movl» %ecx,32(%esp,%ebx,4) |
440 » andl» %eax,%edx | |
441 » xorl» %ebp,%edx | |
442 » movl» %ecx,(%esi,%ebx,4) | |
443 » movl» %edx,(%edi,%ebx,4) | |
444 decl %ebx | 460 decl %ebx |
445 » jge» .L016copy | 461 » jge» .L018copy |
446 movl 24(%esp),%esp | 462 movl 24(%esp),%esp |
447 movl $1,%eax | 463 movl $1,%eax |
448 .L000just_leave: | 464 .L000just_leave: |
449 popl %edi | 465 popl %edi |
450 popl %esi | 466 popl %esi |
451 popl %ebx | 467 popl %ebx |
452 popl %ebp | 468 popl %ebp |
453 ret | 469 ret |
454 .size bn_mul_mont,.-.L_bn_mul_mont_begin | 470 .size bn_mul_mont,.-.L_bn_mul_mont_begin |
455 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | 471 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
456 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | 472 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
457 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 | 473 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
458 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 | 474 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
459 .byte 111,114,103,62,0 | 475 .byte 111,114,103,62,0 |
460 #endif | 476 #endif |
OLD | NEW |