| OLD | NEW |
| (Empty) |
/*
 * bn_mul_mont -- multi-word multiply-and-reduce routine for 32-bit x86.
 * The identification bytes at the end of this block spell
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * The routine reads six 32-bit arguments from the stack.  No prototype is
 * visible here, so they are referred to as arg1..arg6; from the way they
 * are used:
 *   arg1  vector that receives the final result (written in the common tail)
 *   arg2  first operand vector
 *   arg3  second operand vector (word i drives outer pass i)
 *   arg4  vector multiplied by the per-pass factor and subtracted in the
 *         tail -- presumably the modulus; TODO confirm against the prototype
 *   arg5  pointer to one word that is multiplied with the low word of the
 *         running sum -- presumably the Montgomery constant; TODO confirm
 *   arg6  number of 32-bit words in each vector
 *
 * Returns 0 in eax right away when arg6 < 4 (signed compare), 1 otherwise.
 *
 * Scratch frame built below (offsets from the new esp):
 *    0  arg6-1 (squaring path only)
 *    4  arg1        8  arg2        16  arg4
 *   12  arg3; later reused as a cursor into arg3 (generic path) or as the
 *       outer index (squaring path)
 *   20  the word loaded from *arg5
 *   24  esp as it was after the four register pushes
 *   28  end-of-arg3 pointer (generic path only)
 *   32  temporary vector "tp", at least arg6+2 words
 */
| 1 #if defined(__i386__) | |
| 2 .file "src/crypto/bn/asm/x86-mont.S" | |
| 3 .text | |
| 4 .globl bn_mul_mont | |
| 5 .hidden bn_mul_mont | |
| 6 .type bn_mul_mont,@function | |
| 7 .align 16 | |
| 8 bn_mul_mont: | |
| 9 .L_bn_mul_mont_begin: | |
/* Save ebp/ebx/esi/edi; they are restored at .L000just_leave. */
| 10 pushl %ebp | |
| 11 pushl %ebx | |
| 12 pushl %esi | |
| 13 pushl %edi | |
/* eax = 0 is the return value of the early exit taken when arg6 < 4.
   After four pushes the arguments start at 20(%esp), so 40(%esp) is arg6. */
| 14 xorl %eax,%eax | |
| 15 movl 40(%esp),%edi | |
| 16 cmpl $4,%edi | |
| 17 jl .L000just_leave | |
/* esi = address of arg1 slot, edx = address of arg2 slot, ebp = current esp
   (stored in the frame later so the tail can unwind). */
| 18 leal 20(%esp),%esi | |
| 19 leal 24(%esp),%edx | |
| 20 movl %esp,%ebp | |
/* Reserve 32 + 4*(arg6+2) bytes; edi ends up as arg6+2 again. */
| 21 addl $2,%edi | |
| 22 negl %edi | |
| 23 leal -32(%esp,%edi,4),%esp | |
| 24 negl %edi | |
/* Lower esp further so that it is congruent to edx modulo 2048 and differs
   from edx in bit 11, then round it down to a 64-byte boundary.
   NOTE(review): presumably a cache/aliasing placement measure relative to
   the caller's argument area -- confirm against the generator's notes. */
| 25 movl %esp,%eax | |
| 26 subl %edx,%eax | |
| 27 andl $2047,%eax | |
| 28 subl %eax,%esp | |
| 29 xorl %esp,%edx | |
| 30 andl $2048,%edx | |
| 31 xorl $2048,%edx | |
| 32 subl %edx,%esp | |
| 33 andl $-64,%esp | |
/* Copy arg1..arg4 and the word at *arg5 into the frame (see layout above). */
| 34 movl (%esi),%eax | |
| 35 movl 4(%esi),%ebx | |
| 36 movl 8(%esi),%ecx | |
| 37 movl 12(%esi),%edx | |
| 38 movl 16(%esi),%esi | |
| 39 movl (%esi),%esi | |
| 40 movl %eax,4(%esp) | |
| 41 movl %ebx,8(%esp) | |
| 42 movl %ecx,12(%esp) | |
| 43 movl %edx,16(%esp) | |
| 44 movl %esi,20(%esp) | |
/* ebx = (arg6+2)-3 = arg6-1, the index of the last word. */
| 45 leal -3(%edi),%ebx | |
| 46 movl %ebp,24(%esp) | |
/* Position-independent address of OPENSSL_ia32cap_P: call/pop yields the
   address of the label, the lea adds the link-time distance.  Bit 26 of its
   first word selects the MMX/pmuludq path; when clear, fall back to the
   integer-only code (the label name indicates this is the SSE2 flag). */
| 47 call .L001PIC_me_up | |
| 48 .L001PIC_me_up: | |
| 49 popl %eax | |
| 50 leal OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax | |
| 51 btl $26,(%eax) | |
| 52 jnc .L002non_sse2 | |
/*
 * pmuludq path.  Register roles:
 *   mm7 = 0x00000000ffffffff mask      esi = arg2, edi = arg3, ebp = arg4
 *   mm4 = current arg3 word            mm5 = per-pass factor for arg4
 *   mm2 = running sum/carry of arg2[j]*mm4
 *   mm3 = running sum/carry of arg4[j]*mm5
 *   edx = outer index, ecx = inner index, ebx = arg6-1
 */
| 53 movl $-1,%eax | |
| 54 movd %eax,%mm7 | |
| 55 movl 8(%esp),%esi | |
| 56 movl 12(%esp),%edi | |
| 57 movl 16(%esp),%ebp | |
| 58 xorl %edx,%edx | |
| 59 xorl %ecx,%ecx | |
/* Pass 0, word 0: mm5 = arg2[0]*arg3[0]; the factor is the low 32 bits of
   that product times the word saved at 20(%esp); adding factor*arg4[0] to
   the low product word leaves only a carry in the upper half of mm3. */
| 60 movd (%edi),%mm4 | |
| 61 movd (%esi),%mm5 | |
| 62 movd (%ebp),%mm3 | |
| 63 pmuludq %mm4,%mm5 | |
| 64 movq %mm5,%mm2 | |
| 65 movq %mm5,%mm0 | |
| 66 pand %mm7,%mm0 | |
| 67 pmuludq 20(%esp),%mm5 | |
| 68 pmuludq %mm5,%mm3 | |
| 69 paddq %mm0,%mm3 | |
| 70 movd 4(%ebp),%mm1 | |
| 71 movd 4(%esi),%mm0 | |
| 72 psrlq $32,%mm2 | |
| 73 psrlq $32,%mm3 | |
| 74 incl %ecx | |
| 75 .align 16 | |
/* Pass 0, remaining words.  Each result word is stored at 28(%esp,%ecx,4),
   i.e. one word below its own index in tp, while the next operand words are
   preloaded into mm0/mm1.  Runs while ecx < arg6-1. */
| 76 .L0031st: | |
| 77 pmuludq %mm4,%mm0 | |
| 78 pmuludq %mm5,%mm1 | |
| 79 paddq %mm0,%mm2 | |
| 80 paddq %mm1,%mm3 | |
| 81 movq %mm2,%mm0 | |
| 82 pand %mm7,%mm0 | |
| 83 movd 4(%ebp,%ecx,4),%mm1 | |
| 84 paddq %mm0,%mm3 | |
| 85 movd 4(%esi,%ecx,4),%mm0 | |
| 86 psrlq $32,%mm2 | |
| 87 movd %mm3,28(%esp,%ecx,4) | |
| 88 psrlq $32,%mm3 | |
| 89 leal 1(%ecx),%ecx | |
| 90 cmpl %ebx,%ecx | |
| 91 jl .L0031st | |
/* Last word of pass 0, then the two carries are summed and written as a
   64-bit value covering tp[arg6-1] and tp[arg6]. */
| 92 pmuludq %mm4,%mm0 | |
| 93 pmuludq %mm5,%mm1 | |
| 94 paddq %mm0,%mm2 | |
| 95 paddq %mm1,%mm3 | |
| 96 movq %mm2,%mm0 | |
| 97 pand %mm7,%mm0 | |
| 98 paddq %mm0,%mm3 | |
| 99 movd %mm3,28(%esp,%ecx,4) | |
| 100 psrlq $32,%mm2 | |
| 101 psrlq $32,%mm3 | |
| 102 paddq %mm2,%mm3 | |
| 103 movq %mm3,32(%esp,%ebx,4) | |
| 104 incl %edx | |
/* Passes 1..arg6-1: same scheme as pass 0, but the previous contents of tp
   (loaded through mm6) are added into the arg2*arg3[edx] chain. */
| 105 .L004outer: | |
| 106 xorl %ecx,%ecx | |
| 107 movd (%edi,%edx,4),%mm4 | |
| 108 movd (%esi),%mm5 | |
| 109 movd 32(%esp),%mm6 | |
| 110 movd (%ebp),%mm3 | |
| 111 pmuludq %mm4,%mm5 | |
| 112 paddq %mm6,%mm5 | |
| 113 movq %mm5,%mm0 | |
| 114 movq %mm5,%mm2 | |
| 115 pand %mm7,%mm0 | |
| 116 pmuludq 20(%esp),%mm5 | |
| 117 pmuludq %mm5,%mm3 | |
| 118 paddq %mm0,%mm3 | |
| 119 movd 36(%esp),%mm6 | |
| 120 movd 4(%ebp),%mm1 | |
| 121 movd 4(%esi),%mm0 | |
| 122 psrlq $32,%mm2 | |
| 123 psrlq $32,%mm3 | |
| 124 paddq %mm6,%mm2 | |
| 125 incl %ecx | |
/* ebx doubles as the inner-loop down-counter here; it is reloaded from ecx
   once the loop has finished. */
| 126 decl %ebx | |
| 127 .L005inner: | |
| 128 pmuludq %mm4,%mm0 | |
| 129 pmuludq %mm5,%mm1 | |
| 130 paddq %mm0,%mm2 | |
| 131 paddq %mm1,%mm3 | |
| 132 movq %mm2,%mm0 | |
| 133 movd 36(%esp,%ecx,4),%mm6 | |
| 134 pand %mm7,%mm0 | |
| 135 movd 4(%ebp,%ecx,4),%mm1 | |
| 136 paddq %mm0,%mm3 | |
| 137 movd 4(%esi,%ecx,4),%mm0 | |
| 138 psrlq $32,%mm2 | |
| 139 movd %mm3,28(%esp,%ecx,4) | |
| 140 psrlq $32,%mm3 | |
| 141 paddq %mm6,%mm2 | |
/* The jnz below tests the flags of this decl; the lea in between does not
   modify flags. */
| 142 decl %ebx | |
| 143 leal 1(%ecx),%ecx | |
| 144 jnz .L005inner | |
| 145 movl %ecx,%ebx | |
/* Last word of this pass; the top of tp absorbs both carries plus its own
   previous value (mm6). */
| 146 pmuludq %mm4,%mm0 | |
| 147 pmuludq %mm5,%mm1 | |
| 148 paddq %mm0,%mm2 | |
| 149 paddq %mm1,%mm3 | |
| 150 movq %mm2,%mm0 | |
| 151 pand %mm7,%mm0 | |
| 152 paddq %mm0,%mm3 | |
| 153 movd %mm3,28(%esp,%ecx,4) | |
| 154 psrlq $32,%mm2 | |
| 155 psrlq $32,%mm3 | |
| 156 movd 36(%esp,%ebx,4),%mm6 | |
| 157 paddq %mm2,%mm3 | |
| 158 paddq %mm6,%mm3 | |
| 159 movq %mm3,32(%esp,%ebx,4) | |
/* Next outer pass while edx <= ebx. */
| 160 leal 1(%edx),%edx | |
| 161 cmpl %ebx,%edx | |
| 162 jle .L004outer | |
/* Clear the MMX state before running the integer tail. */
| 163 emms | |
| 164 jmp .L006common_tail | |
| 165 .align 16 | |
/*
 * Integer-only path.  esi = arg2, edi = arg3, eax = arg3 + 4*arg6 (one past
 * the last arg3 word).  ebp = (arg6 & 1) | (arg2 - arg3): it is zero only
 * when both operand pointers are equal and the word count is even, in which
 * case the dedicated squaring code is used.  The jz still sees the flags of
 * the orl because the intervening movl does not alter flags.
 */
| 166 .L002non_sse2: | |
| 167 movl 8(%esp),%esi | |
| 168 leal 1(%ebx),%ebp | |
| 169 movl 12(%esp),%edi | |
| 170 xorl %ecx,%ecx | |
| 171 movl %esi,%edx | |
| 172 andl $1,%ebp | |
| 173 subl %edi,%edx | |
| 174 leal 4(%edi,%ebx,4),%eax | |
| 175 orl %edx,%ebp | |
| 176 movl (%edi),%edi | |
| 177 jz .L007bn_sqr_mont | |
/* Generic multiply: remember the end-of-arg3 pointer, start with arg2[0]. */
| 178 movl %eax,28(%esp) | |
| 179 movl (%esi),%eax | |
| 180 xorl %edx,%edx | |
| 181 .align 16 | |
/* Pass 0: tp[j] = low word of arg2[j]*arg3[0] + carry, carry kept in edx.
   ecx is incremented before the store, hence the 28-byte displacement. */
| 182 .L008mull: | |
| 183 movl %edx,%ebp | |
| 184 mull %edi | |
| 185 addl %eax,%ebp | |
| 186 leal 1(%ecx),%ecx | |
| 187 adcl $0,%edx | |
| 188 movl (%esi,%ecx,4),%eax | |
| 189 cmpl %ebx,%ecx | |
| 190 movl %ebp,28(%esp,%ecx,4) | |
| 191 jl .L008mull | |
/* Final product word of pass 0.  Interleaved with it: edi becomes
   tp[0] * (word at 20(%esp)), esi switches to arg4, and the two words above
   the product are initialised (carry, then zero). */
| 192 movl %edx,%ebp | |
| 193 mull %edi | |
| 194 movl 20(%esp),%edi | |
| 195 addl %ebp,%eax | |
| 196 movl 16(%esp),%esi | |
| 197 adcl $0,%edx | |
| 198 imull 32(%esp),%edi | |
| 199 movl %eax,32(%esp,%ebx,4) | |
| 200 xorl %ecx,%ecx | |
| 201 movl %edx,36(%esp,%ebx,4) | |
| 202 movl %ecx,40(%esp,%ebx,4) | |
/* arg4[0]*edi + tp[0]: the sum in eax is overwritten by the next load; only
   its carry flag (preserved by movl) is folded into edx. */
| 203 movl (%esi),%eax | |
| 204 mull %edi | |
| 205 addl 32(%esp),%eax | |
| 206 movl 4(%esi),%eax | |
| 207 adcl $0,%edx | |
| 208 incl %ecx | |
| 209 jmp .L0092ndmadd | |
| 210 .align 16 | |
/* Passes i >= 1, first half: tp[j] += arg2[j]*arg3[i] (edi = arg3[i]). */
| 211 .L0101stmadd: | |
| 212 movl %edx,%ebp | |
| 213 mull %edi | |
| 214 addl 32(%esp,%ecx,4),%ebp | |
| 215 leal 1(%ecx),%ecx | |
| 216 adcl $0,%edx | |
| 217 addl %eax,%ebp | |
| 218 movl (%esi,%ecx,4),%eax | |
| 219 adcl $0,%edx | |
| 220 cmpl %ebx,%ecx | |
| 221 movl %ebp,28(%esp,%ecx,4) | |
| 222 jl .L0101stmadd | |
/* Last word of that half, carry propagation into the two top words of tp,
   and set-up of the second half (edi = tp[0] * word at 20(%esp),
   esi = arg4), mirroring the end of pass 0 above. */
| 223 movl %edx,%ebp | |
| 224 mull %edi | |
| 225 addl 32(%esp,%ebx,4),%eax | |
| 226 movl 20(%esp),%edi | |
| 227 adcl $0,%edx | |
| 228 movl 16(%esp),%esi | |
| 229 addl %eax,%ebp | |
| 230 adcl $0,%edx | |
| 231 imull 32(%esp),%edi | |
| 232 xorl %ecx,%ecx | |
| 233 addl 36(%esp,%ebx,4),%edx | |
| 234 movl %ebp,32(%esp,%ebx,4) | |
| 235 adcl $0,%ecx | |
| 236 movl (%esi),%eax | |
| 237 movl %edx,36(%esp,%ebx,4) | |
| 238 movl %ecx,40(%esp,%ebx,4) | |
| 239 mull %edi | |
| 240 addl 32(%esp),%eax | |
| 241 movl 4(%esi),%eax | |
| 242 adcl $0,%edx | |
| 243 movl $1,%ecx | |
| 244 .align 16 | |
/* Second half of every pass: add arg4[j]*edi to tp[j] and store the result
   one word lower (24-byte displacement after ecx has been incremented). */
| 245 .L0092ndmadd: | |
| 246 movl %edx,%ebp | |
| 247 mull %edi | |
| 248 addl 32(%esp,%ecx,4),%ebp | |
| 249 leal 1(%ecx),%ecx | |
| 250 adcl $0,%edx | |
| 251 addl %eax,%ebp | |
| 252 movl (%esi,%ecx,4),%eax | |
| 253 adcl $0,%edx | |
| 254 cmpl %ebx,%ecx | |
| 255 movl %ebp,24(%esp,%ecx,4) | |
| 256 jl .L0092ndmadd | |
/* Last word, then fold the two top words of tp down by one position.
   The arg3 cursor kept at 12(%esp) is advanced by one word; when it reaches
   the end pointer saved at 28(%esp) all passes are done. */
| 257 movl %edx,%ebp | |
| 258 mull %edi | |
| 259 addl 32(%esp,%ebx,4),%ebp | |
| 260 adcl $0,%edx | |
| 261 addl %eax,%ebp | |
| 262 adcl $0,%edx | |
| 263 movl %ebp,28(%esp,%ebx,4) | |
| 264 xorl %eax,%eax | |
| 265 movl 12(%esp),%ecx | |
| 266 addl 36(%esp,%ebx,4),%edx | |
| 267 adcl 40(%esp,%ebx,4),%eax | |
| 268 leal 4(%ecx),%ecx | |
| 269 movl %edx,32(%esp,%ebx,4) | |
| 270 cmpl 28(%esp),%ecx | |
| 271 movl %eax,36(%esp,%ebx,4) | |
| 272 je .L006common_tail | |
/* Otherwise load the next arg3 word and start the next pass. */
| 273 movl (%ecx),%edi | |
| 274 movl 8(%esp),%esi | |
| 275 movl %ecx,12(%esp) | |
| 276 xorl %ecx,%ecx | |
| 277 xorl %edx,%edx | |
| 278 movl (%esi),%eax | |
| 279 jmp .L0101stmadd | |
| 280 .align 16 | |
/*
 * Squaring path (arg2 == arg3, even word count).  (%esp) keeps arg6-1 and
 * 12(%esp) becomes the outer index i, starting at 0.  edi = a[0].
 * The square a[0]*a[0] is taken as is; its high word is halved (low bit
 * parked in ebx) so that the doubling applied in the loop restores it.
 */
| 281 .L007bn_sqr_mont: | |
| 282 movl %ebx,(%esp) | |
| 283 movl %ecx,12(%esp) | |
| 284 movl %edi,%eax | |
| 285 mull %edi | |
| 286 movl %eax,32(%esp) | |
| 287 movl %edx,%ebx | |
| 288 shrl $1,%edx | |
| 289 andl $1,%ebx | |
| 290 incl %ecx | |
| 291 .align 16 | |
/* Row 0 cross products: each word of a[j]*a[0] + carry is stored doubled
   (lea ebx + 2*eax); ebx carries the bit shifted out into the next word. */
| 292 .L011sqr: | |
| 293 movl (%esi,%ecx,4),%eax | |
| 294 movl %edx,%ebp | |
| 295 mull %edi | |
| 296 addl %ebp,%eax | |
| 297 leal 1(%ecx),%ecx | |
| 298 adcl $0,%edx | |
| 299 leal (%ebx,%eax,2),%ebp | |
| 300 shrl $31,%eax | |
| 301 cmpl (%esp),%ecx | |
| 302 movl %eax,%ebx | |
| 303 movl %ebp,28(%esp,%ecx,4) | |
| 304 jl .L011sqr | |
/* Last cross product of row 0 and the doubled top words.  Interleaved:
   edi = tp[0] * (word at 20(%esp)), esi = arg4, and the first arg4[0]*edi
   product whose low-word sum is discarded (only its carry is used). */
| 305 movl (%esi,%ecx,4),%eax | |
| 306 movl %edx,%ebp | |
| 307 mull %edi | |
| 308 addl %ebp,%eax | |
| 309 movl 20(%esp),%edi | |
| 310 adcl $0,%edx | |
| 311 movl 16(%esp),%esi | |
| 312 leal (%ebx,%eax,2),%ebp | |
| 313 imull 32(%esp),%edi | |
| 314 shrl $31,%eax | |
| 315 movl %ebp,32(%esp,%ecx,4) | |
| 316 leal (%eax,%edx,2),%ebp | |
| 317 movl (%esi),%eax | |
| 318 shrl $31,%edx | |
| 319 movl %ebp,36(%esp,%ecx,4) | |
| 320 movl %edx,40(%esp,%ecx,4) | |
| 321 mull %edi | |
| 322 addl 32(%esp),%eax | |
| 323 movl %ecx,%ebx | |
| 324 adcl $0,%edx | |
| 325 movl 4(%esi),%eax | |
| 326 movl $1,%ecx | |
| 327 .align 16 | |
/* Squaring-path counterpart of .L0092ndmadd, handling two words per
   iteration (ecx advances by 2): tp[j] + arg4[j]*edi stored one word lower. */
| 328 .L0123rdmadd: | |
| 329 movl %edx,%ebp | |
| 330 mull %edi | |
| 331 addl 32(%esp,%ecx,4),%ebp | |
| 332 adcl $0,%edx | |
| 333 addl %eax,%ebp | |
| 334 movl 4(%esi,%ecx,4),%eax | |
| 335 adcl $0,%edx | |
| 336 movl %ebp,28(%esp,%ecx,4) | |
| 337 movl %edx,%ebp | |
| 338 mull %edi | |
| 339 addl 36(%esp,%ecx,4),%ebp | |
| 340 leal 2(%ecx),%ecx | |
| 341 adcl $0,%edx | |
| 342 addl %eax,%ebp | |
| 343 movl (%esi,%ecx,4),%eax | |
| 344 adcl $0,%edx | |
| 345 cmpl %ebx,%ecx | |
| 346 movl %ebp,24(%esp,%ecx,4) | |
| 347 jl .L0123rdmadd | |
/* Last word and top-word folding; finished once the outer index stored at
   12(%esp) equals ebx (arg6-1). */
| 348 movl %edx,%ebp | |
| 349 mull %edi | |
| 350 addl 32(%esp,%ebx,4),%ebp | |
| 351 adcl $0,%edx | |
| 352 addl %eax,%ebp | |
| 353 adcl $0,%edx | |
| 354 movl %ebp,28(%esp,%ebx,4) | |
| 355 movl 12(%esp),%ecx | |
| 356 xorl %eax,%eax | |
| 357 movl 8(%esp),%esi | |
| 358 addl 36(%esp,%ebx,4),%edx | |
| 359 adcl 40(%esp,%ebx,4),%eax | |
| 360 movl %edx,32(%esp,%ebx,4) | |
| 361 cmpl %ebx,%ecx | |
| 362 movl %eax,36(%esp,%ebx,4) | |
| 363 je .L006common_tail | |
/* Next row: i += 1, edi = a[i], add a[i]*a[i] into tp[i].  If i is already
   the last index there are no cross products left (jump to .L013sqrlast);
   the je uses the flags of the cmpl, which the lea leaves untouched. */
| 364 movl 4(%esi,%ecx,4),%edi | |
| 365 leal 1(%ecx),%ecx | |
| 366 movl %edi,%eax | |
| 367 movl %ecx,12(%esp) | |
| 368 mull %edi | |
| 369 addl 32(%esp,%ecx,4),%eax | |
| 370 adcl $0,%edx | |
| 371 movl %eax,32(%esp,%ecx,4) | |
| 372 xorl %ebp,%ebp | |
| 373 cmpl %ebx,%ecx | |
| 374 leal 1(%ecx),%ecx | |
| 375 je .L013sqrlast | |
| 376 movl %edx,%ebx | |
| 377 shrl $1,%edx | |
| 378 andl $1,%ebx | |
| 379 .align 16 | |
/* Doubled cross products a[j]*a[i] accumulated into tp[j]; eax/ebx collect
   the bit shifted out by the doubling plus the carries of the additions.
   Runs while ecx <= arg6-1 (value saved at (%esp)). */
| 380 .L014sqradd: | |
| 381 movl (%esi,%ecx,4),%eax | |
| 382 movl %edx,%ebp | |
| 383 mull %edi | |
| 384 addl %ebp,%eax | |
| 385 leal (%eax,%eax,1),%ebp | |
| 386 adcl $0,%edx | |
| 387 shrl $31,%eax | |
| 388 addl 32(%esp,%ecx,4),%ebp | |
| 389 leal 1(%ecx),%ecx | |
| 390 adcl $0,%eax | |
| 391 addl %ebx,%ebp | |
| 392 adcl $0,%eax | |
| 393 cmpl (%esp),%ecx | |
| 394 movl %ebp,28(%esp,%ecx,4) | |
| 395 movl %eax,%ebx | |
| 396 jle .L014sqradd | |
/* Double the remaining high word: edx = 2*edx + ebx, ebp = bits carried out. */
| 397 movl %edx,%ebp | |
| 398 addl %edx,%edx | |
| 399 shrl $31,%ebp | |
| 400 addl %ebx,%edx | |
| 401 adcl $0,%ebp | |
/* Add edx:ebp into the top of tp, recompute edi = tp[0] * (word at
   20(%esp)), restore ebx = ecx-1 and re-enter the two-word loop with
   esi = arg4. */
| 402 .L013sqrlast: | |
| 403 movl 20(%esp),%edi | |
| 404 movl 16(%esp),%esi | |
| 405 imull 32(%esp),%edi | |
| 406 addl 32(%esp,%ecx,4),%edx | |
| 407 movl (%esi),%eax | |
| 408 adcl $0,%ebp | |
| 409 movl %edx,32(%esp,%ecx,4) | |
| 410 movl %ebp,36(%esp,%ecx,4) | |
| 411 mull %edi | |
| 412 addl 32(%esp),%eax | |
| 413 leal -1(%ecx),%ebx | |
| 414 adcl $0,%edx | |
| 415 movl $1,%ecx | |
| 416 movl 4(%esi),%eax | |
| 417 jmp .L0123rdmadd | |
| 418 .align 16 | |
/*
 * Shared tail for all paths.  ebp = arg4, edi = arg1, esi = &tp[0],
 * ecx = ebx = arg6-1.  The xorl also clears CF, so the first sbbl starts
 * without a borrow.
 */
| 419 .L006common_tail: | |
| 420 movl 16(%esp),%ebp | |
| 421 movl 4(%esp),%edi | |
| 422 leal 32(%esp),%esi | |
| 423 movl (%esi),%eax | |
| 424 movl %ebx,%ecx | |
| 425 xorl %edx,%edx | |
| 426 .align 16 | |
/* arg1[] = tp[] - arg4[] with borrow.  decl, movl and leal do not change CF,
   so the borrow survives from one sbbl to the next; jge tests the sign
   produced by decl and ends the loop once ecx has dropped to -1. */
| 427 .L015sub: | |
| 428 sbbl (%ebp,%edx,4),%eax | |
| 429 movl %eax,(%edi,%edx,4) | |
| 430 decl %ecx | |
| 431 movl 4(%esi,%edx,4),%eax | |
| 432 leal 1(%edx),%edx | |
| 433 jge .L015sub | |
/* eax = top word of tp minus the final borrow; it is used below as a
   per-bit select mask. */
| 434 sbbl $0,%eax | |
| 435 .align 16 | |
/* Branch-free select, from the last word down to word 0:
   arg1[i] = ((tp[i] ^ arg1[i]) & eax) ^ arg1[i], i.e. tp[i] where the mask
   bits are set and the stored difference where they are clear.  Each tp
   word is overwritten with ecx (left at -1 by the loop above) on the way. */
| 436 .L016copy: | |
| 437 movl (%esi,%ebx,4),%edx | |
| 438 movl (%edi,%ebx,4),%ebp | |
| 439 xorl %ebp,%edx | |
| 440 andl %eax,%edx | |
| 441 xorl %ebp,%edx | |
| 442 movl %ecx,(%esi,%ebx,4) | |
| 443 movl %edx,(%edi,%ebx,4) | |
| 444 decl %ebx | |
| 445 jge .L016copy | |
/* Drop the scratch frame (esp saved at 24(%esp)) and report success. */
| 446 movl 24(%esp),%esp | |
| 447 movl $1,%eax | |
/* Common exit, also reached directly with eax = 0 when arg6 < 4. */
| 448 .L000just_leave: | |
| 449 popl %edi | |
| 450 popl %esi | |
| 451 popl %ebx | |
| 452 popl %ebp | |
| 453 ret | |
| 454 .size bn_mul_mont,.-.L_bn_mul_mont_begin | |
/* NUL-terminated ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by
   <appro@openssl.org>". */
| 455 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | |
| 456 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | |
| 457 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 | |
| 458 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 | |
| 459 .byte 111,114,103,62,0 | |
| 460 #endif | |
| OLD | NEW |