OLD | NEW |
(Empty) | |
| 1 #if defined(__i386__) /* assemble the body only for 32-bit x86 targets */ |
| 2 .file "src/crypto/bn/asm/x86-mont.S" |
| 3 .text |
| 4 .globl _bn_mul_mont /* entry point symbol (leading underscore as used for C names on Mach-O) */ |
| 5 .private_extern _bn_mul_mont /* Mach-O: linkable from other objects, not exported from the final image */ |
| 6 .align 4 /* Mach-O .align takes a power of two: 16-byte alignment */ |
| 7 _bn_mul_mont: /* Montgomery multiplication. Reads six cdecl stack arguments, presumably (rp, ap, bp, np, n0, num) - TODO confirm against the C prototype; those names are used as shorthand below. Returns 1 in eax, or 0 when num < 4 */ |
| 8 L_bn_mul_mont_begin: |
| 9 pushl %ebp /* save the four callee-saved registers ... */ |
| 10 pushl %ebx |
| 11 pushl %esi |
| 12 pushl %edi /* ... return address is now at 16(%esp), 1st argument at 20(%esp) */ |
| 13 xorl %eax,%eax /* eax = 0: return value for the early exit below */ |
| 14 movl 40(%esp),%edi /* edi = 6th argument (num, word count) */ |
| 15 cmpl $4,%edi |
| 16 jl L000just_leave /* num < 4 (signed): return 0 without doing any work - presumably the caller then uses another routine, confirm */ |
| 17 leal 20(%esp),%esi /* esi = address of the argument block (slot of 1st argument) */ |
| 18 leal 24(%esp),%edx /* edx = address of the 2nd argument slot; only used as the reference address for the stack placement below */ |
| 19 movl %esp,%ebp /* ebp = current esp, stored in the new frame later so it can be restored */ |
| 20 addl $2,%edi /* edi = num + 2 */ |
| 21 negl %edi |
| 22 leal -32(%esp,%edi,4),%esp /* esp -= 32 + 4*(num+2): 32-byte fixed area plus num+2 scratch words (tp) */ |
| 23 negl %edi /* edi = num + 2 again */ |
| 24 movl %esp,%eax |
| 25 subl %edx,%eax |
| 26 andl $2047,%eax |
| 27 subl %eax,%esp /* esp -= (esp - edx) mod 2048, so esp is congruent to edx modulo 2048 */ |
| 28 xorl %esp,%edx |
| 29 andl $2048,%edx |
| 30 xorl $2048,%edx /* edx = 2048 if esp and the reference address agree in bit 11, else 0 */ |
| 31 subl %edx,%esp /* ... so afterwards they differ in bit 11 (presumably to limit cache aliasing - confirm) */ |
| 32 andl $-64,%esp /* round esp down to a 64-byte boundary */ |
| 33 movl (%esi),%eax /* eax = 1st argument (rp) */ |
| 34 movl 4(%esi),%ebx /* ebx = 2nd argument (ap) */ |
| 35 movl 8(%esi),%ecx /* ecx = 3rd argument (bp) */ |
| 36 movl 12(%esi),%edx /* edx = 4th argument (np) */ |
| 37 movl 16(%esi),%esi /* esi = 5th argument, a pointer ... */ |
| 38 movl (%esi),%esi /* ... dereferenced: esi = first word it points to (n0) */ |
| 39 movl %eax,4(%esp) /* frame: 4(%esp) = rp */ |
| 40 movl %ebx,8(%esp) /* frame: 8(%esp) = ap */ |
| 41 movl %ecx,12(%esp) /* frame: 12(%esp) = bp (reused as pointer/index by the integer paths) */ |
| 42 movl %edx,16(%esp) /* frame: 16(%esp) = np */ |
| 43 movl %esi,20(%esp) /* frame: 20(%esp) = n0 value */ |
| 44 leal -3(%edi),%ebx /* ebx = (num+2) - 3 = num - 1 */ |
| 45 movl %ebp,24(%esp) /* frame: 24(%esp) = caller's esp; tp[] starts at 32(%esp) */ |
| 46 call L001PIC_me_up /* call/pop pair: obtain our own address for position-independent data access */ |
| 47 L001PIC_me_up: |
| 48 popl %eax /* eax = address of L001PIC_me_up */ |
| 49 movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax /* eax = address of _OPENSSL_ia32cap_P, read from the non-lazy pointer slot */ |
| 50 btl $26,(%eax) /* CF = bit 26 of its first dword (the SSE2 flag, judging by the branch target name) */ |
| 51 jnc L002non_sse2 /* bit clear: use the integer-only path */ |
| 52 movl $-1,%eax |
| 53 movd %eax,%mm7 /* mm7 = 0x00000000FFFFFFFF: mask selecting the low 32 bits */ |
| 54 movl 8(%esp),%esi /* esi = ap */ |
| 55 movl 12(%esp),%edi /* edi = bp */ |
| 56 movl 16(%esp),%ebp /* ebp = np */ |
| 57 xorl %edx,%edx /* edx = i = 0 (index into bp) */ |
| 58 xorl %ecx,%ecx /* ecx = j = 0 (index into ap/np) */ |
| 59 movd (%edi),%mm4 /* mm4 = bp[0] */ |
| 60 movd (%esi),%mm5 /* mm5 = ap[0] */ |
| 61 movd (%ebp),%mm3 /* mm3 = np[0] */ |
| 62 pmuludq %mm4,%mm5 /* mm5 = ap[0]*bp[0] (64-bit product) */ |
| 63 movq %mm5,%mm2 /* mm2 = copy; its high half becomes the ap*bp carry */ |
| 64 movq %mm5,%mm0 |
| 65 pand %mm7,%mm0 /* mm0 = low 32 bits of the product */ |
| 66 pmuludq 20(%esp),%mm5 /* mm5 = low32(product) * n0; later multiplies use only its low dword (m) */ |
| 67 pmuludq %mm5,%mm3 /* mm3 = np[0]*m */ |
| 68 paddq %mm0,%mm3 /* add the low product word; only the carry out of this sum is kept */ |
| 69 movd 4(%ebp),%mm1 /* mm1 = np[1] */ |
| 70 movd 4(%esi),%mm0 /* mm0 = ap[1] */ |
| 71 psrlq $32,%mm2 /* mm2 = carry of the ap*bp chain */ |
| 72 psrlq $32,%mm3 /* mm3 = carry of the np*m chain */ |
| 73 incl %ecx /* j = 1 */ |
| 74 .align 4,0x90 |
| 75 L0031st: /* first pass (bp[0]): builds tp from ap*bp[0] + np*m, dropping the lowest word */ |
| 76 pmuludq %mm4,%mm0 /* mm0 = ap[j]*bp[0] */ |
| 77 pmuludq %mm5,%mm1 /* mm1 = np[j]*m */ |
| 78 paddq %mm0,%mm2 /* ap*bp chain += product */ |
| 79 paddq %mm1,%mm3 /* np*m chain += product */ |
| 80 movq %mm2,%mm0 |
| 81 pand %mm7,%mm0 /* mm0 = low word of the ap*bp chain */ |
| 82 movd 4(%ebp,%ecx,4),%mm1 /* preload np[j+1] */ |
| 83 paddq %mm0,%mm3 /* fold that low word into the np*m chain */ |
| 84 movd 4(%esi,%ecx,4),%mm0 /* preload ap[j+1] */ |
| 85 psrlq $32,%mm2 /* keep only the carry */ |
| 86 movd %mm3,28(%esp,%ecx,4) /* tp[j-1] = low word of the combined sum */ |
| 87 psrlq $32,%mm3 /* keep only the carry */ |
| 88 leal 1(%ecx),%ecx /* j++ */ |
| 89 cmpl %ebx,%ecx |
| 90 jl L0031st /* loop while j < num-1 */ |
| 91 pmuludq %mm4,%mm0 /* last word (j = num-1), same step without preloads */ |
| 92 pmuludq %mm5,%mm1 |
| 93 paddq %mm0,%mm2 |
| 94 paddq %mm1,%mm3 |
| 95 movq %mm2,%mm0 |
| 96 pand %mm7,%mm0 |
| 97 paddq %mm0,%mm3 |
| 98 movd %mm3,28(%esp,%ecx,4) /* tp[num-2] */ |
| 99 psrlq $32,%mm2 |
| 100 psrlq $32,%mm3 |
| 101 paddq %mm2,%mm3 /* sum of both final carries */ |
| 102 movq %mm3,32(%esp,%ebx,4) /* 64-bit store: tp[num-1] and tp[num] */ |
| 103 incl %edx /* i = 1 */ |
| 104 L004outer: /* one pass per remaining bp[i]: tp = (tp + ap*bp[i] + np*m) shifted down one word */ |
| 105 xorl %ecx,%ecx /* j = 0 */ |
| 106 movd (%edi,%edx,4),%mm4 /* mm4 = bp[i] */ |
| 107 movd (%esi),%mm5 /* mm5 = ap[0] */ |
| 108 movd 32(%esp),%mm6 /* mm6 = tp[0] */ |
| 109 movd (%ebp),%mm3 /* mm3 = np[0] */ |
| 110 pmuludq %mm4,%mm5 /* mm5 = ap[0]*bp[i] */ |
| 111 paddq %mm6,%mm5 /* += tp[0] */ |
| 112 movq %mm5,%mm0 |
| 113 movq %mm5,%mm2 |
| 114 pand %mm7,%mm0 /* mm0 = low word of that sum */ |
| 115 pmuludq 20(%esp),%mm5 /* mm5 = low word * n0; low dword is m for this pass */ |
| 116 pmuludq %mm5,%mm3 /* mm3 = np[0]*m */ |
| 117 paddq %mm0,%mm3 /* only the carry of this sum is kept */ |
| 118 movd 36(%esp),%mm6 /* mm6 = tp[1] */ |
| 119 movd 4(%ebp),%mm1 /* mm1 = np[1] */ |
| 120 movd 4(%esi),%mm0 /* mm0 = ap[1] */ |
| 121 psrlq $32,%mm2 /* carry of the ap*bp chain */ |
| 122 psrlq $32,%mm3 /* carry of the np*m chain */ |
| 123 paddq %mm6,%mm2 /* ap*bp chain += tp[1] */ |
| 124 incl %ecx /* j = 1 */ |
| 125 decl %ebx /* ebx = num-2: down-counter for the inner loop */ |
| 126 L005inner: |
| 127 pmuludq %mm4,%mm0 /* mm0 = ap[j]*bp[i] */ |
| 128 pmuludq %mm5,%mm1 /* mm1 = np[j]*m */ |
| 129 paddq %mm0,%mm2 |
| 130 paddq %mm1,%mm3 |
| 131 movq %mm2,%mm0 |
| 132 movd 36(%esp,%ecx,4),%mm6 /* mm6 = tp[j+1] for the next step */ |
| 133 pand %mm7,%mm0 /* low word of the ap*bp chain */ |
| 134 movd 4(%ebp,%ecx,4),%mm1 /* preload np[j+1] */ |
| 135 paddq %mm0,%mm3 |
| 136 movd 4(%esi,%ecx,4),%mm0 /* preload ap[j+1] */ |
| 137 psrlq $32,%mm2 |
| 138 movd %mm3,28(%esp,%ecx,4) /* tp[j-1] = low word of the combined sum */ |
| 139 psrlq $32,%mm3 |
| 140 paddq %mm6,%mm2 /* ap*bp chain += tp[j+1] */ |
| 141 decl %ebx /* sets ZF when the counter reaches zero */ |
| 142 leal 1(%ecx),%ecx /* j++ via lea so the flags from decl survive */ |
| 143 jnz L005inner |
| 144 movl %ecx,%ebx /* ebx = num-1 again */ |
| 145 pmuludq %mm4,%mm0 /* last word (j = num-1) */ |
| 146 pmuludq %mm5,%mm1 |
| 147 paddq %mm0,%mm2 |
| 148 paddq %mm1,%mm3 |
| 149 movq %mm2,%mm0 |
| 150 pand %mm7,%mm0 |
| 151 paddq %mm0,%mm3 |
| 152 movd %mm3,28(%esp,%ecx,4) /* tp[num-2] */ |
| 153 psrlq $32,%mm2 |
| 154 psrlq $32,%mm3 |
| 155 movd 36(%esp,%ebx,4),%mm6 /* mm6 = tp[num] from the previous pass */ |
| 156 paddq %mm2,%mm3 |
| 157 paddq %mm6,%mm3 /* both carries + old top word */ |
| 158 movq %mm3,32(%esp,%ebx,4) /* 64-bit store: tp[num-1] and tp[num] */ |
| 159 leal 1(%edx),%edx /* i++ */ |
| 160 cmpl %ebx,%edx |
| 161 jle L004outer /* loop while i <= num-1 */ |
| 162 emms /* clear MMX state before returning to code that may use x87 */ |
| 163 jmp L006common_tail |
| 164 .align 4,0x90 |
| 165 L002non_sse2: /* integer-only path; ebx = num-1 on entry */ |
| 166 movl 8(%esp),%esi /* esi = ap */ |
| 167 leal 1(%ebx),%ebp /* ebp = num */ |
| 168 movl 12(%esp),%edi /* edi = bp */ |
| 169 xorl %ecx,%ecx /* j = 0 */ |
| 170 movl %esi,%edx |
| 171 andl $1,%ebp /* ebp = num & 1 */ |
| 172 subl %edi,%edx /* edx = ap - bp */ |
| 173 leal 4(%edi,%ebx,4),%eax /* eax = &bp[num], one past the last bp word */ |
| 174 orl %edx,%ebp /* ZF set only when num is even AND ap == bp */ |
| 175 movl (%edi),%edi /* edi = bp[0] (mov leaves the flags untouched) */ |
| 176 jz L007bn_sqr_mont /* same operand twice with even length: take the squaring path */ |
| 177 movl %eax,28(%esp) /* frame: 28(%esp) = &bp[num], end marker for the walk over bp */ |
| 178 movl (%esi),%eax /* eax = ap[0] */ |
| 179 xorl %edx,%edx /* carry = 0 */ |
| 180 .align 4,0x90 |
| 181 L008mull: /* first pass: tp = ap * bp[0] */ |
| 182 movl %edx,%ebp /* ebp = carry in */ |
| 183 mull %edi /* edx:eax = ap[j]*bp[0] */ |
| 184 addl %eax,%ebp /* ebp = low word + carry */ |
| 185 leal 1(%ecx),%ecx /* j++ (lea keeps CF) */ |
| 186 adcl $0,%edx /* edx = carry out */ |
| 187 movl (%esi,%ecx,4),%eax /* eax = next ap word */ |
| 188 cmpl %ebx,%ecx |
| 189 movl %ebp,28(%esp,%ecx,4) /* store the word just computed: tp[j-1] for the incremented j */ |
| 190 jl L008mull /* loop while j < num-1 (flags still from cmpl) */ |
| 191 movl %edx,%ebp /* last word: ebp = carry in */ |
| 192 mull %edi /* edx:eax = ap[num-1]*bp[0] */ |
| 193 movl 20(%esp),%edi /* edi = n0 */ |
| 194 addl %ebp,%eax |
| 195 movl 16(%esp),%esi /* esi = np */ |
| 196 adcl $0,%edx |
| 197 imull 32(%esp),%edi /* edi = m = low32(n0 * tp[0]) */ |
| 198 movl %eax,32(%esp,%ebx,4) /* tp[num-1] */ |
| 199 xorl %ecx,%ecx |
| 200 movl %edx,36(%esp,%ebx,4) /* tp[num] */ |
| 201 movl %ecx,40(%esp,%ebx,4) /* tp[num+1] = 0 */ |
| 202 movl (%esi),%eax /* eax = np[0] */ |
| 203 mull %edi /* edx:eax = np[0]*m */ |
| 204 addl 32(%esp),%eax /* add tp[0]: only CF is needed, the sum itself is overwritten next */ |
| 205 movl 4(%esi),%eax /* eax = np[1] (mov keeps CF) */ |
| 206 adcl $0,%edx /* edx = carry into word 1 */ |
| 207 incl %ecx /* j = 1 */ |
| 208 jmp L0092ndmadd |
| 209 .align 4,0x90 |
| 210 L0101stmadd: /* multiply-accumulate pass: tp += ap * bp[i] (edi = bp[i]) */ |
| 211 movl %edx,%ebp /* ebp = carry in */ |
| 212 mull %edi /* edx:eax = ap[j]*bp[i] */ |
| 213 addl 32(%esp,%ecx,4),%ebp /* += tp[j] */ |
| 214 leal 1(%ecx),%ecx /* j++ (lea keeps CF) */ |
| 215 adcl $0,%edx |
| 216 addl %eax,%ebp /* += low product word */ |
| 217 movl (%esi,%ecx,4),%eax /* eax = next ap word */ |
| 218 adcl $0,%edx |
| 219 cmpl %ebx,%ecx |
| 220 movl %ebp,28(%esp,%ecx,4) /* store in place: tp[j-1] for the incremented j */ |
| 221 jl L0101stmadd |
| 222 movl %edx,%ebp /* last word (j = num-1) */ |
| 223 mull %edi |
| 224 addl 32(%esp,%ebx,4),%eax /* += tp[num-1] */ |
| 225 movl 20(%esp),%edi /* edi = n0 */ |
| 226 adcl $0,%edx |
| 227 movl 16(%esp),%esi /* esi = np */ |
| 228 addl %eax,%ebp |
| 229 adcl $0,%edx |
| 230 imull 32(%esp),%edi /* edi = m = low32(n0 * tp[0]) */ |
| 231 xorl %ecx,%ecx |
| 232 addl 36(%esp,%ebx,4),%edx /* carry + tp[num] */ |
| 233 movl %ebp,32(%esp,%ebx,4) /* tp[num-1] */ |
| 234 adcl $0,%ecx /* ecx = carry out of the top word */ |
| 235 movl (%esi),%eax /* eax = np[0] */ |
| 236 movl %edx,36(%esp,%ebx,4) /* tp[num] */ |
| 237 movl %ecx,40(%esp,%ebx,4) /* tp[num+1] */ |
| 238 mull %edi /* edx:eax = np[0]*m */ |
| 239 addl 32(%esp),%eax /* add tp[0]: only CF is needed */ |
| 240 movl 4(%esi),%eax /* eax = np[1] */ |
| 241 adcl $0,%edx |
| 242 movl $1,%ecx /* j = 1 */ |
| 243 .align 4,0x90 |
| 244 L0092ndmadd: /* reduction pass: tp = (tp + np*m) shifted down one word */ |
| 245 movl %edx,%ebp /* ebp = carry in */ |
| 246 mull %edi /* edx:eax = np[j]*m */ |
| 247 addl 32(%esp,%ecx,4),%ebp /* += tp[j] */ |
| 248 leal 1(%ecx),%ecx /* j++ (lea keeps CF) */ |
| 249 adcl $0,%edx |
| 250 addl %eax,%ebp |
| 251 movl (%esi,%ecx,4),%eax /* eax = next np word */ |
| 252 adcl $0,%edx |
| 253 cmpl %ebx,%ecx |
| 254 movl %ebp,24(%esp,%ecx,4) /* store one slot lower: tp[j-2] for the incremented j */ |
| 255 jl L0092ndmadd |
| 256 movl %edx,%ebp /* last word (j = num-1) */ |
| 257 mull %edi |
| 258 addl 32(%esp,%ebx,4),%ebp /* += tp[num-1] */ |
| 259 adcl $0,%edx |
| 260 addl %eax,%ebp |
| 261 adcl $0,%edx |
| 262 movl %ebp,28(%esp,%ebx,4) /* tp[num-2] */ |
| 263 xorl %eax,%eax |
| 264 movl 12(%esp),%ecx /* ecx = pointer to the bp word just processed */ |
| 265 addl 36(%esp,%ebx,4),%edx /* carry + tp[num] */ |
| 266 adcl 40(%esp,%ebx,4),%eax /* eax = tp[num+1] + CF */ |
| 267 leal 4(%ecx),%ecx /* advance to the next bp word */ |
| 268 movl %edx,32(%esp,%ebx,4) /* tp[num-1] */ |
| 269 cmpl 28(%esp),%ecx /* reached &bp[num]? */ |
| 270 movl %eax,36(%esp,%ebx,4) /* tp[num] */ |
| 271 je L006common_tail /* all bp words consumed */ |
| 272 movl (%ecx),%edi /* edi = next bp word */ |
| 273 movl 8(%esp),%esi /* esi = ap */ |
| 274 movl %ecx,12(%esp) /* remember the bp position */ |
| 275 xorl %ecx,%ecx /* j = 0 */ |
| 276 xorl %edx,%edx /* carry = 0 */ |
| 277 movl (%esi),%eax /* eax = ap[0] */ |
| 278 jmp L0101stmadd |
| 279 .align 4,0x90 |
| 280 L007bn_sqr_mont: /* squaring path (ap == bp, num even); edi = ap[0], ecx = 0, ebx = num-1 on entry */ |
| 281 movl %ebx,(%esp) /* frame: (%esp) = num-1, loop bound kept in memory because ebx is reused */ |
| 282 movl %ecx,12(%esp) /* frame: 12(%esp) now holds the outer index i = 0 */ |
| 283 movl %edi,%eax |
| 284 mull %edi /* edx:eax = ap[0]^2 */ |
| 285 movl %eax,32(%esp) /* tp[0] = low word */ |
| 286 movl %edx,%ebx |
| 287 shrl $1,%edx /* edx = high word / 2: carry for the loop, which doubles its sums */ |
| 288 andl $1,%ebx /* ebx = the bit shifted out, re-added after the doubling */ |
| 289 incl %ecx /* j = 1 */ |
| 290 .align 4,0x90 |
| 291 L011sqr: /* tp[j] = doubled cross product ap[j]*ap[0] plus carries */ |
| 292 movl (%esi,%ecx,4),%eax /* eax = ap[j] */ |
| 293 movl %edx,%ebp /* ebp = carry in */ |
| 294 mull %edi /* edx:eax = ap[j]*ap[0] */ |
| 295 addl %ebp,%eax |
| 296 leal 1(%ecx),%ecx /* j++ (lea keeps CF) */ |
| 297 adcl $0,%edx |
| 298 leal (%ebx,%eax,2),%ebp /* ebp = 2*eax + ebx (low 32 bits) */ |
| 299 shrl $31,%eax /* eax = bit 31 lost by the doubling */ |
| 300 cmpl (%esp),%ecx /* compare j with num-1 */ |
| 301 movl %eax,%ebx /* carry that bit into the next word */ |
| 302 movl %ebp,28(%esp,%ecx,4) /* store the word just computed: tp[j-1] for the incremented j */ |
| 303 jl L011sqr |
| 304 movl (%esi,%ecx,4),%eax /* last word: eax = ap[num-1] */ |
| 305 movl %edx,%ebp |
| 306 mull %edi |
| 307 addl %ebp,%eax |
| 308 movl 20(%esp),%edi /* edi = n0 */ |
| 309 adcl $0,%edx |
| 310 movl 16(%esp),%esi /* esi = np */ |
| 311 leal (%ebx,%eax,2),%ebp /* doubled low word + pending bit */ |
| 312 imull 32(%esp),%edi /* edi = m = low32(n0 * tp[0]) */ |
| 313 shrl $31,%eax /* bit shifted out of the low word */ |
| 314 movl %ebp,32(%esp,%ecx,4) /* tp[num-1] */ |
| 315 leal (%eax,%edx,2),%ebp /* doubled high word + that bit */ |
| 316 movl (%esi),%eax /* eax = np[0] */ |
| 317 shrl $31,%edx /* bit shifted out of the high word */ |
| 318 movl %ebp,36(%esp,%ecx,4) /* tp[num] */ |
| 319 movl %edx,40(%esp,%ecx,4) /* tp[num+1] */ |
| 320 mull %edi /* edx:eax = np[0]*m */ |
| 321 addl 32(%esp),%eax /* add tp[0]: only CF is needed */ |
| 322 movl %ecx,%ebx /* ebx = num-1 */ |
| 323 adcl $0,%edx |
| 324 movl 4(%esi),%eax /* eax = np[1] */ |
| 325 movl $1,%ecx /* j = 1 */ |
| 326 .align 4,0x90 |
| 327 L0123rdmadd: /* reduction pass, two words per iteration: tp = (tp + np*m) shifted down one word */ |
| 328 movl %edx,%ebp /* ebp = carry in */ |
| 329 mull %edi /* edx:eax = np[j]*m */ |
| 330 addl 32(%esp,%ecx,4),%ebp /* += tp[j] */ |
| 331 adcl $0,%edx |
| 332 addl %eax,%ebp |
| 333 movl 4(%esi,%ecx,4),%eax /* eax = np[j+1] */ |
| 334 adcl $0,%edx |
| 335 movl %ebp,28(%esp,%ecx,4) /* tp[j-1] */ |
| 336 movl %edx,%ebp /* second half of the unrolled step */ |
| 337 mull %edi /* edx:eax = np[j+1]*m */ |
| 338 addl 36(%esp,%ecx,4),%ebp /* += tp[j+1] */ |
| 339 leal 2(%ecx),%ecx /* j += 2 (lea keeps CF) */ |
| 340 adcl $0,%edx |
| 341 addl %eax,%ebp |
| 342 movl (%esi,%ecx,4),%eax /* eax = np word for the next step */ |
| 343 adcl $0,%edx |
| 344 cmpl %ebx,%ecx |
| 345 movl %ebp,24(%esp,%ecx,4) /* tp[j-2] for the advanced j */ |
| 346 jl L0123rdmadd |
| 347 movl %edx,%ebp /* last word (j = num-1) */ |
| 348 mull %edi |
| 349 addl 32(%esp,%ebx,4),%ebp /* += tp[num-1] */ |
| 350 adcl $0,%edx |
| 351 addl %eax,%ebp |
| 352 adcl $0,%edx |
| 353 movl %ebp,28(%esp,%ebx,4) /* tp[num-2] */ |
| 354 movl 12(%esp),%ecx /* ecx = outer index i */ |
| 355 xorl %eax,%eax |
| 356 movl 8(%esp),%esi /* esi = ap */ |
| 357 addl 36(%esp,%ebx,4),%edx /* carry + tp[num] */ |
| 358 adcl 40(%esp,%ebx,4),%eax /* eax = tp[num+1] + CF */ |
| 359 movl %edx,32(%esp,%ebx,4) /* tp[num-1] */ |
| 360 cmpl %ebx,%ecx /* i == num-1 ? */ |
| 361 movl %eax,36(%esp,%ebx,4) /* tp[num] */ |
| 362 je L006common_tail /* every word of ap has been processed */ |
| 363 movl 4(%esi,%ecx,4),%edi /* edi = ap[i+1] */ |
| 364 leal 1(%ecx),%ecx /* i++ */ |
| 365 movl %edi,%eax |
| 366 movl %ecx,12(%esp) /* save the new i */ |
| 367 mull %edi /* edx:eax = ap[i]^2 */ |
| 368 addl 32(%esp,%ecx,4),%eax /* += tp[i] */ |
| 369 adcl $0,%edx |
| 370 movl %eax,32(%esp,%ecx,4) /* tp[i] updated */ |
| 371 xorl %ebp,%ebp /* ebp = 0 (top carry if we go straight to sqrlast) */ |
| 372 cmpl %ebx,%ecx /* i == num-1 ? */ |
| 373 leal 1(%ecx),%ecx /* j = i+1 (lea keeps the flags from cmpl) */ |
| 374 je L013sqrlast /* last word: no cross products left */ |
| 375 movl %edx,%ebx |
| 376 shrl $1,%edx /* edx = high word / 2, as in the first pass */ |
| 377 andl $1,%ebx /* ebx = the bit shifted out */ |
| 378 .align 4,0x90 |
| 379 L014sqradd: /* tp[j] += doubled cross product ap[j]*ap[i] */ |
| 380 movl (%esi,%ecx,4),%eax /* eax = ap[j] */ |
| 381 movl %edx,%ebp /* ebp = carry in */ |
| 382 mull %edi /* edx:eax = ap[j]*ap[i] */ |
| 383 addl %ebp,%eax |
| 384 leal (%eax,%eax,1),%ebp /* ebp = 2*eax (lea keeps CF from the add) */ |
| 385 adcl $0,%edx |
| 386 shrl $31,%eax /* eax = bit lost by the doubling */ |
| 387 addl 32(%esp,%ecx,4),%ebp /* += tp[j] */ |
| 388 leal 1(%ecx),%ecx /* j++ (lea keeps CF) */ |
| 389 adcl $0,%eax |
| 390 addl %ebx,%ebp /* += pending carry from the previous word */ |
| 391 adcl $0,%eax |
| 392 cmpl (%esp),%ecx /* compare j with num-1 */ |
| 393 movl %ebp,28(%esp,%ecx,4) /* store in place: tp[j-1] for the incremented j */ |
| 394 movl %eax,%ebx /* pending carry for the next word */ |
| 395 jle L014sqradd /* loop through j = num-1 inclusive */ |
| 396 movl %edx,%ebp |
| 397 addl %edx,%edx /* double the final high word */ |
| 398 shrl $31,%ebp /* ebp = bit lost by that doubling */ |
| 399 addl %ebx,%edx /* += pending carry */ |
| 400 adcl $0,%ebp /* ebp:edx = top two words to add */ |
| 401 L013sqrlast: /* ecx = num here; ebp:edx = value to add at tp[num] */ |
| 402 movl 20(%esp),%edi /* edi = n0 */ |
| 403 movl 16(%esp),%esi /* esi = np */ |
| 404 imull 32(%esp),%edi /* edi = m = low32(n0 * tp[0]) */ |
| 405 addl 32(%esp,%ecx,4),%edx /* += tp[num] */ |
| 406 movl (%esi),%eax /* eax = np[0] (mov keeps CF) */ |
| 407 adcl $0,%ebp |
| 408 movl %edx,32(%esp,%ecx,4) /* tp[num] */ |
| 409 movl %ebp,36(%esp,%ecx,4) /* tp[num+1] */ |
| 410 mull %edi /* edx:eax = np[0]*m */ |
| 411 addl 32(%esp),%eax /* add tp[0]: only CF is needed */ |
| 412 leal -1(%ecx),%ebx /* ebx = num-1 (lea keeps CF) */ |
| 413 adcl $0,%edx |
| 414 movl $1,%ecx /* j = 1 */ |
| 415 movl 4(%esi),%eax /* eax = np[1] */ |
| 416 jmp L0123rdmadd |
| 417 .align 4,0x90 |
| 418 L006common_tail: /* final step shared by all paths: rp = tp - np, then keep tp instead if that borrowed; ebx = num-1 */ |
| 419 movl 16(%esp),%ebp /* ebp = np */ |
| 420 movl 4(%esp),%edi /* edi = rp */ |
| 421 leal 32(%esp),%esi /* esi = &tp[0] */ |
| 422 movl (%esi),%eax /* eax = tp[0] */ |
| 423 movl %ebx,%ecx /* ecx = num-1: loop counter */ |
| 424 xorl %edx,%edx /* j = 0; also clears CF for the first sbbl */ |
| 425 .align 4,0x90 |
| 426 L015sub: |
| 427 sbbl (%ebp,%edx,4),%eax /* eax = tp[j] - np[j] - borrow */ |
| 428 movl %eax,(%edi,%edx,4) /* rp[j] = difference */ |
| 429 decl %ecx /* decl leaves CF (the borrow) untouched */ |
| 430 movl 4(%esi,%edx,4),%eax /* eax = tp[j+1] */ |
| 431 leal 1(%edx),%edx /* j++ via lea, flags preserved */ |
| 432 jge L015sub /* loop while the counter is >= 0: num iterations, ecx ends at -1 */ |
| 433 sbbl $0,%eax /* eax = tp[num] - borrow: used below as the AND mask of the select */ |
| 434 .align 4,0x90 |
| 435 L016copy: /* branch-free select per word, from index num-1 down to 0 */ |
| 436 movl (%esi,%ebx,4),%edx /* edx = tp[j] */ |
| 437 movl (%edi,%ebx,4),%ebp /* ebp = rp[j] (tp - np) */ |
| 438 xorl %ebp,%edx |
| 439 andl %eax,%edx |
| 440 xorl %ebp,%edx /* edx = (tp[j] AND mask) OR (rp[j] AND NOT mask) */ |
| 441 movl %ecx,(%esi,%ebx,4) /* overwrite the consumed tp word with ecx (-1 after the loop above): wipes the scratch */ |
| 442 movl %edx,(%edi,%ebx,4) /* rp[j] = selected word */ |
| 443 decl %ebx |
| 444 jge L016copy |
| 445 movl 24(%esp),%esp /* restore the stack pointer saved in the frame */ |
| 446 movl $1,%eax /* return 1 */ |
| 447 L000just_leave: /* common exit; eax already holds the return value */ |
| 448 popl %edi |
| 449 popl %esi |
| 450 popl %ebx |
| 451 popl %ebp |
| 452 ret |
| 453 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 /* ASCII "Montgomery Multi" */ |
| 454 .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 /* "plication for x8" */ |
| 455 .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 /* "6, CRYPTOGAMS by" */ |
| 456 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 /* " <appro@openssl." */ |
| 457 .byte 111,114,103,62,0 /* "org>" plus NUL terminator */ |
| 458 .section __IMPORT,__pointers,non_lazy_symbol_pointers /* Mach-O section holding non-lazy symbol pointers */ |
| 459 L_OPENSSL_ia32cap_P$non_lazy_ptr: /* pointer slot read by the PIC sequence near the top of the function */ |
| 460 .indirect_symbol _OPENSSL_ia32cap_P /* binds the slot to the external symbol */ |
| 461 .long 0 /* placeholder for the resolved address */ |
| 462 #endif /* defined(__i386__) */ |
OLD | NEW |