OLD | NEW |
(Empty) | |
; Section selection, keyed on the assembler output format:
;   obj    -> 32-bit segment named "code", aligned to 64 bytes
;   win32  -> .text aligned to 64 bytes; when assembled with yasm the version
;             must be at least 1.1.0, otherwise $@feat.00 is defined as 1 here
;   other  -> plain .text code section
| 1 %ifidn __OUTPUT_FORMAT__,obj |
| 2 section code use32 class=code align=64 |
| 3 %elifidn __OUTPUT_FORMAT__,win32 |
| 4 %ifdef __YASM_VERSION_ID__ |
| 5 %if __YASM_VERSION_ID__ < 01010000h |
| 6 %error yasm version 1.1.0 or later needed. |
| 7 %endif |
| 8 ; Yasm automatically includes @feat.00 and complains about redefining it. |
| 9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html |
| 10 %else |
| 11 $@feat.00 equ 1 |
| 12 %endif |
| 13 section .text code align=64 |
| 14 %else |
| 15 section .text code |
| 16 %endif |
;-----------------------------------------------------------------------
; _bn_mul_mont  (32-bit x86, NASM syntax, all arguments on the stack)
;
; Takes six dword stack arguments. After the four register pushes they
; sit at [20+esp] .. [40+esp].
; NOTE(review): the role names below are inferred from how the code uses
; each argument and from the identification string stored after the
; routine; confirm against the C prototype of the caller.
;   arg1 "rp"  : destination vector (written in L$010sub / L$011copy)
;   arg2 "ap"  : first operand vector
;   arg3 "bp"  : second operand vector
;   arg4 "np"  : vector subtracted in L$010sub (presumably the modulus)
;   arg5 "n0"  : pointer; only the first dword it points to is used
;   arg6 "num" : number of 32-bit words per vector
;
; Returns: eax = 0 immediately when num < 4 (signed compare), else eax = 1.
; Preserves ebp, ebx, esi, edi (pushed on entry, popped on exit).
;
; Frame layout after setup (offsets from the relocated esp):
;   [ 0+esp]  num-1 (written only on the squaring path)
;   [ 4+esp]  arg1
;   [ 8+esp]  arg2
;   [12+esp]  arg3 pointer, advanced by 4 per outer pass
;             (on the squaring path this slot holds the outer index instead)
;   [16+esp]  arg4
;   [20+esp]  dword loaded through arg5
;   [24+esp]  caller-side esp, restored before the pops
;   [28+esp]  arg3 + 4*num (end pointer; multiplication path only)
;   [32+esp]  temporary vector "tp" of num+2 dwords
;-----------------------------------------------------------------------
| 17 global _bn_mul_mont |
| 18 align 16 |
| 19 _bn_mul_mont: |
| 20 L$_bn_mul_mont_begin: |
| 21 push ebp |
| 22 push ebx |
| 23 push esi |
| 24 push edi |
; eax = 0 is the return value for the early-exit path.
| 25 xor eax,eax |
; edi = arg6 (num); refuse anything shorter than 4 words.
| 26 mov edi,DWORD [40+esp] |
| 27 cmp edi,4 |
| 28 jl NEAR L$000just_leave |
; esi = address of the argument block, edx = address of the arg2 slot.
| 29 lea esi,[20+esp] |
| 30 lea edx,[24+esp] |
; ebp keeps the pre-allocation esp so it can be restored at the end.
| 31 mov ebp,esp |
; Reserve (num+2)*4 + 32 bytes below esp; edi is negated only for the lea
; and negated back, leaving edi = num+2.
| 32 add edi,2 |
| 33 neg edi |
| 34 lea esp,[edi*4+esp-32] |
| 35 neg edi |
; Move esp down so that esp == edx (mod 2048) ...
| 36 mov eax,esp |
| 37 sub eax,edx |
| 38 and eax,2047 |
| 39 sub esp,eax |
; ... then subtract 2048 when bit 11 of esp equals bit 11 of edx, so the two
; addresses end up differing in bit 11.
; NOTE(review): presumably done to keep the frame from aliasing the argument
; area in the cache — confirm against the generator's comments.
| 40 xor edx,esp |
| 41 and edx,2048 |
| 42 xor edx,2048 |
| 43 sub esp,edx |
; Round esp down to a multiple of 64.
| 44 and esp,-64 |
; Copy the first five arguments into the new frame; arg5 is dereferenced so
; that [20+esp] holds the dword it points to rather than the pointer.
| 45 mov eax,DWORD [esi] |
| 46 mov ebx,DWORD [4+esi] |
| 47 mov ecx,DWORD [8+esi] |
| 48 mov edx,DWORD [12+esi] |
| 49 mov esi,DWORD [16+esi] |
| 50 mov esi,DWORD [esi] |
| 51 mov DWORD [4+esp],eax |
| 52 mov DWORD [8+esp],ebx |
| 53 mov DWORD [12+esp],ecx |
| 54 mov DWORD [16+esp],edx |
| 55 mov DWORD [20+esp],esi |
; ebx = (num+2) - 3 = num-1, the index of the last word.
| 56 lea ebx,[edi-3] |
| 57 mov DWORD [24+esp],ebp |
; Path selection: ebp = (num & 1) | (arg2 - arg3). It is zero only when num
; is even and both operand pointers are identical, in which case the
; squaring path is taken. The lea/mov between the `or` and the `jz` do not
; modify flags.
| 58 mov esi,DWORD [8+esp] |
| 59 lea ebp,[1+ebx] |
| 60 mov edi,DWORD [12+esp] |
| 61 xor ecx,ecx |
| 62 mov edx,esi |
| 63 and ebp,1 |
| 64 sub edx,edi |
; eax = arg3 + 4*num, one past the last word of arg3.
| 65 lea eax,[4+ebx*4+edi] |
| 66 or ebp,edx |
; edi = first word of arg3.
| 67 mov edi,DWORD [edi] |
| 68 jz NEAR L$001bn_sqr_mont |
; ---- general multiplication path ----
| 69 mov DWORD [28+esp],eax |
| 70 mov eax,DWORD [esi] |
| 71 xor edx,edx |
| 72 align 16 |
; First pass: tp[] = arg2[] * arg3[0]. edx carries the high word between
; iterations; ecx is incremented before the store, hence the 28 offset
; addresses tp[ecx-1].
| 73 L$002mull: |
| 74 mov ebp,edx |
| 75 mul edi |
| 76 add ebp,eax |
| 77 lea ecx,[1+ecx] |
| 78 adc edx,0 |
| 79 mov eax,DWORD [ecx*4+esi] |
| 80 cmp ecx,ebx |
| 81 mov DWORD [28+ecx*4+esp],ebp |
| 82 jl NEAR L$002mull |
; Last word of the first pass; then switch esi to arg4 and compute the
; per-pass multiplier edi = [20+esp] * tp[0] (low 32 bits).
| 83 mov ebp,edx |
| 84 mul edi |
| 85 mov edi,DWORD [20+esp] |
| 86 add eax,ebp |
| 87 mov esi,DWORD [16+esp] |
| 88 adc edx,0 |
| 89 imul edi,DWORD [32+esp] |
; Store tp[num-1], tp[num] and clear tp[num+1].
| 90 mov DWORD [32+ebx*4+esp],eax |
| 91 xor ecx,ecx |
| 92 mov DWORD [36+ebx*4+esp],edx |
| 93 mov DWORD [40+ebx*4+esp],ecx |
; arg4[0] * edi + tp[0]: only the carry into edx is kept, the low word in
; eax is overwritten by the next load.
| 94 mov eax,DWORD [esi] |
| 95 mul edi |
| 96 add eax,DWORD [32+esp] |
| 97 mov eax,DWORD [4+esi] |
| 98 adc edx,0 |
| 99 inc ecx |
| 100 jmp NEAR L$0032ndmadd |
| 101 align 16 |
; Outer-pass multiply-accumulate: tp[] += arg2[] * edi, where edi is the
; current word of arg3.
| 102 L$0041stmadd: |
| 103 mov ebp,edx |
| 104 mul edi |
| 105 add ebp,DWORD [32+ecx*4+esp] |
| 106 lea ecx,[1+ecx] |
| 107 adc edx,0 |
| 108 add ebp,eax |
| 109 mov eax,DWORD [ecx*4+esi] |
| 110 adc edx,0 |
| 111 cmp ecx,ebx |
| 112 mov DWORD [28+ecx*4+esp],ebp |
| 113 jl NEAR L$0041stmadd |
; Final word of this pass, then recompute edi = [20+esp] * tp[0] and fold
; the carry into tp[num] / tp[num+1].
| 114 mov ebp,edx |
| 115 mul edi |
| 116 add eax,DWORD [32+ebx*4+esp] |
| 117 mov edi,DWORD [20+esp] |
| 118 adc edx,0 |
| 119 mov esi,DWORD [16+esp] |
| 120 add ebp,eax |
| 121 adc edx,0 |
| 122 imul edi,DWORD [32+esp] |
| 123 xor ecx,ecx |
| 124 add edx,DWORD [36+ebx*4+esp] |
| 125 mov DWORD [32+ebx*4+esp],ebp |
| 126 adc ecx,0 |
| 127 mov eax,DWORD [esi] |
| 128 mov DWORD [36+ebx*4+esp],edx |
| 129 mov DWORD [40+ebx*4+esp],ecx |
; As above: arg4[0] * edi + tp[0], keeping only the carry in edx.
| 130 mul edi |
| 131 add eax,DWORD [32+esp] |
| 132 mov eax,DWORD [4+esi] |
| 133 adc edx,0 |
| 134 mov ecx,1 |
| 135 align 16 |
; tp[] += arg4[] * edi, with each result stored one word lower than it was
; read (offset 24 after the increment addresses tp[ecx-2]).
| 136 L$0032ndmadd: |
| 137 mov ebp,edx |
| 138 mul edi |
| 139 add ebp,DWORD [32+ecx*4+esp] |
| 140 lea ecx,[1+ecx] |
| 141 adc edx,0 |
| 142 add ebp,eax |
| 143 mov eax,DWORD [ecx*4+esi] |
| 144 adc edx,0 |
| 145 cmp ecx,ebx |
| 146 mov DWORD [24+ecx*4+esp],ebp |
| 147 jl NEAR L$0032ndmadd |
; Last word, then propagate the carry through the two top words of tp.
| 148 mov ebp,edx |
| 149 mul edi |
| 150 add ebp,DWORD [32+ebx*4+esp] |
| 151 adc edx,0 |
| 152 add ebp,eax |
| 153 adc edx,0 |
| 154 mov DWORD [28+ebx*4+esp],ebp |
| 155 xor eax,eax |
| 156 mov ecx,DWORD [12+esp] |
| 157 add edx,DWORD [36+ebx*4+esp] |
| 158 adc eax,DWORD [40+ebx*4+esp] |
; Advance the arg3 pointer by one word and stop once it reaches the end
; pointer saved at [28+esp].
| 159 lea ecx,[4+ecx] |
| 160 mov DWORD [32+ebx*4+esp],edx |
| 161 cmp ecx,DWORD [28+esp] |
| 162 mov DWORD [36+ebx*4+esp],eax |
| 163 je NEAR L$005common_tail |
; Set up the next outer pass: edi = next word of arg3, esi = arg2.
| 164 mov edi,DWORD [ecx] |
| 165 mov esi,DWORD [8+esp] |
| 166 mov DWORD [12+esp],ecx |
| 167 xor ecx,ecx |
| 168 xor edx,edx |
| 169 mov eax,DWORD [esi] |
| 170 jmp NEAR L$0041stmadd |
| 171 align 16 |
; ---- squaring path (num even, arg2 == arg3) ----
; On entry: esi = arg2, edi = first operand word, ebx = num-1, ecx = 0.
; [esp] keeps num-1 and [12+esp] is reused as the outer index.
| 172 L$001bn_sqr_mont: |
| 173 mov DWORD [esp],ebx |
| 174 mov DWORD [12+esp],ecx |
; tp[0] = low word of a[0]^2. The high word is split: its bit 0 goes to ebx
; and the remaining bits (shifted right by one) stay in edx, so the doubled
; cross products formed below can be combined with it.
| 175 mov eax,edi |
| 176 mul edi |
| 177 mov DWORD [32+esp],eax |
| 178 mov ebx,edx |
| 179 shr edx,1 |
| 180 and ebx,1 |
| 181 inc ecx |
| 182 align 16 |
; For each j: a[j]*a[0] + carry, stored doubled (eax*2 + previous top bit);
; bit 31 shifted out of eax is carried to the next word in ebx.
| 183 L$006sqr: |
| 184 mov eax,DWORD [ecx*4+esi] |
| 185 mov ebp,edx |
| 186 mul edi |
| 187 add eax,ebp |
| 188 lea ecx,[1+ecx] |
| 189 adc edx,0 |
| 190 lea ebp,[eax*2+ebx] |
| 191 shr eax,31 |
| 192 cmp ecx,DWORD [esp] |
| 193 mov ebx,eax |
| 194 mov DWORD [28+ecx*4+esp],ebp |
| 195 jl NEAR L$006sqr |
; Last word of the first squaring pass; also loads edi = [20+esp] * tp[0]
; and esi = arg4 for the accumulation loop that follows.
| 196 mov eax,DWORD [ecx*4+esi] |
| 197 mov ebp,edx |
| 198 mul edi |
| 199 add eax,ebp |
| 200 mov edi,DWORD [20+esp] |
| 201 adc edx,0 |
| 202 mov esi,DWORD [16+esp] |
| 203 lea ebp,[eax*2+ebx] |
| 204 imul edi,DWORD [32+esp] |
| 205 shr eax,31 |
| 206 mov DWORD [32+ecx*4+esp],ebp |
| 207 lea ebp,[edx*2+eax] |
| 208 mov eax,DWORD [esi] |
| 209 shr edx,31 |
| 210 mov DWORD [36+ecx*4+esp],ebp |
| 211 mov DWORD [40+ecx*4+esp],edx |
; arg4[0] * edi + tp[0]: only the carry in edx is kept. ebx = ecx = num-1.
| 212 mul edi |
| 213 add eax,DWORD [32+esp] |
| 214 mov ebx,ecx |
| 215 adc edx,0 |
| 216 mov eax,DWORD [4+esi] |
| 217 mov ecx,1 |
| 218 align 16 |
; tp[] += arg4[] * edi, results stored one word lower; this loop handles
; two words per iteration (ecx advances by 2).
| 219 L$0073rdmadd: |
| 220 mov ebp,edx |
| 221 mul edi |
| 222 add ebp,DWORD [32+ecx*4+esp] |
| 223 adc edx,0 |
| 224 add ebp,eax |
| 225 mov eax,DWORD [4+ecx*4+esi] |
| 226 adc edx,0 |
| 227 mov DWORD [28+ecx*4+esp],ebp |
| 228 mov ebp,edx |
| 229 mul edi |
| 230 add ebp,DWORD [36+ecx*4+esp] |
| 231 lea ecx,[2+ecx] |
| 232 adc edx,0 |
| 233 add ebp,eax |
| 234 mov eax,DWORD [ecx*4+esi] |
| 235 adc edx,0 |
| 236 cmp ecx,ebx |
| 237 mov DWORD [24+ecx*4+esp],ebp |
| 238 jl NEAR L$0073rdmadd |
; Last word, then carry propagation into the two top words of tp.
| 239 mov ebp,edx |
| 240 mul edi |
| 241 add ebp,DWORD [32+ebx*4+esp] |
| 242 adc edx,0 |
| 243 add ebp,eax |
| 244 adc edx,0 |
| 245 mov DWORD [28+ebx*4+esp],ebp |
; ecx = outer index; finished once it equals num-1 (ebx).
| 246 mov ecx,DWORD [12+esp] |
| 247 xor eax,eax |
| 248 mov esi,DWORD [8+esp] |
| 249 add edx,DWORD [36+ebx*4+esp] |
| 250 adc eax,DWORD [40+ebx*4+esp] |
| 251 mov DWORD [32+ebx*4+esp],edx |
| 252 cmp ecx,ebx |
| 253 mov DWORD [36+ebx*4+esp],eax |
| 254 je NEAR L$005common_tail |
; Next outer index i: edi = a[i]; add a[i]^2 into tp[i] and keep the high
; word plus carry in edx.
| 255 mov edi,DWORD [4+ecx*4+esi] |
| 256 lea ecx,[1+ecx] |
| 257 mov eax,edi |
| 258 mov DWORD [12+esp],ecx |
| 259 mul edi |
| 260 add eax,DWORD [32+ecx*4+esp] |
| 261 adc edx,0 |
| 262 mov DWORD [32+ecx*4+esp],eax |
| 263 xor ebp,ebp |
; If i is already the last index there are no cross products left. The lea
; does not disturb the flags set by the cmp.
| 264 cmp ecx,ebx |
| 265 lea ecx,[1+ecx] |
| 266 je NEAR L$008sqrlast |
; Split the high word as in the first pass: bit 0 to ebx, rest >> 1 in edx.
| 267 mov ebx,edx |
| 268 shr edx,1 |
| 269 and ebx,1 |
| 270 align 16 |
; Accumulate doubled cross products a[j]*a[i] into tp[j] for j > i; eax
; collects the bit shifted out by the doubling plus the addition carries and
; becomes ebx for the next word.
| 271 L$009sqradd: |
| 272 mov eax,DWORD [ecx*4+esi] |
| 273 mov ebp,edx |
| 274 mul edi |
| 275 add eax,ebp |
| 276 lea ebp,[eax*1+eax] |
| 277 adc edx,0 |
| 278 shr eax,31 |
| 279 add ebp,DWORD [32+ecx*4+esp] |
| 280 lea ecx,[1+ecx] |
| 281 adc eax,0 |
| 282 add ebp,ebx |
| 283 adc eax,0 |
| 284 cmp ecx,DWORD [esp] |
| 285 mov DWORD [28+ecx*4+esp],ebp |
| 286 mov ebx,eax |
| 287 jle NEAR L$009sqradd |
; Double the remaining high word: edx = 2*edx + ebx, ebp = bit shifted out
; plus carry.
| 288 mov ebp,edx |
| 289 add edx,edx |
| 290 shr ebp,31 |
| 291 add edx,ebx |
| 292 adc ebp,0 |
; Fold edx:ebp into the top of tp, reload edi = [20+esp] * tp[0] and
; esi = arg4, restore ebx = num-1 and re-enter the accumulation loop.
| 293 L$008sqrlast: |
| 294 mov edi,DWORD [20+esp] |
| 295 mov esi,DWORD [16+esp] |
| 296 imul edi,DWORD [32+esp] |
| 297 add edx,DWORD [32+ecx*4+esp] |
| 298 mov eax,DWORD [esi] |
| 299 adc ebp,0 |
| 300 mov DWORD [32+ecx*4+esp],edx |
| 301 mov DWORD [36+ecx*4+esp],ebp |
| 302 mul edi |
| 303 add eax,DWORD [32+esp] |
| 304 lea ebx,[ecx-1] |
| 305 adc edx,0 |
| 306 mov ecx,1 |
| 307 mov eax,DWORD [4+esi] |
| 308 jmp NEAR L$0073rdmadd |
| 309 align 16 |
; ---- shared epilogue for both paths ----
; ebp = arg4, edi = arg1, esi = tp, ecx = num-1 (loop counter), edx = word
; index. The `xor edx,edx` also clears CF before the first sbb.
| 310 L$005common_tail: |
| 311 mov ebp,DWORD [16+esp] |
| 312 mov edi,DWORD [4+esp] |
| 313 lea esi,[32+esp] |
| 314 mov eax,DWORD [esi] |
| 315 mov ecx,ebx |
| 316 xor edx,edx |
| 317 align 16 |
; arg1[] = tp[] - arg4[] with borrow. dec, mov and lea leave CF untouched,
; so the borrow survives to the next sbb; jge tests the flags set by dec.
| 318 L$010sub: |
| 319 sbb eax,DWORD [edx*4+ebp] |
| 320 mov DWORD [edx*4+edi],eax |
| 321 dec ecx |
| 322 mov eax,DWORD [4+edx*4+esi] |
| 323 lea edx,[1+edx] |
| 324 jge NEAR L$010sub |
; eax = tp[num] minus the final borrow; it is used as the select mask below.
; NOTE(review): the mask is expected to be either 0 or all-ones — confirm.
| 325 sbb eax,0 |
| 326 align 16 |
; Branch-free select, walking ebx from num-1 down to 0:
;   arg1[i] = ((tp[i] ^ arg1[i]) & eax) ^ arg1[i]
; i.e. tp[i] where mask bits are set, the subtracted value otherwise.
; Each tp[i] is overwritten with ecx (the counter value left by L$010sub).
| 327 L$011copy: |
| 328 mov edx,DWORD [ebx*4+esi] |
| 329 mov ebp,DWORD [ebx*4+edi] |
| 330 xor edx,ebp |
| 331 and edx,eax |
| 332 xor edx,ebp |
| 333 mov DWORD [ebx*4+esi],ecx |
| 334 mov DWORD [ebx*4+edi],edx |
| 335 dec ebx |
| 336 jge NEAR L$011copy |
; Restore the esp saved during setup and report success (eax = 1).
| 337 mov esp,DWORD [24+esp] |
| 338 mov eax,1 |
; Early-exit target: reached with eax = 0 and esp still at the pushed regs.
| 339 L$000just_leave: |
| 340 pop edi |
| 341 pop esi |
| 342 pop ebx |
| 343 pop ebp |
| 344 ret |
; NUL-terminated ASCII identification string placed after the routine:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
| 345 db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
| 346 db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
| 347 db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
| 348 db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
| 349 db 111,114,103,62,0 |
OLD | NEW |