| OLD | NEW |
| 1 default rel | 1 default rel |
| 2 %define XMMWORD | 2 %define XMMWORD |
| 3 %define YMMWORD | 3 %define YMMWORD |
| 4 %define ZMMWORD | 4 %define ZMMWORD |
| 5 section .text code align=64 | 5 section .text code align=64 |
| 6 | 6 |
| 7 | 7 |
| 8 EXTERN OPENSSL_ia32cap_P | 8 EXTERN OPENSSL_ia32cap_P |
| 9 | 9 |
| 10 global bn_mul_mont_gather5 | 10 global bn_mul_mont_gather5 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 | 24 |
| 25 | 25 |
| 26 test r9d,7 | 26 test r9d,7 |
| 27 jnz NEAR $L$mul_enter | 27 jnz NEAR $L$mul_enter |
| 28 jmp NEAR $L$mul4x_enter | 28 jmp NEAR $L$mul4x_enter |
| 29 | 29 |
| 30 ALIGN 16 | 30 ALIGN 16 |
| 31 $L$mul_enter: | 31 $L$mul_enter: |
| 32 mov r9d,r9d | 32 mov r9d,r9d |
| 33 mov rax,rsp | 33 mov rax,rsp |
| 34 » mov» r10d,DWORD[56+rsp] | 34 » movd» xmm5,DWORD[56+rsp] |
| 35 » lea» r10,[$L$inc] |
| 35 push rbx | 36 push rbx |
| 36 push rbp | 37 push rbp |
| 37 push r12 | 38 push r12 |
| 38 push r13 | 39 push r13 |
| 39 push r14 | 40 push r14 |
| 40 push r15 | 41 push r15 |
| 41 » lea» rsp,[((-40))+rsp] | 42 |
| 42 » movaps» XMMWORD[rsp],xmm6 | |
| 43 » movaps» XMMWORD[16+rsp],xmm7 | |
| 44 lea r11,[2+r9] | 43 lea r11,[2+r9] |
| 45 neg r11 | 44 neg r11 |
| 46 » lea» rsp,[r11*8+rsp] | 45 » lea» rsp,[((-264))+r11*8+rsp] |
| 47 and rsp,-1024 | 46 and rsp,-1024 |
| 48 | 47 |
| 49 mov QWORD[8+r9*8+rsp],rax | 48 mov QWORD[8+r9*8+rsp],rax |
| 50 $L$mul_body: | 49 $L$mul_body: |
| 51 » mov» r12,rdx | 50 » lea» r12,[128+rdx] |
| 52 » mov» r11,r10 | 51 » movdqa» xmm0,XMMWORD[r10] |
| 53 » shr» r10,3 | 52 » movdqa» xmm1,XMMWORD[16+r10] |
| 54 » and» r11,7 | 53 » lea» r10,[((24-112))+r9*8+rsp] |
| 55 » not» r10 | 54 » and» r10,-16 |
| 56 » lea» rax,[$L$magic_masks] | |
| 57 » and» r10,3 | |
| 58 » lea» r12,[96+r11*8+r12] | |
| 59 » movq» xmm4,QWORD[r10*8+rax] | |
| 60 » movq» xmm5,QWORD[8+r10*8+rax] | |
| 61 » movq» xmm6,QWORD[16+r10*8+rax] | |
| 62 » movq» xmm7,QWORD[24+r10*8+rax] | |
| 63 | 55 |
| 64 » movq» xmm0,QWORD[(((-96)))+r12] | 56 » pshufd» xmm5,xmm5,0 |
| 65 » movq» xmm1,QWORD[((-32))+r12] | 57 » movdqa» xmm4,xmm1 |
| 66 » pand» xmm0,xmm4 | 58 » movdqa» xmm2,xmm1 |
| 67 » movq» xmm2,QWORD[32+r12] | 59 » paddd» xmm1,xmm0 |
| 68 » pand» xmm1,xmm5 | 60 » pcmpeqd»xmm0,xmm5 |
| 69 » movq» xmm3,QWORD[96+r12] | 61 DB» 0x67 |
| 70 » pand» xmm2,xmm6 | 62 » movdqa» xmm3,xmm4 |
| 63 » paddd» xmm2,xmm1 |
| 64 » pcmpeqd»xmm1,xmm5 |
| 65 » movdqa» XMMWORD[112+r10],xmm0 |
| 66 » movdqa» xmm0,xmm4 |
| 67 |
| 68 » paddd» xmm3,xmm2 |
| 69 » pcmpeqd»xmm2,xmm5 |
| 70 » movdqa» XMMWORD[128+r10],xmm1 |
| 71 » movdqa» xmm1,xmm4 |
| 72 |
| 73 » paddd» xmm0,xmm3 |
| 74 » pcmpeqd»xmm3,xmm5 |
| 75 » movdqa» XMMWORD[144+r10],xmm2 |
| 76 » movdqa» xmm2,xmm4 |
| 77 |
| 78 » paddd» xmm1,xmm0 |
| 79 » pcmpeqd»xmm0,xmm5 |
| 80 » movdqa» XMMWORD[160+r10],xmm3 |
| 81 » movdqa» xmm3,xmm4 |
| 82 » paddd» xmm2,xmm1 |
| 83 » pcmpeqd»xmm1,xmm5 |
| 84 » movdqa» XMMWORD[176+r10],xmm0 |
| 85 » movdqa» xmm0,xmm4 |
| 86 |
| 87 » paddd» xmm3,xmm2 |
| 88 » pcmpeqd»xmm2,xmm5 |
| 89 » movdqa» XMMWORD[192+r10],xmm1 |
| 90 » movdqa» xmm1,xmm4 |
| 91 |
| 92 » paddd» xmm0,xmm3 |
| 93 » pcmpeqd»xmm3,xmm5 |
| 94 » movdqa» XMMWORD[208+r10],xmm2 |
| 95 » movdqa» xmm2,xmm4 |
| 96 |
| 97 » paddd» xmm1,xmm0 |
| 98 » pcmpeqd»xmm0,xmm5 |
| 99 » movdqa» XMMWORD[224+r10],xmm3 |
| 100 » movdqa» xmm3,xmm4 |
| 101 » paddd» xmm2,xmm1 |
| 102 » pcmpeqd»xmm1,xmm5 |
| 103 » movdqa» XMMWORD[240+r10],xmm0 |
| 104 » movdqa» xmm0,xmm4 |
| 105 |
| 106 » paddd» xmm3,xmm2 |
| 107 » pcmpeqd»xmm2,xmm5 |
| 108 » movdqa» XMMWORD[256+r10],xmm1 |
| 109 » movdqa» xmm1,xmm4 |
| 110 |
| 111 » paddd» xmm0,xmm3 |
| 112 » pcmpeqd»xmm3,xmm5 |
| 113 » movdqa» XMMWORD[272+r10],xmm2 |
| 114 » movdqa» xmm2,xmm4 |
| 115 |
| 116 » paddd» xmm1,xmm0 |
| 117 » pcmpeqd»xmm0,xmm5 |
| 118 » movdqa» XMMWORD[288+r10],xmm3 |
| 119 » movdqa» xmm3,xmm4 |
| 120 » paddd» xmm2,xmm1 |
| 121 » pcmpeqd»xmm1,xmm5 |
| 122 » movdqa» XMMWORD[304+r10],xmm0 |
| 123 |
| 124 » paddd» xmm3,xmm2 |
| 125 DB» 0x67 |
| 126 » pcmpeqd»xmm2,xmm5 |
| 127 » movdqa» XMMWORD[320+r10],xmm1 |
| 128 |
| 129 » pcmpeqd»xmm3,xmm5 |
| 130 » movdqa» XMMWORD[336+r10],xmm2 |
| 131 » pand» xmm0,XMMWORD[64+r12] |
| 132 |
| 133 » pand» xmm1,XMMWORD[80+r12] |
| 134 » pand» xmm2,XMMWORD[96+r12] |
| 135 » movdqa» XMMWORD[352+r10],xmm3 |
| 136 » pand» xmm3,XMMWORD[112+r12] |
| 137 » por» xmm0,xmm2 |
| 138 » por» xmm1,xmm3 |
| 139 » movdqa» xmm4,XMMWORD[((-128))+r12] |
| 140 » movdqa» xmm5,XMMWORD[((-112))+r12] |
| 141 » movdqa» xmm2,XMMWORD[((-96))+r12] |
| 142 » pand» xmm4,XMMWORD[112+r10] |
| 143 » movdqa» xmm3,XMMWORD[((-80))+r12] |
| 144 » pand» xmm5,XMMWORD[128+r10] |
| 145 » por» xmm0,xmm4 |
| 146 » pand» xmm2,XMMWORD[144+r10] |
| 147 » por» xmm1,xmm5 |
| 148 » pand» xmm3,XMMWORD[160+r10] |
| 149 » por» xmm0,xmm2 |
| 150 » por» xmm1,xmm3 |
| 151 » movdqa» xmm4,XMMWORD[((-64))+r12] |
| 152 » movdqa» xmm5,XMMWORD[((-48))+r12] |
| 153 » movdqa» xmm2,XMMWORD[((-32))+r12] |
| 154 » pand» xmm4,XMMWORD[176+r10] |
| 155 » movdqa» xmm3,XMMWORD[((-16))+r12] |
| 156 » pand» xmm5,XMMWORD[192+r10] |
| 157 » por» xmm0,xmm4 |
| 158 » pand» xmm2,XMMWORD[208+r10] |
| 159 » por» xmm1,xmm5 |
| 160 » pand» xmm3,XMMWORD[224+r10] |
| 161 » por» xmm0,xmm2 |
| 162 » por» xmm1,xmm3 |
| 163 » movdqa» xmm4,XMMWORD[r12] |
| 164 » movdqa» xmm5,XMMWORD[16+r12] |
| 165 » movdqa» xmm2,XMMWORD[32+r12] |
| 166 » pand» xmm4,XMMWORD[240+r10] |
| 167 » movdqa» xmm3,XMMWORD[48+r12] |
| 168 » pand» xmm5,XMMWORD[256+r10] |
| 169 » por» xmm0,xmm4 |
| 170 » pand» xmm2,XMMWORD[272+r10] |
| 171 » por» xmm1,xmm5 |
| 172 » pand» xmm3,XMMWORD[288+r10] |
| 173 » por» xmm0,xmm2 |
| 174 » por» xmm1,xmm3 |
| 71 por xmm0,xmm1 | 175 por xmm0,xmm1 |
| 72 » pand» xmm3,xmm7 | 176 » pshufd» xmm1,xmm0,0x4e |
| 73 » por» xmm0,xmm2 | 177 » por» xmm0,xmm1 |
| 74 lea r12,[256+r12] | 178 lea r12,[256+r12] |
| 75 por xmm0,xmm3 | |
| 76 | |
| 77 DB 102,72,15,126,195 | 179 DB 102,72,15,126,195 |
| 78 | 180 |
| 79 mov r8,QWORD[r8] | 181 mov r8,QWORD[r8] |
| 80 mov rax,QWORD[rsi] | 182 mov rax,QWORD[rsi] |
| 81 | 183 |
| 82 xor r14,r14 | 184 xor r14,r14 |
| 83 xor r15,r15 | 185 xor r15,r15 |
| 84 | 186 |
| 85 movq xmm0,QWORD[(((-96)))+r12] | |
| 86 movq xmm1,QWORD[((-32))+r12] | |
| 87 pand xmm0,xmm4 | |
| 88 movq xmm2,QWORD[32+r12] | |
| 89 pand xmm1,xmm5 | |
| 90 | |
| 91 mov rbp,r8 | 187 mov rbp,r8 |
| 92 mul rbx | 188 mul rbx |
| 93 mov r10,rax | 189 mov r10,rax |
| 94 mov rax,QWORD[rcx] | 190 mov rax,QWORD[rcx] |
| 95 | 191 |
| 96 movq xmm3,QWORD[96+r12] | |
| 97 pand xmm2,xmm6 | |
| 98 por xmm0,xmm1 | |
| 99 pand xmm3,xmm7 | |
| 100 | |
| 101 imul rbp,r10 | 192 imul rbp,r10 |
| 102 mov r11,rdx | 193 mov r11,rdx |
| 103 | 194 |
| 104 por xmm0,xmm2 | |
| 105 lea r12,[256+r12] | |
| 106 por xmm0,xmm3 | |
| 107 | |
| 108 mul rbp | 195 mul rbp |
| 109 add r10,rax | 196 add r10,rax |
| 110 mov rax,QWORD[8+rsi] | 197 mov rax,QWORD[8+rsi] |
| 111 adc rdx,0 | 198 adc rdx,0 |
| 112 mov r13,rdx | 199 mov r13,rdx |
| 113 | 200 |
| 114 lea r15,[1+r15] | 201 lea r15,[1+r15] |
| 115 jmp NEAR $L$1st_enter | 202 jmp NEAR $L$1st_enter |
| 116 | 203 |
| 117 ALIGN 16 | 204 ALIGN 16 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 130 add r11,rax | 217 add r11,rax |
| 131 mov rax,QWORD[r15*8+rcx] | 218 mov rax,QWORD[r15*8+rcx] |
| 132 adc rdx,0 | 219 adc rdx,0 |
| 133 lea r15,[1+r15] | 220 lea r15,[1+r15] |
| 134 mov r10,rdx | 221 mov r10,rdx |
| 135 | 222 |
| 136 mul rbp | 223 mul rbp |
| 137 cmp r15,r9 | 224 cmp r15,r9 |
| 138 jne NEAR $L$1st | 225 jne NEAR $L$1st |
| 139 | 226 |
| 140 DB 102,72,15,126,195 | |
| 141 | 227 |
| 142 add r13,rax | 228 add r13,rax |
| 143 mov rax,QWORD[rsi] | |
| 144 adc rdx,0 | 229 adc rdx,0 |
| 145 add r13,r11 | 230 add r13,r11 |
| 146 adc rdx,0 | 231 adc rdx,0 |
| 147 » mov» QWORD[((-16))+r15*8+rsp],r13 | 232 » mov» QWORD[((-16))+r9*8+rsp],r13 |
| 148 mov r13,rdx | 233 mov r13,rdx |
| 149 mov r11,r10 | 234 mov r11,r10 |
| 150 | 235 |
| 151 xor rdx,rdx | 236 xor rdx,rdx |
| 152 add r13,r11 | 237 add r13,r11 |
| 153 adc rdx,0 | 238 adc rdx,0 |
| 154 mov QWORD[((-8))+r9*8+rsp],r13 | 239 mov QWORD[((-8))+r9*8+rsp],r13 |
| 155 mov QWORD[r9*8+rsp],rdx | 240 mov QWORD[r9*8+rsp],rdx |
| 156 | 241 |
| 157 lea r14,[1+r14] | 242 lea r14,[1+r14] |
| 158 jmp NEAR $L$outer | 243 jmp NEAR $L$outer |
| 159 ALIGN 16 | 244 ALIGN 16 |
| 160 $L$outer: | 245 $L$outer: |
| 246 lea rdx,[((24+128))+r9*8+rsp] |
| 247 and rdx,-16 |
| 248 pxor xmm4,xmm4 |
| 249 pxor xmm5,xmm5 |
| 250 movdqa xmm0,XMMWORD[((-128))+r12] |
| 251 movdqa xmm1,XMMWORD[((-112))+r12] |
| 252 movdqa xmm2,XMMWORD[((-96))+r12] |
| 253 movdqa xmm3,XMMWORD[((-80))+r12] |
| 254 pand xmm0,XMMWORD[((-128))+rdx] |
| 255 pand xmm1,XMMWORD[((-112))+rdx] |
| 256 por xmm4,xmm0 |
| 257 pand xmm2,XMMWORD[((-96))+rdx] |
| 258 por xmm5,xmm1 |
| 259 pand xmm3,XMMWORD[((-80))+rdx] |
| 260 por xmm4,xmm2 |
| 261 por xmm5,xmm3 |
| 262 movdqa xmm0,XMMWORD[((-64))+r12] |
| 263 movdqa xmm1,XMMWORD[((-48))+r12] |
| 264 movdqa xmm2,XMMWORD[((-32))+r12] |
| 265 movdqa xmm3,XMMWORD[((-16))+r12] |
| 266 pand xmm0,XMMWORD[((-64))+rdx] |
| 267 pand xmm1,XMMWORD[((-48))+rdx] |
| 268 por xmm4,xmm0 |
| 269 pand xmm2,XMMWORD[((-32))+rdx] |
| 270 por xmm5,xmm1 |
| 271 pand xmm3,XMMWORD[((-16))+rdx] |
| 272 por xmm4,xmm2 |
| 273 por xmm5,xmm3 |
| 274 movdqa xmm0,XMMWORD[r12] |
| 275 movdqa xmm1,XMMWORD[16+r12] |
| 276 movdqa xmm2,XMMWORD[32+r12] |
| 277 movdqa xmm3,XMMWORD[48+r12] |
| 278 pand xmm0,XMMWORD[rdx] |
| 279 pand xmm1,XMMWORD[16+rdx] |
| 280 por xmm4,xmm0 |
| 281 pand xmm2,XMMWORD[32+rdx] |
| 282 por xmm5,xmm1 |
| 283 pand xmm3,XMMWORD[48+rdx] |
| 284 por xmm4,xmm2 |
| 285 por xmm5,xmm3 |
| 286 movdqa xmm0,XMMWORD[64+r12] |
| 287 movdqa xmm1,XMMWORD[80+r12] |
| 288 movdqa xmm2,XMMWORD[96+r12] |
| 289 movdqa xmm3,XMMWORD[112+r12] |
| 290 pand xmm0,XMMWORD[64+rdx] |
| 291 pand xmm1,XMMWORD[80+rdx] |
| 292 por xmm4,xmm0 |
| 293 pand xmm2,XMMWORD[96+rdx] |
| 294 por xmm5,xmm1 |
| 295 pand xmm3,XMMWORD[112+rdx] |
| 296 por xmm4,xmm2 |
| 297 por xmm5,xmm3 |
| 298 por xmm4,xmm5 |
| 299 pshufd xmm0,xmm4,0x4e |
| 300 por xmm0,xmm4 |
| 301 lea r12,[256+r12] |
| 302 |
| 303 mov rax,QWORD[rsi] |
| 304 DB 102,72,15,126,195 |
| 305 |
| 161 xor r15,r15 | 306 xor r15,r15 |
| 162 mov rbp,r8 | 307 mov rbp,r8 |
| 163 mov r10,QWORD[rsp] | 308 mov r10,QWORD[rsp] |
| 164 | 309 |
| 165 movq xmm0,QWORD[(((-96)))+r12] | |
| 166 movq xmm1,QWORD[((-32))+r12] | |
| 167 pand xmm0,xmm4 | |
| 168 movq xmm2,QWORD[32+r12] | |
| 169 pand xmm1,xmm5 | |
| 170 | |
| 171 mul rbx | 310 mul rbx |
| 172 add r10,rax | 311 add r10,rax |
| 173 mov rax,QWORD[rcx] | 312 mov rax,QWORD[rcx] |
| 174 adc rdx,0 | 313 adc rdx,0 |
| 175 | 314 |
| 176 movq xmm3,QWORD[96+r12] | |
| 177 pand xmm2,xmm6 | |
| 178 por xmm0,xmm1 | |
| 179 pand xmm3,xmm7 | |
| 180 | |
| 181 imul rbp,r10 | 315 imul rbp,r10 |
| 182 mov r11,rdx | 316 mov r11,rdx |
| 183 | 317 |
| 184 por xmm0,xmm2 | |
| 185 lea r12,[256+r12] | |
| 186 por xmm0,xmm3 | |
| 187 | |
| 188 mul rbp | 318 mul rbp |
| 189 add r10,rax | 319 add r10,rax |
| 190 mov rax,QWORD[8+rsi] | 320 mov rax,QWORD[8+rsi] |
| 191 adc rdx,0 | 321 adc rdx,0 |
| 192 mov r10,QWORD[8+rsp] | 322 mov r10,QWORD[8+rsp] |
| 193 mov r13,rdx | 323 mov r13,rdx |
| 194 | 324 |
| 195 lea r15,[1+r15] | 325 lea r15,[1+r15] |
| 196 jmp NEAR $L$inner_enter | 326 jmp NEAR $L$inner_enter |
| 197 | 327 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 213 adc rdx,0 | 343 adc rdx,0 |
| 214 add r10,r11 | 344 add r10,r11 |
| 215 mov r11,rdx | 345 mov r11,rdx |
| 216 adc r11,0 | 346 adc r11,0 |
| 217 lea r15,[1+r15] | 347 lea r15,[1+r15] |
| 218 | 348 |
| 219 mul rbp | 349 mul rbp |
| 220 cmp r15,r9 | 350 cmp r15,r9 |
| 221 jne NEAR $L$inner | 351 jne NEAR $L$inner |
| 222 | 352 |
| 223 DB 102,72,15,126,195 | |
| 224 | |
| 225 add r13,rax | 353 add r13,rax |
| 226 mov rax,QWORD[rsi] | |
| 227 adc rdx,0 | 354 adc rdx,0 |
| 228 add r13,r10 | 355 add r13,r10 |
| 229 » mov» r10,QWORD[r15*8+rsp] | 356 » mov» r10,QWORD[r9*8+rsp] |
| 230 adc rdx,0 | 357 adc rdx,0 |
| 231 » mov» QWORD[((-16))+r15*8+rsp],r13 | 358 » mov» QWORD[((-16))+r9*8+rsp],r13 |
| 232 mov r13,rdx | 359 mov r13,rdx |
| 233 | 360 |
| 234 xor rdx,rdx | 361 xor rdx,rdx |
| 235 add r13,r11 | 362 add r13,r11 |
| 236 adc rdx,0 | 363 adc rdx,0 |
| 237 add r13,r10 | 364 add r13,r10 |
| 238 adc rdx,0 | 365 adc rdx,0 |
| 239 mov QWORD[((-8))+r9*8+rsp],r13 | 366 mov QWORD[((-8))+r9*8+rsp],r13 |
| 240 mov QWORD[r9*8+rsp],rdx | 367 mov QWORD[r9*8+rsp],rdx |
| 241 | 368 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 267 and rsi,rax | 394 and rsi,rax |
| 268 xor rsi,rcx | 395 xor rsi,rcx |
| 269 mov QWORD[r14*8+rsp],r14 | 396 mov QWORD[r14*8+rsp],r14 |
| 270 mov QWORD[r14*8+rdi],rsi | 397 mov QWORD[r14*8+rdi],rsi |
| 271 lea r14,[1+r14] | 398 lea r14,[1+r14] |
| 272 sub r15,1 | 399 sub r15,1 |
| 273 jnz NEAR $L$copy | 400 jnz NEAR $L$copy |
| 274 | 401 |
| 275 mov rsi,QWORD[8+r9*8+rsp] | 402 mov rsi,QWORD[8+r9*8+rsp] |
| 276 mov rax,1 | 403 mov rax,1 |
| 277 » movaps» xmm6,XMMWORD[((-88))+rsi] | 404 |
| 278 » movaps» xmm7,XMMWORD[((-72))+rsi] | |
| 279 mov r15,QWORD[((-48))+rsi] | 405 mov r15,QWORD[((-48))+rsi] |
| 280 mov r14,QWORD[((-40))+rsi] | 406 mov r14,QWORD[((-40))+rsi] |
| 281 mov r13,QWORD[((-32))+rsi] | 407 mov r13,QWORD[((-32))+rsi] |
| 282 mov r12,QWORD[((-24))+rsi] | 408 mov r12,QWORD[((-24))+rsi] |
| 283 mov rbp,QWORD[((-16))+rsi] | 409 mov rbp,QWORD[((-16))+rsi] |
| 284 mov rbx,QWORD[((-8))+rsi] | 410 mov rbx,QWORD[((-8))+rsi] |
| 285 lea rsp,[rsi] | 411 lea rsp,[rsi] |
| 286 $L$mul_epilogue: | 412 $L$mul_epilogue: |
| 287 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 413 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 288 mov rsi,QWORD[16+rsp] | 414 mov rsi,QWORD[16+rsp] |
| (...skipping 16 matching lines...) Expand all Loading... |
| 305 | 431 |
| 306 $L$mul4x_enter: | 432 $L$mul4x_enter: |
| 307 DB 0x67 | 433 DB 0x67 |
| 308 mov rax,rsp | 434 mov rax,rsp |
| 309 push rbx | 435 push rbx |
| 310 push rbp | 436 push rbp |
| 311 push r12 | 437 push r12 |
| 312 push r13 | 438 push r13 |
| 313 push r14 | 439 push r14 |
| 314 push r15 | 440 push r15 |
| 315 » lea» rsp,[((-40))+rsp] | 441 |
| 316 » movaps» XMMWORD[rsp],xmm6 | |
| 317 » movaps» XMMWORD[16+rsp],xmm7 | |
| 318 DB 0x67 | 442 DB 0x67 |
| 319 mov r10d,r9d | |
| 320 shl r9d,3 | 443 shl r9d,3 |
| 321 » shl» r10d,3+2 | 444 » lea» r10,[r9*2+r9] |
| 322 neg r9 | 445 neg r9 |
| 323 | 446 |
| 324 | 447 |
| 325 | 448 |
| 326 | 449 |
| 327 | 450 |
| 328 | 451 |
| 329 | 452 |
| 330 | 453 |
| 331 » lea» r11,[((-64))+r9*2+rsp] | 454 |
| 332 » sub» r11,rsi | 455 |
| 456 » lea» r11,[((-320))+r9*2+rsp] |
| 457 » sub» r11,rdi |
| 333 and r11,4095 | 458 and r11,4095 |
| 334 cmp r10,r11 | 459 cmp r10,r11 |
| 335 jb NEAR $L$mul4xsp_alt | 460 jb NEAR $L$mul4xsp_alt |
| 336 sub rsp,r11 | 461 sub rsp,r11 |
| 337 » lea» rsp,[((-64))+r9*2+rsp] | 462 » lea» rsp,[((-320))+r9*2+rsp] |
| 338 jmp NEAR $L$mul4xsp_done | 463 jmp NEAR $L$mul4xsp_done |
| 339 | 464 |
| 340 ALIGN 32 | 465 ALIGN 32 |
| 341 $L$mul4xsp_alt: | 466 $L$mul4xsp_alt: |
| 342 » lea» r10,[((4096-64))+r9*2] | 467 » lea» r10,[((4096-320))+r9*2] |
| 343 » lea» rsp,[((-64))+r9*2+rsp] | 468 » lea» rsp,[((-320))+r9*2+rsp] |
| 344 sub r11,r10 | 469 sub r11,r10 |
| 345 mov r10,0 | 470 mov r10,0 |
| 346 cmovc r11,r10 | 471 cmovc r11,r10 |
| 347 sub rsp,r11 | 472 sub rsp,r11 |
| 348 $L$mul4xsp_done: | 473 $L$mul4xsp_done: |
| 349 and rsp,-64 | 474 and rsp,-64 |
| 350 neg r9 | 475 neg r9 |
| 351 | 476 |
| 352 mov QWORD[40+rsp],rax | 477 mov QWORD[40+rsp],rax |
| 353 $L$mul4x_body: | 478 $L$mul4x_body: |
| 354 | 479 |
| 355 call mul4x_internal | 480 call mul4x_internal |
| 356 | 481 |
| 357 mov rsi,QWORD[40+rsp] | 482 mov rsi,QWORD[40+rsp] |
| 358 mov rax,1 | 483 mov rax,1 |
| 359 » movaps» xmm6,XMMWORD[((-88))+rsi] | 484 |
| 360 » movaps» xmm7,XMMWORD[((-72))+rsi] | |
| 361 mov r15,QWORD[((-48))+rsi] | 485 mov r15,QWORD[((-48))+rsi] |
| 362 mov r14,QWORD[((-40))+rsi] | 486 mov r14,QWORD[((-40))+rsi] |
| 363 mov r13,QWORD[((-32))+rsi] | 487 mov r13,QWORD[((-32))+rsi] |
| 364 mov r12,QWORD[((-24))+rsi] | 488 mov r12,QWORD[((-24))+rsi] |
| 365 mov rbp,QWORD[((-16))+rsi] | 489 mov rbp,QWORD[((-16))+rsi] |
| 366 mov rbx,QWORD[((-8))+rsi] | 490 mov rbx,QWORD[((-8))+rsi] |
| 367 lea rsp,[rsi] | 491 lea rsp,[rsi] |
| 368 $L$mul4x_epilogue: | 492 $L$mul4x_epilogue: |
| 369 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 493 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 370 mov rsi,QWORD[16+rsp] | 494 mov rsi,QWORD[16+rsp] |
| 371 DB 0F3h,0C3h ;repret | 495 DB 0F3h,0C3h ;repret |
| 372 $L$SEH_end_bn_mul4x_mont_gather5: | 496 $L$SEH_end_bn_mul4x_mont_gather5: |
| 373 | 497 |
| 374 | 498 |
| 375 ALIGN 32 | 499 ALIGN 32 |
| 376 mul4x_internal: | 500 mul4x_internal: |
| 377 shl r9,5 | 501 shl r9,5 |
| 378 » mov» r10d,DWORD[56+rax] | 502 » movd» xmm5,DWORD[56+rax] |
| 379 » lea» r13,[256+r9*1+rdx] | 503 » lea» rax,[$L$inc] |
| 504 » lea» r13,[128+r9*1+rdx] |
| 380 shr r9,5 | 505 shr r9,5 |
| 381 » mov» r11,r10 | 506 » movdqa» xmm0,XMMWORD[rax] |
| 382 » shr» r10,3 | 507 » movdqa» xmm1,XMMWORD[16+rax] |
| 383 » and» r11,7 | 508 » lea» r10,[((88-112))+r9*1+rsp] |
| 384 » not» r10 | 509 » lea» r12,[128+rdx] |
| 385 » lea» rax,[$L$magic_masks] | |
| 386 » and» r10,3 | |
| 387 » lea» r12,[96+r11*8+rdx] | |
| 388 » movq» xmm4,QWORD[r10*8+rax] | |
| 389 » movq» xmm5,QWORD[8+r10*8+rax] | |
| 390 » add» r11,7 | |
| 391 » movq» xmm6,QWORD[16+r10*8+rax] | |
| 392 » movq» xmm7,QWORD[24+r10*8+rax] | |
| 393 » and» r11,7 | |
| 394 | 510 |
| 395 » movq» xmm0,QWORD[(((-96)))+r12] | 511 » pshufd» xmm5,xmm5,0 |
| 396 » lea» r14,[256+r12] | 512 » movdqa» xmm4,xmm1 |
| 397 » movq» xmm1,QWORD[((-32))+r12] | 513 DB» 0x67,0x67 |
| 398 » pand» xmm0,xmm4 | 514 » movdqa» xmm2,xmm1 |
| 399 » movq» xmm2,QWORD[32+r12] | 515 » paddd» xmm1,xmm0 |
| 400 » pand» xmm1,xmm5 | 516 » pcmpeqd»xmm0,xmm5 |
| 401 » movq» xmm3,QWORD[96+r12] | |
| 402 » pand» xmm2,xmm6 | |
| 403 DB 0x67 | 517 DB 0x67 |
| 518 movdqa xmm3,xmm4 |
| 519 paddd xmm2,xmm1 |
| 520 pcmpeqd xmm1,xmm5 |
| 521 movdqa XMMWORD[112+r10],xmm0 |
| 522 movdqa xmm0,xmm4 |
| 523 |
| 524 paddd xmm3,xmm2 |
| 525 pcmpeqd xmm2,xmm5 |
| 526 movdqa XMMWORD[128+r10],xmm1 |
| 527 movdqa xmm1,xmm4 |
| 528 |
| 529 paddd xmm0,xmm3 |
| 530 pcmpeqd xmm3,xmm5 |
| 531 movdqa XMMWORD[144+r10],xmm2 |
| 532 movdqa xmm2,xmm4 |
| 533 |
| 534 paddd xmm1,xmm0 |
| 535 pcmpeqd xmm0,xmm5 |
| 536 movdqa XMMWORD[160+r10],xmm3 |
| 537 movdqa xmm3,xmm4 |
| 538 paddd xmm2,xmm1 |
| 539 pcmpeqd xmm1,xmm5 |
| 540 movdqa XMMWORD[176+r10],xmm0 |
| 541 movdqa xmm0,xmm4 |
| 542 |
| 543 paddd xmm3,xmm2 |
| 544 pcmpeqd xmm2,xmm5 |
| 545 movdqa XMMWORD[192+r10],xmm1 |
| 546 movdqa xmm1,xmm4 |
| 547 |
| 548 paddd xmm0,xmm3 |
| 549 pcmpeqd xmm3,xmm5 |
| 550 movdqa XMMWORD[208+r10],xmm2 |
| 551 movdqa xmm2,xmm4 |
| 552 |
| 553 paddd xmm1,xmm0 |
| 554 pcmpeqd xmm0,xmm5 |
| 555 movdqa XMMWORD[224+r10],xmm3 |
| 556 movdqa xmm3,xmm4 |
| 557 paddd xmm2,xmm1 |
| 558 pcmpeqd xmm1,xmm5 |
| 559 movdqa XMMWORD[240+r10],xmm0 |
| 560 movdqa xmm0,xmm4 |
| 561 |
| 562 paddd xmm3,xmm2 |
| 563 pcmpeqd xmm2,xmm5 |
| 564 movdqa XMMWORD[256+r10],xmm1 |
| 565 movdqa xmm1,xmm4 |
| 566 |
| 567 paddd xmm0,xmm3 |
| 568 pcmpeqd xmm3,xmm5 |
| 569 movdqa XMMWORD[272+r10],xmm2 |
| 570 movdqa xmm2,xmm4 |
| 571 |
| 572 paddd xmm1,xmm0 |
| 573 pcmpeqd xmm0,xmm5 |
| 574 movdqa XMMWORD[288+r10],xmm3 |
| 575 movdqa xmm3,xmm4 |
| 576 paddd xmm2,xmm1 |
| 577 pcmpeqd xmm1,xmm5 |
| 578 movdqa XMMWORD[304+r10],xmm0 |
| 579 |
| 580 paddd xmm3,xmm2 |
| 581 DB 0x67 |
| 582 pcmpeqd xmm2,xmm5 |
| 583 movdqa XMMWORD[320+r10],xmm1 |
| 584 |
| 585 pcmpeqd xmm3,xmm5 |
| 586 movdqa XMMWORD[336+r10],xmm2 |
| 587 pand xmm0,XMMWORD[64+r12] |
| 588 |
| 589 pand xmm1,XMMWORD[80+r12] |
| 590 pand xmm2,XMMWORD[96+r12] |
| 591 movdqa XMMWORD[352+r10],xmm3 |
| 592 pand xmm3,XMMWORD[112+r12] |
| 593 por xmm0,xmm2 |
| 594 por xmm1,xmm3 |
| 595 movdqa xmm4,XMMWORD[((-128))+r12] |
| 596 movdqa xmm5,XMMWORD[((-112))+r12] |
| 597 movdqa xmm2,XMMWORD[((-96))+r12] |
| 598 pand xmm4,XMMWORD[112+r10] |
| 599 movdqa xmm3,XMMWORD[((-80))+r12] |
| 600 pand xmm5,XMMWORD[128+r10] |
| 601 por xmm0,xmm4 |
| 602 pand xmm2,XMMWORD[144+r10] |
| 603 por xmm1,xmm5 |
| 604 pand xmm3,XMMWORD[160+r10] |
| 605 por xmm0,xmm2 |
| 606 por xmm1,xmm3 |
| 607 movdqa xmm4,XMMWORD[((-64))+r12] |
| 608 movdqa xmm5,XMMWORD[((-48))+r12] |
| 609 movdqa xmm2,XMMWORD[((-32))+r12] |
| 610 pand xmm4,XMMWORD[176+r10] |
| 611 movdqa xmm3,XMMWORD[((-16))+r12] |
| 612 pand xmm5,XMMWORD[192+r10] |
| 613 por xmm0,xmm4 |
| 614 pand xmm2,XMMWORD[208+r10] |
| 615 por xmm1,xmm5 |
| 616 pand xmm3,XMMWORD[224+r10] |
| 617 por xmm0,xmm2 |
| 618 por xmm1,xmm3 |
| 619 movdqa xmm4,XMMWORD[r12] |
| 620 movdqa xmm5,XMMWORD[16+r12] |
| 621 movdqa xmm2,XMMWORD[32+r12] |
| 622 pand xmm4,XMMWORD[240+r10] |
| 623 movdqa xmm3,XMMWORD[48+r12] |
| 624 pand xmm5,XMMWORD[256+r10] |
| 625 por xmm0,xmm4 |
| 626 pand xmm2,XMMWORD[272+r10] |
| 627 por xmm1,xmm5 |
| 628 pand xmm3,XMMWORD[288+r10] |
| 629 por xmm0,xmm2 |
| 630 por xmm1,xmm3 |
| 404 por xmm0,xmm1 | 631 por xmm0,xmm1 |
| 405 » movq» xmm1,QWORD[((-96))+r14] | 632 » pshufd» xmm1,xmm0,0x4e |
| 406 DB» 0x67 | 633 » por» xmm0,xmm1 |
| 407 » pand» xmm3,xmm7 | 634 » lea» r12,[256+r12] |
| 408 DB» 0x67 | 635 DB» 102,72,15,126,195 |
| 409 » por» xmm0,xmm2 | |
| 410 » movq» xmm2,QWORD[((-32))+r14] | |
| 411 DB» 0x67 | |
| 412 » pand» xmm1,xmm4 | |
| 413 DB» 0x67 | |
| 414 » por» xmm0,xmm3 | |
| 415 » movq» xmm3,QWORD[32+r14] | |
| 416 | 636 |
| 417 DB 102,72,15,126,195 | |
| 418 movq xmm0,QWORD[96+r14] | |
| 419 mov QWORD[((16+8))+rsp],r13 | 637 mov QWORD[((16+8))+rsp],r13 |
| 420 mov QWORD[((56+8))+rsp],rdi | 638 mov QWORD[((56+8))+rsp],rdi |
| 421 | 639 |
| 422 mov r8,QWORD[r8] | 640 mov r8,QWORD[r8] |
| 423 mov rax,QWORD[rsi] | 641 mov rax,QWORD[rsi] |
| 424 lea rsi,[r9*1+rsi] | 642 lea rsi,[r9*1+rsi] |
| 425 neg r9 | 643 neg r9 |
| 426 | 644 |
| 427 mov rbp,r8 | 645 mov rbp,r8 |
| 428 mul rbx | 646 mul rbx |
| 429 mov r10,rax | 647 mov r10,rax |
| 430 mov rax,QWORD[rcx] | 648 mov rax,QWORD[rcx] |
| 431 | 649 |
| 432 pand xmm2,xmm5 | |
| 433 pand xmm3,xmm6 | |
| 434 por xmm1,xmm2 | |
| 435 | |
| 436 imul rbp,r10 | 650 imul rbp,r10 |
| 437 | 651 » lea» r14,[((64+8))+rsp] |
| 438 | |
| 439 | |
| 440 | |
| 441 | |
| 442 | |
| 443 | |
| 444 » lea» r14,[((64+8))+r11*8+rsp] | |
| 445 mov r11,rdx | 652 mov r11,rdx |
| 446 | 653 |
| 447 pand xmm0,xmm7 | |
| 448 por xmm1,xmm3 | |
| 449 lea r12,[512+r12] | |
| 450 por xmm0,xmm1 | |
| 451 | |
| 452 mul rbp | 654 mul rbp |
| 453 add r10,rax | 655 add r10,rax |
| 454 mov rax,QWORD[8+r9*1+rsi] | 656 mov rax,QWORD[8+r9*1+rsi] |
| 455 adc rdx,0 | 657 adc rdx,0 |
| 456 mov rdi,rdx | 658 mov rdi,rdx |
| 457 | 659 |
| 458 mul rbx | 660 mul rbx |
| 459 add r11,rax | 661 add r11,rax |
| 460 » mov» rax,QWORD[16+rcx] | 662 » mov» rax,QWORD[8+rcx] |
| 461 adc rdx,0 | 663 adc rdx,0 |
| 462 mov r10,rdx | 664 mov r10,rdx |
| 463 | 665 |
| 464 mul rbp | 666 mul rbp |
| 465 add rdi,rax | 667 add rdi,rax |
| 466 mov rax,QWORD[16+r9*1+rsi] | 668 mov rax,QWORD[16+r9*1+rsi] |
| 467 adc rdx,0 | 669 adc rdx,0 |
| 468 add rdi,r11 | 670 add rdi,r11 |
| 469 lea r15,[32+r9] | 671 lea r15,[32+r9] |
| 470 » lea» rcx,[64+rcx] | 672 » lea» rcx,[32+rcx] |
| 471 adc rdx,0 | 673 adc rdx,0 |
| 472 mov QWORD[r14],rdi | 674 mov QWORD[r14],rdi |
| 473 mov r13,rdx | 675 mov r13,rdx |
| 474 jmp NEAR $L$1st4x | 676 jmp NEAR $L$1st4x |
| 475 | 677 |
| 476 ALIGN 32 | 678 ALIGN 32 |
| 477 $L$1st4x: | 679 $L$1st4x: |
| 478 mul rbx | 680 mul rbx |
| 479 add r10,rax | 681 add r10,rax |
| 480 » mov» rax,QWORD[((-32))+rcx] | 682 » mov» rax,QWORD[((-16))+rcx] |
| 481 lea r14,[32+r14] | 683 lea r14,[32+r14] |
| 482 adc rdx,0 | 684 adc rdx,0 |
| 483 mov r11,rdx | 685 mov r11,rdx |
| 484 | 686 |
| 485 mul rbp | 687 mul rbp |
| 486 add r13,rax | 688 add r13,rax |
| 487 mov rax,QWORD[((-8))+r15*1+rsi] | 689 mov rax,QWORD[((-8))+r15*1+rsi] |
| 488 adc rdx,0 | 690 adc rdx,0 |
| 489 add r13,r10 | 691 add r13,r10 |
| 490 adc rdx,0 | 692 adc rdx,0 |
| 491 mov QWORD[((-24))+r14],r13 | 693 mov QWORD[((-24))+r14],r13 |
| 492 mov rdi,rdx | 694 mov rdi,rdx |
| 493 | 695 |
| 494 mul rbx | 696 mul rbx |
| 495 add r11,rax | 697 add r11,rax |
| 496 » mov» rax,QWORD[((-16))+rcx] | 698 » mov» rax,QWORD[((-8))+rcx] |
| 497 adc rdx,0 | 699 adc rdx,0 |
| 498 mov r10,rdx | 700 mov r10,rdx |
| 499 | 701 |
| 500 mul rbp | 702 mul rbp |
| 501 add rdi,rax | 703 add rdi,rax |
| 502 mov rax,QWORD[r15*1+rsi] | 704 mov rax,QWORD[r15*1+rsi] |
| 503 adc rdx,0 | 705 adc rdx,0 |
| 504 add rdi,r11 | 706 add rdi,r11 |
| 505 adc rdx,0 | 707 adc rdx,0 |
| 506 mov QWORD[((-16))+r14],rdi | 708 mov QWORD[((-16))+r14],rdi |
| 507 mov r13,rdx | 709 mov r13,rdx |
| 508 | 710 |
| 509 mul rbx | 711 mul rbx |
| 510 add r10,rax | 712 add r10,rax |
| 511 mov rax,QWORD[rcx] | 713 mov rax,QWORD[rcx] |
| 512 adc rdx,0 | 714 adc rdx,0 |
| 513 mov r11,rdx | 715 mov r11,rdx |
| 514 | 716 |
| 515 mul rbp | 717 mul rbp |
| 516 add r13,rax | 718 add r13,rax |
| 517 mov rax,QWORD[8+r15*1+rsi] | 719 mov rax,QWORD[8+r15*1+rsi] |
| 518 adc rdx,0 | 720 adc rdx,0 |
| 519 add r13,r10 | 721 add r13,r10 |
| 520 adc rdx,0 | 722 adc rdx,0 |
| 521 mov QWORD[((-8))+r14],r13 | 723 mov QWORD[((-8))+r14],r13 |
| 522 mov rdi,rdx | 724 mov rdi,rdx |
| 523 | 725 |
| 524 mul rbx | 726 mul rbx |
| 525 add r11,rax | 727 add r11,rax |
| 526 » mov» rax,QWORD[16+rcx] | 728 » mov» rax,QWORD[8+rcx] |
| 527 adc rdx,0 | 729 adc rdx,0 |
| 528 mov r10,rdx | 730 mov r10,rdx |
| 529 | 731 |
| 530 mul rbp | 732 mul rbp |
| 531 add rdi,rax | 733 add rdi,rax |
| 532 mov rax,QWORD[16+r15*1+rsi] | 734 mov rax,QWORD[16+r15*1+rsi] |
| 533 adc rdx,0 | 735 adc rdx,0 |
| 534 add rdi,r11 | 736 add rdi,r11 |
| 535 » lea» rcx,[64+rcx] | 737 » lea» rcx,[32+rcx] |
| 536 adc rdx,0 | 738 adc rdx,0 |
| 537 mov QWORD[r14],rdi | 739 mov QWORD[r14],rdi |
| 538 mov r13,rdx | 740 mov r13,rdx |
| 539 | 741 |
| 540 add r15,32 | 742 add r15,32 |
| 541 jnz NEAR $L$1st4x | 743 jnz NEAR $L$1st4x |
| 542 | 744 |
| 543 mul rbx | 745 mul rbx |
| 544 add r10,rax | 746 add r10,rax |
| 545 » mov» rax,QWORD[((-32))+rcx] | 747 » mov» rax,QWORD[((-16))+rcx] |
| 546 lea r14,[32+r14] | 748 lea r14,[32+r14] |
| 547 adc rdx,0 | 749 adc rdx,0 |
| 548 mov r11,rdx | 750 mov r11,rdx |
| 549 | 751 |
| 550 mul rbp | 752 mul rbp |
| 551 add r13,rax | 753 add r13,rax |
| 552 mov rax,QWORD[((-8))+rsi] | 754 mov rax,QWORD[((-8))+rsi] |
| 553 adc rdx,0 | 755 adc rdx,0 |
| 554 add r13,r10 | 756 add r13,r10 |
| 555 adc rdx,0 | 757 adc rdx,0 |
| 556 mov QWORD[((-24))+r14],r13 | 758 mov QWORD[((-24))+r14],r13 |
| 557 mov rdi,rdx | 759 mov rdi,rdx |
| 558 | 760 |
| 559 mul rbx | 761 mul rbx |
| 560 add r11,rax | 762 add r11,rax |
| 561 » mov» rax,QWORD[((-16))+rcx] | 763 » mov» rax,QWORD[((-8))+rcx] |
| 562 adc rdx,0 | 764 adc rdx,0 |
| 563 mov r10,rdx | 765 mov r10,rdx |
| 564 | 766 |
| 565 mul rbp | 767 mul rbp |
| 566 add rdi,rax | 768 add rdi,rax |
| 567 mov rax,QWORD[r9*1+rsi] | 769 mov rax,QWORD[r9*1+rsi] |
| 568 adc rdx,0 | 770 adc rdx,0 |
| 569 add rdi,r11 | 771 add rdi,r11 |
| 570 adc rdx,0 | 772 adc rdx,0 |
| 571 mov QWORD[((-16))+r14],rdi | 773 mov QWORD[((-16))+r14],rdi |
| 572 mov r13,rdx | 774 mov r13,rdx |
| 573 | 775 |
| 574 DB» 102,72,15,126,195 | 776 » lea» rcx,[r9*1+rcx] |
| 575 » lea» rcx,[r9*2+rcx] | |
| 576 | 777 |
| 577 xor rdi,rdi | 778 xor rdi,rdi |
| 578 add r13,r10 | 779 add r13,r10 |
| 579 adc rdi,0 | 780 adc rdi,0 |
| 580 mov QWORD[((-8))+r14],r13 | 781 mov QWORD[((-8))+r14],r13 |
| 581 | 782 |
| 582 jmp NEAR $L$outer4x | 783 jmp NEAR $L$outer4x |
| 583 | 784 |
| 584 ALIGN 32 | 785 ALIGN 32 |
| 585 $L$outer4x: | 786 $L$outer4x: |
| 787 lea rdx,[((16+128))+r14] |
| 788 pxor xmm4,xmm4 |
| 789 pxor xmm5,xmm5 |
| 790 movdqa xmm0,XMMWORD[((-128))+r12] |
| 791 movdqa xmm1,XMMWORD[((-112))+r12] |
| 792 movdqa xmm2,XMMWORD[((-96))+r12] |
| 793 movdqa xmm3,XMMWORD[((-80))+r12] |
| 794 pand xmm0,XMMWORD[((-128))+rdx] |
| 795 pand xmm1,XMMWORD[((-112))+rdx] |
| 796 por xmm4,xmm0 |
| 797 pand xmm2,XMMWORD[((-96))+rdx] |
| 798 por xmm5,xmm1 |
| 799 pand xmm3,XMMWORD[((-80))+rdx] |
| 800 por xmm4,xmm2 |
| 801 por xmm5,xmm3 |
| 802 movdqa xmm0,XMMWORD[((-64))+r12] |
| 803 movdqa xmm1,XMMWORD[((-48))+r12] |
| 804 movdqa xmm2,XMMWORD[((-32))+r12] |
| 805 movdqa xmm3,XMMWORD[((-16))+r12] |
| 806 pand xmm0,XMMWORD[((-64))+rdx] |
| 807 pand xmm1,XMMWORD[((-48))+rdx] |
| 808 por xmm4,xmm0 |
| 809 pand xmm2,XMMWORD[((-32))+rdx] |
| 810 por xmm5,xmm1 |
| 811 pand xmm3,XMMWORD[((-16))+rdx] |
| 812 por xmm4,xmm2 |
| 813 por xmm5,xmm3 |
| 814 movdqa xmm0,XMMWORD[r12] |
| 815 movdqa xmm1,XMMWORD[16+r12] |
| 816 movdqa xmm2,XMMWORD[32+r12] |
| 817 movdqa xmm3,XMMWORD[48+r12] |
| 818 pand xmm0,XMMWORD[rdx] |
| 819 pand xmm1,XMMWORD[16+rdx] |
| 820 por xmm4,xmm0 |
| 821 pand xmm2,XMMWORD[32+rdx] |
| 822 por xmm5,xmm1 |
| 823 pand xmm3,XMMWORD[48+rdx] |
| 824 por xmm4,xmm2 |
| 825 por xmm5,xmm3 |
| 826 movdqa xmm0,XMMWORD[64+r12] |
| 827 movdqa xmm1,XMMWORD[80+r12] |
| 828 movdqa xmm2,XMMWORD[96+r12] |
| 829 movdqa xmm3,XMMWORD[112+r12] |
| 830 pand xmm0,XMMWORD[64+rdx] |
| 831 pand xmm1,XMMWORD[80+rdx] |
| 832 por xmm4,xmm0 |
| 833 pand xmm2,XMMWORD[96+rdx] |
| 834 por xmm5,xmm1 |
| 835 pand xmm3,XMMWORD[112+rdx] |
| 836 por xmm4,xmm2 |
| 837 por xmm5,xmm3 |
| 838 por xmm4,xmm5 |
| 839 pshufd xmm0,xmm4,0x4e |
| 840 por xmm0,xmm4 |
| 841 lea r12,[256+r12] |
| 842 DB 102,72,15,126,195 |
| 843 |
| 586 mov r10,QWORD[r9*1+r14] | 844 mov r10,QWORD[r9*1+r14] |
| 587 mov rbp,r8 | 845 mov rbp,r8 |
| 588 mul rbx | 846 mul rbx |
| 589 add r10,rax | 847 add r10,rax |
| 590 mov rax,QWORD[rcx] | 848 mov rax,QWORD[rcx] |
| 591 adc rdx,0 | 849 adc rdx,0 |
| 592 | 850 |
| 593 movq xmm0,QWORD[(((-96)))+r12] | |
| 594 movq xmm1,QWORD[((-32))+r12] | |
| 595 pand xmm0,xmm4 | |
| 596 movq xmm2,QWORD[32+r12] | |
| 597 pand xmm1,xmm5 | |
| 598 movq xmm3,QWORD[96+r12] | |
| 599 | |
| 600 imul rbp,r10 | 851 imul rbp,r10 |
| 601 DB 0x67 | |
| 602 mov r11,rdx | 852 mov r11,rdx |
| 603 mov QWORD[r14],rdi | 853 mov QWORD[r14],rdi |
| 604 | 854 |
| 605 pand xmm2,xmm6 | |
| 606 por xmm0,xmm1 | |
| 607 pand xmm3,xmm7 | |
| 608 por xmm0,xmm2 | |
| 609 lea r14,[r9*1+r14] | 855 lea r14,[r9*1+r14] |
| 610 lea r12,[256+r12] | |
| 611 por xmm0,xmm3 | |
| 612 | 856 |
| 613 mul rbp | 857 mul rbp |
| 614 add r10,rax | 858 add r10,rax |
| 615 mov rax,QWORD[8+r9*1+rsi] | 859 mov rax,QWORD[8+r9*1+rsi] |
| 616 adc rdx,0 | 860 adc rdx,0 |
| 617 mov rdi,rdx | 861 mov rdi,rdx |
| 618 | 862 |
| 619 mul rbx | 863 mul rbx |
| 620 add r11,rax | 864 add r11,rax |
| 621 » mov» rax,QWORD[16+rcx] | 865 » mov» rax,QWORD[8+rcx] |
| 622 adc rdx,0 | 866 adc rdx,0 |
| 623 add r11,QWORD[8+r14] | 867 add r11,QWORD[8+r14] |
| 624 adc rdx,0 | 868 adc rdx,0 |
| 625 mov r10,rdx | 869 mov r10,rdx |
| 626 | 870 |
| 627 mul rbp | 871 mul rbp |
| 628 add rdi,rax | 872 add rdi,rax |
| 629 mov rax,QWORD[16+r9*1+rsi] | 873 mov rax,QWORD[16+r9*1+rsi] |
| 630 adc rdx,0 | 874 adc rdx,0 |
| 631 add rdi,r11 | 875 add rdi,r11 |
| 632 lea r15,[32+r9] | 876 lea r15,[32+r9] |
| 633 » lea» rcx,[64+rcx] | 877 » lea» rcx,[32+rcx] |
| 634 adc rdx,0 | 878 adc rdx,0 |
| 635 mov r13,rdx | 879 mov r13,rdx |
| 636 jmp NEAR $L$inner4x | 880 jmp NEAR $L$inner4x |
| 637 | 881 |
| 638 ALIGN 32 | 882 ALIGN 32 |
| 639 $L$inner4x: | 883 $L$inner4x: |
| 640 mul rbx | 884 mul rbx |
| 641 add r10,rax | 885 add r10,rax |
| 642 » mov» rax,QWORD[((-32))+rcx] | 886 » mov» rax,QWORD[((-16))+rcx] |
| 643 adc rdx,0 | 887 adc rdx,0 |
| 644 add r10,QWORD[16+r14] | 888 add r10,QWORD[16+r14] |
| 645 lea r14,[32+r14] | 889 lea r14,[32+r14] |
| 646 adc rdx,0 | 890 adc rdx,0 |
| 647 mov r11,rdx | 891 mov r11,rdx |
| 648 | 892 |
| 649 mul rbp | 893 mul rbp |
| 650 add r13,rax | 894 add r13,rax |
| 651 mov rax,QWORD[((-8))+r15*1+rsi] | 895 mov rax,QWORD[((-8))+r15*1+rsi] |
| 652 adc rdx,0 | 896 adc rdx,0 |
| 653 add r13,r10 | 897 add r13,r10 |
| 654 adc rdx,0 | 898 adc rdx,0 |
| 655 mov QWORD[((-32))+r14],rdi | 899 mov QWORD[((-32))+r14],rdi |
| 656 mov rdi,rdx | 900 mov rdi,rdx |
| 657 | 901 |
| 658 mul rbx | 902 mul rbx |
| 659 add r11,rax | 903 add r11,rax |
| 660 » mov» rax,QWORD[((-16))+rcx] | 904 » mov» rax,QWORD[((-8))+rcx] |
| 661 adc rdx,0 | 905 adc rdx,0 |
| 662 add r11,QWORD[((-8))+r14] | 906 add r11,QWORD[((-8))+r14] |
| 663 adc rdx,0 | 907 adc rdx,0 |
| 664 mov r10,rdx | 908 mov r10,rdx |
| 665 | 909 |
| 666 mul rbp | 910 mul rbp |
| 667 add rdi,rax | 911 add rdi,rax |
| 668 mov rax,QWORD[r15*1+rsi] | 912 mov rax,QWORD[r15*1+rsi] |
| 669 adc rdx,0 | 913 adc rdx,0 |
| 670 add rdi,r11 | 914 add rdi,r11 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 684 add r13,rax | 928 add r13,rax |
| 685 mov rax,QWORD[8+r15*1+rsi] | 929 mov rax,QWORD[8+r15*1+rsi] |
| 686 adc rdx,0 | 930 adc rdx,0 |
| 687 add r13,r10 | 931 add r13,r10 |
| 688 adc rdx,0 | 932 adc rdx,0 |
| 689 mov QWORD[((-16))+r14],rdi | 933 mov QWORD[((-16))+r14],rdi |
| 690 mov rdi,rdx | 934 mov rdi,rdx |
| 691 | 935 |
| 692 mul rbx | 936 mul rbx |
| 693 add r11,rax | 937 add r11,rax |
| 694 » mov» rax,QWORD[16+rcx] | 938 » mov» rax,QWORD[8+rcx] |
| 695 adc rdx,0 | 939 adc rdx,0 |
| 696 add r11,QWORD[8+r14] | 940 add r11,QWORD[8+r14] |
| 697 adc rdx,0 | 941 adc rdx,0 |
| 698 mov r10,rdx | 942 mov r10,rdx |
| 699 | 943 |
| 700 mul rbp | 944 mul rbp |
| 701 add rdi,rax | 945 add rdi,rax |
| 702 mov rax,QWORD[16+r15*1+rsi] | 946 mov rax,QWORD[16+r15*1+rsi] |
| 703 adc rdx,0 | 947 adc rdx,0 |
| 704 add rdi,r11 | 948 add rdi,r11 |
| 705 » lea» rcx,[64+rcx] | 949 » lea» rcx,[32+rcx] |
| 706 adc rdx,0 | 950 adc rdx,0 |
| 707 mov QWORD[((-8))+r14],r13 | 951 mov QWORD[((-8))+r14],r13 |
| 708 mov r13,rdx | 952 mov r13,rdx |
| 709 | 953 |
| 710 add r15,32 | 954 add r15,32 |
| 711 jnz NEAR $L$inner4x | 955 jnz NEAR $L$inner4x |
| 712 | 956 |
| 713 mul rbx | 957 mul rbx |
| 714 add r10,rax | 958 add r10,rax |
| 715 » mov» rax,QWORD[((-32))+rcx] | 959 » mov» rax,QWORD[((-16))+rcx] |
| 716 adc rdx,0 | 960 adc rdx,0 |
| 717 add r10,QWORD[16+r14] | 961 add r10,QWORD[16+r14] |
| 718 lea r14,[32+r14] | 962 lea r14,[32+r14] |
| 719 adc rdx,0 | 963 adc rdx,0 |
| 720 mov r11,rdx | 964 mov r11,rdx |
| 721 | 965 |
| 722 mul rbp | 966 mul rbp |
| 723 add r13,rax | 967 add r13,rax |
| 724 mov rax,QWORD[((-8))+rsi] | 968 mov rax,QWORD[((-8))+rsi] |
| 725 adc rdx,0 | 969 adc rdx,0 |
| 726 add r13,r10 | 970 add r13,r10 |
| 727 adc rdx,0 | 971 adc rdx,0 |
| 728 mov QWORD[((-32))+r14],rdi | 972 mov QWORD[((-32))+r14],rdi |
| 729 mov rdi,rdx | 973 mov rdi,rdx |
| 730 | 974 |
| 731 mul rbx | 975 mul rbx |
| 732 add r11,rax | 976 add r11,rax |
| 733 mov rax,rbp | 977 mov rax,rbp |
| 734 » mov» rbp,QWORD[((-16))+rcx] | 978 » mov» rbp,QWORD[((-8))+rcx] |
| 735 adc rdx,0 | 979 adc rdx,0 |
| 736 add r11,QWORD[((-8))+r14] | 980 add r11,QWORD[((-8))+r14] |
| 737 adc rdx,0 | 981 adc rdx,0 |
| 738 mov r10,rdx | 982 mov r10,rdx |
| 739 | 983 |
| 740 mul rbp | 984 mul rbp |
| 741 add rdi,rax | 985 add rdi,rax |
| 742 mov rax,QWORD[r9*1+rsi] | 986 mov rax,QWORD[r9*1+rsi] |
| 743 adc rdx,0 | 987 adc rdx,0 |
| 744 add rdi,r11 | 988 add rdi,r11 |
| 745 adc rdx,0 | 989 adc rdx,0 |
| 746 mov QWORD[((-24))+r14],r13 | 990 mov QWORD[((-24))+r14],r13 |
| 747 mov r13,rdx | 991 mov r13,rdx |
| 748 | 992 |
| 749 DB 102,72,15,126,195 | |
| 750 mov QWORD[((-16))+r14],rdi | 993 mov QWORD[((-16))+r14],rdi |
| 751 » lea» rcx,[r9*2+rcx] | 994 » lea» rcx,[r9*1+rcx] |
| 752 | 995 |
| 753 xor rdi,rdi | 996 xor rdi,rdi |
| 754 add r13,r10 | 997 add r13,r10 |
| 755 adc rdi,0 | 998 adc rdi,0 |
| 756 add r13,QWORD[r14] | 999 add r13,QWORD[r14] |
| 757 adc rdi,0 | 1000 adc rdi,0 |
| 758 mov QWORD[((-8))+r14],r13 | 1001 mov QWORD[((-8))+r14],r13 |
| 759 | 1002 |
| 760 cmp r12,QWORD[((16+8))+rsp] | 1003 cmp r12,QWORD[((16+8))+rsp] |
| 761 jb NEAR $L$outer4x | 1004 jb NEAR $L$outer4x |
| 1005 xor rax,rax |
| 762 sub rbp,r13 | 1006 sub rbp,r13 |
| 763 adc r15,r15 | 1007 adc r15,r15 |
| 764 or rdi,r15 | 1008 or rdi,r15 |
| 765 » xor» rdi,1 | 1009 » sub» rax,rdi |
| 766 lea rbx,[r9*1+r14] | 1010 lea rbx,[r9*1+r14] |
| 767 » lea» rbp,[rdi*8+rcx] | 1011 » mov» r12,QWORD[rcx] |
| 1012 » lea» rbp,[rcx] |
| 768 mov rcx,r9 | 1013 mov rcx,r9 |
| 769 sar rcx,3+2 | 1014 sar rcx,3+2 |
| 770 mov rdi,QWORD[((56+8))+rsp] | 1015 mov rdi,QWORD[((56+8))+rsp] |
| 771 » jmp» NEAR $L$sqr4x_sub | 1016 » dec» r12 |
| 1017 » xor» r10,r10 |
| 1018 » mov» r13,QWORD[8+rbp] |
| 1019 » mov» r14,QWORD[16+rbp] |
| 1020 » mov» r15,QWORD[24+rbp] |
| 1021 » jmp» NEAR $L$sqr4x_sub_entry |
| 772 | 1022 |
| 773 global bn_power5 | 1023 global bn_power5 |
| 774 | 1024 |
| 775 ALIGN 32 | 1025 ALIGN 32 |
| 776 bn_power5: | 1026 bn_power5: |
| 777 mov QWORD[8+rsp],rdi ;WIN64 prologue | 1027 mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 778 mov QWORD[16+rsp],rsi | 1028 mov QWORD[16+rsp],rsi |
| 779 mov rax,rsp | 1029 mov rax,rsp |
| 780 $L$SEH_begin_bn_power5: | 1030 $L$SEH_begin_bn_power5: |
| 781 mov rdi,rcx | 1031 mov rdi,rcx |
| 782 mov rsi,rdx | 1032 mov rsi,rdx |
| 783 mov rdx,r8 | 1033 mov rdx,r8 |
| 784 mov rcx,r9 | 1034 mov rcx,r9 |
| 785 mov r8,QWORD[40+rsp] | 1035 mov r8,QWORD[40+rsp] |
| 786 mov r9,QWORD[48+rsp] | 1036 mov r9,QWORD[48+rsp] |
| 787 | 1037 |
| 788 | 1038 |
| 789 mov rax,rsp | 1039 mov rax,rsp |
| 790 push rbx | 1040 push rbx |
| 791 push rbp | 1041 push rbp |
| 792 push r12 | 1042 push r12 |
| 793 push r13 | 1043 push r13 |
| 794 push r14 | 1044 push r14 |
| 795 push r15 | 1045 push r15 |
| 796 » lea» rsp,[((-40))+rsp] | 1046 |
| 797 » movaps» XMMWORD[rsp],xmm6 | |
| 798 » movaps» XMMWORD[16+rsp],xmm7 | |
| 799 » mov» r10d,r9d | |
| 800 shl r9d,3 | 1047 shl r9d,3 |
| 801 » shl» r10d,3+2 | 1048 » lea» r10d,[r9*2+r9] |
| 802 neg r9 | 1049 neg r9 |
| 803 mov r8,QWORD[r8] | 1050 mov r8,QWORD[r8] |
| 804 | 1051 |
| 805 | 1052 |
| 806 | 1053 |
| 807 | 1054 |
| 808 | 1055 |
| 809 | 1056 |
| 810 | 1057 |
| 811 » lea» r11,[((-64))+r9*2+rsp] | 1058 |
| 812 » sub» r11,rsi | 1059 » lea» r11,[((-320))+r9*2+rsp] |
| 1060 » sub» r11,rdi |
| 813 and r11,4095 | 1061 and r11,4095 |
| 814 cmp r10,r11 | 1062 cmp r10,r11 |
| 815 jb NEAR $L$pwr_sp_alt | 1063 jb NEAR $L$pwr_sp_alt |
| 816 sub rsp,r11 | 1064 sub rsp,r11 |
| 817 » lea» rsp,[((-64))+r9*2+rsp] | 1065 » lea» rsp,[((-320))+r9*2+rsp] |
| 818 jmp NEAR $L$pwr_sp_done | 1066 jmp NEAR $L$pwr_sp_done |
| 819 | 1067 |
| 820 ALIGN 32 | 1068 ALIGN 32 |
| 821 $L$pwr_sp_alt: | 1069 $L$pwr_sp_alt: |
| 822 » lea» r10,[((4096-64))+r9*2] | 1070 » lea» r10,[((4096-320))+r9*2] |
| 823 » lea» rsp,[((-64))+r9*2+rsp] | 1071 » lea» rsp,[((-320))+r9*2+rsp] |
| 824 sub r11,r10 | 1072 sub r11,r10 |
| 825 mov r10,0 | 1073 mov r10,0 |
| 826 cmovc r11,r10 | 1074 cmovc r11,r10 |
| 827 sub rsp,r11 | 1075 sub rsp,r11 |
| 828 $L$pwr_sp_done: | 1076 $L$pwr_sp_done: |
| 829 and rsp,-64 | 1077 and rsp,-64 |
| 830 mov r10,r9 | 1078 mov r10,r9 |
| 831 neg r9 | 1079 neg r9 |
| 832 | 1080 |
| 833 | 1081 |
| 834 | 1082 |
| 835 | 1083 |
| 836 | 1084 |
| 837 | 1085 |
| 838 | 1086 |
| 839 | 1087 |
| 840 | 1088 |
| 841 | 1089 |
| 842 mov QWORD[32+rsp],r8 | 1090 mov QWORD[32+rsp],r8 |
| 843 mov QWORD[40+rsp],rax | 1091 mov QWORD[40+rsp],rax |
| 844 $L$power5_body: | 1092 $L$power5_body: |
| 845 DB 102,72,15,110,207 | 1093 DB 102,72,15,110,207 |
| 846 DB 102,72,15,110,209 | 1094 DB 102,72,15,110,209 |
| 847 DB 102,73,15,110,218 | 1095 DB 102,73,15,110,218 |
| 848 DB 102,72,15,110,226 | 1096 DB 102,72,15,110,226 |
| 849 | 1097 |
| 850 call __bn_sqr8x_internal | 1098 call __bn_sqr8x_internal |
| 1099 call __bn_post4x_internal |
| 851 call __bn_sqr8x_internal | 1100 call __bn_sqr8x_internal |
| 1101 call __bn_post4x_internal |
| 852 call __bn_sqr8x_internal | 1102 call __bn_sqr8x_internal |
| 1103 call __bn_post4x_internal |
| 853 call __bn_sqr8x_internal | 1104 call __bn_sqr8x_internal |
| 1105 call __bn_post4x_internal |
| 854 call __bn_sqr8x_internal | 1106 call __bn_sqr8x_internal |
| 1107 call __bn_post4x_internal |
| 855 | 1108 |
| 856 DB 102,72,15,126,209 | 1109 DB 102,72,15,126,209 |
| 857 DB 102,72,15,126,226 | 1110 DB 102,72,15,126,226 |
| 858 mov rdi,rsi | 1111 mov rdi,rsi |
| 859 mov rax,QWORD[40+rsp] | 1112 mov rax,QWORD[40+rsp] |
| 860 lea r8,[32+rsp] | 1113 lea r8,[32+rsp] |
| 861 | 1114 |
| 862 call mul4x_internal | 1115 call mul4x_internal |
| 863 | 1116 |
| 864 mov rsi,QWORD[40+rsp] | 1117 mov rsi,QWORD[40+rsp] |
| (...skipping 525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1390 lea r8,[r11*2+rcx] | 1643 lea r8,[r11*2+rcx] |
| 1391 shr r11,63 | 1644 shr r11,63 |
| 1392 or r8,r10 | 1645 or r8,r10 |
| 1393 mul rax | 1646 mul rax |
| 1394 neg r15 | 1647 neg r15 |
| 1395 adc rbx,rax | 1648 adc rbx,rax |
| 1396 adc r8,rdx | 1649 adc r8,rdx |
| 1397 mov QWORD[((-16))+rdi],rbx | 1650 mov QWORD[((-16))+rdi],rbx |
| 1398 mov QWORD[((-8))+rdi],r8 | 1651 mov QWORD[((-8))+rdi],r8 |
| 1399 DB 102,72,15,126,213 | 1652 DB 102,72,15,126,213 |
| 1400 sqr8x_reduction: | 1653 __bn_sqr8x_reduction: |
| 1401 xor rax,rax | 1654 xor rax,rax |
| 1402 » lea» rcx,[r9*2+rbp] | 1655 » lea» rcx,[rbp*1+r9] |
| 1403 lea rdx,[((48+8))+r9*2+rsp] | 1656 lea rdx,[((48+8))+r9*2+rsp] |
| 1404 mov QWORD[((0+8))+rsp],rcx | 1657 mov QWORD[((0+8))+rsp],rcx |
| 1405 lea rdi,[((48+8))+r9*1+rsp] | 1658 lea rdi,[((48+8))+r9*1+rsp] |
| 1406 mov QWORD[((8+8))+rsp],rdx | 1659 mov QWORD[((8+8))+rsp],rdx |
| 1407 neg r9 | 1660 neg r9 |
| 1408 jmp NEAR $L$8x_reduction_loop | 1661 jmp NEAR $L$8x_reduction_loop |
| 1409 | 1662 |
| 1410 ALIGN 32 | 1663 ALIGN 32 |
| 1411 $L$8x_reduction_loop: | 1664 $L$8x_reduction_loop: |
| 1412 lea rdi,[r9*1+rdi] | 1665 lea rdi,[r9*1+rdi] |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1425 DB 0x67 | 1678 DB 0x67 |
| 1426 mov r8,rbx | 1679 mov r8,rbx |
| 1427 imul rbx,QWORD[((32+8))+rsp] | 1680 imul rbx,QWORD[((32+8))+rsp] |
| 1428 mov rax,QWORD[rbp] | 1681 mov rax,QWORD[rbp] |
| 1429 mov ecx,8 | 1682 mov ecx,8 |
| 1430 jmp NEAR $L$8x_reduce | 1683 jmp NEAR $L$8x_reduce |
| 1431 | 1684 |
| 1432 ALIGN 32 | 1685 ALIGN 32 |
| 1433 $L$8x_reduce: | 1686 $L$8x_reduce: |
| 1434 mul rbx | 1687 mul rbx |
| 1435 » mov» rax,QWORD[16+rbp] | 1688 » mov» rax,QWORD[8+rbp] |
| 1436 neg r8 | 1689 neg r8 |
| 1437 mov r8,rdx | 1690 mov r8,rdx |
| 1438 adc r8,0 | 1691 adc r8,0 |
| 1439 | 1692 |
| 1440 mul rbx | 1693 mul rbx |
| 1441 add r9,rax | 1694 add r9,rax |
| 1442 » mov» rax,QWORD[32+rbp] | 1695 » mov» rax,QWORD[16+rbp] |
| 1443 adc rdx,0 | 1696 adc rdx,0 |
| 1444 add r8,r9 | 1697 add r8,r9 |
| 1445 mov QWORD[((48-8+8))+rcx*8+rsp],rbx | 1698 mov QWORD[((48-8+8))+rcx*8+rsp],rbx |
| 1446 mov r9,rdx | 1699 mov r9,rdx |
| 1447 adc r9,0 | 1700 adc r9,0 |
| 1448 | 1701 |
| 1449 mul rbx | 1702 mul rbx |
| 1450 add r10,rax | 1703 add r10,rax |
| 1451 » mov» rax,QWORD[48+rbp] | 1704 » mov» rax,QWORD[24+rbp] |
| 1452 adc rdx,0 | 1705 adc rdx,0 |
| 1453 add r9,r10 | 1706 add r9,r10 |
| 1454 mov rsi,QWORD[((32+8))+rsp] | 1707 mov rsi,QWORD[((32+8))+rsp] |
| 1455 mov r10,rdx | 1708 mov r10,rdx |
| 1456 adc r10,0 | 1709 adc r10,0 |
| 1457 | 1710 |
| 1458 mul rbx | 1711 mul rbx |
| 1459 add r11,rax | 1712 add r11,rax |
| 1460 » mov» rax,QWORD[64+rbp] | 1713 » mov» rax,QWORD[32+rbp] |
| 1461 adc rdx,0 | 1714 adc rdx,0 |
| 1462 imul rsi,r8 | 1715 imul rsi,r8 |
| 1463 add r10,r11 | 1716 add r10,r11 |
| 1464 mov r11,rdx | 1717 mov r11,rdx |
| 1465 adc r11,0 | 1718 adc r11,0 |
| 1466 | 1719 |
| 1467 mul rbx | 1720 mul rbx |
| 1468 add r12,rax | 1721 add r12,rax |
| 1469 » mov» rax,QWORD[80+rbp] | 1722 » mov» rax,QWORD[40+rbp] |
| 1470 adc rdx,0 | 1723 adc rdx,0 |
| 1471 add r11,r12 | 1724 add r11,r12 |
| 1472 mov r12,rdx | 1725 mov r12,rdx |
| 1473 adc r12,0 | 1726 adc r12,0 |
| 1474 | 1727 |
| 1475 mul rbx | 1728 mul rbx |
| 1476 add r13,rax | 1729 add r13,rax |
| 1477 » mov» rax,QWORD[96+rbp] | 1730 » mov» rax,QWORD[48+rbp] |
| 1478 adc rdx,0 | 1731 adc rdx,0 |
| 1479 add r12,r13 | 1732 add r12,r13 |
| 1480 mov r13,rdx | 1733 mov r13,rdx |
| 1481 adc r13,0 | 1734 adc r13,0 |
| 1482 | 1735 |
| 1483 mul rbx | 1736 mul rbx |
| 1484 add r14,rax | 1737 add r14,rax |
| 1485 » mov» rax,QWORD[112+rbp] | 1738 » mov» rax,QWORD[56+rbp] |
| 1486 adc rdx,0 | 1739 adc rdx,0 |
| 1487 add r13,r14 | 1740 add r13,r14 |
| 1488 mov r14,rdx | 1741 mov r14,rdx |
| 1489 adc r14,0 | 1742 adc r14,0 |
| 1490 | 1743 |
| 1491 mul rbx | 1744 mul rbx |
| 1492 mov rbx,rsi | 1745 mov rbx,rsi |
| 1493 add r15,rax | 1746 add r15,rax |
| 1494 mov rax,QWORD[rbp] | 1747 mov rax,QWORD[rbp] |
| 1495 adc rdx,0 | 1748 adc rdx,0 |
| 1496 add r14,r15 | 1749 add r14,r15 |
| 1497 mov r15,rdx | 1750 mov r15,rdx |
| 1498 adc r15,0 | 1751 adc r15,0 |
| 1499 | 1752 |
| 1500 dec ecx | 1753 dec ecx |
| 1501 jnz NEAR $L$8x_reduce | 1754 jnz NEAR $L$8x_reduce |
| 1502 | 1755 |
| 1503 » lea» rbp,[128+rbp] | 1756 » lea» rbp,[64+rbp] |
| 1504 xor rax,rax | 1757 xor rax,rax |
| 1505 mov rdx,QWORD[((8+8))+rsp] | 1758 mov rdx,QWORD[((8+8))+rsp] |
| 1506 cmp rbp,QWORD[((0+8))+rsp] | 1759 cmp rbp,QWORD[((0+8))+rsp] |
| 1507 jae NEAR $L$8x_no_tail | 1760 jae NEAR $L$8x_no_tail |
| 1508 | 1761 |
| 1509 DB 0x66 | 1762 DB 0x66 |
| 1510 add r8,QWORD[rdi] | 1763 add r8,QWORD[rdi] |
| 1511 adc r9,QWORD[8+rdi] | 1764 adc r9,QWORD[8+rdi] |
| 1512 adc r10,QWORD[16+rdi] | 1765 adc r10,QWORD[16+rdi] |
| 1513 adc r11,QWORD[24+rdi] | 1766 adc r11,QWORD[24+rdi] |
| 1514 adc r12,QWORD[32+rdi] | 1767 adc r12,QWORD[32+rdi] |
| 1515 adc r13,QWORD[40+rdi] | 1768 adc r13,QWORD[40+rdi] |
| 1516 adc r14,QWORD[48+rdi] | 1769 adc r14,QWORD[48+rdi] |
| 1517 adc r15,QWORD[56+rdi] | 1770 adc r15,QWORD[56+rdi] |
| 1518 sbb rsi,rsi | 1771 sbb rsi,rsi |
| 1519 | 1772 |
| 1520 mov rbx,QWORD[((48+56+8))+rsp] | 1773 mov rbx,QWORD[((48+56+8))+rsp] |
| 1521 mov ecx,8 | 1774 mov ecx,8 |
| 1522 mov rax,QWORD[rbp] | 1775 mov rax,QWORD[rbp] |
| 1523 jmp NEAR $L$8x_tail | 1776 jmp NEAR $L$8x_tail |
| 1524 | 1777 |
| 1525 ALIGN 32 | 1778 ALIGN 32 |
| 1526 $L$8x_tail: | 1779 $L$8x_tail: |
| 1527 mul rbx | 1780 mul rbx |
| 1528 add r8,rax | 1781 add r8,rax |
| 1529 » mov» rax,QWORD[16+rbp] | 1782 » mov» rax,QWORD[8+rbp] |
| 1530 mov QWORD[rdi],r8 | 1783 mov QWORD[rdi],r8 |
| 1531 mov r8,rdx | 1784 mov r8,rdx |
| 1532 adc r8,0 | 1785 adc r8,0 |
| 1533 | 1786 |
| 1534 mul rbx | 1787 mul rbx |
| 1535 add r9,rax | 1788 add r9,rax |
| 1536 » mov» rax,QWORD[32+rbp] | 1789 » mov» rax,QWORD[16+rbp] |
| 1537 adc rdx,0 | 1790 adc rdx,0 |
| 1538 add r8,r9 | 1791 add r8,r9 |
| 1539 lea rdi,[8+rdi] | 1792 lea rdi,[8+rdi] |
| 1540 mov r9,rdx | 1793 mov r9,rdx |
| 1541 adc r9,0 | 1794 adc r9,0 |
| 1542 | 1795 |
| 1543 mul rbx | 1796 mul rbx |
| 1544 add r10,rax | 1797 add r10,rax |
| 1545 » mov» rax,QWORD[48+rbp] | 1798 » mov» rax,QWORD[24+rbp] |
| 1546 adc rdx,0 | 1799 adc rdx,0 |
| 1547 add r9,r10 | 1800 add r9,r10 |
| 1548 mov r10,rdx | 1801 mov r10,rdx |
| 1549 adc r10,0 | 1802 adc r10,0 |
| 1550 | 1803 |
| 1551 mul rbx | 1804 mul rbx |
| 1552 add r11,rax | 1805 add r11,rax |
| 1553 » mov» rax,QWORD[64+rbp] | 1806 » mov» rax,QWORD[32+rbp] |
| 1554 adc rdx,0 | 1807 adc rdx,0 |
| 1555 add r10,r11 | 1808 add r10,r11 |
| 1556 mov r11,rdx | 1809 mov r11,rdx |
| 1557 adc r11,0 | 1810 adc r11,0 |
| 1558 | 1811 |
| 1559 mul rbx | 1812 mul rbx |
| 1560 add r12,rax | 1813 add r12,rax |
| 1561 » mov» rax,QWORD[80+rbp] | 1814 » mov» rax,QWORD[40+rbp] |
| 1562 adc rdx,0 | 1815 adc rdx,0 |
| 1563 add r11,r12 | 1816 add r11,r12 |
| 1564 mov r12,rdx | 1817 mov r12,rdx |
| 1565 adc r12,0 | 1818 adc r12,0 |
| 1566 | 1819 |
| 1567 mul rbx | 1820 mul rbx |
| 1568 add r13,rax | 1821 add r13,rax |
| 1569 » mov» rax,QWORD[96+rbp] | 1822 » mov» rax,QWORD[48+rbp] |
| 1570 adc rdx,0 | 1823 adc rdx,0 |
| 1571 add r12,r13 | 1824 add r12,r13 |
| 1572 mov r13,rdx | 1825 mov r13,rdx |
| 1573 adc r13,0 | 1826 adc r13,0 |
| 1574 | 1827 |
| 1575 mul rbx | 1828 mul rbx |
| 1576 add r14,rax | 1829 add r14,rax |
| 1577 » mov» rax,QWORD[112+rbp] | 1830 » mov» rax,QWORD[56+rbp] |
| 1578 adc rdx,0 | 1831 adc rdx,0 |
| 1579 add r13,r14 | 1832 add r13,r14 |
| 1580 mov r14,rdx | 1833 mov r14,rdx |
| 1581 adc r14,0 | 1834 adc r14,0 |
| 1582 | 1835 |
| 1583 mul rbx | 1836 mul rbx |
| 1584 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] | 1837 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] |
| 1585 add r15,rax | 1838 add r15,rax |
| 1586 adc rdx,0 | 1839 adc rdx,0 |
| 1587 add r14,r15 | 1840 add r14,r15 |
| 1588 mov rax,QWORD[rbp] | 1841 mov rax,QWORD[rbp] |
| 1589 mov r15,rdx | 1842 mov r15,rdx |
| 1590 adc r15,0 | 1843 adc r15,0 |
| 1591 | 1844 |
| 1592 dec ecx | 1845 dec ecx |
| 1593 jnz NEAR $L$8x_tail | 1846 jnz NEAR $L$8x_tail |
| 1594 | 1847 |
| 1595 » lea» rbp,[128+rbp] | 1848 » lea» rbp,[64+rbp] |
| 1596 mov rdx,QWORD[((8+8))+rsp] | 1849 mov rdx,QWORD[((8+8))+rsp] |
| 1597 cmp rbp,QWORD[((0+8))+rsp] | 1850 cmp rbp,QWORD[((0+8))+rsp] |
| 1598 jae NEAR $L$8x_tail_done | 1851 jae NEAR $L$8x_tail_done |
| 1599 | 1852 |
| 1600 mov rbx,QWORD[((48+56+8))+rsp] | 1853 mov rbx,QWORD[((48+56+8))+rsp] |
| 1601 neg rsi | 1854 neg rsi |
| 1602 mov rax,QWORD[rbp] | 1855 mov rax,QWORD[rbp] |
| 1603 adc r8,QWORD[rdi] | 1856 adc r8,QWORD[rdi] |
| 1604 adc r9,QWORD[8+rdi] | 1857 adc r9,QWORD[8+rdi] |
| 1605 adc r10,QWORD[16+rdi] | 1858 adc r10,QWORD[16+rdi] |
| 1606 adc r11,QWORD[24+rdi] | 1859 adc r11,QWORD[24+rdi] |
| 1607 adc r12,QWORD[32+rdi] | 1860 adc r12,QWORD[32+rdi] |
| 1608 adc r13,QWORD[40+rdi] | 1861 adc r13,QWORD[40+rdi] |
| 1609 adc r14,QWORD[48+rdi] | 1862 adc r14,QWORD[48+rdi] |
| 1610 adc r15,QWORD[56+rdi] | 1863 adc r15,QWORD[56+rdi] |
| 1611 sbb rsi,rsi | 1864 sbb rsi,rsi |
| 1612 | 1865 |
| 1613 mov ecx,8 | 1866 mov ecx,8 |
| 1614 jmp NEAR $L$8x_tail | 1867 jmp NEAR $L$8x_tail |
| 1615 | 1868 |
| 1616 ALIGN 32 | 1869 ALIGN 32 |
| 1617 $L$8x_tail_done: | 1870 $L$8x_tail_done: |
| 1618 add r8,QWORD[rdx] | 1871 add r8,QWORD[rdx] |
| 1872 adc r9,0 |
| 1873 adc r10,0 |
| 1874 adc r11,0 |
| 1875 adc r12,0 |
| 1876 adc r13,0 |
| 1877 adc r14,0 |
| 1878 adc r15,0 |
| 1879 |
| 1880 |
| 1619 xor rax,rax | 1881 xor rax,rax |
| 1620 | 1882 |
| 1621 neg rsi | 1883 neg rsi |
| 1622 $L$8x_no_tail: | 1884 $L$8x_no_tail: |
| 1623 adc r8,QWORD[rdi] | 1885 adc r8,QWORD[rdi] |
| 1624 adc r9,QWORD[8+rdi] | 1886 adc r9,QWORD[8+rdi] |
| 1625 adc r10,QWORD[16+rdi] | 1887 adc r10,QWORD[16+rdi] |
| 1626 adc r11,QWORD[24+rdi] | 1888 adc r11,QWORD[24+rdi] |
| 1627 adc r12,QWORD[32+rdi] | 1889 adc r12,QWORD[32+rdi] |
| 1628 adc r13,QWORD[40+rdi] | 1890 adc r13,QWORD[40+rdi] |
| 1629 adc r14,QWORD[48+rdi] | 1891 adc r14,QWORD[48+rdi] |
| 1630 adc r15,QWORD[56+rdi] | 1892 adc r15,QWORD[56+rdi] |
| 1631 adc rax,0 | 1893 adc rax,0 |
| 1632 » mov» rcx,QWORD[((-16))+rbp] | 1894 » mov» rcx,QWORD[((-8))+rbp] |
| 1633 xor rsi,rsi | 1895 xor rsi,rsi |
| 1634 | 1896 |
| 1635 DB 102,72,15,126,213 | 1897 DB 102,72,15,126,213 |
| 1636 | 1898 |
| 1637 mov QWORD[rdi],r8 | 1899 mov QWORD[rdi],r8 |
| 1638 mov QWORD[8+rdi],r9 | 1900 mov QWORD[8+rdi],r9 |
| 1639 DB 102,73,15,126,217 | 1901 DB 102,73,15,126,217 |
| 1640 mov QWORD[16+rdi],r10 | 1902 mov QWORD[16+rdi],r10 |
| 1641 mov QWORD[24+rdi],r11 | 1903 mov QWORD[24+rdi],r11 |
| 1642 mov QWORD[32+rdi],r12 | 1904 mov QWORD[32+rdi],r12 |
| 1643 mov QWORD[40+rdi],r13 | 1905 mov QWORD[40+rdi],r13 |
| 1644 mov QWORD[48+rdi],r14 | 1906 mov QWORD[48+rdi],r14 |
| 1645 mov QWORD[56+rdi],r15 | 1907 mov QWORD[56+rdi],r15 |
| 1646 lea rdi,[64+rdi] | 1908 lea rdi,[64+rdi] |
| 1647 | 1909 |
| 1648 cmp rdi,rdx | 1910 cmp rdi,rdx |
| 1649 jb NEAR $L$8x_reduction_loop | 1911 jb NEAR $L$8x_reduction_loop |
| 1912 DB 0F3h,0C3h ;repret |
| 1650 | 1913 |
| 1651 sub rcx,r15 | |
| 1652 lea rbx,[r9*1+rdi] | |
| 1653 adc rsi,rsi | |
| 1654 mov rcx,r9 | |
| 1655 or rax,rsi | |
| 1656 DB 102,72,15,126,207 | |
| 1657 xor rax,1 | |
| 1658 DB 102,72,15,126,206 | |
| 1659 lea rbp,[rax*8+rbp] | |
| 1660 sar rcx,3+2 | |
| 1661 jmp NEAR $L$sqr4x_sub | |
| 1662 | 1914 |
| 1663 ALIGN 32 | 1915 ALIGN 32 |
| 1916 __bn_post4x_internal: |
| 1917 mov r12,QWORD[rbp] |
| 1918 lea rbx,[r9*1+rdi] |
| 1919 mov rcx,r9 |
| 1920 DB 102,72,15,126,207 |
| 1921 neg rax |
| 1922 DB 102,72,15,126,206 |
| 1923 sar rcx,3+2 |
| 1924 dec r12 |
| 1925 xor r10,r10 |
| 1926 mov r13,QWORD[8+rbp] |
| 1927 mov r14,QWORD[16+rbp] |
| 1928 mov r15,QWORD[24+rbp] |
| 1929 jmp NEAR $L$sqr4x_sub_entry |
| 1930 |
| 1931 ALIGN 16 |
| 1664 $L$sqr4x_sub: | 1932 $L$sqr4x_sub: |
| 1665 DB» 0x66 | 1933 » mov» r12,QWORD[rbp] |
| 1666 » mov» r12,QWORD[rbx] | 1934 » mov» r13,QWORD[8+rbp] |
| 1667 » mov» r13,QWORD[8+rbx] | 1935 » mov» r14,QWORD[16+rbp] |
| 1668 » sbb» r12,QWORD[rbp] | 1936 » mov» r15,QWORD[24+rbp] |
| 1669 » mov» r14,QWORD[16+rbx] | 1937 $L$sqr4x_sub_entry: |
| 1670 » sbb» r13,QWORD[16+rbp] | 1938 » lea» rbp,[32+rbp] |
| 1671 » mov» r15,QWORD[24+rbx] | 1939 » not» r12 |
| 1940 » not» r13 |
| 1941 » not» r14 |
| 1942 » not» r15 |
| 1943 » and» r12,rax |
| 1944 » and» r13,rax |
| 1945 » and» r14,rax |
| 1946 » and» r15,rax |
| 1947 |
| 1948 » neg» r10 |
| 1949 » adc» r12,QWORD[rbx] |
| 1950 » adc» r13,QWORD[8+rbx] |
| 1951 » adc» r14,QWORD[16+rbx] |
| 1952 » adc» r15,QWORD[24+rbx] |
| 1953 » mov» QWORD[rdi],r12 |
| 1672 lea rbx,[32+rbx] | 1954 lea rbx,[32+rbx] |
| 1673 sbb r14,QWORD[32+rbp] | |
| 1674 mov QWORD[rdi],r12 | |
| 1675 sbb r15,QWORD[48+rbp] | |
| 1676 lea rbp,[64+rbp] | |
| 1677 mov QWORD[8+rdi],r13 | 1955 mov QWORD[8+rdi],r13 |
| 1956 sbb r10,r10 |
| 1678 mov QWORD[16+rdi],r14 | 1957 mov QWORD[16+rdi],r14 |
| 1679 mov QWORD[24+rdi],r15 | 1958 mov QWORD[24+rdi],r15 |
| 1680 lea rdi,[32+rdi] | 1959 lea rdi,[32+rdi] |
| 1681 | 1960 |
| 1682 inc rcx | 1961 inc rcx |
| 1683 jnz NEAR $L$sqr4x_sub | 1962 jnz NEAR $L$sqr4x_sub |
| 1963 |
| 1684 mov r10,r9 | 1964 mov r10,r9 |
| 1685 neg r9 | 1965 neg r9 |
| 1686 DB 0F3h,0C3h ;repret | 1966 DB 0F3h,0C3h ;repret |
| 1687 | 1967 |
| 1688 global bn_from_montgomery | 1968 global bn_from_montgomery |
| 1689 | 1969 |
| 1690 ALIGN 32 | 1970 ALIGN 32 |
| 1691 bn_from_montgomery: | 1971 bn_from_montgomery: |
| 1692 test DWORD[48+rsp],7 | 1972 test DWORD[48+rsp],7 |
| 1693 jz NEAR bn_from_mont8x | 1973 jz NEAR bn_from_mont8x |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1711 | 1991 |
| 1712 | 1992 |
| 1713 DB 0x67 | 1993 DB 0x67 |
| 1714 mov rax,rsp | 1994 mov rax,rsp |
| 1715 push rbx | 1995 push rbx |
| 1716 push rbp | 1996 push rbp |
| 1717 push r12 | 1997 push r12 |
| 1718 push r13 | 1998 push r13 |
| 1719 push r14 | 1999 push r14 |
| 1720 push r15 | 2000 push r15 |
| 1721 » lea» rsp,[((-40))+rsp] | 2001 |
| 1722 » movaps» XMMWORD[rsp],xmm6 | |
| 1723 » movaps» XMMWORD[16+rsp],xmm7 | |
| 1724 DB» 0x67 | |
| 1725 » mov» r10d,r9d | |
| 1726 shl r9d,3 | 2002 shl r9d,3 |
| 1727 » shl» r10d,3+2 | 2003 » lea» r10,[r9*2+r9] |
| 1728 neg r9 | 2004 neg r9 |
| 1729 mov r8,QWORD[r8] | 2005 mov r8,QWORD[r8] |
| 1730 | 2006 |
| 1731 | 2007 |
| 1732 | 2008 |
| 1733 | 2009 |
| 1734 | 2010 |
| 1735 | 2011 |
| 1736 | 2012 |
| 1737 » lea» r11,[((-64))+r9*2+rsp] | 2013 |
| 1738 » sub» r11,rsi | 2014 » lea» r11,[((-320))+r9*2+rsp] |
| 2015 » sub» r11,rdi |
| 1739 and r11,4095 | 2016 and r11,4095 |
| 1740 cmp r10,r11 | 2017 cmp r10,r11 |
| 1741 jb NEAR $L$from_sp_alt | 2018 jb NEAR $L$from_sp_alt |
| 1742 sub rsp,r11 | 2019 sub rsp,r11 |
| 1743 » lea» rsp,[((-64))+r9*2+rsp] | 2020 » lea» rsp,[((-320))+r9*2+rsp] |
| 1744 jmp NEAR $L$from_sp_done | 2021 jmp NEAR $L$from_sp_done |
| 1745 | 2022 |
| 1746 ALIGN 32 | 2023 ALIGN 32 |
| 1747 $L$from_sp_alt: | 2024 $L$from_sp_alt: |
| 1748 » lea» r10,[((4096-64))+r9*2] | 2025 » lea» r10,[((4096-320))+r9*2] |
| 1749 » lea» rsp,[((-64))+r9*2+rsp] | 2026 » lea» rsp,[((-320))+r9*2+rsp] |
| 1750 sub r11,r10 | 2027 sub r11,r10 |
| 1751 mov r10,0 | 2028 mov r10,0 |
| 1752 cmovc r11,r10 | 2029 cmovc r11,r10 |
| 1753 sub rsp,r11 | 2030 sub rsp,r11 |
| 1754 $L$from_sp_done: | 2031 $L$from_sp_done: |
| 1755 and rsp,-64 | 2032 and rsp,-64 |
| 1756 mov r10,r9 | 2033 mov r10,r9 |
| 1757 neg r9 | 2034 neg r9 |
| 1758 | 2035 |
| 1759 | 2036 |
| (...skipping 30 matching lines...) Expand all Loading... |
| 1790 movdqa XMMWORD[48+rax],xmm4 | 2067 movdqa XMMWORD[48+rax],xmm4 |
| 1791 lea rax,[64+rax] | 2068 lea rax,[64+rax] |
| 1792 sub r11,64 | 2069 sub r11,64 |
| 1793 jnz NEAR $L$mul_by_1 | 2070 jnz NEAR $L$mul_by_1 |
| 1794 | 2071 |
| 1795 DB 102,72,15,110,207 | 2072 DB 102,72,15,110,207 |
| 1796 DB 102,72,15,110,209 | 2073 DB 102,72,15,110,209 |
| 1797 DB 0x67 | 2074 DB 0x67 |
| 1798 mov rbp,rcx | 2075 mov rbp,rcx |
| 1799 DB 102,73,15,110,218 | 2076 DB 102,73,15,110,218 |
| 1800 » call» sqr8x_reduction | 2077 » call» __bn_sqr8x_reduction |
| 2078 » call» __bn_post4x_internal |
| 1801 | 2079 |
| 1802 pxor xmm0,xmm0 | 2080 pxor xmm0,xmm0 |
| 1803 lea rax,[48+rsp] | 2081 lea rax,[48+rsp] |
| 1804 mov rsi,QWORD[40+rsp] | 2082 mov rsi,QWORD[40+rsp] |
| 1805 jmp NEAR $L$from_mont_zero | 2083 jmp NEAR $L$from_mont_zero |
| 1806 | 2084 |
| 1807 ALIGN 32 | 2085 ALIGN 32 |
| 1808 $L$from_mont_zero: | 2086 $L$from_mont_zero: |
| 1809 movdqa XMMWORD[rax],xmm0 | 2087 movdqa XMMWORD[rax],xmm0 |
| 1810 movdqa XMMWORD[16+rax],xmm0 | 2088 movdqa XMMWORD[16+rax],xmm0 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1840 mov QWORD[r8],rax | 2118 mov QWORD[r8],rax |
| 1841 lea r8,[256+r8] | 2119 lea r8,[256+r8] |
| 1842 sub edx,1 | 2120 sub edx,1 |
| 1843 jnz NEAR $L$scatter | 2121 jnz NEAR $L$scatter |
| 1844 $L$scatter_epilogue: | 2122 $L$scatter_epilogue: |
| 1845 DB 0F3h,0C3h ;repret | 2123 DB 0F3h,0C3h ;repret |
| 1846 | 2124 |
| 1847 | 2125 |
| 1848 global bn_gather5 | 2126 global bn_gather5 |
| 1849 | 2127 |
| 1850 ALIGN» 16 | 2128 ALIGN» 32 |
| 1851 bn_gather5: | 2129 bn_gather5: |
| 1852 $L$SEH_begin_bn_gather5: | 2130 $L$SEH_begin_bn_gather5: |
| 1853 | 2131 |
| 1854 DB» 0x48,0x83,0xec,0x28 | 2132 DB» 0x4c,0x8d,0x14,0x24 |
| 1855 DB» 0x0f,0x29,0x34,0x24 | 2133 DB» 0x48,0x81,0xec,0x08,0x01,0x00,0x00 |
| 1856 DB» 0x0f,0x29,0x7c,0x24,0x10 | 2134 » lea» rax,[$L$inc] |
| 1857 » mov» r11d,r9d | 2135 » and» rsp,-16 |
| 1858 » shr» r9d,3 | 2136 |
| 1859 » and» r11,7 | 2137 » movd» xmm5,r9d |
| 1860 » not» r9d | 2138 » movdqa» xmm0,XMMWORD[rax] |
| 1861 » lea» rax,[$L$magic_masks] | 2139 » movdqa» xmm1,XMMWORD[16+rax] |
| 1862 » and» r9d,3 | 2140 » lea» r11,[128+r8] |
| 1863 » lea» r8,[128+r11*8+r8] | 2141 » lea» rax,[128+rsp] |
| 1864 » movq» xmm4,QWORD[r9*8+rax] | 2142 |
| 1865 » movq» xmm5,QWORD[8+r9*8+rax] | 2143 » pshufd» xmm5,xmm5,0 |
| 1866 » movq» xmm6,QWORD[16+r9*8+rax] | 2144 » movdqa» xmm4,xmm1 |
| 1867 » movq» xmm7,QWORD[24+r9*8+rax] | 2145 » movdqa» xmm2,xmm1 |
| 2146 » paddd» xmm1,xmm0 |
| 2147 » pcmpeqd»xmm0,xmm5 |
| 2148 » movdqa» xmm3,xmm4 |
| 2149 |
| 2150 » paddd» xmm2,xmm1 |
| 2151 » pcmpeqd»xmm1,xmm5 |
| 2152 » movdqa» XMMWORD[(-128)+rax],xmm0 |
| 2153 » movdqa» xmm0,xmm4 |
| 2154 |
| 2155 » paddd» xmm3,xmm2 |
| 2156 » pcmpeqd»xmm2,xmm5 |
| 2157 » movdqa» XMMWORD[(-112)+rax],xmm1 |
| 2158 » movdqa» xmm1,xmm4 |
| 2159 |
| 2160 » paddd» xmm0,xmm3 |
| 2161 » pcmpeqd»xmm3,xmm5 |
| 2162 » movdqa» XMMWORD[(-96)+rax],xmm2 |
| 2163 » movdqa» xmm2,xmm4 |
| 2164 » paddd» xmm1,xmm0 |
| 2165 » pcmpeqd»xmm0,xmm5 |
| 2166 » movdqa» XMMWORD[(-80)+rax],xmm3 |
| 2167 » movdqa» xmm3,xmm4 |
| 2168 |
| 2169 » paddd» xmm2,xmm1 |
| 2170 » pcmpeqd»xmm1,xmm5 |
| 2171 » movdqa» XMMWORD[(-64)+rax],xmm0 |
| 2172 » movdqa» xmm0,xmm4 |
| 2173 |
| 2174 » paddd» xmm3,xmm2 |
| 2175 » pcmpeqd»xmm2,xmm5 |
| 2176 » movdqa» XMMWORD[(-48)+rax],xmm1 |
| 2177 » movdqa» xmm1,xmm4 |
| 2178 |
| 2179 » paddd» xmm0,xmm3 |
| 2180 » pcmpeqd»xmm3,xmm5 |
| 2181 » movdqa» XMMWORD[(-32)+rax],xmm2 |
| 2182 » movdqa» xmm2,xmm4 |
| 2183 » paddd» xmm1,xmm0 |
| 2184 » pcmpeqd»xmm0,xmm5 |
| 2185 » movdqa» XMMWORD[(-16)+rax],xmm3 |
| 2186 » movdqa» xmm3,xmm4 |
| 2187 |
| 2188 » paddd» xmm2,xmm1 |
| 2189 » pcmpeqd»xmm1,xmm5 |
| 2190 » movdqa» XMMWORD[rax],xmm0 |
| 2191 » movdqa» xmm0,xmm4 |
| 2192 |
| 2193 » paddd» xmm3,xmm2 |
| 2194 » pcmpeqd»xmm2,xmm5 |
| 2195 » movdqa» XMMWORD[16+rax],xmm1 |
| 2196 » movdqa» xmm1,xmm4 |
| 2197 |
| 2198 » paddd» xmm0,xmm3 |
| 2199 » pcmpeqd»xmm3,xmm5 |
| 2200 » movdqa» XMMWORD[32+rax],xmm2 |
| 2201 » movdqa» xmm2,xmm4 |
| 2202 » paddd» xmm1,xmm0 |
| 2203 » pcmpeqd»xmm0,xmm5 |
| 2204 » movdqa» XMMWORD[48+rax],xmm3 |
| 2205 » movdqa» xmm3,xmm4 |
| 2206 |
| 2207 » paddd» xmm2,xmm1 |
| 2208 » pcmpeqd»xmm1,xmm5 |
| 2209 » movdqa» XMMWORD[64+rax],xmm0 |
| 2210 » movdqa» xmm0,xmm4 |
| 2211 |
| 2212 » paddd» xmm3,xmm2 |
| 2213 » pcmpeqd»xmm2,xmm5 |
| 2214 » movdqa» XMMWORD[80+rax],xmm1 |
| 2215 » movdqa» xmm1,xmm4 |
| 2216 |
| 2217 » paddd» xmm0,xmm3 |
| 2218 » pcmpeqd»xmm3,xmm5 |
| 2219 » movdqa» XMMWORD[96+rax],xmm2 |
| 2220 » movdqa» xmm2,xmm4 |
| 2221 » movdqa» XMMWORD[112+rax],xmm3 |
| 1868 jmp NEAR $L$gather | 2222 jmp NEAR $L$gather |
| 1869 ALIGN» 16 | 2223 |
| 2224 ALIGN» 32 |
| 1870 $L$gather: | 2225 $L$gather: |
| 1871 » movq» xmm0,QWORD[(((-128)))+r8] | 2226 » pxor» xmm4,xmm4 |
| 1872 » movq» xmm1,QWORD[((-64))+r8] | 2227 » pxor» xmm5,xmm5 |
| 1873 » pand» xmm0,xmm4 | 2228 » movdqa» xmm0,XMMWORD[((-128))+r11] |
| 1874 » movq» xmm2,QWORD[r8] | 2229 » movdqa» xmm1,XMMWORD[((-112))+r11] |
| 1875 » pand» xmm1,xmm5 | 2230 » movdqa» xmm2,XMMWORD[((-96))+r11] |
| 1876 » movq» xmm3,QWORD[64+r8] | 2231 » pand» xmm0,XMMWORD[((-128))+rax] |
| 1877 » pand» xmm2,xmm6 | 2232 » movdqa» xmm3,XMMWORD[((-80))+r11] |
| 1878 » por» xmm0,xmm1 | 2233 » pand» xmm1,XMMWORD[((-112))+rax] |
| 1879 » pand» xmm3,xmm7 | 2234 » por» xmm4,xmm0 |
| 1880 DB» 0x67,0x67 | 2235 » pand» xmm2,XMMWORD[((-96))+rax] |
| 1881 » por» xmm0,xmm2 | 2236 » por» xmm5,xmm1 |
| 1882 » lea» r8,[256+r8] | 2237 » pand» xmm3,XMMWORD[((-80))+rax] |
| 1883 » por» xmm0,xmm3 | 2238 » por» xmm4,xmm2 |
| 1884 | 2239 » por» xmm5,xmm3 |
| 2240 » movdqa» xmm0,XMMWORD[((-64))+r11] |
| 2241 » movdqa» xmm1,XMMWORD[((-48))+r11] |
| 2242 » movdqa» xmm2,XMMWORD[((-32))+r11] |
| 2243 » pand» xmm0,XMMWORD[((-64))+rax] |
| 2244 » movdqa» xmm3,XMMWORD[((-16))+r11] |
| 2245 » pand» xmm1,XMMWORD[((-48))+rax] |
| 2246 » por» xmm4,xmm0 |
| 2247 » pand» xmm2,XMMWORD[((-32))+rax] |
| 2248 » por» xmm5,xmm1 |
| 2249 » pand» xmm3,XMMWORD[((-16))+rax] |
| 2250 » por» xmm4,xmm2 |
| 2251 » por» xmm5,xmm3 |
| 2252 » movdqa» xmm0,XMMWORD[r11] |
| 2253 » movdqa» xmm1,XMMWORD[16+r11] |
| 2254 » movdqa» xmm2,XMMWORD[32+r11] |
| 2255 » pand» xmm0,XMMWORD[rax] |
| 2256 » movdqa» xmm3,XMMWORD[48+r11] |
| 2257 » pand» xmm1,XMMWORD[16+rax] |
| 2258 » por» xmm4,xmm0 |
| 2259 » pand» xmm2,XMMWORD[32+rax] |
| 2260 » por» xmm5,xmm1 |
| 2261 » pand» xmm3,XMMWORD[48+rax] |
| 2262 » por» xmm4,xmm2 |
| 2263 » por» xmm5,xmm3 |
| 2264 » movdqa» xmm0,XMMWORD[64+r11] |
| 2265 » movdqa» xmm1,XMMWORD[80+r11] |
| 2266 » movdqa» xmm2,XMMWORD[96+r11] |
| 2267 » pand» xmm0,XMMWORD[64+rax] |
| 2268 » movdqa» xmm3,XMMWORD[112+r11] |
| 2269 » pand» xmm1,XMMWORD[80+rax] |
| 2270 » por» xmm4,xmm0 |
| 2271 » pand» xmm2,XMMWORD[96+rax] |
| 2272 » por» xmm5,xmm1 |
| 2273 » pand» xmm3,XMMWORD[112+rax] |
| 2274 » por» xmm4,xmm2 |
| 2275 » por» xmm5,xmm3 |
| 2276 » por» xmm4,xmm5 |
| 2277 » lea» r11,[256+r11] |
| 2278 » pshufd» xmm0,xmm4,0x4e |
| 2279 » por» xmm0,xmm4 |
| 1885 movq QWORD[rcx],xmm0 | 2280 movq QWORD[rcx],xmm0 |
| 1886 lea rcx,[8+rcx] | 2281 lea rcx,[8+rcx] |
| 1887 sub edx,1 | 2282 sub edx,1 |
| 1888 jnz NEAR $L$gather | 2283 jnz NEAR $L$gather |
| 1889 » movaps» xmm6,XMMWORD[rsp] | 2284 |
| 1890 » movaps» xmm7,XMMWORD[16+rsp] | 2285 » lea» rsp,[r10] |
| 1891 » lea» rsp,[40+rsp] | |
| 1892 DB 0F3h,0C3h ;repret | 2286 DB 0F3h,0C3h ;repret |
| 1893 $L$SEH_end_bn_gather5: | 2287 $L$SEH_end_bn_gather5: |
| 1894 | 2288 |
| 1895 ALIGN 64 | 2289 ALIGN 64 |
| 1896 $L$magic_masks: | 2290 $L$inc: |
| 1897 » DD» 0,0,0,0,0,0,-1,-1 | 2291 » DD» 0,0,1,1 |
| 1898 » DD» 0,0,0,0,0,0,0,0 | 2292 » DD» 2,2,2,2 |
| 1899 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | 2293 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
| 1900 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 | 2294 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 |
| 1901 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 | 2295 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 |
| 1902 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 | 2296 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 |
| 1903 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 | 2297 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 |
| 1904 DB 112,101,110,115,115,108,46,111,114,103,62,0 | 2298 DB 112,101,110,115,115,108,46,111,114,103,62,0 |
| 1905 EXTERN __imp_RtlVirtualUnwind | 2299 EXTERN __imp_RtlVirtualUnwind |
| 1906 | 2300 |
| 1907 ALIGN 16 | 2301 ALIGN 16 |
| 1908 mul_handler: | 2302 mul_handler: |
| (...skipping 21 matching lines...) Expand all Loading... |
| 1930 | 2324 |
| 1931 mov rax,QWORD[152+r8] | 2325 mov rax,QWORD[152+r8] |
| 1932 | 2326 |
| 1933 mov r10d,DWORD[4+r11] | 2327 mov r10d,DWORD[4+r11] |
| 1934 lea r10,[r10*1+rsi] | 2328 lea r10,[r10*1+rsi] |
| 1935 cmp rbx,r10 | 2329 cmp rbx,r10 |
| 1936 jae NEAR $L$common_seh_tail | 2330 jae NEAR $L$common_seh_tail |
| 1937 | 2331 |
| 1938 lea r10,[$L$mul_epilogue] | 2332 lea r10,[$L$mul_epilogue] |
| 1939 cmp rbx,r10 | 2333 cmp rbx,r10 |
| 1940 » jb» NEAR $L$body_40 | 2334 » ja» NEAR $L$body_40 |
| 1941 | 2335 |
| 1942 mov r10,QWORD[192+r8] | 2336 mov r10,QWORD[192+r8] |
| 1943 mov rax,QWORD[8+r10*8+rax] | 2337 mov rax,QWORD[8+r10*8+rax] |
| 2338 |
| 1944 jmp NEAR $L$body_proceed | 2339 jmp NEAR $L$body_proceed |
| 1945 | 2340 |
| 1946 $L$body_40: | 2341 $L$body_40: |
| 1947 mov rax,QWORD[40+rax] | 2342 mov rax,QWORD[40+rax] |
| 1948 $L$body_proceed: | 2343 $L$body_proceed: |
| 1949 | |
| 1950 movaps xmm0,XMMWORD[((-88))+rax] | |
| 1951 movaps xmm1,XMMWORD[((-72))+rax] | |
| 1952 | |
| 1953 mov rbx,QWORD[((-8))+rax] | 2344 mov rbx,QWORD[((-8))+rax] |
| 1954 mov rbp,QWORD[((-16))+rax] | 2345 mov rbp,QWORD[((-16))+rax] |
| 1955 mov r12,QWORD[((-24))+rax] | 2346 mov r12,QWORD[((-24))+rax] |
| 1956 mov r13,QWORD[((-32))+rax] | 2347 mov r13,QWORD[((-32))+rax] |
| 1957 mov r14,QWORD[((-40))+rax] | 2348 mov r14,QWORD[((-40))+rax] |
| 1958 mov r15,QWORD[((-48))+rax] | 2349 mov r15,QWORD[((-48))+rax] |
| 1959 mov QWORD[144+r8],rbx | 2350 mov QWORD[144+r8],rbx |
| 1960 mov QWORD[160+r8],rbp | 2351 mov QWORD[160+r8],rbp |
| 1961 mov QWORD[216+r8],r12 | 2352 mov QWORD[216+r8],r12 |
| 1962 mov QWORD[224+r8],r13 | 2353 mov QWORD[224+r8],r13 |
| 1963 mov QWORD[232+r8],r14 | 2354 mov QWORD[232+r8],r14 |
| 1964 mov QWORD[240+r8],r15 | 2355 mov QWORD[240+r8],r15 |
| 1965 movups XMMWORD[512+r8],xmm0 | |
| 1966 movups XMMWORD[528+r8],xmm1 | |
| 1967 | 2356 |
| 1968 $L$common_seh_tail: | 2357 $L$common_seh_tail: |
| 1969 mov rdi,QWORD[8+rax] | 2358 mov rdi,QWORD[8+rax] |
| 1970 mov rsi,QWORD[16+rax] | 2359 mov rsi,QWORD[16+rax] |
| 1971 mov QWORD[152+r8],rax | 2360 mov QWORD[152+r8],rax |
| 1972 mov QWORD[168+r8],rsi | 2361 mov QWORD[168+r8],rsi |
| 1973 mov QWORD[176+r8],rdi | 2362 mov QWORD[176+r8],rdi |
| 1974 | 2363 |
| 1975 mov rdi,QWORD[40+r9] | 2364 mov rdi,QWORD[40+r9] |
| 1976 mov rsi,r8 | 2365 mov rsi,r8 |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2042 DB 9,0,0,0 | 2431 DB 9,0,0,0 |
| 2043 DD mul_handler wrt ..imagebase | 2432 DD mul_handler wrt ..imagebase |
| 2044 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase | 2433 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase |
| 2045 ALIGN 8 | 2434 ALIGN 8 |
| 2046 $L$SEH_info_bn_from_mont8x: | 2435 $L$SEH_info_bn_from_mont8x: |
| 2047 DB 9,0,0,0 | 2436 DB 9,0,0,0 |
| 2048 DD mul_handler wrt ..imagebase | 2437 DD mul_handler wrt ..imagebase |
| 2049 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase | 2438 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase |
| 2050 ALIGN 8 | 2439 ALIGN 8 |
| 2051 $L$SEH_info_bn_gather5: | 2440 $L$SEH_info_bn_gather5: |
| 2052 DB» 0x01,0x0d,0x05,0x00 | 2441 DB» 0x01,0x0b,0x03,0x0a |
| 2053 DB» 0x0d,0x78,0x01,0x00 | 2442 DB» 0x0b,0x01,0x21,0x00 |
| 2054 DB» 0x08,0x68,0x00,0x00 | 2443 DB» 0x04,0xa3,0x00,0x00 |
| 2055 DB» 0x04,0x42,0x00,0x00 | |
| 2056 ALIGN 8 | 2444 ALIGN 8 |
| OLD | NEW |