OLD | NEW |
1 default rel | 1 default rel |
2 %define XMMWORD | 2 %define XMMWORD |
3 %define YMMWORD | 3 %define YMMWORD |
4 %define ZMMWORD | 4 %define ZMMWORD |
5 section .text code align=64 | 5 section .text code align=64 |
6 | 6 |
7 | 7 |
8 EXTERN OPENSSL_ia32cap_P | 8 EXTERN OPENSSL_ia32cap_P |
9 | 9 |
10 global bn_mul_mont_gather5 | 10 global bn_mul_mont_gather5 |
(...skipping 13 matching lines...) Expand all Loading... |
24 | 24 |
25 | 25 |
26 test r9d,7 | 26 test r9d,7 |
27 jnz NEAR $L$mul_enter | 27 jnz NEAR $L$mul_enter |
28 jmp NEAR $L$mul4x_enter | 28 jmp NEAR $L$mul4x_enter |
29 | 29 |
30 ALIGN 16 | 30 ALIGN 16 |
31 $L$mul_enter: | 31 $L$mul_enter: |
32 mov r9d,r9d | 32 mov r9d,r9d |
33 mov rax,rsp | 33 mov rax,rsp |
34 » mov» r10d,DWORD[56+rsp] | 34 » movd» xmm5,DWORD[56+rsp] |
| 35 » lea» r10,[$L$inc] |
35 push rbx | 36 push rbx |
36 push rbp | 37 push rbp |
37 push r12 | 38 push r12 |
38 push r13 | 39 push r13 |
39 push r14 | 40 push r14 |
40 push r15 | 41 push r15 |
41 » lea» rsp,[((-40))+rsp] | 42 |
42 » movaps» XMMWORD[rsp],xmm6 | |
43 » movaps» XMMWORD[16+rsp],xmm7 | |
44 lea r11,[2+r9] | 43 lea r11,[2+r9] |
45 neg r11 | 44 neg r11 |
46 » lea» rsp,[r11*8+rsp] | 45 » lea» rsp,[((-264))+r11*8+rsp] |
47 and rsp,-1024 | 46 and rsp,-1024 |
48 | 47 |
49 mov QWORD[8+r9*8+rsp],rax | 48 mov QWORD[8+r9*8+rsp],rax |
50 $L$mul_body: | 49 $L$mul_body: |
51 » mov» r12,rdx | 50 » lea» r12,[128+rdx] |
52 » mov» r11,r10 | 51 » movdqa» xmm0,XMMWORD[r10] |
53 » shr» r10,3 | 52 » movdqa» xmm1,XMMWORD[16+r10] |
54 » and» r11,7 | 53 » lea» r10,[((24-112))+r9*8+rsp] |
55 » not» r10 | 54 » and» r10,-16 |
56 » lea» rax,[$L$magic_masks] | |
57 » and» r10,3 | |
58 » lea» r12,[96+r11*8+r12] | |
59 » movq» xmm4,QWORD[r10*8+rax] | |
60 » movq» xmm5,QWORD[8+r10*8+rax] | |
61 » movq» xmm6,QWORD[16+r10*8+rax] | |
62 » movq» xmm7,QWORD[24+r10*8+rax] | |
63 | 55 |
64 » movq» xmm0,QWORD[(((-96)))+r12] | 56 » pshufd» xmm5,xmm5,0 |
65 » movq» xmm1,QWORD[((-32))+r12] | 57 » movdqa» xmm4,xmm1 |
66 » pand» xmm0,xmm4 | 58 » movdqa» xmm2,xmm1 |
67 » movq» xmm2,QWORD[32+r12] | 59 » paddd» xmm1,xmm0 |
68 » pand» xmm1,xmm5 | 60 » pcmpeqd»xmm0,xmm5 |
69 » movq» xmm3,QWORD[96+r12] | 61 DB» 0x67 |
70 » pand» xmm2,xmm6 | 62 » movdqa» xmm3,xmm4 |
| 63 » paddd» xmm2,xmm1 |
| 64 » pcmpeqd»xmm1,xmm5 |
| 65 » movdqa» XMMWORD[112+r10],xmm0 |
| 66 » movdqa» xmm0,xmm4 |
| 67 |
| 68 » paddd» xmm3,xmm2 |
| 69 » pcmpeqd»xmm2,xmm5 |
| 70 » movdqa» XMMWORD[128+r10],xmm1 |
| 71 » movdqa» xmm1,xmm4 |
| 72 |
| 73 » paddd» xmm0,xmm3 |
| 74 » pcmpeqd»xmm3,xmm5 |
| 75 » movdqa» XMMWORD[144+r10],xmm2 |
| 76 » movdqa» xmm2,xmm4 |
| 77 |
| 78 » paddd» xmm1,xmm0 |
| 79 » pcmpeqd»xmm0,xmm5 |
| 80 » movdqa» XMMWORD[160+r10],xmm3 |
| 81 » movdqa» xmm3,xmm4 |
| 82 » paddd» xmm2,xmm1 |
| 83 » pcmpeqd»xmm1,xmm5 |
| 84 » movdqa» XMMWORD[176+r10],xmm0 |
| 85 » movdqa» xmm0,xmm4 |
| 86 |
| 87 » paddd» xmm3,xmm2 |
| 88 » pcmpeqd»xmm2,xmm5 |
| 89 » movdqa» XMMWORD[192+r10],xmm1 |
| 90 » movdqa» xmm1,xmm4 |
| 91 |
| 92 » paddd» xmm0,xmm3 |
| 93 » pcmpeqd»xmm3,xmm5 |
| 94 » movdqa» XMMWORD[208+r10],xmm2 |
| 95 » movdqa» xmm2,xmm4 |
| 96 |
| 97 » paddd» xmm1,xmm0 |
| 98 » pcmpeqd»xmm0,xmm5 |
| 99 » movdqa» XMMWORD[224+r10],xmm3 |
| 100 » movdqa» xmm3,xmm4 |
| 101 » paddd» xmm2,xmm1 |
| 102 » pcmpeqd»xmm1,xmm5 |
| 103 » movdqa» XMMWORD[240+r10],xmm0 |
| 104 » movdqa» xmm0,xmm4 |
| 105 |
| 106 » paddd» xmm3,xmm2 |
| 107 » pcmpeqd»xmm2,xmm5 |
| 108 » movdqa» XMMWORD[256+r10],xmm1 |
| 109 » movdqa» xmm1,xmm4 |
| 110 |
| 111 » paddd» xmm0,xmm3 |
| 112 » pcmpeqd»xmm3,xmm5 |
| 113 » movdqa» XMMWORD[272+r10],xmm2 |
| 114 » movdqa» xmm2,xmm4 |
| 115 |
| 116 » paddd» xmm1,xmm0 |
| 117 » pcmpeqd»xmm0,xmm5 |
| 118 » movdqa» XMMWORD[288+r10],xmm3 |
| 119 » movdqa» xmm3,xmm4 |
| 120 » paddd» xmm2,xmm1 |
| 121 » pcmpeqd»xmm1,xmm5 |
| 122 » movdqa» XMMWORD[304+r10],xmm0 |
| 123 |
| 124 » paddd» xmm3,xmm2 |
| 125 DB» 0x67 |
| 126 » pcmpeqd»xmm2,xmm5 |
| 127 » movdqa» XMMWORD[320+r10],xmm1 |
| 128 |
| 129 » pcmpeqd»xmm3,xmm5 |
| 130 » movdqa» XMMWORD[336+r10],xmm2 |
| 131 » pand» xmm0,XMMWORD[64+r12] |
| 132 |
| 133 » pand» xmm1,XMMWORD[80+r12] |
| 134 » pand» xmm2,XMMWORD[96+r12] |
| 135 » movdqa» XMMWORD[352+r10],xmm3 |
| 136 » pand» xmm3,XMMWORD[112+r12] |
| 137 » por» xmm0,xmm2 |
| 138 » por» xmm1,xmm3 |
| 139 » movdqa» xmm4,XMMWORD[((-128))+r12] |
| 140 » movdqa» xmm5,XMMWORD[((-112))+r12] |
| 141 » movdqa» xmm2,XMMWORD[((-96))+r12] |
| 142 » pand» xmm4,XMMWORD[112+r10] |
| 143 » movdqa» xmm3,XMMWORD[((-80))+r12] |
| 144 » pand» xmm5,XMMWORD[128+r10] |
| 145 » por» xmm0,xmm4 |
| 146 » pand» xmm2,XMMWORD[144+r10] |
| 147 » por» xmm1,xmm5 |
| 148 » pand» xmm3,XMMWORD[160+r10] |
| 149 » por» xmm0,xmm2 |
| 150 » por» xmm1,xmm3 |
| 151 » movdqa» xmm4,XMMWORD[((-64))+r12] |
| 152 » movdqa» xmm5,XMMWORD[((-48))+r12] |
| 153 » movdqa» xmm2,XMMWORD[((-32))+r12] |
| 154 » pand» xmm4,XMMWORD[176+r10] |
| 155 » movdqa» xmm3,XMMWORD[((-16))+r12] |
| 156 » pand» xmm5,XMMWORD[192+r10] |
| 157 » por» xmm0,xmm4 |
| 158 » pand» xmm2,XMMWORD[208+r10] |
| 159 » por» xmm1,xmm5 |
| 160 » pand» xmm3,XMMWORD[224+r10] |
| 161 » por» xmm0,xmm2 |
| 162 » por» xmm1,xmm3 |
| 163 » movdqa» xmm4,XMMWORD[r12] |
| 164 » movdqa» xmm5,XMMWORD[16+r12] |
| 165 » movdqa» xmm2,XMMWORD[32+r12] |
| 166 » pand» xmm4,XMMWORD[240+r10] |
| 167 » movdqa» xmm3,XMMWORD[48+r12] |
| 168 » pand» xmm5,XMMWORD[256+r10] |
| 169 » por» xmm0,xmm4 |
| 170 » pand» xmm2,XMMWORD[272+r10] |
| 171 » por» xmm1,xmm5 |
| 172 » pand» xmm3,XMMWORD[288+r10] |
| 173 » por» xmm0,xmm2 |
| 174 » por» xmm1,xmm3 |
71 por xmm0,xmm1 | 175 por xmm0,xmm1 |
72 » pand» xmm3,xmm7 | 176 » pshufd» xmm1,xmm0,0x4e |
73 » por» xmm0,xmm2 | 177 » por» xmm0,xmm1 |
74 lea r12,[256+r12] | 178 lea r12,[256+r12] |
75 por xmm0,xmm3 | |
76 | |
77 DB 102,72,15,126,195 | 179 DB 102,72,15,126,195 |
78 | 180 |
79 mov r8,QWORD[r8] | 181 mov r8,QWORD[r8] |
80 mov rax,QWORD[rsi] | 182 mov rax,QWORD[rsi] |
81 | 183 |
82 xor r14,r14 | 184 xor r14,r14 |
83 xor r15,r15 | 185 xor r15,r15 |
84 | 186 |
85 movq xmm0,QWORD[(((-96)))+r12] | |
86 movq xmm1,QWORD[((-32))+r12] | |
87 pand xmm0,xmm4 | |
88 movq xmm2,QWORD[32+r12] | |
89 pand xmm1,xmm5 | |
90 | |
91 mov rbp,r8 | 187 mov rbp,r8 |
92 mul rbx | 188 mul rbx |
93 mov r10,rax | 189 mov r10,rax |
94 mov rax,QWORD[rcx] | 190 mov rax,QWORD[rcx] |
95 | 191 |
96 movq xmm3,QWORD[96+r12] | |
97 pand xmm2,xmm6 | |
98 por xmm0,xmm1 | |
99 pand xmm3,xmm7 | |
100 | |
101 imul rbp,r10 | 192 imul rbp,r10 |
102 mov r11,rdx | 193 mov r11,rdx |
103 | 194 |
104 por xmm0,xmm2 | |
105 lea r12,[256+r12] | |
106 por xmm0,xmm3 | |
107 | |
108 mul rbp | 195 mul rbp |
109 add r10,rax | 196 add r10,rax |
110 mov rax,QWORD[8+rsi] | 197 mov rax,QWORD[8+rsi] |
111 adc rdx,0 | 198 adc rdx,0 |
112 mov r13,rdx | 199 mov r13,rdx |
113 | 200 |
114 lea r15,[1+r15] | 201 lea r15,[1+r15] |
115 jmp NEAR $L$1st_enter | 202 jmp NEAR $L$1st_enter |
116 | 203 |
117 ALIGN 16 | 204 ALIGN 16 |
(...skipping 12 matching lines...) Expand all Loading... |
130 add r11,rax | 217 add r11,rax |
131 mov rax,QWORD[r15*8+rcx] | 218 mov rax,QWORD[r15*8+rcx] |
132 adc rdx,0 | 219 adc rdx,0 |
133 lea r15,[1+r15] | 220 lea r15,[1+r15] |
134 mov r10,rdx | 221 mov r10,rdx |
135 | 222 |
136 mul rbp | 223 mul rbp |
137 cmp r15,r9 | 224 cmp r15,r9 |
138 jne NEAR $L$1st | 225 jne NEAR $L$1st |
139 | 226 |
140 DB 102,72,15,126,195 | |
141 | 227 |
142 add r13,rax | 228 add r13,rax |
143 mov rax,QWORD[rsi] | |
144 adc rdx,0 | 229 adc rdx,0 |
145 add r13,r11 | 230 add r13,r11 |
146 adc rdx,0 | 231 adc rdx,0 |
147 » mov» QWORD[((-16))+r15*8+rsp],r13 | 232 » mov» QWORD[((-16))+r9*8+rsp],r13 |
148 mov r13,rdx | 233 mov r13,rdx |
149 mov r11,r10 | 234 mov r11,r10 |
150 | 235 |
151 xor rdx,rdx | 236 xor rdx,rdx |
152 add r13,r11 | 237 add r13,r11 |
153 adc rdx,0 | 238 adc rdx,0 |
154 mov QWORD[((-8))+r9*8+rsp],r13 | 239 mov QWORD[((-8))+r9*8+rsp],r13 |
155 mov QWORD[r9*8+rsp],rdx | 240 mov QWORD[r9*8+rsp],rdx |
156 | 241 |
157 lea r14,[1+r14] | 242 lea r14,[1+r14] |
158 jmp NEAR $L$outer | 243 jmp NEAR $L$outer |
159 ALIGN 16 | 244 ALIGN 16 |
160 $L$outer: | 245 $L$outer: |
| 246 lea rdx,[((24+128))+r9*8+rsp] |
| 247 and rdx,-16 |
| 248 pxor xmm4,xmm4 |
| 249 pxor xmm5,xmm5 |
| 250 movdqa xmm0,XMMWORD[((-128))+r12] |
| 251 movdqa xmm1,XMMWORD[((-112))+r12] |
| 252 movdqa xmm2,XMMWORD[((-96))+r12] |
| 253 movdqa xmm3,XMMWORD[((-80))+r12] |
| 254 pand xmm0,XMMWORD[((-128))+rdx] |
| 255 pand xmm1,XMMWORD[((-112))+rdx] |
| 256 por xmm4,xmm0 |
| 257 pand xmm2,XMMWORD[((-96))+rdx] |
| 258 por xmm5,xmm1 |
| 259 pand xmm3,XMMWORD[((-80))+rdx] |
| 260 por xmm4,xmm2 |
| 261 por xmm5,xmm3 |
| 262 movdqa xmm0,XMMWORD[((-64))+r12] |
| 263 movdqa xmm1,XMMWORD[((-48))+r12] |
| 264 movdqa xmm2,XMMWORD[((-32))+r12] |
| 265 movdqa xmm3,XMMWORD[((-16))+r12] |
| 266 pand xmm0,XMMWORD[((-64))+rdx] |
| 267 pand xmm1,XMMWORD[((-48))+rdx] |
| 268 por xmm4,xmm0 |
| 269 pand xmm2,XMMWORD[((-32))+rdx] |
| 270 por xmm5,xmm1 |
| 271 pand xmm3,XMMWORD[((-16))+rdx] |
| 272 por xmm4,xmm2 |
| 273 por xmm5,xmm3 |
| 274 movdqa xmm0,XMMWORD[r12] |
| 275 movdqa xmm1,XMMWORD[16+r12] |
| 276 movdqa xmm2,XMMWORD[32+r12] |
| 277 movdqa xmm3,XMMWORD[48+r12] |
| 278 pand xmm0,XMMWORD[rdx] |
| 279 pand xmm1,XMMWORD[16+rdx] |
| 280 por xmm4,xmm0 |
| 281 pand xmm2,XMMWORD[32+rdx] |
| 282 por xmm5,xmm1 |
| 283 pand xmm3,XMMWORD[48+rdx] |
| 284 por xmm4,xmm2 |
| 285 por xmm5,xmm3 |
| 286 movdqa xmm0,XMMWORD[64+r12] |
| 287 movdqa xmm1,XMMWORD[80+r12] |
| 288 movdqa xmm2,XMMWORD[96+r12] |
| 289 movdqa xmm3,XMMWORD[112+r12] |
| 290 pand xmm0,XMMWORD[64+rdx] |
| 291 pand xmm1,XMMWORD[80+rdx] |
| 292 por xmm4,xmm0 |
| 293 pand xmm2,XMMWORD[96+rdx] |
| 294 por xmm5,xmm1 |
| 295 pand xmm3,XMMWORD[112+rdx] |
| 296 por xmm4,xmm2 |
| 297 por xmm5,xmm3 |
| 298 por xmm4,xmm5 |
| 299 pshufd xmm0,xmm4,0x4e |
| 300 por xmm0,xmm4 |
| 301 lea r12,[256+r12] |
| 302 |
| 303 mov rax,QWORD[rsi] |
| 304 DB 102,72,15,126,195 |
| 305 |
161 xor r15,r15 | 306 xor r15,r15 |
162 mov rbp,r8 | 307 mov rbp,r8 |
163 mov r10,QWORD[rsp] | 308 mov r10,QWORD[rsp] |
164 | 309 |
165 movq xmm0,QWORD[(((-96)))+r12] | |
166 movq xmm1,QWORD[((-32))+r12] | |
167 pand xmm0,xmm4 | |
168 movq xmm2,QWORD[32+r12] | |
169 pand xmm1,xmm5 | |
170 | |
171 mul rbx | 310 mul rbx |
172 add r10,rax | 311 add r10,rax |
173 mov rax,QWORD[rcx] | 312 mov rax,QWORD[rcx] |
174 adc rdx,0 | 313 adc rdx,0 |
175 | 314 |
176 movq xmm3,QWORD[96+r12] | |
177 pand xmm2,xmm6 | |
178 por xmm0,xmm1 | |
179 pand xmm3,xmm7 | |
180 | |
181 imul rbp,r10 | 315 imul rbp,r10 |
182 mov r11,rdx | 316 mov r11,rdx |
183 | 317 |
184 por xmm0,xmm2 | |
185 lea r12,[256+r12] | |
186 por xmm0,xmm3 | |
187 | |
188 mul rbp | 318 mul rbp |
189 add r10,rax | 319 add r10,rax |
190 mov rax,QWORD[8+rsi] | 320 mov rax,QWORD[8+rsi] |
191 adc rdx,0 | 321 adc rdx,0 |
192 mov r10,QWORD[8+rsp] | 322 mov r10,QWORD[8+rsp] |
193 mov r13,rdx | 323 mov r13,rdx |
194 | 324 |
195 lea r15,[1+r15] | 325 lea r15,[1+r15] |
196 jmp NEAR $L$inner_enter | 326 jmp NEAR $L$inner_enter |
197 | 327 |
(...skipping 15 matching lines...) Expand all Loading... |
213 adc rdx,0 | 343 adc rdx,0 |
214 add r10,r11 | 344 add r10,r11 |
215 mov r11,rdx | 345 mov r11,rdx |
216 adc r11,0 | 346 adc r11,0 |
217 lea r15,[1+r15] | 347 lea r15,[1+r15] |
218 | 348 |
219 mul rbp | 349 mul rbp |
220 cmp r15,r9 | 350 cmp r15,r9 |
221 jne NEAR $L$inner | 351 jne NEAR $L$inner |
222 | 352 |
223 DB 102,72,15,126,195 | |
224 | |
225 add r13,rax | 353 add r13,rax |
226 mov rax,QWORD[rsi] | |
227 adc rdx,0 | 354 adc rdx,0 |
228 add r13,r10 | 355 add r13,r10 |
229 » mov» r10,QWORD[r15*8+rsp] | 356 » mov» r10,QWORD[r9*8+rsp] |
230 adc rdx,0 | 357 adc rdx,0 |
231 » mov» QWORD[((-16))+r15*8+rsp],r13 | 358 » mov» QWORD[((-16))+r9*8+rsp],r13 |
232 mov r13,rdx | 359 mov r13,rdx |
233 | 360 |
234 xor rdx,rdx | 361 xor rdx,rdx |
235 add r13,r11 | 362 add r13,r11 |
236 adc rdx,0 | 363 adc rdx,0 |
237 add r13,r10 | 364 add r13,r10 |
238 adc rdx,0 | 365 adc rdx,0 |
239 mov QWORD[((-8))+r9*8+rsp],r13 | 366 mov QWORD[((-8))+r9*8+rsp],r13 |
240 mov QWORD[r9*8+rsp],rdx | 367 mov QWORD[r9*8+rsp],rdx |
241 | 368 |
(...skipping 25 matching lines...) Expand all Loading... |
267 and rsi,rax | 394 and rsi,rax |
268 xor rsi,rcx | 395 xor rsi,rcx |
269 mov QWORD[r14*8+rsp],r14 | 396 mov QWORD[r14*8+rsp],r14 |
270 mov QWORD[r14*8+rdi],rsi | 397 mov QWORD[r14*8+rdi],rsi |
271 lea r14,[1+r14] | 398 lea r14,[1+r14] |
272 sub r15,1 | 399 sub r15,1 |
273 jnz NEAR $L$copy | 400 jnz NEAR $L$copy |
274 | 401 |
275 mov rsi,QWORD[8+r9*8+rsp] | 402 mov rsi,QWORD[8+r9*8+rsp] |
276 mov rax,1 | 403 mov rax,1 |
277 » movaps» xmm6,XMMWORD[((-88))+rsi] | 404 |
278 » movaps» xmm7,XMMWORD[((-72))+rsi] | |
279 mov r15,QWORD[((-48))+rsi] | 405 mov r15,QWORD[((-48))+rsi] |
280 mov r14,QWORD[((-40))+rsi] | 406 mov r14,QWORD[((-40))+rsi] |
281 mov r13,QWORD[((-32))+rsi] | 407 mov r13,QWORD[((-32))+rsi] |
282 mov r12,QWORD[((-24))+rsi] | 408 mov r12,QWORD[((-24))+rsi] |
283 mov rbp,QWORD[((-16))+rsi] | 409 mov rbp,QWORD[((-16))+rsi] |
284 mov rbx,QWORD[((-8))+rsi] | 410 mov rbx,QWORD[((-8))+rsi] |
285 lea rsp,[rsi] | 411 lea rsp,[rsi] |
286 $L$mul_epilogue: | 412 $L$mul_epilogue: |
287 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 413 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
288 mov rsi,QWORD[16+rsp] | 414 mov rsi,QWORD[16+rsp] |
(...skipping 16 matching lines...) Expand all Loading... |
305 | 431 |
306 $L$mul4x_enter: | 432 $L$mul4x_enter: |
307 DB 0x67 | 433 DB 0x67 |
308 mov rax,rsp | 434 mov rax,rsp |
309 push rbx | 435 push rbx |
310 push rbp | 436 push rbp |
311 push r12 | 437 push r12 |
312 push r13 | 438 push r13 |
313 push r14 | 439 push r14 |
314 push r15 | 440 push r15 |
315 » lea» rsp,[((-40))+rsp] | 441 |
316 » movaps» XMMWORD[rsp],xmm6 | |
317 » movaps» XMMWORD[16+rsp],xmm7 | |
318 DB 0x67 | 442 DB 0x67 |
319 mov r10d,r9d | |
320 shl r9d,3 | 443 shl r9d,3 |
321 » shl» r10d,3+2 | 444 » lea» r10,[r9*2+r9] |
322 neg r9 | 445 neg r9 |
323 | 446 |
324 | 447 |
325 | 448 |
326 | 449 |
327 | 450 |
328 | 451 |
329 | 452 |
330 | 453 |
331 » lea» r11,[((-64))+r9*2+rsp] | 454 |
332 » sub» r11,rsi | 455 |
| 456 » lea» r11,[((-320))+r9*2+rsp] |
| 457 » sub» r11,rdi |
333 and r11,4095 | 458 and r11,4095 |
334 cmp r10,r11 | 459 cmp r10,r11 |
335 jb NEAR $L$mul4xsp_alt | 460 jb NEAR $L$mul4xsp_alt |
336 sub rsp,r11 | 461 sub rsp,r11 |
337 » lea» rsp,[((-64))+r9*2+rsp] | 462 » lea» rsp,[((-320))+r9*2+rsp] |
338 jmp NEAR $L$mul4xsp_done | 463 jmp NEAR $L$mul4xsp_done |
339 | 464 |
340 ALIGN 32 | 465 ALIGN 32 |
341 $L$mul4xsp_alt: | 466 $L$mul4xsp_alt: |
342 » lea» r10,[((4096-64))+r9*2] | 467 » lea» r10,[((4096-320))+r9*2] |
343 » lea» rsp,[((-64))+r9*2+rsp] | 468 » lea» rsp,[((-320))+r9*2+rsp] |
344 sub r11,r10 | 469 sub r11,r10 |
345 mov r10,0 | 470 mov r10,0 |
346 cmovc r11,r10 | 471 cmovc r11,r10 |
347 sub rsp,r11 | 472 sub rsp,r11 |
348 $L$mul4xsp_done: | 473 $L$mul4xsp_done: |
349 and rsp,-64 | 474 and rsp,-64 |
350 neg r9 | 475 neg r9 |
351 | 476 |
352 mov QWORD[40+rsp],rax | 477 mov QWORD[40+rsp],rax |
353 $L$mul4x_body: | 478 $L$mul4x_body: |
354 | 479 |
355 call mul4x_internal | 480 call mul4x_internal |
356 | 481 |
357 mov rsi,QWORD[40+rsp] | 482 mov rsi,QWORD[40+rsp] |
358 mov rax,1 | 483 mov rax,1 |
359 » movaps» xmm6,XMMWORD[((-88))+rsi] | 484 |
360 » movaps» xmm7,XMMWORD[((-72))+rsi] | |
361 mov r15,QWORD[((-48))+rsi] | 485 mov r15,QWORD[((-48))+rsi] |
362 mov r14,QWORD[((-40))+rsi] | 486 mov r14,QWORD[((-40))+rsi] |
363 mov r13,QWORD[((-32))+rsi] | 487 mov r13,QWORD[((-32))+rsi] |
364 mov r12,QWORD[((-24))+rsi] | 488 mov r12,QWORD[((-24))+rsi] |
365 mov rbp,QWORD[((-16))+rsi] | 489 mov rbp,QWORD[((-16))+rsi] |
366 mov rbx,QWORD[((-8))+rsi] | 490 mov rbx,QWORD[((-8))+rsi] |
367 lea rsp,[rsi] | 491 lea rsp,[rsi] |
368 $L$mul4x_epilogue: | 492 $L$mul4x_epilogue: |
369 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | 493 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
370 mov rsi,QWORD[16+rsp] | 494 mov rsi,QWORD[16+rsp] |
371 DB 0F3h,0C3h ;repret | 495 DB 0F3h,0C3h ;repret |
372 $L$SEH_end_bn_mul4x_mont_gather5: | 496 $L$SEH_end_bn_mul4x_mont_gather5: |
373 | 497 |
374 | 498 |
375 ALIGN 32 | 499 ALIGN 32 |
376 mul4x_internal: | 500 mul4x_internal: |
377 shl r9,5 | 501 shl r9,5 |
378 » mov» r10d,DWORD[56+rax] | 502 » movd» xmm5,DWORD[56+rax] |
379 » lea» r13,[256+r9*1+rdx] | 503 » lea» rax,[$L$inc] |
| 504 » lea» r13,[128+r9*1+rdx] |
380 shr r9,5 | 505 shr r9,5 |
381 » mov» r11,r10 | 506 » movdqa» xmm0,XMMWORD[rax] |
382 » shr» r10,3 | 507 » movdqa» xmm1,XMMWORD[16+rax] |
383 » and» r11,7 | 508 » lea» r10,[((88-112))+r9*1+rsp] |
384 » not» r10 | 509 » lea» r12,[128+rdx] |
385 » lea» rax,[$L$magic_masks] | |
386 » and» r10,3 | |
387 » lea» r12,[96+r11*8+rdx] | |
388 » movq» xmm4,QWORD[r10*8+rax] | |
389 » movq» xmm5,QWORD[8+r10*8+rax] | |
390 » add» r11,7 | |
391 » movq» xmm6,QWORD[16+r10*8+rax] | |
392 » movq» xmm7,QWORD[24+r10*8+rax] | |
393 » and» r11,7 | |
394 | 510 |
395 » movq» xmm0,QWORD[(((-96)))+r12] | 511 » pshufd» xmm5,xmm5,0 |
396 » lea» r14,[256+r12] | 512 » movdqa» xmm4,xmm1 |
397 » movq» xmm1,QWORD[((-32))+r12] | 513 DB» 0x67,0x67 |
398 » pand» xmm0,xmm4 | 514 » movdqa» xmm2,xmm1 |
399 » movq» xmm2,QWORD[32+r12] | 515 » paddd» xmm1,xmm0 |
400 » pand» xmm1,xmm5 | 516 » pcmpeqd»xmm0,xmm5 |
401 » movq» xmm3,QWORD[96+r12] | |
402 » pand» xmm2,xmm6 | |
403 DB 0x67 | 517 DB 0x67 |
| 518 movdqa xmm3,xmm4 |
| 519 paddd xmm2,xmm1 |
| 520 pcmpeqd xmm1,xmm5 |
| 521 movdqa XMMWORD[112+r10],xmm0 |
| 522 movdqa xmm0,xmm4 |
| 523 |
| 524 paddd xmm3,xmm2 |
| 525 pcmpeqd xmm2,xmm5 |
| 526 movdqa XMMWORD[128+r10],xmm1 |
| 527 movdqa xmm1,xmm4 |
| 528 |
| 529 paddd xmm0,xmm3 |
| 530 pcmpeqd xmm3,xmm5 |
| 531 movdqa XMMWORD[144+r10],xmm2 |
| 532 movdqa xmm2,xmm4 |
| 533 |
| 534 paddd xmm1,xmm0 |
| 535 pcmpeqd xmm0,xmm5 |
| 536 movdqa XMMWORD[160+r10],xmm3 |
| 537 movdqa xmm3,xmm4 |
| 538 paddd xmm2,xmm1 |
| 539 pcmpeqd xmm1,xmm5 |
| 540 movdqa XMMWORD[176+r10],xmm0 |
| 541 movdqa xmm0,xmm4 |
| 542 |
| 543 paddd xmm3,xmm2 |
| 544 pcmpeqd xmm2,xmm5 |
| 545 movdqa XMMWORD[192+r10],xmm1 |
| 546 movdqa xmm1,xmm4 |
| 547 |
| 548 paddd xmm0,xmm3 |
| 549 pcmpeqd xmm3,xmm5 |
| 550 movdqa XMMWORD[208+r10],xmm2 |
| 551 movdqa xmm2,xmm4 |
| 552 |
| 553 paddd xmm1,xmm0 |
| 554 pcmpeqd xmm0,xmm5 |
| 555 movdqa XMMWORD[224+r10],xmm3 |
| 556 movdqa xmm3,xmm4 |
| 557 paddd xmm2,xmm1 |
| 558 pcmpeqd xmm1,xmm5 |
| 559 movdqa XMMWORD[240+r10],xmm0 |
| 560 movdqa xmm0,xmm4 |
| 561 |
| 562 paddd xmm3,xmm2 |
| 563 pcmpeqd xmm2,xmm5 |
| 564 movdqa XMMWORD[256+r10],xmm1 |
| 565 movdqa xmm1,xmm4 |
| 566 |
| 567 paddd xmm0,xmm3 |
| 568 pcmpeqd xmm3,xmm5 |
| 569 movdqa XMMWORD[272+r10],xmm2 |
| 570 movdqa xmm2,xmm4 |
| 571 |
| 572 paddd xmm1,xmm0 |
| 573 pcmpeqd xmm0,xmm5 |
| 574 movdqa XMMWORD[288+r10],xmm3 |
| 575 movdqa xmm3,xmm4 |
| 576 paddd xmm2,xmm1 |
| 577 pcmpeqd xmm1,xmm5 |
| 578 movdqa XMMWORD[304+r10],xmm0 |
| 579 |
| 580 paddd xmm3,xmm2 |
| 581 DB 0x67 |
| 582 pcmpeqd xmm2,xmm5 |
| 583 movdqa XMMWORD[320+r10],xmm1 |
| 584 |
| 585 pcmpeqd xmm3,xmm5 |
| 586 movdqa XMMWORD[336+r10],xmm2 |
| 587 pand xmm0,XMMWORD[64+r12] |
| 588 |
| 589 pand xmm1,XMMWORD[80+r12] |
| 590 pand xmm2,XMMWORD[96+r12] |
| 591 movdqa XMMWORD[352+r10],xmm3 |
| 592 pand xmm3,XMMWORD[112+r12] |
| 593 por xmm0,xmm2 |
| 594 por xmm1,xmm3 |
| 595 movdqa xmm4,XMMWORD[((-128))+r12] |
| 596 movdqa xmm5,XMMWORD[((-112))+r12] |
| 597 movdqa xmm2,XMMWORD[((-96))+r12] |
| 598 pand xmm4,XMMWORD[112+r10] |
| 599 movdqa xmm3,XMMWORD[((-80))+r12] |
| 600 pand xmm5,XMMWORD[128+r10] |
| 601 por xmm0,xmm4 |
| 602 pand xmm2,XMMWORD[144+r10] |
| 603 por xmm1,xmm5 |
| 604 pand xmm3,XMMWORD[160+r10] |
| 605 por xmm0,xmm2 |
| 606 por xmm1,xmm3 |
| 607 movdqa xmm4,XMMWORD[((-64))+r12] |
| 608 movdqa xmm5,XMMWORD[((-48))+r12] |
| 609 movdqa xmm2,XMMWORD[((-32))+r12] |
| 610 pand xmm4,XMMWORD[176+r10] |
| 611 movdqa xmm3,XMMWORD[((-16))+r12] |
| 612 pand xmm5,XMMWORD[192+r10] |
| 613 por xmm0,xmm4 |
| 614 pand xmm2,XMMWORD[208+r10] |
| 615 por xmm1,xmm5 |
| 616 pand xmm3,XMMWORD[224+r10] |
| 617 por xmm0,xmm2 |
| 618 por xmm1,xmm3 |
| 619 movdqa xmm4,XMMWORD[r12] |
| 620 movdqa xmm5,XMMWORD[16+r12] |
| 621 movdqa xmm2,XMMWORD[32+r12] |
| 622 pand xmm4,XMMWORD[240+r10] |
| 623 movdqa xmm3,XMMWORD[48+r12] |
| 624 pand xmm5,XMMWORD[256+r10] |
| 625 por xmm0,xmm4 |
| 626 pand xmm2,XMMWORD[272+r10] |
| 627 por xmm1,xmm5 |
| 628 pand xmm3,XMMWORD[288+r10] |
| 629 por xmm0,xmm2 |
| 630 por xmm1,xmm3 |
404 por xmm0,xmm1 | 631 por xmm0,xmm1 |
405 » movq» xmm1,QWORD[((-96))+r14] | 632 » pshufd» xmm1,xmm0,0x4e |
406 DB» 0x67 | 633 » por» xmm0,xmm1 |
407 » pand» xmm3,xmm7 | 634 » lea» r12,[256+r12] |
408 DB» 0x67 | 635 DB» 102,72,15,126,195 |
409 » por» xmm0,xmm2 | |
410 » movq» xmm2,QWORD[((-32))+r14] | |
411 DB» 0x67 | |
412 » pand» xmm1,xmm4 | |
413 DB» 0x67 | |
414 » por» xmm0,xmm3 | |
415 » movq» xmm3,QWORD[32+r14] | |
416 | 636 |
417 DB 102,72,15,126,195 | |
418 movq xmm0,QWORD[96+r14] | |
419 mov QWORD[((16+8))+rsp],r13 | 637 mov QWORD[((16+8))+rsp],r13 |
420 mov QWORD[((56+8))+rsp],rdi | 638 mov QWORD[((56+8))+rsp],rdi |
421 | 639 |
422 mov r8,QWORD[r8] | 640 mov r8,QWORD[r8] |
423 mov rax,QWORD[rsi] | 641 mov rax,QWORD[rsi] |
424 lea rsi,[r9*1+rsi] | 642 lea rsi,[r9*1+rsi] |
425 neg r9 | 643 neg r9 |
426 | 644 |
427 mov rbp,r8 | 645 mov rbp,r8 |
428 mul rbx | 646 mul rbx |
429 mov r10,rax | 647 mov r10,rax |
430 mov rax,QWORD[rcx] | 648 mov rax,QWORD[rcx] |
431 | 649 |
432 pand xmm2,xmm5 | |
433 pand xmm3,xmm6 | |
434 por xmm1,xmm2 | |
435 | |
436 imul rbp,r10 | 650 imul rbp,r10 |
437 | 651 » lea» r14,[((64+8))+rsp] |
438 | |
439 | |
440 | |
441 | |
442 | |
443 | |
444 » lea» r14,[((64+8))+r11*8+rsp] | |
445 mov r11,rdx | 652 mov r11,rdx |
446 | 653 |
447 pand xmm0,xmm7 | |
448 por xmm1,xmm3 | |
449 lea r12,[512+r12] | |
450 por xmm0,xmm1 | |
451 | |
452 mul rbp | 654 mul rbp |
453 add r10,rax | 655 add r10,rax |
454 mov rax,QWORD[8+r9*1+rsi] | 656 mov rax,QWORD[8+r9*1+rsi] |
455 adc rdx,0 | 657 adc rdx,0 |
456 mov rdi,rdx | 658 mov rdi,rdx |
457 | 659 |
458 mul rbx | 660 mul rbx |
459 add r11,rax | 661 add r11,rax |
460 » mov» rax,QWORD[16+rcx] | 662 » mov» rax,QWORD[8+rcx] |
461 adc rdx,0 | 663 adc rdx,0 |
462 mov r10,rdx | 664 mov r10,rdx |
463 | 665 |
464 mul rbp | 666 mul rbp |
465 add rdi,rax | 667 add rdi,rax |
466 mov rax,QWORD[16+r9*1+rsi] | 668 mov rax,QWORD[16+r9*1+rsi] |
467 adc rdx,0 | 669 adc rdx,0 |
468 add rdi,r11 | 670 add rdi,r11 |
469 lea r15,[32+r9] | 671 lea r15,[32+r9] |
470 » lea» rcx,[64+rcx] | 672 » lea» rcx,[32+rcx] |
471 adc rdx,0 | 673 adc rdx,0 |
472 mov QWORD[r14],rdi | 674 mov QWORD[r14],rdi |
473 mov r13,rdx | 675 mov r13,rdx |
474 jmp NEAR $L$1st4x | 676 jmp NEAR $L$1st4x |
475 | 677 |
476 ALIGN 32 | 678 ALIGN 32 |
477 $L$1st4x: | 679 $L$1st4x: |
478 mul rbx | 680 mul rbx |
479 add r10,rax | 681 add r10,rax |
480 » mov» rax,QWORD[((-32))+rcx] | 682 » mov» rax,QWORD[((-16))+rcx] |
481 lea r14,[32+r14] | 683 lea r14,[32+r14] |
482 adc rdx,0 | 684 adc rdx,0 |
483 mov r11,rdx | 685 mov r11,rdx |
484 | 686 |
485 mul rbp | 687 mul rbp |
486 add r13,rax | 688 add r13,rax |
487 mov rax,QWORD[((-8))+r15*1+rsi] | 689 mov rax,QWORD[((-8))+r15*1+rsi] |
488 adc rdx,0 | 690 adc rdx,0 |
489 add r13,r10 | 691 add r13,r10 |
490 adc rdx,0 | 692 adc rdx,0 |
491 mov QWORD[((-24))+r14],r13 | 693 mov QWORD[((-24))+r14],r13 |
492 mov rdi,rdx | 694 mov rdi,rdx |
493 | 695 |
494 mul rbx | 696 mul rbx |
495 add r11,rax | 697 add r11,rax |
496 » mov» rax,QWORD[((-16))+rcx] | 698 » mov» rax,QWORD[((-8))+rcx] |
497 adc rdx,0 | 699 adc rdx,0 |
498 mov r10,rdx | 700 mov r10,rdx |
499 | 701 |
500 mul rbp | 702 mul rbp |
501 add rdi,rax | 703 add rdi,rax |
502 mov rax,QWORD[r15*1+rsi] | 704 mov rax,QWORD[r15*1+rsi] |
503 adc rdx,0 | 705 adc rdx,0 |
504 add rdi,r11 | 706 add rdi,r11 |
505 adc rdx,0 | 707 adc rdx,0 |
506 mov QWORD[((-16))+r14],rdi | 708 mov QWORD[((-16))+r14],rdi |
507 mov r13,rdx | 709 mov r13,rdx |
508 | 710 |
509 mul rbx | 711 mul rbx |
510 add r10,rax | 712 add r10,rax |
511 mov rax,QWORD[rcx] | 713 mov rax,QWORD[rcx] |
512 adc rdx,0 | 714 adc rdx,0 |
513 mov r11,rdx | 715 mov r11,rdx |
514 | 716 |
515 mul rbp | 717 mul rbp |
516 add r13,rax | 718 add r13,rax |
517 mov rax,QWORD[8+r15*1+rsi] | 719 mov rax,QWORD[8+r15*1+rsi] |
518 adc rdx,0 | 720 adc rdx,0 |
519 add r13,r10 | 721 add r13,r10 |
520 adc rdx,0 | 722 adc rdx,0 |
521 mov QWORD[((-8))+r14],r13 | 723 mov QWORD[((-8))+r14],r13 |
522 mov rdi,rdx | 724 mov rdi,rdx |
523 | 725 |
524 mul rbx | 726 mul rbx |
525 add r11,rax | 727 add r11,rax |
526 » mov» rax,QWORD[16+rcx] | 728 » mov» rax,QWORD[8+rcx] |
527 adc rdx,0 | 729 adc rdx,0 |
528 mov r10,rdx | 730 mov r10,rdx |
529 | 731 |
530 mul rbp | 732 mul rbp |
531 add rdi,rax | 733 add rdi,rax |
532 mov rax,QWORD[16+r15*1+rsi] | 734 mov rax,QWORD[16+r15*1+rsi] |
533 adc rdx,0 | 735 adc rdx,0 |
534 add rdi,r11 | 736 add rdi,r11 |
535 » lea» rcx,[64+rcx] | 737 » lea» rcx,[32+rcx] |
536 adc rdx,0 | 738 adc rdx,0 |
537 mov QWORD[r14],rdi | 739 mov QWORD[r14],rdi |
538 mov r13,rdx | 740 mov r13,rdx |
539 | 741 |
540 add r15,32 | 742 add r15,32 |
541 jnz NEAR $L$1st4x | 743 jnz NEAR $L$1st4x |
542 | 744 |
543 mul rbx | 745 mul rbx |
544 add r10,rax | 746 add r10,rax |
545 » mov» rax,QWORD[((-32))+rcx] | 747 » mov» rax,QWORD[((-16))+rcx] |
546 lea r14,[32+r14] | 748 lea r14,[32+r14] |
547 adc rdx,0 | 749 adc rdx,0 |
548 mov r11,rdx | 750 mov r11,rdx |
549 | 751 |
550 mul rbp | 752 mul rbp |
551 add r13,rax | 753 add r13,rax |
552 mov rax,QWORD[((-8))+rsi] | 754 mov rax,QWORD[((-8))+rsi] |
553 adc rdx,0 | 755 adc rdx,0 |
554 add r13,r10 | 756 add r13,r10 |
555 adc rdx,0 | 757 adc rdx,0 |
556 mov QWORD[((-24))+r14],r13 | 758 mov QWORD[((-24))+r14],r13 |
557 mov rdi,rdx | 759 mov rdi,rdx |
558 | 760 |
559 mul rbx | 761 mul rbx |
560 add r11,rax | 762 add r11,rax |
561 » mov» rax,QWORD[((-16))+rcx] | 763 » mov» rax,QWORD[((-8))+rcx] |
562 adc rdx,0 | 764 adc rdx,0 |
563 mov r10,rdx | 765 mov r10,rdx |
564 | 766 |
565 mul rbp | 767 mul rbp |
566 add rdi,rax | 768 add rdi,rax |
567 mov rax,QWORD[r9*1+rsi] | 769 mov rax,QWORD[r9*1+rsi] |
568 adc rdx,0 | 770 adc rdx,0 |
569 add rdi,r11 | 771 add rdi,r11 |
570 adc rdx,0 | 772 adc rdx,0 |
571 mov QWORD[((-16))+r14],rdi | 773 mov QWORD[((-16))+r14],rdi |
572 mov r13,rdx | 774 mov r13,rdx |
573 | 775 |
574 DB» 102,72,15,126,195 | 776 » lea» rcx,[r9*1+rcx] |
575 » lea» rcx,[r9*2+rcx] | |
576 | 777 |
577 xor rdi,rdi | 778 xor rdi,rdi |
578 add r13,r10 | 779 add r13,r10 |
579 adc rdi,0 | 780 adc rdi,0 |
580 mov QWORD[((-8))+r14],r13 | 781 mov QWORD[((-8))+r14],r13 |
581 | 782 |
582 jmp NEAR $L$outer4x | 783 jmp NEAR $L$outer4x |
583 | 784 |
584 ALIGN 32 | 785 ALIGN 32 |
585 $L$outer4x: | 786 $L$outer4x: |
| 787 lea rdx,[((16+128))+r14] |
| 788 pxor xmm4,xmm4 |
| 789 pxor xmm5,xmm5 |
| 790 movdqa xmm0,XMMWORD[((-128))+r12] |
| 791 movdqa xmm1,XMMWORD[((-112))+r12] |
| 792 movdqa xmm2,XMMWORD[((-96))+r12] |
| 793 movdqa xmm3,XMMWORD[((-80))+r12] |
| 794 pand xmm0,XMMWORD[((-128))+rdx] |
| 795 pand xmm1,XMMWORD[((-112))+rdx] |
| 796 por xmm4,xmm0 |
| 797 pand xmm2,XMMWORD[((-96))+rdx] |
| 798 por xmm5,xmm1 |
| 799 pand xmm3,XMMWORD[((-80))+rdx] |
| 800 por xmm4,xmm2 |
| 801 por xmm5,xmm3 |
| 802 movdqa xmm0,XMMWORD[((-64))+r12] |
| 803 movdqa xmm1,XMMWORD[((-48))+r12] |
| 804 movdqa xmm2,XMMWORD[((-32))+r12] |
| 805 movdqa xmm3,XMMWORD[((-16))+r12] |
| 806 pand xmm0,XMMWORD[((-64))+rdx] |
| 807 pand xmm1,XMMWORD[((-48))+rdx] |
| 808 por xmm4,xmm0 |
| 809 pand xmm2,XMMWORD[((-32))+rdx] |
| 810 por xmm5,xmm1 |
| 811 pand xmm3,XMMWORD[((-16))+rdx] |
| 812 por xmm4,xmm2 |
| 813 por xmm5,xmm3 |
| 814 movdqa xmm0,XMMWORD[r12] |
| 815 movdqa xmm1,XMMWORD[16+r12] |
| 816 movdqa xmm2,XMMWORD[32+r12] |
| 817 movdqa xmm3,XMMWORD[48+r12] |
| 818 pand xmm0,XMMWORD[rdx] |
| 819 pand xmm1,XMMWORD[16+rdx] |
| 820 por xmm4,xmm0 |
| 821 pand xmm2,XMMWORD[32+rdx] |
| 822 por xmm5,xmm1 |
| 823 pand xmm3,XMMWORD[48+rdx] |
| 824 por xmm4,xmm2 |
| 825 por xmm5,xmm3 |
| 826 movdqa xmm0,XMMWORD[64+r12] |
| 827 movdqa xmm1,XMMWORD[80+r12] |
| 828 movdqa xmm2,XMMWORD[96+r12] |
| 829 movdqa xmm3,XMMWORD[112+r12] |
| 830 pand xmm0,XMMWORD[64+rdx] |
| 831 pand xmm1,XMMWORD[80+rdx] |
| 832 por xmm4,xmm0 |
| 833 pand xmm2,XMMWORD[96+rdx] |
| 834 por xmm5,xmm1 |
| 835 pand xmm3,XMMWORD[112+rdx] |
| 836 por xmm4,xmm2 |
| 837 por xmm5,xmm3 |
| 838 por xmm4,xmm5 |
| 839 pshufd xmm0,xmm4,0x4e |
| 840 por xmm0,xmm4 |
| 841 lea r12,[256+r12] |
| 842 DB 102,72,15,126,195 |
| 843 |
586 mov r10,QWORD[r9*1+r14] | 844 mov r10,QWORD[r9*1+r14] |
587 mov rbp,r8 | 845 mov rbp,r8 |
588 mul rbx | 846 mul rbx |
589 add r10,rax | 847 add r10,rax |
590 mov rax,QWORD[rcx] | 848 mov rax,QWORD[rcx] |
591 adc rdx,0 | 849 adc rdx,0 |
592 | 850 |
593 movq xmm0,QWORD[(((-96)))+r12] | |
594 movq xmm1,QWORD[((-32))+r12] | |
595 pand xmm0,xmm4 | |
596 movq xmm2,QWORD[32+r12] | |
597 pand xmm1,xmm5 | |
598 movq xmm3,QWORD[96+r12] | |
599 | |
600 imul rbp,r10 | 851 imul rbp,r10 |
601 DB 0x67 | |
602 mov r11,rdx | 852 mov r11,rdx |
603 mov QWORD[r14],rdi | 853 mov QWORD[r14],rdi |
604 | 854 |
605 pand xmm2,xmm6 | |
606 por xmm0,xmm1 | |
607 pand xmm3,xmm7 | |
608 por xmm0,xmm2 | |
609 lea r14,[r9*1+r14] | 855 lea r14,[r9*1+r14] |
610 lea r12,[256+r12] | |
611 por xmm0,xmm3 | |
612 | 856 |
613 mul rbp | 857 mul rbp |
614 add r10,rax | 858 add r10,rax |
615 mov rax,QWORD[8+r9*1+rsi] | 859 mov rax,QWORD[8+r9*1+rsi] |
616 adc rdx,0 | 860 adc rdx,0 |
617 mov rdi,rdx | 861 mov rdi,rdx |
618 | 862 |
619 mul rbx | 863 mul rbx |
620 add r11,rax | 864 add r11,rax |
621 » mov» rax,QWORD[16+rcx] | 865 » mov» rax,QWORD[8+rcx] |
622 adc rdx,0 | 866 adc rdx,0 |
623 add r11,QWORD[8+r14] | 867 add r11,QWORD[8+r14] |
624 adc rdx,0 | 868 adc rdx,0 |
625 mov r10,rdx | 869 mov r10,rdx |
626 | 870 |
627 mul rbp | 871 mul rbp |
628 add rdi,rax | 872 add rdi,rax |
629 mov rax,QWORD[16+r9*1+rsi] | 873 mov rax,QWORD[16+r9*1+rsi] |
630 adc rdx,0 | 874 adc rdx,0 |
631 add rdi,r11 | 875 add rdi,r11 |
632 lea r15,[32+r9] | 876 lea r15,[32+r9] |
633 » lea» rcx,[64+rcx] | 877 » lea» rcx,[32+rcx] |
634 adc rdx,0 | 878 adc rdx,0 |
635 mov r13,rdx | 879 mov r13,rdx |
636 jmp NEAR $L$inner4x | 880 jmp NEAR $L$inner4x |
637 | 881 |
638 ALIGN 32 | 882 ALIGN 32 |
639 $L$inner4x: | 883 $L$inner4x: |
640 mul rbx | 884 mul rbx |
641 add r10,rax | 885 add r10,rax |
642 » mov» rax,QWORD[((-32))+rcx] | 886 » mov» rax,QWORD[((-16))+rcx] |
643 adc rdx,0 | 887 adc rdx,0 |
644 add r10,QWORD[16+r14] | 888 add r10,QWORD[16+r14] |
645 lea r14,[32+r14] | 889 lea r14,[32+r14] |
646 adc rdx,0 | 890 adc rdx,0 |
647 mov r11,rdx | 891 mov r11,rdx |
648 | 892 |
649 mul rbp | 893 mul rbp |
650 add r13,rax | 894 add r13,rax |
651 mov rax,QWORD[((-8))+r15*1+rsi] | 895 mov rax,QWORD[((-8))+r15*1+rsi] |
652 adc rdx,0 | 896 adc rdx,0 |
653 add r13,r10 | 897 add r13,r10 |
654 adc rdx,0 | 898 adc rdx,0 |
655 mov QWORD[((-32))+r14],rdi | 899 mov QWORD[((-32))+r14],rdi |
656 mov rdi,rdx | 900 mov rdi,rdx |
657 | 901 |
658 mul rbx | 902 mul rbx |
659 add r11,rax | 903 add r11,rax |
660 » mov» rax,QWORD[((-16))+rcx] | 904 » mov» rax,QWORD[((-8))+rcx] |
661 adc rdx,0 | 905 adc rdx,0 |
662 add r11,QWORD[((-8))+r14] | 906 add r11,QWORD[((-8))+r14] |
663 adc rdx,0 | 907 adc rdx,0 |
664 mov r10,rdx | 908 mov r10,rdx |
665 | 909 |
666 mul rbp | 910 mul rbp |
667 add rdi,rax | 911 add rdi,rax |
668 mov rax,QWORD[r15*1+rsi] | 912 mov rax,QWORD[r15*1+rsi] |
669 adc rdx,0 | 913 adc rdx,0 |
670 add rdi,r11 | 914 add rdi,r11 |
(...skipping 13 matching lines...) Expand all Loading... |
684 add r13,rax | 928 add r13,rax |
685 mov rax,QWORD[8+r15*1+rsi] | 929 mov rax,QWORD[8+r15*1+rsi] |
686 adc rdx,0 | 930 adc rdx,0 |
687 add r13,r10 | 931 add r13,r10 |
688 adc rdx,0 | 932 adc rdx,0 |
689 mov QWORD[((-16))+r14],rdi | 933 mov QWORD[((-16))+r14],rdi |
690 mov rdi,rdx | 934 mov rdi,rdx |
691 | 935 |
692 mul rbx | 936 mul rbx |
693 add r11,rax | 937 add r11,rax |
694 » mov» rax,QWORD[16+rcx] | 938 » mov» rax,QWORD[8+rcx] |
695 adc rdx,0 | 939 adc rdx,0 |
696 add r11,QWORD[8+r14] | 940 add r11,QWORD[8+r14] |
697 adc rdx,0 | 941 adc rdx,0 |
698 mov r10,rdx | 942 mov r10,rdx |
699 | 943 |
700 mul rbp | 944 mul rbp |
701 add rdi,rax | 945 add rdi,rax |
702 mov rax,QWORD[16+r15*1+rsi] | 946 mov rax,QWORD[16+r15*1+rsi] |
703 adc rdx,0 | 947 adc rdx,0 |
704 add rdi,r11 | 948 add rdi,r11 |
705 » lea» rcx,[64+rcx] | 949 » lea» rcx,[32+rcx] |
706 adc rdx,0 | 950 adc rdx,0 |
707 mov QWORD[((-8))+r14],r13 | 951 mov QWORD[((-8))+r14],r13 |
708 mov r13,rdx | 952 mov r13,rdx |
709 | 953 |
710 add r15,32 | 954 add r15,32 |
711 jnz NEAR $L$inner4x | 955 jnz NEAR $L$inner4x |
712 | 956 |
713 mul rbx | 957 mul rbx |
714 add r10,rax | 958 add r10,rax |
715 » mov» rax,QWORD[((-32))+rcx] | 959 » mov» rax,QWORD[((-16))+rcx] |
716 adc rdx,0 | 960 adc rdx,0 |
717 add r10,QWORD[16+r14] | 961 add r10,QWORD[16+r14] |
718 lea r14,[32+r14] | 962 lea r14,[32+r14] |
719 adc rdx,0 | 963 adc rdx,0 |
720 mov r11,rdx | 964 mov r11,rdx |
721 | 965 |
722 mul rbp | 966 mul rbp |
723 add r13,rax | 967 add r13,rax |
724 mov rax,QWORD[((-8))+rsi] | 968 mov rax,QWORD[((-8))+rsi] |
725 adc rdx,0 | 969 adc rdx,0 |
726 add r13,r10 | 970 add r13,r10 |
727 adc rdx,0 | 971 adc rdx,0 |
728 mov QWORD[((-32))+r14],rdi | 972 mov QWORD[((-32))+r14],rdi |
729 mov rdi,rdx | 973 mov rdi,rdx |
730 | 974 |
731 mul rbx | 975 mul rbx |
732 add r11,rax | 976 add r11,rax |
733 mov rax,rbp | 977 mov rax,rbp |
734 » mov» rbp,QWORD[((-16))+rcx] | 978 » mov» rbp,QWORD[((-8))+rcx] |
735 adc rdx,0 | 979 adc rdx,0 |
736 add r11,QWORD[((-8))+r14] | 980 add r11,QWORD[((-8))+r14] |
737 adc rdx,0 | 981 adc rdx,0 |
738 mov r10,rdx | 982 mov r10,rdx |
739 | 983 |
740 mul rbp | 984 mul rbp |
741 add rdi,rax | 985 add rdi,rax |
742 mov rax,QWORD[r9*1+rsi] | 986 mov rax,QWORD[r9*1+rsi] |
743 adc rdx,0 | 987 adc rdx,0 |
744 add rdi,r11 | 988 add rdi,r11 |
745 adc rdx,0 | 989 adc rdx,0 |
746 mov QWORD[((-24))+r14],r13 | 990 mov QWORD[((-24))+r14],r13 |
747 mov r13,rdx | 991 mov r13,rdx |
748 | 992 |
749 DB 102,72,15,126,195 | |
750 mov QWORD[((-16))+r14],rdi | 993 mov QWORD[((-16))+r14],rdi |
751 » lea» rcx,[r9*2+rcx] | 994 » lea» rcx,[r9*1+rcx] |
752 | 995 |
753 xor rdi,rdi | 996 xor rdi,rdi |
754 add r13,r10 | 997 add r13,r10 |
755 adc rdi,0 | 998 adc rdi,0 |
756 add r13,QWORD[r14] | 999 add r13,QWORD[r14] |
757 adc rdi,0 | 1000 adc rdi,0 |
758 mov QWORD[((-8))+r14],r13 | 1001 mov QWORD[((-8))+r14],r13 |
759 | 1002 |
760 cmp r12,QWORD[((16+8))+rsp] | 1003 cmp r12,QWORD[((16+8))+rsp] |
761 jb NEAR $L$outer4x | 1004 jb NEAR $L$outer4x |
| 1005 xor rax,rax |
762 sub rbp,r13 | 1006 sub rbp,r13 |
763 adc r15,r15 | 1007 adc r15,r15 |
764 or rdi,r15 | 1008 or rdi,r15 |
765 » xor» rdi,1 | 1009 » sub» rax,rdi |
766 lea rbx,[r9*1+r14] | 1010 lea rbx,[r9*1+r14] |
767 » lea» rbp,[rdi*8+rcx] | 1011 » mov» r12,QWORD[rcx] |
| 1012 » lea» rbp,[rcx] |
768 mov rcx,r9 | 1013 mov rcx,r9 |
769 sar rcx,3+2 | 1014 sar rcx,3+2 |
770 mov rdi,QWORD[((56+8))+rsp] | 1015 mov rdi,QWORD[((56+8))+rsp] |
771 » jmp» NEAR $L$sqr4x_sub | 1016 » dec» r12 |
| 1017 » xor» r10,r10 |
| 1018 » mov» r13,QWORD[8+rbp] |
| 1019 » mov» r14,QWORD[16+rbp] |
| 1020 » mov» r15,QWORD[24+rbp] |
| 1021 » jmp» NEAR $L$sqr4x_sub_entry |
772 | 1022 |
773 global bn_power5 | 1023 global bn_power5 |
774 | 1024 |
775 ALIGN 32 | 1025 ALIGN 32 |
776 bn_power5: | 1026 bn_power5: |
777 mov QWORD[8+rsp],rdi ;WIN64 prologue | 1027 mov QWORD[8+rsp],rdi ;WIN64 prologue |
778 mov QWORD[16+rsp],rsi | 1028 mov QWORD[16+rsp],rsi |
779 mov rax,rsp | 1029 mov rax,rsp |
780 $L$SEH_begin_bn_power5: | 1030 $L$SEH_begin_bn_power5: |
781 mov rdi,rcx | 1031 mov rdi,rcx |
782 mov rsi,rdx | 1032 mov rsi,rdx |
783 mov rdx,r8 | 1033 mov rdx,r8 |
784 mov rcx,r9 | 1034 mov rcx,r9 |
785 mov r8,QWORD[40+rsp] | 1035 mov r8,QWORD[40+rsp] |
786 mov r9,QWORD[48+rsp] | 1036 mov r9,QWORD[48+rsp] |
787 | 1037 |
788 | 1038 |
789 mov rax,rsp | 1039 mov rax,rsp |
790 push rbx | 1040 push rbx |
791 push rbp | 1041 push rbp |
792 push r12 | 1042 push r12 |
793 push r13 | 1043 push r13 |
794 push r14 | 1044 push r14 |
795 push r15 | 1045 push r15 |
796 » lea» rsp,[((-40))+rsp] | 1046 |
797 » movaps» XMMWORD[rsp],xmm6 | |
798 » movaps» XMMWORD[16+rsp],xmm7 | |
799 » mov» r10d,r9d | |
800 shl r9d,3 | 1047 shl r9d,3 |
801 » shl» r10d,3+2 | 1048 » lea» r10d,[r9*2+r9] |
802 neg r9 | 1049 neg r9 |
803 mov r8,QWORD[r8] | 1050 mov r8,QWORD[r8] |
804 | 1051 |
805 | 1052 |
806 | 1053 |
807 | 1054 |
808 | 1055 |
809 | 1056 |
810 | 1057 |
811 » lea» r11,[((-64))+r9*2+rsp] | 1058 |
812 » sub» r11,rsi | 1059 » lea» r11,[((-320))+r9*2+rsp] |
| 1060 » sub» r11,rdi |
813 and r11,4095 | 1061 and r11,4095 |
814 cmp r10,r11 | 1062 cmp r10,r11 |
815 jb NEAR $L$pwr_sp_alt | 1063 jb NEAR $L$pwr_sp_alt |
816 sub rsp,r11 | 1064 sub rsp,r11 |
817 » lea» rsp,[((-64))+r9*2+rsp] | 1065 » lea» rsp,[((-320))+r9*2+rsp] |
818 jmp NEAR $L$pwr_sp_done | 1066 jmp NEAR $L$pwr_sp_done |
819 | 1067 |
820 ALIGN 32 | 1068 ALIGN 32 |
821 $L$pwr_sp_alt: | 1069 $L$pwr_sp_alt: |
822 » lea» r10,[((4096-64))+r9*2] | 1070 » lea» r10,[((4096-320))+r9*2] |
823 » lea» rsp,[((-64))+r9*2+rsp] | 1071 » lea» rsp,[((-320))+r9*2+rsp] |
824 sub r11,r10 | 1072 sub r11,r10 |
825 mov r10,0 | 1073 mov r10,0 |
826 cmovc r11,r10 | 1074 cmovc r11,r10 |
827 sub rsp,r11 | 1075 sub rsp,r11 |
828 $L$pwr_sp_done: | 1076 $L$pwr_sp_done: |
829 and rsp,-64 | 1077 and rsp,-64 |
830 mov r10,r9 | 1078 mov r10,r9 |
831 neg r9 | 1079 neg r9 |
832 | 1080 |
833 | 1081 |
834 | 1082 |
835 | 1083 |
836 | 1084 |
837 | 1085 |
838 | 1086 |
839 | 1087 |
840 | 1088 |
841 | 1089 |
842 mov QWORD[32+rsp],r8 | 1090 mov QWORD[32+rsp],r8 |
843 mov QWORD[40+rsp],rax | 1091 mov QWORD[40+rsp],rax |
844 $L$power5_body: | 1092 $L$power5_body: |
845 DB 102,72,15,110,207 | 1093 DB 102,72,15,110,207 |
846 DB 102,72,15,110,209 | 1094 DB 102,72,15,110,209 |
847 DB 102,73,15,110,218 | 1095 DB 102,73,15,110,218 |
848 DB 102,72,15,110,226 | 1096 DB 102,72,15,110,226 |
849 | 1097 |
850 call __bn_sqr8x_internal | 1098 call __bn_sqr8x_internal |
| 1099 call __bn_post4x_internal |
851 call __bn_sqr8x_internal | 1100 call __bn_sqr8x_internal |
| 1101 call __bn_post4x_internal |
852 call __bn_sqr8x_internal | 1102 call __bn_sqr8x_internal |
| 1103 call __bn_post4x_internal |
853 call __bn_sqr8x_internal | 1104 call __bn_sqr8x_internal |
| 1105 call __bn_post4x_internal |
854 call __bn_sqr8x_internal | 1106 call __bn_sqr8x_internal |
| 1107 call __bn_post4x_internal |
855 | 1108 |
856 DB 102,72,15,126,209 | 1109 DB 102,72,15,126,209 |
857 DB 102,72,15,126,226 | 1110 DB 102,72,15,126,226 |
858 mov rdi,rsi | 1111 mov rdi,rsi |
859 mov rax,QWORD[40+rsp] | 1112 mov rax,QWORD[40+rsp] |
860 lea r8,[32+rsp] | 1113 lea r8,[32+rsp] |
861 | 1114 |
862 call mul4x_internal | 1115 call mul4x_internal |
863 | 1116 |
864 mov rsi,QWORD[40+rsp] | 1117 mov rsi,QWORD[40+rsp] |
(...skipping 525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1390 lea r8,[r11*2+rcx] | 1643 lea r8,[r11*2+rcx] |
1391 shr r11,63 | 1644 shr r11,63 |
1392 or r8,r10 | 1645 or r8,r10 |
1393 mul rax | 1646 mul rax |
1394 neg r15 | 1647 neg r15 |
1395 adc rbx,rax | 1648 adc rbx,rax |
1396 adc r8,rdx | 1649 adc r8,rdx |
1397 mov QWORD[((-16))+rdi],rbx | 1650 mov QWORD[((-16))+rdi],rbx |
1398 mov QWORD[((-8))+rdi],r8 | 1651 mov QWORD[((-8))+rdi],r8 |
1399 DB 102,72,15,126,213 | 1652 DB 102,72,15,126,213 |
1400 sqr8x_reduction: | 1653 __bn_sqr8x_reduction: |
1401 xor rax,rax | 1654 xor rax,rax |
1402 » lea» rcx,[r9*2+rbp] | 1655 » lea» rcx,[rbp*1+r9] |
1403 lea rdx,[((48+8))+r9*2+rsp] | 1656 lea rdx,[((48+8))+r9*2+rsp] |
1404 mov QWORD[((0+8))+rsp],rcx | 1657 mov QWORD[((0+8))+rsp],rcx |
1405 lea rdi,[((48+8))+r9*1+rsp] | 1658 lea rdi,[((48+8))+r9*1+rsp] |
1406 mov QWORD[((8+8))+rsp],rdx | 1659 mov QWORD[((8+8))+rsp],rdx |
1407 neg r9 | 1660 neg r9 |
1408 jmp NEAR $L$8x_reduction_loop | 1661 jmp NEAR $L$8x_reduction_loop |
1409 | 1662 |
1410 ALIGN 32 | 1663 ALIGN 32 |
1411 $L$8x_reduction_loop: | 1664 $L$8x_reduction_loop: |
1412 lea rdi,[r9*1+rdi] | 1665 lea rdi,[r9*1+rdi] |
(...skipping 12 matching lines...) Expand all Loading... |
1425 DB 0x67 | 1678 DB 0x67 |
1426 mov r8,rbx | 1679 mov r8,rbx |
1427 imul rbx,QWORD[((32+8))+rsp] | 1680 imul rbx,QWORD[((32+8))+rsp] |
1428 mov rax,QWORD[rbp] | 1681 mov rax,QWORD[rbp] |
1429 mov ecx,8 | 1682 mov ecx,8 |
1430 jmp NEAR $L$8x_reduce | 1683 jmp NEAR $L$8x_reduce |
1431 | 1684 |
1432 ALIGN 32 | 1685 ALIGN 32 |
1433 $L$8x_reduce: | 1686 $L$8x_reduce: |
1434 mul rbx | 1687 mul rbx |
1435 » mov» rax,QWORD[16+rbp] | 1688 » mov» rax,QWORD[8+rbp] |
1436 neg r8 | 1689 neg r8 |
1437 mov r8,rdx | 1690 mov r8,rdx |
1438 adc r8,0 | 1691 adc r8,0 |
1439 | 1692 |
1440 mul rbx | 1693 mul rbx |
1441 add r9,rax | 1694 add r9,rax |
1442 » mov» rax,QWORD[32+rbp] | 1695 » mov» rax,QWORD[16+rbp] |
1443 adc rdx,0 | 1696 adc rdx,0 |
1444 add r8,r9 | 1697 add r8,r9 |
1445 mov QWORD[((48-8+8))+rcx*8+rsp],rbx | 1698 mov QWORD[((48-8+8))+rcx*8+rsp],rbx |
1446 mov r9,rdx | 1699 mov r9,rdx |
1447 adc r9,0 | 1700 adc r9,0 |
1448 | 1701 |
1449 mul rbx | 1702 mul rbx |
1450 add r10,rax | 1703 add r10,rax |
1451 » mov» rax,QWORD[48+rbp] | 1704 » mov» rax,QWORD[24+rbp] |
1452 adc rdx,0 | 1705 adc rdx,0 |
1453 add r9,r10 | 1706 add r9,r10 |
1454 mov rsi,QWORD[((32+8))+rsp] | 1707 mov rsi,QWORD[((32+8))+rsp] |
1455 mov r10,rdx | 1708 mov r10,rdx |
1456 adc r10,0 | 1709 adc r10,0 |
1457 | 1710 |
1458 mul rbx | 1711 mul rbx |
1459 add r11,rax | 1712 add r11,rax |
1460 » mov» rax,QWORD[64+rbp] | 1713 » mov» rax,QWORD[32+rbp] |
1461 adc rdx,0 | 1714 adc rdx,0 |
1462 imul rsi,r8 | 1715 imul rsi,r8 |
1463 add r10,r11 | 1716 add r10,r11 |
1464 mov r11,rdx | 1717 mov r11,rdx |
1465 adc r11,0 | 1718 adc r11,0 |
1466 | 1719 |
1467 mul rbx | 1720 mul rbx |
1468 add r12,rax | 1721 add r12,rax |
1469 » mov» rax,QWORD[80+rbp] | 1722 » mov» rax,QWORD[40+rbp] |
1470 adc rdx,0 | 1723 adc rdx,0 |
1471 add r11,r12 | 1724 add r11,r12 |
1472 mov r12,rdx | 1725 mov r12,rdx |
1473 adc r12,0 | 1726 adc r12,0 |
1474 | 1727 |
1475 mul rbx | 1728 mul rbx |
1476 add r13,rax | 1729 add r13,rax |
1477 » mov» rax,QWORD[96+rbp] | 1730 » mov» rax,QWORD[48+rbp] |
1478 adc rdx,0 | 1731 adc rdx,0 |
1479 add r12,r13 | 1732 add r12,r13 |
1480 mov r13,rdx | 1733 mov r13,rdx |
1481 adc r13,0 | 1734 adc r13,0 |
1482 | 1735 |
1483 mul rbx | 1736 mul rbx |
1484 add r14,rax | 1737 add r14,rax |
1485 » mov» rax,QWORD[112+rbp] | 1738 » mov» rax,QWORD[56+rbp] |
1486 adc rdx,0 | 1739 adc rdx,0 |
1487 add r13,r14 | 1740 add r13,r14 |
1488 mov r14,rdx | 1741 mov r14,rdx |
1489 adc r14,0 | 1742 adc r14,0 |
1490 | 1743 |
1491 mul rbx | 1744 mul rbx |
1492 mov rbx,rsi | 1745 mov rbx,rsi |
1493 add r15,rax | 1746 add r15,rax |
1494 mov rax,QWORD[rbp] | 1747 mov rax,QWORD[rbp] |
1495 adc rdx,0 | 1748 adc rdx,0 |
1496 add r14,r15 | 1749 add r14,r15 |
1497 mov r15,rdx | 1750 mov r15,rdx |
1498 adc r15,0 | 1751 adc r15,0 |
1499 | 1752 |
1500 dec ecx | 1753 dec ecx |
1501 jnz NEAR $L$8x_reduce | 1754 jnz NEAR $L$8x_reduce |
1502 | 1755 |
1503 » lea» rbp,[128+rbp] | 1756 » lea» rbp,[64+rbp] |
1504 xor rax,rax | 1757 xor rax,rax |
1505 mov rdx,QWORD[((8+8))+rsp] | 1758 mov rdx,QWORD[((8+8))+rsp] |
1506 cmp rbp,QWORD[((0+8))+rsp] | 1759 cmp rbp,QWORD[((0+8))+rsp] |
1507 jae NEAR $L$8x_no_tail | 1760 jae NEAR $L$8x_no_tail |
1508 | 1761 |
1509 DB 0x66 | 1762 DB 0x66 |
1510 add r8,QWORD[rdi] | 1763 add r8,QWORD[rdi] |
1511 adc r9,QWORD[8+rdi] | 1764 adc r9,QWORD[8+rdi] |
1512 adc r10,QWORD[16+rdi] | 1765 adc r10,QWORD[16+rdi] |
1513 adc r11,QWORD[24+rdi] | 1766 adc r11,QWORD[24+rdi] |
1514 adc r12,QWORD[32+rdi] | 1767 adc r12,QWORD[32+rdi] |
1515 adc r13,QWORD[40+rdi] | 1768 adc r13,QWORD[40+rdi] |
1516 adc r14,QWORD[48+rdi] | 1769 adc r14,QWORD[48+rdi] |
1517 adc r15,QWORD[56+rdi] | 1770 adc r15,QWORD[56+rdi] |
1518 sbb rsi,rsi | 1771 sbb rsi,rsi |
1519 | 1772 |
1520 mov rbx,QWORD[((48+56+8))+rsp] | 1773 mov rbx,QWORD[((48+56+8))+rsp] |
1521 mov ecx,8 | 1774 mov ecx,8 |
1522 mov rax,QWORD[rbp] | 1775 mov rax,QWORD[rbp] |
1523 jmp NEAR $L$8x_tail | 1776 jmp NEAR $L$8x_tail |
1524 | 1777 |
1525 ALIGN 32 | 1778 ALIGN 32 |
1526 $L$8x_tail: | 1779 $L$8x_tail: |
1527 mul rbx | 1780 mul rbx |
1528 add r8,rax | 1781 add r8,rax |
1529 » mov» rax,QWORD[16+rbp] | 1782 » mov» rax,QWORD[8+rbp] |
1530 mov QWORD[rdi],r8 | 1783 mov QWORD[rdi],r8 |
1531 mov r8,rdx | 1784 mov r8,rdx |
1532 adc r8,0 | 1785 adc r8,0 |
1533 | 1786 |
1534 mul rbx | 1787 mul rbx |
1535 add r9,rax | 1788 add r9,rax |
1536 » mov» rax,QWORD[32+rbp] | 1789 » mov» rax,QWORD[16+rbp] |
1537 adc rdx,0 | 1790 adc rdx,0 |
1538 add r8,r9 | 1791 add r8,r9 |
1539 lea rdi,[8+rdi] | 1792 lea rdi,[8+rdi] |
1540 mov r9,rdx | 1793 mov r9,rdx |
1541 adc r9,0 | 1794 adc r9,0 |
1542 | 1795 |
1543 mul rbx | 1796 mul rbx |
1544 add r10,rax | 1797 add r10,rax |
1545 » mov» rax,QWORD[48+rbp] | 1798 » mov» rax,QWORD[24+rbp] |
1546 adc rdx,0 | 1799 adc rdx,0 |
1547 add r9,r10 | 1800 add r9,r10 |
1548 mov r10,rdx | 1801 mov r10,rdx |
1549 adc r10,0 | 1802 adc r10,0 |
1550 | 1803 |
1551 mul rbx | 1804 mul rbx |
1552 add r11,rax | 1805 add r11,rax |
1553 » mov» rax,QWORD[64+rbp] | 1806 » mov» rax,QWORD[32+rbp] |
1554 adc rdx,0 | 1807 adc rdx,0 |
1555 add r10,r11 | 1808 add r10,r11 |
1556 mov r11,rdx | 1809 mov r11,rdx |
1557 adc r11,0 | 1810 adc r11,0 |
1558 | 1811 |
1559 mul rbx | 1812 mul rbx |
1560 add r12,rax | 1813 add r12,rax |
1561 » mov» rax,QWORD[80+rbp] | 1814 » mov» rax,QWORD[40+rbp] |
1562 adc rdx,0 | 1815 adc rdx,0 |
1563 add r11,r12 | 1816 add r11,r12 |
1564 mov r12,rdx | 1817 mov r12,rdx |
1565 adc r12,0 | 1818 adc r12,0 |
1566 | 1819 |
1567 mul rbx | 1820 mul rbx |
1568 add r13,rax | 1821 add r13,rax |
1569 » mov» rax,QWORD[96+rbp] | 1822 » mov» rax,QWORD[48+rbp] |
1570 adc rdx,0 | 1823 adc rdx,0 |
1571 add r12,r13 | 1824 add r12,r13 |
1572 mov r13,rdx | 1825 mov r13,rdx |
1573 adc r13,0 | 1826 adc r13,0 |
1574 | 1827 |
1575 mul rbx | 1828 mul rbx |
1576 add r14,rax | 1829 add r14,rax |
1577 » mov» rax,QWORD[112+rbp] | 1830 » mov» rax,QWORD[56+rbp] |
1578 adc rdx,0 | 1831 adc rdx,0 |
1579 add r13,r14 | 1832 add r13,r14 |
1580 mov r14,rdx | 1833 mov r14,rdx |
1581 adc r14,0 | 1834 adc r14,0 |
1582 | 1835 |
1583 mul rbx | 1836 mul rbx |
1584 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] | 1837 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] |
1585 add r15,rax | 1838 add r15,rax |
1586 adc rdx,0 | 1839 adc rdx,0 |
1587 add r14,r15 | 1840 add r14,r15 |
1588 mov rax,QWORD[rbp] | 1841 mov rax,QWORD[rbp] |
1589 mov r15,rdx | 1842 mov r15,rdx |
1590 adc r15,0 | 1843 adc r15,0 |
1591 | 1844 |
1592 dec ecx | 1845 dec ecx |
1593 jnz NEAR $L$8x_tail | 1846 jnz NEAR $L$8x_tail |
1594 | 1847 |
1595 » lea» rbp,[128+rbp] | 1848 » lea» rbp,[64+rbp] |
1596 mov rdx,QWORD[((8+8))+rsp] | 1849 mov rdx,QWORD[((8+8))+rsp] |
1597 cmp rbp,QWORD[((0+8))+rsp] | 1850 cmp rbp,QWORD[((0+8))+rsp] |
1598 jae NEAR $L$8x_tail_done | 1851 jae NEAR $L$8x_tail_done |
1599 | 1852 |
1600 mov rbx,QWORD[((48+56+8))+rsp] | 1853 mov rbx,QWORD[((48+56+8))+rsp] |
1601 neg rsi | 1854 neg rsi |
1602 mov rax,QWORD[rbp] | 1855 mov rax,QWORD[rbp] |
1603 adc r8,QWORD[rdi] | 1856 adc r8,QWORD[rdi] |
1604 adc r9,QWORD[8+rdi] | 1857 adc r9,QWORD[8+rdi] |
1605 adc r10,QWORD[16+rdi] | 1858 adc r10,QWORD[16+rdi] |
1606 adc r11,QWORD[24+rdi] | 1859 adc r11,QWORD[24+rdi] |
1607 adc r12,QWORD[32+rdi] | 1860 adc r12,QWORD[32+rdi] |
1608 adc r13,QWORD[40+rdi] | 1861 adc r13,QWORD[40+rdi] |
1609 adc r14,QWORD[48+rdi] | 1862 adc r14,QWORD[48+rdi] |
1610 adc r15,QWORD[56+rdi] | 1863 adc r15,QWORD[56+rdi] |
1611 sbb rsi,rsi | 1864 sbb rsi,rsi |
1612 | 1865 |
1613 mov ecx,8 | 1866 mov ecx,8 |
1614 jmp NEAR $L$8x_tail | 1867 jmp NEAR $L$8x_tail |
1615 | 1868 |
1616 ALIGN 32 | 1869 ALIGN 32 |
1617 $L$8x_tail_done: | 1870 $L$8x_tail_done: |
1618 add r8,QWORD[rdx] | 1871 add r8,QWORD[rdx] |
| 1872 adc r9,0 |
| 1873 adc r10,0 |
| 1874 adc r11,0 |
| 1875 adc r12,0 |
| 1876 adc r13,0 |
| 1877 adc r14,0 |
| 1878 adc r15,0 |
| 1879 |
| 1880 |
1619 xor rax,rax | 1881 xor rax,rax |
1620 | 1882 |
1621 neg rsi | 1883 neg rsi |
1622 $L$8x_no_tail: | 1884 $L$8x_no_tail: |
1623 adc r8,QWORD[rdi] | 1885 adc r8,QWORD[rdi] |
1624 adc r9,QWORD[8+rdi] | 1886 adc r9,QWORD[8+rdi] |
1625 adc r10,QWORD[16+rdi] | 1887 adc r10,QWORD[16+rdi] |
1626 adc r11,QWORD[24+rdi] | 1888 adc r11,QWORD[24+rdi] |
1627 adc r12,QWORD[32+rdi] | 1889 adc r12,QWORD[32+rdi] |
1628 adc r13,QWORD[40+rdi] | 1890 adc r13,QWORD[40+rdi] |
1629 adc r14,QWORD[48+rdi] | 1891 adc r14,QWORD[48+rdi] |
1630 adc r15,QWORD[56+rdi] | 1892 adc r15,QWORD[56+rdi] |
1631 adc rax,0 | 1893 adc rax,0 |
1632 » mov» rcx,QWORD[((-16))+rbp] | 1894 » mov» rcx,QWORD[((-8))+rbp] |
1633 xor rsi,rsi | 1895 xor rsi,rsi |
1634 | 1896 |
1635 DB 102,72,15,126,213 | 1897 DB 102,72,15,126,213 |
1636 | 1898 |
1637 mov QWORD[rdi],r8 | 1899 mov QWORD[rdi],r8 |
1638 mov QWORD[8+rdi],r9 | 1900 mov QWORD[8+rdi],r9 |
1639 DB 102,73,15,126,217 | 1901 DB 102,73,15,126,217 |
1640 mov QWORD[16+rdi],r10 | 1902 mov QWORD[16+rdi],r10 |
1641 mov QWORD[24+rdi],r11 | 1903 mov QWORD[24+rdi],r11 |
1642 mov QWORD[32+rdi],r12 | 1904 mov QWORD[32+rdi],r12 |
1643 mov QWORD[40+rdi],r13 | 1905 mov QWORD[40+rdi],r13 |
1644 mov QWORD[48+rdi],r14 | 1906 mov QWORD[48+rdi],r14 |
1645 mov QWORD[56+rdi],r15 | 1907 mov QWORD[56+rdi],r15 |
1646 lea rdi,[64+rdi] | 1908 lea rdi,[64+rdi] |
1647 | 1909 |
1648 cmp rdi,rdx | 1910 cmp rdi,rdx |
1649 jb NEAR $L$8x_reduction_loop | 1911 jb NEAR $L$8x_reduction_loop |
| 1912 DB 0F3h,0C3h ;repret |
1650 | 1913 |
1651 sub rcx,r15 | |
1652 lea rbx,[r9*1+rdi] | |
1653 adc rsi,rsi | |
1654 mov rcx,r9 | |
1655 or rax,rsi | |
1656 DB 102,72,15,126,207 | |
1657 xor rax,1 | |
1658 DB 102,72,15,126,206 | |
1659 lea rbp,[rax*8+rbp] | |
1660 sar rcx,3+2 | |
1661 jmp NEAR $L$sqr4x_sub | |
1662 | 1914 |
1663 ALIGN 32 | 1915 ALIGN 32 |
| 1916 __bn_post4x_internal: |
| 1917 mov r12,QWORD[rbp] |
| 1918 lea rbx,[r9*1+rdi] |
| 1919 mov rcx,r9 |
| 1920 DB 102,72,15,126,207 |
| 1921 neg rax |
| 1922 DB 102,72,15,126,206 |
| 1923 sar rcx,3+2 |
| 1924 dec r12 |
| 1925 xor r10,r10 |
| 1926 mov r13,QWORD[8+rbp] |
| 1927 mov r14,QWORD[16+rbp] |
| 1928 mov r15,QWORD[24+rbp] |
| 1929 jmp NEAR $L$sqr4x_sub_entry |
| 1930 |
| 1931 ALIGN 16 |
1664 $L$sqr4x_sub: | 1932 $L$sqr4x_sub: |
1665 DB» 0x66 | 1933 » mov» r12,QWORD[rbp] |
1666 » mov» r12,QWORD[rbx] | 1934 » mov» r13,QWORD[8+rbp] |
1667 » mov» r13,QWORD[8+rbx] | 1935 » mov» r14,QWORD[16+rbp] |
1668 » sbb» r12,QWORD[rbp] | 1936 » mov» r15,QWORD[24+rbp] |
1669 » mov» r14,QWORD[16+rbx] | 1937 $L$sqr4x_sub_entry: |
1670 » sbb» r13,QWORD[16+rbp] | 1938 » lea» rbp,[32+rbp] |
1671 » mov» r15,QWORD[24+rbx] | 1939 » not» r12 |
| 1940 » not» r13 |
| 1941 » not» r14 |
| 1942 » not» r15 |
| 1943 » and» r12,rax |
| 1944 » and» r13,rax |
| 1945 » and» r14,rax |
| 1946 » and» r15,rax |
| 1947 |
| 1948 » neg» r10 |
| 1949 » adc» r12,QWORD[rbx] |
| 1950 » adc» r13,QWORD[8+rbx] |
| 1951 » adc» r14,QWORD[16+rbx] |
| 1952 » adc» r15,QWORD[24+rbx] |
| 1953 » mov» QWORD[rdi],r12 |
1672 lea rbx,[32+rbx] | 1954 lea rbx,[32+rbx] |
1673 sbb r14,QWORD[32+rbp] | |
1674 mov QWORD[rdi],r12 | |
1675 sbb r15,QWORD[48+rbp] | |
1676 lea rbp,[64+rbp] | |
1677 mov QWORD[8+rdi],r13 | 1955 mov QWORD[8+rdi],r13 |
| 1956 sbb r10,r10 |
1678 mov QWORD[16+rdi],r14 | 1957 mov QWORD[16+rdi],r14 |
1679 mov QWORD[24+rdi],r15 | 1958 mov QWORD[24+rdi],r15 |
1680 lea rdi,[32+rdi] | 1959 lea rdi,[32+rdi] |
1681 | 1960 |
1682 inc rcx | 1961 inc rcx |
1683 jnz NEAR $L$sqr4x_sub | 1962 jnz NEAR $L$sqr4x_sub |
| 1963 |
1684 mov r10,r9 | 1964 mov r10,r9 |
1685 neg r9 | 1965 neg r9 |
1686 DB 0F3h,0C3h ;repret | 1966 DB 0F3h,0C3h ;repret |
1687 | 1967 |
1688 global bn_from_montgomery | 1968 global bn_from_montgomery |
1689 | 1969 |
1690 ALIGN 32 | 1970 ALIGN 32 |
1691 bn_from_montgomery: | 1971 bn_from_montgomery: |
1692 test DWORD[48+rsp],7 | 1972 test DWORD[48+rsp],7 |
1693 jz NEAR bn_from_mont8x | 1973 jz NEAR bn_from_mont8x |
(...skipping 17 matching lines...) Expand all Loading... |
1711 | 1991 |
1712 | 1992 |
1713 DB 0x67 | 1993 DB 0x67 |
1714 mov rax,rsp | 1994 mov rax,rsp |
1715 push rbx | 1995 push rbx |
1716 push rbp | 1996 push rbp |
1717 push r12 | 1997 push r12 |
1718 push r13 | 1998 push r13 |
1719 push r14 | 1999 push r14 |
1720 push r15 | 2000 push r15 |
1721 » lea» rsp,[((-40))+rsp] | 2001 |
1722 » movaps» XMMWORD[rsp],xmm6 | |
1723 » movaps» XMMWORD[16+rsp],xmm7 | |
1724 DB» 0x67 | |
1725 » mov» r10d,r9d | |
1726 shl r9d,3 | 2002 shl r9d,3 |
1727 » shl» r10d,3+2 | 2003 » lea» r10,[r9*2+r9] |
1728 neg r9 | 2004 neg r9 |
1729 mov r8,QWORD[r8] | 2005 mov r8,QWORD[r8] |
1730 | 2006 |
1731 | 2007 |
1732 | 2008 |
1733 | 2009 |
1734 | 2010 |
1735 | 2011 |
1736 | 2012 |
1737 » lea» r11,[((-64))+r9*2+rsp] | 2013 |
1738 » sub» r11,rsi | 2014 » lea» r11,[((-320))+r9*2+rsp] |
| 2015 » sub» r11,rdi |
1739 and r11,4095 | 2016 and r11,4095 |
1740 cmp r10,r11 | 2017 cmp r10,r11 |
1741 jb NEAR $L$from_sp_alt | 2018 jb NEAR $L$from_sp_alt |
1742 sub rsp,r11 | 2019 sub rsp,r11 |
1743 » lea» rsp,[((-64))+r9*2+rsp] | 2020 » lea» rsp,[((-320))+r9*2+rsp] |
1744 jmp NEAR $L$from_sp_done | 2021 jmp NEAR $L$from_sp_done |
1745 | 2022 |
1746 ALIGN 32 | 2023 ALIGN 32 |
1747 $L$from_sp_alt: | 2024 $L$from_sp_alt: |
1748 » lea» r10,[((4096-64))+r9*2] | 2025 » lea» r10,[((4096-320))+r9*2] |
1749 » lea» rsp,[((-64))+r9*2+rsp] | 2026 » lea» rsp,[((-320))+r9*2+rsp] |
1750 sub r11,r10 | 2027 sub r11,r10 |
1751 mov r10,0 | 2028 mov r10,0 |
1752 cmovc r11,r10 | 2029 cmovc r11,r10 |
1753 sub rsp,r11 | 2030 sub rsp,r11 |
1754 $L$from_sp_done: | 2031 $L$from_sp_done: |
1755 and rsp,-64 | 2032 and rsp,-64 |
1756 mov r10,r9 | 2033 mov r10,r9 |
1757 neg r9 | 2034 neg r9 |
1758 | 2035 |
1759 | 2036 |
(...skipping 30 matching lines...) Expand all Loading... |
1790 movdqa XMMWORD[48+rax],xmm4 | 2067 movdqa XMMWORD[48+rax],xmm4 |
1791 lea rax,[64+rax] | 2068 lea rax,[64+rax] |
1792 sub r11,64 | 2069 sub r11,64 |
1793 jnz NEAR $L$mul_by_1 | 2070 jnz NEAR $L$mul_by_1 |
1794 | 2071 |
1795 DB 102,72,15,110,207 | 2072 DB 102,72,15,110,207 |
1796 DB 102,72,15,110,209 | 2073 DB 102,72,15,110,209 |
1797 DB 0x67 | 2074 DB 0x67 |
1798 mov rbp,rcx | 2075 mov rbp,rcx |
1799 DB 102,73,15,110,218 | 2076 DB 102,73,15,110,218 |
1800 » call» sqr8x_reduction | 2077 » call» __bn_sqr8x_reduction |
| 2078 » call» __bn_post4x_internal |
1801 | 2079 |
1802 pxor xmm0,xmm0 | 2080 pxor xmm0,xmm0 |
1803 lea rax,[48+rsp] | 2081 lea rax,[48+rsp] |
1804 mov rsi,QWORD[40+rsp] | 2082 mov rsi,QWORD[40+rsp] |
1805 jmp NEAR $L$from_mont_zero | 2083 jmp NEAR $L$from_mont_zero |
1806 | 2084 |
1807 ALIGN 32 | 2085 ALIGN 32 |
1808 $L$from_mont_zero: | 2086 $L$from_mont_zero: |
1809 movdqa XMMWORD[rax],xmm0 | 2087 movdqa XMMWORD[rax],xmm0 |
1810 movdqa XMMWORD[16+rax],xmm0 | 2088 movdqa XMMWORD[16+rax],xmm0 |
(...skipping 29 matching lines...) Expand all Loading... |
1840 mov QWORD[r8],rax | 2118 mov QWORD[r8],rax |
1841 lea r8,[256+r8] | 2119 lea r8,[256+r8] |
1842 sub edx,1 | 2120 sub edx,1 |
1843 jnz NEAR $L$scatter | 2121 jnz NEAR $L$scatter |
1844 $L$scatter_epilogue: | 2122 $L$scatter_epilogue: |
1845 DB 0F3h,0C3h ;repret | 2123 DB 0F3h,0C3h ;repret |
1846 | 2124 |
1847 | 2125 |
1848 global bn_gather5 | 2126 global bn_gather5 |
1849 | 2127 |
1850 ALIGN» 16 | 2128 ALIGN» 32 |
1851 bn_gather5: | 2129 bn_gather5: |
1852 $L$SEH_begin_bn_gather5: | 2130 $L$SEH_begin_bn_gather5: |
1853 | 2131 |
1854 DB» 0x48,0x83,0xec,0x28 | 2132 DB» 0x4c,0x8d,0x14,0x24 |
1855 DB» 0x0f,0x29,0x34,0x24 | 2133 DB» 0x48,0x81,0xec,0x08,0x01,0x00,0x00 |
1856 DB» 0x0f,0x29,0x7c,0x24,0x10 | 2134 » lea» rax,[$L$inc] |
1857 » mov» r11d,r9d | 2135 » and» rsp,-16 |
1858 » shr» r9d,3 | 2136 |
1859 » and» r11,7 | 2137 » movd» xmm5,r9d |
1860 » not» r9d | 2138 » movdqa» xmm0,XMMWORD[rax] |
1861 » lea» rax,[$L$magic_masks] | 2139 » movdqa» xmm1,XMMWORD[16+rax] |
1862 » and» r9d,3 | 2140 » lea» r11,[128+r8] |
1863 » lea» r8,[128+r11*8+r8] | 2141 » lea» rax,[128+rsp] |
1864 » movq» xmm4,QWORD[r9*8+rax] | 2142 |
1865 » movq» xmm5,QWORD[8+r9*8+rax] | 2143 » pshufd» xmm5,xmm5,0 |
1866 » movq» xmm6,QWORD[16+r9*8+rax] | 2144 » movdqa» xmm4,xmm1 |
1867 » movq» xmm7,QWORD[24+r9*8+rax] | 2145 » movdqa» xmm2,xmm1 |
| 2146 » paddd» xmm1,xmm0 |
| 2147 » pcmpeqd»xmm0,xmm5 |
| 2148 » movdqa» xmm3,xmm4 |
| 2149 |
| 2150 » paddd» xmm2,xmm1 |
| 2151 » pcmpeqd»xmm1,xmm5 |
| 2152 » movdqa» XMMWORD[(-128)+rax],xmm0 |
| 2153 » movdqa» xmm0,xmm4 |
| 2154 |
| 2155 » paddd» xmm3,xmm2 |
| 2156 » pcmpeqd»xmm2,xmm5 |
| 2157 » movdqa» XMMWORD[(-112)+rax],xmm1 |
| 2158 » movdqa» xmm1,xmm4 |
| 2159 |
| 2160 » paddd» xmm0,xmm3 |
| 2161 » pcmpeqd»xmm3,xmm5 |
| 2162 » movdqa» XMMWORD[(-96)+rax],xmm2 |
| 2163 » movdqa» xmm2,xmm4 |
| 2164 » paddd» xmm1,xmm0 |
| 2165 » pcmpeqd»xmm0,xmm5 |
| 2166 » movdqa» XMMWORD[(-80)+rax],xmm3 |
| 2167 » movdqa» xmm3,xmm4 |
| 2168 |
| 2169 » paddd» xmm2,xmm1 |
| 2170 » pcmpeqd»xmm1,xmm5 |
| 2171 » movdqa» XMMWORD[(-64)+rax],xmm0 |
| 2172 » movdqa» xmm0,xmm4 |
| 2173 |
| 2174 » paddd» xmm3,xmm2 |
| 2175 » pcmpeqd»xmm2,xmm5 |
| 2176 » movdqa» XMMWORD[(-48)+rax],xmm1 |
| 2177 » movdqa» xmm1,xmm4 |
| 2178 |
| 2179 » paddd» xmm0,xmm3 |
| 2180 » pcmpeqd»xmm3,xmm5 |
| 2181 » movdqa» XMMWORD[(-32)+rax],xmm2 |
| 2182 » movdqa» xmm2,xmm4 |
| 2183 » paddd» xmm1,xmm0 |
| 2184 » pcmpeqd»xmm0,xmm5 |
| 2185 » movdqa» XMMWORD[(-16)+rax],xmm3 |
| 2186 » movdqa» xmm3,xmm4 |
| 2187 |
| 2188 » paddd» xmm2,xmm1 |
| 2189 » pcmpeqd»xmm1,xmm5 |
| 2190 » movdqa» XMMWORD[rax],xmm0 |
| 2191 » movdqa» xmm0,xmm4 |
| 2192 |
| 2193 » paddd» xmm3,xmm2 |
| 2194 » pcmpeqd»xmm2,xmm5 |
| 2195 » movdqa» XMMWORD[16+rax],xmm1 |
| 2196 » movdqa» xmm1,xmm4 |
| 2197 |
| 2198 » paddd» xmm0,xmm3 |
| 2199 » pcmpeqd»xmm3,xmm5 |
| 2200 » movdqa» XMMWORD[32+rax],xmm2 |
| 2201 » movdqa» xmm2,xmm4 |
| 2202 » paddd» xmm1,xmm0 |
| 2203 » pcmpeqd»xmm0,xmm5 |
| 2204 » movdqa» XMMWORD[48+rax],xmm3 |
| 2205 » movdqa» xmm3,xmm4 |
| 2206 |
| 2207 » paddd» xmm2,xmm1 |
| 2208 » pcmpeqd»xmm1,xmm5 |
| 2209 » movdqa» XMMWORD[64+rax],xmm0 |
| 2210 » movdqa» xmm0,xmm4 |
| 2211 |
| 2212 » paddd» xmm3,xmm2 |
| 2213 » pcmpeqd»xmm2,xmm5 |
| 2214 » movdqa» XMMWORD[80+rax],xmm1 |
| 2215 » movdqa» xmm1,xmm4 |
| 2216 |
| 2217 » paddd» xmm0,xmm3 |
| 2218 » pcmpeqd»xmm3,xmm5 |
| 2219 » movdqa» XMMWORD[96+rax],xmm2 |
| 2220 » movdqa» xmm2,xmm4 |
| 2221 » movdqa» XMMWORD[112+rax],xmm3 |
1868 jmp NEAR $L$gather | 2222 jmp NEAR $L$gather |
1869 ALIGN» 16 | 2223 |
| 2224 ALIGN» 32 |
1870 $L$gather: | 2225 $L$gather: |
1871 » movq» xmm0,QWORD[(((-128)))+r8] | 2226 » pxor» xmm4,xmm4 |
1872 » movq» xmm1,QWORD[((-64))+r8] | 2227 » pxor» xmm5,xmm5 |
1873 » pand» xmm0,xmm4 | 2228 » movdqa» xmm0,XMMWORD[((-128))+r11] |
1874 » movq» xmm2,QWORD[r8] | 2229 » movdqa» xmm1,XMMWORD[((-112))+r11] |
1875 » pand» xmm1,xmm5 | 2230 » movdqa» xmm2,XMMWORD[((-96))+r11] |
1876 » movq» xmm3,QWORD[64+r8] | 2231 » pand» xmm0,XMMWORD[((-128))+rax] |
1877 » pand» xmm2,xmm6 | 2232 » movdqa» xmm3,XMMWORD[((-80))+r11] |
1878 » por» xmm0,xmm1 | 2233 » pand» xmm1,XMMWORD[((-112))+rax] |
1879 » pand» xmm3,xmm7 | 2234 » por» xmm4,xmm0 |
1880 DB» 0x67,0x67 | 2235 » pand» xmm2,XMMWORD[((-96))+rax] |
1881 » por» xmm0,xmm2 | 2236 » por» xmm5,xmm1 |
1882 » lea» r8,[256+r8] | 2237 » pand» xmm3,XMMWORD[((-80))+rax] |
1883 » por» xmm0,xmm3 | 2238 » por» xmm4,xmm2 |
1884 | 2239 » por» xmm5,xmm3 |
| 2240 » movdqa» xmm0,XMMWORD[((-64))+r11] |
| 2241 » movdqa» xmm1,XMMWORD[((-48))+r11] |
| 2242 » movdqa» xmm2,XMMWORD[((-32))+r11] |
| 2243 » pand» xmm0,XMMWORD[((-64))+rax] |
| 2244 » movdqa» xmm3,XMMWORD[((-16))+r11] |
| 2245 » pand» xmm1,XMMWORD[((-48))+rax] |
| 2246 » por» xmm4,xmm0 |
| 2247 » pand» xmm2,XMMWORD[((-32))+rax] |
| 2248 » por» xmm5,xmm1 |
| 2249 » pand» xmm3,XMMWORD[((-16))+rax] |
| 2250 » por» xmm4,xmm2 |
| 2251 » por» xmm5,xmm3 |
| 2252 » movdqa» xmm0,XMMWORD[r11] |
| 2253 » movdqa» xmm1,XMMWORD[16+r11] |
| 2254 » movdqa» xmm2,XMMWORD[32+r11] |
| 2255 » pand» xmm0,XMMWORD[rax] |
| 2256 » movdqa» xmm3,XMMWORD[48+r11] |
| 2257 » pand» xmm1,XMMWORD[16+rax] |
| 2258 » por» xmm4,xmm0 |
| 2259 » pand» xmm2,XMMWORD[32+rax] |
| 2260 » por» xmm5,xmm1 |
| 2261 » pand» xmm3,XMMWORD[48+rax] |
| 2262 » por» xmm4,xmm2 |
| 2263 » por» xmm5,xmm3 |
| 2264 » movdqa» xmm0,XMMWORD[64+r11] |
| 2265 » movdqa» xmm1,XMMWORD[80+r11] |
| 2266 » movdqa» xmm2,XMMWORD[96+r11] |
| 2267 » pand» xmm0,XMMWORD[64+rax] |
| 2268 » movdqa» xmm3,XMMWORD[112+r11] |
| 2269 » pand» xmm1,XMMWORD[80+rax] |
| 2270 » por» xmm4,xmm0 |
| 2271 » pand» xmm2,XMMWORD[96+rax] |
| 2272 » por» xmm5,xmm1 |
| 2273 » pand» xmm3,XMMWORD[112+rax] |
| 2274 » por» xmm4,xmm2 |
| 2275 » por» xmm5,xmm3 |
| 2276 » por» xmm4,xmm5 |
| 2277 » lea» r11,[256+r11] |
| 2278 » pshufd» xmm0,xmm4,0x4e |
| 2279 » por» xmm0,xmm4 |
1885 movq QWORD[rcx],xmm0 | 2280 movq QWORD[rcx],xmm0 |
1886 lea rcx,[8+rcx] | 2281 lea rcx,[8+rcx] |
1887 sub edx,1 | 2282 sub edx,1 |
1888 jnz NEAR $L$gather | 2283 jnz NEAR $L$gather |
1889 » movaps» xmm6,XMMWORD[rsp] | 2284 |
1890 » movaps» xmm7,XMMWORD[16+rsp] | 2285 » lea» rsp,[r10] |
1891 » lea» rsp,[40+rsp] | |
1892 DB 0F3h,0C3h ;repret | 2286 DB 0F3h,0C3h ;repret |
1893 $L$SEH_end_bn_gather5: | 2287 $L$SEH_end_bn_gather5: |
1894 | 2288 |
1895 ALIGN 64 | 2289 ALIGN 64 |
1896 $L$magic_masks: | 2290 $L$inc: |
1897 » DD» 0,0,0,0,0,0,-1,-1 | 2291 » DD» 0,0,1,1 |
1898 » DD» 0,0,0,0,0,0,0,0 | 2292 » DD» 2,2,2,2 |
1899 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | 2293 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
1900 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 | 2294 DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 |
1901 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 | 2295 DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 |
1902 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 | 2296 DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 |
1903 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 | 2297 DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 |
1904 DB 112,101,110,115,115,108,46,111,114,103,62,0 | 2298 DB 112,101,110,115,115,108,46,111,114,103,62,0 |
1905 EXTERN __imp_RtlVirtualUnwind | 2299 EXTERN __imp_RtlVirtualUnwind |
1906 | 2300 |
1907 ALIGN 16 | 2301 ALIGN 16 |
1908 mul_handler: | 2302 mul_handler: |
(...skipping 21 matching lines...) Expand all Loading... |
1930 | 2324 |
1931 mov rax,QWORD[152+r8] | 2325 mov rax,QWORD[152+r8] |
1932 | 2326 |
1933 mov r10d,DWORD[4+r11] | 2327 mov r10d,DWORD[4+r11] |
1934 lea r10,[r10*1+rsi] | 2328 lea r10,[r10*1+rsi] |
1935 cmp rbx,r10 | 2329 cmp rbx,r10 |
1936 jae NEAR $L$common_seh_tail | 2330 jae NEAR $L$common_seh_tail |
1937 | 2331 |
1938 lea r10,[$L$mul_epilogue] | 2332 lea r10,[$L$mul_epilogue] |
1939 cmp rbx,r10 | 2333 cmp rbx,r10 |
1940 » jb» NEAR $L$body_40 | 2334 » ja» NEAR $L$body_40 |
1941 | 2335 |
1942 mov r10,QWORD[192+r8] | 2336 mov r10,QWORD[192+r8] |
1943 mov rax,QWORD[8+r10*8+rax] | 2337 mov rax,QWORD[8+r10*8+rax] |
| 2338 |
1944 jmp NEAR $L$body_proceed | 2339 jmp NEAR $L$body_proceed |
1945 | 2340 |
1946 $L$body_40: | 2341 $L$body_40: |
1947 mov rax,QWORD[40+rax] | 2342 mov rax,QWORD[40+rax] |
1948 $L$body_proceed: | 2343 $L$body_proceed: |
1949 | |
1950 movaps xmm0,XMMWORD[((-88))+rax] | |
1951 movaps xmm1,XMMWORD[((-72))+rax] | |
1952 | |
1953 mov rbx,QWORD[((-8))+rax] | 2344 mov rbx,QWORD[((-8))+rax] |
1954 mov rbp,QWORD[((-16))+rax] | 2345 mov rbp,QWORD[((-16))+rax] |
1955 mov r12,QWORD[((-24))+rax] | 2346 mov r12,QWORD[((-24))+rax] |
1956 mov r13,QWORD[((-32))+rax] | 2347 mov r13,QWORD[((-32))+rax] |
1957 mov r14,QWORD[((-40))+rax] | 2348 mov r14,QWORD[((-40))+rax] |
1958 mov r15,QWORD[((-48))+rax] | 2349 mov r15,QWORD[((-48))+rax] |
1959 mov QWORD[144+r8],rbx | 2350 mov QWORD[144+r8],rbx |
1960 mov QWORD[160+r8],rbp | 2351 mov QWORD[160+r8],rbp |
1961 mov QWORD[216+r8],r12 | 2352 mov QWORD[216+r8],r12 |
1962 mov QWORD[224+r8],r13 | 2353 mov QWORD[224+r8],r13 |
1963 mov QWORD[232+r8],r14 | 2354 mov QWORD[232+r8],r14 |
1964 mov QWORD[240+r8],r15 | 2355 mov QWORD[240+r8],r15 |
1965 movups XMMWORD[512+r8],xmm0 | |
1966 movups XMMWORD[528+r8],xmm1 | |
1967 | 2356 |
1968 $L$common_seh_tail: | 2357 $L$common_seh_tail: |
1969 mov rdi,QWORD[8+rax] | 2358 mov rdi,QWORD[8+rax] |
1970 mov rsi,QWORD[16+rax] | 2359 mov rsi,QWORD[16+rax] |
1971 mov QWORD[152+r8],rax | 2360 mov QWORD[152+r8],rax |
1972 mov QWORD[168+r8],rsi | 2361 mov QWORD[168+r8],rsi |
1973 mov QWORD[176+r8],rdi | 2362 mov QWORD[176+r8],rdi |
1974 | 2363 |
1975 mov rdi,QWORD[40+r9] | 2364 mov rdi,QWORD[40+r9] |
1976 mov rsi,r8 | 2365 mov rsi,r8 |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2042 DB 9,0,0,0 | 2431 DB 9,0,0,0 |
2043 DD mul_handler wrt ..imagebase | 2432 DD mul_handler wrt ..imagebase |
2044 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebas
e | 2433 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebas
e |
2045 ALIGN 8 | 2434 ALIGN 8 |
2046 $L$SEH_info_bn_from_mont8x: | 2435 $L$SEH_info_bn_from_mont8x: |
2047 DB 9,0,0,0 | 2436 DB 9,0,0,0 |
2048 DD mul_handler wrt ..imagebase | 2437 DD mul_handler wrt ..imagebase |
2049 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase | 2438 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase |
2050 ALIGN 8 | 2439 ALIGN 8 |
2051 $L$SEH_info_bn_gather5: | 2440 $L$SEH_info_bn_gather5: |
2052 DB» 0x01,0x0d,0x05,0x00 | 2441 DB» 0x01,0x0b,0x03,0x0a |
2053 DB» 0x0d,0x78,0x01,0x00 | 2442 DB» 0x0b,0x01,0x21,0x00 |
2054 DB» 0x08,0x68,0x00,0x00 | 2443 DB» 0x04,0xa3,0x00,0x00 |
2055 DB» 0x04,0x42,0x00,0x00 | |
2056 ALIGN 8 | 2444 ALIGN 8 |
OLD | NEW |