OLD | NEW |
1 OPTION» DOTNAME | 1 default»rel |
2 .text$» SEGMENT ALIGN(256) 'CODE' | 2 %define XMMWORD |
| 3 %define YMMWORD |
| 4 %define ZMMWORD |
| 5 section».text code align=64 |
3 | 6 |
4 EXTERN OPENSSL_ia32cap_P:NEAR | |
5 | 7 |
6 PUBLIC» bn_mul_mont | 8 EXTERN» OPENSSL_ia32cap_P |
| 9 |
| 10 global» bn_mul_mont |
7 | 11 |
8 ALIGN 16 | 12 ALIGN 16 |
9 bn_mul_mont» PROC PUBLIC | 13 bn_mul_mont: |
10 » mov» QWORD PTR[8+rsp],rdi» ;WIN64 prologue | 14 » mov» QWORD[8+rsp],rdi» ;WIN64 prologue |
11 » mov» QWORD PTR[16+rsp],rsi | 15 » mov» QWORD[16+rsp],rsi |
12 mov rax,rsp | 16 mov rax,rsp |
13 $L$SEH_begin_bn_mul_mont:: | 17 $L$SEH_begin_bn_mul_mont: |
14 mov rdi,rcx | 18 mov rdi,rcx |
15 mov rsi,rdx | 19 mov rsi,rdx |
16 mov rdx,r8 | 20 mov rdx,r8 |
17 mov rcx,r9 | 21 mov rcx,r9 |
18 » mov» r8,QWORD PTR[40+rsp] | 22 » mov» r8,QWORD[40+rsp] |
19 » mov» r9,QWORD PTR[48+rsp] | 23 » mov» r9,QWORD[48+rsp] |
20 | 24 |
21 | 25 |
22 test r9d,3 | 26 test r9d,3 |
23 » jnz» $L$mul_enter | 27 » jnz» NEAR $L$mul_enter |
24 cmp r9d,8 | 28 cmp r9d,8 |
25 » jb» $L$mul_enter | 29 » jb» NEAR $L$mul_enter |
26 cmp rdx,rsi | 30 cmp rdx,rsi |
27 » jne» $L$mul4x_enter | 31 » jne» NEAR $L$mul4x_enter |
28 test r9d,7 | 32 test r9d,7 |
29 » jz» $L$sqr8x_enter | 33 » jz» NEAR $L$sqr8x_enter |
30 » jmp» $L$mul4x_enter | 34 » jmp» NEAR $L$mul4x_enter |
31 | 35 |
32 ALIGN 16 | 36 ALIGN 16 |
33 $L$mul_enter:: | 37 $L$mul_enter: |
34 push rbx | 38 push rbx |
35 push rbp | 39 push rbp |
36 push r12 | 40 push r12 |
37 push r13 | 41 push r13 |
38 push r14 | 42 push r14 |
39 push r15 | 43 push r15 |
40 | 44 |
41 mov r9d,r9d | 45 mov r9d,r9d |
42 » lea» r10,QWORD PTR[2+r9] | 46 » lea» r10,[2+r9] |
43 mov r11,rsp | 47 mov r11,rsp |
44 neg r10 | 48 neg r10 |
45 » lea» rsp,QWORD PTR[r10*8+rsp] | 49 » lea» rsp,[r10*8+rsp] |
46 and rsp,-1024 | 50 and rsp,-1024 |
47 | 51 |
48 » mov» QWORD PTR[8+r9*8+rsp],r11 | 52 » mov» QWORD[8+r9*8+rsp],r11 |
49 $L$mul_body:: | 53 $L$mul_body: |
50 mov r12,rdx | 54 mov r12,rdx |
51 » mov» r8,QWORD PTR[r8] | 55 » mov» r8,QWORD[r8] |
52 » mov» rbx,QWORD PTR[r12] | 56 » mov» rbx,QWORD[r12] |
53 » mov» rax,QWORD PTR[rsi] | 57 » mov» rax,QWORD[rsi] |
54 | 58 |
55 xor r14,r14 | 59 xor r14,r14 |
56 xor r15,r15 | 60 xor r15,r15 |
57 | 61 |
58 mov rbp,r8 | 62 mov rbp,r8 |
59 mul rbx | 63 mul rbx |
60 mov r10,rax | 64 mov r10,rax |
61 » mov» rax,QWORD PTR[rcx] | 65 » mov» rax,QWORD[rcx] |
62 | 66 |
63 imul rbp,r10 | 67 imul rbp,r10 |
64 mov r11,rdx | 68 mov r11,rdx |
65 | 69 |
66 mul rbp | 70 mul rbp |
67 add r10,rax | 71 add r10,rax |
68 » mov» rax,QWORD PTR[8+rsi] | 72 » mov» rax,QWORD[8+rsi] |
69 adc rdx,0 | 73 adc rdx,0 |
70 mov r13,rdx | 74 mov r13,rdx |
71 | 75 |
72 » lea» r15,QWORD PTR[1+r15] | 76 » lea» r15,[1+r15] |
73 » jmp» $L$1st_enter | 77 » jmp» NEAR $L$1st_enter |
74 | 78 |
75 ALIGN 16 | 79 ALIGN 16 |
76 $L$1st:: | 80 $L$1st: |
77 add r13,rax | 81 add r13,rax |
78 » mov» rax,QWORD PTR[r15*8+rsi] | 82 » mov» rax,QWORD[r15*8+rsi] |
79 adc rdx,0 | 83 adc rdx,0 |
80 add r13,r11 | 84 add r13,r11 |
81 mov r11,r10 | 85 mov r11,r10 |
82 adc rdx,0 | 86 adc rdx,0 |
83 » mov» QWORD PTR[((-16))+r15*8+rsp],r13 | 87 » mov» QWORD[((-16))+r15*8+rsp],r13 |
84 mov r13,rdx | 88 mov r13,rdx |
85 | 89 |
86 $L$1st_enter:: | 90 $L$1st_enter: |
87 mul rbx | 91 mul rbx |
88 add r11,rax | 92 add r11,rax |
89 » mov» rax,QWORD PTR[r15*8+rcx] | 93 » mov» rax,QWORD[r15*8+rcx] |
90 adc rdx,0 | 94 adc rdx,0 |
91 » lea» r15,QWORD PTR[1+r15] | 95 » lea» r15,[1+r15] |
92 mov r10,rdx | 96 mov r10,rdx |
93 | 97 |
94 mul rbp | 98 mul rbp |
95 cmp r15,r9 | 99 cmp r15,r9 |
96 » jne» $L$1st | 100 » jne» NEAR $L$1st |
97 | 101 |
98 add r13,rax | 102 add r13,rax |
99 » mov» rax,QWORD PTR[rsi] | 103 » mov» rax,QWORD[rsi] |
100 adc rdx,0 | 104 adc rdx,0 |
101 add r13,r11 | 105 add r13,r11 |
102 adc rdx,0 | 106 adc rdx,0 |
103 » mov» QWORD PTR[((-16))+r15*8+rsp],r13 | 107 » mov» QWORD[((-16))+r15*8+rsp],r13 |
104 mov r13,rdx | 108 mov r13,rdx |
105 mov r11,r10 | 109 mov r11,r10 |
106 | 110 |
107 xor rdx,rdx | 111 xor rdx,rdx |
108 add r13,r11 | 112 add r13,r11 |
109 adc rdx,0 | 113 adc rdx,0 |
110 » mov» QWORD PTR[((-8))+r9*8+rsp],r13 | 114 » mov» QWORD[((-8))+r9*8+rsp],r13 |
111 » mov» QWORD PTR[r9*8+rsp],rdx | 115 » mov» QWORD[r9*8+rsp],rdx |
112 | 116 |
113 » lea» r14,QWORD PTR[1+r14] | 117 » lea» r14,[1+r14] |
114 » jmp» $L$outer | 118 » jmp» NEAR $L$outer |
115 ALIGN 16 | 119 ALIGN 16 |
116 $L$outer:: | 120 $L$outer: |
117 » mov» rbx,QWORD PTR[r14*8+r12] | 121 » mov» rbx,QWORD[r14*8+r12] |
118 xor r15,r15 | 122 xor r15,r15 |
119 mov rbp,r8 | 123 mov rbp,r8 |
120 » mov» r10,QWORD PTR[rsp] | 124 » mov» r10,QWORD[rsp] |
121 mul rbx | 125 mul rbx |
122 add r10,rax | 126 add r10,rax |
123 » mov» rax,QWORD PTR[rcx] | 127 » mov» rax,QWORD[rcx] |
124 adc rdx,0 | 128 adc rdx,0 |
125 | 129 |
126 imul rbp,r10 | 130 imul rbp,r10 |
127 mov r11,rdx | 131 mov r11,rdx |
128 | 132 |
129 mul rbp | 133 mul rbp |
130 add r10,rax | 134 add r10,rax |
131 » mov» rax,QWORD PTR[8+rsi] | 135 » mov» rax,QWORD[8+rsi] |
132 adc rdx,0 | 136 adc rdx,0 |
133 » mov» r10,QWORD PTR[8+rsp] | 137 » mov» r10,QWORD[8+rsp] |
134 mov r13,rdx | 138 mov r13,rdx |
135 | 139 |
136 » lea» r15,QWORD PTR[1+r15] | 140 » lea» r15,[1+r15] |
137 » jmp» $L$inner_enter | 141 » jmp» NEAR $L$inner_enter |
138 | 142 |
139 ALIGN 16 | 143 ALIGN 16 |
140 $L$inner:: | 144 $L$inner: |
141 add r13,rax | 145 add r13,rax |
142 » mov» rax,QWORD PTR[r15*8+rsi] | 146 » mov» rax,QWORD[r15*8+rsi] |
143 adc rdx,0 | 147 adc rdx,0 |
144 add r13,r10 | 148 add r13,r10 |
145 » mov» r10,QWORD PTR[r15*8+rsp] | 149 » mov» r10,QWORD[r15*8+rsp] |
146 adc rdx,0 | 150 adc rdx,0 |
147 » mov» QWORD PTR[((-16))+r15*8+rsp],r13 | 151 » mov» QWORD[((-16))+r15*8+rsp],r13 |
148 mov r13,rdx | 152 mov r13,rdx |
149 | 153 |
150 $L$inner_enter:: | 154 $L$inner_enter: |
151 mul rbx | 155 mul rbx |
152 add r11,rax | 156 add r11,rax |
153 » mov» rax,QWORD PTR[r15*8+rcx] | 157 » mov» rax,QWORD[r15*8+rcx] |
154 adc rdx,0 | 158 adc rdx,0 |
155 add r10,r11 | 159 add r10,r11 |
156 mov r11,rdx | 160 mov r11,rdx |
157 adc r11,0 | 161 adc r11,0 |
158 » lea» r15,QWORD PTR[1+r15] | 162 » lea» r15,[1+r15] |
159 | 163 |
160 mul rbp | 164 mul rbp |
161 cmp r15,r9 | 165 cmp r15,r9 |
162 » jne» $L$inner | 166 » jne» NEAR $L$inner |
163 | 167 |
164 add r13,rax | 168 add r13,rax |
165 » mov» rax,QWORD PTR[rsi] | 169 » mov» rax,QWORD[rsi] |
166 adc rdx,0 | 170 adc rdx,0 |
167 add r13,r10 | 171 add r13,r10 |
168 » mov» r10,QWORD PTR[r15*8+rsp] | 172 » mov» r10,QWORD[r15*8+rsp] |
169 adc rdx,0 | 173 adc rdx,0 |
170 » mov» QWORD PTR[((-16))+r15*8+rsp],r13 | 174 » mov» QWORD[((-16))+r15*8+rsp],r13 |
171 mov r13,rdx | 175 mov r13,rdx |
172 | 176 |
173 xor rdx,rdx | 177 xor rdx,rdx |
174 add r13,r11 | 178 add r13,r11 |
175 adc rdx,0 | 179 adc rdx,0 |
176 add r13,r10 | 180 add r13,r10 |
177 adc rdx,0 | 181 adc rdx,0 |
178 » mov» QWORD PTR[((-8))+r9*8+rsp],r13 | 182 » mov» QWORD[((-8))+r9*8+rsp],r13 |
179 » mov» QWORD PTR[r9*8+rsp],rdx | 183 » mov» QWORD[r9*8+rsp],rdx |
180 | 184 |
181 » lea» r14,QWORD PTR[1+r14] | 185 » lea» r14,[1+r14] |
182 cmp r14,r9 | 186 cmp r14,r9 |
183 » jb» $L$outer | 187 » jb» NEAR $L$outer |
184 | 188 |
185 xor r14,r14 | 189 xor r14,r14 |
186 » mov» rax,QWORD PTR[rsp] | 190 » mov» rax,QWORD[rsp] |
187 » lea» rsi,QWORD PTR[rsp] | 191 » lea» rsi,[rsp] |
188 mov r15,r9 | 192 mov r15,r9 |
189 » jmp» $L$sub | 193 » jmp» NEAR $L$sub |
190 ALIGN 16 | 194 ALIGN 16 |
191 $L$sub::» sbb» rax,QWORD PTR[r14*8+rcx] | 195 $L$sub:»sbb» rax,QWORD[r14*8+rcx] |
192 » mov» QWORD PTR[r14*8+rdi],rax | 196 » mov» QWORD[r14*8+rdi],rax |
193 » mov» rax,QWORD PTR[8+r14*8+rsi] | 197 » mov» rax,QWORD[8+r14*8+rsi] |
194 » lea» r14,QWORD PTR[1+r14] | 198 » lea» r14,[1+r14] |
195 dec r15 | 199 dec r15 |
196 » jnz» $L$sub | 200 » jnz» NEAR $L$sub |
197 | 201 |
198 sbb rax,0 | 202 sbb rax,0 |
199 xor r14,r14 | 203 xor r14,r14 |
200 mov r15,r9 | 204 mov r15,r9 |
201 ALIGN 16 | 205 ALIGN 16 |
202 $L$copy:: | 206 $L$copy: |
203 » mov» rsi,QWORD PTR[r14*8+rsp] | 207 » mov» rsi,QWORD[r14*8+rsp] |
204 » mov» rcx,QWORD PTR[r14*8+rdi] | 208 » mov» rcx,QWORD[r14*8+rdi] |
205 xor rsi,rcx | 209 xor rsi,rcx |
206 and rsi,rax | 210 and rsi,rax |
207 xor rsi,rcx | 211 xor rsi,rcx |
208 » mov» QWORD PTR[r14*8+rsp],r14 | 212 » mov» QWORD[r14*8+rsp],r14 |
209 » mov» QWORD PTR[r14*8+rdi],rsi | 213 » mov» QWORD[r14*8+rdi],rsi |
210 » lea» r14,QWORD PTR[1+r14] | 214 » lea» r14,[1+r14] |
211 sub r15,1 | 215 sub r15,1 |
212 » jnz» $L$copy | 216 » jnz» NEAR $L$copy |
213 | 217 |
214 » mov» rsi,QWORD PTR[8+r9*8+rsp] | 218 » mov» rsi,QWORD[8+r9*8+rsp] |
215 mov rax,1 | 219 mov rax,1 |
216 » mov» r15,QWORD PTR[rsi] | 220 » mov» r15,QWORD[rsi] |
217 » mov» r14,QWORD PTR[8+rsi] | 221 » mov» r14,QWORD[8+rsi] |
218 » mov» r13,QWORD PTR[16+rsi] | 222 » mov» r13,QWORD[16+rsi] |
219 » mov» r12,QWORD PTR[24+rsi] | 223 » mov» r12,QWORD[24+rsi] |
220 » mov» rbp,QWORD PTR[32+rsi] | 224 » mov» rbp,QWORD[32+rsi] |
221 » mov» rbx,QWORD PTR[40+rsi] | 225 » mov» rbx,QWORD[40+rsi] |
222 » lea» rsp,QWORD PTR[48+rsi] | 226 » lea» rsp,[48+rsi] |
223 $L$mul_epilogue:: | 227 $L$mul_epilogue: |
224 » mov» rdi,QWORD PTR[8+rsp]» ;WIN64 epilogue | 228 » mov» rdi,QWORD[8+rsp]» ;WIN64 epilogue |
225 » mov» rsi,QWORD PTR[16+rsp] | 229 » mov» rsi,QWORD[16+rsp] |
226 DB 0F3h,0C3h ;repret | 230 DB 0F3h,0C3h ;repret |
227 $L$SEH_end_bn_mul_mont:: | 231 $L$SEH_end_bn_mul_mont: |
228 bn_mul_mont» ENDP | |
229 | 232 |
230 ALIGN 16 | 233 ALIGN 16 |
231 bn_mul4x_mont» PROC PRIVATE | 234 bn_mul4x_mont: |
232 » mov» QWORD PTR[8+rsp],rdi» ;WIN64 prologue | 235 » mov» QWORD[8+rsp],rdi» ;WIN64 prologue |
233 » mov» QWORD PTR[16+rsp],rsi | 236 » mov» QWORD[16+rsp],rsi |
234 mov rax,rsp | 237 mov rax,rsp |
235 $L$SEH_begin_bn_mul4x_mont:: | 238 $L$SEH_begin_bn_mul4x_mont: |
236 mov rdi,rcx | 239 mov rdi,rcx |
237 mov rsi,rdx | 240 mov rsi,rdx |
238 mov rdx,r8 | 241 mov rdx,r8 |
239 mov rcx,r9 | 242 mov rcx,r9 |
240 » mov» r8,QWORD PTR[40+rsp] | 243 » mov» r8,QWORD[40+rsp] |
241 » mov» r9,QWORD PTR[48+rsp] | 244 » mov» r9,QWORD[48+rsp] |
242 | 245 |
243 | 246 |
244 $L$mul4x_enter:: | 247 $L$mul4x_enter: |
245 push rbx | 248 push rbx |
246 push rbp | 249 push rbp |
247 push r12 | 250 push r12 |
248 push r13 | 251 push r13 |
249 push r14 | 252 push r14 |
250 push r15 | 253 push r15 |
251 | 254 |
252 mov r9d,r9d | 255 mov r9d,r9d |
253 » lea» r10,QWORD PTR[4+r9] | 256 » lea» r10,[4+r9] |
254 mov r11,rsp | 257 mov r11,rsp |
255 neg r10 | 258 neg r10 |
256 » lea» rsp,QWORD PTR[r10*8+rsp] | 259 » lea» rsp,[r10*8+rsp] |
257 and rsp,-1024 | 260 and rsp,-1024 |
258 | 261 |
259 » mov» QWORD PTR[8+r9*8+rsp],r11 | 262 » mov» QWORD[8+r9*8+rsp],r11 |
260 $L$mul4x_body:: | 263 $L$mul4x_body: |
261 » mov» QWORD PTR[16+r9*8+rsp],rdi | 264 » mov» QWORD[16+r9*8+rsp],rdi |
262 mov r12,rdx | 265 mov r12,rdx |
263 » mov» r8,QWORD PTR[r8] | 266 » mov» r8,QWORD[r8] |
264 » mov» rbx,QWORD PTR[r12] | 267 » mov» rbx,QWORD[r12] |
265 » mov» rax,QWORD PTR[rsi] | 268 » mov» rax,QWORD[rsi] |
266 | 269 |
267 xor r14,r14 | 270 xor r14,r14 |
268 xor r15,r15 | 271 xor r15,r15 |
269 | 272 |
270 mov rbp,r8 | 273 mov rbp,r8 |
271 mul rbx | 274 mul rbx |
272 mov r10,rax | 275 mov r10,rax |
273 » mov» rax,QWORD PTR[rcx] | 276 » mov» rax,QWORD[rcx] |
274 | 277 |
275 imul rbp,r10 | 278 imul rbp,r10 |
276 mov r11,rdx | 279 mov r11,rdx |
277 | 280 |
278 mul rbp | 281 mul rbp |
279 add r10,rax | 282 add r10,rax |
280 » mov» rax,QWORD PTR[8+rsi] | 283 » mov» rax,QWORD[8+rsi] |
281 adc rdx,0 | 284 adc rdx,0 |
282 mov rdi,rdx | 285 mov rdi,rdx |
283 | 286 |
284 mul rbx | 287 mul rbx |
285 add r11,rax | 288 add r11,rax |
286 » mov» rax,QWORD PTR[8+rcx] | 289 » mov» rax,QWORD[8+rcx] |
287 adc rdx,0 | 290 adc rdx,0 |
288 mov r10,rdx | 291 mov r10,rdx |
289 | 292 |
290 mul rbp | 293 mul rbp |
291 add rdi,rax | 294 add rdi,rax |
292 » mov» rax,QWORD PTR[16+rsi] | 295 » mov» rax,QWORD[16+rsi] |
293 adc rdx,0 | 296 adc rdx,0 |
294 add rdi,r11 | 297 add rdi,r11 |
295 » lea» r15,QWORD PTR[4+r15] | 298 » lea» r15,[4+r15] |
296 adc rdx,0 | 299 adc rdx,0 |
297 » mov» QWORD PTR[rsp],rdi | 300 » mov» QWORD[rsp],rdi |
298 mov r13,rdx | 301 mov r13,rdx |
299 » jmp» $L$1st4x | 302 » jmp» NEAR $L$1st4x |
300 ALIGN 16 | 303 ALIGN 16 |
301 $L$1st4x:: | 304 $L$1st4x: |
302 mul rbx | 305 mul rbx |
303 add r10,rax | 306 add r10,rax |
304 » mov» rax,QWORD PTR[((-16))+r15*8+rcx] | 307 » mov» rax,QWORD[((-16))+r15*8+rcx] |
305 adc rdx,0 | 308 adc rdx,0 |
306 mov r11,rdx | 309 mov r11,rdx |
307 | 310 |
308 mul rbp | 311 mul rbp |
309 add r13,rax | 312 add r13,rax |
310 » mov» rax,QWORD PTR[((-8))+r15*8+rsi] | 313 » mov» rax,QWORD[((-8))+r15*8+rsi] |
311 adc rdx,0 | 314 adc rdx,0 |
312 add r13,r10 | 315 add r13,r10 |
313 adc rdx,0 | 316 adc rdx,0 |
314 » mov» QWORD PTR[((-24))+r15*8+rsp],r13 | 317 » mov» QWORD[((-24))+r15*8+rsp],r13 |
315 mov rdi,rdx | 318 mov rdi,rdx |
316 | 319 |
317 mul rbx | 320 mul rbx |
318 add r11,rax | 321 add r11,rax |
319 » mov» rax,QWORD PTR[((-8))+r15*8+rcx] | 322 » mov» rax,QWORD[((-8))+r15*8+rcx] |
320 adc rdx,0 | 323 adc rdx,0 |
321 mov r10,rdx | 324 mov r10,rdx |
322 | 325 |
323 mul rbp | 326 mul rbp |
324 add rdi,rax | 327 add rdi,rax |
325 » mov» rax,QWORD PTR[r15*8+rsi] | 328 » mov» rax,QWORD[r15*8+rsi] |
326 adc rdx,0 | 329 adc rdx,0 |
327 add rdi,r11 | 330 add rdi,r11 |
328 adc rdx,0 | 331 adc rdx,0 |
329 » mov» QWORD PTR[((-16))+r15*8+rsp],rdi | 332 » mov» QWORD[((-16))+r15*8+rsp],rdi |
330 mov r13,rdx | 333 mov r13,rdx |
331 | 334 |
332 mul rbx | 335 mul rbx |
333 add r10,rax | 336 add r10,rax |
334 » mov» rax,QWORD PTR[r15*8+rcx] | 337 » mov» rax,QWORD[r15*8+rcx] |
335 adc rdx,0 | 338 adc rdx,0 |
336 mov r11,rdx | 339 mov r11,rdx |
337 | 340 |
338 mul rbp | 341 mul rbp |
339 add r13,rax | 342 add r13,rax |
340 » mov» rax,QWORD PTR[8+r15*8+rsi] | 343 » mov» rax,QWORD[8+r15*8+rsi] |
341 adc rdx,0 | 344 adc rdx,0 |
342 add r13,r10 | 345 add r13,r10 |
343 adc rdx,0 | 346 adc rdx,0 |
344 » mov» QWORD PTR[((-8))+r15*8+rsp],r13 | 347 » mov» QWORD[((-8))+r15*8+rsp],r13 |
345 mov rdi,rdx | 348 mov rdi,rdx |
346 | 349 |
347 mul rbx | 350 mul rbx |
348 add r11,rax | 351 add r11,rax |
349 » mov» rax,QWORD PTR[8+r15*8+rcx] | 352 » mov» rax,QWORD[8+r15*8+rcx] |
350 adc rdx,0 | 353 adc rdx,0 |
351 » lea» r15,QWORD PTR[4+r15] | 354 » lea» r15,[4+r15] |
352 mov r10,rdx | 355 mov r10,rdx |
353 | 356 |
354 mul rbp | 357 mul rbp |
355 add rdi,rax | 358 add rdi,rax |
356 » mov» rax,QWORD PTR[((-16))+r15*8+rsi] | 359 » mov» rax,QWORD[((-16))+r15*8+rsi] |
357 adc rdx,0 | 360 adc rdx,0 |
358 add rdi,r11 | 361 add rdi,r11 |
359 adc rdx,0 | 362 adc rdx,0 |
360 » mov» QWORD PTR[((-32))+r15*8+rsp],rdi | 363 » mov» QWORD[((-32))+r15*8+rsp],rdi |
361 mov r13,rdx | 364 mov r13,rdx |
362 cmp r15,r9 | 365 cmp r15,r9 |
363 » jb» $L$1st4x | 366 » jb» NEAR $L$1st4x |
364 | 367 |
365 mul rbx | 368 mul rbx |
366 add r10,rax | 369 add r10,rax |
367 » mov» rax,QWORD PTR[((-16))+r15*8+rcx] | 370 » mov» rax,QWORD[((-16))+r15*8+rcx] |
368 adc rdx,0 | 371 adc rdx,0 |
369 mov r11,rdx | 372 mov r11,rdx |
370 | 373 |
371 mul rbp | 374 mul rbp |
372 add r13,rax | 375 add r13,rax |
373 » mov» rax,QWORD PTR[((-8))+r15*8+rsi] | 376 » mov» rax,QWORD[((-8))+r15*8+rsi] |
374 adc rdx,0 | 377 adc rdx,0 |
375 add r13,r10 | 378 add r13,r10 |
376 adc rdx,0 | 379 adc rdx,0 |
377 » mov» QWORD PTR[((-24))+r15*8+rsp],r13 | 380 » mov» QWORD[((-24))+r15*8+rsp],r13 |
378 mov rdi,rdx | 381 mov rdi,rdx |
379 | 382 |
380 mul rbx | 383 mul rbx |
381 add r11,rax | 384 add r11,rax |
382 » mov» rax,QWORD PTR[((-8))+r15*8+rcx] | 385 » mov» rax,QWORD[((-8))+r15*8+rcx] |
383 adc rdx,0 | 386 adc rdx,0 |
384 mov r10,rdx | 387 mov r10,rdx |
385 | 388 |
386 mul rbp | 389 mul rbp |
387 add rdi,rax | 390 add rdi,rax |
388 » mov» rax,QWORD PTR[rsi] | 391 » mov» rax,QWORD[rsi] |
389 adc rdx,0 | 392 adc rdx,0 |
390 add rdi,r11 | 393 add rdi,r11 |
391 adc rdx,0 | 394 adc rdx,0 |
392 » mov» QWORD PTR[((-16))+r15*8+rsp],rdi | 395 » mov» QWORD[((-16))+r15*8+rsp],rdi |
393 mov r13,rdx | 396 mov r13,rdx |
394 | 397 |
395 xor rdi,rdi | 398 xor rdi,rdi |
396 add r13,r10 | 399 add r13,r10 |
397 adc rdi,0 | 400 adc rdi,0 |
398 » mov» QWORD PTR[((-8))+r15*8+rsp],r13 | 401 » mov» QWORD[((-8))+r15*8+rsp],r13 |
399 » mov» QWORD PTR[r15*8+rsp],rdi | 402 » mov» QWORD[r15*8+rsp],rdi |
400 | 403 |
401 » lea» r14,QWORD PTR[1+r14] | 404 » lea» r14,[1+r14] |
402 ALIGN 4 | 405 ALIGN 4 |
403 $L$outer4x:: | 406 $L$outer4x: |
404 » mov» rbx,QWORD PTR[r14*8+r12] | 407 » mov» rbx,QWORD[r14*8+r12] |
405 xor r15,r15 | 408 xor r15,r15 |
406 » mov» r10,QWORD PTR[rsp] | 409 » mov» r10,QWORD[rsp] |
407 mov rbp,r8 | 410 mov rbp,r8 |
408 mul rbx | 411 mul rbx |
409 add r10,rax | 412 add r10,rax |
410 » mov» rax,QWORD PTR[rcx] | 413 » mov» rax,QWORD[rcx] |
411 adc rdx,0 | 414 adc rdx,0 |
412 | 415 |
413 imul rbp,r10 | 416 imul rbp,r10 |
414 mov r11,rdx | 417 mov r11,rdx |
415 | 418 |
416 mul rbp | 419 mul rbp |
417 add r10,rax | 420 add r10,rax |
418 » mov» rax,QWORD PTR[8+rsi] | 421 » mov» rax,QWORD[8+rsi] |
419 » adc» rdx,0 | 422 » adc» rdx,0 |
420 » mov» rdi,rdx | 423 » mov» rdi,rdx |
421 | 424 |
422 » mul» rbx | 425 » mul» rbx |
423 » add» r11,rax | 426 » add» r11,rax |
424 » mov» rax,QWORD PTR[8+rcx] | 427 » mov» rax,QWORD[8+rcx] |
425 » adc» rdx,0 | 428 » adc» rdx,0 |
426 » add» r11,QWORD PTR[8+rsp] | 429 » add» r11,QWORD[8+rsp] |
427 » adc» rdx,0 | 430 » adc» rdx,0 |
428 » mov» r10,rdx | 431 » mov» r10,rdx |
429 | 432 |
430 » mul» rbp | 433 » mul» rbp |
431 » add» rdi,rax | 434 » add» rdi,rax |
432 » mov» rax,QWORD PTR[16+rsi] | 435 » mov» rax,QWORD[16+rsi] |
433 » adc» rdx,0 | 436 » adc» rdx,0 |
434 » add» rdi,r11 | 437 » add» rdi,r11 |
435 » lea» r15,QWORD PTR[4+r15] | 438 » lea» r15,[4+r15] |
436 » adc» rdx,0 | 439 » adc» rdx,0 |
437 » mov» QWORD PTR[rsp],rdi | 440 » mov» QWORD[rsp],rdi |
438 » mov» r13,rdx | 441 » mov» r13,rdx |
439 » jmp» $L$inner4x | 442 » jmp» NEAR $L$inner4x |
440 ALIGN 16 | 443 ALIGN 16 |
441 $L$inner4x:: | 444 $L$inner4x: |
442 » mul» rbx | 445 » mul» rbx |
443 » add» r10,rax | 446 » add» r10,rax |
444 » mov» rax,QWORD PTR[((-16))+r15*8+rcx] | 447 » mov» rax,QWORD[((-16))+r15*8+rcx] |
445 » adc» rdx,0 | 448 » adc» rdx,0 |
446 » add» r10,QWORD PTR[((-16))+r15*8+rsp] | 449 » add» r10,QWORD[((-16))+r15*8+rsp] |
447 adc rdx,0 | 450 adc rdx,0 |
448 mov r11,rdx | 451 mov r11,rdx |
449 | 452 |
450 mul rbp | 453 mul rbp |
451 add r13,rax | 454 add r13,rax |
452 » mov» rax,QWORD PTR[((-8))+r15*8+rsi] | 455 » mov» rax,QWORD[((-8))+r15*8+rsi] |
453 » adc» rdx,0 | 456 » adc» rdx,0 |
454 » add» r13,r10 | 457 » add» r13,r10 |
455 » adc» rdx,0 | 458 » adc» rdx,0 |
456 » mov» QWORD PTR[((-24))+r15*8+rsp],r13 | 459 » mov» QWORD[((-24))+r15*8+rsp],r13 |
457 » mov» rdi,rdx | 460 » mov» rdi,rdx |
458 | 461 |
459 » mul» rbx | 462 » mul» rbx |
460 » add» r11,rax | 463 » add» r11,rax |
461 » mov» rax,QWORD PTR[((-8))+r15*8+rcx] | 464 » mov» rax,QWORD[((-8))+r15*8+rcx] |
462 » adc» rdx,0 | 465 » adc» rdx,0 |
463 » add» r11,QWORD PTR[((-8))+r15*8+rsp] | 466 » add» r11,QWORD[((-8))+r15*8+rsp] |
464 » adc» rdx,0 | 467 » adc» rdx,0 |
465 » mov» r10,rdx | 468 » mov» r10,rdx |
466 | 469 |
467 » mul» rbp | 470 » mul» rbp |
468 » add» rdi,rax | 471 » add» rdi,rax |
469 » mov» rax,QWORD PTR[r15*8+rsi] | 472 » mov» rax,QWORD[r15*8+rsi] |
470 » adc» rdx,0 | 473 » adc» rdx,0 |
471 » add» rdi,r11 | 474 » add» rdi,r11 |
472 » adc» rdx,0 | 475 » adc» rdx,0 |
473 » mov» QWORD PTR[((-16))+r15*8+rsp],rdi | 476 » mov» QWORD[((-16))+r15*8+rsp],rdi |
474 » mov» r13,rdx | 477 » mov» r13,rdx |
475 | 478 |
476 » mul» rbx | 479 » mul» rbx |
477 » add» r10,rax | 480 » add» r10,rax |
478 » mov» rax,QWORD PTR[r15*8+rcx] | 481 » mov» rax,QWORD[r15*8+rcx] |
479 » adc» rdx,0 | 482 » adc» rdx,0 |
480 » add» r10,QWORD PTR[r15*8+rsp] | 483 » add» r10,QWORD[r15*8+rsp] |
481 adc rdx,0 | 484 adc rdx,0 |
482 mov r11,rdx | 485 mov r11,rdx |
483 | 486 |
484 mul rbp | 487 mul rbp |
485 add r13,rax | 488 add r13,rax |
486 » mov» rax,QWORD PTR[8+r15*8+rsi] | 489 » mov» rax,QWORD[8+r15*8+rsi] |
487 » adc» rdx,0 | 490 » adc» rdx,0 |
488 » add» r13,r10 | 491 » add» r13,r10 |
489 » adc» rdx,0 | 492 » adc» rdx,0 |
490 » mov» QWORD PTR[((-8))+r15*8+rsp],r13 | 493 » mov» QWORD[((-8))+r15*8+rsp],r13 |
491 » mov» rdi,rdx | 494 » mov» rdi,rdx |
492 | 495 |
493 » mul» rbx | 496 » mul» rbx |
494 » add» r11,rax | 497 » add» r11,rax |
495 » mov» rax,QWORD PTR[8+r15*8+rcx] | 498 » mov» rax,QWORD[8+r15*8+rcx] |
496 » adc» rdx,0 | 499 » adc» rdx,0 |
497 » add» r11,QWORD PTR[8+r15*8+rsp] | 500 » add» r11,QWORD[8+r15*8+rsp] |
498 » adc» rdx,0 | 501 » adc» rdx,0 |
499 » lea» r15,QWORD PTR[4+r15] | 502 » lea» r15,[4+r15] |
500 » mov» r10,rdx | 503 » mov» r10,rdx |
501 | 504 |
502 » mul» rbp | 505 » mul» rbp |
503 » add» rdi,rax | 506 » add» rdi,rax |
504 » mov» rax,QWORD PTR[((-16))+r15*8+rsi] | 507 » mov» rax,QWORD[((-16))+r15*8+rsi] |
505 » adc» rdx,0 | 508 » adc» rdx,0 |
506 » add» rdi,r11 | 509 » add» rdi,r11 |
507 » adc» rdx,0 | 510 » adc» rdx,0 |
508 » mov» QWORD PTR[((-32))+r15*8+rsp],rdi | 511 » mov» QWORD[((-32))+r15*8+rsp],rdi |
509 mov r13,rdx | 512 mov r13,rdx |
510 cmp r15,r9 | 513 cmp r15,r9 |
511 » jb» $L$inner4x | 514 » jb» NEAR $L$inner4x |
512 | 515 |
513 » mul» rbx | 516 » mul» rbx |
514 » add» r10,rax | 517 » add» r10,rax |
515 » mov» rax,QWORD PTR[((-16))+r15*8+rcx] | 518 » mov» rax,QWORD[((-16))+r15*8+rcx] |
516 » adc» rdx,0 | 519 » adc» rdx,0 |
517 » add» r10,QWORD PTR[((-16))+r15*8+rsp] | 520 » add» r10,QWORD[((-16))+r15*8+rsp] |
518 adc rdx,0 | 521 adc rdx,0 |
519 mov r11,rdx | 522 mov r11,rdx |
520 | 523 |
521 mul rbp | 524 mul rbp |
522 add r13,rax | 525 add r13,rax |
523 » mov» rax,QWORD PTR[((-8))+r15*8+rsi] | 526 » mov» rax,QWORD[((-8))+r15*8+rsi] |
524 » adc» rdx,0 | 527 » adc» rdx,0 |
525 » add» r13,r10 | 528 » add» r13,r10 |
526 » adc» rdx,0 | 529 » adc» rdx,0 |
527 » mov» QWORD PTR[((-24))+r15*8+rsp],r13 | 530 » mov» QWORD[((-24))+r15*8+rsp],r13 |
528 » mov» rdi,rdx | 531 » mov» rdi,rdx |
529 | 532 |
530 » mul» rbx | 533 » mul» rbx |
531 » add» r11,rax | 534 » add» r11,rax |
532 » mov» rax,QWORD PTR[((-8))+r15*8+rcx] | 535 » mov» rax,QWORD[((-8))+r15*8+rcx] |
533 » adc» rdx,0 | 536 » adc» rdx,0 |
534 » add» r11,QWORD PTR[((-8))+r15*8+rsp] | 537 » add» r11,QWORD[((-8))+r15*8+rsp] |
535 » adc» rdx,0 | 538 » adc» rdx,0 |
536 » lea» r14,QWORD PTR[1+r14] | 539 » lea» r14,[1+r14] |
537 » mov» r10,rdx | 540 » mov» r10,rdx |
538 | 541 |
539 » mul» rbp | 542 » mul» rbp |
540 » add» rdi,rax | 543 » add» rdi,rax |
541 » mov» rax,QWORD PTR[rsi] | 544 » mov» rax,QWORD[rsi] |
542 » adc» rdx,0 | 545 » adc» rdx,0 |
543 » add» rdi,r11 | 546 » add» rdi,r11 |
544 » adc» rdx,0 | 547 » adc» rdx,0 |
545 » mov» QWORD PTR[((-16))+r15*8+rsp],rdi | 548 » mov» QWORD[((-16))+r15*8+rsp],rdi |
546 mov r13,rdx | 549 mov r13,rdx |
547 | 550 |
548 xor rdi,rdi | 551 xor rdi,rdi |
549 add r13,r10 | 552 add r13,r10 |
550 adc rdi,0 | 553 adc rdi,0 |
551 » add» r13,QWORD PTR[r9*8+rsp] | 554 » add» r13,QWORD[r9*8+rsp] |
552 adc rdi,0 | 555 adc rdi,0 |
553 » mov» QWORD PTR[((-8))+r15*8+rsp],r13 | 556 » mov» QWORD[((-8))+r15*8+rsp],r13 |
554 » mov» QWORD PTR[r15*8+rsp],rdi | 557 » mov» QWORD[r15*8+rsp],rdi |
555 | 558 |
556 cmp r14,r9 | 559 cmp r14,r9 |
557 » jb» $L$outer4x | 560 » jb» NEAR $L$outer4x |
558 » mov» rdi,QWORD PTR[16+r9*8+rsp] | 561 » mov» rdi,QWORD[16+r9*8+rsp] |
559 » mov» rax,QWORD PTR[rsp] | 562 » mov» rax,QWORD[rsp] |
560 » mov» rdx,QWORD PTR[8+rsp] | 563 » mov» rdx,QWORD[8+rsp] |
561 shr r9,2 | 564 shr r9,2 |
562 » lea» rsi,QWORD PTR[rsp] | 565 » lea» rsi,[rsp] |
563 xor r14,r14 | 566 xor r14,r14 |
564 | 567 |
565 » sub» rax,QWORD PTR[rcx] | 568 » sub» rax,QWORD[rcx] |
566 » mov» rbx,QWORD PTR[16+rsi] | 569 » mov» rbx,QWORD[16+rsi] |
567 » mov» rbp,QWORD PTR[24+rsi] | 570 » mov» rbp,QWORD[24+rsi] |
568 » sbb» rdx,QWORD PTR[8+rcx] | 571 » sbb» rdx,QWORD[8+rcx] |
569 » lea» r15,QWORD PTR[((-1))+r9] | 572 » lea» r15,[((-1))+r9] |
570 » jmp» $L$sub4x | 573 » jmp» NEAR $L$sub4x |
571 ALIGN 16 | 574 ALIGN 16 |
572 $L$sub4x:: | 575 $L$sub4x: |
573 » mov» QWORD PTR[r14*8+rdi],rax | 576 » mov» QWORD[r14*8+rdi],rax |
574 » mov» QWORD PTR[8+r14*8+rdi],rdx | 577 » mov» QWORD[8+r14*8+rdi],rdx |
575 » sbb» rbx,QWORD PTR[16+r14*8+rcx] | 578 » sbb» rbx,QWORD[16+r14*8+rcx] |
576 » mov» rax,QWORD PTR[32+r14*8+rsi] | 579 » mov» rax,QWORD[32+r14*8+rsi] |
577 » mov» rdx,QWORD PTR[40+r14*8+rsi] | 580 » mov» rdx,QWORD[40+r14*8+rsi] |
578 » sbb» rbp,QWORD PTR[24+r14*8+rcx] | 581 » sbb» rbp,QWORD[24+r14*8+rcx] |
579 » mov» QWORD PTR[16+r14*8+rdi],rbx | 582 » mov» QWORD[16+r14*8+rdi],rbx |
580 » mov» QWORD PTR[24+r14*8+rdi],rbp | 583 » mov» QWORD[24+r14*8+rdi],rbp |
581 » sbb» rax,QWORD PTR[32+r14*8+rcx] | 584 » sbb» rax,QWORD[32+r14*8+rcx] |
582 » mov» rbx,QWORD PTR[48+r14*8+rsi] | 585 » mov» rbx,QWORD[48+r14*8+rsi] |
583 » mov» rbp,QWORD PTR[56+r14*8+rsi] | 586 » mov» rbp,QWORD[56+r14*8+rsi] |
584 » sbb» rdx,QWORD PTR[40+r14*8+rcx] | 587 » sbb» rdx,QWORD[40+r14*8+rcx] |
585 » lea» r14,QWORD PTR[4+r14] | 588 » lea» r14,[4+r14] |
586 dec r15 | 589 dec r15 |
587 » jnz» $L$sub4x | 590 » jnz» NEAR $L$sub4x |
588 | 591 |
589 » mov» QWORD PTR[r14*8+rdi],rax | 592 » mov» QWORD[r14*8+rdi],rax |
590 » mov» rax,QWORD PTR[32+r14*8+rsi] | 593 » mov» rax,QWORD[32+r14*8+rsi] |
591 » sbb» rbx,QWORD PTR[16+r14*8+rcx] | 594 » sbb» rbx,QWORD[16+r14*8+rcx] |
592 » mov» QWORD PTR[8+r14*8+rdi],rdx | 595 » mov» QWORD[8+r14*8+rdi],rdx |
593 » sbb» rbp,QWORD PTR[24+r14*8+rcx] | 596 » sbb» rbp,QWORD[24+r14*8+rcx] |
594 » mov» QWORD PTR[16+r14*8+rdi],rbx | 597 » mov» QWORD[16+r14*8+rdi],rbx |
595 | 598 |
596 sbb rax,0 | 599 sbb rax,0 |
597 DB 66h, 48h, 0fh, 6eh, 0c0h | 600 DB 66h, 48h, 0fh, 6eh, 0c0h |
598 punpcklqdq xmm0,xmm0 | 601 punpcklqdq xmm0,xmm0 |
599 » mov» QWORD PTR[24+r14*8+rdi],rbp | 602 » mov» QWORD[24+r14*8+rdi],rbp |
600 xor r14,r14 | 603 xor r14,r14 |
601 | 604 |
602 mov r15,r9 | 605 mov r15,r9 |
603 pxor xmm5,xmm5 | 606 pxor xmm5,xmm5 |
604 » jmp» $L$copy4x | 607 » jmp» NEAR $L$copy4x |
605 ALIGN 16 | 608 ALIGN 16 |
606 $L$copy4x:: | 609 $L$copy4x: |
607 » movdqu» xmm2,XMMWORD PTR[r14*1+rsp] | 610 » movdqu» xmm2,XMMWORD[r14*1+rsp] |
608 » movdqu» xmm4,XMMWORD PTR[16+r14*1+rsp] | 611 » movdqu» xmm4,XMMWORD[16+r14*1+rsp] |
609 » movdqu» xmm1,XMMWORD PTR[r14*1+rdi] | 612 » movdqu» xmm1,XMMWORD[r14*1+rdi] |
610 » movdqu» xmm3,XMMWORD PTR[16+r14*1+rdi] | 613 » movdqu» xmm3,XMMWORD[16+r14*1+rdi] |
611 pxor xmm2,xmm1 | 614 pxor xmm2,xmm1 |
612 pxor xmm4,xmm3 | 615 pxor xmm4,xmm3 |
613 pand xmm2,xmm0 | 616 pand xmm2,xmm0 |
614 pand xmm4,xmm0 | 617 pand xmm4,xmm0 |
615 pxor xmm2,xmm1 | 618 pxor xmm2,xmm1 |
616 pxor xmm4,xmm3 | 619 pxor xmm4,xmm3 |
617 » movdqu» XMMWORD PTR[r14*1+rdi],xmm2 | 620 » movdqu» XMMWORD[r14*1+rdi],xmm2 |
618 » movdqu» XMMWORD PTR[16+r14*1+rdi],xmm4 | 621 » movdqu» XMMWORD[16+r14*1+rdi],xmm4 |
619 » movdqa» XMMWORD PTR[r14*1+rsp],xmm5 | 622 » movdqa» XMMWORD[r14*1+rsp],xmm5 |
620 » movdqa» XMMWORD PTR[16+r14*1+rsp],xmm5 | 623 » movdqa» XMMWORD[16+r14*1+rsp],xmm5 |
621 | 624 |
622 » lea» r14,QWORD PTR[32+r14] | 625 » lea» r14,[32+r14] |
623 dec r15 | 626 dec r15 |
624 » jnz» $L$copy4x | 627 » jnz» NEAR $L$copy4x |
625 | 628 |
626 shl r9,2 | 629 shl r9,2 |
627 » mov» rsi,QWORD PTR[8+r9*8+rsp] | 630 » mov» rsi,QWORD[8+r9*8+rsp] |
628 mov rax,1 | 631 mov rax,1 |
629 » mov» r15,QWORD PTR[rsi] | 632 » mov» r15,QWORD[rsi] |
630 » mov» r14,QWORD PTR[8+rsi] | 633 » mov» r14,QWORD[8+rsi] |
631 » mov» r13,QWORD PTR[16+rsi] | 634 » mov» r13,QWORD[16+rsi] |
632 » mov» r12,QWORD PTR[24+rsi] | 635 » mov» r12,QWORD[24+rsi] |
633 » mov» rbp,QWORD PTR[32+rsi] | 636 » mov» rbp,QWORD[32+rsi] |
634 » mov» rbx,QWORD PTR[40+rsi] | 637 » mov» rbx,QWORD[40+rsi] |
635 » lea» rsp,QWORD PTR[48+rsi] | 638 » lea» rsp,[48+rsi] |
636 $L$mul4x_epilogue:: | 639 $L$mul4x_epilogue: |
637 » mov» rdi,QWORD PTR[8+rsp]» ;WIN64 epilogue | 640 » mov» rdi,QWORD[8+rsp]» ;WIN64 epilogue |
638 » mov» rsi,QWORD PTR[16+rsp] | 641 » mov» rsi,QWORD[16+rsp] |
639 DB 0F3h,0C3h ;repret | 642 DB 0F3h,0C3h ;repret |
640 $L$SEH_end_bn_mul4x_mont:: | 643 $L$SEH_end_bn_mul4x_mont: |
641 bn_mul4x_mont» ENDP | 644 EXTERN» bn_sqr8x_internal |
642 EXTERN» bn_sqr8x_internal:NEAR | |
643 | 645 |
644 | 646 |
645 ALIGN 32 | 647 ALIGN 32 |
646 bn_sqr8x_mont» PROC PRIVATE | 648 bn_sqr8x_mont: |
647 » mov» QWORD PTR[8+rsp],rdi» ;WIN64 prologue | 649 » mov» QWORD[8+rsp],rdi» ;WIN64 prologue |
648 » mov» QWORD PTR[16+rsp],rsi | 650 » mov» QWORD[16+rsp],rsi |
649 mov rax,rsp | 651 mov rax,rsp |
650 $L$SEH_begin_bn_sqr8x_mont:: | 652 $L$SEH_begin_bn_sqr8x_mont: |
651 mov rdi,rcx | 653 mov rdi,rcx |
652 mov rsi,rdx | 654 mov rsi,rdx |
653 mov rdx,r8 | 655 mov rdx,r8 |
654 mov rcx,r9 | 656 mov rcx,r9 |
655 » mov» r8,QWORD PTR[40+rsp] | 657 » mov» r8,QWORD[40+rsp] |
656 » mov» r9,QWORD PTR[48+rsp] | 658 » mov» r9,QWORD[48+rsp] |
657 | 659 |
658 | 660 |
659 $L$sqr8x_enter:: | 661 $L$sqr8x_enter: |
660 mov rax,rsp | 662 mov rax,rsp |
661 push rbx | 663 push rbx |
662 push rbp | 664 push rbp |
663 push r12 | 665 push r12 |
664 push r13 | 666 push r13 |
665 push r14 | 667 push r14 |
666 push r15 | 668 push r15 |
667 | 669 |
668 mov r10d,r9d | 670 mov r10d,r9d |
669 shl r9d,3 | 671 shl r9d,3 |
670 shl r10,3+2 | 672 shl r10,3+2 |
671 neg r9 | 673 neg r9 |
672 | 674 |
673 | 675 |
674 | 676 |
675 | 677 |
676 | 678 |
677 | 679 |
678 » lea» r11,QWORD PTR[((-64))+r9*4+rsp] | 680 » lea» r11,[((-64))+r9*4+rsp] |
679 » mov» r8,QWORD PTR[r8] | 681 » mov» r8,QWORD[r8] |
680 sub r11,rsi | 682 sub r11,rsi |
681 and r11,4095 | 683 and r11,4095 |
682 cmp r10,r11 | 684 cmp r10,r11 |
683 » jb» $L$sqr8x_sp_alt | 685 » jb» NEAR $L$sqr8x_sp_alt |
684 sub rsp,r11 | 686 sub rsp,r11 |
685 » lea» rsp,QWORD PTR[((-64))+r9*4+rsp] | 687 » lea» rsp,[((-64))+r9*4+rsp] |
686 » jmp» $L$sqr8x_sp_done | 688 » jmp» NEAR $L$sqr8x_sp_done |
687 | 689 |
688 ALIGN 32 | 690 ALIGN 32 |
689 $L$sqr8x_sp_alt:: | 691 $L$sqr8x_sp_alt: |
690 » lea» r10,QWORD PTR[((4096-64))+r9*4] | 692 » lea» r10,[((4096-64))+r9*4] |
691 » lea» rsp,QWORD PTR[((-64))+r9*4+rsp] | 693 » lea» rsp,[((-64))+r9*4+rsp] |
692 sub r11,r10 | 694 sub r11,r10 |
693 mov r10,0 | 695 mov r10,0 |
694 cmovc r11,r10 | 696 cmovc r11,r10 |
695 sub rsp,r11 | 697 sub rsp,r11 |
696 $L$sqr8x_sp_done:: | 698 $L$sqr8x_sp_done: |
697 and rsp,-64 | 699 and rsp,-64 |
698 mov r10,r9 | 700 mov r10,r9 |
699 neg r9 | 701 neg r9 |
700 | 702 |
701 » lea» r11,QWORD PTR[64+r9*2+rsp] | 703 » lea» r11,[64+r9*2+rsp] |
702 » mov» QWORD PTR[32+rsp],r8 | 704 » mov» QWORD[32+rsp],r8 |
703 » mov» QWORD PTR[40+rsp],rax | 705 » mov» QWORD[40+rsp],rax |
704 $L$sqr8x_body:: | 706 $L$sqr8x_body: |
705 | 707 |
706 mov rbp,r9 | 708 mov rbp,r9 |
707 DB 102,73,15,110,211 | 709 DB 102,73,15,110,211 |
708 shr rbp,3+2 | 710 shr rbp,3+2 |
709 » mov» eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] | 711 » mov» eax,DWORD[((OPENSSL_ia32cap_P+8))] |
710 » jmp» $L$sqr8x_copy_n | 712 » jmp» NEAR $L$sqr8x_copy_n |
711 | 713 |
712 ALIGN 32 | 714 ALIGN 32 |
713 $L$sqr8x_copy_n:: | 715 $L$sqr8x_copy_n: |
714 » movq» xmm0,QWORD PTR[rcx] | 716 » movq» xmm0,QWORD[rcx] |
715 » movq» xmm1,QWORD PTR[8+rcx] | 717 » movq» xmm1,QWORD[8+rcx] |
716 » movq» xmm3,QWORD PTR[16+rcx] | 718 » movq» xmm3,QWORD[16+rcx] |
717 » movq» xmm4,QWORD PTR[24+rcx] | 719 » movq» xmm4,QWORD[24+rcx] |
718 » lea» rcx,QWORD PTR[32+rcx] | 720 » lea» rcx,[32+rcx] |
719 » movdqa» XMMWORD PTR[r11],xmm0 | 721 » movdqa» XMMWORD[r11],xmm0 |
720 » movdqa» XMMWORD PTR[16+r11],xmm1 | 722 » movdqa» XMMWORD[16+r11],xmm1 |
721 » movdqa» XMMWORD PTR[32+r11],xmm3 | 723 » movdqa» XMMWORD[32+r11],xmm3 |
722 » movdqa» XMMWORD PTR[48+r11],xmm4 | 724 » movdqa» XMMWORD[48+r11],xmm4 |
723 » lea» r11,QWORD PTR[64+r11] | 725 » lea» r11,[64+r11] |
724 dec rbp | 726 dec rbp |
725 » jnz» $L$sqr8x_copy_n | 727 » jnz» NEAR $L$sqr8x_copy_n |
726 | 728 |
727 pxor xmm0,xmm0 | 729 pxor xmm0,xmm0 |
728 DB 102,72,15,110,207 | 730 DB 102,72,15,110,207 |
729 DB 102,73,15,110,218 | 731 DB 102,73,15,110,218 |
730 call bn_sqr8x_internal | 732 call bn_sqr8x_internal |
731 | 733 |
732 pxor xmm0,xmm0 | 734 pxor xmm0,xmm0 |
733 » lea» rax,QWORD PTR[48+rsp] | 735 » lea» rax,[48+rsp] |
734 » lea» rdx,QWORD PTR[64+r9*2+rsp] | 736 » lea» rdx,[64+r9*2+rsp] |
735 shr r9,3+2 | 737 shr r9,3+2 |
736 » mov» rsi,QWORD PTR[40+rsp] | 738 » mov» rsi,QWORD[40+rsp] |
737 » jmp» $L$sqr8x_zero | 739 » jmp» NEAR $L$sqr8x_zero |
738 | 740 |
739 ALIGN 32 | 741 ALIGN 32 |
740 $L$sqr8x_zero:: | 742 $L$sqr8x_zero: |
741 » movdqa» XMMWORD PTR[rax],xmm0 | 743 » movdqa» XMMWORD[rax],xmm0 |
742 » movdqa» XMMWORD PTR[16+rax],xmm0 | 744 » movdqa» XMMWORD[16+rax],xmm0 |
743 » movdqa» XMMWORD PTR[32+rax],xmm0 | 745 » movdqa» XMMWORD[32+rax],xmm0 |
744 » movdqa» XMMWORD PTR[48+rax],xmm0 | 746 » movdqa» XMMWORD[48+rax],xmm0 |
745 » lea» rax,QWORD PTR[64+rax] | 747 » lea» rax,[64+rax] |
746 » movdqa» XMMWORD PTR[rdx],xmm0 | 748 » movdqa» XMMWORD[rdx],xmm0 |
747 » movdqa» XMMWORD PTR[16+rdx],xmm0 | 749 » movdqa» XMMWORD[16+rdx],xmm0 |
748 » movdqa» XMMWORD PTR[32+rdx],xmm0 | 750 » movdqa» XMMWORD[32+rdx],xmm0 |
749 » movdqa» XMMWORD PTR[48+rdx],xmm0 | 751 » movdqa» XMMWORD[48+rdx],xmm0 |
750 » lea» rdx,QWORD PTR[64+rdx] | 752 » lea» rdx,[64+rdx] |
751 dec r9 | 753 dec r9 |
752 » jnz» $L$sqr8x_zero | 754 » jnz» NEAR $L$sqr8x_zero |
753 | 755 |
754 mov rax,1 | 756 mov rax,1 |
755 » mov» r15,QWORD PTR[((-48))+rsi] | 757 » mov» r15,QWORD[((-48))+rsi] |
756 » mov» r14,QWORD PTR[((-40))+rsi] | 758 » mov» r14,QWORD[((-40))+rsi] |
757 » mov» r13,QWORD PTR[((-32))+rsi] | 759 » mov» r13,QWORD[((-32))+rsi] |
758 » mov» r12,QWORD PTR[((-24))+rsi] | 760 » mov» r12,QWORD[((-24))+rsi] |
759 » mov» rbp,QWORD PTR[((-16))+rsi] | 761 » mov» rbp,QWORD[((-16))+rsi] |
760 » mov» rbx,QWORD PTR[((-8))+rsi] | 762 » mov» rbx,QWORD[((-8))+rsi] |
761 » lea» rsp,QWORD PTR[rsi] | 763 » lea» rsp,[rsi] |
762 $L$sqr8x_epilogue:: | 764 $L$sqr8x_epilogue: |
763 » mov» rdi,QWORD PTR[8+rsp]» ;WIN64 epilogue | 765 » mov» rdi,QWORD[8+rsp]» ;WIN64 epilogue |
764 » mov» rsi,QWORD PTR[16+rsp] | 766 » mov» rsi,QWORD[16+rsp] |
765 DB 0F3h,0C3h ;repret | 767 DB 0F3h,0C3h ;repret |
766 $L$SEH_end_bn_sqr8x_mont:: | 768 $L$SEH_end_bn_sqr8x_mont: |
767 bn_sqr8x_mont» ENDP | |
768 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 | 769 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
769 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 | 770 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
770 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 | 771 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 |
771 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 | 772 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 |
772 DB 115,108,46,111,114,103,62,0 | 773 DB 115,108,46,111,114,103,62,0 |
773 ALIGN 16 | 774 ALIGN 16 |
774 EXTERN» __imp_RtlVirtualUnwind:NEAR | 775 EXTERN» __imp_RtlVirtualUnwind |
775 | 776 |
776 ALIGN 16 | 777 ALIGN 16 |
777 mul_handler» PROC PRIVATE | 778 mul_handler: |
778 push rsi | 779 push rsi |
779 push rdi | 780 push rdi |
780 push rbx | 781 push rbx |
781 push rbp | 782 push rbp |
782 push r12 | 783 push r12 |
783 push r13 | 784 push r13 |
784 push r14 | 785 push r14 |
785 push r15 | 786 push r15 |
786 pushfq | 787 pushfq |
787 sub rsp,64 | 788 sub rsp,64 |
788 | 789 |
789 » mov» rax,QWORD PTR[120+r8] | 790 » mov» rax,QWORD[120+r8] |
790 » mov» rbx,QWORD PTR[248+r8] | 791 » mov» rbx,QWORD[248+r8] |
791 | 792 |
792 » mov» rsi,QWORD PTR[8+r9] | 793 » mov» rsi,QWORD[8+r9] |
793 » mov» r11,QWORD PTR[56+r9] | 794 » mov» r11,QWORD[56+r9] |
794 | 795 |
795 » mov» r10d,DWORD PTR[r11] | 796 » mov» r10d,DWORD[r11] |
796 » lea» r10,QWORD PTR[r10*1+rsi] | 797 » lea» r10,[r10*1+rsi] |
797 cmp rbx,r10 | 798 cmp rbx,r10 |
798 » jb» $L$common_seh_tail | 799 » jb» NEAR $L$common_seh_tail |
799 | 800 |
800 » mov» rax,QWORD PTR[152+r8] | 801 » mov» rax,QWORD[152+r8] |
801 | 802 |
802 » mov» r10d,DWORD PTR[4+r11] | 803 » mov» r10d,DWORD[4+r11] |
803 » lea» r10,QWORD PTR[r10*1+rsi] | 804 » lea» r10,[r10*1+rsi] |
804 cmp rbx,r10 | 805 cmp rbx,r10 |
805 » jae» $L$common_seh_tail | 806 » jae» NEAR $L$common_seh_tail |
806 | 807 |
807 » mov» r10,QWORD PTR[192+r8] | 808 » mov» r10,QWORD[192+r8] |
808 » mov» rax,QWORD PTR[8+r10*8+rax] | 809 » mov» rax,QWORD[8+r10*8+rax] |
809 » lea» rax,QWORD PTR[48+rax] | 810 » lea» rax,[48+rax] |
810 | 811 |
811 » mov» rbx,QWORD PTR[((-8))+rax] | 812 » mov» rbx,QWORD[((-8))+rax] |
812 » mov» rbp,QWORD PTR[((-16))+rax] | 813 » mov» rbp,QWORD[((-16))+rax] |
813 » mov» r12,QWORD PTR[((-24))+rax] | 814 » mov» r12,QWORD[((-24))+rax] |
814 » mov» r13,QWORD PTR[((-32))+rax] | 815 » mov» r13,QWORD[((-32))+rax] |
815 » mov» r14,QWORD PTR[((-40))+rax] | 816 » mov» r14,QWORD[((-40))+rax] |
816 » mov» r15,QWORD PTR[((-48))+rax] | 817 » mov» r15,QWORD[((-48))+rax] |
817 » mov» QWORD PTR[144+r8],rbx | 818 » mov» QWORD[144+r8],rbx |
818 » mov» QWORD PTR[160+r8],rbp | 819 » mov» QWORD[160+r8],rbp |
819 » mov» QWORD PTR[216+r8],r12 | 820 » mov» QWORD[216+r8],r12 |
820 » mov» QWORD PTR[224+r8],r13 | 821 » mov» QWORD[224+r8],r13 |
821 » mov» QWORD PTR[232+r8],r14 | 822 » mov» QWORD[232+r8],r14 |
822 » mov» QWORD PTR[240+r8],r15 | 823 » mov» QWORD[240+r8],r15 |
823 | 824 |
824 » jmp» $L$common_seh_tail | 825 » jmp» NEAR $L$common_seh_tail |
825 mul_handler» ENDP | 826 |
826 | 827 |
827 | 828 |
828 ALIGN 16 | 829 ALIGN 16 |
829 sqr_handler» PROC PRIVATE | 830 sqr_handler: |
830 push rsi | 831 push rsi |
831 push rdi | 832 push rdi |
832 push rbx | 833 push rbx |
833 push rbp | 834 push rbp |
834 push r12 | 835 push r12 |
835 push r13 | 836 push r13 |
836 push r14 | 837 push r14 |
837 push r15 | 838 push r15 |
838 pushfq | 839 pushfq |
839 sub rsp,64 | 840 sub rsp,64 |
840 | 841 |
841 » mov» rax,QWORD PTR[120+r8] | 842 » mov» rax,QWORD[120+r8] |
842 » mov» rbx,QWORD PTR[248+r8] | 843 » mov» rbx,QWORD[248+r8] |
843 | 844 |
844 » mov» rsi,QWORD PTR[8+r9] | 845 » mov» rsi,QWORD[8+r9] |
845 » mov» r11,QWORD PTR[56+r9] | 846 » mov» r11,QWORD[56+r9] |
846 | 847 |
847 » mov» r10d,DWORD PTR[r11] | 848 » mov» r10d,DWORD[r11] |
848 » lea» r10,QWORD PTR[r10*1+rsi] | 849 » lea» r10,[r10*1+rsi] |
849 cmp rbx,r10 | 850 cmp rbx,r10 |
850 » jb» $L$common_seh_tail | 851 » jb» NEAR $L$common_seh_tail |
851 | 852 |
852 » mov» rax,QWORD PTR[152+r8] | 853 » mov» rax,QWORD[152+r8] |
853 | 854 |
854 » mov» r10d,DWORD PTR[4+r11] | 855 » mov» r10d,DWORD[4+r11] |
855 » lea» r10,QWORD PTR[r10*1+rsi] | 856 » lea» r10,[r10*1+rsi] |
856 cmp rbx,r10 | 857 cmp rbx,r10 |
857 » jae» $L$common_seh_tail | 858 » jae» NEAR $L$common_seh_tail |
858 | 859 |
859 » mov» rax,QWORD PTR[40+rax] | 860 » mov» rax,QWORD[40+rax] |
860 | 861 |
861 » mov» rbx,QWORD PTR[((-8))+rax] | 862 » mov» rbx,QWORD[((-8))+rax] |
862 » mov» rbp,QWORD PTR[((-16))+rax] | 863 » mov» rbp,QWORD[((-16))+rax] |
863 » mov» r12,QWORD PTR[((-24))+rax] | 864 » mov» r12,QWORD[((-24))+rax] |
864 » mov» r13,QWORD PTR[((-32))+rax] | 865 » mov» r13,QWORD[((-32))+rax] |
865 » mov» r14,QWORD PTR[((-40))+rax] | 866 » mov» r14,QWORD[((-40))+rax] |
866 » mov» r15,QWORD PTR[((-48))+rax] | 867 » mov» r15,QWORD[((-48))+rax] |
867 » mov» QWORD PTR[144+r8],rbx | 868 » mov» QWORD[144+r8],rbx |
868 » mov» QWORD PTR[160+r8],rbp | 869 » mov» QWORD[160+r8],rbp |
869 » mov» QWORD PTR[216+r8],r12 | 870 » mov» QWORD[216+r8],r12 |
870 » mov» QWORD PTR[224+r8],r13 | 871 » mov» QWORD[224+r8],r13 |
871 » mov» QWORD PTR[232+r8],r14 | 872 » mov» QWORD[232+r8],r14 |
872 » mov» QWORD PTR[240+r8],r15 | 873 » mov» QWORD[240+r8],r15 |
873 | 874 |
874 $L$common_seh_tail:: | 875 $L$common_seh_tail: |
875 » mov» rdi,QWORD PTR[8+rax] | 876 » mov» rdi,QWORD[8+rax] |
876 » mov» rsi,QWORD PTR[16+rax] | 877 » mov» rsi,QWORD[16+rax] |
877 » mov» QWORD PTR[152+r8],rax | 878 » mov» QWORD[152+r8],rax |
878 » mov» QWORD PTR[168+r8],rsi | 879 » mov» QWORD[168+r8],rsi |
879 » mov» QWORD PTR[176+r8],rdi | 880 » mov» QWORD[176+r8],rdi |
880 | 881 |
881 » mov» rdi,QWORD PTR[40+r9] | 882 » mov» rdi,QWORD[40+r9] |
882 mov rsi,r8 | 883 mov rsi,r8 |
883 mov ecx,154 | 884 mov ecx,154 |
884 » DD» 0a548f3fch | 885 » DD» 0xa548f3fc |
885 | 886 |
886 mov rsi,r9 | 887 mov rsi,r9 |
887 xor rcx,rcx | 888 xor rcx,rcx |
888 » mov» rdx,QWORD PTR[8+rsi] | 889 » mov» rdx,QWORD[8+rsi] |
889 » mov» r8,QWORD PTR[rsi] | 890 » mov» r8,QWORD[rsi] |
890 » mov» r9,QWORD PTR[16+rsi] | 891 » mov» r9,QWORD[16+rsi] |
891 » mov» r10,QWORD PTR[40+rsi] | 892 » mov» r10,QWORD[40+rsi] |
892 » lea» r11,QWORD PTR[56+rsi] | 893 » lea» r11,[56+rsi] |
893 » lea» r12,QWORD PTR[24+rsi] | 894 » lea» r12,[24+rsi] |
894 » mov» QWORD PTR[32+rsp],r10 | 895 » mov» QWORD[32+rsp],r10 |
895 » mov» QWORD PTR[40+rsp],r11 | 896 » mov» QWORD[40+rsp],r11 |
896 » mov» QWORD PTR[48+rsp],r12 | 897 » mov» QWORD[48+rsp],r12 |
897 » mov» QWORD PTR[56+rsp],rcx | 898 » mov» QWORD[56+rsp],rcx |
898 » call» QWORD PTR[__imp_RtlVirtualUnwind] | 899 » call» QWORD[__imp_RtlVirtualUnwind] |
899 | 900 |
900 mov eax,1 | 901 mov eax,1 |
901 add rsp,64 | 902 add rsp,64 |
902 popfq | 903 popfq |
903 pop r15 | 904 pop r15 |
904 pop r14 | 905 pop r14 |
905 pop r13 | 906 pop r13 |
906 pop r12 | 907 pop r12 |
907 pop rbp | 908 pop rbp |
908 pop rbx | 909 pop rbx |
909 pop rdi | 910 pop rdi |
910 pop rsi | 911 pop rsi |
911 DB 0F3h,0C3h ;repret | 912 DB 0F3h,0C3h ;repret |
912 sqr_handler ENDP | |
913 | 913 |
914 .text$» ENDS | 914 |
915 .pdata» SEGMENT READONLY ALIGN(4) | 915 section».pdata rdata align=4 |
916 ALIGN 4 | 916 ALIGN 4 |
917 » DD» imagerel $L$SEH_begin_bn_mul_mont | 917 » DD» $L$SEH_begin_bn_mul_mont wrt ..imagebase |
918 » DD» imagerel $L$SEH_end_bn_mul_mont | 918 » DD» $L$SEH_end_bn_mul_mont wrt ..imagebase |
919 » DD» imagerel $L$SEH_info_bn_mul_mont | 919 » DD» $L$SEH_info_bn_mul_mont wrt ..imagebase |
920 | 920 |
921 » DD» imagerel $L$SEH_begin_bn_mul4x_mont | 921 » DD» $L$SEH_begin_bn_mul4x_mont wrt ..imagebase |
922 » DD» imagerel $L$SEH_end_bn_mul4x_mont | 922 » DD» $L$SEH_end_bn_mul4x_mont wrt ..imagebase |
923 » DD» imagerel $L$SEH_info_bn_mul4x_mont | 923 » DD» $L$SEH_info_bn_mul4x_mont wrt ..imagebase |
924 | 924 |
925 » DD» imagerel $L$SEH_begin_bn_sqr8x_mont | 925 » DD» $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase |
926 » DD» imagerel $L$SEH_end_bn_sqr8x_mont | 926 » DD» $L$SEH_end_bn_sqr8x_mont wrt ..imagebase |
927 » DD» imagerel $L$SEH_info_bn_sqr8x_mont | 927 » DD» $L$SEH_info_bn_sqr8x_mont wrt ..imagebase |
928 .pdata» ENDS | 928 section».xdata rdata align=8 |
929 .xdata» SEGMENT READONLY ALIGN(8) | |
930 ALIGN 8 | 929 ALIGN 8 |
931 $L$SEH_info_bn_mul_mont:: | 930 $L$SEH_info_bn_mul_mont: |
932 DB 9,0,0,0 | 931 DB 9,0,0,0 |
933 » DD» imagerel mul_handler | 932 » DD» mul_handler wrt ..imagebase |
934 » DD» imagerel $L$mul_body,imagerel $L$mul_epilogue | 933 » DD» $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase |
935 $L$SEH_info_bn_mul4x_mont:: | 934 $L$SEH_info_bn_mul4x_mont: |
936 DB 9,0,0,0 | 935 DB 9,0,0,0 |
937 » DD» imagerel mul_handler | 936 » DD» mul_handler wrt ..imagebase |
938 » DD» imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue | 937 » DD» $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase |
939 $L$SEH_info_bn_sqr8x_mont:: | 938 $L$SEH_info_bn_sqr8x_mont: |
940 DB 9,0,0,0 | 939 DB 9,0,0,0 |
941 » DD» imagerel sqr_handler | 940 » DD» sqr_handler wrt ..imagebase |
942 » DD» imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue | 941 » DD» $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase |
943 | |
944 .xdata» ENDS | |
945 END | |
OLD | NEW |