OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 | 5 |
6 .globl _bn_mul_mont_gather5 | 6 .globl _bn_mul_mont_gather5 |
7 .private_extern _bn_mul_mont_gather5 | 7 .private_extern _bn_mul_mont_gather5 |
8 | 8 |
9 .p2align 6 | 9 .p2align 6 |
10 _bn_mul_mont_gather5: | 10 _bn_mul_mont_gather5: |
11 testl $7,%r9d | 11 testl $7,%r9d |
12 jnz L$mul_enter | 12 jnz L$mul_enter |
13 jmp L$mul4x_enter | 13 jmp L$mul4x_enter |
14 | 14 |
15 .p2align 4 | 15 .p2align 4 |
16 L$mul_enter: | 16 L$mul_enter: |
17 movl %r9d,%r9d | 17 movl %r9d,%r9d |
18 movq %rsp,%rax | 18 movq %rsp,%rax |
19 » movl» 8(%rsp),%r10d | 19 » movd» 8(%rsp),%xmm5 |
| 20 » leaq» L$inc(%rip),%r10 |
20 pushq %rbx | 21 pushq %rbx |
21 pushq %rbp | 22 pushq %rbp |
22 pushq %r12 | 23 pushq %r12 |
23 pushq %r13 | 24 pushq %r13 |
24 pushq %r14 | 25 pushq %r14 |
25 pushq %r15 | 26 pushq %r15 |
| 27 |
26 leaq 2(%r9),%r11 | 28 leaq 2(%r9),%r11 |
27 negq %r11 | 29 negq %r11 |
28 » leaq» (%rsp,%r11,8),%rsp | 30 » leaq» -264(%rsp,%r11,8),%rsp |
29 andq $-1024,%rsp | 31 andq $-1024,%rsp |
30 | 32 |
31 movq %rax,8(%rsp,%r9,8) | 33 movq %rax,8(%rsp,%r9,8) |
32 L$mul_body: | 34 L$mul_body: |
33 » movq» %rdx,%r12 | 35 » leaq» 128(%rdx),%r12 |
34 » movq» %r10,%r11 | 36 » movdqa» 0(%r10),%xmm0 |
35 » shrq» $3,%r10 | 37 » movdqa» 16(%r10),%xmm1 |
36 » andq» $7,%r11 | 38 » leaq» 24-112(%rsp,%r9,8),%r10 |
37 » notq» %r10 | 39 » andq» $-16,%r10 |
38 » leaq» L$magic_masks(%rip),%rax | |
39 » andq» $3,%r10 | |
40 » leaq» 96(%r12,%r11,8),%r12 | |
41 » movq» 0(%rax,%r10,8),%xmm4 | |
42 » movq» 8(%rax,%r10,8),%xmm5 | |
43 » movq» 16(%rax,%r10,8),%xmm6 | |
44 » movq» 24(%rax,%r10,8),%xmm7 | |
45 | 40 |
46 » movq» -96(%r12),%xmm0 | 41 » pshufd» $0,%xmm5,%xmm5 |
47 » movq» -32(%r12),%xmm1 | 42 » movdqa» %xmm1,%xmm4 |
48 » pand» %xmm4,%xmm0 | 43 » movdqa» %xmm1,%xmm2 |
49 » movq» 32(%r12),%xmm2 | 44 » paddd» %xmm0,%xmm1 |
50 » pand» %xmm5,%xmm1 | 45 » pcmpeqd»%xmm5,%xmm0 |
51 » movq» 96(%r12),%xmm3 | 46 .byte» 0x67 |
52 » pand» %xmm6,%xmm2 | 47 » movdqa» %xmm4,%xmm3 |
| 48 » paddd» %xmm1,%xmm2 |
| 49 » pcmpeqd»%xmm5,%xmm1 |
| 50 » movdqa» %xmm0,112(%r10) |
| 51 » movdqa» %xmm4,%xmm0 |
| 52 |
| 53 » paddd» %xmm2,%xmm3 |
| 54 » pcmpeqd»%xmm5,%xmm2 |
| 55 » movdqa» %xmm1,128(%r10) |
| 56 » movdqa» %xmm4,%xmm1 |
| 57 |
| 58 » paddd» %xmm3,%xmm0 |
| 59 » pcmpeqd»%xmm5,%xmm3 |
| 60 » movdqa» %xmm2,144(%r10) |
| 61 » movdqa» %xmm4,%xmm2 |
| 62 |
| 63 » paddd» %xmm0,%xmm1 |
| 64 » pcmpeqd»%xmm5,%xmm0 |
| 65 » movdqa» %xmm3,160(%r10) |
| 66 » movdqa» %xmm4,%xmm3 |
| 67 » paddd» %xmm1,%xmm2 |
| 68 » pcmpeqd»%xmm5,%xmm1 |
| 69 » movdqa» %xmm0,176(%r10) |
| 70 » movdqa» %xmm4,%xmm0 |
| 71 |
| 72 » paddd» %xmm2,%xmm3 |
| 73 » pcmpeqd»%xmm5,%xmm2 |
| 74 » movdqa» %xmm1,192(%r10) |
| 75 » movdqa» %xmm4,%xmm1 |
| 76 |
| 77 » paddd» %xmm3,%xmm0 |
| 78 » pcmpeqd»%xmm5,%xmm3 |
| 79 » movdqa» %xmm2,208(%r10) |
| 80 » movdqa» %xmm4,%xmm2 |
| 81 |
| 82 » paddd» %xmm0,%xmm1 |
| 83 » pcmpeqd»%xmm5,%xmm0 |
| 84 » movdqa» %xmm3,224(%r10) |
| 85 » movdqa» %xmm4,%xmm3 |
| 86 » paddd» %xmm1,%xmm2 |
| 87 » pcmpeqd»%xmm5,%xmm1 |
| 88 » movdqa» %xmm0,240(%r10) |
| 89 » movdqa» %xmm4,%xmm0 |
| 90 |
| 91 » paddd» %xmm2,%xmm3 |
| 92 » pcmpeqd»%xmm5,%xmm2 |
| 93 » movdqa» %xmm1,256(%r10) |
| 94 » movdqa» %xmm4,%xmm1 |
| 95 |
| 96 » paddd» %xmm3,%xmm0 |
| 97 » pcmpeqd»%xmm5,%xmm3 |
| 98 » movdqa» %xmm2,272(%r10) |
| 99 » movdqa» %xmm4,%xmm2 |
| 100 |
| 101 » paddd» %xmm0,%xmm1 |
| 102 » pcmpeqd»%xmm5,%xmm0 |
| 103 » movdqa» %xmm3,288(%r10) |
| 104 » movdqa» %xmm4,%xmm3 |
| 105 » paddd» %xmm1,%xmm2 |
| 106 » pcmpeqd»%xmm5,%xmm1 |
| 107 » movdqa» %xmm0,304(%r10) |
| 108 |
| 109 » paddd» %xmm2,%xmm3 |
| 110 .byte» 0x67 |
| 111 » pcmpeqd»%xmm5,%xmm2 |
| 112 » movdqa» %xmm1,320(%r10) |
| 113 |
| 114 » pcmpeqd»%xmm5,%xmm3 |
| 115 » movdqa» %xmm2,336(%r10) |
| 116 » pand» 64(%r12),%xmm0 |
| 117 |
| 118 » pand» 80(%r12),%xmm1 |
| 119 » pand» 96(%r12),%xmm2 |
| 120 » movdqa» %xmm3,352(%r10) |
| 121 » pand» 112(%r12),%xmm3 |
| 122 » por» %xmm2,%xmm0 |
| 123 » por» %xmm3,%xmm1 |
| 124 » movdqa» -128(%r12),%xmm4 |
| 125 » movdqa» -112(%r12),%xmm5 |
| 126 » movdqa» -96(%r12),%xmm2 |
| 127 » pand» 112(%r10),%xmm4 |
| 128 » movdqa» -80(%r12),%xmm3 |
| 129 » pand» 128(%r10),%xmm5 |
| 130 » por» %xmm4,%xmm0 |
| 131 » pand» 144(%r10),%xmm2 |
| 132 » por» %xmm5,%xmm1 |
| 133 » pand» 160(%r10),%xmm3 |
| 134 » por» %xmm2,%xmm0 |
| 135 » por» %xmm3,%xmm1 |
| 136 » movdqa» -64(%r12),%xmm4 |
| 137 » movdqa» -48(%r12),%xmm5 |
| 138 » movdqa» -32(%r12),%xmm2 |
| 139 » pand» 176(%r10),%xmm4 |
| 140 » movdqa» -16(%r12),%xmm3 |
| 141 » pand» 192(%r10),%xmm5 |
| 142 » por» %xmm4,%xmm0 |
| 143 » pand» 208(%r10),%xmm2 |
| 144 » por» %xmm5,%xmm1 |
| 145 » pand» 224(%r10),%xmm3 |
| 146 » por» %xmm2,%xmm0 |
| 147 » por» %xmm3,%xmm1 |
| 148 » movdqa» 0(%r12),%xmm4 |
| 149 » movdqa» 16(%r12),%xmm5 |
| 150 » movdqa» 32(%r12),%xmm2 |
| 151 » pand» 240(%r10),%xmm4 |
| 152 » movdqa» 48(%r12),%xmm3 |
| 153 » pand» 256(%r10),%xmm5 |
| 154 » por» %xmm4,%xmm0 |
| 155 » pand» 272(%r10),%xmm2 |
| 156 » por» %xmm5,%xmm1 |
| 157 » pand» 288(%r10),%xmm3 |
| 158 » por» %xmm2,%xmm0 |
| 159 » por» %xmm3,%xmm1 |
53 por %xmm1,%xmm0 | 160 por %xmm1,%xmm0 |
54 » pand» %xmm7,%xmm3 | 161 » pshufd» $0x4e,%xmm0,%xmm1 |
55 » por» %xmm2,%xmm0 | 162 » por» %xmm1,%xmm0 |
56 leaq 256(%r12),%r12 | 163 leaq 256(%r12),%r12 |
57 por %xmm3,%xmm0 | |
58 | |
59 .byte 102,72,15,126,195 | 164 .byte 102,72,15,126,195 |
60 | 165 |
61 movq (%r8),%r8 | 166 movq (%r8),%r8 |
62 movq (%rsi),%rax | 167 movq (%rsi),%rax |
63 | 168 |
64 xorq %r14,%r14 | 169 xorq %r14,%r14 |
65 xorq %r15,%r15 | 170 xorq %r15,%r15 |
66 | 171 |
67 movq -96(%r12),%xmm0 | |
68 movq -32(%r12),%xmm1 | |
69 pand %xmm4,%xmm0 | |
70 movq 32(%r12),%xmm2 | |
71 pand %xmm5,%xmm1 | |
72 | |
73 movq %r8,%rbp | 172 movq %r8,%rbp |
74 mulq %rbx | 173 mulq %rbx |
75 movq %rax,%r10 | 174 movq %rax,%r10 |
76 movq (%rcx),%rax | 175 movq (%rcx),%rax |
77 | 176 |
78 movq 96(%r12),%xmm3 | |
79 pand %xmm6,%xmm2 | |
80 por %xmm1,%xmm0 | |
81 pand %xmm7,%xmm3 | |
82 | |
83 imulq %r10,%rbp | 177 imulq %r10,%rbp |
84 movq %rdx,%r11 | 178 movq %rdx,%r11 |
85 | 179 |
86 por %xmm2,%xmm0 | |
87 leaq 256(%r12),%r12 | |
88 por %xmm3,%xmm0 | |
89 | |
90 mulq %rbp | 180 mulq %rbp |
91 addq %rax,%r10 | 181 addq %rax,%r10 |
92 movq 8(%rsi),%rax | 182 movq 8(%rsi),%rax |
93 adcq $0,%rdx | 183 adcq $0,%rdx |
94 movq %rdx,%r13 | 184 movq %rdx,%r13 |
95 | 185 |
96 leaq 1(%r15),%r15 | 186 leaq 1(%r15),%r15 |
97 jmp L$1st_enter | 187 jmp L$1st_enter |
98 | 188 |
99 .p2align 4 | 189 .p2align 4 |
(...skipping 12 matching lines...) Expand all Loading... |
112 addq %rax,%r11 | 202 addq %rax,%r11 |
113 movq (%rcx,%r15,8),%rax | 203 movq (%rcx,%r15,8),%rax |
114 adcq $0,%rdx | 204 adcq $0,%rdx |
115 leaq 1(%r15),%r15 | 205 leaq 1(%r15),%r15 |
116 movq %rdx,%r10 | 206 movq %rdx,%r10 |
117 | 207 |
118 mulq %rbp | 208 mulq %rbp |
119 cmpq %r9,%r15 | 209 cmpq %r9,%r15 |
120 jne L$1st | 210 jne L$1st |
121 | 211 |
122 .byte 102,72,15,126,195 | |
123 | 212 |
124 addq %rax,%r13 | 213 addq %rax,%r13 |
125 movq (%rsi),%rax | |
126 adcq $0,%rdx | 214 adcq $0,%rdx |
127 addq %r11,%r13 | 215 addq %r11,%r13 |
128 adcq $0,%rdx | 216 adcq $0,%rdx |
129 » movq» %r13,-16(%rsp,%r15,8) | 217 » movq» %r13,-16(%rsp,%r9,8) |
130 movq %rdx,%r13 | 218 movq %rdx,%r13 |
131 movq %r10,%r11 | 219 movq %r10,%r11 |
132 | 220 |
133 xorq %rdx,%rdx | 221 xorq %rdx,%rdx |
134 addq %r11,%r13 | 222 addq %r11,%r13 |
135 adcq $0,%rdx | 223 adcq $0,%rdx |
136 movq %r13,-8(%rsp,%r9,8) | 224 movq %r13,-8(%rsp,%r9,8) |
137 movq %rdx,(%rsp,%r9,8) | 225 movq %rdx,(%rsp,%r9,8) |
138 | 226 |
139 leaq 1(%r14),%r14 | 227 leaq 1(%r14),%r14 |
140 jmp L$outer | 228 jmp L$outer |
141 .p2align 4 | 229 .p2align 4 |
142 L$outer: | 230 L$outer: |
| 231 leaq 24+128(%rsp,%r9,8),%rdx |
| 232 andq $-16,%rdx |
| 233 pxor %xmm4,%xmm4 |
| 234 pxor %xmm5,%xmm5 |
| 235 movdqa -128(%r12),%xmm0 |
| 236 movdqa -112(%r12),%xmm1 |
| 237 movdqa -96(%r12),%xmm2 |
| 238 movdqa -80(%r12),%xmm3 |
| 239 pand -128(%rdx),%xmm0 |
| 240 pand -112(%rdx),%xmm1 |
| 241 por %xmm0,%xmm4 |
| 242 pand -96(%rdx),%xmm2 |
| 243 por %xmm1,%xmm5 |
| 244 pand -80(%rdx),%xmm3 |
| 245 por %xmm2,%xmm4 |
| 246 por %xmm3,%xmm5 |
| 247 movdqa -64(%r12),%xmm0 |
| 248 movdqa -48(%r12),%xmm1 |
| 249 movdqa -32(%r12),%xmm2 |
| 250 movdqa -16(%r12),%xmm3 |
| 251 pand -64(%rdx),%xmm0 |
| 252 pand -48(%rdx),%xmm1 |
| 253 por %xmm0,%xmm4 |
| 254 pand -32(%rdx),%xmm2 |
| 255 por %xmm1,%xmm5 |
| 256 pand -16(%rdx),%xmm3 |
| 257 por %xmm2,%xmm4 |
| 258 por %xmm3,%xmm5 |
| 259 movdqa 0(%r12),%xmm0 |
| 260 movdqa 16(%r12),%xmm1 |
| 261 movdqa 32(%r12),%xmm2 |
| 262 movdqa 48(%r12),%xmm3 |
| 263 pand 0(%rdx),%xmm0 |
| 264 pand 16(%rdx),%xmm1 |
| 265 por %xmm0,%xmm4 |
| 266 pand 32(%rdx),%xmm2 |
| 267 por %xmm1,%xmm5 |
| 268 pand 48(%rdx),%xmm3 |
| 269 por %xmm2,%xmm4 |
| 270 por %xmm3,%xmm5 |
| 271 movdqa 64(%r12),%xmm0 |
| 272 movdqa 80(%r12),%xmm1 |
| 273 movdqa 96(%r12),%xmm2 |
| 274 movdqa 112(%r12),%xmm3 |
| 275 pand 64(%rdx),%xmm0 |
| 276 pand 80(%rdx),%xmm1 |
| 277 por %xmm0,%xmm4 |
| 278 pand 96(%rdx),%xmm2 |
| 279 por %xmm1,%xmm5 |
| 280 pand 112(%rdx),%xmm3 |
| 281 por %xmm2,%xmm4 |
| 282 por %xmm3,%xmm5 |
| 283 por %xmm5,%xmm4 |
| 284 pshufd $0x4e,%xmm4,%xmm0 |
| 285 por %xmm4,%xmm0 |
| 286 leaq 256(%r12),%r12 |
| 287 |
| 288 movq (%rsi),%rax |
| 289 .byte 102,72,15,126,195 |
| 290 |
143 xorq %r15,%r15 | 291 xorq %r15,%r15 |
144 movq %r8,%rbp | 292 movq %r8,%rbp |
145 movq (%rsp),%r10 | 293 movq (%rsp),%r10 |
146 | 294 |
147 movq -96(%r12),%xmm0 | |
148 movq -32(%r12),%xmm1 | |
149 pand %xmm4,%xmm0 | |
150 movq 32(%r12),%xmm2 | |
151 pand %xmm5,%xmm1 | |
152 | |
153 mulq %rbx | 295 mulq %rbx |
154 addq %rax,%r10 | 296 addq %rax,%r10 |
155 movq (%rcx),%rax | 297 movq (%rcx),%rax |
156 adcq $0,%rdx | 298 adcq $0,%rdx |
157 | 299 |
158 movq 96(%r12),%xmm3 | |
159 pand %xmm6,%xmm2 | |
160 por %xmm1,%xmm0 | |
161 pand %xmm7,%xmm3 | |
162 | |
163 imulq %r10,%rbp | 300 imulq %r10,%rbp |
164 movq %rdx,%r11 | 301 movq %rdx,%r11 |
165 | 302 |
166 por %xmm2,%xmm0 | |
167 leaq 256(%r12),%r12 | |
168 por %xmm3,%xmm0 | |
169 | |
170 mulq %rbp | 303 mulq %rbp |
171 addq %rax,%r10 | 304 addq %rax,%r10 |
172 movq 8(%rsi),%rax | 305 movq 8(%rsi),%rax |
173 adcq $0,%rdx | 306 adcq $0,%rdx |
174 movq 8(%rsp),%r10 | 307 movq 8(%rsp),%r10 |
175 movq %rdx,%r13 | 308 movq %rdx,%r13 |
176 | 309 |
177 leaq 1(%r15),%r15 | 310 leaq 1(%r15),%r15 |
178 jmp L$inner_enter | 311 jmp L$inner_enter |
179 | 312 |
(...skipping 15 matching lines...) Expand all Loading... |
195 adcq $0,%rdx | 328 adcq $0,%rdx |
196 addq %r11,%r10 | 329 addq %r11,%r10 |
197 movq %rdx,%r11 | 330 movq %rdx,%r11 |
198 adcq $0,%r11 | 331 adcq $0,%r11 |
199 leaq 1(%r15),%r15 | 332 leaq 1(%r15),%r15 |
200 | 333 |
201 mulq %rbp | 334 mulq %rbp |
202 cmpq %r9,%r15 | 335 cmpq %r9,%r15 |
203 jne L$inner | 336 jne L$inner |
204 | 337 |
205 .byte 102,72,15,126,195 | |
206 | |
207 addq %rax,%r13 | 338 addq %rax,%r13 |
208 movq (%rsi),%rax | |
209 adcq $0,%rdx | 339 adcq $0,%rdx |
210 addq %r10,%r13 | 340 addq %r10,%r13 |
211 » movq» (%rsp,%r15,8),%r10 | 341 » movq» (%rsp,%r9,8),%r10 |
212 adcq $0,%rdx | 342 adcq $0,%rdx |
213 » movq» %r13,-16(%rsp,%r15,8) | 343 » movq» %r13,-16(%rsp,%r9,8) |
214 movq %rdx,%r13 | 344 movq %rdx,%r13 |
215 | 345 |
216 xorq %rdx,%rdx | 346 xorq %rdx,%rdx |
217 addq %r11,%r13 | 347 addq %r11,%r13 |
218 adcq $0,%rdx | 348 adcq $0,%rdx |
219 addq %r10,%r13 | 349 addq %r10,%r13 |
220 adcq $0,%rdx | 350 adcq $0,%rdx |
221 movq %r13,-8(%rsp,%r9,8) | 351 movq %r13,-8(%rsp,%r9,8) |
222 movq %rdx,(%rsp,%r9,8) | 352 movq %rdx,(%rsp,%r9,8) |
223 | 353 |
(...skipping 25 matching lines...) Expand all Loading... |
249 andq %rax,%rsi | 379 andq %rax,%rsi |
250 xorq %rcx,%rsi | 380 xorq %rcx,%rsi |
251 movq %r14,(%rsp,%r14,8) | 381 movq %r14,(%rsp,%r14,8) |
252 movq %rsi,(%rdi,%r14,8) | 382 movq %rsi,(%rdi,%r14,8) |
253 leaq 1(%r14),%r14 | 383 leaq 1(%r14),%r14 |
254 subq $1,%r15 | 384 subq $1,%r15 |
255 jnz L$copy | 385 jnz L$copy |
256 | 386 |
257 movq 8(%rsp,%r9,8),%rsi | 387 movq 8(%rsp,%r9,8),%rsi |
258 movq $1,%rax | 388 movq $1,%rax |
| 389 |
259 movq -48(%rsi),%r15 | 390 movq -48(%rsi),%r15 |
260 movq -40(%rsi),%r14 | 391 movq -40(%rsi),%r14 |
261 movq -32(%rsi),%r13 | 392 movq -32(%rsi),%r13 |
262 movq -24(%rsi),%r12 | 393 movq -24(%rsi),%r12 |
263 movq -16(%rsi),%rbp | 394 movq -16(%rsi),%rbp |
264 movq -8(%rsi),%rbx | 395 movq -8(%rsi),%rbx |
265 leaq (%rsi),%rsp | 396 leaq (%rsi),%rsp |
266 L$mul_epilogue: | 397 L$mul_epilogue: |
267 .byte 0xf3,0xc3 | 398 .byte 0xf3,0xc3 |
268 | 399 |
269 | 400 |
270 .p2align 5 | 401 .p2align 5 |
271 bn_mul4x_mont_gather5: | 402 bn_mul4x_mont_gather5: |
272 L$mul4x_enter: | 403 L$mul4x_enter: |
273 .byte 0x67 | 404 .byte 0x67 |
274 movq %rsp,%rax | 405 movq %rsp,%rax |
275 pushq %rbx | 406 pushq %rbx |
276 pushq %rbp | 407 pushq %rbp |
277 pushq %r12 | 408 pushq %r12 |
278 pushq %r13 | 409 pushq %r13 |
279 pushq %r14 | 410 pushq %r14 |
280 pushq %r15 | 411 pushq %r15 |
| 412 |
281 .byte 0x67 | 413 .byte 0x67 |
282 movl %r9d,%r10d | |
283 shll $3,%r9d | 414 shll $3,%r9d |
284 » shll» $3+2,%r10d | 415 » leaq» (%r9,%r9,2),%r10 |
285 negq %r9 | 416 negq %r9 |
286 | 417 |
287 | 418 |
288 | 419 |
289 | 420 |
290 | 421 |
291 | 422 |
292 | 423 |
293 | 424 |
294 » leaq» -64(%rsp,%r9,2),%r11 | 425 |
295 » subq» %rsi,%r11 | 426 |
| 427 » leaq» -320(%rsp,%r9,2),%r11 |
| 428 » subq» %rdi,%r11 |
296 andq $4095,%r11 | 429 andq $4095,%r11 |
297 cmpq %r11,%r10 | 430 cmpq %r11,%r10 |
298 jb L$mul4xsp_alt | 431 jb L$mul4xsp_alt |
299 subq %r11,%rsp | 432 subq %r11,%rsp |
300 » leaq» -64(%rsp,%r9,2),%rsp | 433 » leaq» -320(%rsp,%r9,2),%rsp |
301 jmp L$mul4xsp_done | 434 jmp L$mul4xsp_done |
302 | 435 |
303 .p2align 5 | 436 .p2align 5 |
304 L$mul4xsp_alt: | 437 L$mul4xsp_alt: |
305 » leaq» 4096-64(,%r9,2),%r10 | 438 » leaq» 4096-320(,%r9,2),%r10 |
306 » leaq» -64(%rsp,%r9,2),%rsp | 439 » leaq» -320(%rsp,%r9,2),%rsp |
307 subq %r10,%r11 | 440 subq %r10,%r11 |
308 movq $0,%r10 | 441 movq $0,%r10 |
309 cmovcq %r10,%r11 | 442 cmovcq %r10,%r11 |
310 subq %r11,%rsp | 443 subq %r11,%rsp |
311 L$mul4xsp_done: | 444 L$mul4xsp_done: |
312 andq $-64,%rsp | 445 andq $-64,%rsp |
313 negq %r9 | 446 negq %r9 |
314 | 447 |
315 movq %rax,40(%rsp) | 448 movq %rax,40(%rsp) |
316 L$mul4x_body: | 449 L$mul4x_body: |
317 | 450 |
318 call mul4x_internal | 451 call mul4x_internal |
319 | 452 |
320 movq 40(%rsp),%rsi | 453 movq 40(%rsp),%rsi |
321 movq $1,%rax | 454 movq $1,%rax |
| 455 |
322 movq -48(%rsi),%r15 | 456 movq -48(%rsi),%r15 |
323 movq -40(%rsi),%r14 | 457 movq -40(%rsi),%r14 |
324 movq -32(%rsi),%r13 | 458 movq -32(%rsi),%r13 |
325 movq -24(%rsi),%r12 | 459 movq -24(%rsi),%r12 |
326 movq -16(%rsi),%rbp | 460 movq -16(%rsi),%rbp |
327 movq -8(%rsi),%rbx | 461 movq -8(%rsi),%rbx |
328 leaq (%rsi),%rsp | 462 leaq (%rsi),%rsp |
329 L$mul4x_epilogue: | 463 L$mul4x_epilogue: |
330 .byte 0xf3,0xc3 | 464 .byte 0xf3,0xc3 |
331 | 465 |
332 | 466 |
333 | 467 |
334 .p2align 5 | 468 .p2align 5 |
335 mul4x_internal: | 469 mul4x_internal: |
336 shlq $5,%r9 | 470 shlq $5,%r9 |
337 » movl» 8(%rax),%r10d | 471 » movd» 8(%rax),%xmm5 |
338 » leaq» 256(%rdx,%r9,1),%r13 | 472 » leaq» L$inc(%rip),%rax |
| 473 » leaq» 128(%rdx,%r9,1),%r13 |
339 shrq $5,%r9 | 474 shrq $5,%r9 |
340 » movq» %r10,%r11 | 475 » movdqa» 0(%rax),%xmm0 |
341 » shrq» $3,%r10 | 476 » movdqa» 16(%rax),%xmm1 |
342 » andq» $7,%r11 | 477 » leaq» 88-112(%rsp,%r9,1),%r10 |
343 » notq» %r10 | 478 » leaq» 128(%rdx),%r12 |
344 » leaq» L$magic_masks(%rip),%rax | |
345 » andq» $3,%r10 | |
346 » leaq» 96(%rdx,%r11,8),%r12 | |
347 » movq» 0(%rax,%r10,8),%xmm4 | |
348 » movq» 8(%rax,%r10,8),%xmm5 | |
349 » addq» $7,%r11 | |
350 » movq» 16(%rax,%r10,8),%xmm6 | |
351 » movq» 24(%rax,%r10,8),%xmm7 | |
352 » andq» $7,%r11 | |
353 | 479 |
354 » movq» -96(%r12),%xmm0 | 480 » pshufd» $0,%xmm5,%xmm5 |
355 » leaq» 256(%r12),%r14 | 481 » movdqa» %xmm1,%xmm4 |
356 » movq» -32(%r12),%xmm1 | 482 .byte» 0x67,0x67 |
357 » pand» %xmm4,%xmm0 | 483 » movdqa» %xmm1,%xmm2 |
358 » movq» 32(%r12),%xmm2 | 484 » paddd» %xmm0,%xmm1 |
359 » pand» %xmm5,%xmm1 | 485 » pcmpeqd»%xmm5,%xmm0 |
360 » movq» 96(%r12),%xmm3 | |
361 » pand» %xmm6,%xmm2 | |
362 .byte 0x67 | 486 .byte 0x67 |
| 487 movdqa %xmm4,%xmm3 |
| 488 paddd %xmm1,%xmm2 |
| 489 pcmpeqd %xmm5,%xmm1 |
| 490 movdqa %xmm0,112(%r10) |
| 491 movdqa %xmm4,%xmm0 |
| 492 |
| 493 paddd %xmm2,%xmm3 |
| 494 pcmpeqd %xmm5,%xmm2 |
| 495 movdqa %xmm1,128(%r10) |
| 496 movdqa %xmm4,%xmm1 |
| 497 |
| 498 paddd %xmm3,%xmm0 |
| 499 pcmpeqd %xmm5,%xmm3 |
| 500 movdqa %xmm2,144(%r10) |
| 501 movdqa %xmm4,%xmm2 |
| 502 |
| 503 paddd %xmm0,%xmm1 |
| 504 pcmpeqd %xmm5,%xmm0 |
| 505 movdqa %xmm3,160(%r10) |
| 506 movdqa %xmm4,%xmm3 |
| 507 paddd %xmm1,%xmm2 |
| 508 pcmpeqd %xmm5,%xmm1 |
| 509 movdqa %xmm0,176(%r10) |
| 510 movdqa %xmm4,%xmm0 |
| 511 |
| 512 paddd %xmm2,%xmm3 |
| 513 pcmpeqd %xmm5,%xmm2 |
| 514 movdqa %xmm1,192(%r10) |
| 515 movdqa %xmm4,%xmm1 |
| 516 |
| 517 paddd %xmm3,%xmm0 |
| 518 pcmpeqd %xmm5,%xmm3 |
| 519 movdqa %xmm2,208(%r10) |
| 520 movdqa %xmm4,%xmm2 |
| 521 |
| 522 paddd %xmm0,%xmm1 |
| 523 pcmpeqd %xmm5,%xmm0 |
| 524 movdqa %xmm3,224(%r10) |
| 525 movdqa %xmm4,%xmm3 |
| 526 paddd %xmm1,%xmm2 |
| 527 pcmpeqd %xmm5,%xmm1 |
| 528 movdqa %xmm0,240(%r10) |
| 529 movdqa %xmm4,%xmm0 |
| 530 |
| 531 paddd %xmm2,%xmm3 |
| 532 pcmpeqd %xmm5,%xmm2 |
| 533 movdqa %xmm1,256(%r10) |
| 534 movdqa %xmm4,%xmm1 |
| 535 |
| 536 paddd %xmm3,%xmm0 |
| 537 pcmpeqd %xmm5,%xmm3 |
| 538 movdqa %xmm2,272(%r10) |
| 539 movdqa %xmm4,%xmm2 |
| 540 |
| 541 paddd %xmm0,%xmm1 |
| 542 pcmpeqd %xmm5,%xmm0 |
| 543 movdqa %xmm3,288(%r10) |
| 544 movdqa %xmm4,%xmm3 |
| 545 paddd %xmm1,%xmm2 |
| 546 pcmpeqd %xmm5,%xmm1 |
| 547 movdqa %xmm0,304(%r10) |
| 548 |
| 549 paddd %xmm2,%xmm3 |
| 550 .byte 0x67 |
| 551 pcmpeqd %xmm5,%xmm2 |
| 552 movdqa %xmm1,320(%r10) |
| 553 |
| 554 pcmpeqd %xmm5,%xmm3 |
| 555 movdqa %xmm2,336(%r10) |
| 556 pand 64(%r12),%xmm0 |
| 557 |
| 558 pand 80(%r12),%xmm1 |
| 559 pand 96(%r12),%xmm2 |
| 560 movdqa %xmm3,352(%r10) |
| 561 pand 112(%r12),%xmm3 |
| 562 por %xmm2,%xmm0 |
| 563 por %xmm3,%xmm1 |
| 564 movdqa -128(%r12),%xmm4 |
| 565 movdqa -112(%r12),%xmm5 |
| 566 movdqa -96(%r12),%xmm2 |
| 567 pand 112(%r10),%xmm4 |
| 568 movdqa -80(%r12),%xmm3 |
| 569 pand 128(%r10),%xmm5 |
| 570 por %xmm4,%xmm0 |
| 571 pand 144(%r10),%xmm2 |
| 572 por %xmm5,%xmm1 |
| 573 pand 160(%r10),%xmm3 |
| 574 por %xmm2,%xmm0 |
| 575 por %xmm3,%xmm1 |
| 576 movdqa -64(%r12),%xmm4 |
| 577 movdqa -48(%r12),%xmm5 |
| 578 movdqa -32(%r12),%xmm2 |
| 579 pand 176(%r10),%xmm4 |
| 580 movdqa -16(%r12),%xmm3 |
| 581 pand 192(%r10),%xmm5 |
| 582 por %xmm4,%xmm0 |
| 583 pand 208(%r10),%xmm2 |
| 584 por %xmm5,%xmm1 |
| 585 pand 224(%r10),%xmm3 |
| 586 por %xmm2,%xmm0 |
| 587 por %xmm3,%xmm1 |
| 588 movdqa 0(%r12),%xmm4 |
| 589 movdqa 16(%r12),%xmm5 |
| 590 movdqa 32(%r12),%xmm2 |
| 591 pand 240(%r10),%xmm4 |
| 592 movdqa 48(%r12),%xmm3 |
| 593 pand 256(%r10),%xmm5 |
| 594 por %xmm4,%xmm0 |
| 595 pand 272(%r10),%xmm2 |
| 596 por %xmm5,%xmm1 |
| 597 pand 288(%r10),%xmm3 |
| 598 por %xmm2,%xmm0 |
| 599 por %xmm3,%xmm1 |
363 por %xmm1,%xmm0 | 600 por %xmm1,%xmm0 |
364 » movq» -96(%r14),%xmm1 | 601 » pshufd» $0x4e,%xmm0,%xmm1 |
365 .byte» 0x67 | 602 » por» %xmm1,%xmm0 |
366 » pand» %xmm7,%xmm3 | 603 » leaq» 256(%r12),%r12 |
367 .byte» 0x67 | 604 .byte» 102,72,15,126,195 |
368 » por» %xmm2,%xmm0 | |
369 » movq» -32(%r14),%xmm2 | |
370 .byte» 0x67 | |
371 » pand» %xmm4,%xmm1 | |
372 .byte» 0x67 | |
373 » por» %xmm3,%xmm0 | |
374 » movq» 32(%r14),%xmm3 | |
375 | 605 |
376 .byte 102,72,15,126,195 | |
377 movq 96(%r14),%xmm0 | |
378 movq %r13,16+8(%rsp) | 606 movq %r13,16+8(%rsp) |
379 movq %rdi,56+8(%rsp) | 607 movq %rdi,56+8(%rsp) |
380 | 608 |
381 movq (%r8),%r8 | 609 movq (%r8),%r8 |
382 movq (%rsi),%rax | 610 movq (%rsi),%rax |
383 leaq (%rsi,%r9,1),%rsi | 611 leaq (%rsi,%r9,1),%rsi |
384 negq %r9 | 612 negq %r9 |
385 | 613 |
386 movq %r8,%rbp | 614 movq %r8,%rbp |
387 mulq %rbx | 615 mulq %rbx |
388 movq %rax,%r10 | 616 movq %rax,%r10 |
389 movq (%rcx),%rax | 617 movq (%rcx),%rax |
390 | 618 |
391 pand %xmm5,%xmm2 | |
392 pand %xmm6,%xmm3 | |
393 por %xmm2,%xmm1 | |
394 | |
395 imulq %r10,%rbp | 619 imulq %r10,%rbp |
396 | 620 » leaq» 64+8(%rsp),%r14 |
397 | |
398 | |
399 | |
400 | |
401 | |
402 | |
403 » leaq» 64+8(%rsp,%r11,8),%r14 | |
404 movq %rdx,%r11 | 621 movq %rdx,%r11 |
405 | 622 |
406 pand %xmm7,%xmm0 | |
407 por %xmm3,%xmm1 | |
408 leaq 512(%r12),%r12 | |
409 por %xmm1,%xmm0 | |
410 | |
411 mulq %rbp | 623 mulq %rbp |
412 addq %rax,%r10 | 624 addq %rax,%r10 |
413 movq 8(%rsi,%r9,1),%rax | 625 movq 8(%rsi,%r9,1),%rax |
414 adcq $0,%rdx | 626 adcq $0,%rdx |
415 movq %rdx,%rdi | 627 movq %rdx,%rdi |
416 | 628 |
417 mulq %rbx | 629 mulq %rbx |
418 addq %rax,%r11 | 630 addq %rax,%r11 |
419 » movq» 16(%rcx),%rax | 631 » movq» 8(%rcx),%rax |
420 adcq $0,%rdx | 632 adcq $0,%rdx |
421 movq %rdx,%r10 | 633 movq %rdx,%r10 |
422 | 634 |
423 mulq %rbp | 635 mulq %rbp |
424 addq %rax,%rdi | 636 addq %rax,%rdi |
425 movq 16(%rsi,%r9,1),%rax | 637 movq 16(%rsi,%r9,1),%rax |
426 adcq $0,%rdx | 638 adcq $0,%rdx |
427 addq %r11,%rdi | 639 addq %r11,%rdi |
428 leaq 32(%r9),%r15 | 640 leaq 32(%r9),%r15 |
429 » leaq» 64(%rcx),%rcx | 641 » leaq» 32(%rcx),%rcx |
430 adcq $0,%rdx | 642 adcq $0,%rdx |
431 movq %rdi,(%r14) | 643 movq %rdi,(%r14) |
432 movq %rdx,%r13 | 644 movq %rdx,%r13 |
433 jmp L$1st4x | 645 jmp L$1st4x |
434 | 646 |
435 .p2align 5 | 647 .p2align 5 |
436 L$1st4x: | 648 L$1st4x: |
437 mulq %rbx | 649 mulq %rbx |
438 addq %rax,%r10 | 650 addq %rax,%r10 |
439 » movq» -32(%rcx),%rax | 651 » movq» -16(%rcx),%rax |
440 leaq 32(%r14),%r14 | 652 leaq 32(%r14),%r14 |
441 adcq $0,%rdx | 653 adcq $0,%rdx |
442 movq %rdx,%r11 | 654 movq %rdx,%r11 |
443 | 655 |
444 mulq %rbp | 656 mulq %rbp |
445 addq %rax,%r13 | 657 addq %rax,%r13 |
446 movq -8(%rsi,%r15,1),%rax | 658 movq -8(%rsi,%r15,1),%rax |
447 adcq $0,%rdx | 659 adcq $0,%rdx |
448 addq %r10,%r13 | 660 addq %r10,%r13 |
449 adcq $0,%rdx | 661 adcq $0,%rdx |
450 movq %r13,-24(%r14) | 662 movq %r13,-24(%r14) |
451 movq %rdx,%rdi | 663 movq %rdx,%rdi |
452 | 664 |
453 mulq %rbx | 665 mulq %rbx |
454 addq %rax,%r11 | 666 addq %rax,%r11 |
455 » movq» -16(%rcx),%rax | 667 » movq» -8(%rcx),%rax |
456 adcq $0,%rdx | 668 adcq $0,%rdx |
457 movq %rdx,%r10 | 669 movq %rdx,%r10 |
458 | 670 |
459 mulq %rbp | 671 mulq %rbp |
460 addq %rax,%rdi | 672 addq %rax,%rdi |
461 movq (%rsi,%r15,1),%rax | 673 movq (%rsi,%r15,1),%rax |
462 adcq $0,%rdx | 674 adcq $0,%rdx |
463 addq %r11,%rdi | 675 addq %r11,%rdi |
464 adcq $0,%rdx | 676 adcq $0,%rdx |
465 movq %rdi,-16(%r14) | 677 movq %rdi,-16(%r14) |
466 movq %rdx,%r13 | 678 movq %rdx,%r13 |
467 | 679 |
468 mulq %rbx | 680 mulq %rbx |
469 addq %rax,%r10 | 681 addq %rax,%r10 |
470 movq 0(%rcx),%rax | 682 movq 0(%rcx),%rax |
471 adcq $0,%rdx | 683 adcq $0,%rdx |
472 movq %rdx,%r11 | 684 movq %rdx,%r11 |
473 | 685 |
474 mulq %rbp | 686 mulq %rbp |
475 addq %rax,%r13 | 687 addq %rax,%r13 |
476 movq 8(%rsi,%r15,1),%rax | 688 movq 8(%rsi,%r15,1),%rax |
477 adcq $0,%rdx | 689 adcq $0,%rdx |
478 addq %r10,%r13 | 690 addq %r10,%r13 |
479 adcq $0,%rdx | 691 adcq $0,%rdx |
480 movq %r13,-8(%r14) | 692 movq %r13,-8(%r14) |
481 movq %rdx,%rdi | 693 movq %rdx,%rdi |
482 | 694 |
483 mulq %rbx | 695 mulq %rbx |
484 addq %rax,%r11 | 696 addq %rax,%r11 |
485 » movq» 16(%rcx),%rax | 697 » movq» 8(%rcx),%rax |
486 adcq $0,%rdx | 698 adcq $0,%rdx |
487 movq %rdx,%r10 | 699 movq %rdx,%r10 |
488 | 700 |
489 mulq %rbp | 701 mulq %rbp |
490 addq %rax,%rdi | 702 addq %rax,%rdi |
491 movq 16(%rsi,%r15,1),%rax | 703 movq 16(%rsi,%r15,1),%rax |
492 adcq $0,%rdx | 704 adcq $0,%rdx |
493 addq %r11,%rdi | 705 addq %r11,%rdi |
494 » leaq» 64(%rcx),%rcx | 706 » leaq» 32(%rcx),%rcx |
495 adcq $0,%rdx | 707 adcq $0,%rdx |
496 movq %rdi,(%r14) | 708 movq %rdi,(%r14) |
497 movq %rdx,%r13 | 709 movq %rdx,%r13 |
498 | 710 |
499 addq $32,%r15 | 711 addq $32,%r15 |
500 jnz L$1st4x | 712 jnz L$1st4x |
501 | 713 |
502 mulq %rbx | 714 mulq %rbx |
503 addq %rax,%r10 | 715 addq %rax,%r10 |
504 » movq» -32(%rcx),%rax | 716 » movq» -16(%rcx),%rax |
505 leaq 32(%r14),%r14 | 717 leaq 32(%r14),%r14 |
506 adcq $0,%rdx | 718 adcq $0,%rdx |
507 movq %rdx,%r11 | 719 movq %rdx,%r11 |
508 | 720 |
509 mulq %rbp | 721 mulq %rbp |
510 addq %rax,%r13 | 722 addq %rax,%r13 |
511 movq -8(%rsi),%rax | 723 movq -8(%rsi),%rax |
512 adcq $0,%rdx | 724 adcq $0,%rdx |
513 addq %r10,%r13 | 725 addq %r10,%r13 |
514 adcq $0,%rdx | 726 adcq $0,%rdx |
515 movq %r13,-24(%r14) | 727 movq %r13,-24(%r14) |
516 movq %rdx,%rdi | 728 movq %rdx,%rdi |
517 | 729 |
518 mulq %rbx | 730 mulq %rbx |
519 addq %rax,%r11 | 731 addq %rax,%r11 |
520 » movq» -16(%rcx),%rax | 732 » movq» -8(%rcx),%rax |
521 adcq $0,%rdx | 733 adcq $0,%rdx |
522 movq %rdx,%r10 | 734 movq %rdx,%r10 |
523 | 735 |
524 mulq %rbp | 736 mulq %rbp |
525 addq %rax,%rdi | 737 addq %rax,%rdi |
526 movq (%rsi,%r9,1),%rax | 738 movq (%rsi,%r9,1),%rax |
527 adcq $0,%rdx | 739 adcq $0,%rdx |
528 addq %r11,%rdi | 740 addq %r11,%rdi |
529 adcq $0,%rdx | 741 adcq $0,%rdx |
530 movq %rdi,-16(%r14) | 742 movq %rdi,-16(%r14) |
531 movq %rdx,%r13 | 743 movq %rdx,%r13 |
532 | 744 |
533 .byte» 102,72,15,126,195 | 745 » leaq» (%rcx,%r9,1),%rcx |
534 » leaq» (%rcx,%r9,2),%rcx | |
535 | 746 |
536 xorq %rdi,%rdi | 747 xorq %rdi,%rdi |
537 addq %r10,%r13 | 748 addq %r10,%r13 |
538 adcq $0,%rdi | 749 adcq $0,%rdi |
539 movq %r13,-8(%r14) | 750 movq %r13,-8(%r14) |
540 | 751 |
541 jmp L$outer4x | 752 jmp L$outer4x |
542 | 753 |
543 .p2align 5 | 754 .p2align 5 |
544 L$outer4x: | 755 L$outer4x: |
| 756 leaq 16+128(%r14),%rdx |
| 757 pxor %xmm4,%xmm4 |
| 758 pxor %xmm5,%xmm5 |
| 759 movdqa -128(%r12),%xmm0 |
| 760 movdqa -112(%r12),%xmm1 |
| 761 movdqa -96(%r12),%xmm2 |
| 762 movdqa -80(%r12),%xmm3 |
| 763 pand -128(%rdx),%xmm0 |
| 764 pand -112(%rdx),%xmm1 |
| 765 por %xmm0,%xmm4 |
| 766 pand -96(%rdx),%xmm2 |
| 767 por %xmm1,%xmm5 |
| 768 pand -80(%rdx),%xmm3 |
| 769 por %xmm2,%xmm4 |
| 770 por %xmm3,%xmm5 |
| 771 movdqa -64(%r12),%xmm0 |
| 772 movdqa -48(%r12),%xmm1 |
| 773 movdqa -32(%r12),%xmm2 |
| 774 movdqa -16(%r12),%xmm3 |
| 775 pand -64(%rdx),%xmm0 |
| 776 pand -48(%rdx),%xmm1 |
| 777 por %xmm0,%xmm4 |
| 778 pand -32(%rdx),%xmm2 |
| 779 por %xmm1,%xmm5 |
| 780 pand -16(%rdx),%xmm3 |
| 781 por %xmm2,%xmm4 |
| 782 por %xmm3,%xmm5 |
| 783 movdqa 0(%r12),%xmm0 |
| 784 movdqa 16(%r12),%xmm1 |
| 785 movdqa 32(%r12),%xmm2 |
| 786 movdqa 48(%r12),%xmm3 |
| 787 pand 0(%rdx),%xmm0 |
| 788 pand 16(%rdx),%xmm1 |
| 789 por %xmm0,%xmm4 |
| 790 pand 32(%rdx),%xmm2 |
| 791 por %xmm1,%xmm5 |
| 792 pand 48(%rdx),%xmm3 |
| 793 por %xmm2,%xmm4 |
| 794 por %xmm3,%xmm5 |
| 795 movdqa 64(%r12),%xmm0 |
| 796 movdqa 80(%r12),%xmm1 |
| 797 movdqa 96(%r12),%xmm2 |
| 798 movdqa 112(%r12),%xmm3 |
| 799 pand 64(%rdx),%xmm0 |
| 800 pand 80(%rdx),%xmm1 |
| 801 por %xmm0,%xmm4 |
| 802 pand 96(%rdx),%xmm2 |
| 803 por %xmm1,%xmm5 |
| 804 pand 112(%rdx),%xmm3 |
| 805 por %xmm2,%xmm4 |
| 806 por %xmm3,%xmm5 |
| 807 por %xmm5,%xmm4 |
| 808 pshufd $0x4e,%xmm4,%xmm0 |
| 809 por %xmm4,%xmm0 |
| 810 leaq 256(%r12),%r12 |
| 811 .byte 102,72,15,126,195 |
| 812 |
545 movq (%r14,%r9,1),%r10 | 813 movq (%r14,%r9,1),%r10 |
546 movq %r8,%rbp | 814 movq %r8,%rbp |
547 mulq %rbx | 815 mulq %rbx |
548 addq %rax,%r10 | 816 addq %rax,%r10 |
549 movq (%rcx),%rax | 817 movq (%rcx),%rax |
550 adcq $0,%rdx | 818 adcq $0,%rdx |
551 | 819 |
552 movq -96(%r12),%xmm0 | |
553 movq -32(%r12),%xmm1 | |
554 pand %xmm4,%xmm0 | |
555 movq 32(%r12),%xmm2 | |
556 pand %xmm5,%xmm1 | |
557 movq 96(%r12),%xmm3 | |
558 | |
559 imulq %r10,%rbp | 820 imulq %r10,%rbp |
560 .byte 0x67 | |
561 movq %rdx,%r11 | 821 movq %rdx,%r11 |
562 movq %rdi,(%r14) | 822 movq %rdi,(%r14) |
563 | 823 |
564 pand %xmm6,%xmm2 | |
565 por %xmm1,%xmm0 | |
566 pand %xmm7,%xmm3 | |
567 por %xmm2,%xmm0 | |
568 leaq (%r14,%r9,1),%r14 | 824 leaq (%r14,%r9,1),%r14 |
569 leaq 256(%r12),%r12 | |
570 por %xmm3,%xmm0 | |
571 | 825 |
572 mulq %rbp | 826 mulq %rbp |
573 addq %rax,%r10 | 827 addq %rax,%r10 |
574 movq 8(%rsi,%r9,1),%rax | 828 movq 8(%rsi,%r9,1),%rax |
575 adcq $0,%rdx | 829 adcq $0,%rdx |
576 movq %rdx,%rdi | 830 movq %rdx,%rdi |
577 | 831 |
578 mulq %rbx | 832 mulq %rbx |
579 addq %rax,%r11 | 833 addq %rax,%r11 |
580 » movq» 16(%rcx),%rax | 834 » movq» 8(%rcx),%rax |
581 adcq $0,%rdx | 835 adcq $0,%rdx |
582 addq 8(%r14),%r11 | 836 addq 8(%r14),%r11 |
583 adcq $0,%rdx | 837 adcq $0,%rdx |
584 movq %rdx,%r10 | 838 movq %rdx,%r10 |
585 | 839 |
586 mulq %rbp | 840 mulq %rbp |
587 addq %rax,%rdi | 841 addq %rax,%rdi |
588 movq 16(%rsi,%r9,1),%rax | 842 movq 16(%rsi,%r9,1),%rax |
589 adcq $0,%rdx | 843 adcq $0,%rdx |
590 addq %r11,%rdi | 844 addq %r11,%rdi |
591 leaq 32(%r9),%r15 | 845 leaq 32(%r9),%r15 |
592 » leaq» 64(%rcx),%rcx | 846 » leaq» 32(%rcx),%rcx |
593 adcq $0,%rdx | 847 adcq $0,%rdx |
594 movq %rdx,%r13 | 848 movq %rdx,%r13 |
595 jmp L$inner4x | 849 jmp L$inner4x |
596 | 850 |
597 .p2align 5 | 851 .p2align 5 |
598 L$inner4x: | 852 L$inner4x: |
599 mulq %rbx | 853 mulq %rbx |
600 addq %rax,%r10 | 854 addq %rax,%r10 |
601 » movq» -32(%rcx),%rax | 855 » movq» -16(%rcx),%rax |
602 adcq $0,%rdx | 856 adcq $0,%rdx |
603 addq 16(%r14),%r10 | 857 addq 16(%r14),%r10 |
604 leaq 32(%r14),%r14 | 858 leaq 32(%r14),%r14 |
605 adcq $0,%rdx | 859 adcq $0,%rdx |
606 movq %rdx,%r11 | 860 movq %rdx,%r11 |
607 | 861 |
608 mulq %rbp | 862 mulq %rbp |
609 addq %rax,%r13 | 863 addq %rax,%r13 |
610 movq -8(%rsi,%r15,1),%rax | 864 movq -8(%rsi,%r15,1),%rax |
611 adcq $0,%rdx | 865 adcq $0,%rdx |
612 addq %r10,%r13 | 866 addq %r10,%r13 |
613 adcq $0,%rdx | 867 adcq $0,%rdx |
614 movq %rdi,-32(%r14) | 868 movq %rdi,-32(%r14) |
615 movq %rdx,%rdi | 869 movq %rdx,%rdi |
616 | 870 |
617 mulq %rbx | 871 mulq %rbx |
618 addq %rax,%r11 | 872 addq %rax,%r11 |
619 » movq» -16(%rcx),%rax | 873 » movq» -8(%rcx),%rax |
620 adcq $0,%rdx | 874 adcq $0,%rdx |
621 addq -8(%r14),%r11 | 875 addq -8(%r14),%r11 |
622 adcq $0,%rdx | 876 adcq $0,%rdx |
623 movq %rdx,%r10 | 877 movq %rdx,%r10 |
624 | 878 |
625 mulq %rbp | 879 mulq %rbp |
626 addq %rax,%rdi | 880 addq %rax,%rdi |
627 movq (%rsi,%r15,1),%rax | 881 movq (%rsi,%r15,1),%rax |
628 adcq $0,%rdx | 882 adcq $0,%rdx |
629 addq %r11,%rdi | 883 addq %r11,%rdi |
(...skipping 13 matching lines...) Expand all Loading... |
643 addq %rax,%r13 | 897 addq %rax,%r13 |
644 movq 8(%rsi,%r15,1),%rax | 898 movq 8(%rsi,%r15,1),%rax |
645 adcq $0,%rdx | 899 adcq $0,%rdx |
646 addq %r10,%r13 | 900 addq %r10,%r13 |
647 adcq $0,%rdx | 901 adcq $0,%rdx |
648 movq %rdi,-16(%r14) | 902 movq %rdi,-16(%r14) |
649 movq %rdx,%rdi | 903 movq %rdx,%rdi |
650 | 904 |
651 mulq %rbx | 905 mulq %rbx |
652 addq %rax,%r11 | 906 addq %rax,%r11 |
653 » movq» 16(%rcx),%rax | 907 » movq» 8(%rcx),%rax |
654 adcq $0,%rdx | 908 adcq $0,%rdx |
655 addq 8(%r14),%r11 | 909 addq 8(%r14),%r11 |
656 adcq $0,%rdx | 910 adcq $0,%rdx |
657 movq %rdx,%r10 | 911 movq %rdx,%r10 |
658 | 912 |
659 mulq %rbp | 913 mulq %rbp |
660 addq %rax,%rdi | 914 addq %rax,%rdi |
661 movq 16(%rsi,%r15,1),%rax | 915 movq 16(%rsi,%r15,1),%rax |
662 adcq $0,%rdx | 916 adcq $0,%rdx |
663 addq %r11,%rdi | 917 addq %r11,%rdi |
664 » leaq» 64(%rcx),%rcx | 918 » leaq» 32(%rcx),%rcx |
665 adcq $0,%rdx | 919 adcq $0,%rdx |
666 movq %r13,-8(%r14) | 920 movq %r13,-8(%r14) |
667 movq %rdx,%r13 | 921 movq %rdx,%r13 |
668 | 922 |
669 addq $32,%r15 | 923 addq $32,%r15 |
670 jnz L$inner4x | 924 jnz L$inner4x |
671 | 925 |
672 mulq %rbx | 926 mulq %rbx |
673 addq %rax,%r10 | 927 addq %rax,%r10 |
674 » movq» -32(%rcx),%rax | 928 » movq» -16(%rcx),%rax |
675 adcq $0,%rdx | 929 adcq $0,%rdx |
676 addq 16(%r14),%r10 | 930 addq 16(%r14),%r10 |
677 leaq 32(%r14),%r14 | 931 leaq 32(%r14),%r14 |
678 adcq $0,%rdx | 932 adcq $0,%rdx |
679 movq %rdx,%r11 | 933 movq %rdx,%r11 |
680 | 934 |
681 mulq %rbp | 935 mulq %rbp |
682 addq %rax,%r13 | 936 addq %rax,%r13 |
683 movq -8(%rsi),%rax | 937 movq -8(%rsi),%rax |
684 adcq $0,%rdx | 938 adcq $0,%rdx |
685 addq %r10,%r13 | 939 addq %r10,%r13 |
686 adcq $0,%rdx | 940 adcq $0,%rdx |
687 movq %rdi,-32(%r14) | 941 movq %rdi,-32(%r14) |
688 movq %rdx,%rdi | 942 movq %rdx,%rdi |
689 | 943 |
690 mulq %rbx | 944 mulq %rbx |
691 addq %rax,%r11 | 945 addq %rax,%r11 |
692 movq %rbp,%rax | 946 movq %rbp,%rax |
693 » movq» -16(%rcx),%rbp | 947 » movq» -8(%rcx),%rbp |
694 adcq $0,%rdx | 948 adcq $0,%rdx |
695 addq -8(%r14),%r11 | 949 addq -8(%r14),%r11 |
696 adcq $0,%rdx | 950 adcq $0,%rdx |
697 movq %rdx,%r10 | 951 movq %rdx,%r10 |
698 | 952 |
699 mulq %rbp | 953 mulq %rbp |
700 addq %rax,%rdi | 954 addq %rax,%rdi |
701 movq (%rsi,%r9,1),%rax | 955 movq (%rsi,%r9,1),%rax |
702 adcq $0,%rdx | 956 adcq $0,%rdx |
703 addq %r11,%rdi | 957 addq %r11,%rdi |
704 adcq $0,%rdx | 958 adcq $0,%rdx |
705 movq %r13,-24(%r14) | 959 movq %r13,-24(%r14) |
706 movq %rdx,%r13 | 960 movq %rdx,%r13 |
707 | 961 |
708 .byte 102,72,15,126,195 | |
709 movq %rdi,-16(%r14) | 962 movq %rdi,-16(%r14) |
710 » leaq» (%rcx,%r9,2),%rcx | 963 » leaq» (%rcx,%r9,1),%rcx |
711 | 964 |
712 xorq %rdi,%rdi | 965 xorq %rdi,%rdi |
713 addq %r10,%r13 | 966 addq %r10,%r13 |
714 adcq $0,%rdi | 967 adcq $0,%rdi |
715 addq (%r14),%r13 | 968 addq (%r14),%r13 |
716 adcq $0,%rdi | 969 adcq $0,%rdi |
717 movq %r13,-8(%r14) | 970 movq %r13,-8(%r14) |
718 | 971 |
719 cmpq 16+8(%rsp),%r12 | 972 cmpq 16+8(%rsp),%r12 |
720 jb L$outer4x | 973 jb L$outer4x |
| 974 xorq %rax,%rax |
721 subq %r13,%rbp | 975 subq %r13,%rbp |
722 adcq %r15,%r15 | 976 adcq %r15,%r15 |
723 orq %r15,%rdi | 977 orq %r15,%rdi |
724 » xorq» $1,%rdi | 978 » subq» %rdi,%rax |
725 leaq (%r14,%r9,1),%rbx | 979 leaq (%r14,%r9,1),%rbx |
726 » leaq» (%rcx,%rdi,8),%rbp | 980 » movq» (%rcx),%r12 |
| 981 » leaq» (%rcx),%rbp |
727 movq %r9,%rcx | 982 movq %r9,%rcx |
728 sarq $3+2,%rcx | 983 sarq $3+2,%rcx |
729 movq 56+8(%rsp),%rdi | 984 movq 56+8(%rsp),%rdi |
730 » jmp» L$sqr4x_sub | 985 » decq» %r12 |
| 986 » xorq» %r10,%r10 |
| 987 » movq» 8(%rbp),%r13 |
| 988 » movq» 16(%rbp),%r14 |
| 989 » movq» 24(%rbp),%r15 |
| 990 » jmp» L$sqr4x_sub_entry |
731 | 991 |
732 .globl _bn_power5 | 992 .globl _bn_power5 |
733 .private_extern _bn_power5 | 993 .private_extern _bn_power5 |
734 | 994 |
735 .p2align 5 | 995 .p2align 5 |
736 _bn_power5: | 996 _bn_power5: |
737 movq %rsp,%rax | 997 movq %rsp,%rax |
738 pushq %rbx | 998 pushq %rbx |
739 pushq %rbp | 999 pushq %rbp |
740 pushq %r12 | 1000 pushq %r12 |
741 pushq %r13 | 1001 pushq %r13 |
742 pushq %r14 | 1002 pushq %r14 |
743 pushq %r15 | 1003 pushq %r15 |
744 » movl» %r9d,%r10d | 1004 |
745 shll $3,%r9d | 1005 shll $3,%r9d |
746 » shll» $3+2,%r10d | 1006 » leal» (%r9,%r9,2),%r10d |
747 negq %r9 | 1007 negq %r9 |
748 movq (%r8),%r8 | 1008 movq (%r8),%r8 |
749 | 1009 |
750 | 1010 |
751 | 1011 |
752 | 1012 |
753 | 1013 |
754 | 1014 |
755 | 1015 |
756 » leaq» -64(%rsp,%r9,2),%r11 | 1016 |
757 » subq» %rsi,%r11 | 1017 » leaq» -320(%rsp,%r9,2),%r11 |
| 1018 » subq» %rdi,%r11 |
758 andq $4095,%r11 | 1019 andq $4095,%r11 |
759 cmpq %r11,%r10 | 1020 cmpq %r11,%r10 |
760 jb L$pwr_sp_alt | 1021 jb L$pwr_sp_alt |
761 subq %r11,%rsp | 1022 subq %r11,%rsp |
762 » leaq» -64(%rsp,%r9,2),%rsp | 1023 » leaq» -320(%rsp,%r9,2),%rsp |
763 jmp L$pwr_sp_done | 1024 jmp L$pwr_sp_done |
764 | 1025 |
765 .p2align 5 | 1026 .p2align 5 |
766 L$pwr_sp_alt: | 1027 L$pwr_sp_alt: |
767 » leaq» 4096-64(,%r9,2),%r10 | 1028 » leaq» 4096-320(,%r9,2),%r10 |
768 » leaq» -64(%rsp,%r9,2),%rsp | 1029 » leaq» -320(%rsp,%r9,2),%rsp |
769 subq %r10,%r11 | 1030 subq %r10,%r11 |
770 movq $0,%r10 | 1031 movq $0,%r10 |
771 cmovcq %r10,%r11 | 1032 cmovcq %r10,%r11 |
772 subq %r11,%rsp | 1033 subq %r11,%rsp |
773 L$pwr_sp_done: | 1034 L$pwr_sp_done: |
774 andq $-64,%rsp | 1035 andq $-64,%rsp |
775 movq %r9,%r10 | 1036 movq %r9,%r10 |
776 negq %r9 | 1037 negq %r9 |
777 | 1038 |
778 | 1039 |
779 | 1040 |
780 | 1041 |
781 | 1042 |
782 | 1043 |
783 | 1044 |
784 | 1045 |
785 | 1046 |
786 | 1047 |
787 movq %r8,32(%rsp) | 1048 movq %r8,32(%rsp) |
788 movq %rax,40(%rsp) | 1049 movq %rax,40(%rsp) |
789 L$power5_body: | 1050 L$power5_body: |
790 .byte 102,72,15,110,207 | 1051 .byte 102,72,15,110,207 |
791 .byte 102,72,15,110,209 | 1052 .byte 102,72,15,110,209 |
792 .byte 102,73,15,110,218 | 1053 .byte 102,73,15,110,218 |
793 .byte 102,72,15,110,226 | 1054 .byte 102,72,15,110,226 |
794 | 1055 |
795 call __bn_sqr8x_internal | 1056 call __bn_sqr8x_internal |
| 1057 call __bn_post4x_internal |
796 call __bn_sqr8x_internal | 1058 call __bn_sqr8x_internal |
| 1059 call __bn_post4x_internal |
797 call __bn_sqr8x_internal | 1060 call __bn_sqr8x_internal |
| 1061 call __bn_post4x_internal |
798 call __bn_sqr8x_internal | 1062 call __bn_sqr8x_internal |
| 1063 call __bn_post4x_internal |
799 call __bn_sqr8x_internal | 1064 call __bn_sqr8x_internal |
| 1065 call __bn_post4x_internal |
800 | 1066 |
801 .byte 102,72,15,126,209 | 1067 .byte 102,72,15,126,209 |
802 .byte 102,72,15,126,226 | 1068 .byte 102,72,15,126,226 |
803 movq %rsi,%rdi | 1069 movq %rsi,%rdi |
804 movq 40(%rsp),%rax | 1070 movq 40(%rsp),%rax |
805 leaq 32(%rsp),%r8 | 1071 leaq 32(%rsp),%r8 |
806 | 1072 |
807 call mul4x_internal | 1073 call mul4x_internal |
808 | 1074 |
809 movq 40(%rsp),%rsi | 1075 movq 40(%rsp),%rsi |
(...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1334 leaq (%rcx,%r11,2),%r8 | 1600 leaq (%rcx,%r11,2),%r8 |
1335 shrq $63,%r11 | 1601 shrq $63,%r11 |
1336 orq %r10,%r8 | 1602 orq %r10,%r8 |
1337 mulq %rax | 1603 mulq %rax |
1338 negq %r15 | 1604 negq %r15 |
1339 adcq %rax,%rbx | 1605 adcq %rax,%rbx |
1340 adcq %rdx,%r8 | 1606 adcq %rdx,%r8 |
1341 movq %rbx,-16(%rdi) | 1607 movq %rbx,-16(%rdi) |
1342 movq %r8,-8(%rdi) | 1608 movq %r8,-8(%rdi) |
1343 .byte 102,72,15,126,213 | 1609 .byte 102,72,15,126,213 |
1344 sqr8x_reduction: | 1610 __bn_sqr8x_reduction: |
1345 xorq %rax,%rax | 1611 xorq %rax,%rax |
1346 » leaq» (%rbp,%r9,2),%rcx | 1612 » leaq» (%r9,%rbp,1),%rcx |
1347 leaq 48+8(%rsp,%r9,2),%rdx | 1613 leaq 48+8(%rsp,%r9,2),%rdx |
1348 movq %rcx,0+8(%rsp) | 1614 movq %rcx,0+8(%rsp) |
1349 leaq 48+8(%rsp,%r9,1),%rdi | 1615 leaq 48+8(%rsp,%r9,1),%rdi |
1350 movq %rdx,8+8(%rsp) | 1616 movq %rdx,8+8(%rsp) |
1351 negq %r9 | 1617 negq %r9 |
1352 jmp L$8x_reduction_loop | 1618 jmp L$8x_reduction_loop |
1353 | 1619 |
1354 .p2align 5 | 1620 .p2align 5 |
1355 L$8x_reduction_loop: | 1621 L$8x_reduction_loop: |
1356 leaq (%rdi,%r9,1),%rdi | 1622 leaq (%rdi,%r9,1),%rdi |
(...skipping 12 matching lines...) Expand all Loading... |
1369 .byte 0x67 | 1635 .byte 0x67 |
1370 movq %rbx,%r8 | 1636 movq %rbx,%r8 |
1371 imulq 32+8(%rsp),%rbx | 1637 imulq 32+8(%rsp),%rbx |
1372 movq 0(%rbp),%rax | 1638 movq 0(%rbp),%rax |
1373 movl $8,%ecx | 1639 movl $8,%ecx |
1374 jmp L$8x_reduce | 1640 jmp L$8x_reduce |
1375 | 1641 |
1376 .p2align 5 | 1642 .p2align 5 |
1377 L$8x_reduce: | 1643 L$8x_reduce: |
1378 mulq %rbx | 1644 mulq %rbx |
1379 » movq» 16(%rbp),%rax | 1645 » movq» 8(%rbp),%rax |
1380 negq %r8 | 1646 negq %r8 |
1381 movq %rdx,%r8 | 1647 movq %rdx,%r8 |
1382 adcq $0,%r8 | 1648 adcq $0,%r8 |
1383 | 1649 |
1384 mulq %rbx | 1650 mulq %rbx |
1385 addq %rax,%r9 | 1651 addq %rax,%r9 |
1386 » movq» 32(%rbp),%rax | 1652 » movq» 16(%rbp),%rax |
1387 adcq $0,%rdx | 1653 adcq $0,%rdx |
1388 addq %r9,%r8 | 1654 addq %r9,%r8 |
1389 movq %rbx,48-8+8(%rsp,%rcx,8) | 1655 movq %rbx,48-8+8(%rsp,%rcx,8) |
1390 movq %rdx,%r9 | 1656 movq %rdx,%r9 |
1391 adcq $0,%r9 | 1657 adcq $0,%r9 |
1392 | 1658 |
1393 mulq %rbx | 1659 mulq %rbx |
1394 addq %rax,%r10 | 1660 addq %rax,%r10 |
1395 » movq» 48(%rbp),%rax | 1661 » movq» 24(%rbp),%rax |
1396 adcq $0,%rdx | 1662 adcq $0,%rdx |
1397 addq %r10,%r9 | 1663 addq %r10,%r9 |
1398 movq 32+8(%rsp),%rsi | 1664 movq 32+8(%rsp),%rsi |
1399 movq %rdx,%r10 | 1665 movq %rdx,%r10 |
1400 adcq $0,%r10 | 1666 adcq $0,%r10 |
1401 | 1667 |
1402 mulq %rbx | 1668 mulq %rbx |
1403 addq %rax,%r11 | 1669 addq %rax,%r11 |
1404 » movq» 64(%rbp),%rax | 1670 » movq» 32(%rbp),%rax |
1405 adcq $0,%rdx | 1671 adcq $0,%rdx |
1406 imulq %r8,%rsi | 1672 imulq %r8,%rsi |
1407 addq %r11,%r10 | 1673 addq %r11,%r10 |
1408 movq %rdx,%r11 | 1674 movq %rdx,%r11 |
1409 adcq $0,%r11 | 1675 adcq $0,%r11 |
1410 | 1676 |
1411 mulq %rbx | 1677 mulq %rbx |
1412 addq %rax,%r12 | 1678 addq %rax,%r12 |
1413 » movq» 80(%rbp),%rax | 1679 » movq» 40(%rbp),%rax |
1414 adcq $0,%rdx | 1680 adcq $0,%rdx |
1415 addq %r12,%r11 | 1681 addq %r12,%r11 |
1416 movq %rdx,%r12 | 1682 movq %rdx,%r12 |
1417 adcq $0,%r12 | 1683 adcq $0,%r12 |
1418 | 1684 |
1419 mulq %rbx | 1685 mulq %rbx |
1420 addq %rax,%r13 | 1686 addq %rax,%r13 |
1421 » movq» 96(%rbp),%rax | 1687 » movq» 48(%rbp),%rax |
1422 adcq $0,%rdx | 1688 adcq $0,%rdx |
1423 addq %r13,%r12 | 1689 addq %r13,%r12 |
1424 movq %rdx,%r13 | 1690 movq %rdx,%r13 |
1425 adcq $0,%r13 | 1691 adcq $0,%r13 |
1426 | 1692 |
1427 mulq %rbx | 1693 mulq %rbx |
1428 addq %rax,%r14 | 1694 addq %rax,%r14 |
1429 » movq» 112(%rbp),%rax | 1695 » movq» 56(%rbp),%rax |
1430 adcq $0,%rdx | 1696 adcq $0,%rdx |
1431 addq %r14,%r13 | 1697 addq %r14,%r13 |
1432 movq %rdx,%r14 | 1698 movq %rdx,%r14 |
1433 adcq $0,%r14 | 1699 adcq $0,%r14 |
1434 | 1700 |
1435 mulq %rbx | 1701 mulq %rbx |
1436 movq %rsi,%rbx | 1702 movq %rsi,%rbx |
1437 addq %rax,%r15 | 1703 addq %rax,%r15 |
1438 movq 0(%rbp),%rax | 1704 movq 0(%rbp),%rax |
1439 adcq $0,%rdx | 1705 adcq $0,%rdx |
1440 addq %r15,%r14 | 1706 addq %r15,%r14 |
1441 movq %rdx,%r15 | 1707 movq %rdx,%r15 |
1442 adcq $0,%r15 | 1708 adcq $0,%r15 |
1443 | 1709 |
1444 decl %ecx | 1710 decl %ecx |
1445 jnz L$8x_reduce | 1711 jnz L$8x_reduce |
1446 | 1712 |
1447 » leaq» 128(%rbp),%rbp | 1713 » leaq» 64(%rbp),%rbp |
1448 xorq %rax,%rax | 1714 xorq %rax,%rax |
1449 movq 8+8(%rsp),%rdx | 1715 movq 8+8(%rsp),%rdx |
1450 cmpq 0+8(%rsp),%rbp | 1716 cmpq 0+8(%rsp),%rbp |
1451 jae L$8x_no_tail | 1717 jae L$8x_no_tail |
1452 | 1718 |
1453 .byte 0x66 | 1719 .byte 0x66 |
1454 addq 0(%rdi),%r8 | 1720 addq 0(%rdi),%r8 |
1455 adcq 8(%rdi),%r9 | 1721 adcq 8(%rdi),%r9 |
1456 adcq 16(%rdi),%r10 | 1722 adcq 16(%rdi),%r10 |
1457 adcq 24(%rdi),%r11 | 1723 adcq 24(%rdi),%r11 |
1458 adcq 32(%rdi),%r12 | 1724 adcq 32(%rdi),%r12 |
1459 adcq 40(%rdi),%r13 | 1725 adcq 40(%rdi),%r13 |
1460 adcq 48(%rdi),%r14 | 1726 adcq 48(%rdi),%r14 |
1461 adcq 56(%rdi),%r15 | 1727 adcq 56(%rdi),%r15 |
1462 sbbq %rsi,%rsi | 1728 sbbq %rsi,%rsi |
1463 | 1729 |
1464 movq 48+56+8(%rsp),%rbx | 1730 movq 48+56+8(%rsp),%rbx |
1465 movl $8,%ecx | 1731 movl $8,%ecx |
1466 movq 0(%rbp),%rax | 1732 movq 0(%rbp),%rax |
1467 jmp L$8x_tail | 1733 jmp L$8x_tail |
1468 | 1734 |
1469 .p2align 5 | 1735 .p2align 5 |
1470 L$8x_tail: | 1736 L$8x_tail: |
1471 mulq %rbx | 1737 mulq %rbx |
1472 addq %rax,%r8 | 1738 addq %rax,%r8 |
1473 » movq» 16(%rbp),%rax | 1739 » movq» 8(%rbp),%rax |
1474 movq %r8,(%rdi) | 1740 movq %r8,(%rdi) |
1475 movq %rdx,%r8 | 1741 movq %rdx,%r8 |
1476 adcq $0,%r8 | 1742 adcq $0,%r8 |
1477 | 1743 |
1478 mulq %rbx | 1744 mulq %rbx |
1479 addq %rax,%r9 | 1745 addq %rax,%r9 |
1480 » movq» 32(%rbp),%rax | 1746 » movq» 16(%rbp),%rax |
1481 adcq $0,%rdx | 1747 adcq $0,%rdx |
1482 addq %r9,%r8 | 1748 addq %r9,%r8 |
1483 leaq 8(%rdi),%rdi | 1749 leaq 8(%rdi),%rdi |
1484 movq %rdx,%r9 | 1750 movq %rdx,%r9 |
1485 adcq $0,%r9 | 1751 adcq $0,%r9 |
1486 | 1752 |
1487 mulq %rbx | 1753 mulq %rbx |
1488 addq %rax,%r10 | 1754 addq %rax,%r10 |
1489 » movq» 48(%rbp),%rax | 1755 » movq» 24(%rbp),%rax |
1490 adcq $0,%rdx | 1756 adcq $0,%rdx |
1491 addq %r10,%r9 | 1757 addq %r10,%r9 |
1492 movq %rdx,%r10 | 1758 movq %rdx,%r10 |
1493 adcq $0,%r10 | 1759 adcq $0,%r10 |
1494 | 1760 |
1495 mulq %rbx | 1761 mulq %rbx |
1496 addq %rax,%r11 | 1762 addq %rax,%r11 |
1497 » movq» 64(%rbp),%rax | 1763 » movq» 32(%rbp),%rax |
1498 adcq $0,%rdx | 1764 adcq $0,%rdx |
1499 addq %r11,%r10 | 1765 addq %r11,%r10 |
1500 movq %rdx,%r11 | 1766 movq %rdx,%r11 |
1501 adcq $0,%r11 | 1767 adcq $0,%r11 |
1502 | 1768 |
1503 mulq %rbx | 1769 mulq %rbx |
1504 addq %rax,%r12 | 1770 addq %rax,%r12 |
1505 » movq» 80(%rbp),%rax | 1771 » movq» 40(%rbp),%rax |
1506 adcq $0,%rdx | 1772 adcq $0,%rdx |
1507 addq %r12,%r11 | 1773 addq %r12,%r11 |
1508 movq %rdx,%r12 | 1774 movq %rdx,%r12 |
1509 adcq $0,%r12 | 1775 adcq $0,%r12 |
1510 | 1776 |
1511 mulq %rbx | 1777 mulq %rbx |
1512 addq %rax,%r13 | 1778 addq %rax,%r13 |
1513 » movq» 96(%rbp),%rax | 1779 » movq» 48(%rbp),%rax |
1514 adcq $0,%rdx | 1780 adcq $0,%rdx |
1515 addq %r13,%r12 | 1781 addq %r13,%r12 |
1516 movq %rdx,%r13 | 1782 movq %rdx,%r13 |
1517 adcq $0,%r13 | 1783 adcq $0,%r13 |
1518 | 1784 |
1519 mulq %rbx | 1785 mulq %rbx |
1520 addq %rax,%r14 | 1786 addq %rax,%r14 |
1521 » movq» 112(%rbp),%rax | 1787 » movq» 56(%rbp),%rax |
1522 adcq $0,%rdx | 1788 adcq $0,%rdx |
1523 addq %r14,%r13 | 1789 addq %r14,%r13 |
1524 movq %rdx,%r14 | 1790 movq %rdx,%r14 |
1525 adcq $0,%r14 | 1791 adcq $0,%r14 |
1526 | 1792 |
1527 mulq %rbx | 1793 mulq %rbx |
1528 movq 48-16+8(%rsp,%rcx,8),%rbx | 1794 movq 48-16+8(%rsp,%rcx,8),%rbx |
1529 addq %rax,%r15 | 1795 addq %rax,%r15 |
1530 adcq $0,%rdx | 1796 adcq $0,%rdx |
1531 addq %r15,%r14 | 1797 addq %r15,%r14 |
1532 movq 0(%rbp),%rax | 1798 movq 0(%rbp),%rax |
1533 movq %rdx,%r15 | 1799 movq %rdx,%r15 |
1534 adcq $0,%r15 | 1800 adcq $0,%r15 |
1535 | 1801 |
1536 decl %ecx | 1802 decl %ecx |
1537 jnz L$8x_tail | 1803 jnz L$8x_tail |
1538 | 1804 |
1539 » leaq» 128(%rbp),%rbp | 1805 » leaq» 64(%rbp),%rbp |
1540 movq 8+8(%rsp),%rdx | 1806 movq 8+8(%rsp),%rdx |
1541 cmpq 0+8(%rsp),%rbp | 1807 cmpq 0+8(%rsp),%rbp |
1542 jae L$8x_tail_done | 1808 jae L$8x_tail_done |
1543 | 1809 |
1544 movq 48+56+8(%rsp),%rbx | 1810 movq 48+56+8(%rsp),%rbx |
1545 negq %rsi | 1811 negq %rsi |
1546 movq 0(%rbp),%rax | 1812 movq 0(%rbp),%rax |
1547 adcq 0(%rdi),%r8 | 1813 adcq 0(%rdi),%r8 |
1548 adcq 8(%rdi),%r9 | 1814 adcq 8(%rdi),%r9 |
1549 adcq 16(%rdi),%r10 | 1815 adcq 16(%rdi),%r10 |
1550 adcq 24(%rdi),%r11 | 1816 adcq 24(%rdi),%r11 |
1551 adcq 32(%rdi),%r12 | 1817 adcq 32(%rdi),%r12 |
1552 adcq 40(%rdi),%r13 | 1818 adcq 40(%rdi),%r13 |
1553 adcq 48(%rdi),%r14 | 1819 adcq 48(%rdi),%r14 |
1554 adcq 56(%rdi),%r15 | 1820 adcq 56(%rdi),%r15 |
1555 sbbq %rsi,%rsi | 1821 sbbq %rsi,%rsi |
1556 | 1822 |
1557 movl $8,%ecx | 1823 movl $8,%ecx |
1558 jmp L$8x_tail | 1824 jmp L$8x_tail |
1559 | 1825 |
1560 .p2align 5 | 1826 .p2align 5 |
1561 L$8x_tail_done: | 1827 L$8x_tail_done: |
1562 addq (%rdx),%r8 | 1828 addq (%rdx),%r8 |
| 1829 adcq $0,%r9 |
| 1830 adcq $0,%r10 |
| 1831 adcq $0,%r11 |
| 1832 adcq $0,%r12 |
| 1833 adcq $0,%r13 |
| 1834 adcq $0,%r14 |
| 1835 adcq $0,%r15 |
| 1836 |
| 1837 |
1563 xorq %rax,%rax | 1838 xorq %rax,%rax |
1564 | 1839 |
1565 negq %rsi | 1840 negq %rsi |
1566 L$8x_no_tail: | 1841 L$8x_no_tail: |
1567 adcq 0(%rdi),%r8 | 1842 adcq 0(%rdi),%r8 |
1568 adcq 8(%rdi),%r9 | 1843 adcq 8(%rdi),%r9 |
1569 adcq 16(%rdi),%r10 | 1844 adcq 16(%rdi),%r10 |
1570 adcq 24(%rdi),%r11 | 1845 adcq 24(%rdi),%r11 |
1571 adcq 32(%rdi),%r12 | 1846 adcq 32(%rdi),%r12 |
1572 adcq 40(%rdi),%r13 | 1847 adcq 40(%rdi),%r13 |
1573 adcq 48(%rdi),%r14 | 1848 adcq 48(%rdi),%r14 |
1574 adcq 56(%rdi),%r15 | 1849 adcq 56(%rdi),%r15 |
1575 adcq $0,%rax | 1850 adcq $0,%rax |
1576 » movq» -16(%rbp),%rcx | 1851 » movq» -8(%rbp),%rcx |
1577 xorq %rsi,%rsi | 1852 xorq %rsi,%rsi |
1578 | 1853 |
1579 .byte 102,72,15,126,213 | 1854 .byte 102,72,15,126,213 |
1580 | 1855 |
1581 movq %r8,0(%rdi) | 1856 movq %r8,0(%rdi) |
1582 movq %r9,8(%rdi) | 1857 movq %r9,8(%rdi) |
1583 .byte 102,73,15,126,217 | 1858 .byte 102,73,15,126,217 |
1584 movq %r10,16(%rdi) | 1859 movq %r10,16(%rdi) |
1585 movq %r11,24(%rdi) | 1860 movq %r11,24(%rdi) |
1586 movq %r12,32(%rdi) | 1861 movq %r12,32(%rdi) |
1587 movq %r13,40(%rdi) | 1862 movq %r13,40(%rdi) |
1588 movq %r14,48(%rdi) | 1863 movq %r14,48(%rdi) |
1589 movq %r15,56(%rdi) | 1864 movq %r15,56(%rdi) |
1590 leaq 64(%rdi),%rdi | 1865 leaq 64(%rdi),%rdi |
1591 | 1866 |
1592 cmpq %rdx,%rdi | 1867 cmpq %rdx,%rdi |
1593 jb L$8x_reduction_loop | 1868 jb L$8x_reduction_loop |
| 1869 .byte 0xf3,0xc3 |
1594 | 1870 |
1595 subq %r15,%rcx | |
1596 leaq (%rdi,%r9,1),%rbx | |
1597 adcq %rsi,%rsi | |
1598 movq %r9,%rcx | |
1599 orq %rsi,%rax | |
1600 .byte 102,72,15,126,207 | |
1601 xorq $1,%rax | |
1602 .byte 102,72,15,126,206 | |
1603 leaq (%rbp,%rax,8),%rbp | |
1604 sarq $3+2,%rcx | |
1605 jmp L$sqr4x_sub | |
1606 | 1871 |
1607 .p2align 5 | 1872 .p2align 5 |
| 1873 __bn_post4x_internal: |
| 1874 movq 0(%rbp),%r12 |
| 1875 leaq (%rdi,%r9,1),%rbx |
| 1876 movq %r9,%rcx |
| 1877 .byte 102,72,15,126,207 |
| 1878 negq %rax |
| 1879 .byte 102,72,15,126,206 |
| 1880 sarq $3+2,%rcx |
| 1881 decq %r12 |
| 1882 xorq %r10,%r10 |
| 1883 movq 8(%rbp),%r13 |
| 1884 movq 16(%rbp),%r14 |
| 1885 movq 24(%rbp),%r15 |
| 1886 jmp L$sqr4x_sub_entry |
| 1887 |
| 1888 .p2align 4 |
1608 L$sqr4x_sub: | 1889 L$sqr4x_sub: |
1609 .byte» 0x66 | 1890 » movq» 0(%rbp),%r12 |
1610 » movq» 0(%rbx),%r12 | 1891 » movq» 8(%rbp),%r13 |
1611 » movq» 8(%rbx),%r13 | 1892 » movq» 16(%rbp),%r14 |
1612 » sbbq» 0(%rbp),%r12 | 1893 » movq» 24(%rbp),%r15 |
1613 » movq» 16(%rbx),%r14 | 1894 L$sqr4x_sub_entry: |
1614 » sbbq» 16(%rbp),%r13 | 1895 » leaq» 32(%rbp),%rbp |
1615 » movq» 24(%rbx),%r15 | 1896 » notq» %r12 |
| 1897 » notq» %r13 |
| 1898 » notq» %r14 |
| 1899 » notq» %r15 |
| 1900 » andq» %rax,%r12 |
| 1901 » andq» %rax,%r13 |
| 1902 » andq» %rax,%r14 |
| 1903 » andq» %rax,%r15 |
| 1904 |
| 1905 » negq» %r10 |
| 1906 » adcq» 0(%rbx),%r12 |
| 1907 » adcq» 8(%rbx),%r13 |
| 1908 » adcq» 16(%rbx),%r14 |
| 1909 » adcq» 24(%rbx),%r15 |
| 1910 » movq» %r12,0(%rdi) |
1616 leaq 32(%rbx),%rbx | 1911 leaq 32(%rbx),%rbx |
1617 sbbq 32(%rbp),%r14 | |
1618 movq %r12,0(%rdi) | |
1619 sbbq 48(%rbp),%r15 | |
1620 leaq 64(%rbp),%rbp | |
1621 movq %r13,8(%rdi) | 1912 movq %r13,8(%rdi) |
| 1913 sbbq %r10,%r10 |
1622 movq %r14,16(%rdi) | 1914 movq %r14,16(%rdi) |
1623 movq %r15,24(%rdi) | 1915 movq %r15,24(%rdi) |
1624 leaq 32(%rdi),%rdi | 1916 leaq 32(%rdi),%rdi |
1625 | 1917 |
1626 incq %rcx | 1918 incq %rcx |
1627 jnz L$sqr4x_sub | 1919 jnz L$sqr4x_sub |
| 1920 |
1628 movq %r9,%r10 | 1921 movq %r9,%r10 |
1629 negq %r9 | 1922 negq %r9 |
1630 .byte 0xf3,0xc3 | 1923 .byte 0xf3,0xc3 |
1631 | 1924 |
1632 .globl _bn_from_montgomery | 1925 .globl _bn_from_montgomery |
1633 .private_extern _bn_from_montgomery | 1926 .private_extern _bn_from_montgomery |
1634 | 1927 |
1635 .p2align 5 | 1928 .p2align 5 |
1636 _bn_from_montgomery: | 1929 _bn_from_montgomery: |
1637 testl $7,%r9d | 1930 testl $7,%r9d |
1638 jz bn_from_mont8x | 1931 jz bn_from_mont8x |
1639 xorl %eax,%eax | 1932 xorl %eax,%eax |
1640 .byte 0xf3,0xc3 | 1933 .byte 0xf3,0xc3 |
1641 | 1934 |
1642 | 1935 |
1643 | 1936 |
1644 .p2align 5 | 1937 .p2align 5 |
1645 bn_from_mont8x: | 1938 bn_from_mont8x: |
1646 .byte 0x67 | 1939 .byte 0x67 |
1647 movq %rsp,%rax | 1940 movq %rsp,%rax |
1648 pushq %rbx | 1941 pushq %rbx |
1649 pushq %rbp | 1942 pushq %rbp |
1650 pushq %r12 | 1943 pushq %r12 |
1651 pushq %r13 | 1944 pushq %r13 |
1652 pushq %r14 | 1945 pushq %r14 |
1653 pushq %r15 | 1946 pushq %r15 |
1654 .byte» 0x67 | 1947 |
1655 » movl» %r9d,%r10d | |
1656 shll $3,%r9d | 1948 shll $3,%r9d |
1657 » shll» $3+2,%r10d | 1949 » leaq» (%r9,%r9,2),%r10 |
1658 negq %r9 | 1950 negq %r9 |
1659 movq (%r8),%r8 | 1951 movq (%r8),%r8 |
1660 | 1952 |
1661 | 1953 |
1662 | 1954 |
1663 | 1955 |
1664 | 1956 |
1665 | 1957 |
1666 | 1958 |
1667 » leaq» -64(%rsp,%r9,2),%r11 | 1959 |
1668 » subq» %rsi,%r11 | 1960 » leaq» -320(%rsp,%r9,2),%r11 |
| 1961 » subq» %rdi,%r11 |
1669 andq $4095,%r11 | 1962 andq $4095,%r11 |
1670 cmpq %r11,%r10 | 1963 cmpq %r11,%r10 |
1671 jb L$from_sp_alt | 1964 jb L$from_sp_alt |
1672 subq %r11,%rsp | 1965 subq %r11,%rsp |
1673 » leaq» -64(%rsp,%r9,2),%rsp | 1966 » leaq» -320(%rsp,%r9,2),%rsp |
1674 jmp L$from_sp_done | 1967 jmp L$from_sp_done |
1675 | 1968 |
1676 .p2align 5 | 1969 .p2align 5 |
1677 L$from_sp_alt: | 1970 L$from_sp_alt: |
1678 » leaq» 4096-64(,%r9,2),%r10 | 1971 » leaq» 4096-320(,%r9,2),%r10 |
1679 » leaq» -64(%rsp,%r9,2),%rsp | 1972 » leaq» -320(%rsp,%r9,2),%rsp |
1680 subq %r10,%r11 | 1973 subq %r10,%r11 |
1681 movq $0,%r10 | 1974 movq $0,%r10 |
1682 cmovcq %r10,%r11 | 1975 cmovcq %r10,%r11 |
1683 subq %r11,%rsp | 1976 subq %r11,%rsp |
1684 L$from_sp_done: | 1977 L$from_sp_done: |
1685 andq $-64,%rsp | 1978 andq $-64,%rsp |
1686 movq %r9,%r10 | 1979 movq %r9,%r10 |
1687 negq %r9 | 1980 negq %r9 |
1688 | 1981 |
1689 | 1982 |
(...skipping 30 matching lines...) Expand all Loading... |
1720 movdqa %xmm4,48(%rax) | 2013 movdqa %xmm4,48(%rax) |
1721 leaq 64(%rax),%rax | 2014 leaq 64(%rax),%rax |
1722 subq $64,%r11 | 2015 subq $64,%r11 |
1723 jnz L$mul_by_1 | 2016 jnz L$mul_by_1 |
1724 | 2017 |
1725 .byte 102,72,15,110,207 | 2018 .byte 102,72,15,110,207 |
1726 .byte 102,72,15,110,209 | 2019 .byte 102,72,15,110,209 |
1727 .byte 0x67 | 2020 .byte 0x67 |
1728 movq %rcx,%rbp | 2021 movq %rcx,%rbp |
1729 .byte 102,73,15,110,218 | 2022 .byte 102,73,15,110,218 |
1730 » call» sqr8x_reduction | 2023 » call» __bn_sqr8x_reduction |
| 2024 » call» __bn_post4x_internal |
1731 | 2025 |
1732 pxor %xmm0,%xmm0 | 2026 pxor %xmm0,%xmm0 |
1733 leaq 48(%rsp),%rax | 2027 leaq 48(%rsp),%rax |
1734 movq 40(%rsp),%rsi | 2028 movq 40(%rsp),%rsi |
1735 jmp L$from_mont_zero | 2029 jmp L$from_mont_zero |
1736 | 2030 |
1737 .p2align 5 | 2031 .p2align 5 |
1738 L$from_mont_zero: | 2032 L$from_mont_zero: |
1739 movdqa %xmm0,0(%rax) | 2033 movdqa %xmm0,0(%rax) |
1740 movdqa %xmm0,16(%rax) | 2034 movdqa %xmm0,16(%rax) |
(...skipping 29 matching lines...) Expand all Loading... |
1770 leaq 256(%rdx),%rdx | 2064 leaq 256(%rdx),%rdx |
1771 subl $1,%esi | 2065 subl $1,%esi |
1772 jnz L$scatter | 2066 jnz L$scatter |
1773 L$scatter_epilogue: | 2067 L$scatter_epilogue: |
1774 .byte 0xf3,0xc3 | 2068 .byte 0xf3,0xc3 |
1775 | 2069 |
1776 | 2070 |
1777 .globl _bn_gather5 | 2071 .globl _bn_gather5 |
1778 .private_extern _bn_gather5 | 2072 .private_extern _bn_gather5 |
1779 | 2073 |
1780 .p2align» 4 | 2074 .p2align» 5 |
1781 _bn_gather5: | 2075 _bn_gather5: |
1782 » movl» %ecx,%r11d | 2076 L$SEH_begin_bn_gather5: |
1783 » shrl» $3,%ecx | 2077 |
1784 » andq» $7,%r11 | 2078 .byte» 0x4c,0x8d,0x14,0x24 |
1785 » notl» %ecx | 2079 .byte» 0x48,0x81,0xec,0x08,0x01,0x00,0x00 |
1786 » leaq» L$magic_masks(%rip),%rax | 2080 » leaq» L$inc(%rip),%rax |
1787 » andl» $3,%ecx | 2081 » andq» $-16,%rsp |
1788 » leaq» 128(%rdx,%r11,8),%rdx | 2082 |
1789 » movq» 0(%rax,%rcx,8),%xmm4 | 2083 » movd» %ecx,%xmm5 |
1790 » movq» 8(%rax,%rcx,8),%xmm5 | 2084 » movdqa» 0(%rax),%xmm0 |
1791 » movq» 16(%rax,%rcx,8),%xmm6 | 2085 » movdqa» 16(%rax),%xmm1 |
1792 » movq» 24(%rax,%rcx,8),%xmm7 | 2086 » leaq» 128(%rdx),%r11 |
| 2087 » leaq» 128(%rsp),%rax |
| 2088 |
| 2089 » pshufd» $0,%xmm5,%xmm5 |
| 2090 » movdqa» %xmm1,%xmm4 |
| 2091 » movdqa» %xmm1,%xmm2 |
| 2092 » paddd» %xmm0,%xmm1 |
| 2093 » pcmpeqd»%xmm5,%xmm0 |
| 2094 » movdqa» %xmm4,%xmm3 |
| 2095 |
| 2096 » paddd» %xmm1,%xmm2 |
| 2097 » pcmpeqd»%xmm5,%xmm1 |
| 2098 » movdqa» %xmm0,-128(%rax) |
| 2099 » movdqa» %xmm4,%xmm0 |
| 2100 |
| 2101 » paddd» %xmm2,%xmm3 |
| 2102 » pcmpeqd»%xmm5,%xmm2 |
| 2103 » movdqa» %xmm1,-112(%rax) |
| 2104 » movdqa» %xmm4,%xmm1 |
| 2105 |
| 2106 » paddd» %xmm3,%xmm0 |
| 2107 » pcmpeqd»%xmm5,%xmm3 |
| 2108 » movdqa» %xmm2,-96(%rax) |
| 2109 » movdqa» %xmm4,%xmm2 |
| 2110 » paddd» %xmm0,%xmm1 |
| 2111 » pcmpeqd»%xmm5,%xmm0 |
| 2112 » movdqa» %xmm3,-80(%rax) |
| 2113 » movdqa» %xmm4,%xmm3 |
| 2114 |
| 2115 » paddd» %xmm1,%xmm2 |
| 2116 » pcmpeqd»%xmm5,%xmm1 |
| 2117 » movdqa» %xmm0,-64(%rax) |
| 2118 » movdqa» %xmm4,%xmm0 |
| 2119 |
| 2120 » paddd» %xmm2,%xmm3 |
| 2121 » pcmpeqd»%xmm5,%xmm2 |
| 2122 » movdqa» %xmm1,-48(%rax) |
| 2123 » movdqa» %xmm4,%xmm1 |
| 2124 |
| 2125 » paddd» %xmm3,%xmm0 |
| 2126 » pcmpeqd»%xmm5,%xmm3 |
| 2127 » movdqa» %xmm2,-32(%rax) |
| 2128 » movdqa» %xmm4,%xmm2 |
| 2129 » paddd» %xmm0,%xmm1 |
| 2130 » pcmpeqd»%xmm5,%xmm0 |
| 2131 » movdqa» %xmm3,-16(%rax) |
| 2132 » movdqa» %xmm4,%xmm3 |
| 2133 |
| 2134 » paddd» %xmm1,%xmm2 |
| 2135 » pcmpeqd»%xmm5,%xmm1 |
| 2136 » movdqa» %xmm0,0(%rax) |
| 2137 » movdqa» %xmm4,%xmm0 |
| 2138 |
| 2139 » paddd» %xmm2,%xmm3 |
| 2140 » pcmpeqd»%xmm5,%xmm2 |
| 2141 » movdqa» %xmm1,16(%rax) |
| 2142 » movdqa» %xmm4,%xmm1 |
| 2143 |
| 2144 » paddd» %xmm3,%xmm0 |
| 2145 » pcmpeqd»%xmm5,%xmm3 |
| 2146 » movdqa» %xmm2,32(%rax) |
| 2147 » movdqa» %xmm4,%xmm2 |
| 2148 » paddd» %xmm0,%xmm1 |
| 2149 » pcmpeqd»%xmm5,%xmm0 |
| 2150 » movdqa» %xmm3,48(%rax) |
| 2151 » movdqa» %xmm4,%xmm3 |
| 2152 |
| 2153 » paddd» %xmm1,%xmm2 |
| 2154 » pcmpeqd»%xmm5,%xmm1 |
| 2155 » movdqa» %xmm0,64(%rax) |
| 2156 » movdqa» %xmm4,%xmm0 |
| 2157 |
| 2158 » paddd» %xmm2,%xmm3 |
| 2159 » pcmpeqd»%xmm5,%xmm2 |
| 2160 » movdqa» %xmm1,80(%rax) |
| 2161 » movdqa» %xmm4,%xmm1 |
| 2162 |
| 2163 » paddd» %xmm3,%xmm0 |
| 2164 » pcmpeqd»%xmm5,%xmm3 |
| 2165 » movdqa» %xmm2,96(%rax) |
| 2166 » movdqa» %xmm4,%xmm2 |
| 2167 » movdqa» %xmm3,112(%rax) |
1793 jmp L$gather | 2168 jmp L$gather |
1794 .p2align» 4 | 2169 |
| 2170 .p2align» 5 |
1795 L$gather: | 2171 L$gather: |
1796 » movq» -128(%rdx),%xmm0 | 2172 » pxor» %xmm4,%xmm4 |
1797 » movq» -64(%rdx),%xmm1 | 2173 » pxor» %xmm5,%xmm5 |
1798 » pand» %xmm4,%xmm0 | 2174 » movdqa» -128(%r11),%xmm0 |
1799 » movq» 0(%rdx),%xmm2 | 2175 » movdqa» -112(%r11),%xmm1 |
1800 » pand» %xmm5,%xmm1 | 2176 » movdqa» -96(%r11),%xmm2 |
1801 » movq» 64(%rdx),%xmm3 | 2177 » pand» -128(%rax),%xmm0 |
1802 » pand» %xmm6,%xmm2 | 2178 » movdqa» -80(%r11),%xmm3 |
1803 » por» %xmm1,%xmm0 | 2179 » pand» -112(%rax),%xmm1 |
1804 » pand» %xmm7,%xmm3 | 2180 » por» %xmm0,%xmm4 |
1805 .byte» 0x67,0x67 | 2181 » pand» -96(%rax),%xmm2 |
1806 » por» %xmm2,%xmm0 | 2182 » por» %xmm1,%xmm5 |
1807 » leaq» 256(%rdx),%rdx | 2183 » pand» -80(%rax),%xmm3 |
1808 » por» %xmm3,%xmm0 | 2184 » por» %xmm2,%xmm4 |
1809 | 2185 » por» %xmm3,%xmm5 |
| 2186 » movdqa» -64(%r11),%xmm0 |
| 2187 » movdqa» -48(%r11),%xmm1 |
| 2188 » movdqa» -32(%r11),%xmm2 |
| 2189 » pand» -64(%rax),%xmm0 |
| 2190 » movdqa» -16(%r11),%xmm3 |
| 2191 » pand» -48(%rax),%xmm1 |
| 2192 » por» %xmm0,%xmm4 |
| 2193 » pand» -32(%rax),%xmm2 |
| 2194 » por» %xmm1,%xmm5 |
| 2195 » pand» -16(%rax),%xmm3 |
| 2196 » por» %xmm2,%xmm4 |
| 2197 » por» %xmm3,%xmm5 |
| 2198 » movdqa» 0(%r11),%xmm0 |
| 2199 » movdqa» 16(%r11),%xmm1 |
| 2200 » movdqa» 32(%r11),%xmm2 |
| 2201 » pand» 0(%rax),%xmm0 |
| 2202 » movdqa» 48(%r11),%xmm3 |
| 2203 » pand» 16(%rax),%xmm1 |
| 2204 » por» %xmm0,%xmm4 |
| 2205 » pand» 32(%rax),%xmm2 |
| 2206 » por» %xmm1,%xmm5 |
| 2207 » pand» 48(%rax),%xmm3 |
| 2208 » por» %xmm2,%xmm4 |
| 2209 » por» %xmm3,%xmm5 |
| 2210 » movdqa» 64(%r11),%xmm0 |
| 2211 » movdqa» 80(%r11),%xmm1 |
| 2212 » movdqa» 96(%r11),%xmm2 |
| 2213 » pand» 64(%rax),%xmm0 |
| 2214 » movdqa» 112(%r11),%xmm3 |
| 2215 » pand» 80(%rax),%xmm1 |
| 2216 » por» %xmm0,%xmm4 |
| 2217 » pand» 96(%rax),%xmm2 |
| 2218 » por» %xmm1,%xmm5 |
| 2219 » pand» 112(%rax),%xmm3 |
| 2220 » por» %xmm2,%xmm4 |
| 2221 » por» %xmm3,%xmm5 |
| 2222 » por» %xmm5,%xmm4 |
| 2223 » leaq» 256(%r11),%r11 |
| 2224 » pshufd» $0x4e,%xmm4,%xmm0 |
| 2225 » por» %xmm4,%xmm0 |
1810 movq %xmm0,(%rdi) | 2226 movq %xmm0,(%rdi) |
1811 leaq 8(%rdi),%rdi | 2227 leaq 8(%rdi),%rdi |
1812 subl $1,%esi | 2228 subl $1,%esi |
1813 jnz L$gather | 2229 jnz L$gather |
| 2230 |
| 2231 leaq (%r10),%rsp |
1814 .byte 0xf3,0xc3 | 2232 .byte 0xf3,0xc3 |
1815 L$SEH_end_bn_gather5: | 2233 L$SEH_end_bn_gather5: |
1816 | 2234 |
1817 .p2align 6 | 2235 .p2align 6 |
1818 L$magic_masks: | 2236 L$inc: |
1819 .long» 0,0, 0,0, 0,0, -1,-1 | 2237 .long» 0,0, 1,1 |
1820 .long» 0,0, 0,0, 0,0, 0,0 | 2238 .long» 2,2, 2,2 |
1821 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | 2239 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 |
1822 #endif | 2240 #endif |
OLD | NEW |