OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
6 | 6 |
7 .globl bn_mul_mont_gather5 | 7 .globl bn_mul_mont_gather5 |
8 .hidden bn_mul_mont_gather5 | 8 .hidden bn_mul_mont_gather5 |
9 .type bn_mul_mont_gather5,@function | 9 .type bn_mul_mont_gather5,@function |
10 .align 64 | 10 .align 64 |
11 bn_mul_mont_gather5: | 11 bn_mul_mont_gather5: |
12 testl $7,%r9d | 12 testl $7,%r9d |
13 jnz .Lmul_enter | 13 jnz .Lmul_enter |
14 jmp .Lmul4x_enter | 14 jmp .Lmul4x_enter |
15 | 15 |
16 .align 16 | 16 .align 16 |
17 .Lmul_enter: | 17 .Lmul_enter: |
18 movl %r9d,%r9d | 18 movl %r9d,%r9d |
19 movq %rsp,%rax | 19 movq %rsp,%rax |
20 » movl» 8(%rsp),%r10d | 20 » movd» 8(%rsp),%xmm5 |
| 21 » leaq» .Linc(%rip),%r10 |
21 pushq %rbx | 22 pushq %rbx |
22 pushq %rbp | 23 pushq %rbp |
23 pushq %r12 | 24 pushq %r12 |
24 pushq %r13 | 25 pushq %r13 |
25 pushq %r14 | 26 pushq %r14 |
26 pushq %r15 | 27 pushq %r15 |
| 28 |
27 leaq 2(%r9),%r11 | 29 leaq 2(%r9),%r11 |
28 negq %r11 | 30 negq %r11 |
29 » leaq» (%rsp,%r11,8),%rsp | 31 » leaq» -264(%rsp,%r11,8),%rsp |
30 andq $-1024,%rsp | 32 andq $-1024,%rsp |
31 | 33 |
32 movq %rax,8(%rsp,%r9,8) | 34 movq %rax,8(%rsp,%r9,8) |
33 .Lmul_body: | 35 .Lmul_body: |
34 » movq» %rdx,%r12 | 36 » leaq» 128(%rdx),%r12 |
35 » movq» %r10,%r11 | 37 » movdqa» 0(%r10),%xmm0 |
36 » shrq» $3,%r10 | 38 » movdqa» 16(%r10),%xmm1 |
37 » andq» $7,%r11 | 39 » leaq» 24-112(%rsp,%r9,8),%r10 |
38 » notq» %r10 | 40 » andq» $-16,%r10 |
39 » leaq» .Lmagic_masks(%rip),%rax | |
40 » andq» $3,%r10 | |
41 » leaq» 96(%r12,%r11,8),%r12 | |
42 » movq» 0(%rax,%r10,8),%xmm4 | |
43 » movq» 8(%rax,%r10,8),%xmm5 | |
44 » movq» 16(%rax,%r10,8),%xmm6 | |
45 » movq» 24(%rax,%r10,8),%xmm7 | |
46 | 41 |
47 » movq» -96(%r12),%xmm0 | 42 » pshufd» $0,%xmm5,%xmm5 |
48 » movq» -32(%r12),%xmm1 | 43 » movdqa» %xmm1,%xmm4 |
49 » pand» %xmm4,%xmm0 | 44 » movdqa» %xmm1,%xmm2 |
50 » movq» 32(%r12),%xmm2 | 45 » paddd» %xmm0,%xmm1 |
51 » pand» %xmm5,%xmm1 | 46 » pcmpeqd»%xmm5,%xmm0 |
52 » movq» 96(%r12),%xmm3 | 47 .byte» 0x67 |
53 » pand» %xmm6,%xmm2 | 48 » movdqa» %xmm4,%xmm3 |
| 49 » paddd» %xmm1,%xmm2 |
| 50 » pcmpeqd»%xmm5,%xmm1 |
| 51 » movdqa» %xmm0,112(%r10) |
| 52 » movdqa» %xmm4,%xmm0 |
| 53 |
| 54 » paddd» %xmm2,%xmm3 |
| 55 » pcmpeqd»%xmm5,%xmm2 |
| 56 » movdqa» %xmm1,128(%r10) |
| 57 » movdqa» %xmm4,%xmm1 |
| 58 |
| 59 » paddd» %xmm3,%xmm0 |
| 60 » pcmpeqd»%xmm5,%xmm3 |
| 61 » movdqa» %xmm2,144(%r10) |
| 62 » movdqa» %xmm4,%xmm2 |
| 63 |
| 64 » paddd» %xmm0,%xmm1 |
| 65 » pcmpeqd»%xmm5,%xmm0 |
| 66 » movdqa» %xmm3,160(%r10) |
| 67 » movdqa» %xmm4,%xmm3 |
| 68 » paddd» %xmm1,%xmm2 |
| 69 » pcmpeqd»%xmm5,%xmm1 |
| 70 » movdqa» %xmm0,176(%r10) |
| 71 » movdqa» %xmm4,%xmm0 |
| 72 |
| 73 » paddd» %xmm2,%xmm3 |
| 74 » pcmpeqd»%xmm5,%xmm2 |
| 75 » movdqa» %xmm1,192(%r10) |
| 76 » movdqa» %xmm4,%xmm1 |
| 77 |
| 78 » paddd» %xmm3,%xmm0 |
| 79 » pcmpeqd»%xmm5,%xmm3 |
| 80 » movdqa» %xmm2,208(%r10) |
| 81 » movdqa» %xmm4,%xmm2 |
| 82 |
| 83 » paddd» %xmm0,%xmm1 |
| 84 » pcmpeqd»%xmm5,%xmm0 |
| 85 » movdqa» %xmm3,224(%r10) |
| 86 » movdqa» %xmm4,%xmm3 |
| 87 » paddd» %xmm1,%xmm2 |
| 88 » pcmpeqd»%xmm5,%xmm1 |
| 89 » movdqa» %xmm0,240(%r10) |
| 90 » movdqa» %xmm4,%xmm0 |
| 91 |
| 92 » paddd» %xmm2,%xmm3 |
| 93 » pcmpeqd»%xmm5,%xmm2 |
| 94 » movdqa» %xmm1,256(%r10) |
| 95 » movdqa» %xmm4,%xmm1 |
| 96 |
| 97 » paddd» %xmm3,%xmm0 |
| 98 » pcmpeqd»%xmm5,%xmm3 |
| 99 » movdqa» %xmm2,272(%r10) |
| 100 » movdqa» %xmm4,%xmm2 |
| 101 |
| 102 » paddd» %xmm0,%xmm1 |
| 103 » pcmpeqd»%xmm5,%xmm0 |
| 104 » movdqa» %xmm3,288(%r10) |
| 105 » movdqa» %xmm4,%xmm3 |
| 106 » paddd» %xmm1,%xmm2 |
| 107 » pcmpeqd»%xmm5,%xmm1 |
| 108 » movdqa» %xmm0,304(%r10) |
| 109 |
| 110 » paddd» %xmm2,%xmm3 |
| 111 .byte» 0x67 |
| 112 » pcmpeqd»%xmm5,%xmm2 |
| 113 » movdqa» %xmm1,320(%r10) |
| 114 |
| 115 » pcmpeqd»%xmm5,%xmm3 |
| 116 » movdqa» %xmm2,336(%r10) |
| 117 » pand» 64(%r12),%xmm0 |
| 118 |
| 119 » pand» 80(%r12),%xmm1 |
| 120 » pand» 96(%r12),%xmm2 |
| 121 » movdqa» %xmm3,352(%r10) |
| 122 » pand» 112(%r12),%xmm3 |
| 123 » por» %xmm2,%xmm0 |
| 124 » por» %xmm3,%xmm1 |
| 125 » movdqa» -128(%r12),%xmm4 |
| 126 » movdqa» -112(%r12),%xmm5 |
| 127 » movdqa» -96(%r12),%xmm2 |
| 128 » pand» 112(%r10),%xmm4 |
| 129 » movdqa» -80(%r12),%xmm3 |
| 130 » pand» 128(%r10),%xmm5 |
| 131 » por» %xmm4,%xmm0 |
| 132 » pand» 144(%r10),%xmm2 |
| 133 » por» %xmm5,%xmm1 |
| 134 » pand» 160(%r10),%xmm3 |
| 135 » por» %xmm2,%xmm0 |
| 136 » por» %xmm3,%xmm1 |
| 137 » movdqa» -64(%r12),%xmm4 |
| 138 » movdqa» -48(%r12),%xmm5 |
| 139 » movdqa» -32(%r12),%xmm2 |
| 140 » pand» 176(%r10),%xmm4 |
| 141 » movdqa» -16(%r12),%xmm3 |
| 142 » pand» 192(%r10),%xmm5 |
| 143 » por» %xmm4,%xmm0 |
| 144 » pand» 208(%r10),%xmm2 |
| 145 » por» %xmm5,%xmm1 |
| 146 » pand» 224(%r10),%xmm3 |
| 147 » por» %xmm2,%xmm0 |
| 148 » por» %xmm3,%xmm1 |
| 149 » movdqa» 0(%r12),%xmm4 |
| 150 » movdqa» 16(%r12),%xmm5 |
| 151 » movdqa» 32(%r12),%xmm2 |
| 152 » pand» 240(%r10),%xmm4 |
| 153 » movdqa» 48(%r12),%xmm3 |
| 154 » pand» 256(%r10),%xmm5 |
| 155 » por» %xmm4,%xmm0 |
| 156 » pand» 272(%r10),%xmm2 |
| 157 » por» %xmm5,%xmm1 |
| 158 » pand» 288(%r10),%xmm3 |
| 159 » por» %xmm2,%xmm0 |
| 160 » por» %xmm3,%xmm1 |
54 por %xmm1,%xmm0 | 161 por %xmm1,%xmm0 |
55 » pand» %xmm7,%xmm3 | 162 » pshufd» $0x4e,%xmm0,%xmm1 |
56 » por» %xmm2,%xmm0 | 163 » por» %xmm1,%xmm0 |
57 leaq 256(%r12),%r12 | 164 leaq 256(%r12),%r12 |
58 por %xmm3,%xmm0 | |
59 | |
60 .byte 102,72,15,126,195 | 165 .byte 102,72,15,126,195 |
61 | 166 |
62 movq (%r8),%r8 | 167 movq (%r8),%r8 |
63 movq (%rsi),%rax | 168 movq (%rsi),%rax |
64 | 169 |
65 xorq %r14,%r14 | 170 xorq %r14,%r14 |
66 xorq %r15,%r15 | 171 xorq %r15,%r15 |
67 | 172 |
68 movq -96(%r12),%xmm0 | |
69 movq -32(%r12),%xmm1 | |
70 pand %xmm4,%xmm0 | |
71 movq 32(%r12),%xmm2 | |
72 pand %xmm5,%xmm1 | |
73 | |
74 movq %r8,%rbp | 173 movq %r8,%rbp |
75 mulq %rbx | 174 mulq %rbx |
76 movq %rax,%r10 | 175 movq %rax,%r10 |
77 movq (%rcx),%rax | 176 movq (%rcx),%rax |
78 | 177 |
79 movq 96(%r12),%xmm3 | |
80 pand %xmm6,%xmm2 | |
81 por %xmm1,%xmm0 | |
82 pand %xmm7,%xmm3 | |
83 | |
84 imulq %r10,%rbp | 178 imulq %r10,%rbp |
85 movq %rdx,%r11 | 179 movq %rdx,%r11 |
86 | 180 |
87 por %xmm2,%xmm0 | |
88 leaq 256(%r12),%r12 | |
89 por %xmm3,%xmm0 | |
90 | |
91 mulq %rbp | 181 mulq %rbp |
92 addq %rax,%r10 | 182 addq %rax,%r10 |
93 movq 8(%rsi),%rax | 183 movq 8(%rsi),%rax |
94 adcq $0,%rdx | 184 adcq $0,%rdx |
95 movq %rdx,%r13 | 185 movq %rdx,%r13 |
96 | 186 |
97 leaq 1(%r15),%r15 | 187 leaq 1(%r15),%r15 |
98 jmp .L1st_enter | 188 jmp .L1st_enter |
99 | 189 |
100 .align 16 | 190 .align 16 |
(...skipping 12 matching lines...) Expand all Loading... |
113 addq %rax,%r11 | 203 addq %rax,%r11 |
114 movq (%rcx,%r15,8),%rax | 204 movq (%rcx,%r15,8),%rax |
115 adcq $0,%rdx | 205 adcq $0,%rdx |
116 leaq 1(%r15),%r15 | 206 leaq 1(%r15),%r15 |
117 movq %rdx,%r10 | 207 movq %rdx,%r10 |
118 | 208 |
119 mulq %rbp | 209 mulq %rbp |
120 cmpq %r9,%r15 | 210 cmpq %r9,%r15 |
121 jne .L1st | 211 jne .L1st |
122 | 212 |
123 .byte 102,72,15,126,195 | |
124 | 213 |
125 addq %rax,%r13 | 214 addq %rax,%r13 |
126 movq (%rsi),%rax | |
127 adcq $0,%rdx | 215 adcq $0,%rdx |
128 addq %r11,%r13 | 216 addq %r11,%r13 |
129 adcq $0,%rdx | 217 adcq $0,%rdx |
130 » movq» %r13,-16(%rsp,%r15,8) | 218 » movq» %r13,-16(%rsp,%r9,8) |
131 movq %rdx,%r13 | 219 movq %rdx,%r13 |
132 movq %r10,%r11 | 220 movq %r10,%r11 |
133 | 221 |
134 xorq %rdx,%rdx | 222 xorq %rdx,%rdx |
135 addq %r11,%r13 | 223 addq %r11,%r13 |
136 adcq $0,%rdx | 224 adcq $0,%rdx |
137 movq %r13,-8(%rsp,%r9,8) | 225 movq %r13,-8(%rsp,%r9,8) |
138 movq %rdx,(%rsp,%r9,8) | 226 movq %rdx,(%rsp,%r9,8) |
139 | 227 |
140 leaq 1(%r14),%r14 | 228 leaq 1(%r14),%r14 |
141 jmp .Louter | 229 jmp .Louter |
142 .align 16 | 230 .align 16 |
143 .Louter: | 231 .Louter: |
| 232 leaq 24+128(%rsp,%r9,8),%rdx |
| 233 andq $-16,%rdx |
| 234 pxor %xmm4,%xmm4 |
| 235 pxor %xmm5,%xmm5 |
| 236 movdqa -128(%r12),%xmm0 |
| 237 movdqa -112(%r12),%xmm1 |
| 238 movdqa -96(%r12),%xmm2 |
| 239 movdqa -80(%r12),%xmm3 |
| 240 pand -128(%rdx),%xmm0 |
| 241 pand -112(%rdx),%xmm1 |
| 242 por %xmm0,%xmm4 |
| 243 pand -96(%rdx),%xmm2 |
| 244 por %xmm1,%xmm5 |
| 245 pand -80(%rdx),%xmm3 |
| 246 por %xmm2,%xmm4 |
| 247 por %xmm3,%xmm5 |
| 248 movdqa -64(%r12),%xmm0 |
| 249 movdqa -48(%r12),%xmm1 |
| 250 movdqa -32(%r12),%xmm2 |
| 251 movdqa -16(%r12),%xmm3 |
| 252 pand -64(%rdx),%xmm0 |
| 253 pand -48(%rdx),%xmm1 |
| 254 por %xmm0,%xmm4 |
| 255 pand -32(%rdx),%xmm2 |
| 256 por %xmm1,%xmm5 |
| 257 pand -16(%rdx),%xmm3 |
| 258 por %xmm2,%xmm4 |
| 259 por %xmm3,%xmm5 |
| 260 movdqa 0(%r12),%xmm0 |
| 261 movdqa 16(%r12),%xmm1 |
| 262 movdqa 32(%r12),%xmm2 |
| 263 movdqa 48(%r12),%xmm3 |
| 264 pand 0(%rdx),%xmm0 |
| 265 pand 16(%rdx),%xmm1 |
| 266 por %xmm0,%xmm4 |
| 267 pand 32(%rdx),%xmm2 |
| 268 por %xmm1,%xmm5 |
| 269 pand 48(%rdx),%xmm3 |
| 270 por %xmm2,%xmm4 |
| 271 por %xmm3,%xmm5 |
| 272 movdqa 64(%r12),%xmm0 |
| 273 movdqa 80(%r12),%xmm1 |
| 274 movdqa 96(%r12),%xmm2 |
| 275 movdqa 112(%r12),%xmm3 |
| 276 pand 64(%rdx),%xmm0 |
| 277 pand 80(%rdx),%xmm1 |
| 278 por %xmm0,%xmm4 |
| 279 pand 96(%rdx),%xmm2 |
| 280 por %xmm1,%xmm5 |
| 281 pand 112(%rdx),%xmm3 |
| 282 por %xmm2,%xmm4 |
| 283 por %xmm3,%xmm5 |
| 284 por %xmm5,%xmm4 |
| 285 pshufd $0x4e,%xmm4,%xmm0 |
| 286 por %xmm4,%xmm0 |
| 287 leaq 256(%r12),%r12 |
| 288 |
| 289 movq (%rsi),%rax |
| 290 .byte 102,72,15,126,195 |
| 291 |
144 xorq %r15,%r15 | 292 xorq %r15,%r15 |
145 movq %r8,%rbp | 293 movq %r8,%rbp |
146 movq (%rsp),%r10 | 294 movq (%rsp),%r10 |
147 | 295 |
148 movq -96(%r12),%xmm0 | |
149 movq -32(%r12),%xmm1 | |
150 pand %xmm4,%xmm0 | |
151 movq 32(%r12),%xmm2 | |
152 pand %xmm5,%xmm1 | |
153 | |
154 mulq %rbx | 296 mulq %rbx |
155 addq %rax,%r10 | 297 addq %rax,%r10 |
156 movq (%rcx),%rax | 298 movq (%rcx),%rax |
157 adcq $0,%rdx | 299 adcq $0,%rdx |
158 | 300 |
159 movq 96(%r12),%xmm3 | |
160 pand %xmm6,%xmm2 | |
161 por %xmm1,%xmm0 | |
162 pand %xmm7,%xmm3 | |
163 | |
164 imulq %r10,%rbp | 301 imulq %r10,%rbp |
165 movq %rdx,%r11 | 302 movq %rdx,%r11 |
166 | 303 |
167 por %xmm2,%xmm0 | |
168 leaq 256(%r12),%r12 | |
169 por %xmm3,%xmm0 | |
170 | |
171 mulq %rbp | 304 mulq %rbp |
172 addq %rax,%r10 | 305 addq %rax,%r10 |
173 movq 8(%rsi),%rax | 306 movq 8(%rsi),%rax |
174 adcq $0,%rdx | 307 adcq $0,%rdx |
175 movq 8(%rsp),%r10 | 308 movq 8(%rsp),%r10 |
176 movq %rdx,%r13 | 309 movq %rdx,%r13 |
177 | 310 |
178 leaq 1(%r15),%r15 | 311 leaq 1(%r15),%r15 |
179 jmp .Linner_enter | 312 jmp .Linner_enter |
180 | 313 |
(...skipping 15 matching lines...) Expand all Loading... |
196 adcq $0,%rdx | 329 adcq $0,%rdx |
197 addq %r11,%r10 | 330 addq %r11,%r10 |
198 movq %rdx,%r11 | 331 movq %rdx,%r11 |
199 adcq $0,%r11 | 332 adcq $0,%r11 |
200 leaq 1(%r15),%r15 | 333 leaq 1(%r15),%r15 |
201 | 334 |
202 mulq %rbp | 335 mulq %rbp |
203 cmpq %r9,%r15 | 336 cmpq %r9,%r15 |
204 jne .Linner | 337 jne .Linner |
205 | 338 |
206 .byte 102,72,15,126,195 | |
207 | |
208 addq %rax,%r13 | 339 addq %rax,%r13 |
209 movq (%rsi),%rax | |
210 adcq $0,%rdx | 340 adcq $0,%rdx |
211 addq %r10,%r13 | 341 addq %r10,%r13 |
212 » movq» (%rsp,%r15,8),%r10 | 342 » movq» (%rsp,%r9,8),%r10 |
213 adcq $0,%rdx | 343 adcq $0,%rdx |
214 » movq» %r13,-16(%rsp,%r15,8) | 344 » movq» %r13,-16(%rsp,%r9,8) |
215 movq %rdx,%r13 | 345 movq %rdx,%r13 |
216 | 346 |
217 xorq %rdx,%rdx | 347 xorq %rdx,%rdx |
218 addq %r11,%r13 | 348 addq %r11,%r13 |
219 adcq $0,%rdx | 349 adcq $0,%rdx |
220 addq %r10,%r13 | 350 addq %r10,%r13 |
221 adcq $0,%rdx | 351 adcq $0,%rdx |
222 movq %r13,-8(%rsp,%r9,8) | 352 movq %r13,-8(%rsp,%r9,8) |
223 movq %rdx,(%rsp,%r9,8) | 353 movq %rdx,(%rsp,%r9,8) |
224 | 354 |
(...skipping 25 matching lines...) Expand all Loading... |
250 andq %rax,%rsi | 380 andq %rax,%rsi |
251 xorq %rcx,%rsi | 381 xorq %rcx,%rsi |
252 movq %r14,(%rsp,%r14,8) | 382 movq %r14,(%rsp,%r14,8) |
253 movq %rsi,(%rdi,%r14,8) | 383 movq %rsi,(%rdi,%r14,8) |
254 leaq 1(%r14),%r14 | 384 leaq 1(%r14),%r14 |
255 subq $1,%r15 | 385 subq $1,%r15 |
256 jnz .Lcopy | 386 jnz .Lcopy |
257 | 387 |
258 movq 8(%rsp,%r9,8),%rsi | 388 movq 8(%rsp,%r9,8),%rsi |
259 movq $1,%rax | 389 movq $1,%rax |
| 390 |
260 movq -48(%rsi),%r15 | 391 movq -48(%rsi),%r15 |
261 movq -40(%rsi),%r14 | 392 movq -40(%rsi),%r14 |
262 movq -32(%rsi),%r13 | 393 movq -32(%rsi),%r13 |
263 movq -24(%rsi),%r12 | 394 movq -24(%rsi),%r12 |
264 movq -16(%rsi),%rbp | 395 movq -16(%rsi),%rbp |
265 movq -8(%rsi),%rbx | 396 movq -8(%rsi),%rbx |
266 leaq (%rsi),%rsp | 397 leaq (%rsi),%rsp |
267 .Lmul_epilogue: | 398 .Lmul_epilogue: |
268 .byte 0xf3,0xc3 | 399 .byte 0xf3,0xc3 |
269 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 | 400 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 |
270 .type bn_mul4x_mont_gather5,@function | 401 .type bn_mul4x_mont_gather5,@function |
271 .align 32 | 402 .align 32 |
272 bn_mul4x_mont_gather5: | 403 bn_mul4x_mont_gather5: |
273 .Lmul4x_enter: | 404 .Lmul4x_enter: |
274 .byte 0x67 | 405 .byte 0x67 |
275 movq %rsp,%rax | 406 movq %rsp,%rax |
276 pushq %rbx | 407 pushq %rbx |
277 pushq %rbp | 408 pushq %rbp |
278 pushq %r12 | 409 pushq %r12 |
279 pushq %r13 | 410 pushq %r13 |
280 pushq %r14 | 411 pushq %r14 |
281 pushq %r15 | 412 pushq %r15 |
| 413 |
282 .byte 0x67 | 414 .byte 0x67 |
283 movl %r9d,%r10d | |
284 shll $3,%r9d | 415 shll $3,%r9d |
285 » shll» $3+2,%r10d | 416 » leaq» (%r9,%r9,2),%r10 |
286 negq %r9 | 417 negq %r9 |
287 | 418 |
288 | 419 |
289 | 420 |
290 | 421 |
291 | 422 |
292 | 423 |
293 | 424 |
294 | 425 |
295 » leaq» -64(%rsp,%r9,2),%r11 | 426 |
296 » subq» %rsi,%r11 | 427 |
| 428 » leaq» -320(%rsp,%r9,2),%r11 |
| 429 » subq» %rdi,%r11 |
297 andq $4095,%r11 | 430 andq $4095,%r11 |
298 cmpq %r11,%r10 | 431 cmpq %r11,%r10 |
299 jb .Lmul4xsp_alt | 432 jb .Lmul4xsp_alt |
300 subq %r11,%rsp | 433 subq %r11,%rsp |
301 » leaq» -64(%rsp,%r9,2),%rsp | 434 » leaq» -320(%rsp,%r9,2),%rsp |
302 jmp .Lmul4xsp_done | 435 jmp .Lmul4xsp_done |
303 | 436 |
304 .align 32 | 437 .align 32 |
305 .Lmul4xsp_alt: | 438 .Lmul4xsp_alt: |
306 » leaq» 4096-64(,%r9,2),%r10 | 439 » leaq» 4096-320(,%r9,2),%r10 |
307 » leaq» -64(%rsp,%r9,2),%rsp | 440 » leaq» -320(%rsp,%r9,2),%rsp |
308 subq %r10,%r11 | 441 subq %r10,%r11 |
309 movq $0,%r10 | 442 movq $0,%r10 |
310 cmovcq %r10,%r11 | 443 cmovcq %r10,%r11 |
311 subq %r11,%rsp | 444 subq %r11,%rsp |
312 .Lmul4xsp_done: | 445 .Lmul4xsp_done: |
313 andq $-64,%rsp | 446 andq $-64,%rsp |
314 negq %r9 | 447 negq %r9 |
315 | 448 |
316 movq %rax,40(%rsp) | 449 movq %rax,40(%rsp) |
317 .Lmul4x_body: | 450 .Lmul4x_body: |
318 | 451 |
319 call mul4x_internal | 452 call mul4x_internal |
320 | 453 |
321 movq 40(%rsp),%rsi | 454 movq 40(%rsp),%rsi |
322 movq $1,%rax | 455 movq $1,%rax |
| 456 |
323 movq -48(%rsi),%r15 | 457 movq -48(%rsi),%r15 |
324 movq -40(%rsi),%r14 | 458 movq -40(%rsi),%r14 |
325 movq -32(%rsi),%r13 | 459 movq -32(%rsi),%r13 |
326 movq -24(%rsi),%r12 | 460 movq -24(%rsi),%r12 |
327 movq -16(%rsi),%rbp | 461 movq -16(%rsi),%rbp |
328 movq -8(%rsi),%rbx | 462 movq -8(%rsi),%rbx |
329 leaq (%rsi),%rsp | 463 leaq (%rsi),%rsp |
330 .Lmul4x_epilogue: | 464 .Lmul4x_epilogue: |
331 .byte 0xf3,0xc3 | 465 .byte 0xf3,0xc3 |
332 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 | 466 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 |
333 | 467 |
334 .type mul4x_internal,@function | 468 .type mul4x_internal,@function |
335 .align 32 | 469 .align 32 |
336 mul4x_internal: | 470 mul4x_internal: |
337 shlq $5,%r9 | 471 shlq $5,%r9 |
338 » movl» 8(%rax),%r10d | 472 » movd» 8(%rax),%xmm5 |
339 » leaq» 256(%rdx,%r9,1),%r13 | 473 » leaq» .Linc(%rip),%rax |
| 474 » leaq» 128(%rdx,%r9,1),%r13 |
340 shrq $5,%r9 | 475 shrq $5,%r9 |
341 » movq» %r10,%r11 | 476 » movdqa» 0(%rax),%xmm0 |
342 » shrq» $3,%r10 | 477 » movdqa» 16(%rax),%xmm1 |
343 » andq» $7,%r11 | 478 » leaq» 88-112(%rsp,%r9,1),%r10 |
344 » notq» %r10 | 479 » leaq» 128(%rdx),%r12 |
345 » leaq» .Lmagic_masks(%rip),%rax | |
346 » andq» $3,%r10 | |
347 » leaq» 96(%rdx,%r11,8),%r12 | |
348 » movq» 0(%rax,%r10,8),%xmm4 | |
349 » movq» 8(%rax,%r10,8),%xmm5 | |
350 » addq» $7,%r11 | |
351 » movq» 16(%rax,%r10,8),%xmm6 | |
352 » movq» 24(%rax,%r10,8),%xmm7 | |
353 » andq» $7,%r11 | |
354 | 480 |
355 » movq» -96(%r12),%xmm0 | 481 » pshufd» $0,%xmm5,%xmm5 |
356 » leaq» 256(%r12),%r14 | 482 » movdqa» %xmm1,%xmm4 |
357 » movq» -32(%r12),%xmm1 | 483 .byte» 0x67,0x67 |
358 » pand» %xmm4,%xmm0 | 484 » movdqa» %xmm1,%xmm2 |
359 » movq» 32(%r12),%xmm2 | 485 » paddd» %xmm0,%xmm1 |
360 » pand» %xmm5,%xmm1 | 486 » pcmpeqd»%xmm5,%xmm0 |
361 » movq» 96(%r12),%xmm3 | |
362 » pand» %xmm6,%xmm2 | |
363 .byte 0x67 | 487 .byte 0x67 |
| 488 movdqa %xmm4,%xmm3 |
| 489 paddd %xmm1,%xmm2 |
| 490 pcmpeqd %xmm5,%xmm1 |
| 491 movdqa %xmm0,112(%r10) |
| 492 movdqa %xmm4,%xmm0 |
| 493 |
| 494 paddd %xmm2,%xmm3 |
| 495 pcmpeqd %xmm5,%xmm2 |
| 496 movdqa %xmm1,128(%r10) |
| 497 movdqa %xmm4,%xmm1 |
| 498 |
| 499 paddd %xmm3,%xmm0 |
| 500 pcmpeqd %xmm5,%xmm3 |
| 501 movdqa %xmm2,144(%r10) |
| 502 movdqa %xmm4,%xmm2 |
| 503 |
| 504 paddd %xmm0,%xmm1 |
| 505 pcmpeqd %xmm5,%xmm0 |
| 506 movdqa %xmm3,160(%r10) |
| 507 movdqa %xmm4,%xmm3 |
| 508 paddd %xmm1,%xmm2 |
| 509 pcmpeqd %xmm5,%xmm1 |
| 510 movdqa %xmm0,176(%r10) |
| 511 movdqa %xmm4,%xmm0 |
| 512 |
| 513 paddd %xmm2,%xmm3 |
| 514 pcmpeqd %xmm5,%xmm2 |
| 515 movdqa %xmm1,192(%r10) |
| 516 movdqa %xmm4,%xmm1 |
| 517 |
| 518 paddd %xmm3,%xmm0 |
| 519 pcmpeqd %xmm5,%xmm3 |
| 520 movdqa %xmm2,208(%r10) |
| 521 movdqa %xmm4,%xmm2 |
| 522 |
| 523 paddd %xmm0,%xmm1 |
| 524 pcmpeqd %xmm5,%xmm0 |
| 525 movdqa %xmm3,224(%r10) |
| 526 movdqa %xmm4,%xmm3 |
| 527 paddd %xmm1,%xmm2 |
| 528 pcmpeqd %xmm5,%xmm1 |
| 529 movdqa %xmm0,240(%r10) |
| 530 movdqa %xmm4,%xmm0 |
| 531 |
| 532 paddd %xmm2,%xmm3 |
| 533 pcmpeqd %xmm5,%xmm2 |
| 534 movdqa %xmm1,256(%r10) |
| 535 movdqa %xmm4,%xmm1 |
| 536 |
| 537 paddd %xmm3,%xmm0 |
| 538 pcmpeqd %xmm5,%xmm3 |
| 539 movdqa %xmm2,272(%r10) |
| 540 movdqa %xmm4,%xmm2 |
| 541 |
| 542 paddd %xmm0,%xmm1 |
| 543 pcmpeqd %xmm5,%xmm0 |
| 544 movdqa %xmm3,288(%r10) |
| 545 movdqa %xmm4,%xmm3 |
| 546 paddd %xmm1,%xmm2 |
| 547 pcmpeqd %xmm5,%xmm1 |
| 548 movdqa %xmm0,304(%r10) |
| 549 |
| 550 paddd %xmm2,%xmm3 |
| 551 .byte 0x67 |
| 552 pcmpeqd %xmm5,%xmm2 |
| 553 movdqa %xmm1,320(%r10) |
| 554 |
| 555 pcmpeqd %xmm5,%xmm3 |
| 556 movdqa %xmm2,336(%r10) |
| 557 pand 64(%r12),%xmm0 |
| 558 |
| 559 pand 80(%r12),%xmm1 |
| 560 pand 96(%r12),%xmm2 |
| 561 movdqa %xmm3,352(%r10) |
| 562 pand 112(%r12),%xmm3 |
| 563 por %xmm2,%xmm0 |
| 564 por %xmm3,%xmm1 |
| 565 movdqa -128(%r12),%xmm4 |
| 566 movdqa -112(%r12),%xmm5 |
| 567 movdqa -96(%r12),%xmm2 |
| 568 pand 112(%r10),%xmm4 |
| 569 movdqa -80(%r12),%xmm3 |
| 570 pand 128(%r10),%xmm5 |
| 571 por %xmm4,%xmm0 |
| 572 pand 144(%r10),%xmm2 |
| 573 por %xmm5,%xmm1 |
| 574 pand 160(%r10),%xmm3 |
| 575 por %xmm2,%xmm0 |
| 576 por %xmm3,%xmm1 |
| 577 movdqa -64(%r12),%xmm4 |
| 578 movdqa -48(%r12),%xmm5 |
| 579 movdqa -32(%r12),%xmm2 |
| 580 pand 176(%r10),%xmm4 |
| 581 movdqa -16(%r12),%xmm3 |
| 582 pand 192(%r10),%xmm5 |
| 583 por %xmm4,%xmm0 |
| 584 pand 208(%r10),%xmm2 |
| 585 por %xmm5,%xmm1 |
| 586 pand 224(%r10),%xmm3 |
| 587 por %xmm2,%xmm0 |
| 588 por %xmm3,%xmm1 |
| 589 movdqa 0(%r12),%xmm4 |
| 590 movdqa 16(%r12),%xmm5 |
| 591 movdqa 32(%r12),%xmm2 |
| 592 pand 240(%r10),%xmm4 |
| 593 movdqa 48(%r12),%xmm3 |
| 594 pand 256(%r10),%xmm5 |
| 595 por %xmm4,%xmm0 |
| 596 pand 272(%r10),%xmm2 |
| 597 por %xmm5,%xmm1 |
| 598 pand 288(%r10),%xmm3 |
| 599 por %xmm2,%xmm0 |
| 600 por %xmm3,%xmm1 |
364 por %xmm1,%xmm0 | 601 por %xmm1,%xmm0 |
365 » movq» -96(%r14),%xmm1 | 602 » pshufd» $0x4e,%xmm0,%xmm1 |
366 .byte» 0x67 | 603 » por» %xmm1,%xmm0 |
367 » pand» %xmm7,%xmm3 | 604 » leaq» 256(%r12),%r12 |
368 .byte» 0x67 | 605 .byte» 102,72,15,126,195 |
369 » por» %xmm2,%xmm0 | |
370 » movq» -32(%r14),%xmm2 | |
371 .byte» 0x67 | |
372 » pand» %xmm4,%xmm1 | |
373 .byte» 0x67 | |
374 » por» %xmm3,%xmm0 | |
375 » movq» 32(%r14),%xmm3 | |
376 | 606 |
377 .byte 102,72,15,126,195 | |
378 movq 96(%r14),%xmm0 | |
379 movq %r13,16+8(%rsp) | 607 movq %r13,16+8(%rsp) |
380 movq %rdi,56+8(%rsp) | 608 movq %rdi,56+8(%rsp) |
381 | 609 |
382 movq (%r8),%r8 | 610 movq (%r8),%r8 |
383 movq (%rsi),%rax | 611 movq (%rsi),%rax |
384 leaq (%rsi,%r9,1),%rsi | 612 leaq (%rsi,%r9,1),%rsi |
385 negq %r9 | 613 negq %r9 |
386 | 614 |
387 movq %r8,%rbp | 615 movq %r8,%rbp |
388 mulq %rbx | 616 mulq %rbx |
389 movq %rax,%r10 | 617 movq %rax,%r10 |
390 movq (%rcx),%rax | 618 movq (%rcx),%rax |
391 | 619 |
392 pand %xmm5,%xmm2 | |
393 pand %xmm6,%xmm3 | |
394 por %xmm2,%xmm1 | |
395 | |
396 imulq %r10,%rbp | 620 imulq %r10,%rbp |
397 | 621 » leaq» 64+8(%rsp),%r14 |
398 | |
399 | |
400 | |
401 | |
402 | |
403 | |
404 » leaq» 64+8(%rsp,%r11,8),%r14 | |
405 movq %rdx,%r11 | 622 movq %rdx,%r11 |
406 | 623 |
407 pand %xmm7,%xmm0 | |
408 por %xmm3,%xmm1 | |
409 leaq 512(%r12),%r12 | |
410 por %xmm1,%xmm0 | |
411 | |
412 mulq %rbp | 624 mulq %rbp |
413 addq %rax,%r10 | 625 addq %rax,%r10 |
414 movq 8(%rsi,%r9,1),%rax | 626 movq 8(%rsi,%r9,1),%rax |
415 adcq $0,%rdx | 627 adcq $0,%rdx |
416 movq %rdx,%rdi | 628 movq %rdx,%rdi |
417 | 629 |
418 mulq %rbx | 630 mulq %rbx |
419 addq %rax,%r11 | 631 addq %rax,%r11 |
420 » movq» 16(%rcx),%rax | 632 » movq» 8(%rcx),%rax |
421 adcq $0,%rdx | 633 adcq $0,%rdx |
422 movq %rdx,%r10 | 634 movq %rdx,%r10 |
423 | 635 |
424 mulq %rbp | 636 mulq %rbp |
425 addq %rax,%rdi | 637 addq %rax,%rdi |
426 movq 16(%rsi,%r9,1),%rax | 638 movq 16(%rsi,%r9,1),%rax |
427 adcq $0,%rdx | 639 adcq $0,%rdx |
428 addq %r11,%rdi | 640 addq %r11,%rdi |
429 leaq 32(%r9),%r15 | 641 leaq 32(%r9),%r15 |
430 » leaq» 64(%rcx),%rcx | 642 » leaq» 32(%rcx),%rcx |
431 adcq $0,%rdx | 643 adcq $0,%rdx |
432 movq %rdi,(%r14) | 644 movq %rdi,(%r14) |
433 movq %rdx,%r13 | 645 movq %rdx,%r13 |
434 jmp .L1st4x | 646 jmp .L1st4x |
435 | 647 |
436 .align 32 | 648 .align 32 |
437 .L1st4x: | 649 .L1st4x: |
438 mulq %rbx | 650 mulq %rbx |
439 addq %rax,%r10 | 651 addq %rax,%r10 |
440 » movq» -32(%rcx),%rax | 652 » movq» -16(%rcx),%rax |
441 leaq 32(%r14),%r14 | 653 leaq 32(%r14),%r14 |
442 adcq $0,%rdx | 654 adcq $0,%rdx |
443 movq %rdx,%r11 | 655 movq %rdx,%r11 |
444 | 656 |
445 mulq %rbp | 657 mulq %rbp |
446 addq %rax,%r13 | 658 addq %rax,%r13 |
447 movq -8(%rsi,%r15,1),%rax | 659 movq -8(%rsi,%r15,1),%rax |
448 adcq $0,%rdx | 660 adcq $0,%rdx |
449 addq %r10,%r13 | 661 addq %r10,%r13 |
450 adcq $0,%rdx | 662 adcq $0,%rdx |
451 movq %r13,-24(%r14) | 663 movq %r13,-24(%r14) |
452 movq %rdx,%rdi | 664 movq %rdx,%rdi |
453 | 665 |
454 mulq %rbx | 666 mulq %rbx |
455 addq %rax,%r11 | 667 addq %rax,%r11 |
456 » movq» -16(%rcx),%rax | 668 » movq» -8(%rcx),%rax |
457 adcq $0,%rdx | 669 adcq $0,%rdx |
458 movq %rdx,%r10 | 670 movq %rdx,%r10 |
459 | 671 |
460 mulq %rbp | 672 mulq %rbp |
461 addq %rax,%rdi | 673 addq %rax,%rdi |
462 movq (%rsi,%r15,1),%rax | 674 movq (%rsi,%r15,1),%rax |
463 adcq $0,%rdx | 675 adcq $0,%rdx |
464 addq %r11,%rdi | 676 addq %r11,%rdi |
465 adcq $0,%rdx | 677 adcq $0,%rdx |
466 movq %rdi,-16(%r14) | 678 movq %rdi,-16(%r14) |
467 movq %rdx,%r13 | 679 movq %rdx,%r13 |
468 | 680 |
469 mulq %rbx | 681 mulq %rbx |
470 addq %rax,%r10 | 682 addq %rax,%r10 |
471 movq 0(%rcx),%rax | 683 movq 0(%rcx),%rax |
472 adcq $0,%rdx | 684 adcq $0,%rdx |
473 movq %rdx,%r11 | 685 movq %rdx,%r11 |
474 | 686 |
475 mulq %rbp | 687 mulq %rbp |
476 addq %rax,%r13 | 688 addq %rax,%r13 |
477 movq 8(%rsi,%r15,1),%rax | 689 movq 8(%rsi,%r15,1),%rax |
478 adcq $0,%rdx | 690 adcq $0,%rdx |
479 addq %r10,%r13 | 691 addq %r10,%r13 |
480 adcq $0,%rdx | 692 adcq $0,%rdx |
481 movq %r13,-8(%r14) | 693 movq %r13,-8(%r14) |
482 movq %rdx,%rdi | 694 movq %rdx,%rdi |
483 | 695 |
484 mulq %rbx | 696 mulq %rbx |
485 addq %rax,%r11 | 697 addq %rax,%r11 |
486 » movq» 16(%rcx),%rax | 698 » movq» 8(%rcx),%rax |
487 adcq $0,%rdx | 699 adcq $0,%rdx |
488 movq %rdx,%r10 | 700 movq %rdx,%r10 |
489 | 701 |
490 mulq %rbp | 702 mulq %rbp |
491 addq %rax,%rdi | 703 addq %rax,%rdi |
492 movq 16(%rsi,%r15,1),%rax | 704 movq 16(%rsi,%r15,1),%rax |
493 adcq $0,%rdx | 705 adcq $0,%rdx |
494 addq %r11,%rdi | 706 addq %r11,%rdi |
495 » leaq» 64(%rcx),%rcx | 707 » leaq» 32(%rcx),%rcx |
496 adcq $0,%rdx | 708 adcq $0,%rdx |
497 movq %rdi,(%r14) | 709 movq %rdi,(%r14) |
498 movq %rdx,%r13 | 710 movq %rdx,%r13 |
499 | 711 |
500 addq $32,%r15 | 712 addq $32,%r15 |
501 jnz .L1st4x | 713 jnz .L1st4x |
502 | 714 |
503 mulq %rbx | 715 mulq %rbx |
504 addq %rax,%r10 | 716 addq %rax,%r10 |
505 » movq» -32(%rcx),%rax | 717 » movq» -16(%rcx),%rax |
506 leaq 32(%r14),%r14 | 718 leaq 32(%r14),%r14 |
507 adcq $0,%rdx | 719 adcq $0,%rdx |
508 movq %rdx,%r11 | 720 movq %rdx,%r11 |
509 | 721 |
510 mulq %rbp | 722 mulq %rbp |
511 addq %rax,%r13 | 723 addq %rax,%r13 |
512 movq -8(%rsi),%rax | 724 movq -8(%rsi),%rax |
513 adcq $0,%rdx | 725 adcq $0,%rdx |
514 addq %r10,%r13 | 726 addq %r10,%r13 |
515 adcq $0,%rdx | 727 adcq $0,%rdx |
516 movq %r13,-24(%r14) | 728 movq %r13,-24(%r14) |
517 movq %rdx,%rdi | 729 movq %rdx,%rdi |
518 | 730 |
519 mulq %rbx | 731 mulq %rbx |
520 addq %rax,%r11 | 732 addq %rax,%r11 |
521 » movq» -16(%rcx),%rax | 733 » movq» -8(%rcx),%rax |
522 adcq $0,%rdx | 734 adcq $0,%rdx |
523 movq %rdx,%r10 | 735 movq %rdx,%r10 |
524 | 736 |
525 mulq %rbp | 737 mulq %rbp |
526 addq %rax,%rdi | 738 addq %rax,%rdi |
527 movq (%rsi,%r9,1),%rax | 739 movq (%rsi,%r9,1),%rax |
528 adcq $0,%rdx | 740 adcq $0,%rdx |
529 addq %r11,%rdi | 741 addq %r11,%rdi |
530 adcq $0,%rdx | 742 adcq $0,%rdx |
531 movq %rdi,-16(%r14) | 743 movq %rdi,-16(%r14) |
532 movq %rdx,%r13 | 744 movq %rdx,%r13 |
533 | 745 |
534 .byte» 102,72,15,126,195 | 746 » leaq» (%rcx,%r9,1),%rcx |
535 » leaq» (%rcx,%r9,2),%rcx | |
536 | 747 |
537 xorq %rdi,%rdi | 748 xorq %rdi,%rdi |
538 addq %r10,%r13 | 749 addq %r10,%r13 |
539 adcq $0,%rdi | 750 adcq $0,%rdi |
540 movq %r13,-8(%r14) | 751 movq %r13,-8(%r14) |
541 | 752 |
542 jmp .Louter4x | 753 jmp .Louter4x |
543 | 754 |
544 .align 32 | 755 .align 32 |
545 .Louter4x: | 756 .Louter4x: |
| 757 leaq 16+128(%r14),%rdx |
| 758 pxor %xmm4,%xmm4 |
| 759 pxor %xmm5,%xmm5 |
| 760 movdqa -128(%r12),%xmm0 |
| 761 movdqa -112(%r12),%xmm1 |
| 762 movdqa -96(%r12),%xmm2 |
| 763 movdqa -80(%r12),%xmm3 |
| 764 pand -128(%rdx),%xmm0 |
| 765 pand -112(%rdx),%xmm1 |
| 766 por %xmm0,%xmm4 |
| 767 pand -96(%rdx),%xmm2 |
| 768 por %xmm1,%xmm5 |
| 769 pand -80(%rdx),%xmm3 |
| 770 por %xmm2,%xmm4 |
| 771 por %xmm3,%xmm5 |
| 772 movdqa -64(%r12),%xmm0 |
| 773 movdqa -48(%r12),%xmm1 |
| 774 movdqa -32(%r12),%xmm2 |
| 775 movdqa -16(%r12),%xmm3 |
| 776 pand -64(%rdx),%xmm0 |
| 777 pand -48(%rdx),%xmm1 |
| 778 por %xmm0,%xmm4 |
| 779 pand -32(%rdx),%xmm2 |
| 780 por %xmm1,%xmm5 |
| 781 pand -16(%rdx),%xmm3 |
| 782 por %xmm2,%xmm4 |
| 783 por %xmm3,%xmm5 |
| 784 movdqa 0(%r12),%xmm0 |
| 785 movdqa 16(%r12),%xmm1 |
| 786 movdqa 32(%r12),%xmm2 |
| 787 movdqa 48(%r12),%xmm3 |
| 788 pand 0(%rdx),%xmm0 |
| 789 pand 16(%rdx),%xmm1 |
| 790 por %xmm0,%xmm4 |
| 791 pand 32(%rdx),%xmm2 |
| 792 por %xmm1,%xmm5 |
| 793 pand 48(%rdx),%xmm3 |
| 794 por %xmm2,%xmm4 |
| 795 por %xmm3,%xmm5 |
| 796 movdqa 64(%r12),%xmm0 |
| 797 movdqa 80(%r12),%xmm1 |
| 798 movdqa 96(%r12),%xmm2 |
| 799 movdqa 112(%r12),%xmm3 |
| 800 pand 64(%rdx),%xmm0 |
| 801 pand 80(%rdx),%xmm1 |
| 802 por %xmm0,%xmm4 |
| 803 pand 96(%rdx),%xmm2 |
| 804 por %xmm1,%xmm5 |
| 805 pand 112(%rdx),%xmm3 |
| 806 por %xmm2,%xmm4 |
| 807 por %xmm3,%xmm5 |
| 808 por %xmm5,%xmm4 |
| 809 pshufd $0x4e,%xmm4,%xmm0 |
| 810 por %xmm4,%xmm0 |
| 811 leaq 256(%r12),%r12 |
| 812 .byte 102,72,15,126,195 |
| 813 |
546 movq (%r14,%r9,1),%r10 | 814 movq (%r14,%r9,1),%r10 |
547 movq %r8,%rbp | 815 movq %r8,%rbp |
548 mulq %rbx | 816 mulq %rbx |
549 addq %rax,%r10 | 817 addq %rax,%r10 |
550 movq (%rcx),%rax | 818 movq (%rcx),%rax |
551 adcq $0,%rdx | 819 adcq $0,%rdx |
552 | 820 |
553 movq -96(%r12),%xmm0 | |
554 movq -32(%r12),%xmm1 | |
555 pand %xmm4,%xmm0 | |
556 movq 32(%r12),%xmm2 | |
557 pand %xmm5,%xmm1 | |
558 movq 96(%r12),%xmm3 | |
559 | |
560 imulq %r10,%rbp | 821 imulq %r10,%rbp |
561 .byte 0x67 | |
562 movq %rdx,%r11 | 822 movq %rdx,%r11 |
563 movq %rdi,(%r14) | 823 movq %rdi,(%r14) |
564 | 824 |
565 pand %xmm6,%xmm2 | |
566 por %xmm1,%xmm0 | |
567 pand %xmm7,%xmm3 | |
568 por %xmm2,%xmm0 | |
569 leaq (%r14,%r9,1),%r14 | 825 leaq (%r14,%r9,1),%r14 |
570 leaq 256(%r12),%r12 | |
571 por %xmm3,%xmm0 | |
572 | 826 |
573 mulq %rbp | 827 mulq %rbp |
574 addq %rax,%r10 | 828 addq %rax,%r10 |
575 movq 8(%rsi,%r9,1),%rax | 829 movq 8(%rsi,%r9,1),%rax |
576 adcq $0,%rdx | 830 adcq $0,%rdx |
577 movq %rdx,%rdi | 831 movq %rdx,%rdi |
578 | 832 |
579 mulq %rbx | 833 mulq %rbx |
580 addq %rax,%r11 | 834 addq %rax,%r11 |
581 » movq» 16(%rcx),%rax | 835 » movq» 8(%rcx),%rax |
582 adcq $0,%rdx | 836 adcq $0,%rdx |
583 addq 8(%r14),%r11 | 837 addq 8(%r14),%r11 |
584 adcq $0,%rdx | 838 adcq $0,%rdx |
585 movq %rdx,%r10 | 839 movq %rdx,%r10 |
586 | 840 |
587 mulq %rbp | 841 mulq %rbp |
588 addq %rax,%rdi | 842 addq %rax,%rdi |
589 movq 16(%rsi,%r9,1),%rax | 843 movq 16(%rsi,%r9,1),%rax |
590 adcq $0,%rdx | 844 adcq $0,%rdx |
591 addq %r11,%rdi | 845 addq %r11,%rdi |
592 leaq 32(%r9),%r15 | 846 leaq 32(%r9),%r15 |
593 » leaq» 64(%rcx),%rcx | 847 » leaq» 32(%rcx),%rcx |
594 adcq $0,%rdx | 848 adcq $0,%rdx |
595 movq %rdx,%r13 | 849 movq %rdx,%r13 |
596 jmp .Linner4x | 850 jmp .Linner4x |
597 | 851 |
598 .align 32 | 852 .align 32 |
599 .Linner4x: | 853 .Linner4x: |
600 mulq %rbx | 854 mulq %rbx |
601 addq %rax,%r10 | 855 addq %rax,%r10 |
602 » movq» -32(%rcx),%rax | 856 » movq» -16(%rcx),%rax |
603 adcq $0,%rdx | 857 adcq $0,%rdx |
604 addq 16(%r14),%r10 | 858 addq 16(%r14),%r10 |
605 leaq 32(%r14),%r14 | 859 leaq 32(%r14),%r14 |
606 adcq $0,%rdx | 860 adcq $0,%rdx |
607 movq %rdx,%r11 | 861 movq %rdx,%r11 |
608 | 862 |
609 mulq %rbp | 863 mulq %rbp |
610 addq %rax,%r13 | 864 addq %rax,%r13 |
611 movq -8(%rsi,%r15,1),%rax | 865 movq -8(%rsi,%r15,1),%rax |
612 adcq $0,%rdx | 866 adcq $0,%rdx |
613 addq %r10,%r13 | 867 addq %r10,%r13 |
614 adcq $0,%rdx | 868 adcq $0,%rdx |
615 movq %rdi,-32(%r14) | 869 movq %rdi,-32(%r14) |
616 movq %rdx,%rdi | 870 movq %rdx,%rdi |
617 | 871 |
618 mulq %rbx | 872 mulq %rbx |
619 addq %rax,%r11 | 873 addq %rax,%r11 |
620 » movq» -16(%rcx),%rax | 874 » movq» -8(%rcx),%rax |
621 adcq $0,%rdx | 875 adcq $0,%rdx |
622 addq -8(%r14),%r11 | 876 addq -8(%r14),%r11 |
623 adcq $0,%rdx | 877 adcq $0,%rdx |
624 movq %rdx,%r10 | 878 movq %rdx,%r10 |
625 | 879 |
626 mulq %rbp | 880 mulq %rbp |
627 addq %rax,%rdi | 881 addq %rax,%rdi |
628 movq (%rsi,%r15,1),%rax | 882 movq (%rsi,%r15,1),%rax |
629 adcq $0,%rdx | 883 adcq $0,%rdx |
630 addq %r11,%rdi | 884 addq %r11,%rdi |
(...skipping 13 matching lines...) Expand all Loading... |
644 addq %rax,%r13 | 898 addq %rax,%r13 |
645 movq 8(%rsi,%r15,1),%rax | 899 movq 8(%rsi,%r15,1),%rax |
646 adcq $0,%rdx | 900 adcq $0,%rdx |
647 addq %r10,%r13 | 901 addq %r10,%r13 |
648 adcq $0,%rdx | 902 adcq $0,%rdx |
649 movq %rdi,-16(%r14) | 903 movq %rdi,-16(%r14) |
650 movq %rdx,%rdi | 904 movq %rdx,%rdi |
651 | 905 |
652 mulq %rbx | 906 mulq %rbx |
653 addq %rax,%r11 | 907 addq %rax,%r11 |
654 » movq» 16(%rcx),%rax | 908 » movq» 8(%rcx),%rax |
655 adcq $0,%rdx | 909 adcq $0,%rdx |
656 addq 8(%r14),%r11 | 910 addq 8(%r14),%r11 |
657 adcq $0,%rdx | 911 adcq $0,%rdx |
658 movq %rdx,%r10 | 912 movq %rdx,%r10 |
659 | 913 |
660 mulq %rbp | 914 mulq %rbp |
661 addq %rax,%rdi | 915 addq %rax,%rdi |
662 movq 16(%rsi,%r15,1),%rax | 916 movq 16(%rsi,%r15,1),%rax |
663 adcq $0,%rdx | 917 adcq $0,%rdx |
664 addq %r11,%rdi | 918 addq %r11,%rdi |
665 » leaq» 64(%rcx),%rcx | 919 » leaq» 32(%rcx),%rcx |
666 adcq $0,%rdx | 920 adcq $0,%rdx |
667 movq %r13,-8(%r14) | 921 movq %r13,-8(%r14) |
668 movq %rdx,%r13 | 922 movq %rdx,%r13 |
669 | 923 |
670 addq $32,%r15 | 924 addq $32,%r15 |
671 jnz .Linner4x | 925 jnz .Linner4x |
672 | 926 |
673 mulq %rbx | 927 mulq %rbx |
674 addq %rax,%r10 | 928 addq %rax,%r10 |
675 » movq» -32(%rcx),%rax | 929 » movq» -16(%rcx),%rax |
676 adcq $0,%rdx | 930 adcq $0,%rdx |
677 addq 16(%r14),%r10 | 931 addq 16(%r14),%r10 |
678 leaq 32(%r14),%r14 | 932 leaq 32(%r14),%r14 |
679 adcq $0,%rdx | 933 adcq $0,%rdx |
680 movq %rdx,%r11 | 934 movq %rdx,%r11 |
681 | 935 |
682 mulq %rbp | 936 mulq %rbp |
683 addq %rax,%r13 | 937 addq %rax,%r13 |
684 movq -8(%rsi),%rax | 938 movq -8(%rsi),%rax |
685 adcq $0,%rdx | 939 adcq $0,%rdx |
686 addq %r10,%r13 | 940 addq %r10,%r13 |
687 adcq $0,%rdx | 941 adcq $0,%rdx |
688 movq %rdi,-32(%r14) | 942 movq %rdi,-32(%r14) |
689 movq %rdx,%rdi | 943 movq %rdx,%rdi |
690 | 944 |
691 mulq %rbx | 945 mulq %rbx |
692 addq %rax,%r11 | 946 addq %rax,%r11 |
693 movq %rbp,%rax | 947 movq %rbp,%rax |
694 » movq» -16(%rcx),%rbp | 948 » movq» -8(%rcx),%rbp |
695 adcq $0,%rdx | 949 adcq $0,%rdx |
696 addq -8(%r14),%r11 | 950 addq -8(%r14),%r11 |
697 adcq $0,%rdx | 951 adcq $0,%rdx |
698 movq %rdx,%r10 | 952 movq %rdx,%r10 |
699 | 953 |
700 mulq %rbp | 954 mulq %rbp |
701 addq %rax,%rdi | 955 addq %rax,%rdi |
702 movq (%rsi,%r9,1),%rax | 956 movq (%rsi,%r9,1),%rax |
703 adcq $0,%rdx | 957 adcq $0,%rdx |
704 addq %r11,%rdi | 958 addq %r11,%rdi |
705 adcq $0,%rdx | 959 adcq $0,%rdx |
706 movq %r13,-24(%r14) | 960 movq %r13,-24(%r14) |
707 movq %rdx,%r13 | 961 movq %rdx,%r13 |
708 | 962 |
709 .byte 102,72,15,126,195 | |
710 movq %rdi,-16(%r14) | 963 movq %rdi,-16(%r14) |
711 » leaq» (%rcx,%r9,2),%rcx | 964 » leaq» (%rcx,%r9,1),%rcx |
712 | 965 |
713 xorq %rdi,%rdi | 966 xorq %rdi,%rdi |
714 addq %r10,%r13 | 967 addq %r10,%r13 |
715 adcq $0,%rdi | 968 adcq $0,%rdi |
716 addq (%r14),%r13 | 969 addq (%r14),%r13 |
717 adcq $0,%rdi | 970 adcq $0,%rdi |
718 movq %r13,-8(%r14) | 971 movq %r13,-8(%r14) |
719 | 972 |
720 cmpq 16+8(%rsp),%r12 | 973 cmpq 16+8(%rsp),%r12 |
721 jb .Louter4x | 974 jb .Louter4x |
| 975 xorq %rax,%rax |
722 subq %r13,%rbp | 976 subq %r13,%rbp |
723 adcq %r15,%r15 | 977 adcq %r15,%r15 |
724 orq %r15,%rdi | 978 orq %r15,%rdi |
725 » xorq» $1,%rdi | 979 » subq» %rdi,%rax |
726 leaq (%r14,%r9,1),%rbx | 980 leaq (%r14,%r9,1),%rbx |
727 » leaq» (%rcx,%rdi,8),%rbp | 981 » movq» (%rcx),%r12 |
| 982 » leaq» (%rcx),%rbp |
728 movq %r9,%rcx | 983 movq %r9,%rcx |
729 sarq $3+2,%rcx | 984 sarq $3+2,%rcx |
730 movq 56+8(%rsp),%rdi | 985 movq 56+8(%rsp),%rdi |
731 » jmp» .Lsqr4x_sub | 986 » decq» %r12 |
| 987 » xorq» %r10,%r10 |
| 988 » movq» 8(%rbp),%r13 |
| 989 » movq» 16(%rbp),%r14 |
| 990 » movq» 24(%rbp),%r15 |
| 991 » jmp» .Lsqr4x_sub_entry |
732 .size mul4x_internal,.-mul4x_internal | 992 .size mul4x_internal,.-mul4x_internal |
733 .globl bn_power5 | 993 .globl bn_power5 |
734 .hidden bn_power5 | 994 .hidden bn_power5 |
735 .type bn_power5,@function | 995 .type bn_power5,@function |
736 .align 32 | 996 .align 32 |
737 bn_power5: | 997 bn_power5: |
738 movq %rsp,%rax | 998 movq %rsp,%rax |
739 pushq %rbx | 999 pushq %rbx |
740 pushq %rbp | 1000 pushq %rbp |
741 pushq %r12 | 1001 pushq %r12 |
742 pushq %r13 | 1002 pushq %r13 |
743 pushq %r14 | 1003 pushq %r14 |
744 pushq %r15 | 1004 pushq %r15 |
745 » movl» %r9d,%r10d | 1005 |
746 shll $3,%r9d | 1006 shll $3,%r9d |
747 » shll» $3+2,%r10d | 1007 » leal» (%r9,%r9,2),%r10d |
748 negq %r9 | 1008 negq %r9 |
749 movq (%r8),%r8 | 1009 movq (%r8),%r8 |
750 | 1010 |
751 | 1011 |
752 | 1012 |
753 | 1013 |
754 | 1014 |
755 | 1015 |
756 | 1016 |
757 » leaq» -64(%rsp,%r9,2),%r11 | 1017 |
758 » subq» %rsi,%r11 | 1018 » leaq» -320(%rsp,%r9,2),%r11 |
| 1019 » subq» %rdi,%r11 |
759 andq $4095,%r11 | 1020 andq $4095,%r11 |
760 cmpq %r11,%r10 | 1021 cmpq %r11,%r10 |
761 jb .Lpwr_sp_alt | 1022 jb .Lpwr_sp_alt |
762 subq %r11,%rsp | 1023 subq %r11,%rsp |
763 » leaq» -64(%rsp,%r9,2),%rsp | 1024 » leaq» -320(%rsp,%r9,2),%rsp |
764 jmp .Lpwr_sp_done | 1025 jmp .Lpwr_sp_done |
765 | 1026 |
766 .align 32 | 1027 .align 32 |
767 .Lpwr_sp_alt: | 1028 .Lpwr_sp_alt: |
768 » leaq» 4096-64(,%r9,2),%r10 | 1029 » leaq» 4096-320(,%r9,2),%r10 |
769 » leaq» -64(%rsp,%r9,2),%rsp | 1030 » leaq» -320(%rsp,%r9,2),%rsp |
770 subq %r10,%r11 | 1031 subq %r10,%r11 |
771 movq $0,%r10 | 1032 movq $0,%r10 |
772 cmovcq %r10,%r11 | 1033 cmovcq %r10,%r11 |
773 subq %r11,%rsp | 1034 subq %r11,%rsp |
774 .Lpwr_sp_done: | 1035 .Lpwr_sp_done: |
775 andq $-64,%rsp | 1036 andq $-64,%rsp |
776 movq %r9,%r10 | 1037 movq %r9,%r10 |
777 negq %r9 | 1038 negq %r9 |
778 | 1039 |
779 | 1040 |
780 | 1041 |
781 | 1042 |
782 | 1043 |
783 | 1044 |
784 | 1045 |
785 | 1046 |
786 | 1047 |
787 | 1048 |
788 movq %r8,32(%rsp) | 1049 movq %r8,32(%rsp) |
789 movq %rax,40(%rsp) | 1050 movq %rax,40(%rsp) |
790 .Lpower5_body: | 1051 .Lpower5_body: |
791 .byte 102,72,15,110,207 | 1052 .byte 102,72,15,110,207 |
792 .byte 102,72,15,110,209 | 1053 .byte 102,72,15,110,209 |
793 .byte 102,73,15,110,218 | 1054 .byte 102,73,15,110,218 |
794 .byte 102,72,15,110,226 | 1055 .byte 102,72,15,110,226 |
795 | 1056 |
796 call __bn_sqr8x_internal | 1057 call __bn_sqr8x_internal |
| 1058 call __bn_post4x_internal |
797 call __bn_sqr8x_internal | 1059 call __bn_sqr8x_internal |
| 1060 call __bn_post4x_internal |
798 call __bn_sqr8x_internal | 1061 call __bn_sqr8x_internal |
| 1062 call __bn_post4x_internal |
799 call __bn_sqr8x_internal | 1063 call __bn_sqr8x_internal |
| 1064 call __bn_post4x_internal |
800 call __bn_sqr8x_internal | 1065 call __bn_sqr8x_internal |
| 1066 call __bn_post4x_internal |
801 | 1067 |
802 .byte 102,72,15,126,209 | 1068 .byte 102,72,15,126,209 |
803 .byte 102,72,15,126,226 | 1069 .byte 102,72,15,126,226 |
804 movq %rsi,%rdi | 1070 movq %rsi,%rdi |
805 movq 40(%rsp),%rax | 1071 movq 40(%rsp),%rax |
806 leaq 32(%rsp),%r8 | 1072 leaq 32(%rsp),%r8 |
807 | 1073 |
808 call mul4x_internal | 1074 call mul4x_internal |
809 | 1075 |
810 movq 40(%rsp),%rsi | 1076 movq 40(%rsp),%rsi |
(...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1335 leaq (%rcx,%r11,2),%r8 | 1601 leaq (%rcx,%r11,2),%r8 |
1336 shrq $63,%r11 | 1602 shrq $63,%r11 |
1337 orq %r10,%r8 | 1603 orq %r10,%r8 |
1338 mulq %rax | 1604 mulq %rax |
1339 negq %r15 | 1605 negq %r15 |
1340 adcq %rax,%rbx | 1606 adcq %rax,%rbx |
1341 adcq %rdx,%r8 | 1607 adcq %rdx,%r8 |
1342 movq %rbx,-16(%rdi) | 1608 movq %rbx,-16(%rdi) |
1343 movq %r8,-8(%rdi) | 1609 movq %r8,-8(%rdi) |
1344 .byte 102,72,15,126,213 | 1610 .byte 102,72,15,126,213 |
1345 sqr8x_reduction: | 1611 __bn_sqr8x_reduction: |
1346 xorq %rax,%rax | 1612 xorq %rax,%rax |
1347 » leaq» (%rbp,%r9,2),%rcx | 1613 » leaq» (%r9,%rbp,1),%rcx |
1348 leaq 48+8(%rsp,%r9,2),%rdx | 1614 leaq 48+8(%rsp,%r9,2),%rdx |
1349 movq %rcx,0+8(%rsp) | 1615 movq %rcx,0+8(%rsp) |
1350 leaq 48+8(%rsp,%r9,1),%rdi | 1616 leaq 48+8(%rsp,%r9,1),%rdi |
1351 movq %rdx,8+8(%rsp) | 1617 movq %rdx,8+8(%rsp) |
1352 negq %r9 | 1618 negq %r9 |
1353 jmp .L8x_reduction_loop | 1619 jmp .L8x_reduction_loop |
1354 | 1620 |
1355 .align 32 | 1621 .align 32 |
1356 .L8x_reduction_loop: | 1622 .L8x_reduction_loop: |
1357 leaq (%rdi,%r9,1),%rdi | 1623 leaq (%rdi,%r9,1),%rdi |
(...skipping 12 matching lines...) Expand all Loading... |
1370 .byte 0x67 | 1636 .byte 0x67 |
1371 movq %rbx,%r8 | 1637 movq %rbx,%r8 |
1372 imulq 32+8(%rsp),%rbx | 1638 imulq 32+8(%rsp),%rbx |
1373 movq 0(%rbp),%rax | 1639 movq 0(%rbp),%rax |
1374 movl $8,%ecx | 1640 movl $8,%ecx |
1375 jmp .L8x_reduce | 1641 jmp .L8x_reduce |
1376 | 1642 |
1377 .align 32 | 1643 .align 32 |
1378 .L8x_reduce: | 1644 .L8x_reduce: |
1379 mulq %rbx | 1645 mulq %rbx |
1380 » movq» 16(%rbp),%rax | 1646 » movq» 8(%rbp),%rax |
1381 negq %r8 | 1647 negq %r8 |
1382 movq %rdx,%r8 | 1648 movq %rdx,%r8 |
1383 adcq $0,%r8 | 1649 adcq $0,%r8 |
1384 | 1650 |
1385 mulq %rbx | 1651 mulq %rbx |
1386 addq %rax,%r9 | 1652 addq %rax,%r9 |
1387 » movq» 32(%rbp),%rax | 1653 » movq» 16(%rbp),%rax |
1388 adcq $0,%rdx | 1654 adcq $0,%rdx |
1389 addq %r9,%r8 | 1655 addq %r9,%r8 |
1390 movq %rbx,48-8+8(%rsp,%rcx,8) | 1656 movq %rbx,48-8+8(%rsp,%rcx,8) |
1391 movq %rdx,%r9 | 1657 movq %rdx,%r9 |
1392 adcq $0,%r9 | 1658 adcq $0,%r9 |
1393 | 1659 |
1394 mulq %rbx | 1660 mulq %rbx |
1395 addq %rax,%r10 | 1661 addq %rax,%r10 |
1396 » movq» 48(%rbp),%rax | 1662 » movq» 24(%rbp),%rax |
1397 adcq $0,%rdx | 1663 adcq $0,%rdx |
1398 addq %r10,%r9 | 1664 addq %r10,%r9 |
1399 movq 32+8(%rsp),%rsi | 1665 movq 32+8(%rsp),%rsi |
1400 movq %rdx,%r10 | 1666 movq %rdx,%r10 |
1401 adcq $0,%r10 | 1667 adcq $0,%r10 |
1402 | 1668 |
1403 mulq %rbx | 1669 mulq %rbx |
1404 addq %rax,%r11 | 1670 addq %rax,%r11 |
1405 » movq» 64(%rbp),%rax | 1671 » movq» 32(%rbp),%rax |
1406 adcq $0,%rdx | 1672 adcq $0,%rdx |
1407 imulq %r8,%rsi | 1673 imulq %r8,%rsi |
1408 addq %r11,%r10 | 1674 addq %r11,%r10 |
1409 movq %rdx,%r11 | 1675 movq %rdx,%r11 |
1410 adcq $0,%r11 | 1676 adcq $0,%r11 |
1411 | 1677 |
1412 mulq %rbx | 1678 mulq %rbx |
1413 addq %rax,%r12 | 1679 addq %rax,%r12 |
1414 » movq» 80(%rbp),%rax | 1680 » movq» 40(%rbp),%rax |
1415 adcq $0,%rdx | 1681 adcq $0,%rdx |
1416 addq %r12,%r11 | 1682 addq %r12,%r11 |
1417 movq %rdx,%r12 | 1683 movq %rdx,%r12 |
1418 adcq $0,%r12 | 1684 adcq $0,%r12 |
1419 | 1685 |
1420 mulq %rbx | 1686 mulq %rbx |
1421 addq %rax,%r13 | 1687 addq %rax,%r13 |
1422 » movq» 96(%rbp),%rax | 1688 » movq» 48(%rbp),%rax |
1423 adcq $0,%rdx | 1689 adcq $0,%rdx |
1424 addq %r13,%r12 | 1690 addq %r13,%r12 |
1425 movq %rdx,%r13 | 1691 movq %rdx,%r13 |
1426 adcq $0,%r13 | 1692 adcq $0,%r13 |
1427 | 1693 |
1428 mulq %rbx | 1694 mulq %rbx |
1429 addq %rax,%r14 | 1695 addq %rax,%r14 |
1430 » movq» 112(%rbp),%rax | 1696 » movq» 56(%rbp),%rax |
1431 adcq $0,%rdx | 1697 adcq $0,%rdx |
1432 addq %r14,%r13 | 1698 addq %r14,%r13 |
1433 movq %rdx,%r14 | 1699 movq %rdx,%r14 |
1434 adcq $0,%r14 | 1700 adcq $0,%r14 |
1435 | 1701 |
1436 mulq %rbx | 1702 mulq %rbx |
1437 movq %rsi,%rbx | 1703 movq %rsi,%rbx |
1438 addq %rax,%r15 | 1704 addq %rax,%r15 |
1439 movq 0(%rbp),%rax | 1705 movq 0(%rbp),%rax |
1440 adcq $0,%rdx | 1706 adcq $0,%rdx |
1441 addq %r15,%r14 | 1707 addq %r15,%r14 |
1442 movq %rdx,%r15 | 1708 movq %rdx,%r15 |
1443 adcq $0,%r15 | 1709 adcq $0,%r15 |
1444 | 1710 |
1445 decl %ecx | 1711 decl %ecx |
1446 jnz .L8x_reduce | 1712 jnz .L8x_reduce |
1447 | 1713 |
1448 » leaq» 128(%rbp),%rbp | 1714 » leaq» 64(%rbp),%rbp |
1449 xorq %rax,%rax | 1715 xorq %rax,%rax |
1450 movq 8+8(%rsp),%rdx | 1716 movq 8+8(%rsp),%rdx |
1451 cmpq 0+8(%rsp),%rbp | 1717 cmpq 0+8(%rsp),%rbp |
1452 jae .L8x_no_tail | 1718 jae .L8x_no_tail |
1453 | 1719 |
1454 .byte 0x66 | 1720 .byte 0x66 |
1455 addq 0(%rdi),%r8 | 1721 addq 0(%rdi),%r8 |
1456 adcq 8(%rdi),%r9 | 1722 adcq 8(%rdi),%r9 |
1457 adcq 16(%rdi),%r10 | 1723 adcq 16(%rdi),%r10 |
1458 adcq 24(%rdi),%r11 | 1724 adcq 24(%rdi),%r11 |
1459 adcq 32(%rdi),%r12 | 1725 adcq 32(%rdi),%r12 |
1460 adcq 40(%rdi),%r13 | 1726 adcq 40(%rdi),%r13 |
1461 adcq 48(%rdi),%r14 | 1727 adcq 48(%rdi),%r14 |
1462 adcq 56(%rdi),%r15 | 1728 adcq 56(%rdi),%r15 |
1463 sbbq %rsi,%rsi | 1729 sbbq %rsi,%rsi |
1464 | 1730 |
1465 movq 48+56+8(%rsp),%rbx | 1731 movq 48+56+8(%rsp),%rbx |
1466 movl $8,%ecx | 1732 movl $8,%ecx |
1467 movq 0(%rbp),%rax | 1733 movq 0(%rbp),%rax |
1468 jmp .L8x_tail | 1734 jmp .L8x_tail |
1469 | 1735 |
1470 .align 32 | 1736 .align 32 |
1471 .L8x_tail: | 1737 .L8x_tail: |
1472 mulq %rbx | 1738 mulq %rbx |
1473 addq %rax,%r8 | 1739 addq %rax,%r8 |
1474 » movq» 16(%rbp),%rax | 1740 » movq» 8(%rbp),%rax |
1475 movq %r8,(%rdi) | 1741 movq %r8,(%rdi) |
1476 movq %rdx,%r8 | 1742 movq %rdx,%r8 |
1477 adcq $0,%r8 | 1743 adcq $0,%r8 |
1478 | 1744 |
1479 mulq %rbx | 1745 mulq %rbx |
1480 addq %rax,%r9 | 1746 addq %rax,%r9 |
1481 » movq» 32(%rbp),%rax | 1747 » movq» 16(%rbp),%rax |
1482 adcq $0,%rdx | 1748 adcq $0,%rdx |
1483 addq %r9,%r8 | 1749 addq %r9,%r8 |
1484 leaq 8(%rdi),%rdi | 1750 leaq 8(%rdi),%rdi |
1485 movq %rdx,%r9 | 1751 movq %rdx,%r9 |
1486 adcq $0,%r9 | 1752 adcq $0,%r9 |
1487 | 1753 |
1488 mulq %rbx | 1754 mulq %rbx |
1489 addq %rax,%r10 | 1755 addq %rax,%r10 |
1490 » movq» 48(%rbp),%rax | 1756 » movq» 24(%rbp),%rax |
1491 adcq $0,%rdx | 1757 adcq $0,%rdx |
1492 addq %r10,%r9 | 1758 addq %r10,%r9 |
1493 movq %rdx,%r10 | 1759 movq %rdx,%r10 |
1494 adcq $0,%r10 | 1760 adcq $0,%r10 |
1495 | 1761 |
1496 mulq %rbx | 1762 mulq %rbx |
1497 addq %rax,%r11 | 1763 addq %rax,%r11 |
1498 » movq» 64(%rbp),%rax | 1764 » movq» 32(%rbp),%rax |
1499 adcq $0,%rdx | 1765 adcq $0,%rdx |
1500 addq %r11,%r10 | 1766 addq %r11,%r10 |
1501 movq %rdx,%r11 | 1767 movq %rdx,%r11 |
1502 adcq $0,%r11 | 1768 adcq $0,%r11 |
1503 | 1769 |
1504 mulq %rbx | 1770 mulq %rbx |
1505 addq %rax,%r12 | 1771 addq %rax,%r12 |
1506 » movq» 80(%rbp),%rax | 1772 » movq» 40(%rbp),%rax |
1507 adcq $0,%rdx | 1773 adcq $0,%rdx |
1508 addq %r12,%r11 | 1774 addq %r12,%r11 |
1509 movq %rdx,%r12 | 1775 movq %rdx,%r12 |
1510 adcq $0,%r12 | 1776 adcq $0,%r12 |
1511 | 1777 |
1512 mulq %rbx | 1778 mulq %rbx |
1513 addq %rax,%r13 | 1779 addq %rax,%r13 |
1514 » movq» 96(%rbp),%rax | 1780 » movq» 48(%rbp),%rax |
1515 adcq $0,%rdx | 1781 adcq $0,%rdx |
1516 addq %r13,%r12 | 1782 addq %r13,%r12 |
1517 movq %rdx,%r13 | 1783 movq %rdx,%r13 |
1518 adcq $0,%r13 | 1784 adcq $0,%r13 |
1519 | 1785 |
1520 mulq %rbx | 1786 mulq %rbx |
1521 addq %rax,%r14 | 1787 addq %rax,%r14 |
1522 » movq» 112(%rbp),%rax | 1788 » movq» 56(%rbp),%rax |
1523 adcq $0,%rdx | 1789 adcq $0,%rdx |
1524 addq %r14,%r13 | 1790 addq %r14,%r13 |
1525 movq %rdx,%r14 | 1791 movq %rdx,%r14 |
1526 adcq $0,%r14 | 1792 adcq $0,%r14 |
1527 | 1793 |
1528 mulq %rbx | 1794 mulq %rbx |
1529 movq 48-16+8(%rsp,%rcx,8),%rbx | 1795 movq 48-16+8(%rsp,%rcx,8),%rbx |
1530 addq %rax,%r15 | 1796 addq %rax,%r15 |
1531 adcq $0,%rdx | 1797 adcq $0,%rdx |
1532 addq %r15,%r14 | 1798 addq %r15,%r14 |
1533 movq 0(%rbp),%rax | 1799 movq 0(%rbp),%rax |
1534 movq %rdx,%r15 | 1800 movq %rdx,%r15 |
1535 adcq $0,%r15 | 1801 adcq $0,%r15 |
1536 | 1802 |
1537 decl %ecx | 1803 decl %ecx |
1538 jnz .L8x_tail | 1804 jnz .L8x_tail |
1539 | 1805 |
1540 » leaq» 128(%rbp),%rbp | 1806 » leaq» 64(%rbp),%rbp |
1541 movq 8+8(%rsp),%rdx | 1807 movq 8+8(%rsp),%rdx |
1542 cmpq 0+8(%rsp),%rbp | 1808 cmpq 0+8(%rsp),%rbp |
1543 jae .L8x_tail_done | 1809 jae .L8x_tail_done |
1544 | 1810 |
1545 movq 48+56+8(%rsp),%rbx | 1811 movq 48+56+8(%rsp),%rbx |
1546 negq %rsi | 1812 negq %rsi |
1547 movq 0(%rbp),%rax | 1813 movq 0(%rbp),%rax |
1548 adcq 0(%rdi),%r8 | 1814 adcq 0(%rdi),%r8 |
1549 adcq 8(%rdi),%r9 | 1815 adcq 8(%rdi),%r9 |
1550 adcq 16(%rdi),%r10 | 1816 adcq 16(%rdi),%r10 |
1551 adcq 24(%rdi),%r11 | 1817 adcq 24(%rdi),%r11 |
1552 adcq 32(%rdi),%r12 | 1818 adcq 32(%rdi),%r12 |
1553 adcq 40(%rdi),%r13 | 1819 adcq 40(%rdi),%r13 |
1554 adcq 48(%rdi),%r14 | 1820 adcq 48(%rdi),%r14 |
1555 adcq 56(%rdi),%r15 | 1821 adcq 56(%rdi),%r15 |
1556 sbbq %rsi,%rsi | 1822 sbbq %rsi,%rsi |
1557 | 1823 |
1558 movl $8,%ecx | 1824 movl $8,%ecx |
1559 jmp .L8x_tail | 1825 jmp .L8x_tail |
1560 | 1826 |
1561 .align 32 | 1827 .align 32 |
1562 .L8x_tail_done: | 1828 .L8x_tail_done: |
1563 addq (%rdx),%r8 | 1829 addq (%rdx),%r8 |
| 1830 adcq $0,%r9 |
| 1831 adcq $0,%r10 |
| 1832 adcq $0,%r11 |
| 1833 adcq $0,%r12 |
| 1834 adcq $0,%r13 |
| 1835 adcq $0,%r14 |
| 1836 adcq $0,%r15 |
| 1837 |
| 1838 |
1564 xorq %rax,%rax | 1839 xorq %rax,%rax |
1565 | 1840 |
1566 negq %rsi | 1841 negq %rsi |
1567 .L8x_no_tail: | 1842 .L8x_no_tail: |
1568 adcq 0(%rdi),%r8 | 1843 adcq 0(%rdi),%r8 |
1569 adcq 8(%rdi),%r9 | 1844 adcq 8(%rdi),%r9 |
1570 adcq 16(%rdi),%r10 | 1845 adcq 16(%rdi),%r10 |
1571 adcq 24(%rdi),%r11 | 1846 adcq 24(%rdi),%r11 |
1572 adcq 32(%rdi),%r12 | 1847 adcq 32(%rdi),%r12 |
1573 adcq 40(%rdi),%r13 | 1848 adcq 40(%rdi),%r13 |
1574 adcq 48(%rdi),%r14 | 1849 adcq 48(%rdi),%r14 |
1575 adcq 56(%rdi),%r15 | 1850 adcq 56(%rdi),%r15 |
1576 adcq $0,%rax | 1851 adcq $0,%rax |
1577 » movq» -16(%rbp),%rcx | 1852 » movq» -8(%rbp),%rcx |
1578 xorq %rsi,%rsi | 1853 xorq %rsi,%rsi |
1579 | 1854 |
1580 .byte 102,72,15,126,213 | 1855 .byte 102,72,15,126,213 |
1581 | 1856 |
1582 movq %r8,0(%rdi) | 1857 movq %r8,0(%rdi) |
1583 movq %r9,8(%rdi) | 1858 movq %r9,8(%rdi) |
1584 .byte 102,73,15,126,217 | 1859 .byte 102,73,15,126,217 |
1585 movq %r10,16(%rdi) | 1860 movq %r10,16(%rdi) |
1586 movq %r11,24(%rdi) | 1861 movq %r11,24(%rdi) |
1587 movq %r12,32(%rdi) | 1862 movq %r12,32(%rdi) |
1588 movq %r13,40(%rdi) | 1863 movq %r13,40(%rdi) |
1589 movq %r14,48(%rdi) | 1864 movq %r14,48(%rdi) |
1590 movq %r15,56(%rdi) | 1865 movq %r15,56(%rdi) |
1591 leaq 64(%rdi),%rdi | 1866 leaq 64(%rdi),%rdi |
1592 | 1867 |
1593 cmpq %rdx,%rdi | 1868 cmpq %rdx,%rdi |
1594 jb .L8x_reduction_loop | 1869 jb .L8x_reduction_loop |
| 1870 .byte 0xf3,0xc3 |
| 1871 .size bn_sqr8x_internal,.-bn_sqr8x_internal |
| 1872 .type __bn_post4x_internal,@function |
| 1873 .align 32 |
| 1874 __bn_post4x_internal: |
| 1875 movq 0(%rbp),%r12 |
| 1876 leaq (%rdi,%r9,1),%rbx |
| 1877 movq %r9,%rcx |
| 1878 .byte 102,72,15,126,207 |
| 1879 negq %rax |
| 1880 .byte 102,72,15,126,206 |
| 1881 sarq $3+2,%rcx |
| 1882 decq %r12 |
| 1883 xorq %r10,%r10 |
| 1884 movq 8(%rbp),%r13 |
| 1885 movq 16(%rbp),%r14 |
| 1886 movq 24(%rbp),%r15 |
| 1887 jmp .Lsqr4x_sub_entry |
1595 | 1888 |
1596 » subq» %r15,%rcx | 1889 .align» 16 |
1597 » leaq» (%rdi,%r9,1),%rbx | 1890 .Lsqr4x_sub: |
1598 » adcq» %rsi,%rsi | 1891 » movq» 0(%rbp),%r12 |
1599 » movq» %r9,%rcx | 1892 » movq» 8(%rbp),%r13 |
1600 » orq» %rsi,%rax | 1893 » movq» 16(%rbp),%r14 |
1601 .byte» 102,72,15,126,207 | 1894 » movq» 24(%rbp),%r15 |
1602 » xorq» $1,%rax | 1895 .Lsqr4x_sub_entry: |
1603 .byte» 102,72,15,126,206 | 1896 » leaq» 32(%rbp),%rbp |
1604 » leaq» (%rbp,%rax,8),%rbp | 1897 » notq» %r12 |
1605 » sarq» $3+2,%rcx | 1898 » notq» %r13 |
1606 » jmp» .Lsqr4x_sub | 1899 » notq» %r14 |
| 1900 » notq» %r15 |
| 1901 » andq» %rax,%r12 |
| 1902 » andq» %rax,%r13 |
| 1903 » andq» %rax,%r14 |
| 1904 » andq» %rax,%r15 |
1607 | 1905 |
1608 .align» 32 | 1906 » negq» %r10 |
1609 .Lsqr4x_sub: | 1907 » adcq» 0(%rbx),%r12 |
1610 .byte» 0x66 | 1908 » adcq» 8(%rbx),%r13 |
1611 » movq» 0(%rbx),%r12 | 1909 » adcq» 16(%rbx),%r14 |
1612 » movq» 8(%rbx),%r13 | 1910 » adcq» 24(%rbx),%r15 |
1613 » sbbq» 0(%rbp),%r12 | 1911 » movq» %r12,0(%rdi) |
1614 » movq» 16(%rbx),%r14 | |
1615 » sbbq» 16(%rbp),%r13 | |
1616 » movq» 24(%rbx),%r15 | |
1617 leaq 32(%rbx),%rbx | 1912 leaq 32(%rbx),%rbx |
1618 sbbq 32(%rbp),%r14 | |
1619 movq %r12,0(%rdi) | |
1620 sbbq 48(%rbp),%r15 | |
1621 leaq 64(%rbp),%rbp | |
1622 movq %r13,8(%rdi) | 1913 movq %r13,8(%rdi) |
| 1914 sbbq %r10,%r10 |
1623 movq %r14,16(%rdi) | 1915 movq %r14,16(%rdi) |
1624 movq %r15,24(%rdi) | 1916 movq %r15,24(%rdi) |
1625 leaq 32(%rdi),%rdi | 1917 leaq 32(%rdi),%rdi |
1626 | 1918 |
1627 incq %rcx | 1919 incq %rcx |
1628 jnz .Lsqr4x_sub | 1920 jnz .Lsqr4x_sub |
| 1921 |
1629 movq %r9,%r10 | 1922 movq %r9,%r10 |
1630 negq %r9 | 1923 negq %r9 |
1631 .byte 0xf3,0xc3 | 1924 .byte 0xf3,0xc3 |
1632 .size» bn_sqr8x_internal,.-bn_sqr8x_internal | 1925 .size» __bn_post4x_internal,.-__bn_post4x_internal |
1633 .globl bn_from_montgomery | 1926 .globl bn_from_montgomery |
1634 .hidden bn_from_montgomery | 1927 .hidden bn_from_montgomery |
1635 .type bn_from_montgomery,@function | 1928 .type bn_from_montgomery,@function |
1636 .align 32 | 1929 .align 32 |
1637 bn_from_montgomery: | 1930 bn_from_montgomery: |
1638 testl $7,%r9d | 1931 testl $7,%r9d |
1639 jz bn_from_mont8x | 1932 jz bn_from_mont8x |
1640 xorl %eax,%eax | 1933 xorl %eax,%eax |
1641 .byte 0xf3,0xc3 | 1934 .byte 0xf3,0xc3 |
1642 .size bn_from_montgomery,.-bn_from_montgomery | 1935 .size bn_from_montgomery,.-bn_from_montgomery |
1643 | 1936 |
1644 .type bn_from_mont8x,@function | 1937 .type bn_from_mont8x,@function |
1645 .align 32 | 1938 .align 32 |
1646 bn_from_mont8x: | 1939 bn_from_mont8x: |
1647 .byte 0x67 | 1940 .byte 0x67 |
1648 movq %rsp,%rax | 1941 movq %rsp,%rax |
1649 pushq %rbx | 1942 pushq %rbx |
1650 pushq %rbp | 1943 pushq %rbp |
1651 pushq %r12 | 1944 pushq %r12 |
1652 pushq %r13 | 1945 pushq %r13 |
1653 pushq %r14 | 1946 pushq %r14 |
1654 pushq %r15 | 1947 pushq %r15 |
1655 .byte» 0x67 | 1948 |
1656 » movl» %r9d,%r10d | |
1657 shll $3,%r9d | 1949 shll $3,%r9d |
1658 » shll» $3+2,%r10d | 1950 » leaq» (%r9,%r9,2),%r10 |
1659 negq %r9 | 1951 negq %r9 |
1660 movq (%r8),%r8 | 1952 movq (%r8),%r8 |
1661 | 1953 |
1662 | 1954 |
1663 | 1955 |
1664 | 1956 |
1665 | 1957 |
1666 | 1958 |
1667 | 1959 |
1668 » leaq» -64(%rsp,%r9,2),%r11 | 1960 |
1669 » subq» %rsi,%r11 | 1961 » leaq» -320(%rsp,%r9,2),%r11 |
| 1962 » subq» %rdi,%r11 |
1670 andq $4095,%r11 | 1963 andq $4095,%r11 |
1671 cmpq %r11,%r10 | 1964 cmpq %r11,%r10 |
1672 jb .Lfrom_sp_alt | 1965 jb .Lfrom_sp_alt |
1673 subq %r11,%rsp | 1966 subq %r11,%rsp |
1674 » leaq» -64(%rsp,%r9,2),%rsp | 1967 » leaq» -320(%rsp,%r9,2),%rsp |
1675 jmp .Lfrom_sp_done | 1968 jmp .Lfrom_sp_done |
1676 | 1969 |
1677 .align 32 | 1970 .align 32 |
1678 .Lfrom_sp_alt: | 1971 .Lfrom_sp_alt: |
1679 » leaq» 4096-64(,%r9,2),%r10 | 1972 » leaq» 4096-320(,%r9,2),%r10 |
1680 » leaq» -64(%rsp,%r9,2),%rsp | 1973 » leaq» -320(%rsp,%r9,2),%rsp |
1681 subq %r10,%r11 | 1974 subq %r10,%r11 |
1682 movq $0,%r10 | 1975 movq $0,%r10 |
1683 cmovcq %r10,%r11 | 1976 cmovcq %r10,%r11 |
1684 subq %r11,%rsp | 1977 subq %r11,%rsp |
1685 .Lfrom_sp_done: | 1978 .Lfrom_sp_done: |
1686 andq $-64,%rsp | 1979 andq $-64,%rsp |
1687 movq %r9,%r10 | 1980 movq %r9,%r10 |
1688 negq %r9 | 1981 negq %r9 |
1689 | 1982 |
1690 | 1983 |
(...skipping 30 matching lines...) Expand all Loading... |
1721 movdqa %xmm4,48(%rax) | 2014 movdqa %xmm4,48(%rax) |
1722 leaq 64(%rax),%rax | 2015 leaq 64(%rax),%rax |
1723 subq $64,%r11 | 2016 subq $64,%r11 |
1724 jnz .Lmul_by_1 | 2017 jnz .Lmul_by_1 |
1725 | 2018 |
1726 .byte 102,72,15,110,207 | 2019 .byte 102,72,15,110,207 |
1727 .byte 102,72,15,110,209 | 2020 .byte 102,72,15,110,209 |
1728 .byte 0x67 | 2021 .byte 0x67 |
1729 movq %rcx,%rbp | 2022 movq %rcx,%rbp |
1730 .byte 102,73,15,110,218 | 2023 .byte 102,73,15,110,218 |
1731 » call» sqr8x_reduction | 2024 » call» __bn_sqr8x_reduction |
| 2025 » call» __bn_post4x_internal |
1732 | 2026 |
1733 pxor %xmm0,%xmm0 | 2027 pxor %xmm0,%xmm0 |
1734 leaq 48(%rsp),%rax | 2028 leaq 48(%rsp),%rax |
1735 movq 40(%rsp),%rsi | 2029 movq 40(%rsp),%rsi |
1736 jmp .Lfrom_mont_zero | 2030 jmp .Lfrom_mont_zero |
1737 | 2031 |
1738 .align 32 | 2032 .align 32 |
1739 .Lfrom_mont_zero: | 2033 .Lfrom_mont_zero: |
1740 movdqa %xmm0,0(%rax) | 2034 movdqa %xmm0,0(%rax) |
1741 movdqa %xmm0,16(%rax) | 2035 movdqa %xmm0,16(%rax) |
(...skipping 29 matching lines...) Expand all Loading... |
1771 leaq 256(%rdx),%rdx | 2065 leaq 256(%rdx),%rdx |
1772 subl $1,%esi | 2066 subl $1,%esi |
1773 jnz .Lscatter | 2067 jnz .Lscatter |
1774 .Lscatter_epilogue: | 2068 .Lscatter_epilogue: |
1775 .byte 0xf3,0xc3 | 2069 .byte 0xf3,0xc3 |
1776 .size bn_scatter5,.-bn_scatter5 | 2070 .size bn_scatter5,.-bn_scatter5 |
1777 | 2071 |
1778 .globl bn_gather5 | 2072 .globl bn_gather5 |
1779 .hidden bn_gather5 | 2073 .hidden bn_gather5 |
1780 .type bn_gather5,@function | 2074 .type bn_gather5,@function |
1781 .align» 16 | 2075 .align» 32 |
1782 bn_gather5: | 2076 bn_gather5: |
1783 » movl» %ecx,%r11d | 2077 .LSEH_begin_bn_gather5: |
1784 » shrl» $3,%ecx | 2078 |
1785 » andq» $7,%r11 | 2079 .byte» 0x4c,0x8d,0x14,0x24 |
1786 » notl» %ecx | 2080 .byte» 0x48,0x81,0xec,0x08,0x01,0x00,0x00 |
1787 » leaq» .Lmagic_masks(%rip),%rax | 2081 » leaq» .Linc(%rip),%rax |
1788 » andl» $3,%ecx | 2082 » andq» $-16,%rsp |
1789 » leaq» 128(%rdx,%r11,8),%rdx | 2083 |
1790 » movq» 0(%rax,%rcx,8),%xmm4 | 2084 » movd» %ecx,%xmm5 |
1791 » movq» 8(%rax,%rcx,8),%xmm5 | 2085 » movdqa» 0(%rax),%xmm0 |
1792 » movq» 16(%rax,%rcx,8),%xmm6 | 2086 » movdqa» 16(%rax),%xmm1 |
1793 » movq» 24(%rax,%rcx,8),%xmm7 | 2087 » leaq» 128(%rdx),%r11 |
| 2088 » leaq» 128(%rsp),%rax |
| 2089 |
| 2090 » pshufd» $0,%xmm5,%xmm5 |
| 2091 » movdqa» %xmm1,%xmm4 |
| 2092 » movdqa» %xmm1,%xmm2 |
| 2093 » paddd» %xmm0,%xmm1 |
| 2094 » pcmpeqd»%xmm5,%xmm0 |
| 2095 » movdqa» %xmm4,%xmm3 |
| 2096 |
| 2097 » paddd» %xmm1,%xmm2 |
| 2098 » pcmpeqd»%xmm5,%xmm1 |
| 2099 » movdqa» %xmm0,-128(%rax) |
| 2100 » movdqa» %xmm4,%xmm0 |
| 2101 |
| 2102 » paddd» %xmm2,%xmm3 |
| 2103 » pcmpeqd»%xmm5,%xmm2 |
| 2104 » movdqa» %xmm1,-112(%rax) |
| 2105 » movdqa» %xmm4,%xmm1 |
| 2106 |
| 2107 » paddd» %xmm3,%xmm0 |
| 2108 » pcmpeqd»%xmm5,%xmm3 |
| 2109 » movdqa» %xmm2,-96(%rax) |
| 2110 » movdqa» %xmm4,%xmm2 |
| 2111 » paddd» %xmm0,%xmm1 |
| 2112 » pcmpeqd»%xmm5,%xmm0 |
| 2113 » movdqa» %xmm3,-80(%rax) |
| 2114 » movdqa» %xmm4,%xmm3 |
| 2115 |
| 2116 » paddd» %xmm1,%xmm2 |
| 2117 » pcmpeqd»%xmm5,%xmm1 |
| 2118 » movdqa» %xmm0,-64(%rax) |
| 2119 » movdqa» %xmm4,%xmm0 |
| 2120 |
| 2121 » paddd» %xmm2,%xmm3 |
| 2122 » pcmpeqd»%xmm5,%xmm2 |
| 2123 » movdqa» %xmm1,-48(%rax) |
| 2124 » movdqa» %xmm4,%xmm1 |
| 2125 |
| 2126 » paddd» %xmm3,%xmm0 |
| 2127 » pcmpeqd»%xmm5,%xmm3 |
| 2128 » movdqa» %xmm2,-32(%rax) |
| 2129 » movdqa» %xmm4,%xmm2 |
| 2130 » paddd» %xmm0,%xmm1 |
| 2131 » pcmpeqd»%xmm5,%xmm0 |
| 2132 » movdqa» %xmm3,-16(%rax) |
| 2133 » movdqa» %xmm4,%xmm3 |
| 2134 |
| 2135 » paddd» %xmm1,%xmm2 |
| 2136 » pcmpeqd»%xmm5,%xmm1 |
| 2137 » movdqa» %xmm0,0(%rax) |
| 2138 » movdqa» %xmm4,%xmm0 |
| 2139 |
| 2140 » paddd» %xmm2,%xmm3 |
| 2141 » pcmpeqd»%xmm5,%xmm2 |
| 2142 » movdqa» %xmm1,16(%rax) |
| 2143 » movdqa» %xmm4,%xmm1 |
| 2144 |
| 2145 » paddd» %xmm3,%xmm0 |
| 2146 » pcmpeqd»%xmm5,%xmm3 |
| 2147 » movdqa» %xmm2,32(%rax) |
| 2148 » movdqa» %xmm4,%xmm2 |
| 2149 » paddd» %xmm0,%xmm1 |
| 2150 » pcmpeqd»%xmm5,%xmm0 |
| 2151 » movdqa» %xmm3,48(%rax) |
| 2152 » movdqa» %xmm4,%xmm3 |
| 2153 |
| 2154 » paddd» %xmm1,%xmm2 |
| 2155 » pcmpeqd»%xmm5,%xmm1 |
| 2156 » movdqa» %xmm0,64(%rax) |
| 2157 » movdqa» %xmm4,%xmm0 |
| 2158 |
| 2159 » paddd» %xmm2,%xmm3 |
| 2160 » pcmpeqd»%xmm5,%xmm2 |
| 2161 » movdqa» %xmm1,80(%rax) |
| 2162 » movdqa» %xmm4,%xmm1 |
| 2163 |
| 2164 » paddd» %xmm3,%xmm0 |
| 2165 » pcmpeqd»%xmm5,%xmm3 |
| 2166 » movdqa» %xmm2,96(%rax) |
| 2167 » movdqa» %xmm4,%xmm2 |
| 2168 » movdqa» %xmm3,112(%rax) |
1794 jmp .Lgather | 2169 jmp .Lgather |
1795 .align» 16 | 2170 |
| 2171 .align» 32 |
1796 .Lgather: | 2172 .Lgather: |
1797 » movq» -128(%rdx),%xmm0 | 2173 » pxor» %xmm4,%xmm4 |
1798 » movq» -64(%rdx),%xmm1 | 2174 » pxor» %xmm5,%xmm5 |
1799 » pand» %xmm4,%xmm0 | 2175 » movdqa» -128(%r11),%xmm0 |
1800 » movq» 0(%rdx),%xmm2 | 2176 » movdqa» -112(%r11),%xmm1 |
1801 » pand» %xmm5,%xmm1 | 2177 » movdqa» -96(%r11),%xmm2 |
1802 » movq» 64(%rdx),%xmm3 | 2178 » pand» -128(%rax),%xmm0 |
1803 » pand» %xmm6,%xmm2 | 2179 » movdqa» -80(%r11),%xmm3 |
1804 » por» %xmm1,%xmm0 | 2180 » pand» -112(%rax),%xmm1 |
1805 » pand» %xmm7,%xmm3 | 2181 » por» %xmm0,%xmm4 |
1806 .byte» 0x67,0x67 | 2182 » pand» -96(%rax),%xmm2 |
1807 » por» %xmm2,%xmm0 | 2183 » por» %xmm1,%xmm5 |
1808 » leaq» 256(%rdx),%rdx | 2184 » pand» -80(%rax),%xmm3 |
1809 » por» %xmm3,%xmm0 | 2185 » por» %xmm2,%xmm4 |
1810 | 2186 » por» %xmm3,%xmm5 |
| 2187 » movdqa» -64(%r11),%xmm0 |
| 2188 » movdqa» -48(%r11),%xmm1 |
| 2189 » movdqa» -32(%r11),%xmm2 |
| 2190 » pand» -64(%rax),%xmm0 |
| 2191 » movdqa» -16(%r11),%xmm3 |
| 2192 » pand» -48(%rax),%xmm1 |
| 2193 » por» %xmm0,%xmm4 |
| 2194 » pand» -32(%rax),%xmm2 |
| 2195 » por» %xmm1,%xmm5 |
| 2196 » pand» -16(%rax),%xmm3 |
| 2197 » por» %xmm2,%xmm4 |
| 2198 » por» %xmm3,%xmm5 |
| 2199 » movdqa» 0(%r11),%xmm0 |
| 2200 » movdqa» 16(%r11),%xmm1 |
| 2201 » movdqa» 32(%r11),%xmm2 |
| 2202 » pand» 0(%rax),%xmm0 |
| 2203 » movdqa» 48(%r11),%xmm3 |
| 2204 » pand» 16(%rax),%xmm1 |
| 2205 » por» %xmm0,%xmm4 |
| 2206 » pand» 32(%rax),%xmm2 |
| 2207 » por» %xmm1,%xmm5 |
| 2208 » pand» 48(%rax),%xmm3 |
| 2209 » por» %xmm2,%xmm4 |
| 2210 » por» %xmm3,%xmm5 |
| 2211 » movdqa» 64(%r11),%xmm0 |
| 2212 » movdqa» 80(%r11),%xmm1 |
| 2213 » movdqa» 96(%r11),%xmm2 |
| 2214 » pand» 64(%rax),%xmm0 |
| 2215 » movdqa» 112(%r11),%xmm3 |
| 2216 » pand» 80(%rax),%xmm1 |
| 2217 » por» %xmm0,%xmm4 |
| 2218 » pand» 96(%rax),%xmm2 |
| 2219 » por» %xmm1,%xmm5 |
| 2220 » pand» 112(%rax),%xmm3 |
| 2221 » por» %xmm2,%xmm4 |
| 2222 » por» %xmm3,%xmm5 |
| 2223 » por» %xmm5,%xmm4 |
| 2224 » leaq» 256(%r11),%r11 |
| 2225 » pshufd» $0x4e,%xmm4,%xmm0 |
| 2226 » por» %xmm4,%xmm0 |
1811 movq %xmm0,(%rdi) | 2227 movq %xmm0,(%rdi) |
1812 leaq 8(%rdi),%rdi | 2228 leaq 8(%rdi),%rdi |
1813 subl $1,%esi | 2229 subl $1,%esi |
1814 jnz .Lgather | 2230 jnz .Lgather |
| 2231 |
| 2232 leaq (%r10),%rsp |
1815 .byte 0xf3,0xc3 | 2233 .byte 0xf3,0xc3 |
1816 .LSEH_end_bn_gather5: | 2234 .LSEH_end_bn_gather5: |
1817 .size bn_gather5,.-bn_gather5 | 2235 .size bn_gather5,.-bn_gather5 |
1818 .align 64 | 2236 .align 64 |
1819 .Lmagic_masks: | 2237 .Linc: |
1820 .long» 0,0, 0,0, 0,0, -1,-1 | 2238 .long» 0,0, 1,1 |
1821 .long» 0,0, 0,0, 0,0, 0,0 | 2239 .long» 2,2, 2,2 |
1822 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | 2240 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 |
1823 #endif | 2241 #endif |
OLD | NEW |