| OLD | NEW |
| 1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
| 2 .text | 2 .text |
| 3 | 3 |
| 4 .extern OPENSSL_ia32cap_P | 4 .extern OPENSSL_ia32cap_P |
| 5 .hidden OPENSSL_ia32cap_P | 5 .hidden OPENSSL_ia32cap_P |
| 6 | 6 |
| 7 .globl bn_mul_mont_gather5 | 7 .globl bn_mul_mont_gather5 |
| 8 .hidden bn_mul_mont_gather5 | 8 .hidden bn_mul_mont_gather5 |
| 9 .type bn_mul_mont_gather5,@function | 9 .type bn_mul_mont_gather5,@function |
| 10 .align 64 | 10 .align 64 |
| 11 bn_mul_mont_gather5: | 11 bn_mul_mont_gather5: |
| 12 testl $7,%r9d | 12 testl $7,%r9d |
| 13 jnz .Lmul_enter | 13 jnz .Lmul_enter |
| 14 jmp .Lmul4x_enter | 14 jmp .Lmul4x_enter |
| 15 | 15 |
| 16 .align 16 | 16 .align 16 |
| 17 .Lmul_enter: | 17 .Lmul_enter: |
| 18 movl %r9d,%r9d | 18 movl %r9d,%r9d |
| 19 movq %rsp,%rax | 19 movq %rsp,%rax |
| 20 » movl» 8(%rsp),%r10d | 20 » movd» 8(%rsp),%xmm5 |
| 21 » leaq» .Linc(%rip),%r10 |
| 21 pushq %rbx | 22 pushq %rbx |
| 22 pushq %rbp | 23 pushq %rbp |
| 23 pushq %r12 | 24 pushq %r12 |
| 24 pushq %r13 | 25 pushq %r13 |
| 25 pushq %r14 | 26 pushq %r14 |
| 26 pushq %r15 | 27 pushq %r15 |
| 28 |
| 27 leaq 2(%r9),%r11 | 29 leaq 2(%r9),%r11 |
| 28 negq %r11 | 30 negq %r11 |
| 29 » leaq» (%rsp,%r11,8),%rsp | 31 » leaq» -264(%rsp,%r11,8),%rsp |
| 30 andq $-1024,%rsp | 32 andq $-1024,%rsp |
| 31 | 33 |
| 32 movq %rax,8(%rsp,%r9,8) | 34 movq %rax,8(%rsp,%r9,8) |
| 33 .Lmul_body: | 35 .Lmul_body: |
| 34 » movq» %rdx,%r12 | 36 » leaq» 128(%rdx),%r12 |
| 35 » movq» %r10,%r11 | 37 » movdqa» 0(%r10),%xmm0 |
| 36 » shrq» $3,%r10 | 38 » movdqa» 16(%r10),%xmm1 |
| 37 » andq» $7,%r11 | 39 » leaq» 24-112(%rsp,%r9,8),%r10 |
| 38 » notq» %r10 | 40 » andq» $-16,%r10 |
| 39 » leaq» .Lmagic_masks(%rip),%rax | |
| 40 » andq» $3,%r10 | |
| 41 » leaq» 96(%r12,%r11,8),%r12 | |
| 42 » movq» 0(%rax,%r10,8),%xmm4 | |
| 43 » movq» 8(%rax,%r10,8),%xmm5 | |
| 44 » movq» 16(%rax,%r10,8),%xmm6 | |
| 45 » movq» 24(%rax,%r10,8),%xmm7 | |
| 46 | 41 |
| 47 » movq» -96(%r12),%xmm0 | 42 » pshufd» $0,%xmm5,%xmm5 |
| 48 » movq» -32(%r12),%xmm1 | 43 » movdqa» %xmm1,%xmm4 |
| 49 » pand» %xmm4,%xmm0 | 44 » movdqa» %xmm1,%xmm2 |
| 50 » movq» 32(%r12),%xmm2 | 45 » paddd» %xmm0,%xmm1 |
| 51 » pand» %xmm5,%xmm1 | 46 » pcmpeqd»%xmm5,%xmm0 |
| 52 » movq» 96(%r12),%xmm3 | 47 .byte» 0x67 |
| 53 » pand» %xmm6,%xmm2 | 48 » movdqa» %xmm4,%xmm3 |
| 49 » paddd» %xmm1,%xmm2 |
| 50 » pcmpeqd»%xmm5,%xmm1 |
| 51 » movdqa» %xmm0,112(%r10) |
| 52 » movdqa» %xmm4,%xmm0 |
| 53 |
| 54 » paddd» %xmm2,%xmm3 |
| 55 » pcmpeqd»%xmm5,%xmm2 |
| 56 » movdqa» %xmm1,128(%r10) |
| 57 » movdqa» %xmm4,%xmm1 |
| 58 |
| 59 » paddd» %xmm3,%xmm0 |
| 60 » pcmpeqd»%xmm5,%xmm3 |
| 61 » movdqa» %xmm2,144(%r10) |
| 62 » movdqa» %xmm4,%xmm2 |
| 63 |
| 64 » paddd» %xmm0,%xmm1 |
| 65 » pcmpeqd»%xmm5,%xmm0 |
| 66 » movdqa» %xmm3,160(%r10) |
| 67 » movdqa» %xmm4,%xmm3 |
| 68 » paddd» %xmm1,%xmm2 |
| 69 » pcmpeqd»%xmm5,%xmm1 |
| 70 » movdqa» %xmm0,176(%r10) |
| 71 » movdqa» %xmm4,%xmm0 |
| 72 |
| 73 » paddd» %xmm2,%xmm3 |
| 74 » pcmpeqd»%xmm5,%xmm2 |
| 75 » movdqa» %xmm1,192(%r10) |
| 76 » movdqa» %xmm4,%xmm1 |
| 77 |
| 78 » paddd» %xmm3,%xmm0 |
| 79 » pcmpeqd»%xmm5,%xmm3 |
| 80 » movdqa» %xmm2,208(%r10) |
| 81 » movdqa» %xmm4,%xmm2 |
| 82 |
| 83 » paddd» %xmm0,%xmm1 |
| 84 » pcmpeqd»%xmm5,%xmm0 |
| 85 » movdqa» %xmm3,224(%r10) |
| 86 » movdqa» %xmm4,%xmm3 |
| 87 » paddd» %xmm1,%xmm2 |
| 88 » pcmpeqd»%xmm5,%xmm1 |
| 89 » movdqa» %xmm0,240(%r10) |
| 90 » movdqa» %xmm4,%xmm0 |
| 91 |
| 92 » paddd» %xmm2,%xmm3 |
| 93 » pcmpeqd»%xmm5,%xmm2 |
| 94 » movdqa» %xmm1,256(%r10) |
| 95 » movdqa» %xmm4,%xmm1 |
| 96 |
| 97 » paddd» %xmm3,%xmm0 |
| 98 » pcmpeqd»%xmm5,%xmm3 |
| 99 » movdqa» %xmm2,272(%r10) |
| 100 » movdqa» %xmm4,%xmm2 |
| 101 |
| 102 » paddd» %xmm0,%xmm1 |
| 103 » pcmpeqd»%xmm5,%xmm0 |
| 104 » movdqa» %xmm3,288(%r10) |
| 105 » movdqa» %xmm4,%xmm3 |
| 106 » paddd» %xmm1,%xmm2 |
| 107 » pcmpeqd»%xmm5,%xmm1 |
| 108 » movdqa» %xmm0,304(%r10) |
| 109 |
| 110 » paddd» %xmm2,%xmm3 |
| 111 .byte» 0x67 |
| 112 » pcmpeqd»%xmm5,%xmm2 |
| 113 » movdqa» %xmm1,320(%r10) |
| 114 |
| 115 » pcmpeqd»%xmm5,%xmm3 |
| 116 » movdqa» %xmm2,336(%r10) |
| 117 » pand» 64(%r12),%xmm0 |
| 118 |
| 119 » pand» 80(%r12),%xmm1 |
| 120 » pand» 96(%r12),%xmm2 |
| 121 » movdqa» %xmm3,352(%r10) |
| 122 » pand» 112(%r12),%xmm3 |
| 123 » por» %xmm2,%xmm0 |
| 124 » por» %xmm3,%xmm1 |
| 125 » movdqa» -128(%r12),%xmm4 |
| 126 » movdqa» -112(%r12),%xmm5 |
| 127 » movdqa» -96(%r12),%xmm2 |
| 128 » pand» 112(%r10),%xmm4 |
| 129 » movdqa» -80(%r12),%xmm3 |
| 130 » pand» 128(%r10),%xmm5 |
| 131 » por» %xmm4,%xmm0 |
| 132 » pand» 144(%r10),%xmm2 |
| 133 » por» %xmm5,%xmm1 |
| 134 » pand» 160(%r10),%xmm3 |
| 135 » por» %xmm2,%xmm0 |
| 136 » por» %xmm3,%xmm1 |
| 137 » movdqa» -64(%r12),%xmm4 |
| 138 » movdqa» -48(%r12),%xmm5 |
| 139 » movdqa» -32(%r12),%xmm2 |
| 140 » pand» 176(%r10),%xmm4 |
| 141 » movdqa» -16(%r12),%xmm3 |
| 142 » pand» 192(%r10),%xmm5 |
| 143 » por» %xmm4,%xmm0 |
| 144 » pand» 208(%r10),%xmm2 |
| 145 » por» %xmm5,%xmm1 |
| 146 » pand» 224(%r10),%xmm3 |
| 147 » por» %xmm2,%xmm0 |
| 148 » por» %xmm3,%xmm1 |
| 149 » movdqa» 0(%r12),%xmm4 |
| 150 » movdqa» 16(%r12),%xmm5 |
| 151 » movdqa» 32(%r12),%xmm2 |
| 152 » pand» 240(%r10),%xmm4 |
| 153 » movdqa» 48(%r12),%xmm3 |
| 154 » pand» 256(%r10),%xmm5 |
| 155 » por» %xmm4,%xmm0 |
| 156 » pand» 272(%r10),%xmm2 |
| 157 » por» %xmm5,%xmm1 |
| 158 » pand» 288(%r10),%xmm3 |
| 159 » por» %xmm2,%xmm0 |
| 160 » por» %xmm3,%xmm1 |
| 54 por %xmm1,%xmm0 | 161 por %xmm1,%xmm0 |
| 55 » pand» %xmm7,%xmm3 | 162 » pshufd» $0x4e,%xmm0,%xmm1 |
| 56 » por» %xmm2,%xmm0 | 163 » por» %xmm1,%xmm0 |
| 57 leaq 256(%r12),%r12 | 164 leaq 256(%r12),%r12 |
| 58 por %xmm3,%xmm0 | |
| 59 | |
| 60 .byte 102,72,15,126,195 | 165 .byte 102,72,15,126,195 |
| 61 | 166 |
| 62 movq (%r8),%r8 | 167 movq (%r8),%r8 |
| 63 movq (%rsi),%rax | 168 movq (%rsi),%rax |
| 64 | 169 |
| 65 xorq %r14,%r14 | 170 xorq %r14,%r14 |
| 66 xorq %r15,%r15 | 171 xorq %r15,%r15 |
| 67 | 172 |
| 68 movq -96(%r12),%xmm0 | |
| 69 movq -32(%r12),%xmm1 | |
| 70 pand %xmm4,%xmm0 | |
| 71 movq 32(%r12),%xmm2 | |
| 72 pand %xmm5,%xmm1 | |
| 73 | |
| 74 movq %r8,%rbp | 173 movq %r8,%rbp |
| 75 mulq %rbx | 174 mulq %rbx |
| 76 movq %rax,%r10 | 175 movq %rax,%r10 |
| 77 movq (%rcx),%rax | 176 movq (%rcx),%rax |
| 78 | 177 |
| 79 movq 96(%r12),%xmm3 | |
| 80 pand %xmm6,%xmm2 | |
| 81 por %xmm1,%xmm0 | |
| 82 pand %xmm7,%xmm3 | |
| 83 | |
| 84 imulq %r10,%rbp | 178 imulq %r10,%rbp |
| 85 movq %rdx,%r11 | 179 movq %rdx,%r11 |
| 86 | 180 |
| 87 por %xmm2,%xmm0 | |
| 88 leaq 256(%r12),%r12 | |
| 89 por %xmm3,%xmm0 | |
| 90 | |
| 91 mulq %rbp | 181 mulq %rbp |
| 92 addq %rax,%r10 | 182 addq %rax,%r10 |
| 93 movq 8(%rsi),%rax | 183 movq 8(%rsi),%rax |
| 94 adcq $0,%rdx | 184 adcq $0,%rdx |
| 95 movq %rdx,%r13 | 185 movq %rdx,%r13 |
| 96 | 186 |
| 97 leaq 1(%r15),%r15 | 187 leaq 1(%r15),%r15 |
| 98 jmp .L1st_enter | 188 jmp .L1st_enter |
| 99 | 189 |
| 100 .align 16 | 190 .align 16 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 113 addq %rax,%r11 | 203 addq %rax,%r11 |
| 114 movq (%rcx,%r15,8),%rax | 204 movq (%rcx,%r15,8),%rax |
| 115 adcq $0,%rdx | 205 adcq $0,%rdx |
| 116 leaq 1(%r15),%r15 | 206 leaq 1(%r15),%r15 |
| 117 movq %rdx,%r10 | 207 movq %rdx,%r10 |
| 118 | 208 |
| 119 mulq %rbp | 209 mulq %rbp |
| 120 cmpq %r9,%r15 | 210 cmpq %r9,%r15 |
| 121 jne .L1st | 211 jne .L1st |
| 122 | 212 |
| 123 .byte 102,72,15,126,195 | |
| 124 | 213 |
| 125 addq %rax,%r13 | 214 addq %rax,%r13 |
| 126 movq (%rsi),%rax | |
| 127 adcq $0,%rdx | 215 adcq $0,%rdx |
| 128 addq %r11,%r13 | 216 addq %r11,%r13 |
| 129 adcq $0,%rdx | 217 adcq $0,%rdx |
| 130 » movq» %r13,-16(%rsp,%r15,8) | 218 » movq» %r13,-16(%rsp,%r9,8) |
| 131 movq %rdx,%r13 | 219 movq %rdx,%r13 |
| 132 movq %r10,%r11 | 220 movq %r10,%r11 |
| 133 | 221 |
| 134 xorq %rdx,%rdx | 222 xorq %rdx,%rdx |
| 135 addq %r11,%r13 | 223 addq %r11,%r13 |
| 136 adcq $0,%rdx | 224 adcq $0,%rdx |
| 137 movq %r13,-8(%rsp,%r9,8) | 225 movq %r13,-8(%rsp,%r9,8) |
| 138 movq %rdx,(%rsp,%r9,8) | 226 movq %rdx,(%rsp,%r9,8) |
| 139 | 227 |
| 140 leaq 1(%r14),%r14 | 228 leaq 1(%r14),%r14 |
| 141 jmp .Louter | 229 jmp .Louter |
| 142 .align 16 | 230 .align 16 |
| 143 .Louter: | 231 .Louter: |
| 232 leaq 24+128(%rsp,%r9,8),%rdx |
| 233 andq $-16,%rdx |
| 234 pxor %xmm4,%xmm4 |
| 235 pxor %xmm5,%xmm5 |
| 236 movdqa -128(%r12),%xmm0 |
| 237 movdqa -112(%r12),%xmm1 |
| 238 movdqa -96(%r12),%xmm2 |
| 239 movdqa -80(%r12),%xmm3 |
| 240 pand -128(%rdx),%xmm0 |
| 241 pand -112(%rdx),%xmm1 |
| 242 por %xmm0,%xmm4 |
| 243 pand -96(%rdx),%xmm2 |
| 244 por %xmm1,%xmm5 |
| 245 pand -80(%rdx),%xmm3 |
| 246 por %xmm2,%xmm4 |
| 247 por %xmm3,%xmm5 |
| 248 movdqa -64(%r12),%xmm0 |
| 249 movdqa -48(%r12),%xmm1 |
| 250 movdqa -32(%r12),%xmm2 |
| 251 movdqa -16(%r12),%xmm3 |
| 252 pand -64(%rdx),%xmm0 |
| 253 pand -48(%rdx),%xmm1 |
| 254 por %xmm0,%xmm4 |
| 255 pand -32(%rdx),%xmm2 |
| 256 por %xmm1,%xmm5 |
| 257 pand -16(%rdx),%xmm3 |
| 258 por %xmm2,%xmm4 |
| 259 por %xmm3,%xmm5 |
| 260 movdqa 0(%r12),%xmm0 |
| 261 movdqa 16(%r12),%xmm1 |
| 262 movdqa 32(%r12),%xmm2 |
| 263 movdqa 48(%r12),%xmm3 |
| 264 pand 0(%rdx),%xmm0 |
| 265 pand 16(%rdx),%xmm1 |
| 266 por %xmm0,%xmm4 |
| 267 pand 32(%rdx),%xmm2 |
| 268 por %xmm1,%xmm5 |
| 269 pand 48(%rdx),%xmm3 |
| 270 por %xmm2,%xmm4 |
| 271 por %xmm3,%xmm5 |
| 272 movdqa 64(%r12),%xmm0 |
| 273 movdqa 80(%r12),%xmm1 |
| 274 movdqa 96(%r12),%xmm2 |
| 275 movdqa 112(%r12),%xmm3 |
| 276 pand 64(%rdx),%xmm0 |
| 277 pand 80(%rdx),%xmm1 |
| 278 por %xmm0,%xmm4 |
| 279 pand 96(%rdx),%xmm2 |
| 280 por %xmm1,%xmm5 |
| 281 pand 112(%rdx),%xmm3 |
| 282 por %xmm2,%xmm4 |
| 283 por %xmm3,%xmm5 |
| 284 por %xmm5,%xmm4 |
| 285 pshufd $0x4e,%xmm4,%xmm0 |
| 286 por %xmm4,%xmm0 |
| 287 leaq 256(%r12),%r12 |
| 288 |
| 289 movq (%rsi),%rax |
| 290 .byte 102,72,15,126,195 |
| 291 |
| 144 xorq %r15,%r15 | 292 xorq %r15,%r15 |
| 145 movq %r8,%rbp | 293 movq %r8,%rbp |
| 146 movq (%rsp),%r10 | 294 movq (%rsp),%r10 |
| 147 | 295 |
| 148 movq -96(%r12),%xmm0 | |
| 149 movq -32(%r12),%xmm1 | |
| 150 pand %xmm4,%xmm0 | |
| 151 movq 32(%r12),%xmm2 | |
| 152 pand %xmm5,%xmm1 | |
| 153 | |
| 154 mulq %rbx | 296 mulq %rbx |
| 155 addq %rax,%r10 | 297 addq %rax,%r10 |
| 156 movq (%rcx),%rax | 298 movq (%rcx),%rax |
| 157 adcq $0,%rdx | 299 adcq $0,%rdx |
| 158 | 300 |
| 159 movq 96(%r12),%xmm3 | |
| 160 pand %xmm6,%xmm2 | |
| 161 por %xmm1,%xmm0 | |
| 162 pand %xmm7,%xmm3 | |
| 163 | |
| 164 imulq %r10,%rbp | 301 imulq %r10,%rbp |
| 165 movq %rdx,%r11 | 302 movq %rdx,%r11 |
| 166 | 303 |
| 167 por %xmm2,%xmm0 | |
| 168 leaq 256(%r12),%r12 | |
| 169 por %xmm3,%xmm0 | |
| 170 | |
| 171 mulq %rbp | 304 mulq %rbp |
| 172 addq %rax,%r10 | 305 addq %rax,%r10 |
| 173 movq 8(%rsi),%rax | 306 movq 8(%rsi),%rax |
| 174 adcq $0,%rdx | 307 adcq $0,%rdx |
| 175 movq 8(%rsp),%r10 | 308 movq 8(%rsp),%r10 |
| 176 movq %rdx,%r13 | 309 movq %rdx,%r13 |
| 177 | 310 |
| 178 leaq 1(%r15),%r15 | 311 leaq 1(%r15),%r15 |
| 179 jmp .Linner_enter | 312 jmp .Linner_enter |
| 180 | 313 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 196 adcq $0,%rdx | 329 adcq $0,%rdx |
| 197 addq %r11,%r10 | 330 addq %r11,%r10 |
| 198 movq %rdx,%r11 | 331 movq %rdx,%r11 |
| 199 adcq $0,%r11 | 332 adcq $0,%r11 |
| 200 leaq 1(%r15),%r15 | 333 leaq 1(%r15),%r15 |
| 201 | 334 |
| 202 mulq %rbp | 335 mulq %rbp |
| 203 cmpq %r9,%r15 | 336 cmpq %r9,%r15 |
| 204 jne .Linner | 337 jne .Linner |
| 205 | 338 |
| 206 .byte 102,72,15,126,195 | |
| 207 | |
| 208 addq %rax,%r13 | 339 addq %rax,%r13 |
| 209 movq (%rsi),%rax | |
| 210 adcq $0,%rdx | 340 adcq $0,%rdx |
| 211 addq %r10,%r13 | 341 addq %r10,%r13 |
| 212 » movq» (%rsp,%r15,8),%r10 | 342 » movq» (%rsp,%r9,8),%r10 |
| 213 adcq $0,%rdx | 343 adcq $0,%rdx |
| 214 » movq» %r13,-16(%rsp,%r15,8) | 344 » movq» %r13,-16(%rsp,%r9,8) |
| 215 movq %rdx,%r13 | 345 movq %rdx,%r13 |
| 216 | 346 |
| 217 xorq %rdx,%rdx | 347 xorq %rdx,%rdx |
| 218 addq %r11,%r13 | 348 addq %r11,%r13 |
| 219 adcq $0,%rdx | 349 adcq $0,%rdx |
| 220 addq %r10,%r13 | 350 addq %r10,%r13 |
| 221 adcq $0,%rdx | 351 adcq $0,%rdx |
| 222 movq %r13,-8(%rsp,%r9,8) | 352 movq %r13,-8(%rsp,%r9,8) |
| 223 movq %rdx,(%rsp,%r9,8) | 353 movq %rdx,(%rsp,%r9,8) |
| 224 | 354 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 250 andq %rax,%rsi | 380 andq %rax,%rsi |
| 251 xorq %rcx,%rsi | 381 xorq %rcx,%rsi |
| 252 movq %r14,(%rsp,%r14,8) | 382 movq %r14,(%rsp,%r14,8) |
| 253 movq %rsi,(%rdi,%r14,8) | 383 movq %rsi,(%rdi,%r14,8) |
| 254 leaq 1(%r14),%r14 | 384 leaq 1(%r14),%r14 |
| 255 subq $1,%r15 | 385 subq $1,%r15 |
| 256 jnz .Lcopy | 386 jnz .Lcopy |
| 257 | 387 |
| 258 movq 8(%rsp,%r9,8),%rsi | 388 movq 8(%rsp,%r9,8),%rsi |
| 259 movq $1,%rax | 389 movq $1,%rax |
| 390 |
| 260 movq -48(%rsi),%r15 | 391 movq -48(%rsi),%r15 |
| 261 movq -40(%rsi),%r14 | 392 movq -40(%rsi),%r14 |
| 262 movq -32(%rsi),%r13 | 393 movq -32(%rsi),%r13 |
| 263 movq -24(%rsi),%r12 | 394 movq -24(%rsi),%r12 |
| 264 movq -16(%rsi),%rbp | 395 movq -16(%rsi),%rbp |
| 265 movq -8(%rsi),%rbx | 396 movq -8(%rsi),%rbx |
| 266 leaq (%rsi),%rsp | 397 leaq (%rsi),%rsp |
| 267 .Lmul_epilogue: | 398 .Lmul_epilogue: |
| 268 .byte 0xf3,0xc3 | 399 .byte 0xf3,0xc3 |
| 269 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 | 400 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 |
| 270 .type bn_mul4x_mont_gather5,@function | 401 .type bn_mul4x_mont_gather5,@function |
| 271 .align 32 | 402 .align 32 |
| 272 bn_mul4x_mont_gather5: | 403 bn_mul4x_mont_gather5: |
| 273 .Lmul4x_enter: | 404 .Lmul4x_enter: |
| 274 .byte 0x67 | 405 .byte 0x67 |
| 275 movq %rsp,%rax | 406 movq %rsp,%rax |
| 276 pushq %rbx | 407 pushq %rbx |
| 277 pushq %rbp | 408 pushq %rbp |
| 278 pushq %r12 | 409 pushq %r12 |
| 279 pushq %r13 | 410 pushq %r13 |
| 280 pushq %r14 | 411 pushq %r14 |
| 281 pushq %r15 | 412 pushq %r15 |
| 413 |
| 282 .byte 0x67 | 414 .byte 0x67 |
| 283 movl %r9d,%r10d | |
| 284 shll $3,%r9d | 415 shll $3,%r9d |
| 285 » shll» $3+2,%r10d | 416 » leaq» (%r9,%r9,2),%r10 |
| 286 negq %r9 | 417 negq %r9 |
| 287 | 418 |
| 288 | 419 |
| 289 | 420 |
| 290 | 421 |
| 291 | 422 |
| 292 | 423 |
| 293 | 424 |
| 294 | 425 |
| 295 » leaq» -64(%rsp,%r9,2),%r11 | 426 |
| 296 » subq» %rsi,%r11 | 427 |
| 428 » leaq» -320(%rsp,%r9,2),%r11 |
| 429 » subq» %rdi,%r11 |
| 297 andq $4095,%r11 | 430 andq $4095,%r11 |
| 298 cmpq %r11,%r10 | 431 cmpq %r11,%r10 |
| 299 jb .Lmul4xsp_alt | 432 jb .Lmul4xsp_alt |
| 300 subq %r11,%rsp | 433 subq %r11,%rsp |
| 301 » leaq» -64(%rsp,%r9,2),%rsp | 434 » leaq» -320(%rsp,%r9,2),%rsp |
| 302 jmp .Lmul4xsp_done | 435 jmp .Lmul4xsp_done |
| 303 | 436 |
| 304 .align 32 | 437 .align 32 |
| 305 .Lmul4xsp_alt: | 438 .Lmul4xsp_alt: |
| 306 » leaq» 4096-64(,%r9,2),%r10 | 439 » leaq» 4096-320(,%r9,2),%r10 |
| 307 » leaq» -64(%rsp,%r9,2),%rsp | 440 » leaq» -320(%rsp,%r9,2),%rsp |
| 308 subq %r10,%r11 | 441 subq %r10,%r11 |
| 309 movq $0,%r10 | 442 movq $0,%r10 |
| 310 cmovcq %r10,%r11 | 443 cmovcq %r10,%r11 |
| 311 subq %r11,%rsp | 444 subq %r11,%rsp |
| 312 .Lmul4xsp_done: | 445 .Lmul4xsp_done: |
| 313 andq $-64,%rsp | 446 andq $-64,%rsp |
| 314 negq %r9 | 447 negq %r9 |
| 315 | 448 |
| 316 movq %rax,40(%rsp) | 449 movq %rax,40(%rsp) |
| 317 .Lmul4x_body: | 450 .Lmul4x_body: |
| 318 | 451 |
| 319 call mul4x_internal | 452 call mul4x_internal |
| 320 | 453 |
| 321 movq 40(%rsp),%rsi | 454 movq 40(%rsp),%rsi |
| 322 movq $1,%rax | 455 movq $1,%rax |
| 456 |
| 323 movq -48(%rsi),%r15 | 457 movq -48(%rsi),%r15 |
| 324 movq -40(%rsi),%r14 | 458 movq -40(%rsi),%r14 |
| 325 movq -32(%rsi),%r13 | 459 movq -32(%rsi),%r13 |
| 326 movq -24(%rsi),%r12 | 460 movq -24(%rsi),%r12 |
| 327 movq -16(%rsi),%rbp | 461 movq -16(%rsi),%rbp |
| 328 movq -8(%rsi),%rbx | 462 movq -8(%rsi),%rbx |
| 329 leaq (%rsi),%rsp | 463 leaq (%rsi),%rsp |
| 330 .Lmul4x_epilogue: | 464 .Lmul4x_epilogue: |
| 331 .byte 0xf3,0xc3 | 465 .byte 0xf3,0xc3 |
| 332 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 | 466 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 |
| 333 | 467 |
| 334 .type mul4x_internal,@function | 468 .type mul4x_internal,@function |
| 335 .align 32 | 469 .align 32 |
| 336 mul4x_internal: | 470 mul4x_internal: |
| 337 shlq $5,%r9 | 471 shlq $5,%r9 |
| 338 » movl» 8(%rax),%r10d | 472 » movd» 8(%rax),%xmm5 |
| 339 » leaq» 256(%rdx,%r9,1),%r13 | 473 » leaq» .Linc(%rip),%rax |
| 474 » leaq» 128(%rdx,%r9,1),%r13 |
| 340 shrq $5,%r9 | 475 shrq $5,%r9 |
| 341 » movq» %r10,%r11 | 476 » movdqa» 0(%rax),%xmm0 |
| 342 » shrq» $3,%r10 | 477 » movdqa» 16(%rax),%xmm1 |
| 343 » andq» $7,%r11 | 478 » leaq» 88-112(%rsp,%r9,1),%r10 |
| 344 » notq» %r10 | 479 » leaq» 128(%rdx),%r12 |
| 345 » leaq» .Lmagic_masks(%rip),%rax | |
| 346 » andq» $3,%r10 | |
| 347 » leaq» 96(%rdx,%r11,8),%r12 | |
| 348 » movq» 0(%rax,%r10,8),%xmm4 | |
| 349 » movq» 8(%rax,%r10,8),%xmm5 | |
| 350 » addq» $7,%r11 | |
| 351 » movq» 16(%rax,%r10,8),%xmm6 | |
| 352 » movq» 24(%rax,%r10,8),%xmm7 | |
| 353 » andq» $7,%r11 | |
| 354 | 480 |
| 355 » movq» -96(%r12),%xmm0 | 481 » pshufd» $0,%xmm5,%xmm5 |
| 356 » leaq» 256(%r12),%r14 | 482 » movdqa» %xmm1,%xmm4 |
| 357 » movq» -32(%r12),%xmm1 | 483 .byte» 0x67,0x67 |
| 358 » pand» %xmm4,%xmm0 | 484 » movdqa» %xmm1,%xmm2 |
| 359 » movq» 32(%r12),%xmm2 | 485 » paddd» %xmm0,%xmm1 |
| 360 » pand» %xmm5,%xmm1 | 486 » pcmpeqd»%xmm5,%xmm0 |
| 361 » movq» 96(%r12),%xmm3 | |
| 362 » pand» %xmm6,%xmm2 | |
| 363 .byte 0x67 | 487 .byte 0x67 |
| 488 movdqa %xmm4,%xmm3 |
| 489 paddd %xmm1,%xmm2 |
| 490 pcmpeqd %xmm5,%xmm1 |
| 491 movdqa %xmm0,112(%r10) |
| 492 movdqa %xmm4,%xmm0 |
| 493 |
| 494 paddd %xmm2,%xmm3 |
| 495 pcmpeqd %xmm5,%xmm2 |
| 496 movdqa %xmm1,128(%r10) |
| 497 movdqa %xmm4,%xmm1 |
| 498 |
| 499 paddd %xmm3,%xmm0 |
| 500 pcmpeqd %xmm5,%xmm3 |
| 501 movdqa %xmm2,144(%r10) |
| 502 movdqa %xmm4,%xmm2 |
| 503 |
| 504 paddd %xmm0,%xmm1 |
| 505 pcmpeqd %xmm5,%xmm0 |
| 506 movdqa %xmm3,160(%r10) |
| 507 movdqa %xmm4,%xmm3 |
| 508 paddd %xmm1,%xmm2 |
| 509 pcmpeqd %xmm5,%xmm1 |
| 510 movdqa %xmm0,176(%r10) |
| 511 movdqa %xmm4,%xmm0 |
| 512 |
| 513 paddd %xmm2,%xmm3 |
| 514 pcmpeqd %xmm5,%xmm2 |
| 515 movdqa %xmm1,192(%r10) |
| 516 movdqa %xmm4,%xmm1 |
| 517 |
| 518 paddd %xmm3,%xmm0 |
| 519 pcmpeqd %xmm5,%xmm3 |
| 520 movdqa %xmm2,208(%r10) |
| 521 movdqa %xmm4,%xmm2 |
| 522 |
| 523 paddd %xmm0,%xmm1 |
| 524 pcmpeqd %xmm5,%xmm0 |
| 525 movdqa %xmm3,224(%r10) |
| 526 movdqa %xmm4,%xmm3 |
| 527 paddd %xmm1,%xmm2 |
| 528 pcmpeqd %xmm5,%xmm1 |
| 529 movdqa %xmm0,240(%r10) |
| 530 movdqa %xmm4,%xmm0 |
| 531 |
| 532 paddd %xmm2,%xmm3 |
| 533 pcmpeqd %xmm5,%xmm2 |
| 534 movdqa %xmm1,256(%r10) |
| 535 movdqa %xmm4,%xmm1 |
| 536 |
| 537 paddd %xmm3,%xmm0 |
| 538 pcmpeqd %xmm5,%xmm3 |
| 539 movdqa %xmm2,272(%r10) |
| 540 movdqa %xmm4,%xmm2 |
| 541 |
| 542 paddd %xmm0,%xmm1 |
| 543 pcmpeqd %xmm5,%xmm0 |
| 544 movdqa %xmm3,288(%r10) |
| 545 movdqa %xmm4,%xmm3 |
| 546 paddd %xmm1,%xmm2 |
| 547 pcmpeqd %xmm5,%xmm1 |
| 548 movdqa %xmm0,304(%r10) |
| 549 |
| 550 paddd %xmm2,%xmm3 |
| 551 .byte 0x67 |
| 552 pcmpeqd %xmm5,%xmm2 |
| 553 movdqa %xmm1,320(%r10) |
| 554 |
| 555 pcmpeqd %xmm5,%xmm3 |
| 556 movdqa %xmm2,336(%r10) |
| 557 pand 64(%r12),%xmm0 |
| 558 |
| 559 pand 80(%r12),%xmm1 |
| 560 pand 96(%r12),%xmm2 |
| 561 movdqa %xmm3,352(%r10) |
| 562 pand 112(%r12),%xmm3 |
| 563 por %xmm2,%xmm0 |
| 564 por %xmm3,%xmm1 |
| 565 movdqa -128(%r12),%xmm4 |
| 566 movdqa -112(%r12),%xmm5 |
| 567 movdqa -96(%r12),%xmm2 |
| 568 pand 112(%r10),%xmm4 |
| 569 movdqa -80(%r12),%xmm3 |
| 570 pand 128(%r10),%xmm5 |
| 571 por %xmm4,%xmm0 |
| 572 pand 144(%r10),%xmm2 |
| 573 por %xmm5,%xmm1 |
| 574 pand 160(%r10),%xmm3 |
| 575 por %xmm2,%xmm0 |
| 576 por %xmm3,%xmm1 |
| 577 movdqa -64(%r12),%xmm4 |
| 578 movdqa -48(%r12),%xmm5 |
| 579 movdqa -32(%r12),%xmm2 |
| 580 pand 176(%r10),%xmm4 |
| 581 movdqa -16(%r12),%xmm3 |
| 582 pand 192(%r10),%xmm5 |
| 583 por %xmm4,%xmm0 |
| 584 pand 208(%r10),%xmm2 |
| 585 por %xmm5,%xmm1 |
| 586 pand 224(%r10),%xmm3 |
| 587 por %xmm2,%xmm0 |
| 588 por %xmm3,%xmm1 |
| 589 movdqa 0(%r12),%xmm4 |
| 590 movdqa 16(%r12),%xmm5 |
| 591 movdqa 32(%r12),%xmm2 |
| 592 pand 240(%r10),%xmm4 |
| 593 movdqa 48(%r12),%xmm3 |
| 594 pand 256(%r10),%xmm5 |
| 595 por %xmm4,%xmm0 |
| 596 pand 272(%r10),%xmm2 |
| 597 por %xmm5,%xmm1 |
| 598 pand 288(%r10),%xmm3 |
| 599 por %xmm2,%xmm0 |
| 600 por %xmm3,%xmm1 |
| 364 por %xmm1,%xmm0 | 601 por %xmm1,%xmm0 |
| 365 » movq» -96(%r14),%xmm1 | 602 » pshufd» $0x4e,%xmm0,%xmm1 |
| 366 .byte» 0x67 | 603 » por» %xmm1,%xmm0 |
| 367 » pand» %xmm7,%xmm3 | 604 » leaq» 256(%r12),%r12 |
| 368 .byte» 0x67 | 605 .byte» 102,72,15,126,195 |
| 369 » por» %xmm2,%xmm0 | |
| 370 » movq» -32(%r14),%xmm2 | |
| 371 .byte» 0x67 | |
| 372 » pand» %xmm4,%xmm1 | |
| 373 .byte» 0x67 | |
| 374 » por» %xmm3,%xmm0 | |
| 375 » movq» 32(%r14),%xmm3 | |
| 376 | 606 |
| 377 .byte 102,72,15,126,195 | |
| 378 movq 96(%r14),%xmm0 | |
| 379 movq %r13,16+8(%rsp) | 607 movq %r13,16+8(%rsp) |
| 380 movq %rdi,56+8(%rsp) | 608 movq %rdi,56+8(%rsp) |
| 381 | 609 |
| 382 movq (%r8),%r8 | 610 movq (%r8),%r8 |
| 383 movq (%rsi),%rax | 611 movq (%rsi),%rax |
| 384 leaq (%rsi,%r9,1),%rsi | 612 leaq (%rsi,%r9,1),%rsi |
| 385 negq %r9 | 613 negq %r9 |
| 386 | 614 |
| 387 movq %r8,%rbp | 615 movq %r8,%rbp |
| 388 mulq %rbx | 616 mulq %rbx |
| 389 movq %rax,%r10 | 617 movq %rax,%r10 |
| 390 movq (%rcx),%rax | 618 movq (%rcx),%rax |
| 391 | 619 |
| 392 pand %xmm5,%xmm2 | |
| 393 pand %xmm6,%xmm3 | |
| 394 por %xmm2,%xmm1 | |
| 395 | |
| 396 imulq %r10,%rbp | 620 imulq %r10,%rbp |
| 397 | 621 » leaq» 64+8(%rsp),%r14 |
| 398 | |
| 399 | |
| 400 | |
| 401 | |
| 402 | |
| 403 | |
| 404 » leaq» 64+8(%rsp,%r11,8),%r14 | |
| 405 movq %rdx,%r11 | 622 movq %rdx,%r11 |
| 406 | 623 |
| 407 pand %xmm7,%xmm0 | |
| 408 por %xmm3,%xmm1 | |
| 409 leaq 512(%r12),%r12 | |
| 410 por %xmm1,%xmm0 | |
| 411 | |
| 412 mulq %rbp | 624 mulq %rbp |
| 413 addq %rax,%r10 | 625 addq %rax,%r10 |
| 414 movq 8(%rsi,%r9,1),%rax | 626 movq 8(%rsi,%r9,1),%rax |
| 415 adcq $0,%rdx | 627 adcq $0,%rdx |
| 416 movq %rdx,%rdi | 628 movq %rdx,%rdi |
| 417 | 629 |
| 418 mulq %rbx | 630 mulq %rbx |
| 419 addq %rax,%r11 | 631 addq %rax,%r11 |
| 420 » movq» 16(%rcx),%rax | 632 » movq» 8(%rcx),%rax |
| 421 adcq $0,%rdx | 633 adcq $0,%rdx |
| 422 movq %rdx,%r10 | 634 movq %rdx,%r10 |
| 423 | 635 |
| 424 mulq %rbp | 636 mulq %rbp |
| 425 addq %rax,%rdi | 637 addq %rax,%rdi |
| 426 movq 16(%rsi,%r9,1),%rax | 638 movq 16(%rsi,%r9,1),%rax |
| 427 adcq $0,%rdx | 639 adcq $0,%rdx |
| 428 addq %r11,%rdi | 640 addq %r11,%rdi |
| 429 leaq 32(%r9),%r15 | 641 leaq 32(%r9),%r15 |
| 430 » leaq» 64(%rcx),%rcx | 642 » leaq» 32(%rcx),%rcx |
| 431 adcq $0,%rdx | 643 adcq $0,%rdx |
| 432 movq %rdi,(%r14) | 644 movq %rdi,(%r14) |
| 433 movq %rdx,%r13 | 645 movq %rdx,%r13 |
| 434 jmp .L1st4x | 646 jmp .L1st4x |
| 435 | 647 |
| 436 .align 32 | 648 .align 32 |
| 437 .L1st4x: | 649 .L1st4x: |
| 438 mulq %rbx | 650 mulq %rbx |
| 439 addq %rax,%r10 | 651 addq %rax,%r10 |
| 440 » movq» -32(%rcx),%rax | 652 » movq» -16(%rcx),%rax |
| 441 leaq 32(%r14),%r14 | 653 leaq 32(%r14),%r14 |
| 442 adcq $0,%rdx | 654 adcq $0,%rdx |
| 443 movq %rdx,%r11 | 655 movq %rdx,%r11 |
| 444 | 656 |
| 445 mulq %rbp | 657 mulq %rbp |
| 446 addq %rax,%r13 | 658 addq %rax,%r13 |
| 447 movq -8(%rsi,%r15,1),%rax | 659 movq -8(%rsi,%r15,1),%rax |
| 448 adcq $0,%rdx | 660 adcq $0,%rdx |
| 449 addq %r10,%r13 | 661 addq %r10,%r13 |
| 450 adcq $0,%rdx | 662 adcq $0,%rdx |
| 451 movq %r13,-24(%r14) | 663 movq %r13,-24(%r14) |
| 452 movq %rdx,%rdi | 664 movq %rdx,%rdi |
| 453 | 665 |
| 454 mulq %rbx | 666 mulq %rbx |
| 455 addq %rax,%r11 | 667 addq %rax,%r11 |
| 456 » movq» -16(%rcx),%rax | 668 » movq» -8(%rcx),%rax |
| 457 adcq $0,%rdx | 669 adcq $0,%rdx |
| 458 movq %rdx,%r10 | 670 movq %rdx,%r10 |
| 459 | 671 |
| 460 mulq %rbp | 672 mulq %rbp |
| 461 addq %rax,%rdi | 673 addq %rax,%rdi |
| 462 movq (%rsi,%r15,1),%rax | 674 movq (%rsi,%r15,1),%rax |
| 463 adcq $0,%rdx | 675 adcq $0,%rdx |
| 464 addq %r11,%rdi | 676 addq %r11,%rdi |
| 465 adcq $0,%rdx | 677 adcq $0,%rdx |
| 466 movq %rdi,-16(%r14) | 678 movq %rdi,-16(%r14) |
| 467 movq %rdx,%r13 | 679 movq %rdx,%r13 |
| 468 | 680 |
| 469 mulq %rbx | 681 mulq %rbx |
| 470 addq %rax,%r10 | 682 addq %rax,%r10 |
| 471 movq 0(%rcx),%rax | 683 movq 0(%rcx),%rax |
| 472 adcq $0,%rdx | 684 adcq $0,%rdx |
| 473 movq %rdx,%r11 | 685 movq %rdx,%r11 |
| 474 | 686 |
| 475 mulq %rbp | 687 mulq %rbp |
| 476 addq %rax,%r13 | 688 addq %rax,%r13 |
| 477 movq 8(%rsi,%r15,1),%rax | 689 movq 8(%rsi,%r15,1),%rax |
| 478 adcq $0,%rdx | 690 adcq $0,%rdx |
| 479 addq %r10,%r13 | 691 addq %r10,%r13 |
| 480 adcq $0,%rdx | 692 adcq $0,%rdx |
| 481 movq %r13,-8(%r14) | 693 movq %r13,-8(%r14) |
| 482 movq %rdx,%rdi | 694 movq %rdx,%rdi |
| 483 | 695 |
| 484 mulq %rbx | 696 mulq %rbx |
| 485 addq %rax,%r11 | 697 addq %rax,%r11 |
| 486 » movq» 16(%rcx),%rax | 698 » movq» 8(%rcx),%rax |
| 487 adcq $0,%rdx | 699 adcq $0,%rdx |
| 488 movq %rdx,%r10 | 700 movq %rdx,%r10 |
| 489 | 701 |
| 490 mulq %rbp | 702 mulq %rbp |
| 491 addq %rax,%rdi | 703 addq %rax,%rdi |
| 492 movq 16(%rsi,%r15,1),%rax | 704 movq 16(%rsi,%r15,1),%rax |
| 493 adcq $0,%rdx | 705 adcq $0,%rdx |
| 494 addq %r11,%rdi | 706 addq %r11,%rdi |
| 495 » leaq» 64(%rcx),%rcx | 707 » leaq» 32(%rcx),%rcx |
| 496 adcq $0,%rdx | 708 adcq $0,%rdx |
| 497 movq %rdi,(%r14) | 709 movq %rdi,(%r14) |
| 498 movq %rdx,%r13 | 710 movq %rdx,%r13 |
| 499 | 711 |
| 500 addq $32,%r15 | 712 addq $32,%r15 |
| 501 jnz .L1st4x | 713 jnz .L1st4x |
| 502 | 714 |
| 503 mulq %rbx | 715 mulq %rbx |
| 504 addq %rax,%r10 | 716 addq %rax,%r10 |
| 505 » movq» -32(%rcx),%rax | 717 » movq» -16(%rcx),%rax |
| 506 leaq 32(%r14),%r14 | 718 leaq 32(%r14),%r14 |
| 507 adcq $0,%rdx | 719 adcq $0,%rdx |
| 508 movq %rdx,%r11 | 720 movq %rdx,%r11 |
| 509 | 721 |
| 510 mulq %rbp | 722 mulq %rbp |
| 511 addq %rax,%r13 | 723 addq %rax,%r13 |
| 512 movq -8(%rsi),%rax | 724 movq -8(%rsi),%rax |
| 513 adcq $0,%rdx | 725 adcq $0,%rdx |
| 514 addq %r10,%r13 | 726 addq %r10,%r13 |
| 515 adcq $0,%rdx | 727 adcq $0,%rdx |
| 516 movq %r13,-24(%r14) | 728 movq %r13,-24(%r14) |
| 517 movq %rdx,%rdi | 729 movq %rdx,%rdi |
| 518 | 730 |
| 519 mulq %rbx | 731 mulq %rbx |
| 520 addq %rax,%r11 | 732 addq %rax,%r11 |
| 521 » movq» -16(%rcx),%rax | 733 » movq» -8(%rcx),%rax |
| 522 adcq $0,%rdx | 734 adcq $0,%rdx |
| 523 movq %rdx,%r10 | 735 movq %rdx,%r10 |
| 524 | 736 |
| 525 mulq %rbp | 737 mulq %rbp |
| 526 addq %rax,%rdi | 738 addq %rax,%rdi |
| 527 movq (%rsi,%r9,1),%rax | 739 movq (%rsi,%r9,1),%rax |
| 528 adcq $0,%rdx | 740 adcq $0,%rdx |
| 529 addq %r11,%rdi | 741 addq %r11,%rdi |
| 530 adcq $0,%rdx | 742 adcq $0,%rdx |
| 531 movq %rdi,-16(%r14) | 743 movq %rdi,-16(%r14) |
| 532 movq %rdx,%r13 | 744 movq %rdx,%r13 |
| 533 | 745 |
| 534 .byte» 102,72,15,126,195 | 746 » leaq» (%rcx,%r9,1),%rcx |
| 535 » leaq» (%rcx,%r9,2),%rcx | |
| 536 | 747 |
| 537 xorq %rdi,%rdi | 748 xorq %rdi,%rdi |
| 538 addq %r10,%r13 | 749 addq %r10,%r13 |
| 539 adcq $0,%rdi | 750 adcq $0,%rdi |
| 540 movq %r13,-8(%r14) | 751 movq %r13,-8(%r14) |
| 541 | 752 |
| 542 jmp .Louter4x | 753 jmp .Louter4x |
| 543 | 754 |
| 544 .align 32 | 755 .align 32 |
| 545 .Louter4x: | 756 .Louter4x: |
| 757 leaq 16+128(%r14),%rdx |
| 758 pxor %xmm4,%xmm4 |
| 759 pxor %xmm5,%xmm5 |
| 760 movdqa -128(%r12),%xmm0 |
| 761 movdqa -112(%r12),%xmm1 |
| 762 movdqa -96(%r12),%xmm2 |
| 763 movdqa -80(%r12),%xmm3 |
| 764 pand -128(%rdx),%xmm0 |
| 765 pand -112(%rdx),%xmm1 |
| 766 por %xmm0,%xmm4 |
| 767 pand -96(%rdx),%xmm2 |
| 768 por %xmm1,%xmm5 |
| 769 pand -80(%rdx),%xmm3 |
| 770 por %xmm2,%xmm4 |
| 771 por %xmm3,%xmm5 |
| 772 movdqa -64(%r12),%xmm0 |
| 773 movdqa -48(%r12),%xmm1 |
| 774 movdqa -32(%r12),%xmm2 |
| 775 movdqa -16(%r12),%xmm3 |
| 776 pand -64(%rdx),%xmm0 |
| 777 pand -48(%rdx),%xmm1 |
| 778 por %xmm0,%xmm4 |
| 779 pand -32(%rdx),%xmm2 |
| 780 por %xmm1,%xmm5 |
| 781 pand -16(%rdx),%xmm3 |
| 782 por %xmm2,%xmm4 |
| 783 por %xmm3,%xmm5 |
| 784 movdqa 0(%r12),%xmm0 |
| 785 movdqa 16(%r12),%xmm1 |
| 786 movdqa 32(%r12),%xmm2 |
| 787 movdqa 48(%r12),%xmm3 |
| 788 pand 0(%rdx),%xmm0 |
| 789 pand 16(%rdx),%xmm1 |
| 790 por %xmm0,%xmm4 |
| 791 pand 32(%rdx),%xmm2 |
| 792 por %xmm1,%xmm5 |
| 793 pand 48(%rdx),%xmm3 |
| 794 por %xmm2,%xmm4 |
| 795 por %xmm3,%xmm5 |
| 796 movdqa 64(%r12),%xmm0 |
| 797 movdqa 80(%r12),%xmm1 |
| 798 movdqa 96(%r12),%xmm2 |
| 799 movdqa 112(%r12),%xmm3 |
| 800 pand 64(%rdx),%xmm0 |
| 801 pand 80(%rdx),%xmm1 |
| 802 por %xmm0,%xmm4 |
| 803 pand 96(%rdx),%xmm2 |
| 804 por %xmm1,%xmm5 |
| 805 pand 112(%rdx),%xmm3 |
| 806 por %xmm2,%xmm4 |
| 807 por %xmm3,%xmm5 |
| 808 por %xmm5,%xmm4 |
| 809 pshufd $0x4e,%xmm4,%xmm0 |
| 810 por %xmm4,%xmm0 |
| 811 leaq 256(%r12),%r12 |
| 812 .byte 102,72,15,126,195 |
| 813 |
| 546 movq (%r14,%r9,1),%r10 | 814 movq (%r14,%r9,1),%r10 |
| 547 movq %r8,%rbp | 815 movq %r8,%rbp |
| 548 mulq %rbx | 816 mulq %rbx |
| 549 addq %rax,%r10 | 817 addq %rax,%r10 |
| 550 movq (%rcx),%rax | 818 movq (%rcx),%rax |
| 551 adcq $0,%rdx | 819 adcq $0,%rdx |
| 552 | 820 |
| 553 movq -96(%r12),%xmm0 | |
| 554 movq -32(%r12),%xmm1 | |
| 555 pand %xmm4,%xmm0 | |
| 556 movq 32(%r12),%xmm2 | |
| 557 pand %xmm5,%xmm1 | |
| 558 movq 96(%r12),%xmm3 | |
| 559 | |
| 560 imulq %r10,%rbp | 821 imulq %r10,%rbp |
| 561 .byte 0x67 | |
| 562 movq %rdx,%r11 | 822 movq %rdx,%r11 |
| 563 movq %rdi,(%r14) | 823 movq %rdi,(%r14) |
| 564 | 824 |
| 565 pand %xmm6,%xmm2 | |
| 566 por %xmm1,%xmm0 | |
| 567 pand %xmm7,%xmm3 | |
| 568 por %xmm2,%xmm0 | |
| 569 leaq (%r14,%r9,1),%r14 | 825 leaq (%r14,%r9,1),%r14 |
| 570 leaq 256(%r12),%r12 | |
| 571 por %xmm3,%xmm0 | |
| 572 | 826 |
| 573 mulq %rbp | 827 mulq %rbp |
| 574 addq %rax,%r10 | 828 addq %rax,%r10 |
| 575 movq 8(%rsi,%r9,1),%rax | 829 movq 8(%rsi,%r9,1),%rax |
| 576 adcq $0,%rdx | 830 adcq $0,%rdx |
| 577 movq %rdx,%rdi | 831 movq %rdx,%rdi |
| 578 | 832 |
| 579 mulq %rbx | 833 mulq %rbx |
| 580 addq %rax,%r11 | 834 addq %rax,%r11 |
| 581 » movq» 16(%rcx),%rax | 835 » movq» 8(%rcx),%rax |
| 582 adcq $0,%rdx | 836 adcq $0,%rdx |
| 583 addq 8(%r14),%r11 | 837 addq 8(%r14),%r11 |
| 584 adcq $0,%rdx | 838 adcq $0,%rdx |
| 585 movq %rdx,%r10 | 839 movq %rdx,%r10 |
| 586 | 840 |
| 587 mulq %rbp | 841 mulq %rbp |
| 588 addq %rax,%rdi | 842 addq %rax,%rdi |
| 589 movq 16(%rsi,%r9,1),%rax | 843 movq 16(%rsi,%r9,1),%rax |
| 590 adcq $0,%rdx | 844 adcq $0,%rdx |
| 591 addq %r11,%rdi | 845 addq %r11,%rdi |
| 592 leaq 32(%r9),%r15 | 846 leaq 32(%r9),%r15 |
| 593 » leaq» 64(%rcx),%rcx | 847 » leaq» 32(%rcx),%rcx |
| 594 adcq $0,%rdx | 848 adcq $0,%rdx |
| 595 movq %rdx,%r13 | 849 movq %rdx,%r13 |
| 596 jmp .Linner4x | 850 jmp .Linner4x |
| 597 | 851 |
| 598 .align 32 | 852 .align 32 |
| 599 .Linner4x: | 853 .Linner4x: |
| 600 mulq %rbx | 854 mulq %rbx |
| 601 addq %rax,%r10 | 855 addq %rax,%r10 |
| 602 » movq» -32(%rcx),%rax | 856 » movq» -16(%rcx),%rax |
| 603 adcq $0,%rdx | 857 adcq $0,%rdx |
| 604 addq 16(%r14),%r10 | 858 addq 16(%r14),%r10 |
| 605 leaq 32(%r14),%r14 | 859 leaq 32(%r14),%r14 |
| 606 adcq $0,%rdx | 860 adcq $0,%rdx |
| 607 movq %rdx,%r11 | 861 movq %rdx,%r11 |
| 608 | 862 |
| 609 mulq %rbp | 863 mulq %rbp |
| 610 addq %rax,%r13 | 864 addq %rax,%r13 |
| 611 movq -8(%rsi,%r15,1),%rax | 865 movq -8(%rsi,%r15,1),%rax |
| 612 adcq $0,%rdx | 866 adcq $0,%rdx |
| 613 addq %r10,%r13 | 867 addq %r10,%r13 |
| 614 adcq $0,%rdx | 868 adcq $0,%rdx |
| 615 movq %rdi,-32(%r14) | 869 movq %rdi,-32(%r14) |
| 616 movq %rdx,%rdi | 870 movq %rdx,%rdi |
| 617 | 871 |
| 618 mulq %rbx | 872 mulq %rbx |
| 619 addq %rax,%r11 | 873 addq %rax,%r11 |
| 620 » movq» -16(%rcx),%rax | 874 » movq» -8(%rcx),%rax |
| 621 adcq $0,%rdx | 875 adcq $0,%rdx |
| 622 addq -8(%r14),%r11 | 876 addq -8(%r14),%r11 |
| 623 adcq $0,%rdx | 877 adcq $0,%rdx |
| 624 movq %rdx,%r10 | 878 movq %rdx,%r10 |
| 625 | 879 |
| 626 mulq %rbp | 880 mulq %rbp |
| 627 addq %rax,%rdi | 881 addq %rax,%rdi |
| 628 movq (%rsi,%r15,1),%rax | 882 movq (%rsi,%r15,1),%rax |
| 629 adcq $0,%rdx | 883 adcq $0,%rdx |
| 630 addq %r11,%rdi | 884 addq %r11,%rdi |
| (...skipping 13 matching lines...) Expand all Loading... |
| 644 addq %rax,%r13 | 898 addq %rax,%r13 |
| 645 movq 8(%rsi,%r15,1),%rax | 899 movq 8(%rsi,%r15,1),%rax |
| 646 adcq $0,%rdx | 900 adcq $0,%rdx |
| 647 addq %r10,%r13 | 901 addq %r10,%r13 |
| 648 adcq $0,%rdx | 902 adcq $0,%rdx |
| 649 movq %rdi,-16(%r14) | 903 movq %rdi,-16(%r14) |
| 650 movq %rdx,%rdi | 904 movq %rdx,%rdi |
| 651 | 905 |
| 652 mulq %rbx | 906 mulq %rbx |
| 653 addq %rax,%r11 | 907 addq %rax,%r11 |
| 654 » movq» 16(%rcx),%rax | 908 » movq» 8(%rcx),%rax |
| 655 adcq $0,%rdx | 909 adcq $0,%rdx |
| 656 addq 8(%r14),%r11 | 910 addq 8(%r14),%r11 |
| 657 adcq $0,%rdx | 911 adcq $0,%rdx |
| 658 movq %rdx,%r10 | 912 movq %rdx,%r10 |
| 659 | 913 |
| 660 mulq %rbp | 914 mulq %rbp |
| 661 addq %rax,%rdi | 915 addq %rax,%rdi |
| 662 movq 16(%rsi,%r15,1),%rax | 916 movq 16(%rsi,%r15,1),%rax |
| 663 adcq $0,%rdx | 917 adcq $0,%rdx |
| 664 addq %r11,%rdi | 918 addq %r11,%rdi |
| 665 » leaq» 64(%rcx),%rcx | 919 » leaq» 32(%rcx),%rcx |
| 666 adcq $0,%rdx | 920 adcq $0,%rdx |
| 667 movq %r13,-8(%r14) | 921 movq %r13,-8(%r14) |
| 668 movq %rdx,%r13 | 922 movq %rdx,%r13 |
| 669 | 923 |
| 670 addq $32,%r15 | 924 addq $32,%r15 |
| 671 jnz .Linner4x | 925 jnz .Linner4x |
| 672 | 926 |
| 673 mulq %rbx | 927 mulq %rbx |
| 674 addq %rax,%r10 | 928 addq %rax,%r10 |
| 675 » movq» -32(%rcx),%rax | 929 » movq» -16(%rcx),%rax |
| 676 adcq $0,%rdx | 930 adcq $0,%rdx |
| 677 addq 16(%r14),%r10 | 931 addq 16(%r14),%r10 |
| 678 leaq 32(%r14),%r14 | 932 leaq 32(%r14),%r14 |
| 679 adcq $0,%rdx | 933 adcq $0,%rdx |
| 680 movq %rdx,%r11 | 934 movq %rdx,%r11 |
| 681 | 935 |
| 682 mulq %rbp | 936 mulq %rbp |
| 683 addq %rax,%r13 | 937 addq %rax,%r13 |
| 684 movq -8(%rsi),%rax | 938 movq -8(%rsi),%rax |
| 685 adcq $0,%rdx | 939 adcq $0,%rdx |
| 686 addq %r10,%r13 | 940 addq %r10,%r13 |
| 687 adcq $0,%rdx | 941 adcq $0,%rdx |
| 688 movq %rdi,-32(%r14) | 942 movq %rdi,-32(%r14) |
| 689 movq %rdx,%rdi | 943 movq %rdx,%rdi |
| 690 | 944 |
| 691 mulq %rbx | 945 mulq %rbx |
| 692 addq %rax,%r11 | 946 addq %rax,%r11 |
| 693 movq %rbp,%rax | 947 movq %rbp,%rax |
| 694 » movq» -16(%rcx),%rbp | 948 » movq» -8(%rcx),%rbp |
| 695 adcq $0,%rdx | 949 adcq $0,%rdx |
| 696 addq -8(%r14),%r11 | 950 addq -8(%r14),%r11 |
| 697 adcq $0,%rdx | 951 adcq $0,%rdx |
| 698 movq %rdx,%r10 | 952 movq %rdx,%r10 |
| 699 | 953 |
| 700 mulq %rbp | 954 mulq %rbp |
| 701 addq %rax,%rdi | 955 addq %rax,%rdi |
| 702 movq (%rsi,%r9,1),%rax | 956 movq (%rsi,%r9,1),%rax |
| 703 adcq $0,%rdx | 957 adcq $0,%rdx |
| 704 addq %r11,%rdi | 958 addq %r11,%rdi |
| 705 adcq $0,%rdx | 959 adcq $0,%rdx |
| 706 movq %r13,-24(%r14) | 960 movq %r13,-24(%r14) |
| 707 movq %rdx,%r13 | 961 movq %rdx,%r13 |
| 708 | 962 |
| 709 .byte 102,72,15,126,195 | |
| 710 movq %rdi,-16(%r14) | 963 movq %rdi,-16(%r14) |
| 711 » leaq» (%rcx,%r9,2),%rcx | 964 » leaq» (%rcx,%r9,1),%rcx |
| 712 | 965 |
| 713 xorq %rdi,%rdi | 966 xorq %rdi,%rdi |
| 714 addq %r10,%r13 | 967 addq %r10,%r13 |
| 715 adcq $0,%rdi | 968 adcq $0,%rdi |
| 716 addq (%r14),%r13 | 969 addq (%r14),%r13 |
| 717 adcq $0,%rdi | 970 adcq $0,%rdi |
| 718 movq %r13,-8(%r14) | 971 movq %r13,-8(%r14) |
| 719 | 972 |
| 720 cmpq 16+8(%rsp),%r12 | 973 cmpq 16+8(%rsp),%r12 |
| 721 jb .Louter4x | 974 jb .Louter4x |
| 975 xorq %rax,%rax |
| 722 subq %r13,%rbp | 976 subq %r13,%rbp |
| 723 adcq %r15,%r15 | 977 adcq %r15,%r15 |
| 724 orq %r15,%rdi | 978 orq %r15,%rdi |
| 725 » xorq» $1,%rdi | 979 » subq» %rdi,%rax |
| 726 leaq (%r14,%r9,1),%rbx | 980 leaq (%r14,%r9,1),%rbx |
| 727 » leaq» (%rcx,%rdi,8),%rbp | 981 » movq» (%rcx),%r12 |
| 982 » leaq» (%rcx),%rbp |
| 728 movq %r9,%rcx | 983 movq %r9,%rcx |
| 729 sarq $3+2,%rcx | 984 sarq $3+2,%rcx |
| 730 movq 56+8(%rsp),%rdi | 985 movq 56+8(%rsp),%rdi |
| 731 » jmp» .Lsqr4x_sub | 986 » decq» %r12 |
| 987 » xorq» %r10,%r10 |
| 988 » movq» 8(%rbp),%r13 |
| 989 » movq» 16(%rbp),%r14 |
| 990 » movq» 24(%rbp),%r15 |
| 991 » jmp» .Lsqr4x_sub_entry |
| 732 .size mul4x_internal,.-mul4x_internal | 992 .size mul4x_internal,.-mul4x_internal |
| 733 .globl bn_power5 | 993 .globl bn_power5 |
| 734 .hidden bn_power5 | 994 .hidden bn_power5 |
| 735 .type bn_power5,@function | 995 .type bn_power5,@function |
| 736 .align 32 | 996 .align 32 |
| 737 bn_power5: | 997 bn_power5: |
| 738 movq %rsp,%rax | 998 movq %rsp,%rax |
| 739 pushq %rbx | 999 pushq %rbx |
| 740 pushq %rbp | 1000 pushq %rbp |
| 741 pushq %r12 | 1001 pushq %r12 |
| 742 pushq %r13 | 1002 pushq %r13 |
| 743 pushq %r14 | 1003 pushq %r14 |
| 744 pushq %r15 | 1004 pushq %r15 |
| 745 » movl» %r9d,%r10d | 1005 |
| 746 shll $3,%r9d | 1006 shll $3,%r9d |
| 747 » shll» $3+2,%r10d | 1007 » leal» (%r9,%r9,2),%r10d |
| 748 negq %r9 | 1008 negq %r9 |
| 749 movq (%r8),%r8 | 1009 movq (%r8),%r8 |
| 750 | 1010 |
| 751 | 1011 |
| 752 | 1012 |
| 753 | 1013 |
| 754 | 1014 |
| 755 | 1015 |
| 756 | 1016 |
| 757 » leaq» -64(%rsp,%r9,2),%r11 | 1017 |
| 758 » subq» %rsi,%r11 | 1018 » leaq» -320(%rsp,%r9,2),%r11 |
| 1019 » subq» %rdi,%r11 |
| 759 andq $4095,%r11 | 1020 andq $4095,%r11 |
| 760 cmpq %r11,%r10 | 1021 cmpq %r11,%r10 |
| 761 jb .Lpwr_sp_alt | 1022 jb .Lpwr_sp_alt |
| 762 subq %r11,%rsp | 1023 subq %r11,%rsp |
| 763 » leaq» -64(%rsp,%r9,2),%rsp | 1024 » leaq» -320(%rsp,%r9,2),%rsp |
| 764 jmp .Lpwr_sp_done | 1025 jmp .Lpwr_sp_done |
| 765 | 1026 |
| 766 .align 32 | 1027 .align 32 |
| 767 .Lpwr_sp_alt: | 1028 .Lpwr_sp_alt: |
| 768 » leaq» 4096-64(,%r9,2),%r10 | 1029 » leaq» 4096-320(,%r9,2),%r10 |
| 769 » leaq» -64(%rsp,%r9,2),%rsp | 1030 » leaq» -320(%rsp,%r9,2),%rsp |
| 770 subq %r10,%r11 | 1031 subq %r10,%r11 |
| 771 movq $0,%r10 | 1032 movq $0,%r10 |
| 772 cmovcq %r10,%r11 | 1033 cmovcq %r10,%r11 |
| 773 subq %r11,%rsp | 1034 subq %r11,%rsp |
| 774 .Lpwr_sp_done: | 1035 .Lpwr_sp_done: |
| 775 andq $-64,%rsp | 1036 andq $-64,%rsp |
| 776 movq %r9,%r10 | 1037 movq %r9,%r10 |
| 777 negq %r9 | 1038 negq %r9 |
| 778 | 1039 |
| 779 | 1040 |
| 780 | 1041 |
| 781 | 1042 |
| 782 | 1043 |
| 783 | 1044 |
| 784 | 1045 |
| 785 | 1046 |
| 786 | 1047 |
| 787 | 1048 |
| 788 movq %r8,32(%rsp) | 1049 movq %r8,32(%rsp) |
| 789 movq %rax,40(%rsp) | 1050 movq %rax,40(%rsp) |
| 790 .Lpower5_body: | 1051 .Lpower5_body: |
| 791 .byte 102,72,15,110,207 | 1052 .byte 102,72,15,110,207 |
| 792 .byte 102,72,15,110,209 | 1053 .byte 102,72,15,110,209 |
| 793 .byte 102,73,15,110,218 | 1054 .byte 102,73,15,110,218 |
| 794 .byte 102,72,15,110,226 | 1055 .byte 102,72,15,110,226 |
| 795 | 1056 |
| 796 call __bn_sqr8x_internal | 1057 call __bn_sqr8x_internal |
| 1058 call __bn_post4x_internal |
| 797 call __bn_sqr8x_internal | 1059 call __bn_sqr8x_internal |
| 1060 call __bn_post4x_internal |
| 798 call __bn_sqr8x_internal | 1061 call __bn_sqr8x_internal |
| 1062 call __bn_post4x_internal |
| 799 call __bn_sqr8x_internal | 1063 call __bn_sqr8x_internal |
| 1064 call __bn_post4x_internal |
| 800 call __bn_sqr8x_internal | 1065 call __bn_sqr8x_internal |
| 1066 call __bn_post4x_internal |
| 801 | 1067 |
| 802 .byte 102,72,15,126,209 | 1068 .byte 102,72,15,126,209 |
| 803 .byte 102,72,15,126,226 | 1069 .byte 102,72,15,126,226 |
| 804 movq %rsi,%rdi | 1070 movq %rsi,%rdi |
| 805 movq 40(%rsp),%rax | 1071 movq 40(%rsp),%rax |
| 806 leaq 32(%rsp),%r8 | 1072 leaq 32(%rsp),%r8 |
| 807 | 1073 |
| 808 call mul4x_internal | 1074 call mul4x_internal |
| 809 | 1075 |
| 810 movq 40(%rsp),%rsi | 1076 movq 40(%rsp),%rsi |
| (...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1335 leaq (%rcx,%r11,2),%r8 | 1601 leaq (%rcx,%r11,2),%r8 |
| 1336 shrq $63,%r11 | 1602 shrq $63,%r11 |
| 1337 orq %r10,%r8 | 1603 orq %r10,%r8 |
| 1338 mulq %rax | 1604 mulq %rax |
| 1339 negq %r15 | 1605 negq %r15 |
| 1340 adcq %rax,%rbx | 1606 adcq %rax,%rbx |
| 1341 adcq %rdx,%r8 | 1607 adcq %rdx,%r8 |
| 1342 movq %rbx,-16(%rdi) | 1608 movq %rbx,-16(%rdi) |
| 1343 movq %r8,-8(%rdi) | 1609 movq %r8,-8(%rdi) |
| 1344 .byte 102,72,15,126,213 | 1610 .byte 102,72,15,126,213 |
| 1345 sqr8x_reduction: | 1611 __bn_sqr8x_reduction: |
| 1346 xorq %rax,%rax | 1612 xorq %rax,%rax |
| 1347 » leaq» (%rbp,%r9,2),%rcx | 1613 » leaq» (%r9,%rbp,1),%rcx |
| 1348 leaq 48+8(%rsp,%r9,2),%rdx | 1614 leaq 48+8(%rsp,%r9,2),%rdx |
| 1349 movq %rcx,0+8(%rsp) | 1615 movq %rcx,0+8(%rsp) |
| 1350 leaq 48+8(%rsp,%r9,1),%rdi | 1616 leaq 48+8(%rsp,%r9,1),%rdi |
| 1351 movq %rdx,8+8(%rsp) | 1617 movq %rdx,8+8(%rsp) |
| 1352 negq %r9 | 1618 negq %r9 |
| 1353 jmp .L8x_reduction_loop | 1619 jmp .L8x_reduction_loop |
| 1354 | 1620 |
| 1355 .align 32 | 1621 .align 32 |
| 1356 .L8x_reduction_loop: | 1622 .L8x_reduction_loop: |
| 1357 leaq (%rdi,%r9,1),%rdi | 1623 leaq (%rdi,%r9,1),%rdi |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1370 .byte 0x67 | 1636 .byte 0x67 |
| 1371 movq %rbx,%r8 | 1637 movq %rbx,%r8 |
| 1372 imulq 32+8(%rsp),%rbx | 1638 imulq 32+8(%rsp),%rbx |
| 1373 movq 0(%rbp),%rax | 1639 movq 0(%rbp),%rax |
| 1374 movl $8,%ecx | 1640 movl $8,%ecx |
| 1375 jmp .L8x_reduce | 1641 jmp .L8x_reduce |
| 1376 | 1642 |
| 1377 .align 32 | 1643 .align 32 |
| 1378 .L8x_reduce: | 1644 .L8x_reduce: |
| 1379 mulq %rbx | 1645 mulq %rbx |
| 1380 » movq» 16(%rbp),%rax | 1646 » movq» 8(%rbp),%rax |
| 1381 negq %r8 | 1647 negq %r8 |
| 1382 movq %rdx,%r8 | 1648 movq %rdx,%r8 |
| 1383 adcq $0,%r8 | 1649 adcq $0,%r8 |
| 1384 | 1650 |
| 1385 mulq %rbx | 1651 mulq %rbx |
| 1386 addq %rax,%r9 | 1652 addq %rax,%r9 |
| 1387 » movq» 32(%rbp),%rax | 1653 » movq» 16(%rbp),%rax |
| 1388 adcq $0,%rdx | 1654 adcq $0,%rdx |
| 1389 addq %r9,%r8 | 1655 addq %r9,%r8 |
| 1390 movq %rbx,48-8+8(%rsp,%rcx,8) | 1656 movq %rbx,48-8+8(%rsp,%rcx,8) |
| 1391 movq %rdx,%r9 | 1657 movq %rdx,%r9 |
| 1392 adcq $0,%r9 | 1658 adcq $0,%r9 |
| 1393 | 1659 |
| 1394 mulq %rbx | 1660 mulq %rbx |
| 1395 addq %rax,%r10 | 1661 addq %rax,%r10 |
| 1396 » movq» 48(%rbp),%rax | 1662 » movq» 24(%rbp),%rax |
| 1397 adcq $0,%rdx | 1663 adcq $0,%rdx |
| 1398 addq %r10,%r9 | 1664 addq %r10,%r9 |
| 1399 movq 32+8(%rsp),%rsi | 1665 movq 32+8(%rsp),%rsi |
| 1400 movq %rdx,%r10 | 1666 movq %rdx,%r10 |
| 1401 adcq $0,%r10 | 1667 adcq $0,%r10 |
| 1402 | 1668 |
| 1403 mulq %rbx | 1669 mulq %rbx |
| 1404 addq %rax,%r11 | 1670 addq %rax,%r11 |
| 1405 » movq» 64(%rbp),%rax | 1671 » movq» 32(%rbp),%rax |
| 1406 adcq $0,%rdx | 1672 adcq $0,%rdx |
| 1407 imulq %r8,%rsi | 1673 imulq %r8,%rsi |
| 1408 addq %r11,%r10 | 1674 addq %r11,%r10 |
| 1409 movq %rdx,%r11 | 1675 movq %rdx,%r11 |
| 1410 adcq $0,%r11 | 1676 adcq $0,%r11 |
| 1411 | 1677 |
| 1412 mulq %rbx | 1678 mulq %rbx |
| 1413 addq %rax,%r12 | 1679 addq %rax,%r12 |
| 1414 » movq» 80(%rbp),%rax | 1680 » movq» 40(%rbp),%rax |
| 1415 adcq $0,%rdx | 1681 adcq $0,%rdx |
| 1416 addq %r12,%r11 | 1682 addq %r12,%r11 |
| 1417 movq %rdx,%r12 | 1683 movq %rdx,%r12 |
| 1418 adcq $0,%r12 | 1684 adcq $0,%r12 |
| 1419 | 1685 |
| 1420 mulq %rbx | 1686 mulq %rbx |
| 1421 addq %rax,%r13 | 1687 addq %rax,%r13 |
| 1422 » movq» 96(%rbp),%rax | 1688 » movq» 48(%rbp),%rax |
| 1423 adcq $0,%rdx | 1689 adcq $0,%rdx |
| 1424 addq %r13,%r12 | 1690 addq %r13,%r12 |
| 1425 movq %rdx,%r13 | 1691 movq %rdx,%r13 |
| 1426 adcq $0,%r13 | 1692 adcq $0,%r13 |
| 1427 | 1693 |
| 1428 mulq %rbx | 1694 mulq %rbx |
| 1429 addq %rax,%r14 | 1695 addq %rax,%r14 |
| 1430 » movq» 112(%rbp),%rax | 1696 » movq» 56(%rbp),%rax |
| 1431 adcq $0,%rdx | 1697 adcq $0,%rdx |
| 1432 addq %r14,%r13 | 1698 addq %r14,%r13 |
| 1433 movq %rdx,%r14 | 1699 movq %rdx,%r14 |
| 1434 adcq $0,%r14 | 1700 adcq $0,%r14 |
| 1435 | 1701 |
| 1436 mulq %rbx | 1702 mulq %rbx |
| 1437 movq %rsi,%rbx | 1703 movq %rsi,%rbx |
| 1438 addq %rax,%r15 | 1704 addq %rax,%r15 |
| 1439 movq 0(%rbp),%rax | 1705 movq 0(%rbp),%rax |
| 1440 adcq $0,%rdx | 1706 adcq $0,%rdx |
| 1441 addq %r15,%r14 | 1707 addq %r15,%r14 |
| 1442 movq %rdx,%r15 | 1708 movq %rdx,%r15 |
| 1443 adcq $0,%r15 | 1709 adcq $0,%r15 |
| 1444 | 1710 |
| 1445 decl %ecx | 1711 decl %ecx |
| 1446 jnz .L8x_reduce | 1712 jnz .L8x_reduce |
| 1447 | 1713 |
| 1448 » leaq» 128(%rbp),%rbp | 1714 » leaq» 64(%rbp),%rbp |
| 1449 xorq %rax,%rax | 1715 xorq %rax,%rax |
| 1450 movq 8+8(%rsp),%rdx | 1716 movq 8+8(%rsp),%rdx |
| 1451 cmpq 0+8(%rsp),%rbp | 1717 cmpq 0+8(%rsp),%rbp |
| 1452 jae .L8x_no_tail | 1718 jae .L8x_no_tail |
| 1453 | 1719 |
| 1454 .byte 0x66 | 1720 .byte 0x66 |
| 1455 addq 0(%rdi),%r8 | 1721 addq 0(%rdi),%r8 |
| 1456 adcq 8(%rdi),%r9 | 1722 adcq 8(%rdi),%r9 |
| 1457 adcq 16(%rdi),%r10 | 1723 adcq 16(%rdi),%r10 |
| 1458 adcq 24(%rdi),%r11 | 1724 adcq 24(%rdi),%r11 |
| 1459 adcq 32(%rdi),%r12 | 1725 adcq 32(%rdi),%r12 |
| 1460 adcq 40(%rdi),%r13 | 1726 adcq 40(%rdi),%r13 |
| 1461 adcq 48(%rdi),%r14 | 1727 adcq 48(%rdi),%r14 |
| 1462 adcq 56(%rdi),%r15 | 1728 adcq 56(%rdi),%r15 |
| 1463 sbbq %rsi,%rsi | 1729 sbbq %rsi,%rsi |
| 1464 | 1730 |
| 1465 movq 48+56+8(%rsp),%rbx | 1731 movq 48+56+8(%rsp),%rbx |
| 1466 movl $8,%ecx | 1732 movl $8,%ecx |
| 1467 movq 0(%rbp),%rax | 1733 movq 0(%rbp),%rax |
| 1468 jmp .L8x_tail | 1734 jmp .L8x_tail |
| 1469 | 1735 |
| 1470 .align 32 | 1736 .align 32 |
| 1471 .L8x_tail: | 1737 .L8x_tail: |
| 1472 mulq %rbx | 1738 mulq %rbx |
| 1473 addq %rax,%r8 | 1739 addq %rax,%r8 |
| 1474 » movq» 16(%rbp),%rax | 1740 » movq» 8(%rbp),%rax |
| 1475 movq %r8,(%rdi) | 1741 movq %r8,(%rdi) |
| 1476 movq %rdx,%r8 | 1742 movq %rdx,%r8 |
| 1477 adcq $0,%r8 | 1743 adcq $0,%r8 |
| 1478 | 1744 |
| 1479 mulq %rbx | 1745 mulq %rbx |
| 1480 addq %rax,%r9 | 1746 addq %rax,%r9 |
| 1481 » movq» 32(%rbp),%rax | 1747 » movq» 16(%rbp),%rax |
| 1482 adcq $0,%rdx | 1748 adcq $0,%rdx |
| 1483 addq %r9,%r8 | 1749 addq %r9,%r8 |
| 1484 leaq 8(%rdi),%rdi | 1750 leaq 8(%rdi),%rdi |
| 1485 movq %rdx,%r9 | 1751 movq %rdx,%r9 |
| 1486 adcq $0,%r9 | 1752 adcq $0,%r9 |
| 1487 | 1753 |
| 1488 mulq %rbx | 1754 mulq %rbx |
| 1489 addq %rax,%r10 | 1755 addq %rax,%r10 |
| 1490 » movq» 48(%rbp),%rax | 1756 » movq» 24(%rbp),%rax |
| 1491 adcq $0,%rdx | 1757 adcq $0,%rdx |
| 1492 addq %r10,%r9 | 1758 addq %r10,%r9 |
| 1493 movq %rdx,%r10 | 1759 movq %rdx,%r10 |
| 1494 adcq $0,%r10 | 1760 adcq $0,%r10 |
| 1495 | 1761 |
| 1496 mulq %rbx | 1762 mulq %rbx |
| 1497 addq %rax,%r11 | 1763 addq %rax,%r11 |
| 1498 » movq» 64(%rbp),%rax | 1764 » movq» 32(%rbp),%rax |
| 1499 adcq $0,%rdx | 1765 adcq $0,%rdx |
| 1500 addq %r11,%r10 | 1766 addq %r11,%r10 |
| 1501 movq %rdx,%r11 | 1767 movq %rdx,%r11 |
| 1502 adcq $0,%r11 | 1768 adcq $0,%r11 |
| 1503 | 1769 |
| 1504 mulq %rbx | 1770 mulq %rbx |
| 1505 addq %rax,%r12 | 1771 addq %rax,%r12 |
| 1506 » movq» 80(%rbp),%rax | 1772 » movq» 40(%rbp),%rax |
| 1507 adcq $0,%rdx | 1773 adcq $0,%rdx |
| 1508 addq %r12,%r11 | 1774 addq %r12,%r11 |
| 1509 movq %rdx,%r12 | 1775 movq %rdx,%r12 |
| 1510 adcq $0,%r12 | 1776 adcq $0,%r12 |
| 1511 | 1777 |
| 1512 mulq %rbx | 1778 mulq %rbx |
| 1513 addq %rax,%r13 | 1779 addq %rax,%r13 |
| 1514 » movq» 96(%rbp),%rax | 1780 » movq» 48(%rbp),%rax |
| 1515 adcq $0,%rdx | 1781 adcq $0,%rdx |
| 1516 addq %r13,%r12 | 1782 addq %r13,%r12 |
| 1517 movq %rdx,%r13 | 1783 movq %rdx,%r13 |
| 1518 adcq $0,%r13 | 1784 adcq $0,%r13 |
| 1519 | 1785 |
| 1520 mulq %rbx | 1786 mulq %rbx |
| 1521 addq %rax,%r14 | 1787 addq %rax,%r14 |
| 1522 » movq» 112(%rbp),%rax | 1788 » movq» 56(%rbp),%rax |
| 1523 adcq $0,%rdx | 1789 adcq $0,%rdx |
| 1524 addq %r14,%r13 | 1790 addq %r14,%r13 |
| 1525 movq %rdx,%r14 | 1791 movq %rdx,%r14 |
| 1526 adcq $0,%r14 | 1792 adcq $0,%r14 |
| 1527 | 1793 |
| 1528 mulq %rbx | 1794 mulq %rbx |
| 1529 movq 48-16+8(%rsp,%rcx,8),%rbx | 1795 movq 48-16+8(%rsp,%rcx,8),%rbx |
| 1530 addq %rax,%r15 | 1796 addq %rax,%r15 |
| 1531 adcq $0,%rdx | 1797 adcq $0,%rdx |
| 1532 addq %r15,%r14 | 1798 addq %r15,%r14 |
| 1533 movq 0(%rbp),%rax | 1799 movq 0(%rbp),%rax |
| 1534 movq %rdx,%r15 | 1800 movq %rdx,%r15 |
| 1535 adcq $0,%r15 | 1801 adcq $0,%r15 |
| 1536 | 1802 |
| 1537 decl %ecx | 1803 decl %ecx |
| 1538 jnz .L8x_tail | 1804 jnz .L8x_tail |
| 1539 | 1805 |
| 1540 » leaq» 128(%rbp),%rbp | 1806 » leaq» 64(%rbp),%rbp |
| 1541 movq 8+8(%rsp),%rdx | 1807 movq 8+8(%rsp),%rdx |
| 1542 cmpq 0+8(%rsp),%rbp | 1808 cmpq 0+8(%rsp),%rbp |
| 1543 jae .L8x_tail_done | 1809 jae .L8x_tail_done |
| 1544 | 1810 |
| 1545 movq 48+56+8(%rsp),%rbx | 1811 movq 48+56+8(%rsp),%rbx |
| 1546 negq %rsi | 1812 negq %rsi |
| 1547 movq 0(%rbp),%rax | 1813 movq 0(%rbp),%rax |
| 1548 adcq 0(%rdi),%r8 | 1814 adcq 0(%rdi),%r8 |
| 1549 adcq 8(%rdi),%r9 | 1815 adcq 8(%rdi),%r9 |
| 1550 adcq 16(%rdi),%r10 | 1816 adcq 16(%rdi),%r10 |
| 1551 adcq 24(%rdi),%r11 | 1817 adcq 24(%rdi),%r11 |
| 1552 adcq 32(%rdi),%r12 | 1818 adcq 32(%rdi),%r12 |
| 1553 adcq 40(%rdi),%r13 | 1819 adcq 40(%rdi),%r13 |
| 1554 adcq 48(%rdi),%r14 | 1820 adcq 48(%rdi),%r14 |
| 1555 adcq 56(%rdi),%r15 | 1821 adcq 56(%rdi),%r15 |
| 1556 sbbq %rsi,%rsi | 1822 sbbq %rsi,%rsi |
| 1557 | 1823 |
| 1558 movl $8,%ecx | 1824 movl $8,%ecx |
| 1559 jmp .L8x_tail | 1825 jmp .L8x_tail |
| 1560 | 1826 |
| 1561 .align 32 | 1827 .align 32 |
| 1562 .L8x_tail_done: | 1828 .L8x_tail_done: |
| 1563 addq (%rdx),%r8 | 1829 addq (%rdx),%r8 |
| 1830 adcq $0,%r9 |
| 1831 adcq $0,%r10 |
| 1832 adcq $0,%r11 |
| 1833 adcq $0,%r12 |
| 1834 adcq $0,%r13 |
| 1835 adcq $0,%r14 |
| 1836 adcq $0,%r15 |
| 1837 |
| 1838 |
| 1564 xorq %rax,%rax | 1839 xorq %rax,%rax |
| 1565 | 1840 |
| 1566 negq %rsi | 1841 negq %rsi |
| 1567 .L8x_no_tail: | 1842 .L8x_no_tail: |
| 1568 adcq 0(%rdi),%r8 | 1843 adcq 0(%rdi),%r8 |
| 1569 adcq 8(%rdi),%r9 | 1844 adcq 8(%rdi),%r9 |
| 1570 adcq 16(%rdi),%r10 | 1845 adcq 16(%rdi),%r10 |
| 1571 adcq 24(%rdi),%r11 | 1846 adcq 24(%rdi),%r11 |
| 1572 adcq 32(%rdi),%r12 | 1847 adcq 32(%rdi),%r12 |
| 1573 adcq 40(%rdi),%r13 | 1848 adcq 40(%rdi),%r13 |
| 1574 adcq 48(%rdi),%r14 | 1849 adcq 48(%rdi),%r14 |
| 1575 adcq 56(%rdi),%r15 | 1850 adcq 56(%rdi),%r15 |
| 1576 adcq $0,%rax | 1851 adcq $0,%rax |
| 1577 » movq» -16(%rbp),%rcx | 1852 » movq» -8(%rbp),%rcx |
| 1578 xorq %rsi,%rsi | 1853 xorq %rsi,%rsi |
| 1579 | 1854 |
| 1580 .byte 102,72,15,126,213 | 1855 .byte 102,72,15,126,213 |
| 1581 | 1856 |
| 1582 movq %r8,0(%rdi) | 1857 movq %r8,0(%rdi) |
| 1583 movq %r9,8(%rdi) | 1858 movq %r9,8(%rdi) |
| 1584 .byte 102,73,15,126,217 | 1859 .byte 102,73,15,126,217 |
| 1585 movq %r10,16(%rdi) | 1860 movq %r10,16(%rdi) |
| 1586 movq %r11,24(%rdi) | 1861 movq %r11,24(%rdi) |
| 1587 movq %r12,32(%rdi) | 1862 movq %r12,32(%rdi) |
| 1588 movq %r13,40(%rdi) | 1863 movq %r13,40(%rdi) |
| 1589 movq %r14,48(%rdi) | 1864 movq %r14,48(%rdi) |
| 1590 movq %r15,56(%rdi) | 1865 movq %r15,56(%rdi) |
| 1591 leaq 64(%rdi),%rdi | 1866 leaq 64(%rdi),%rdi |
| 1592 | 1867 |
| 1593 cmpq %rdx,%rdi | 1868 cmpq %rdx,%rdi |
| 1594 jb .L8x_reduction_loop | 1869 jb .L8x_reduction_loop |
| 1870 .byte 0xf3,0xc3 |
| 1871 .size bn_sqr8x_internal,.-bn_sqr8x_internal |
| 1872 .type __bn_post4x_internal,@function |
| 1873 .align 32 |
| 1874 __bn_post4x_internal: |
| 1875 movq 0(%rbp),%r12 |
| 1876 leaq (%rdi,%r9,1),%rbx |
| 1877 movq %r9,%rcx |
| 1878 .byte 102,72,15,126,207 |
| 1879 negq %rax |
| 1880 .byte 102,72,15,126,206 |
| 1881 sarq $3+2,%rcx |
| 1882 decq %r12 |
| 1883 xorq %r10,%r10 |
| 1884 movq 8(%rbp),%r13 |
| 1885 movq 16(%rbp),%r14 |
| 1886 movq 24(%rbp),%r15 |
| 1887 jmp .Lsqr4x_sub_entry |
| 1595 | 1888 |
| 1596 » subq» %r15,%rcx | 1889 .align» 16 |
| 1597 » leaq» (%rdi,%r9,1),%rbx | 1890 .Lsqr4x_sub: |
| 1598 » adcq» %rsi,%rsi | 1891 » movq» 0(%rbp),%r12 |
| 1599 » movq» %r9,%rcx | 1892 » movq» 8(%rbp),%r13 |
| 1600 » orq» %rsi,%rax | 1893 » movq» 16(%rbp),%r14 |
| 1601 .byte» 102,72,15,126,207 | 1894 » movq» 24(%rbp),%r15 |
| 1602 » xorq» $1,%rax | 1895 .Lsqr4x_sub_entry: |
| 1603 .byte» 102,72,15,126,206 | 1896 » leaq» 32(%rbp),%rbp |
| 1604 » leaq» (%rbp,%rax,8),%rbp | 1897 » notq» %r12 |
| 1605 » sarq» $3+2,%rcx | 1898 » notq» %r13 |
| 1606 » jmp» .Lsqr4x_sub | 1899 » notq» %r14 |
| 1900 » notq» %r15 |
| 1901 » andq» %rax,%r12 |
| 1902 » andq» %rax,%r13 |
| 1903 » andq» %rax,%r14 |
| 1904 » andq» %rax,%r15 |
| 1607 | 1905 |
| 1608 .align» 32 | 1906 » negq» %r10 |
| 1609 .Lsqr4x_sub: | 1907 » adcq» 0(%rbx),%r12 |
| 1610 .byte» 0x66 | 1908 » adcq» 8(%rbx),%r13 |
| 1611 » movq» 0(%rbx),%r12 | 1909 » adcq» 16(%rbx),%r14 |
| 1612 » movq» 8(%rbx),%r13 | 1910 » adcq» 24(%rbx),%r15 |
| 1613 » sbbq» 0(%rbp),%r12 | 1911 » movq» %r12,0(%rdi) |
| 1614 » movq» 16(%rbx),%r14 | |
| 1615 » sbbq» 16(%rbp),%r13 | |
| 1616 » movq» 24(%rbx),%r15 | |
| 1617 leaq 32(%rbx),%rbx | 1912 leaq 32(%rbx),%rbx |
| 1618 sbbq 32(%rbp),%r14 | |
| 1619 movq %r12,0(%rdi) | |
| 1620 sbbq 48(%rbp),%r15 | |
| 1621 leaq 64(%rbp),%rbp | |
| 1622 movq %r13,8(%rdi) | 1913 movq %r13,8(%rdi) |
| 1914 sbbq %r10,%r10 |
| 1623 movq %r14,16(%rdi) | 1915 movq %r14,16(%rdi) |
| 1624 movq %r15,24(%rdi) | 1916 movq %r15,24(%rdi) |
| 1625 leaq 32(%rdi),%rdi | 1917 leaq 32(%rdi),%rdi |
| 1626 | 1918 |
| 1627 incq %rcx | 1919 incq %rcx |
| 1628 jnz .Lsqr4x_sub | 1920 jnz .Lsqr4x_sub |
| 1921 |
| 1629 movq %r9,%r10 | 1922 movq %r9,%r10 |
| 1630 negq %r9 | 1923 negq %r9 |
| 1631 .byte 0xf3,0xc3 | 1924 .byte 0xf3,0xc3 |
| 1632 .size» bn_sqr8x_internal,.-bn_sqr8x_internal | 1925 .size» __bn_post4x_internal,.-__bn_post4x_internal |
| 1633 .globl bn_from_montgomery | 1926 .globl bn_from_montgomery |
| 1634 .hidden bn_from_montgomery | 1927 .hidden bn_from_montgomery |
| 1635 .type bn_from_montgomery,@function | 1928 .type bn_from_montgomery,@function |
| 1636 .align 32 | 1929 .align 32 |
| 1637 bn_from_montgomery: | 1930 bn_from_montgomery: |
| 1638 testl $7,%r9d | 1931 testl $7,%r9d |
| 1639 jz bn_from_mont8x | 1932 jz bn_from_mont8x |
| 1640 xorl %eax,%eax | 1933 xorl %eax,%eax |
| 1641 .byte 0xf3,0xc3 | 1934 .byte 0xf3,0xc3 |
| 1642 .size bn_from_montgomery,.-bn_from_montgomery | 1935 .size bn_from_montgomery,.-bn_from_montgomery |
| 1643 | 1936 |
| 1644 .type bn_from_mont8x,@function | 1937 .type bn_from_mont8x,@function |
| 1645 .align 32 | 1938 .align 32 |
| 1646 bn_from_mont8x: | 1939 bn_from_mont8x: |
| 1647 .byte 0x67 | 1940 .byte 0x67 |
| 1648 movq %rsp,%rax | 1941 movq %rsp,%rax |
| 1649 pushq %rbx | 1942 pushq %rbx |
| 1650 pushq %rbp | 1943 pushq %rbp |
| 1651 pushq %r12 | 1944 pushq %r12 |
| 1652 pushq %r13 | 1945 pushq %r13 |
| 1653 pushq %r14 | 1946 pushq %r14 |
| 1654 pushq %r15 | 1947 pushq %r15 |
| 1655 .byte» 0x67 | 1948 |
| 1656 » movl» %r9d,%r10d | |
| 1657 shll $3,%r9d | 1949 shll $3,%r9d |
| 1658 » shll» $3+2,%r10d | 1950 » leaq» (%r9,%r9,2),%r10 |
| 1659 negq %r9 | 1951 negq %r9 |
| 1660 movq (%r8),%r8 | 1952 movq (%r8),%r8 |
| 1661 | 1953 |
| 1662 | 1954 |
| 1663 | 1955 |
| 1664 | 1956 |
| 1665 | 1957 |
| 1666 | 1958 |
| 1667 | 1959 |
| 1668 » leaq» -64(%rsp,%r9,2),%r11 | 1960 |
| 1669 » subq» %rsi,%r11 | 1961 » leaq» -320(%rsp,%r9,2),%r11 |
| 1962 » subq» %rdi,%r11 |
| 1670 andq $4095,%r11 | 1963 andq $4095,%r11 |
| 1671 cmpq %r11,%r10 | 1964 cmpq %r11,%r10 |
| 1672 jb .Lfrom_sp_alt | 1965 jb .Lfrom_sp_alt |
| 1673 subq %r11,%rsp | 1966 subq %r11,%rsp |
| 1674 » leaq» -64(%rsp,%r9,2),%rsp | 1967 » leaq» -320(%rsp,%r9,2),%rsp |
| 1675 jmp .Lfrom_sp_done | 1968 jmp .Lfrom_sp_done |
| 1676 | 1969 |
| 1677 .align 32 | 1970 .align 32 |
| 1678 .Lfrom_sp_alt: | 1971 .Lfrom_sp_alt: |
| 1679 » leaq» 4096-64(,%r9,2),%r10 | 1972 » leaq» 4096-320(,%r9,2),%r10 |
| 1680 » leaq» -64(%rsp,%r9,2),%rsp | 1973 » leaq» -320(%rsp,%r9,2),%rsp |
| 1681 subq %r10,%r11 | 1974 subq %r10,%r11 |
| 1682 movq $0,%r10 | 1975 movq $0,%r10 |
| 1683 cmovcq %r10,%r11 | 1976 cmovcq %r10,%r11 |
| 1684 subq %r11,%rsp | 1977 subq %r11,%rsp |
| 1685 .Lfrom_sp_done: | 1978 .Lfrom_sp_done: |
| 1686 andq $-64,%rsp | 1979 andq $-64,%rsp |
| 1687 movq %r9,%r10 | 1980 movq %r9,%r10 |
| 1688 negq %r9 | 1981 negq %r9 |
| 1689 | 1982 |
| 1690 | 1983 |
| (...skipping 30 matching lines...) Expand all Loading... |
| 1721 movdqa %xmm4,48(%rax) | 2014 movdqa %xmm4,48(%rax) |
| 1722 leaq 64(%rax),%rax | 2015 leaq 64(%rax),%rax |
| 1723 subq $64,%r11 | 2016 subq $64,%r11 |
| 1724 jnz .Lmul_by_1 | 2017 jnz .Lmul_by_1 |
| 1725 | 2018 |
| 1726 .byte 102,72,15,110,207 | 2019 .byte 102,72,15,110,207 |
| 1727 .byte 102,72,15,110,209 | 2020 .byte 102,72,15,110,209 |
| 1728 .byte 0x67 | 2021 .byte 0x67 |
| 1729 movq %rcx,%rbp | 2022 movq %rcx,%rbp |
| 1730 .byte 102,73,15,110,218 | 2023 .byte 102,73,15,110,218 |
| 1731 » call» sqr8x_reduction | 2024 » call» __bn_sqr8x_reduction |
| 2025 » call» __bn_post4x_internal |
| 1732 | 2026 |
| 1733 pxor %xmm0,%xmm0 | 2027 pxor %xmm0,%xmm0 |
| 1734 leaq 48(%rsp),%rax | 2028 leaq 48(%rsp),%rax |
| 1735 movq 40(%rsp),%rsi | 2029 movq 40(%rsp),%rsi |
| 1736 jmp .Lfrom_mont_zero | 2030 jmp .Lfrom_mont_zero |
| 1737 | 2031 |
| 1738 .align 32 | 2032 .align 32 |
| 1739 .Lfrom_mont_zero: | 2033 .Lfrom_mont_zero: |
| 1740 movdqa %xmm0,0(%rax) | 2034 movdqa %xmm0,0(%rax) |
| 1741 movdqa %xmm0,16(%rax) | 2035 movdqa %xmm0,16(%rax) |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1771 leaq 256(%rdx),%rdx | 2065 leaq 256(%rdx),%rdx |
| 1772 subl $1,%esi | 2066 subl $1,%esi |
| 1773 jnz .Lscatter | 2067 jnz .Lscatter |
| 1774 .Lscatter_epilogue: | 2068 .Lscatter_epilogue: |
| 1775 .byte 0xf3,0xc3 | 2069 .byte 0xf3,0xc3 |
| 1776 .size bn_scatter5,.-bn_scatter5 | 2070 .size bn_scatter5,.-bn_scatter5 |
| 1777 | 2071 |
| 1778 .globl bn_gather5 | 2072 .globl bn_gather5 |
| 1779 .hidden bn_gather5 | 2073 .hidden bn_gather5 |
| 1780 .type bn_gather5,@function | 2074 .type bn_gather5,@function |
| 1781 .align» 16 | 2075 .align» 32 |
| 1782 bn_gather5: | 2076 bn_gather5: |
| 1783 » movl» %ecx,%r11d | 2077 .LSEH_begin_bn_gather5: |
| 1784 » shrl» $3,%ecx | 2078 |
| 1785 » andq» $7,%r11 | 2079 .byte» 0x4c,0x8d,0x14,0x24 |
| 1786 » notl» %ecx | 2080 .byte» 0x48,0x81,0xec,0x08,0x01,0x00,0x00 |
| 1787 » leaq» .Lmagic_masks(%rip),%rax | 2081 » leaq» .Linc(%rip),%rax |
| 1788 » andl» $3,%ecx | 2082 » andq» $-16,%rsp |
| 1789 » leaq» 128(%rdx,%r11,8),%rdx | 2083 |
| 1790 » movq» 0(%rax,%rcx,8),%xmm4 | 2084 » movd» %ecx,%xmm5 |
| 1791 » movq» 8(%rax,%rcx,8),%xmm5 | 2085 » movdqa» 0(%rax),%xmm0 |
| 1792 » movq» 16(%rax,%rcx,8),%xmm6 | 2086 » movdqa» 16(%rax),%xmm1 |
| 1793 » movq» 24(%rax,%rcx,8),%xmm7 | 2087 » leaq» 128(%rdx),%r11 |
| 2088 » leaq» 128(%rsp),%rax |
| 2089 |
| 2090 » pshufd» $0,%xmm5,%xmm5 |
| 2091 » movdqa» %xmm1,%xmm4 |
| 2092 » movdqa» %xmm1,%xmm2 |
| 2093 » paddd» %xmm0,%xmm1 |
| 2094 » pcmpeqd»%xmm5,%xmm0 |
| 2095 » movdqa» %xmm4,%xmm3 |
| 2096 |
| 2097 » paddd» %xmm1,%xmm2 |
| 2098 » pcmpeqd»%xmm5,%xmm1 |
| 2099 » movdqa» %xmm0,-128(%rax) |
| 2100 » movdqa» %xmm4,%xmm0 |
| 2101 |
| 2102 » paddd» %xmm2,%xmm3 |
| 2103 » pcmpeqd»%xmm5,%xmm2 |
| 2104 » movdqa» %xmm1,-112(%rax) |
| 2105 » movdqa» %xmm4,%xmm1 |
| 2106 |
| 2107 » paddd» %xmm3,%xmm0 |
| 2108 » pcmpeqd»%xmm5,%xmm3 |
| 2109 » movdqa» %xmm2,-96(%rax) |
| 2110 » movdqa» %xmm4,%xmm2 |
| 2111 » paddd» %xmm0,%xmm1 |
| 2112 » pcmpeqd»%xmm5,%xmm0 |
| 2113 » movdqa» %xmm3,-80(%rax) |
| 2114 » movdqa» %xmm4,%xmm3 |
| 2115 |
| 2116 » paddd» %xmm1,%xmm2 |
| 2117 » pcmpeqd»%xmm5,%xmm1 |
| 2118 » movdqa» %xmm0,-64(%rax) |
| 2119 » movdqa» %xmm4,%xmm0 |
| 2120 |
| 2121 » paddd» %xmm2,%xmm3 |
| 2122 » pcmpeqd»%xmm5,%xmm2 |
| 2123 » movdqa» %xmm1,-48(%rax) |
| 2124 » movdqa» %xmm4,%xmm1 |
| 2125 |
| 2126 » paddd» %xmm3,%xmm0 |
| 2127 » pcmpeqd»%xmm5,%xmm3 |
| 2128 » movdqa» %xmm2,-32(%rax) |
| 2129 » movdqa» %xmm4,%xmm2 |
| 2130 » paddd» %xmm0,%xmm1 |
| 2131 » pcmpeqd»%xmm5,%xmm0 |
| 2132 » movdqa» %xmm3,-16(%rax) |
| 2133 » movdqa» %xmm4,%xmm3 |
| 2134 |
| 2135 » paddd» %xmm1,%xmm2 |
| 2136 » pcmpeqd»%xmm5,%xmm1 |
| 2137 » movdqa» %xmm0,0(%rax) |
| 2138 » movdqa» %xmm4,%xmm0 |
| 2139 |
| 2140 » paddd» %xmm2,%xmm3 |
| 2141 » pcmpeqd»%xmm5,%xmm2 |
| 2142 » movdqa» %xmm1,16(%rax) |
| 2143 » movdqa» %xmm4,%xmm1 |
| 2144 |
| 2145 » paddd» %xmm3,%xmm0 |
| 2146 » pcmpeqd»%xmm5,%xmm3 |
| 2147 » movdqa» %xmm2,32(%rax) |
| 2148 » movdqa» %xmm4,%xmm2 |
| 2149 » paddd» %xmm0,%xmm1 |
| 2150 » pcmpeqd»%xmm5,%xmm0 |
| 2151 » movdqa» %xmm3,48(%rax) |
| 2152 » movdqa» %xmm4,%xmm3 |
| 2153 |
| 2154 » paddd» %xmm1,%xmm2 |
| 2155 » pcmpeqd»%xmm5,%xmm1 |
| 2156 » movdqa» %xmm0,64(%rax) |
| 2157 » movdqa» %xmm4,%xmm0 |
| 2158 |
| 2159 » paddd» %xmm2,%xmm3 |
| 2160 » pcmpeqd»%xmm5,%xmm2 |
| 2161 » movdqa» %xmm1,80(%rax) |
| 2162 » movdqa» %xmm4,%xmm1 |
| 2163 |
| 2164 » paddd» %xmm3,%xmm0 |
| 2165 » pcmpeqd»%xmm5,%xmm3 |
| 2166 » movdqa» %xmm2,96(%rax) |
| 2167 » movdqa» %xmm4,%xmm2 |
| 2168 » movdqa» %xmm3,112(%rax) |
| 1794 jmp .Lgather | 2169 jmp .Lgather |
| 1795 .align» 16 | 2170 |
| 2171 .align» 32 |
| 1796 .Lgather: | 2172 .Lgather: |
| 1797 » movq» -128(%rdx),%xmm0 | 2173 » pxor» %xmm4,%xmm4 |
| 1798 » movq» -64(%rdx),%xmm1 | 2174 » pxor» %xmm5,%xmm5 |
| 1799 » pand» %xmm4,%xmm0 | 2175 » movdqa» -128(%r11),%xmm0 |
| 1800 » movq» 0(%rdx),%xmm2 | 2176 » movdqa» -112(%r11),%xmm1 |
| 1801 » pand» %xmm5,%xmm1 | 2177 » movdqa» -96(%r11),%xmm2 |
| 1802 » movq» 64(%rdx),%xmm3 | 2178 » pand» -128(%rax),%xmm0 |
| 1803 » pand» %xmm6,%xmm2 | 2179 » movdqa» -80(%r11),%xmm3 |
| 1804 » por» %xmm1,%xmm0 | 2180 » pand» -112(%rax),%xmm1 |
| 1805 » pand» %xmm7,%xmm3 | 2181 » por» %xmm0,%xmm4 |
| 1806 .byte» 0x67,0x67 | 2182 » pand» -96(%rax),%xmm2 |
| 1807 » por» %xmm2,%xmm0 | 2183 » por» %xmm1,%xmm5 |
| 1808 » leaq» 256(%rdx),%rdx | 2184 » pand» -80(%rax),%xmm3 |
| 1809 » por» %xmm3,%xmm0 | 2185 » por» %xmm2,%xmm4 |
| 1810 | 2186 » por» %xmm3,%xmm5 |
| 2187 » movdqa» -64(%r11),%xmm0 |
| 2188 » movdqa» -48(%r11),%xmm1 |
| 2189 » movdqa» -32(%r11),%xmm2 |
| 2190 » pand» -64(%rax),%xmm0 |
| 2191 » movdqa» -16(%r11),%xmm3 |
| 2192 » pand» -48(%rax),%xmm1 |
| 2193 » por» %xmm0,%xmm4 |
| 2194 » pand» -32(%rax),%xmm2 |
| 2195 » por» %xmm1,%xmm5 |
| 2196 » pand» -16(%rax),%xmm3 |
| 2197 » por» %xmm2,%xmm4 |
| 2198 » por» %xmm3,%xmm5 |
| 2199 » movdqa» 0(%r11),%xmm0 |
| 2200 » movdqa» 16(%r11),%xmm1 |
| 2201 » movdqa» 32(%r11),%xmm2 |
| 2202 » pand» 0(%rax),%xmm0 |
| 2203 » movdqa» 48(%r11),%xmm3 |
| 2204 » pand» 16(%rax),%xmm1 |
| 2205 » por» %xmm0,%xmm4 |
| 2206 » pand» 32(%rax),%xmm2 |
| 2207 » por» %xmm1,%xmm5 |
| 2208 » pand» 48(%rax),%xmm3 |
| 2209 » por» %xmm2,%xmm4 |
| 2210 » por» %xmm3,%xmm5 |
| 2211 » movdqa» 64(%r11),%xmm0 |
| 2212 » movdqa» 80(%r11),%xmm1 |
| 2213 » movdqa» 96(%r11),%xmm2 |
| 2214 » pand» 64(%rax),%xmm0 |
| 2215 » movdqa» 112(%r11),%xmm3 |
| 2216 » pand» 80(%rax),%xmm1 |
| 2217 » por» %xmm0,%xmm4 |
| 2218 » pand» 96(%rax),%xmm2 |
| 2219 » por» %xmm1,%xmm5 |
| 2220 » pand» 112(%rax),%xmm3 |
| 2221 » por» %xmm2,%xmm4 |
| 2222 » por» %xmm3,%xmm5 |
| 2223 » por» %xmm5,%xmm4 |
| 2224 » leaq» 256(%r11),%r11 |
| 2225 » pshufd» $0x4e,%xmm4,%xmm0 |
| 2226 » por» %xmm4,%xmm0 |
| 1811 movq %xmm0,(%rdi) | 2227 movq %xmm0,(%rdi) |
| 1812 leaq 8(%rdi),%rdi | 2228 leaq 8(%rdi),%rdi |
| 1813 subl $1,%esi | 2229 subl $1,%esi |
| 1814 jnz .Lgather | 2230 jnz .Lgather |
| 2231 |
| 2232 leaq (%r10),%rsp |
| 1815 .byte 0xf3,0xc3 | 2233 .byte 0xf3,0xc3 |
| 1816 .LSEH_end_bn_gather5: | 2234 .LSEH_end_bn_gather5: |
| 1817 .size bn_gather5,.-bn_gather5 | 2235 .size bn_gather5,.-bn_gather5 |
| 1818 .align 64 | 2236 .align 64 |
| 1819 .Lmagic_masks: | 2237 .Linc: |
| 1820 .long» 0,0, 0,0, 0,0, -1,-1 | 2238 .long» 0,0, 1,1 |
| 1821 .long» 0,0, 0,0, 0,0, 0,0 | 2239 .long» 2,2, 2,2 |
| 1822 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 | 2240 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105
,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97
,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,1
11,114,103,62,0 |
| 1823 #endif | 2241 #endif |
| OLD | NEW |